{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 8139, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00012286521685710776, "grad_norm": 1.603324938033752, "learning_rate": 3.685503685503685e-08, "loss": 0.5026, "step": 1 }, { "epoch": 0.0002457304337142155, "grad_norm": 1.428100462150552, "learning_rate": 7.37100737100737e-08, "loss": 0.512, "step": 2 }, { "epoch": 0.00036859565057132326, "grad_norm": 1.2887663800285174, "learning_rate": 1.1056511056511058e-07, "loss": 0.4703, "step": 3 }, { "epoch": 0.000491460867428431, "grad_norm": 1.2718257297386195, "learning_rate": 1.474201474201474e-07, "loss": 0.4545, "step": 4 }, { "epoch": 0.0006143260842855387, "grad_norm": 1.378999154357745, "learning_rate": 1.8427518427518426e-07, "loss": 0.4823, "step": 5 }, { "epoch": 0.0007371913011426465, "grad_norm": 1.597153352070934, "learning_rate": 2.2113022113022115e-07, "loss": 0.545, "step": 6 }, { "epoch": 0.0008600565179997543, "grad_norm": 1.512256288970955, "learning_rate": 2.57985257985258e-07, "loss": 0.4704, "step": 7 }, { "epoch": 0.000982921734856862, "grad_norm": 1.3956318536656023, "learning_rate": 2.948402948402948e-07, "loss": 0.5186, "step": 8 }, { "epoch": 0.0011057869517139699, "grad_norm": 1.6613919169474936, "learning_rate": 3.3169533169533167e-07, "loss": 0.4953, "step": 9 }, { "epoch": 0.0012286521685710775, "grad_norm": 1.3150264852312088, "learning_rate": 3.685503685503685e-07, "loss": 0.4837, "step": 10 }, { "epoch": 0.0013515173854281852, "grad_norm": 1.8571089824432478, "learning_rate": 4.0540540540540546e-07, "loss": 0.5969, "step": 11 }, { "epoch": 0.001474382602285293, "grad_norm": 1.4330624049213283, "learning_rate": 4.422604422604423e-07, "loss": 0.4782, "step": 12 }, { "epoch": 0.0015972478191424008, "grad_norm": 1.6004039683552072, "learning_rate": 4.791154791154791e-07, "loss": 0.5095, "step": 13 }, { "epoch": 0.0017201130359995086, "grad_norm": 1.2832378975912375, "learning_rate": 5.15970515970516e-07, "loss": 0.4436, "step": 14 }, { "epoch": 0.0018429782528566164, "grad_norm": 1.5195511257418715, "learning_rate": 5.528255528255528e-07, "loss": 0.5519, "step": 15 }, { "epoch": 0.001965843469713724, "grad_norm": 1.699811198552415, "learning_rate": 5.896805896805896e-07, "loss": 0.5713, "step": 16 }, { "epoch": 0.0020887086865708318, "grad_norm": 1.2309894697084056, "learning_rate": 6.265356265356265e-07, "loss": 0.4541, "step": 17 }, { "epoch": 0.0022115739034279398, "grad_norm": 1.5028936603893828, "learning_rate": 6.633906633906633e-07, "loss": 0.5129, "step": 18 }, { "epoch": 0.0023344391202850473, "grad_norm": 1.5010539935905738, "learning_rate": 7.002457002457002e-07, "loss": 0.5518, "step": 19 }, { "epoch": 0.002457304337142155, "grad_norm": 1.4898921221172023, "learning_rate": 7.37100737100737e-07, "loss": 0.5194, "step": 20 }, { "epoch": 0.002580169553999263, "grad_norm": 1.4456340269185366, "learning_rate": 7.73955773955774e-07, "loss": 0.542, "step": 21 }, { "epoch": 0.0027030347708563705, "grad_norm": 1.4454522313325697, "learning_rate": 8.108108108108109e-07, "loss": 0.5187, "step": 22 }, { "epoch": 0.0028258999877134785, "grad_norm": 1.4233317961788152, "learning_rate": 8.476658476658478e-07, "loss": 0.5085, "step": 23 }, { "epoch": 0.002948765204570586, "grad_norm": 1.314986750783574, "learning_rate": 8.845208845208846e-07, "loss": 0.5104, "step": 24 }, { "epoch": 0.0030716304214276936, "grad_norm": 1.40768134540736, "learning_rate": 9.213759213759215e-07, "loss": 0.4478, "step": 25 }, { "epoch": 0.0031944956382848016, "grad_norm": 1.4794445811250256, "learning_rate": 9.582309582309582e-07, "loss": 0.4591, "step": 26 }, { "epoch": 0.003317360855141909, "grad_norm": 1.5086721447329507, "learning_rate": 9.95085995085995e-07, "loss": 0.5911, "step": 27 }, { "epoch": 0.003440226071999017, "grad_norm": 1.446277952814564, "learning_rate": 1.031941031941032e-06, "loss": 0.5167, "step": 28 }, { "epoch": 0.003563091288856125, "grad_norm": 1.3944410026921856, "learning_rate": 1.0687960687960688e-06, "loss": 0.5199, "step": 29 }, { "epoch": 0.003685956505713233, "grad_norm": 1.5398093996256128, "learning_rate": 1.1056511056511056e-06, "loss": 0.5505, "step": 30 }, { "epoch": 0.0038088217225703404, "grad_norm": 1.4069072160712408, "learning_rate": 1.1425061425061425e-06, "loss": 0.4702, "step": 31 }, { "epoch": 0.003931686939427448, "grad_norm": 1.2617275794057357, "learning_rate": 1.1793611793611793e-06, "loss": 0.4506, "step": 32 }, { "epoch": 0.004054552156284556, "grad_norm": 1.4019437295688641, "learning_rate": 1.2162162162162162e-06, "loss": 0.5515, "step": 33 }, { "epoch": 0.0041774173731416635, "grad_norm": 1.300424026546657, "learning_rate": 1.253071253071253e-06, "loss": 0.4774, "step": 34 }, { "epoch": 0.004300282589998771, "grad_norm": 1.363891792168875, "learning_rate": 1.28992628992629e-06, "loss": 0.5114, "step": 35 }, { "epoch": 0.0044231478068558795, "grad_norm": 1.4898018522184897, "learning_rate": 1.3267813267813267e-06, "loss": 0.519, "step": 36 }, { "epoch": 0.004546013023712987, "grad_norm": 1.336391979213965, "learning_rate": 1.3636363636363636e-06, "loss": 0.4734, "step": 37 }, { "epoch": 0.004668878240570095, "grad_norm": 1.3377875562578532, "learning_rate": 1.4004914004914004e-06, "loss": 0.5325, "step": 38 }, { "epoch": 0.004791743457427202, "grad_norm": 1.121600727576231, "learning_rate": 1.4373464373464373e-06, "loss": 0.4502, "step": 39 }, { "epoch": 0.00491460867428431, "grad_norm": 1.3172913917142601, "learning_rate": 1.474201474201474e-06, "loss": 0.5361, "step": 40 }, { "epoch": 0.005037473891141418, "grad_norm": 10.153350986805975, "learning_rate": 1.5110565110565112e-06, "loss": 0.5059, "step": 41 }, { "epoch": 0.005160339107998526, "grad_norm": 1.0891495468844234, "learning_rate": 1.547911547911548e-06, "loss": 0.445, "step": 42 }, { "epoch": 0.005283204324855633, "grad_norm": 1.2471055376891682, "learning_rate": 1.584766584766585e-06, "loss": 0.4917, "step": 43 }, { "epoch": 0.005406069541712741, "grad_norm": 1.3583339025253822, "learning_rate": 1.6216216216216219e-06, "loss": 0.5702, "step": 44 }, { "epoch": 0.0055289347585698485, "grad_norm": 1.0304061851010278, "learning_rate": 1.6584766584766586e-06, "loss": 0.4642, "step": 45 }, { "epoch": 0.005651799975426957, "grad_norm": 1.1127382687526324, "learning_rate": 1.6953316953316955e-06, "loss": 0.5053, "step": 46 }, { "epoch": 0.0057746651922840645, "grad_norm": 1.1148106447575274, "learning_rate": 1.7321867321867323e-06, "loss": 0.4853, "step": 47 }, { "epoch": 0.005897530409141172, "grad_norm": 1.027325295640197, "learning_rate": 1.7690417690417692e-06, "loss": 0.5197, "step": 48 }, { "epoch": 0.00602039562599828, "grad_norm": 0.8877957130835421, "learning_rate": 1.805896805896806e-06, "loss": 0.4528, "step": 49 }, { "epoch": 0.006143260842855387, "grad_norm": 0.855016876942104, "learning_rate": 1.842751842751843e-06, "loss": 0.542, "step": 50 }, { "epoch": 0.006266126059712496, "grad_norm": 0.8947519402840014, "learning_rate": 1.8796068796068799e-06, "loss": 0.4764, "step": 51 }, { "epoch": 0.006388991276569603, "grad_norm": 0.9355024681499206, "learning_rate": 1.9164619164619164e-06, "loss": 0.4851, "step": 52 }, { "epoch": 0.006511856493426711, "grad_norm": 0.8500669653799359, "learning_rate": 1.9533169533169534e-06, "loss": 0.5023, "step": 53 }, { "epoch": 0.006634721710283818, "grad_norm": 0.8962968745546787, "learning_rate": 1.99017199017199e-06, "loss": 0.4322, "step": 54 }, { "epoch": 0.006757586927140926, "grad_norm": 0.7686495230535406, "learning_rate": 2.0270270270270273e-06, "loss": 0.4556, "step": 55 }, { "epoch": 0.006880452143998034, "grad_norm": 0.7370681521660536, "learning_rate": 2.063882063882064e-06, "loss": 0.3977, "step": 56 }, { "epoch": 0.007003317360855142, "grad_norm": 0.7944770050046931, "learning_rate": 2.1007371007371007e-06, "loss": 0.4506, "step": 57 }, { "epoch": 0.00712618257771225, "grad_norm": 0.7554082447273786, "learning_rate": 2.1375921375921377e-06, "loss": 0.4646, "step": 58 }, { "epoch": 0.007249047794569357, "grad_norm": 0.8720167569995653, "learning_rate": 2.1744471744471746e-06, "loss": 0.4672, "step": 59 }, { "epoch": 0.007371913011426466, "grad_norm": 0.7604719531032247, "learning_rate": 2.211302211302211e-06, "loss": 0.4516, "step": 60 }, { "epoch": 0.007494778228283573, "grad_norm": 0.8011883002908144, "learning_rate": 2.248157248157248e-06, "loss": 0.5131, "step": 61 }, { "epoch": 0.007617643445140681, "grad_norm": 0.6748476168330207, "learning_rate": 2.285012285012285e-06, "loss": 0.4524, "step": 62 }, { "epoch": 0.007740508661997788, "grad_norm": 0.6491855403223123, "learning_rate": 2.321867321867322e-06, "loss": 0.4101, "step": 63 }, { "epoch": 0.007863373878854897, "grad_norm": 0.5719523702507022, "learning_rate": 2.3587223587223586e-06, "loss": 0.4349, "step": 64 }, { "epoch": 0.007986239095712003, "grad_norm": 0.6186931157808783, "learning_rate": 2.3955773955773955e-06, "loss": 0.4142, "step": 65 }, { "epoch": 0.008109104312569112, "grad_norm": 0.5957062256781661, "learning_rate": 2.4324324324324325e-06, "loss": 0.4104, "step": 66 }, { "epoch": 0.008231969529426219, "grad_norm": 0.6146675164075225, "learning_rate": 2.4692874692874694e-06, "loss": 0.4206, "step": 67 }, { "epoch": 0.008354834746283327, "grad_norm": 0.5989422238901845, "learning_rate": 2.506142506142506e-06, "loss": 0.3818, "step": 68 }, { "epoch": 0.008477699963140435, "grad_norm": 0.6734602831091736, "learning_rate": 2.542997542997543e-06, "loss": 0.4911, "step": 69 }, { "epoch": 0.008600565179997542, "grad_norm": 0.5381576200178793, "learning_rate": 2.57985257985258e-06, "loss": 0.3772, "step": 70 }, { "epoch": 0.00872343039685465, "grad_norm": 0.5004890910895633, "learning_rate": 2.616707616707617e-06, "loss": 0.4088, "step": 71 }, { "epoch": 0.008846295613711759, "grad_norm": 0.6971963930213113, "learning_rate": 2.6535626535626533e-06, "loss": 0.4044, "step": 72 }, { "epoch": 0.008969160830568866, "grad_norm": 0.6083940334666799, "learning_rate": 2.6904176904176907e-06, "loss": 0.4692, "step": 73 }, { "epoch": 0.009092026047425974, "grad_norm": 0.6512694430324091, "learning_rate": 2.7272727272727272e-06, "loss": 0.4702, "step": 74 }, { "epoch": 0.009214891264283081, "grad_norm": 0.6008851421154525, "learning_rate": 2.764127764127764e-06, "loss": 0.4301, "step": 75 }, { "epoch": 0.00933775648114019, "grad_norm": 0.5582726507084783, "learning_rate": 2.8009828009828007e-06, "loss": 0.4335, "step": 76 }, { "epoch": 0.009460621697997298, "grad_norm": 0.5982257441261355, "learning_rate": 2.837837837837838e-06, "loss": 0.4968, "step": 77 }, { "epoch": 0.009583486914854404, "grad_norm": 0.49782306796712933, "learning_rate": 2.8746928746928746e-06, "loss": 0.4217, "step": 78 }, { "epoch": 0.009706352131711513, "grad_norm": 0.5960234117545699, "learning_rate": 2.9115479115479116e-06, "loss": 0.441, "step": 79 }, { "epoch": 0.00982921734856862, "grad_norm": 0.5183018810583673, "learning_rate": 2.948402948402948e-06, "loss": 0.3377, "step": 80 }, { "epoch": 0.009952082565425728, "grad_norm": 0.6115849569301451, "learning_rate": 2.9852579852579855e-06, "loss": 0.3908, "step": 81 }, { "epoch": 0.010074947782282836, "grad_norm": 0.5654592739152451, "learning_rate": 3.0221130221130224e-06, "loss": 0.5543, "step": 82 }, { "epoch": 0.010197812999139943, "grad_norm": 0.6129138995493904, "learning_rate": 3.058968058968059e-06, "loss": 0.4084, "step": 83 }, { "epoch": 0.010320678215997052, "grad_norm": 0.4721360536373142, "learning_rate": 3.095823095823096e-06, "loss": 0.4135, "step": 84 }, { "epoch": 0.010443543432854158, "grad_norm": 0.5221293553119344, "learning_rate": 3.132678132678133e-06, "loss": 0.4216, "step": 85 }, { "epoch": 0.010566408649711267, "grad_norm": 0.6082878151571083, "learning_rate": 3.16953316953317e-06, "loss": 0.5435, "step": 86 }, { "epoch": 0.010689273866568375, "grad_norm": 0.5249482166885526, "learning_rate": 3.2063882063882063e-06, "loss": 0.5268, "step": 87 }, { "epoch": 0.010812139083425482, "grad_norm": 0.59037848452242, "learning_rate": 3.2432432432432437e-06, "loss": 0.4831, "step": 88 }, { "epoch": 0.01093500430028259, "grad_norm": 0.5173088066591236, "learning_rate": 3.2800982800982802e-06, "loss": 0.4504, "step": 89 }, { "epoch": 0.011057869517139697, "grad_norm": 0.4269810367256875, "learning_rate": 3.316953316953317e-06, "loss": 0.3679, "step": 90 }, { "epoch": 0.011180734733996806, "grad_norm": 0.5115724430077834, "learning_rate": 3.3538083538083537e-06, "loss": 0.366, "step": 91 }, { "epoch": 0.011303599950853914, "grad_norm": 0.4824424677754483, "learning_rate": 3.390663390663391e-06, "loss": 0.3837, "step": 92 }, { "epoch": 0.01142646516771102, "grad_norm": 0.4799240583014041, "learning_rate": 3.4275184275184276e-06, "loss": 0.4669, "step": 93 }, { "epoch": 0.011549330384568129, "grad_norm": 0.5607129869950703, "learning_rate": 3.4643734643734646e-06, "loss": 0.4097, "step": 94 }, { "epoch": 0.011672195601425236, "grad_norm": 0.5048534881933457, "learning_rate": 3.501228501228501e-06, "loss": 0.4532, "step": 95 }, { "epoch": 0.011795060818282344, "grad_norm": 0.7512956105336404, "learning_rate": 3.5380835380835385e-06, "loss": 0.504, "step": 96 }, { "epoch": 0.011917926035139453, "grad_norm": 0.5060182536897282, "learning_rate": 3.574938574938575e-06, "loss": 0.4474, "step": 97 }, { "epoch": 0.01204079125199656, "grad_norm": 0.7145518059026312, "learning_rate": 3.611793611793612e-06, "loss": 0.443, "step": 98 }, { "epoch": 0.012163656468853668, "grad_norm": 0.4423303937361937, "learning_rate": 3.648648648648649e-06, "loss": 0.4005, "step": 99 }, { "epoch": 0.012286521685710775, "grad_norm": 0.4429639810842417, "learning_rate": 3.685503685503686e-06, "loss": 0.3908, "step": 100 }, { "epoch": 0.012409386902567883, "grad_norm": 0.39857205565317344, "learning_rate": 3.7223587223587224e-06, "loss": 0.396, "step": 101 }, { "epoch": 0.012532252119424991, "grad_norm": 0.6085942427656343, "learning_rate": 3.7592137592137598e-06, "loss": 0.4264, "step": 102 }, { "epoch": 0.012655117336282098, "grad_norm": 0.44457912476319017, "learning_rate": 3.796068796068796e-06, "loss": 0.3992, "step": 103 }, { "epoch": 0.012777982553139207, "grad_norm": 0.4297011142354609, "learning_rate": 3.832923832923833e-06, "loss": 0.4455, "step": 104 }, { "epoch": 0.012900847769996313, "grad_norm": 0.39545728787569656, "learning_rate": 3.869778869778871e-06, "loss": 0.3522, "step": 105 }, { "epoch": 0.013023712986853422, "grad_norm": 0.5249571655380402, "learning_rate": 3.906633906633907e-06, "loss": 0.3474, "step": 106 }, { "epoch": 0.01314657820371053, "grad_norm": 0.47543127529686574, "learning_rate": 3.943488943488944e-06, "loss": 0.403, "step": 107 }, { "epoch": 0.013269443420567637, "grad_norm": 0.5706468689220986, "learning_rate": 3.98034398034398e-06, "loss": 0.3934, "step": 108 }, { "epoch": 0.013392308637424745, "grad_norm": 0.3893033812377796, "learning_rate": 4.0171990171990176e-06, "loss": 0.3958, "step": 109 }, { "epoch": 0.013515173854281852, "grad_norm": 0.5855243417651349, "learning_rate": 4.0540540540540545e-06, "loss": 0.4706, "step": 110 }, { "epoch": 0.01363803907113896, "grad_norm": 0.4768763350285752, "learning_rate": 4.090909090909091e-06, "loss": 0.4862, "step": 111 }, { "epoch": 0.013760904287996069, "grad_norm": 0.45047125553011325, "learning_rate": 4.127764127764128e-06, "loss": 0.44, "step": 112 }, { "epoch": 0.013883769504853176, "grad_norm": 0.5107061905718472, "learning_rate": 4.164619164619165e-06, "loss": 0.4627, "step": 113 }, { "epoch": 0.014006634721710284, "grad_norm": 0.4648183811546032, "learning_rate": 4.2014742014742015e-06, "loss": 0.4047, "step": 114 }, { "epoch": 0.014129499938567392, "grad_norm": 0.46518997346225466, "learning_rate": 4.2383292383292384e-06, "loss": 0.4788, "step": 115 }, { "epoch": 0.0142523651554245, "grad_norm": 0.39031766630437553, "learning_rate": 4.275184275184275e-06, "loss": 0.4888, "step": 116 }, { "epoch": 0.014375230372281608, "grad_norm": 0.4614918624521767, "learning_rate": 4.312039312039312e-06, "loss": 0.4218, "step": 117 }, { "epoch": 0.014498095589138714, "grad_norm": 0.3692942336640937, "learning_rate": 4.348894348894349e-06, "loss": 0.3929, "step": 118 }, { "epoch": 0.014620960805995823, "grad_norm": 0.6509571296434564, "learning_rate": 4.385749385749385e-06, "loss": 0.4592, "step": 119 }, { "epoch": 0.014743826022852931, "grad_norm": 0.5791944380871621, "learning_rate": 4.422604422604422e-06, "loss": 0.4477, "step": 120 }, { "epoch": 0.014866691239710038, "grad_norm": 0.42861740255364467, "learning_rate": 4.45945945945946e-06, "loss": 0.3905, "step": 121 }, { "epoch": 0.014989556456567146, "grad_norm": 0.5973939022332371, "learning_rate": 4.496314496314496e-06, "loss": 0.5086, "step": 122 }, { "epoch": 0.015112421673424253, "grad_norm": 0.47127316595721064, "learning_rate": 4.533169533169533e-06, "loss": 0.3793, "step": 123 }, { "epoch": 0.015235286890281361, "grad_norm": 0.40721983350500507, "learning_rate": 4.57002457002457e-06, "loss": 0.4087, "step": 124 }, { "epoch": 0.01535815210713847, "grad_norm": 0.4673731183596258, "learning_rate": 4.606879606879607e-06, "loss": 0.4863, "step": 125 }, { "epoch": 0.015481017323995577, "grad_norm": 0.5121202079552781, "learning_rate": 4.643734643734644e-06, "loss": 0.5277, "step": 126 }, { "epoch": 0.015603882540852685, "grad_norm": 0.46741943911581824, "learning_rate": 4.680589680589681e-06, "loss": 0.4284, "step": 127 }, { "epoch": 0.015726747757709793, "grad_norm": 0.5461760780825028, "learning_rate": 4.717444717444717e-06, "loss": 0.4286, "step": 128 }, { "epoch": 0.0158496129745669, "grad_norm": 0.3998882311799952, "learning_rate": 4.754299754299755e-06, "loss": 0.3474, "step": 129 }, { "epoch": 0.015972478191424007, "grad_norm": 0.5291306607783219, "learning_rate": 4.791154791154791e-06, "loss": 0.4852, "step": 130 }, { "epoch": 0.016095343408281115, "grad_norm": 0.46206559498190675, "learning_rate": 4.828009828009828e-06, "loss": 0.3661, "step": 131 }, { "epoch": 0.016218208625138224, "grad_norm": 0.3988327283612701, "learning_rate": 4.864864864864865e-06, "loss": 0.3925, "step": 132 }, { "epoch": 0.016341073841995332, "grad_norm": 0.3934404064676721, "learning_rate": 4.901719901719902e-06, "loss": 0.434, "step": 133 }, { "epoch": 0.016463939058852437, "grad_norm": 0.4164992908571235, "learning_rate": 4.938574938574939e-06, "loss": 0.4696, "step": 134 }, { "epoch": 0.016586804275709546, "grad_norm": 0.4230468765751428, "learning_rate": 4.975429975429976e-06, "loss": 0.5021, "step": 135 }, { "epoch": 0.016709669492566654, "grad_norm": 0.4883468602429549, "learning_rate": 5.012285012285012e-06, "loss": 0.3928, "step": 136 }, { "epoch": 0.016832534709423762, "grad_norm": 0.5073172670585047, "learning_rate": 5.04914004914005e-06, "loss": 0.432, "step": 137 }, { "epoch": 0.01695539992628087, "grad_norm": 0.4193406879165758, "learning_rate": 5.085995085995086e-06, "loss": 0.4393, "step": 138 }, { "epoch": 0.01707826514313798, "grad_norm": 0.44018444120606987, "learning_rate": 5.122850122850123e-06, "loss": 0.4312, "step": 139 }, { "epoch": 0.017201130359995084, "grad_norm": 0.4241345782718789, "learning_rate": 5.15970515970516e-06, "loss": 0.4781, "step": 140 }, { "epoch": 0.017323995576852193, "grad_norm": 0.4429951499440448, "learning_rate": 5.196560196560197e-06, "loss": 0.4664, "step": 141 }, { "epoch": 0.0174468607937093, "grad_norm": 0.42313581650206183, "learning_rate": 5.233415233415234e-06, "loss": 0.5131, "step": 142 }, { "epoch": 0.01756972601056641, "grad_norm": 0.4433155852623209, "learning_rate": 5.2702702702702705e-06, "loss": 0.4175, "step": 143 }, { "epoch": 0.017692591227423518, "grad_norm": 0.5362233946748364, "learning_rate": 5.307125307125307e-06, "loss": 0.5587, "step": 144 }, { "epoch": 0.017815456444280623, "grad_norm": 0.5209982706044964, "learning_rate": 5.3439803439803444e-06, "loss": 0.4214, "step": 145 }, { "epoch": 0.01793832166113773, "grad_norm": 0.4686225076436868, "learning_rate": 5.380835380835381e-06, "loss": 0.3935, "step": 146 }, { "epoch": 0.01806118687799484, "grad_norm": 0.6185227014611167, "learning_rate": 5.4176904176904175e-06, "loss": 0.4814, "step": 147 }, { "epoch": 0.01818405209485195, "grad_norm": 0.5081822440264036, "learning_rate": 5.4545454545454545e-06, "loss": 0.4344, "step": 148 }, { "epoch": 0.018306917311709057, "grad_norm": 0.3570689812115862, "learning_rate": 5.491400491400491e-06, "loss": 0.336, "step": 149 }, { "epoch": 0.018429782528566162, "grad_norm": 0.414303521563402, "learning_rate": 5.528255528255528e-06, "loss": 0.4267, "step": 150 }, { "epoch": 0.01855264774542327, "grad_norm": 0.4533539859474672, "learning_rate": 5.565110565110565e-06, "loss": 0.3601, "step": 151 }, { "epoch": 0.01867551296228038, "grad_norm": 0.4678052677049452, "learning_rate": 5.601965601965601e-06, "loss": 0.3672, "step": 152 }, { "epoch": 0.018798378179137487, "grad_norm": 0.42497512998337633, "learning_rate": 5.638820638820639e-06, "loss": 0.4372, "step": 153 }, { "epoch": 0.018921243395994596, "grad_norm": 0.4742466961676224, "learning_rate": 5.675675675675676e-06, "loss": 0.5023, "step": 154 }, { "epoch": 0.0190441086128517, "grad_norm": 0.3710902342334724, "learning_rate": 5.712530712530712e-06, "loss": 0.383, "step": 155 }, { "epoch": 0.01916697382970881, "grad_norm": 0.44962518742085394, "learning_rate": 5.749385749385749e-06, "loss": 0.3923, "step": 156 }, { "epoch": 0.019289839046565917, "grad_norm": 0.557161586681521, "learning_rate": 5.786240786240787e-06, "loss": 0.5055, "step": 157 }, { "epoch": 0.019412704263423026, "grad_norm": 0.5299289295977175, "learning_rate": 5.823095823095823e-06, "loss": 0.4431, "step": 158 }, { "epoch": 0.019535569480280134, "grad_norm": 0.522677045716278, "learning_rate": 5.85995085995086e-06, "loss": 0.4435, "step": 159 }, { "epoch": 0.01965843469713724, "grad_norm": 0.45237846364690026, "learning_rate": 5.896805896805896e-06, "loss": 0.4089, "step": 160 }, { "epoch": 0.019781299913994348, "grad_norm": 0.3808268921971058, "learning_rate": 5.933660933660934e-06, "loss": 0.3929, "step": 161 }, { "epoch": 0.019904165130851456, "grad_norm": 0.633783367934235, "learning_rate": 5.970515970515971e-06, "loss": 0.4727, "step": 162 }, { "epoch": 0.020027030347708565, "grad_norm": 0.45074709305536237, "learning_rate": 6.007371007371007e-06, "loss": 0.3651, "step": 163 }, { "epoch": 0.020149895564565673, "grad_norm": 0.5732348555701932, "learning_rate": 6.044226044226045e-06, "loss": 0.426, "step": 164 }, { "epoch": 0.020272760781422778, "grad_norm": 0.4027404381309523, "learning_rate": 6.081081081081082e-06, "loss": 0.3274, "step": 165 }, { "epoch": 0.020395625998279886, "grad_norm": 0.3727085847574403, "learning_rate": 6.117936117936118e-06, "loss": 0.4712, "step": 166 }, { "epoch": 0.020518491215136995, "grad_norm": 0.5604527851220885, "learning_rate": 6.154791154791155e-06, "loss": 0.4233, "step": 167 }, { "epoch": 0.020641356431994103, "grad_norm": 0.4853193532632929, "learning_rate": 6.191646191646192e-06, "loss": 0.3877, "step": 168 }, { "epoch": 0.02076422164885121, "grad_norm": 0.3871628328082718, "learning_rate": 6.228501228501229e-06, "loss": 0.3652, "step": 169 }, { "epoch": 0.020887086865708317, "grad_norm": 0.5411295721477973, "learning_rate": 6.265356265356266e-06, "loss": 0.4773, "step": 170 }, { "epoch": 0.021009952082565425, "grad_norm": 0.48194197819737433, "learning_rate": 6.302211302211302e-06, "loss": 0.4608, "step": 171 }, { "epoch": 0.021132817299422534, "grad_norm": 0.41251725133024253, "learning_rate": 6.33906633906634e-06, "loss": 0.4054, "step": 172 }, { "epoch": 0.021255682516279642, "grad_norm": 0.4462644294206386, "learning_rate": 6.3759213759213766e-06, "loss": 0.4145, "step": 173 }, { "epoch": 0.02137854773313675, "grad_norm": 0.4782564888472773, "learning_rate": 6.412776412776413e-06, "loss": 0.5514, "step": 174 }, { "epoch": 0.021501412949993855, "grad_norm": 0.4745979623707723, "learning_rate": 6.44963144963145e-06, "loss": 0.4132, "step": 175 }, { "epoch": 0.021624278166850964, "grad_norm": 0.47968991231945307, "learning_rate": 6.486486486486487e-06, "loss": 0.4972, "step": 176 }, { "epoch": 0.021747143383708072, "grad_norm": 0.5144312316005866, "learning_rate": 6.5233415233415235e-06, "loss": 0.4229, "step": 177 }, { "epoch": 0.02187000860056518, "grad_norm": 0.4856611971042782, "learning_rate": 6.5601965601965605e-06, "loss": 0.5235, "step": 178 }, { "epoch": 0.02199287381742229, "grad_norm": 0.3919456011246653, "learning_rate": 6.5970515970515966e-06, "loss": 0.346, "step": 179 }, { "epoch": 0.022115739034279394, "grad_norm": 0.4682985370363355, "learning_rate": 6.633906633906634e-06, "loss": 0.3714, "step": 180 }, { "epoch": 0.022238604251136503, "grad_norm": 0.40004784695180984, "learning_rate": 6.670761670761671e-06, "loss": 0.4386, "step": 181 }, { "epoch": 0.02236146946799361, "grad_norm": 0.508288736856916, "learning_rate": 6.707616707616707e-06, "loss": 0.4158, "step": 182 }, { "epoch": 0.02248433468485072, "grad_norm": 0.3288453519075374, "learning_rate": 6.744471744471744e-06, "loss": 0.3814, "step": 183 }, { "epoch": 0.022607199901707828, "grad_norm": 0.4609107695997207, "learning_rate": 6.781326781326782e-06, "loss": 0.4026, "step": 184 }, { "epoch": 0.022730065118564933, "grad_norm": 0.5700002267043061, "learning_rate": 6.818181818181818e-06, "loss": 0.4137, "step": 185 }, { "epoch": 0.02285293033542204, "grad_norm": 0.511221909835052, "learning_rate": 6.855036855036855e-06, "loss": 0.4614, "step": 186 }, { "epoch": 0.02297579555227915, "grad_norm": 0.378800073832921, "learning_rate": 6.891891891891892e-06, "loss": 0.3884, "step": 187 }, { "epoch": 0.023098660769136258, "grad_norm": 0.707640226194748, "learning_rate": 6.928746928746929e-06, "loss": 0.5055, "step": 188 }, { "epoch": 0.023221525985993367, "grad_norm": 0.41976873385124897, "learning_rate": 6.965601965601966e-06, "loss": 0.3933, "step": 189 }, { "epoch": 0.02334439120285047, "grad_norm": 0.43704209940764205, "learning_rate": 7.002457002457002e-06, "loss": 0.3936, "step": 190 }, { "epoch": 0.02346725641970758, "grad_norm": 0.44405702767082117, "learning_rate": 7.039312039312039e-06, "loss": 0.3387, "step": 191 }, { "epoch": 0.02359012163656469, "grad_norm": 0.40219114761254054, "learning_rate": 7.076167076167077e-06, "loss": 0.4235, "step": 192 }, { "epoch": 0.023712986853421797, "grad_norm": 0.41660183665292894, "learning_rate": 7.113022113022113e-06, "loss": 0.3322, "step": 193 }, { "epoch": 0.023835852070278905, "grad_norm": 0.3565730337521631, "learning_rate": 7.14987714987715e-06, "loss": 0.3787, "step": 194 }, { "epoch": 0.02395871728713601, "grad_norm": 0.4037105144359673, "learning_rate": 7.186732186732187e-06, "loss": 0.4336, "step": 195 }, { "epoch": 0.02408158250399312, "grad_norm": 0.45707145669389765, "learning_rate": 7.223587223587224e-06, "loss": 0.4593, "step": 196 }, { "epoch": 0.024204447720850227, "grad_norm": 0.4962638071024084, "learning_rate": 7.260442260442261e-06, "loss": 0.4428, "step": 197 }, { "epoch": 0.024327312937707336, "grad_norm": 0.469118496845936, "learning_rate": 7.297297297297298e-06, "loss": 0.4349, "step": 198 }, { "epoch": 0.024450178154564444, "grad_norm": 0.4126743019608096, "learning_rate": 7.334152334152334e-06, "loss": 0.3774, "step": 199 }, { "epoch": 0.02457304337142155, "grad_norm": 0.4266497410542221, "learning_rate": 7.371007371007372e-06, "loss": 0.3394, "step": 200 }, { "epoch": 0.024695908588278657, "grad_norm": 0.5727892194960817, "learning_rate": 7.407862407862408e-06, "loss": 0.4725, "step": 201 }, { "epoch": 0.024818773805135766, "grad_norm": 0.43359120771855497, "learning_rate": 7.444717444717445e-06, "loss": 0.4041, "step": 202 }, { "epoch": 0.024941639021992874, "grad_norm": 0.42626251356545425, "learning_rate": 7.481572481572482e-06, "loss": 0.4299, "step": 203 }, { "epoch": 0.025064504238849983, "grad_norm": 0.4640737318761848, "learning_rate": 7.5184275184275195e-06, "loss": 0.4058, "step": 204 }, { "epoch": 0.025187369455707088, "grad_norm": 0.46997444856704707, "learning_rate": 7.555282555282556e-06, "loss": 0.4635, "step": 205 }, { "epoch": 0.025310234672564196, "grad_norm": 0.49427200120146014, "learning_rate": 7.592137592137592e-06, "loss": 0.3495, "step": 206 }, { "epoch": 0.025433099889421305, "grad_norm": 0.41683409626733126, "learning_rate": 7.6289926289926295e-06, "loss": 0.4132, "step": 207 }, { "epoch": 0.025555965106278413, "grad_norm": 0.47945253895071027, "learning_rate": 7.665847665847666e-06, "loss": 0.4276, "step": 208 }, { "epoch": 0.02567883032313552, "grad_norm": 0.5090970115453091, "learning_rate": 7.702702702702703e-06, "loss": 0.4207, "step": 209 }, { "epoch": 0.025801695539992626, "grad_norm": 0.33770952371645496, "learning_rate": 7.739557739557741e-06, "loss": 0.3558, "step": 210 }, { "epoch": 0.025924560756849735, "grad_norm": 0.45147856930916075, "learning_rate": 7.776412776412776e-06, "loss": 0.408, "step": 211 }, { "epoch": 0.026047425973706843, "grad_norm": 0.41261254155905597, "learning_rate": 7.813267813267813e-06, "loss": 0.3418, "step": 212 }, { "epoch": 0.026170291190563952, "grad_norm": 0.4346368208941382, "learning_rate": 7.85012285012285e-06, "loss": 0.3938, "step": 213 }, { "epoch": 0.02629315640742106, "grad_norm": 0.41093572343901125, "learning_rate": 7.886977886977887e-06, "loss": 0.3905, "step": 214 }, { "epoch": 0.026416021624278165, "grad_norm": 0.4436996763505994, "learning_rate": 7.923832923832924e-06, "loss": 0.3979, "step": 215 }, { "epoch": 0.026538886841135274, "grad_norm": 0.4961749028516787, "learning_rate": 7.96068796068796e-06, "loss": 0.3588, "step": 216 }, { "epoch": 0.026661752057992382, "grad_norm": 0.82736792867698, "learning_rate": 7.997542997542998e-06, "loss": 0.4357, "step": 217 }, { "epoch": 0.02678461727484949, "grad_norm": 0.4383921352788302, "learning_rate": 8.034398034398035e-06, "loss": 0.3758, "step": 218 }, { "epoch": 0.0269074824917066, "grad_norm": 0.4300662478905699, "learning_rate": 8.07125307125307e-06, "loss": 0.3394, "step": 219 }, { "epoch": 0.027030347708563704, "grad_norm": 0.45647074841948776, "learning_rate": 8.108108108108109e-06, "loss": 0.4661, "step": 220 }, { "epoch": 0.027153212925420812, "grad_norm": 0.38758078844644683, "learning_rate": 8.144963144963144e-06, "loss": 0.3891, "step": 221 }, { "epoch": 0.02727607814227792, "grad_norm": 0.3919526215443392, "learning_rate": 8.181818181818181e-06, "loss": 0.4492, "step": 222 }, { "epoch": 0.02739894335913503, "grad_norm": 0.45690355077603534, "learning_rate": 8.21867321867322e-06, "loss": 0.4089, "step": 223 }, { "epoch": 0.027521808575992138, "grad_norm": 0.4422895341836523, "learning_rate": 8.255528255528255e-06, "loss": 0.3948, "step": 224 }, { "epoch": 0.027644673792849243, "grad_norm": 0.4670433295830671, "learning_rate": 8.292383292383292e-06, "loss": 0.3379, "step": 225 }, { "epoch": 0.02776753900970635, "grad_norm": 0.4060767531380582, "learning_rate": 8.32923832923833e-06, "loss": 0.3868, "step": 226 }, { "epoch": 0.02789040422656346, "grad_norm": 0.42484135982083016, "learning_rate": 8.366093366093366e-06, "loss": 0.3565, "step": 227 }, { "epoch": 0.028013269443420568, "grad_norm": 0.36791904077586995, "learning_rate": 8.402948402948403e-06, "loss": 0.3536, "step": 228 }, { "epoch": 0.028136134660277676, "grad_norm": 0.4602014037938782, "learning_rate": 8.43980343980344e-06, "loss": 0.3539, "step": 229 }, { "epoch": 0.028258999877134785, "grad_norm": 0.44579092026573697, "learning_rate": 8.476658476658477e-06, "loss": 0.5068, "step": 230 }, { "epoch": 0.02838186509399189, "grad_norm": 0.4017260469320796, "learning_rate": 8.513513513513514e-06, "loss": 0.4181, "step": 231 }, { "epoch": 0.028504730310849, "grad_norm": 0.4049257732356442, "learning_rate": 8.55036855036855e-06, "loss": 0.3818, "step": 232 }, { "epoch": 0.028627595527706107, "grad_norm": 0.47786695849146227, "learning_rate": 8.587223587223588e-06, "loss": 0.3442, "step": 233 }, { "epoch": 0.028750460744563215, "grad_norm": 0.46154272575544747, "learning_rate": 8.624078624078625e-06, "loss": 0.3963, "step": 234 }, { "epoch": 0.028873325961420324, "grad_norm": 0.34763678213115684, "learning_rate": 8.66093366093366e-06, "loss": 0.452, "step": 235 }, { "epoch": 0.02899619117827743, "grad_norm": 0.4647293208340207, "learning_rate": 8.697788697788699e-06, "loss": 0.4036, "step": 236 }, { "epoch": 0.029119056395134537, "grad_norm": 0.3659159486210594, "learning_rate": 8.734643734643734e-06, "loss": 0.3945, "step": 237 }, { "epoch": 0.029241921611991645, "grad_norm": 0.4085155394701318, "learning_rate": 8.77149877149877e-06, "loss": 0.3562, "step": 238 }, { "epoch": 0.029364786828848754, "grad_norm": 0.36757561770939373, "learning_rate": 8.80835380835381e-06, "loss": 0.3484, "step": 239 }, { "epoch": 0.029487652045705862, "grad_norm": 0.4305359023641455, "learning_rate": 8.845208845208845e-06, "loss": 0.3853, "step": 240 }, { "epoch": 0.029610517262562967, "grad_norm": 0.4034870192524002, "learning_rate": 8.882063882063882e-06, "loss": 0.4169, "step": 241 }, { "epoch": 0.029733382479420076, "grad_norm": 0.42685087984412634, "learning_rate": 8.91891891891892e-06, "loss": 0.4415, "step": 242 }, { "epoch": 0.029856247696277184, "grad_norm": 0.43864567552505396, "learning_rate": 8.955773955773956e-06, "loss": 0.4067, "step": 243 }, { "epoch": 0.029979112913134293, "grad_norm": 0.4353917657445975, "learning_rate": 8.992628992628992e-06, "loss": 0.394, "step": 244 }, { "epoch": 0.0301019781299914, "grad_norm": 0.4951476863161691, "learning_rate": 9.02948402948403e-06, "loss": 0.4004, "step": 245 }, { "epoch": 0.030224843346848506, "grad_norm": 0.4740701466756803, "learning_rate": 9.066339066339066e-06, "loss": 0.4296, "step": 246 }, { "epoch": 0.030347708563705614, "grad_norm": 0.4587923547471107, "learning_rate": 9.103194103194103e-06, "loss": 0.3678, "step": 247 }, { "epoch": 0.030470573780562723, "grad_norm": 0.3691100900504288, "learning_rate": 9.14004914004914e-06, "loss": 0.3704, "step": 248 }, { "epoch": 0.03059343899741983, "grad_norm": 0.4039246680693952, "learning_rate": 9.176904176904177e-06, "loss": 0.3231, "step": 249 }, { "epoch": 0.03071630421427694, "grad_norm": 0.46897749003598704, "learning_rate": 9.213759213759214e-06, "loss": 0.3575, "step": 250 }, { "epoch": 0.030839169431134045, "grad_norm": 0.5063118833753837, "learning_rate": 9.250614250614251e-06, "loss": 0.3692, "step": 251 }, { "epoch": 0.030962034647991153, "grad_norm": 0.40402143251763645, "learning_rate": 9.287469287469288e-06, "loss": 0.4593, "step": 252 }, { "epoch": 0.03108489986484826, "grad_norm": 0.4731686310347647, "learning_rate": 9.324324324324325e-06, "loss": 0.3839, "step": 253 }, { "epoch": 0.03120776508170537, "grad_norm": 0.38479202024792614, "learning_rate": 9.361179361179362e-06, "loss": 0.3498, "step": 254 }, { "epoch": 0.03133063029856248, "grad_norm": 0.4261949785539157, "learning_rate": 9.398034398034399e-06, "loss": 0.4086, "step": 255 }, { "epoch": 0.03145349551541959, "grad_norm": 0.4250943209384239, "learning_rate": 9.434889434889434e-06, "loss": 0.3901, "step": 256 }, { "epoch": 0.031576360732276695, "grad_norm": 0.5129479347381749, "learning_rate": 9.471744471744471e-06, "loss": 0.4899, "step": 257 }, { "epoch": 0.0316992259491338, "grad_norm": 0.7655576679676852, "learning_rate": 9.50859950859951e-06, "loss": 0.4526, "step": 258 }, { "epoch": 0.031822091165990905, "grad_norm": 0.4206757776232858, "learning_rate": 9.545454545454545e-06, "loss": 0.4357, "step": 259 }, { "epoch": 0.031944956382848014, "grad_norm": 0.37690279868351895, "learning_rate": 9.582309582309582e-06, "loss": 0.4126, "step": 260 }, { "epoch": 0.03206782159970512, "grad_norm": 0.4649332931401451, "learning_rate": 9.61916461916462e-06, "loss": 0.3846, "step": 261 }, { "epoch": 0.03219068681656223, "grad_norm": 0.529953470282815, "learning_rate": 9.656019656019656e-06, "loss": 0.3836, "step": 262 }, { "epoch": 0.03231355203341934, "grad_norm": 0.4242094277681987, "learning_rate": 9.692874692874693e-06, "loss": 0.4709, "step": 263 }, { "epoch": 0.03243641725027645, "grad_norm": 0.37954099812722136, "learning_rate": 9.72972972972973e-06, "loss": 0.385, "step": 264 }, { "epoch": 0.032559282467133556, "grad_norm": 0.4042808488073107, "learning_rate": 9.766584766584767e-06, "loss": 0.4783, "step": 265 }, { "epoch": 0.032682147683990664, "grad_norm": 0.3957202614512033, "learning_rate": 9.803439803439804e-06, "loss": 0.3956, "step": 266 }, { "epoch": 0.03280501290084777, "grad_norm": 0.3997447329796342, "learning_rate": 9.84029484029484e-06, "loss": 0.3789, "step": 267 }, { "epoch": 0.032927878117704874, "grad_norm": 0.48597525445854317, "learning_rate": 9.877149877149878e-06, "loss": 0.4801, "step": 268 }, { "epoch": 0.03305074333456198, "grad_norm": 0.43381472613379624, "learning_rate": 9.914004914004915e-06, "loss": 0.4494, "step": 269 }, { "epoch": 0.03317360855141909, "grad_norm": 0.38986923534434975, "learning_rate": 9.950859950859952e-06, "loss": 0.4398, "step": 270 }, { "epoch": 0.0332964737682762, "grad_norm": 0.4743286750386871, "learning_rate": 9.987714987714989e-06, "loss": 0.4054, "step": 271 }, { "epoch": 0.03341933898513331, "grad_norm": 0.43570123566601066, "learning_rate": 1.0024570024570024e-05, "loss": 0.4409, "step": 272 }, { "epoch": 0.033542204201990417, "grad_norm": 0.43237079848509175, "learning_rate": 1.0061425061425062e-05, "loss": 0.4613, "step": 273 }, { "epoch": 0.033665069418847525, "grad_norm": 0.4220407126009151, "learning_rate": 1.00982800982801e-05, "loss": 0.3602, "step": 274 }, { "epoch": 0.03378793463570463, "grad_norm": 0.4621139797356595, "learning_rate": 1.0135135135135135e-05, "loss": 0.3608, "step": 275 }, { "epoch": 0.03391079985256174, "grad_norm": 0.4747945154220299, "learning_rate": 1.0171990171990172e-05, "loss": 0.388, "step": 276 }, { "epoch": 0.03403366506941885, "grad_norm": 0.5558459661293613, "learning_rate": 1.020884520884521e-05, "loss": 0.4346, "step": 277 }, { "epoch": 0.03415653028627596, "grad_norm": 0.5003640793957003, "learning_rate": 1.0245700245700245e-05, "loss": 0.4202, "step": 278 }, { "epoch": 0.03427939550313306, "grad_norm": 0.46057241382880204, "learning_rate": 1.0282555282555282e-05, "loss": 0.415, "step": 279 }, { "epoch": 0.03440226071999017, "grad_norm": 0.414479398414628, "learning_rate": 1.031941031941032e-05, "loss": 0.3926, "step": 280 }, { "epoch": 0.03452512593684728, "grad_norm": 0.3951062489442739, "learning_rate": 1.0356265356265356e-05, "loss": 0.3957, "step": 281 }, { "epoch": 0.034647991153704386, "grad_norm": 0.6504663421206138, "learning_rate": 1.0393120393120393e-05, "loss": 0.4894, "step": 282 }, { "epoch": 0.034770856370561494, "grad_norm": 0.9224041342329022, "learning_rate": 1.042997542997543e-05, "loss": 0.4308, "step": 283 }, { "epoch": 0.0348937215874186, "grad_norm": 0.4461551202709316, "learning_rate": 1.0466830466830467e-05, "loss": 0.4432, "step": 284 }, { "epoch": 0.03501658680427571, "grad_norm": 2.091322786346542, "learning_rate": 1.0503685503685504e-05, "loss": 0.474, "step": 285 }, { "epoch": 0.03513945202113282, "grad_norm": 0.5030951338932765, "learning_rate": 1.0540540540540541e-05, "loss": 0.419, "step": 286 }, { "epoch": 0.03526231723798993, "grad_norm": 0.37225971304454974, "learning_rate": 1.0577395577395578e-05, "loss": 0.3996, "step": 287 }, { "epoch": 0.035385182454847036, "grad_norm": 0.49000123494142184, "learning_rate": 1.0614250614250613e-05, "loss": 0.3899, "step": 288 }, { "epoch": 0.03550804767170414, "grad_norm": 0.384549213323594, "learning_rate": 1.0651105651105652e-05, "loss": 0.4195, "step": 289 }, { "epoch": 0.035630912888561246, "grad_norm": 0.42935710800156485, "learning_rate": 1.0687960687960689e-05, "loss": 0.36, "step": 290 }, { "epoch": 0.035753778105418355, "grad_norm": 0.4385789935445074, "learning_rate": 1.0724815724815724e-05, "loss": 0.3518, "step": 291 }, { "epoch": 0.03587664332227546, "grad_norm": 0.4533967924355237, "learning_rate": 1.0761670761670763e-05, "loss": 0.4273, "step": 292 }, { "epoch": 0.03599950853913257, "grad_norm": 0.34928847245972344, "learning_rate": 1.07985257985258e-05, "loss": 0.4044, "step": 293 }, { "epoch": 0.03612237375598968, "grad_norm": 0.5006375272200118, "learning_rate": 1.0835380835380835e-05, "loss": 0.3328, "step": 294 }, { "epoch": 0.03624523897284679, "grad_norm": 0.3735894234886142, "learning_rate": 1.0872235872235874e-05, "loss": 0.3663, "step": 295 }, { "epoch": 0.0363681041897039, "grad_norm": 0.47483725368208324, "learning_rate": 1.0909090909090909e-05, "loss": 0.4317, "step": 296 }, { "epoch": 0.036490969406561005, "grad_norm": 0.5130164675642208, "learning_rate": 1.0945945945945946e-05, "loss": 0.4328, "step": 297 }, { "epoch": 0.036613834623418114, "grad_norm": 0.4102202876196586, "learning_rate": 1.0982800982800983e-05, "loss": 0.4119, "step": 298 }, { "epoch": 0.036736699840275215, "grad_norm": 0.4080302957757013, "learning_rate": 1.101965601965602e-05, "loss": 0.398, "step": 299 }, { "epoch": 0.036859565057132324, "grad_norm": 0.38539977753261506, "learning_rate": 1.1056511056511057e-05, "loss": 0.4006, "step": 300 }, { "epoch": 0.03698243027398943, "grad_norm": 0.3789512267734138, "learning_rate": 1.1093366093366094e-05, "loss": 0.3759, "step": 301 }, { "epoch": 0.03710529549084654, "grad_norm": 0.4597457092099576, "learning_rate": 1.113022113022113e-05, "loss": 0.3921, "step": 302 }, { "epoch": 0.03722816070770365, "grad_norm": 0.4019701522182803, "learning_rate": 1.1167076167076168e-05, "loss": 0.4293, "step": 303 }, { "epoch": 0.03735102592456076, "grad_norm": 0.4486962962400255, "learning_rate": 1.1203931203931203e-05, "loss": 0.4174, "step": 304 }, { "epoch": 0.037473891141417866, "grad_norm": 0.4248245939563311, "learning_rate": 1.1240786240786241e-05, "loss": 0.475, "step": 305 }, { "epoch": 0.037596756358274974, "grad_norm": 0.48571679376133875, "learning_rate": 1.1277641277641278e-05, "loss": 0.3342, "step": 306 }, { "epoch": 0.03771962157513208, "grad_norm": 0.4725129955675953, "learning_rate": 1.1314496314496314e-05, "loss": 0.3985, "step": 307 }, { "epoch": 0.03784248679198919, "grad_norm": 0.4067570722785139, "learning_rate": 1.1351351351351352e-05, "loss": 0.4641, "step": 308 }, { "epoch": 0.03796535200884629, "grad_norm": 0.4064230427946474, "learning_rate": 1.138820638820639e-05, "loss": 0.351, "step": 309 }, { "epoch": 0.0380882172257034, "grad_norm": 0.4668741346958155, "learning_rate": 1.1425061425061425e-05, "loss": 0.4134, "step": 310 }, { "epoch": 0.03821108244256051, "grad_norm": 0.44990032899971344, "learning_rate": 1.1461916461916463e-05, "loss": 0.4116, "step": 311 }, { "epoch": 0.03833394765941762, "grad_norm": 0.37959447055488044, "learning_rate": 1.1498771498771498e-05, "loss": 0.3899, "step": 312 }, { "epoch": 0.038456812876274726, "grad_norm": 0.3858970555422907, "learning_rate": 1.1535626535626535e-05, "loss": 0.3184, "step": 313 }, { "epoch": 0.038579678093131835, "grad_norm": 0.5869224709139494, "learning_rate": 1.1572481572481574e-05, "loss": 0.3861, "step": 314 }, { "epoch": 0.03870254330998894, "grad_norm": 0.4066113511878084, "learning_rate": 1.160933660933661e-05, "loss": 0.3343, "step": 315 }, { "epoch": 0.03882540852684605, "grad_norm": 0.39895233151740417, "learning_rate": 1.1646191646191646e-05, "loss": 0.3534, "step": 316 }, { "epoch": 0.03894827374370316, "grad_norm": 0.4555249482074185, "learning_rate": 1.1683046683046683e-05, "loss": 0.3982, "step": 317 }, { "epoch": 0.03907113896056027, "grad_norm": 0.48914544506372465, "learning_rate": 1.171990171990172e-05, "loss": 0.4311, "step": 318 }, { "epoch": 0.03919400417741737, "grad_norm": 0.44821974201959014, "learning_rate": 1.1756756756756757e-05, "loss": 0.449, "step": 319 }, { "epoch": 0.03931686939427448, "grad_norm": 0.4645341802957626, "learning_rate": 1.1793611793611792e-05, "loss": 0.4225, "step": 320 }, { "epoch": 0.03943973461113159, "grad_norm": 0.4880729762581246, "learning_rate": 1.1830466830466831e-05, "loss": 0.4481, "step": 321 }, { "epoch": 0.039562599827988695, "grad_norm": 0.3323191055325133, "learning_rate": 1.1867321867321868e-05, "loss": 0.3533, "step": 322 }, { "epoch": 0.039685465044845804, "grad_norm": 0.5895205171624047, "learning_rate": 1.1904176904176903e-05, "loss": 0.4743, "step": 323 }, { "epoch": 0.03980833026170291, "grad_norm": 0.4176330069921961, "learning_rate": 1.1941031941031942e-05, "loss": 0.4259, "step": 324 }, { "epoch": 0.03993119547856002, "grad_norm": 0.7041968959109467, "learning_rate": 1.1977886977886979e-05, "loss": 0.4802, "step": 325 }, { "epoch": 0.04005406069541713, "grad_norm": 0.4250347660849787, "learning_rate": 1.2014742014742014e-05, "loss": 0.3532, "step": 326 }, { "epoch": 0.04017692591227424, "grad_norm": 0.4306314225272278, "learning_rate": 1.2051597051597053e-05, "loss": 0.4531, "step": 327 }, { "epoch": 0.040299791129131346, "grad_norm": 0.5691811627963269, "learning_rate": 1.208845208845209e-05, "loss": 0.4266, "step": 328 }, { "epoch": 0.04042265634598845, "grad_norm": 0.3843169288578875, "learning_rate": 1.2125307125307125e-05, "loss": 0.4491, "step": 329 }, { "epoch": 0.040545521562845556, "grad_norm": 0.3997045215047791, "learning_rate": 1.2162162162162164e-05, "loss": 0.3599, "step": 330 }, { "epoch": 0.040668386779702664, "grad_norm": 0.3791962589107743, "learning_rate": 1.2199017199017199e-05, "loss": 0.3881, "step": 331 }, { "epoch": 0.04079125199655977, "grad_norm": 0.478584953633015, "learning_rate": 1.2235872235872236e-05, "loss": 0.4069, "step": 332 }, { "epoch": 0.04091411721341688, "grad_norm": 0.4950451022516096, "learning_rate": 1.2272727272727274e-05, "loss": 0.4119, "step": 333 }, { "epoch": 0.04103698243027399, "grad_norm": 0.40390571883433257, "learning_rate": 1.230958230958231e-05, "loss": 0.3975, "step": 334 }, { "epoch": 0.0411598476471311, "grad_norm": 0.4775460080491238, "learning_rate": 1.2346437346437347e-05, "loss": 0.4541, "step": 335 }, { "epoch": 0.04128271286398821, "grad_norm": 0.5444437093423099, "learning_rate": 1.2383292383292384e-05, "loss": 0.4822, "step": 336 }, { "epoch": 0.041405578080845315, "grad_norm": 0.5175789547696228, "learning_rate": 1.242014742014742e-05, "loss": 0.4309, "step": 337 }, { "epoch": 0.04152844329770242, "grad_norm": 0.4369577727045973, "learning_rate": 1.2457002457002457e-05, "loss": 0.404, "step": 338 }, { "epoch": 0.041651308514559525, "grad_norm": 0.4415926201303459, "learning_rate": 1.2493857493857493e-05, "loss": 0.363, "step": 339 }, { "epoch": 0.04177417373141663, "grad_norm": 0.5813197593361984, "learning_rate": 1.2530712530712531e-05, "loss": 0.4747, "step": 340 }, { "epoch": 0.04189703894827374, "grad_norm": 0.38845173430237434, "learning_rate": 1.2567567567567568e-05, "loss": 0.4163, "step": 341 }, { "epoch": 0.04201990416513085, "grad_norm": 0.40026979173266, "learning_rate": 1.2604422604422604e-05, "loss": 0.4416, "step": 342 }, { "epoch": 0.04214276938198796, "grad_norm": 0.5107262195041659, "learning_rate": 1.2641277641277642e-05, "loss": 0.4067, "step": 343 }, { "epoch": 0.04226563459884507, "grad_norm": 0.47142402576678677, "learning_rate": 1.267813267813268e-05, "loss": 0.4444, "step": 344 }, { "epoch": 0.042388499815702176, "grad_norm": 0.550007258345193, "learning_rate": 1.2714987714987714e-05, "loss": 0.3695, "step": 345 }, { "epoch": 0.042511365032559284, "grad_norm": 0.4756544848828658, "learning_rate": 1.2751842751842753e-05, "loss": 0.4298, "step": 346 }, { "epoch": 0.04263423024941639, "grad_norm": 0.40344399507923445, "learning_rate": 1.2788697788697788e-05, "loss": 0.3649, "step": 347 }, { "epoch": 0.0427570954662735, "grad_norm": 0.5018660681328381, "learning_rate": 1.2825552825552825e-05, "loss": 0.4564, "step": 348 }, { "epoch": 0.0428799606831306, "grad_norm": 0.4324408270575211, "learning_rate": 1.2862407862407864e-05, "loss": 0.4845, "step": 349 }, { "epoch": 0.04300282589998771, "grad_norm": 0.4451952713809081, "learning_rate": 1.28992628992629e-05, "loss": 0.3403, "step": 350 }, { "epoch": 0.04312569111684482, "grad_norm": 0.442280841437203, "learning_rate": 1.2936117936117936e-05, "loss": 0.3867, "step": 351 }, { "epoch": 0.04324855633370193, "grad_norm": 0.4337637969752764, "learning_rate": 1.2972972972972975e-05, "loss": 0.3638, "step": 352 }, { "epoch": 0.043371421550559036, "grad_norm": 0.4034348954395848, "learning_rate": 1.300982800982801e-05, "loss": 0.5379, "step": 353 }, { "epoch": 0.043494286767416145, "grad_norm": 0.49071525928387577, "learning_rate": 1.3046683046683047e-05, "loss": 0.3079, "step": 354 }, { "epoch": 0.04361715198427325, "grad_norm": 0.46060005646816127, "learning_rate": 1.3083538083538084e-05, "loss": 0.3826, "step": 355 }, { "epoch": 0.04374001720113036, "grad_norm": 0.7112264028546806, "learning_rate": 1.3120393120393121e-05, "loss": 0.5027, "step": 356 }, { "epoch": 0.04386288241798747, "grad_norm": 0.4063237532344088, "learning_rate": 1.3157248157248158e-05, "loss": 0.342, "step": 357 }, { "epoch": 0.04398574763484458, "grad_norm": 0.38190707735075763, "learning_rate": 1.3194103194103193e-05, "loss": 0.4151, "step": 358 }, { "epoch": 0.04410861285170168, "grad_norm": 0.44819566237076697, "learning_rate": 1.3230958230958232e-05, "loss": 0.3561, "step": 359 }, { "epoch": 0.04423147806855879, "grad_norm": 0.483652080727959, "learning_rate": 1.3267813267813269e-05, "loss": 0.5047, "step": 360 }, { "epoch": 0.0443543432854159, "grad_norm": 0.5094468112534715, "learning_rate": 1.3304668304668304e-05, "loss": 0.4479, "step": 361 }, { "epoch": 0.044477208502273005, "grad_norm": 0.40211088947952495, "learning_rate": 1.3341523341523343e-05, "loss": 0.3436, "step": 362 }, { "epoch": 0.044600073719130114, "grad_norm": 0.3975526414289062, "learning_rate": 1.3378378378378378e-05, "loss": 0.4679, "step": 363 }, { "epoch": 0.04472293893598722, "grad_norm": 0.4013618525833683, "learning_rate": 1.3415233415233415e-05, "loss": 0.3459, "step": 364 }, { "epoch": 0.04484580415284433, "grad_norm": 0.4553159871865281, "learning_rate": 1.3452088452088453e-05, "loss": 0.4527, "step": 365 }, { "epoch": 0.04496866936970144, "grad_norm": 0.4138873777296668, "learning_rate": 1.3488943488943489e-05, "loss": 0.3957, "step": 366 }, { "epoch": 0.04509153458655855, "grad_norm": 0.4138323211829703, "learning_rate": 1.3525798525798526e-05, "loss": 0.3983, "step": 367 }, { "epoch": 0.045214399803415656, "grad_norm": 0.39675024458956065, "learning_rate": 1.3562653562653564e-05, "loss": 0.3517, "step": 368 }, { "epoch": 0.045337265020272764, "grad_norm": 0.37125964901323416, "learning_rate": 1.35995085995086e-05, "loss": 0.3096, "step": 369 }, { "epoch": 0.045460130237129866, "grad_norm": 0.45215631801774214, "learning_rate": 1.3636363636363637e-05, "loss": 0.3886, "step": 370 }, { "epoch": 0.045582995453986974, "grad_norm": 0.5477035434581109, "learning_rate": 1.3673218673218674e-05, "loss": 0.4414, "step": 371 }, { "epoch": 0.04570586067084408, "grad_norm": 0.41453116507112586, "learning_rate": 1.371007371007371e-05, "loss": 0.423, "step": 372 }, { "epoch": 0.04582872588770119, "grad_norm": 0.40468738648965413, "learning_rate": 1.3746928746928747e-05, "loss": 0.3819, "step": 373 }, { "epoch": 0.0459515911045583, "grad_norm": 0.3753263616371913, "learning_rate": 1.3783783783783784e-05, "loss": 0.3572, "step": 374 }, { "epoch": 0.04607445632141541, "grad_norm": 0.4927521819012302, "learning_rate": 1.3820638820638821e-05, "loss": 0.4629, "step": 375 }, { "epoch": 0.046197321538272516, "grad_norm": 0.37782545771823906, "learning_rate": 1.3857493857493858e-05, "loss": 0.3118, "step": 376 }, { "epoch": 0.046320186755129625, "grad_norm": 0.40872057602377004, "learning_rate": 1.3894348894348894e-05, "loss": 0.4143, "step": 377 }, { "epoch": 0.04644305197198673, "grad_norm": 0.39330197652825827, "learning_rate": 1.3931203931203932e-05, "loss": 0.4416, "step": 378 }, { "epoch": 0.04656591718884384, "grad_norm": 0.5069576214824641, "learning_rate": 1.3968058968058967e-05, "loss": 0.4023, "step": 379 }, { "epoch": 0.04668878240570094, "grad_norm": 0.3985088236270232, "learning_rate": 1.4004914004914004e-05, "loss": 0.3499, "step": 380 }, { "epoch": 0.04681164762255805, "grad_norm": 0.6221891243432423, "learning_rate": 1.4041769041769043e-05, "loss": 0.3851, "step": 381 }, { "epoch": 0.04693451283941516, "grad_norm": 0.5376251558362877, "learning_rate": 1.4078624078624078e-05, "loss": 0.4209, "step": 382 }, { "epoch": 0.04705737805627227, "grad_norm": 0.4091883674962394, "learning_rate": 1.4115479115479115e-05, "loss": 0.4364, "step": 383 }, { "epoch": 0.04718024327312938, "grad_norm": 0.37162865109285786, "learning_rate": 1.4152334152334154e-05, "loss": 0.345, "step": 384 }, { "epoch": 0.047303108489986485, "grad_norm": 0.42592513480335226, "learning_rate": 1.4189189189189189e-05, "loss": 0.3669, "step": 385 }, { "epoch": 0.047425973706843594, "grad_norm": 0.4485976572429992, "learning_rate": 1.4226044226044226e-05, "loss": 0.4113, "step": 386 }, { "epoch": 0.0475488389237007, "grad_norm": 0.39077314161704846, "learning_rate": 1.4262899262899263e-05, "loss": 0.4198, "step": 387 }, { "epoch": 0.04767170414055781, "grad_norm": 0.41159441117045265, "learning_rate": 1.42997542997543e-05, "loss": 0.4112, "step": 388 }, { "epoch": 0.04779456935741492, "grad_norm": 0.46707179543132743, "learning_rate": 1.4336609336609337e-05, "loss": 0.4027, "step": 389 }, { "epoch": 0.04791743457427202, "grad_norm": 0.3749876771242115, "learning_rate": 1.4373464373464374e-05, "loss": 0.4126, "step": 390 }, { "epoch": 0.04804029979112913, "grad_norm": 0.37659005055986006, "learning_rate": 1.441031941031941e-05, "loss": 0.3822, "step": 391 }, { "epoch": 0.04816316500798624, "grad_norm": 0.4707389663653198, "learning_rate": 1.4447174447174448e-05, "loss": 0.4453, "step": 392 }, { "epoch": 0.048286030224843346, "grad_norm": 0.458704949406905, "learning_rate": 1.4484029484029485e-05, "loss": 0.3807, "step": 393 }, { "epoch": 0.048408895441700454, "grad_norm": 0.4348033398790392, "learning_rate": 1.4520884520884522e-05, "loss": 0.379, "step": 394 }, { "epoch": 0.04853176065855756, "grad_norm": 0.5268506325666469, "learning_rate": 1.4557739557739557e-05, "loss": 0.4217, "step": 395 }, { "epoch": 0.04865462587541467, "grad_norm": 0.3414365283329993, "learning_rate": 1.4594594594594596e-05, "loss": 0.3041, "step": 396 }, { "epoch": 0.04877749109227178, "grad_norm": 0.39488379933067735, "learning_rate": 1.4631449631449633e-05, "loss": 0.3926, "step": 397 }, { "epoch": 0.04890035630912889, "grad_norm": 0.45550787731996223, "learning_rate": 1.4668304668304668e-05, "loss": 0.3738, "step": 398 }, { "epoch": 0.049023221525986, "grad_norm": 0.3608546303498292, "learning_rate": 1.4705159705159705e-05, "loss": 0.336, "step": 399 }, { "epoch": 0.0491460867428431, "grad_norm": 0.39326652008642893, "learning_rate": 1.4742014742014743e-05, "loss": 0.3587, "step": 400 }, { "epoch": 0.049268951959700207, "grad_norm": 0.364617594992912, "learning_rate": 1.4778869778869779e-05, "loss": 0.3567, "step": 401 }, { "epoch": 0.049391817176557315, "grad_norm": 0.32434328191746337, "learning_rate": 1.4815724815724816e-05, "loss": 0.3908, "step": 402 }, { "epoch": 0.04951468239341442, "grad_norm": 0.442545538107822, "learning_rate": 1.4852579852579853e-05, "loss": 0.3846, "step": 403 }, { "epoch": 0.04963754761027153, "grad_norm": 0.4148981157889746, "learning_rate": 1.488943488943489e-05, "loss": 0.4539, "step": 404 }, { "epoch": 0.04976041282712864, "grad_norm": 0.4898657163474319, "learning_rate": 1.4926289926289926e-05, "loss": 0.4189, "step": 405 }, { "epoch": 0.04988327804398575, "grad_norm": 0.462010487552032, "learning_rate": 1.4963144963144963e-05, "loss": 0.417, "step": 406 }, { "epoch": 0.05000614326084286, "grad_norm": 0.4595562004757287, "learning_rate": 1.5e-05, "loss": 0.3816, "step": 407 }, { "epoch": 0.050129008477699966, "grad_norm": 0.3957343113332706, "learning_rate": 1.5036855036855039e-05, "loss": 0.3749, "step": 408 }, { "epoch": 0.050251873694557074, "grad_norm": 0.44423186091769207, "learning_rate": 1.5073710073710073e-05, "loss": 0.3704, "step": 409 }, { "epoch": 0.050374738911414176, "grad_norm": 0.47972525806341426, "learning_rate": 1.5110565110565111e-05, "loss": 0.3369, "step": 410 }, { "epoch": 0.050497604128271284, "grad_norm": 0.4940382958419242, "learning_rate": 1.5147420147420148e-05, "loss": 0.3698, "step": 411 }, { "epoch": 0.05062046934512839, "grad_norm": 0.3281231718531294, "learning_rate": 1.5184275184275183e-05, "loss": 0.3726, "step": 412 }, { "epoch": 0.0507433345619855, "grad_norm": 0.40555683461330216, "learning_rate": 1.5221130221130222e-05, "loss": 0.3563, "step": 413 }, { "epoch": 0.05086619977884261, "grad_norm": 0.3935479097368003, "learning_rate": 1.5257985257985259e-05, "loss": 0.3899, "step": 414 }, { "epoch": 0.05098906499569972, "grad_norm": 0.5351045159305292, "learning_rate": 1.5294840294840294e-05, "loss": 0.4325, "step": 415 }, { "epoch": 0.051111930212556826, "grad_norm": 0.49810319408295806, "learning_rate": 1.533169533169533e-05, "loss": 0.4242, "step": 416 }, { "epoch": 0.051234795429413935, "grad_norm": 0.42898757737880155, "learning_rate": 1.536855036855037e-05, "loss": 0.3453, "step": 417 }, { "epoch": 0.05135766064627104, "grad_norm": 0.43909748866550913, "learning_rate": 1.5405405405405405e-05, "loss": 0.3511, "step": 418 }, { "epoch": 0.05148052586312815, "grad_norm": 0.47338622822937143, "learning_rate": 1.5442260442260442e-05, "loss": 0.411, "step": 419 }, { "epoch": 0.05160339107998525, "grad_norm": 0.5228209146269727, "learning_rate": 1.5479115479115482e-05, "loss": 0.4466, "step": 420 }, { "epoch": 0.05172625629684236, "grad_norm": 0.4614448816830962, "learning_rate": 1.5515970515970516e-05, "loss": 0.4925, "step": 421 }, { "epoch": 0.05184912151369947, "grad_norm": 0.4585972060635247, "learning_rate": 1.5552825552825553e-05, "loss": 0.4192, "step": 422 }, { "epoch": 0.05197198673055658, "grad_norm": 0.44249451863760947, "learning_rate": 1.5589680589680593e-05, "loss": 0.4403, "step": 423 }, { "epoch": 0.05209485194741369, "grad_norm": 0.4987824832074661, "learning_rate": 1.5626535626535627e-05, "loss": 0.4908, "step": 424 }, { "epoch": 0.052217717164270795, "grad_norm": 0.41861358366024487, "learning_rate": 1.5663390663390664e-05, "loss": 0.3832, "step": 425 }, { "epoch": 0.052340582381127904, "grad_norm": 0.4359499819877769, "learning_rate": 1.57002457002457e-05, "loss": 0.3615, "step": 426 }, { "epoch": 0.05246344759798501, "grad_norm": 0.46605973154069363, "learning_rate": 1.5737100737100738e-05, "loss": 0.4081, "step": 427 }, { "epoch": 0.05258631281484212, "grad_norm": 0.5331322454235369, "learning_rate": 1.5773955773955775e-05, "loss": 0.3978, "step": 428 }, { "epoch": 0.05270917803169923, "grad_norm": 0.4887610480464572, "learning_rate": 1.5810810810810808e-05, "loss": 0.3692, "step": 429 }, { "epoch": 0.05283204324855633, "grad_norm": 0.3501324121253161, "learning_rate": 1.584766584766585e-05, "loss": 0.3353, "step": 430 }, { "epoch": 0.05295490846541344, "grad_norm": 0.3727431879471768, "learning_rate": 1.5884520884520886e-05, "loss": 0.3666, "step": 431 }, { "epoch": 0.05307777368227055, "grad_norm": 0.49758865934670604, "learning_rate": 1.592137592137592e-05, "loss": 0.4199, "step": 432 }, { "epoch": 0.053200638899127656, "grad_norm": 0.45731214842815665, "learning_rate": 1.595823095823096e-05, "loss": 0.428, "step": 433 }, { "epoch": 0.053323504115984764, "grad_norm": 0.39126440731353773, "learning_rate": 1.5995085995085996e-05, "loss": 0.458, "step": 434 }, { "epoch": 0.05344636933284187, "grad_norm": 0.4658622906231354, "learning_rate": 1.603194103194103e-05, "loss": 0.427, "step": 435 }, { "epoch": 0.05356923454969898, "grad_norm": 0.3658865126330229, "learning_rate": 1.606879606879607e-05, "loss": 0.4251, "step": 436 }, { "epoch": 0.05369209976655609, "grad_norm": 0.6153074824688815, "learning_rate": 1.6105651105651107e-05, "loss": 0.5075, "step": 437 }, { "epoch": 0.0538149649834132, "grad_norm": 0.44470704447495035, "learning_rate": 1.614250614250614e-05, "loss": 0.4321, "step": 438 }, { "epoch": 0.053937830200270306, "grad_norm": 0.43940872661253755, "learning_rate": 1.617936117936118e-05, "loss": 0.3849, "step": 439 }, { "epoch": 0.05406069541712741, "grad_norm": 0.43015335403634786, "learning_rate": 1.6216216216216218e-05, "loss": 0.3801, "step": 440 }, { "epoch": 0.054183560633984516, "grad_norm": 0.3976389439077302, "learning_rate": 1.625307125307125e-05, "loss": 0.4302, "step": 441 }, { "epoch": 0.054306425850841625, "grad_norm": 0.5950356397096763, "learning_rate": 1.628992628992629e-05, "loss": 0.5473, "step": 442 }, { "epoch": 0.05442929106769873, "grad_norm": 0.3850535463012881, "learning_rate": 1.632678132678133e-05, "loss": 0.4069, "step": 443 }, { "epoch": 0.05455215628455584, "grad_norm": 0.5172117421928707, "learning_rate": 1.6363636363636363e-05, "loss": 0.3638, "step": 444 }, { "epoch": 0.05467502150141295, "grad_norm": 0.42325554752907196, "learning_rate": 1.64004914004914e-05, "loss": 0.3833, "step": 445 }, { "epoch": 0.05479788671827006, "grad_norm": 0.498702822968175, "learning_rate": 1.643734643734644e-05, "loss": 0.3603, "step": 446 }, { "epoch": 0.05492075193512717, "grad_norm": 0.40615081773667217, "learning_rate": 1.6474201474201473e-05, "loss": 0.3695, "step": 447 }, { "epoch": 0.055043617151984275, "grad_norm": 0.4602967464608959, "learning_rate": 1.651105651105651e-05, "loss": 0.4183, "step": 448 }, { "epoch": 0.055166482368841384, "grad_norm": 0.9183826744672624, "learning_rate": 1.654791154791155e-05, "loss": 0.544, "step": 449 }, { "epoch": 0.055289347585698485, "grad_norm": 0.3847111123923237, "learning_rate": 1.6584766584766584e-05, "loss": 0.3385, "step": 450 }, { "epoch": 0.055412212802555594, "grad_norm": 0.3772410833050005, "learning_rate": 1.662162162162162e-05, "loss": 0.3308, "step": 451 }, { "epoch": 0.0555350780194127, "grad_norm": 0.5718875337878215, "learning_rate": 1.665847665847666e-05, "loss": 0.4835, "step": 452 }, { "epoch": 0.05565794323626981, "grad_norm": 0.4054976316295017, "learning_rate": 1.6695331695331695e-05, "loss": 0.4389, "step": 453 }, { "epoch": 0.05578080845312692, "grad_norm": 0.5000820405899831, "learning_rate": 1.6732186732186732e-05, "loss": 0.4651, "step": 454 }, { "epoch": 0.05590367366998403, "grad_norm": 0.3075085442389403, "learning_rate": 1.6769041769041772e-05, "loss": 0.3852, "step": 455 }, { "epoch": 0.056026538886841136, "grad_norm": 0.43025048687265277, "learning_rate": 1.6805896805896806e-05, "loss": 0.4618, "step": 456 }, { "epoch": 0.056149404103698244, "grad_norm": 0.5683060457279632, "learning_rate": 1.6842751842751843e-05, "loss": 0.5182, "step": 457 }, { "epoch": 0.05627226932055535, "grad_norm": 0.46291002540866577, "learning_rate": 1.687960687960688e-05, "loss": 0.3654, "step": 458 }, { "epoch": 0.05639513453741246, "grad_norm": 0.4033949693672396, "learning_rate": 1.6916461916461917e-05, "loss": 0.4046, "step": 459 }, { "epoch": 0.05651799975426957, "grad_norm": 0.5019227852492792, "learning_rate": 1.6953316953316954e-05, "loss": 0.4198, "step": 460 }, { "epoch": 0.05664086497112667, "grad_norm": 0.3954082242992044, "learning_rate": 1.699017199017199e-05, "loss": 0.4494, "step": 461 }, { "epoch": 0.05676373018798378, "grad_norm": 0.5136658256355038, "learning_rate": 1.7027027027027028e-05, "loss": 0.3769, "step": 462 }, { "epoch": 0.05688659540484089, "grad_norm": 0.45115496041764475, "learning_rate": 1.7063882063882065e-05, "loss": 0.3856, "step": 463 }, { "epoch": 0.057009460621698, "grad_norm": 0.5068098817669896, "learning_rate": 1.71007371007371e-05, "loss": 0.3831, "step": 464 }, { "epoch": 0.057132325838555105, "grad_norm": 0.38866758723489003, "learning_rate": 1.713759213759214e-05, "loss": 0.4372, "step": 465 }, { "epoch": 0.05725519105541221, "grad_norm": 0.44705935209171277, "learning_rate": 1.7174447174447175e-05, "loss": 0.3265, "step": 466 }, { "epoch": 0.05737805627226932, "grad_norm": 0.40105688940491957, "learning_rate": 1.7211302211302212e-05, "loss": 0.3882, "step": 467 }, { "epoch": 0.05750092148912643, "grad_norm": 0.3625537922735169, "learning_rate": 1.724815724815725e-05, "loss": 0.3708, "step": 468 }, { "epoch": 0.05762378670598354, "grad_norm": 0.5887274456214979, "learning_rate": 1.7285012285012286e-05, "loss": 0.4555, "step": 469 }, { "epoch": 0.05774665192284065, "grad_norm": 0.45349557552223146, "learning_rate": 1.732186732186732e-05, "loss": 0.4612, "step": 470 }, { "epoch": 0.05786951713969775, "grad_norm": 0.43372757877796436, "learning_rate": 1.735872235872236e-05, "loss": 0.4199, "step": 471 }, { "epoch": 0.05799238235655486, "grad_norm": 0.5724379672184021, "learning_rate": 1.7395577395577397e-05, "loss": 0.4216, "step": 472 }, { "epoch": 0.058115247573411966, "grad_norm": 0.37190454170780046, "learning_rate": 1.743243243243243e-05, "loss": 0.3507, "step": 473 }, { "epoch": 0.058238112790269074, "grad_norm": 0.4180173318546785, "learning_rate": 1.7469287469287468e-05, "loss": 0.442, "step": 474 }, { "epoch": 0.05836097800712618, "grad_norm": 0.40090079555446423, "learning_rate": 1.7506142506142508e-05, "loss": 0.3838, "step": 475 }, { "epoch": 0.05848384322398329, "grad_norm": 0.41357566365857296, "learning_rate": 1.754299754299754e-05, "loss": 0.4289, "step": 476 }, { "epoch": 0.0586067084408404, "grad_norm": 0.3955734778671286, "learning_rate": 1.757985257985258e-05, "loss": 0.3402, "step": 477 }, { "epoch": 0.05872957365769751, "grad_norm": 0.4246835650247971, "learning_rate": 1.761670761670762e-05, "loss": 0.4165, "step": 478 }, { "epoch": 0.058852438874554616, "grad_norm": 0.465981528676942, "learning_rate": 1.7653562653562652e-05, "loss": 0.4202, "step": 479 }, { "epoch": 0.058975304091411725, "grad_norm": 0.42496021273393914, "learning_rate": 1.769041769041769e-05, "loss": 0.3681, "step": 480 }, { "epoch": 0.059098169308268826, "grad_norm": 0.3957599754194292, "learning_rate": 1.772727272727273e-05, "loss": 0.3836, "step": 481 }, { "epoch": 0.059221034525125935, "grad_norm": 0.3952542702921577, "learning_rate": 1.7764127764127763e-05, "loss": 0.3823, "step": 482 }, { "epoch": 0.05934389974198304, "grad_norm": 0.5048562879912427, "learning_rate": 1.78009828009828e-05, "loss": 0.3705, "step": 483 }, { "epoch": 0.05946676495884015, "grad_norm": 0.5083908596586227, "learning_rate": 1.783783783783784e-05, "loss": 0.4218, "step": 484 }, { "epoch": 0.05958963017569726, "grad_norm": 0.459062286792498, "learning_rate": 1.7874692874692874e-05, "loss": 0.4579, "step": 485 }, { "epoch": 0.05971249539255437, "grad_norm": 0.4952664722898546, "learning_rate": 1.791154791154791e-05, "loss": 0.5144, "step": 486 }, { "epoch": 0.05983536060941148, "grad_norm": 0.45055150492997253, "learning_rate": 1.794840294840295e-05, "loss": 0.4512, "step": 487 }, { "epoch": 0.059958225826268585, "grad_norm": 0.4428237444342488, "learning_rate": 1.7985257985257985e-05, "loss": 0.3839, "step": 488 }, { "epoch": 0.060081091043125694, "grad_norm": 0.3634759919574338, "learning_rate": 1.8022113022113022e-05, "loss": 0.3294, "step": 489 }, { "epoch": 0.0602039562599828, "grad_norm": 0.5113527893380888, "learning_rate": 1.805896805896806e-05, "loss": 0.416, "step": 490 }, { "epoch": 0.060326821476839904, "grad_norm": 0.5506947659769932, "learning_rate": 1.8095823095823096e-05, "loss": 0.4432, "step": 491 }, { "epoch": 0.06044968669369701, "grad_norm": 0.46189832646751544, "learning_rate": 1.8132678132678133e-05, "loss": 0.3933, "step": 492 }, { "epoch": 0.06057255191055412, "grad_norm": 0.5093852312989277, "learning_rate": 1.816953316953317e-05, "loss": 0.5363, "step": 493 }, { "epoch": 0.06069541712741123, "grad_norm": 0.4504107555335815, "learning_rate": 1.8206388206388207e-05, "loss": 0.4207, "step": 494 }, { "epoch": 0.06081828234426834, "grad_norm": 0.4489626125093316, "learning_rate": 1.8243243243243244e-05, "loss": 0.4434, "step": 495 }, { "epoch": 0.060941147561125446, "grad_norm": 0.4263491492853248, "learning_rate": 1.828009828009828e-05, "loss": 0.4635, "step": 496 }, { "epoch": 0.061064012777982554, "grad_norm": 0.5236297750246603, "learning_rate": 1.8316953316953318e-05, "loss": 0.3989, "step": 497 }, { "epoch": 0.06118687799483966, "grad_norm": 0.4906541366331045, "learning_rate": 1.8353808353808355e-05, "loss": 0.4153, "step": 498 }, { "epoch": 0.06130974321169677, "grad_norm": 0.41999378630810347, "learning_rate": 1.839066339066339e-05, "loss": 0.367, "step": 499 }, { "epoch": 0.06143260842855388, "grad_norm": 0.4553056482122545, "learning_rate": 1.842751842751843e-05, "loss": 0.4575, "step": 500 }, { "epoch": 0.06155547364541098, "grad_norm": 0.373499753378816, "learning_rate": 1.8464373464373465e-05, "loss": 0.3361, "step": 501 }, { "epoch": 0.06167833886226809, "grad_norm": 0.40779966750045143, "learning_rate": 1.8501228501228502e-05, "loss": 0.3231, "step": 502 }, { "epoch": 0.0618012040791252, "grad_norm": 0.38875468481647, "learning_rate": 1.853808353808354e-05, "loss": 0.3994, "step": 503 }, { "epoch": 0.061924069295982306, "grad_norm": 0.411402048331215, "learning_rate": 1.8574938574938576e-05, "loss": 0.3556, "step": 504 }, { "epoch": 0.062046934512839415, "grad_norm": 0.39606455080692715, "learning_rate": 1.8611793611793613e-05, "loss": 0.4115, "step": 505 }, { "epoch": 0.06216979972969652, "grad_norm": 0.5577522025932611, "learning_rate": 1.864864864864865e-05, "loss": 0.3928, "step": 506 }, { "epoch": 0.06229266494655363, "grad_norm": 0.379955888173454, "learning_rate": 1.8685503685503687e-05, "loss": 0.3941, "step": 507 }, { "epoch": 0.06241553016341074, "grad_norm": 0.41106754986418764, "learning_rate": 1.8722358722358724e-05, "loss": 0.372, "step": 508 }, { "epoch": 0.06253839538026784, "grad_norm": 0.38532032262672494, "learning_rate": 1.8759213759213758e-05, "loss": 0.3887, "step": 509 }, { "epoch": 0.06266126059712496, "grad_norm": 0.3930142084501437, "learning_rate": 1.8796068796068798e-05, "loss": 0.4007, "step": 510 }, { "epoch": 0.06278412581398206, "grad_norm": 0.4138806388067088, "learning_rate": 1.883292383292383e-05, "loss": 0.3644, "step": 511 }, { "epoch": 0.06290699103083917, "grad_norm": 0.41640046013182475, "learning_rate": 1.886977886977887e-05, "loss": 0.386, "step": 512 }, { "epoch": 0.06302985624769628, "grad_norm": 0.44471636541283105, "learning_rate": 1.890663390663391e-05, "loss": 0.4101, "step": 513 }, { "epoch": 0.06315272146455339, "grad_norm": 0.4178778489709168, "learning_rate": 1.8943488943488942e-05, "loss": 0.4043, "step": 514 }, { "epoch": 0.06327558668141049, "grad_norm": 0.5105567531829918, "learning_rate": 1.898034398034398e-05, "loss": 0.4134, "step": 515 }, { "epoch": 0.0633984518982676, "grad_norm": 0.41699537750745247, "learning_rate": 1.901719901719902e-05, "loss": 0.332, "step": 516 }, { "epoch": 0.06352131711512471, "grad_norm": 0.335273404114517, "learning_rate": 1.9054054054054053e-05, "loss": 0.3707, "step": 517 }, { "epoch": 0.06364418233198181, "grad_norm": 0.3814727078334036, "learning_rate": 1.909090909090909e-05, "loss": 0.4948, "step": 518 }, { "epoch": 0.06376704754883893, "grad_norm": 0.4551630935150509, "learning_rate": 1.912776412776413e-05, "loss": 0.4431, "step": 519 }, { "epoch": 0.06388991276569603, "grad_norm": 0.5079628265936569, "learning_rate": 1.9164619164619164e-05, "loss": 0.4004, "step": 520 }, { "epoch": 0.06401277798255314, "grad_norm": 0.4468924694082191, "learning_rate": 1.92014742014742e-05, "loss": 0.464, "step": 521 }, { "epoch": 0.06413564319941024, "grad_norm": 0.46101000781764423, "learning_rate": 1.923832923832924e-05, "loss": 0.3967, "step": 522 }, { "epoch": 0.06425850841626736, "grad_norm": 0.6234255159129398, "learning_rate": 1.9275184275184275e-05, "loss": 0.4182, "step": 523 }, { "epoch": 0.06438137363312446, "grad_norm": 0.3724167272229798, "learning_rate": 1.9312039312039312e-05, "loss": 0.4243, "step": 524 }, { "epoch": 0.06450423884998158, "grad_norm": 0.4087695256077267, "learning_rate": 1.934889434889435e-05, "loss": 0.4179, "step": 525 }, { "epoch": 0.06462710406683868, "grad_norm": 0.43130608379632973, "learning_rate": 1.9385749385749386e-05, "loss": 0.4468, "step": 526 }, { "epoch": 0.06474996928369578, "grad_norm": 0.4828073528672038, "learning_rate": 1.9422604422604423e-05, "loss": 0.371, "step": 527 }, { "epoch": 0.0648728345005529, "grad_norm": 0.4008541121337365, "learning_rate": 1.945945945945946e-05, "loss": 0.3688, "step": 528 }, { "epoch": 0.06499569971741, "grad_norm": 0.4357895658237957, "learning_rate": 1.9496314496314497e-05, "loss": 0.3271, "step": 529 }, { "epoch": 0.06511856493426711, "grad_norm": 0.4068002361526985, "learning_rate": 1.9533169533169534e-05, "loss": 0.2814, "step": 530 }, { "epoch": 0.06524143015112421, "grad_norm": 0.4645609998483007, "learning_rate": 1.957002457002457e-05, "loss": 0.437, "step": 531 }, { "epoch": 0.06536429536798133, "grad_norm": 0.44670080931926964, "learning_rate": 1.9606879606879607e-05, "loss": 0.3599, "step": 532 }, { "epoch": 0.06548716058483843, "grad_norm": 0.42224415419424594, "learning_rate": 1.9643734643734644e-05, "loss": 0.3879, "step": 533 }, { "epoch": 0.06561002580169555, "grad_norm": 0.4143628476049705, "learning_rate": 1.968058968058968e-05, "loss": 0.4542, "step": 534 }, { "epoch": 0.06573289101855265, "grad_norm": 0.36311229121436706, "learning_rate": 1.971744471744472e-05, "loss": 0.3688, "step": 535 }, { "epoch": 0.06585575623540975, "grad_norm": 0.4899541841518559, "learning_rate": 1.9754299754299755e-05, "loss": 0.3723, "step": 536 }, { "epoch": 0.06597862145226686, "grad_norm": 0.4628743619395287, "learning_rate": 1.9791154791154792e-05, "loss": 0.3755, "step": 537 }, { "epoch": 0.06610148666912397, "grad_norm": 0.4362405832759002, "learning_rate": 1.982800982800983e-05, "loss": 0.4094, "step": 538 }, { "epoch": 0.06622435188598108, "grad_norm": 0.4567365493030196, "learning_rate": 1.9864864864864866e-05, "loss": 0.5173, "step": 539 }, { "epoch": 0.06634721710283818, "grad_norm": 0.47532589974350636, "learning_rate": 1.9901719901719903e-05, "loss": 0.3825, "step": 540 }, { "epoch": 0.0664700823196953, "grad_norm": 0.4899409349577937, "learning_rate": 1.9938574938574937e-05, "loss": 0.4077, "step": 541 }, { "epoch": 0.0665929475365524, "grad_norm": 0.4226144969610679, "learning_rate": 1.9975429975429977e-05, "loss": 0.3544, "step": 542 }, { "epoch": 0.06671581275340951, "grad_norm": 0.45517383332411565, "learning_rate": 2.0012285012285014e-05, "loss": 0.3411, "step": 543 }, { "epoch": 0.06683867797026662, "grad_norm": 0.6551193107475537, "learning_rate": 2.0049140049140048e-05, "loss": 0.4061, "step": 544 }, { "epoch": 0.06696154318712373, "grad_norm": 0.5247368196169213, "learning_rate": 2.0085995085995088e-05, "loss": 0.4815, "step": 545 }, { "epoch": 0.06708440840398083, "grad_norm": 1.027331003966886, "learning_rate": 2.0122850122850125e-05, "loss": 0.4713, "step": 546 }, { "epoch": 0.06720727362083793, "grad_norm": 0.4796772434810298, "learning_rate": 2.015970515970516e-05, "loss": 0.4135, "step": 547 }, { "epoch": 0.06733013883769505, "grad_norm": 0.3998311093137369, "learning_rate": 2.01965601965602e-05, "loss": 0.3812, "step": 548 }, { "epoch": 0.06745300405455215, "grad_norm": 0.3704271103912502, "learning_rate": 2.0233415233415236e-05, "loss": 0.4071, "step": 549 }, { "epoch": 0.06757586927140927, "grad_norm": 0.48897539752843916, "learning_rate": 2.027027027027027e-05, "loss": 0.3464, "step": 550 }, { "epoch": 0.06769873448826637, "grad_norm": 0.4470276709754359, "learning_rate": 2.030712530712531e-05, "loss": 0.3715, "step": 551 }, { "epoch": 0.06782159970512348, "grad_norm": 0.3994507422372814, "learning_rate": 2.0343980343980343e-05, "loss": 0.3578, "step": 552 }, { "epoch": 0.06794446492198059, "grad_norm": 0.4561906367719322, "learning_rate": 2.038083538083538e-05, "loss": 0.4225, "step": 553 }, { "epoch": 0.0680673301388377, "grad_norm": 0.3946454060861364, "learning_rate": 2.041769041769042e-05, "loss": 0.3937, "step": 554 }, { "epoch": 0.0681901953556948, "grad_norm": 0.5081540617569618, "learning_rate": 2.0454545454545454e-05, "loss": 0.4732, "step": 555 }, { "epoch": 0.06831306057255192, "grad_norm": 0.45175848758476905, "learning_rate": 2.049140049140049e-05, "loss": 0.4662, "step": 556 }, { "epoch": 0.06843592578940902, "grad_norm": 0.49207373901039925, "learning_rate": 2.0528255528255528e-05, "loss": 0.3275, "step": 557 }, { "epoch": 0.06855879100626612, "grad_norm": 0.37456095350337687, "learning_rate": 2.0565110565110565e-05, "loss": 0.3875, "step": 558 }, { "epoch": 0.06868165622312324, "grad_norm": 0.4318749623086615, "learning_rate": 2.0601965601965602e-05, "loss": 0.3814, "step": 559 }, { "epoch": 0.06880452143998034, "grad_norm": 0.43807173447735454, "learning_rate": 2.063882063882064e-05, "loss": 0.3513, "step": 560 }, { "epoch": 0.06892738665683745, "grad_norm": 0.3814181541436543, "learning_rate": 2.0675675675675676e-05, "loss": 0.3511, "step": 561 }, { "epoch": 0.06905025187369455, "grad_norm": 0.51417155379688, "learning_rate": 2.0712530712530713e-05, "loss": 0.3917, "step": 562 }, { "epoch": 0.06917311709055167, "grad_norm": 0.3997647819409291, "learning_rate": 2.074938574938575e-05, "loss": 0.3914, "step": 563 }, { "epoch": 0.06929598230740877, "grad_norm": 0.5082634628129126, "learning_rate": 2.0786240786240787e-05, "loss": 0.4458, "step": 564 }, { "epoch": 0.06941884752426589, "grad_norm": 0.41382303949264215, "learning_rate": 2.0823095823095824e-05, "loss": 0.4148, "step": 565 }, { "epoch": 0.06954171274112299, "grad_norm": 0.4412762859445653, "learning_rate": 2.085995085995086e-05, "loss": 0.3826, "step": 566 }, { "epoch": 0.06966457795798009, "grad_norm": 0.45920468468347514, "learning_rate": 2.0896805896805897e-05, "loss": 0.4048, "step": 567 }, { "epoch": 0.0697874431748372, "grad_norm": 0.37418953796638277, "learning_rate": 2.0933660933660934e-05, "loss": 0.4089, "step": 568 }, { "epoch": 0.0699103083916943, "grad_norm": 0.3627312142379779, "learning_rate": 2.097051597051597e-05, "loss": 0.433, "step": 569 }, { "epoch": 0.07003317360855142, "grad_norm": 0.42600070143404417, "learning_rate": 2.1007371007371008e-05, "loss": 0.3292, "step": 570 }, { "epoch": 0.07015603882540852, "grad_norm": 0.5104856831660366, "learning_rate": 2.1044226044226045e-05, "loss": 0.394, "step": 571 }, { "epoch": 0.07027890404226564, "grad_norm": 0.4147350255799406, "learning_rate": 2.1081081081081082e-05, "loss": 0.3796, "step": 572 }, { "epoch": 0.07040176925912274, "grad_norm": 0.44771046398348285, "learning_rate": 2.111793611793612e-05, "loss": 0.4806, "step": 573 }, { "epoch": 0.07052463447597986, "grad_norm": 0.42688288573333305, "learning_rate": 2.1154791154791156e-05, "loss": 0.3317, "step": 574 }, { "epoch": 0.07064749969283696, "grad_norm": 0.36581509465233586, "learning_rate": 2.1191646191646193e-05, "loss": 0.4537, "step": 575 }, { "epoch": 0.07077036490969407, "grad_norm": 0.36938092802552813, "learning_rate": 2.1228501228501227e-05, "loss": 0.3906, "step": 576 }, { "epoch": 0.07089323012655117, "grad_norm": 0.4534879063089479, "learning_rate": 2.1265356265356267e-05, "loss": 0.3913, "step": 577 }, { "epoch": 0.07101609534340828, "grad_norm": 0.4845482438811693, "learning_rate": 2.1302211302211304e-05, "loss": 0.4263, "step": 578 }, { "epoch": 0.07113896056026539, "grad_norm": 0.3803430055566044, "learning_rate": 2.1339066339066337e-05, "loss": 0.298, "step": 579 }, { "epoch": 0.07126182577712249, "grad_norm": 0.38589926243335976, "learning_rate": 2.1375921375921378e-05, "loss": 0.4198, "step": 580 }, { "epoch": 0.07138469099397961, "grad_norm": 0.35752081719111173, "learning_rate": 2.1412776412776415e-05, "loss": 0.3785, "step": 581 }, { "epoch": 0.07150755621083671, "grad_norm": 0.3858800481664248, "learning_rate": 2.1449631449631448e-05, "loss": 0.331, "step": 582 }, { "epoch": 0.07163042142769382, "grad_norm": 0.39215460695477994, "learning_rate": 2.148648648648649e-05, "loss": 0.3661, "step": 583 }, { "epoch": 0.07175328664455093, "grad_norm": 0.6305952760625889, "learning_rate": 2.1523341523341526e-05, "loss": 0.4134, "step": 584 }, { "epoch": 0.07187615186140804, "grad_norm": 0.4033951312554551, "learning_rate": 2.156019656019656e-05, "loss": 0.5078, "step": 585 }, { "epoch": 0.07199901707826514, "grad_norm": 0.40514672924850764, "learning_rate": 2.15970515970516e-05, "loss": 0.3701, "step": 586 }, { "epoch": 0.07212188229512224, "grad_norm": 0.5282292279573272, "learning_rate": 2.1633906633906636e-05, "loss": 0.4874, "step": 587 }, { "epoch": 0.07224474751197936, "grad_norm": 0.3817806929079992, "learning_rate": 2.167076167076167e-05, "loss": 0.4095, "step": 588 }, { "epoch": 0.07236761272883646, "grad_norm": 0.4030275561429454, "learning_rate": 2.170761670761671e-05, "loss": 0.3654, "step": 589 }, { "epoch": 0.07249047794569358, "grad_norm": 0.5334607843966963, "learning_rate": 2.1744471744471747e-05, "loss": 0.4492, "step": 590 }, { "epoch": 0.07261334316255068, "grad_norm": 0.47269442645910087, "learning_rate": 2.178132678132678e-05, "loss": 0.3695, "step": 591 }, { "epoch": 0.0727362083794078, "grad_norm": 0.4322001693704707, "learning_rate": 2.1818181818181818e-05, "loss": 0.3454, "step": 592 }, { "epoch": 0.0728590735962649, "grad_norm": 0.4783888642444378, "learning_rate": 2.1855036855036855e-05, "loss": 0.403, "step": 593 }, { "epoch": 0.07298193881312201, "grad_norm": 0.47305518871214625, "learning_rate": 2.1891891891891892e-05, "loss": 0.4605, "step": 594 }, { "epoch": 0.07310480402997911, "grad_norm": 0.3745488367508014, "learning_rate": 2.192874692874693e-05, "loss": 0.3934, "step": 595 }, { "epoch": 0.07322766924683623, "grad_norm": 0.5100692507119257, "learning_rate": 2.1965601965601966e-05, "loss": 0.4289, "step": 596 }, { "epoch": 0.07335053446369333, "grad_norm": 0.3624357364325689, "learning_rate": 2.2002457002457003e-05, "loss": 0.3968, "step": 597 }, { "epoch": 0.07347339968055043, "grad_norm": 0.39073783521846783, "learning_rate": 2.203931203931204e-05, "loss": 0.3906, "step": 598 }, { "epoch": 0.07359626489740755, "grad_norm": 0.5057715114640918, "learning_rate": 2.2076167076167076e-05, "loss": 0.4208, "step": 599 }, { "epoch": 0.07371913011426465, "grad_norm": 0.5622469963155187, "learning_rate": 2.2113022113022113e-05, "loss": 0.367, "step": 600 }, { "epoch": 0.07384199533112176, "grad_norm": 0.4148591441795123, "learning_rate": 2.214987714987715e-05, "loss": 0.4472, "step": 601 }, { "epoch": 0.07396486054797886, "grad_norm": 0.5000857887896974, "learning_rate": 2.2186732186732187e-05, "loss": 0.5456, "step": 602 }, { "epoch": 0.07408772576483598, "grad_norm": 0.4081097501996043, "learning_rate": 2.2223587223587224e-05, "loss": 0.4113, "step": 603 }, { "epoch": 0.07421059098169308, "grad_norm": 0.44487764632377125, "learning_rate": 2.226044226044226e-05, "loss": 0.403, "step": 604 }, { "epoch": 0.0743334561985502, "grad_norm": 0.4161459187309689, "learning_rate": 2.2297297297297298e-05, "loss": 0.472, "step": 605 }, { "epoch": 0.0744563214154073, "grad_norm": 0.48759990352212357, "learning_rate": 2.2334152334152335e-05, "loss": 0.4104, "step": 606 }, { "epoch": 0.0745791866322644, "grad_norm": 0.5203016720770651, "learning_rate": 2.2371007371007372e-05, "loss": 0.4212, "step": 607 }, { "epoch": 0.07470205184912151, "grad_norm": 0.4376127609341165, "learning_rate": 2.2407862407862406e-05, "loss": 0.3389, "step": 608 }, { "epoch": 0.07482491706597862, "grad_norm": 0.4263576552447238, "learning_rate": 2.2444717444717446e-05, "loss": 0.403, "step": 609 }, { "epoch": 0.07494778228283573, "grad_norm": 0.5085366087761439, "learning_rate": 2.2481572481572483e-05, "loss": 0.4212, "step": 610 }, { "epoch": 0.07507064749969283, "grad_norm": 0.42915044447957806, "learning_rate": 2.2518427518427517e-05, "loss": 0.4099, "step": 611 }, { "epoch": 0.07519351271654995, "grad_norm": 0.4447616396579292, "learning_rate": 2.2555282555282557e-05, "loss": 0.3842, "step": 612 }, { "epoch": 0.07531637793340705, "grad_norm": 0.3925498366360837, "learning_rate": 2.2592137592137594e-05, "loss": 0.3663, "step": 613 }, { "epoch": 0.07543924315026417, "grad_norm": 0.42749711838976134, "learning_rate": 2.2628992628992627e-05, "loss": 0.3885, "step": 614 }, { "epoch": 0.07556210836712127, "grad_norm": 0.4213635284129931, "learning_rate": 2.2665847665847668e-05, "loss": 0.3007, "step": 615 }, { "epoch": 0.07568497358397838, "grad_norm": 0.3714411691592053, "learning_rate": 2.2702702702702705e-05, "loss": 0.3758, "step": 616 }, { "epoch": 0.07580783880083548, "grad_norm": 0.43730730383046934, "learning_rate": 2.2739557739557738e-05, "loss": 0.4099, "step": 617 }, { "epoch": 0.07593070401769259, "grad_norm": 0.4635857562377928, "learning_rate": 2.277641277641278e-05, "loss": 0.4262, "step": 618 }, { "epoch": 0.0760535692345497, "grad_norm": 0.39471252109693106, "learning_rate": 2.2813267813267816e-05, "loss": 0.3743, "step": 619 }, { "epoch": 0.0761764344514068, "grad_norm": 0.39995085167685057, "learning_rate": 2.285012285012285e-05, "loss": 0.3935, "step": 620 }, { "epoch": 0.07629929966826392, "grad_norm": 0.442741870197244, "learning_rate": 2.288697788697789e-05, "loss": 0.4353, "step": 621 }, { "epoch": 0.07642216488512102, "grad_norm": 0.4503354727684881, "learning_rate": 2.2923832923832926e-05, "loss": 0.3721, "step": 622 }, { "epoch": 0.07654503010197813, "grad_norm": 0.43483805713552737, "learning_rate": 2.296068796068796e-05, "loss": 0.4109, "step": 623 }, { "epoch": 0.07666789531883524, "grad_norm": 0.35218235354871874, "learning_rate": 2.2997542997542997e-05, "loss": 0.4315, "step": 624 }, { "epoch": 0.07679076053569235, "grad_norm": 0.4950807151672981, "learning_rate": 2.3034398034398037e-05, "loss": 0.4499, "step": 625 }, { "epoch": 0.07691362575254945, "grad_norm": 0.5142728097116462, "learning_rate": 2.307125307125307e-05, "loss": 0.4872, "step": 626 }, { "epoch": 0.07703649096940655, "grad_norm": 0.48250187150271945, "learning_rate": 2.3108108108108108e-05, "loss": 0.4373, "step": 627 }, { "epoch": 0.07715935618626367, "grad_norm": 0.45732980827348646, "learning_rate": 2.3144963144963148e-05, "loss": 0.3883, "step": 628 }, { "epoch": 0.07728222140312077, "grad_norm": 0.49258575495468215, "learning_rate": 2.318181818181818e-05, "loss": 0.4, "step": 629 }, { "epoch": 0.07740508661997789, "grad_norm": 0.5165868503510684, "learning_rate": 2.321867321867322e-05, "loss": 0.3701, "step": 630 }, { "epoch": 0.07752795183683499, "grad_norm": 0.4521753246178467, "learning_rate": 2.3255528255528256e-05, "loss": 0.491, "step": 631 }, { "epoch": 0.0776508170536921, "grad_norm": 0.4524919163704214, "learning_rate": 2.3292383292383292e-05, "loss": 0.4305, "step": 632 }, { "epoch": 0.0777736822705492, "grad_norm": 0.37344157841906944, "learning_rate": 2.332923832923833e-05, "loss": 0.353, "step": 633 }, { "epoch": 0.07789654748740632, "grad_norm": 0.43204490006223245, "learning_rate": 2.3366093366093366e-05, "loss": 0.3917, "step": 634 }, { "epoch": 0.07801941270426342, "grad_norm": 0.4354173178804185, "learning_rate": 2.3402948402948403e-05, "loss": 0.3715, "step": 635 }, { "epoch": 0.07814227792112054, "grad_norm": 0.48628091606623947, "learning_rate": 2.343980343980344e-05, "loss": 0.4986, "step": 636 }, { "epoch": 0.07826514313797764, "grad_norm": 0.39100637242834524, "learning_rate": 2.3476658476658477e-05, "loss": 0.4572, "step": 637 }, { "epoch": 0.07838800835483474, "grad_norm": 0.9034918972360437, "learning_rate": 2.3513513513513514e-05, "loss": 0.5234, "step": 638 }, { "epoch": 0.07851087357169186, "grad_norm": 0.40175604895160416, "learning_rate": 2.355036855036855e-05, "loss": 0.4887, "step": 639 }, { "epoch": 0.07863373878854896, "grad_norm": 0.46503530672165677, "learning_rate": 2.3587223587223585e-05, "loss": 0.3324, "step": 640 }, { "epoch": 0.07875660400540607, "grad_norm": 0.5135301063480268, "learning_rate": 2.3624078624078625e-05, "loss": 0.4277, "step": 641 }, { "epoch": 0.07887946922226317, "grad_norm": 0.43993440417910135, "learning_rate": 2.3660933660933662e-05, "loss": 0.4757, "step": 642 }, { "epoch": 0.07900233443912029, "grad_norm": 0.4577554498921362, "learning_rate": 2.3697788697788696e-05, "loss": 0.4367, "step": 643 }, { "epoch": 0.07912519965597739, "grad_norm": 0.4612893773446549, "learning_rate": 2.3734643734643736e-05, "loss": 0.4369, "step": 644 }, { "epoch": 0.0792480648728345, "grad_norm": 0.4280172532007803, "learning_rate": 2.3771498771498773e-05, "loss": 0.4136, "step": 645 }, { "epoch": 0.07937093008969161, "grad_norm": 0.38488928058471117, "learning_rate": 2.3808353808353806e-05, "loss": 0.4105, "step": 646 }, { "epoch": 0.07949379530654872, "grad_norm": 0.41186613096240826, "learning_rate": 2.3845208845208847e-05, "loss": 0.4092, "step": 647 }, { "epoch": 0.07961666052340582, "grad_norm": 0.40556018460057125, "learning_rate": 2.3882063882063884e-05, "loss": 0.402, "step": 648 }, { "epoch": 0.07973952574026293, "grad_norm": 0.602302446568112, "learning_rate": 2.3918918918918917e-05, "loss": 0.3884, "step": 649 }, { "epoch": 0.07986239095712004, "grad_norm": 0.3858471851837935, "learning_rate": 2.3955773955773958e-05, "loss": 0.3765, "step": 650 }, { "epoch": 0.07998525617397714, "grad_norm": 0.4352372510502871, "learning_rate": 2.3992628992628995e-05, "loss": 0.4601, "step": 651 }, { "epoch": 0.08010812139083426, "grad_norm": 0.5446728707483878, "learning_rate": 2.4029484029484028e-05, "loss": 0.4129, "step": 652 }, { "epoch": 0.08023098660769136, "grad_norm": 0.4259586558267965, "learning_rate": 2.406633906633907e-05, "loss": 0.3991, "step": 653 }, { "epoch": 0.08035385182454848, "grad_norm": 0.4199023008685881, "learning_rate": 2.4103194103194105e-05, "loss": 0.34, "step": 654 }, { "epoch": 0.08047671704140558, "grad_norm": 0.4494200228696098, "learning_rate": 2.414004914004914e-05, "loss": 0.4068, "step": 655 }, { "epoch": 0.08059958225826269, "grad_norm": 0.51439526799355, "learning_rate": 2.417690417690418e-05, "loss": 0.4318, "step": 656 }, { "epoch": 0.0807224474751198, "grad_norm": 0.5634542236642999, "learning_rate": 2.4213759213759216e-05, "loss": 0.4046, "step": 657 }, { "epoch": 0.0808453126919769, "grad_norm": 0.38757395763424246, "learning_rate": 2.425061425061425e-05, "loss": 0.4076, "step": 658 }, { "epoch": 0.08096817790883401, "grad_norm": 0.4156067023854106, "learning_rate": 2.4287469287469287e-05, "loss": 0.3134, "step": 659 }, { "epoch": 0.08109104312569111, "grad_norm": 0.5387928601090685, "learning_rate": 2.4324324324324327e-05, "loss": 0.4203, "step": 660 }, { "epoch": 0.08121390834254823, "grad_norm": 0.4102390761625214, "learning_rate": 2.436117936117936e-05, "loss": 0.373, "step": 661 }, { "epoch": 0.08133677355940533, "grad_norm": 0.4309516599635571, "learning_rate": 2.4398034398034398e-05, "loss": 0.3783, "step": 662 }, { "epoch": 0.08145963877626244, "grad_norm": 0.4762995725822135, "learning_rate": 2.4434889434889438e-05, "loss": 0.3419, "step": 663 }, { "epoch": 0.08158250399311955, "grad_norm": 0.40168553479933533, "learning_rate": 2.447174447174447e-05, "loss": 0.4727, "step": 664 }, { "epoch": 0.08170536920997666, "grad_norm": 0.39641136947454575, "learning_rate": 2.450859950859951e-05, "loss": 0.4062, "step": 665 }, { "epoch": 0.08182823442683376, "grad_norm": 0.4006126652014121, "learning_rate": 2.454545454545455e-05, "loss": 0.3563, "step": 666 }, { "epoch": 0.08195109964369088, "grad_norm": 0.47271794477713946, "learning_rate": 2.4582309582309582e-05, "loss": 0.3986, "step": 667 }, { "epoch": 0.08207396486054798, "grad_norm": 0.4480103993964653, "learning_rate": 2.461916461916462e-05, "loss": 0.4472, "step": 668 }, { "epoch": 0.08219683007740508, "grad_norm": 0.4093015726989949, "learning_rate": 2.465601965601966e-05, "loss": 0.3979, "step": 669 }, { "epoch": 0.0823196952942622, "grad_norm": 0.3887104390843092, "learning_rate": 2.4692874692874693e-05, "loss": 0.465, "step": 670 }, { "epoch": 0.0824425605111193, "grad_norm": 0.4274095325858278, "learning_rate": 2.472972972972973e-05, "loss": 0.4393, "step": 671 }, { "epoch": 0.08256542572797641, "grad_norm": 0.4396896944469327, "learning_rate": 2.4766584766584767e-05, "loss": 0.3782, "step": 672 }, { "epoch": 0.08268829094483351, "grad_norm": 0.37273831645047195, "learning_rate": 2.4803439803439804e-05, "loss": 0.4793, "step": 673 }, { "epoch": 0.08281115616169063, "grad_norm": 0.5213293215377909, "learning_rate": 2.484029484029484e-05, "loss": 0.3877, "step": 674 }, { "epoch": 0.08293402137854773, "grad_norm": 0.5030278087557469, "learning_rate": 2.4877149877149875e-05, "loss": 0.4728, "step": 675 }, { "epoch": 0.08305688659540485, "grad_norm": 0.4623797023573801, "learning_rate": 2.4914004914004915e-05, "loss": 0.4538, "step": 676 }, { "epoch": 0.08317975181226195, "grad_norm": 0.38125629198004074, "learning_rate": 2.4950859950859952e-05, "loss": 0.3985, "step": 677 }, { "epoch": 0.08330261702911905, "grad_norm": 0.4272958560374932, "learning_rate": 2.4987714987714985e-05, "loss": 0.4649, "step": 678 }, { "epoch": 0.08342548224597617, "grad_norm": 0.3841475173386606, "learning_rate": 2.5024570024570026e-05, "loss": 0.3971, "step": 679 }, { "epoch": 0.08354834746283327, "grad_norm": 0.41299250908605, "learning_rate": 2.5061425061425063e-05, "loss": 0.3999, "step": 680 }, { "epoch": 0.08367121267969038, "grad_norm": 0.3837177094859441, "learning_rate": 2.5098280098280096e-05, "loss": 0.4263, "step": 681 }, { "epoch": 0.08379407789654748, "grad_norm": 0.5328934758079363, "learning_rate": 2.5135135135135137e-05, "loss": 0.4656, "step": 682 }, { "epoch": 0.0839169431134046, "grad_norm": 0.4492150614573493, "learning_rate": 2.5171990171990174e-05, "loss": 0.4735, "step": 683 }, { "epoch": 0.0840398083302617, "grad_norm": 0.46660782880626606, "learning_rate": 2.5208845208845207e-05, "loss": 0.3644, "step": 684 }, { "epoch": 0.08416267354711882, "grad_norm": 0.4431449940521086, "learning_rate": 2.5245700245700248e-05, "loss": 0.465, "step": 685 }, { "epoch": 0.08428553876397592, "grad_norm": 0.4106352220646102, "learning_rate": 2.5282555282555284e-05, "loss": 0.4076, "step": 686 }, { "epoch": 0.08440840398083303, "grad_norm": 0.34127986322160814, "learning_rate": 2.5319410319410318e-05, "loss": 0.4374, "step": 687 }, { "epoch": 0.08453126919769013, "grad_norm": 0.4305838631917953, "learning_rate": 2.535626535626536e-05, "loss": 0.4183, "step": 688 }, { "epoch": 0.08465413441454724, "grad_norm": 0.4377413574846819, "learning_rate": 2.5393120393120395e-05, "loss": 0.3994, "step": 689 }, { "epoch": 0.08477699963140435, "grad_norm": 0.44384756674228487, "learning_rate": 2.542997542997543e-05, "loss": 0.39, "step": 690 }, { "epoch": 0.08489986484826145, "grad_norm": 0.363669259668362, "learning_rate": 2.5466830466830466e-05, "loss": 0.4311, "step": 691 }, { "epoch": 0.08502273006511857, "grad_norm": 0.4346312717482192, "learning_rate": 2.5503685503685506e-05, "loss": 0.4037, "step": 692 }, { "epoch": 0.08514559528197567, "grad_norm": 0.41357242612230305, "learning_rate": 2.554054054054054e-05, "loss": 0.397, "step": 693 }, { "epoch": 0.08526846049883278, "grad_norm": 0.48932478848361244, "learning_rate": 2.5577395577395577e-05, "loss": 0.4203, "step": 694 }, { "epoch": 0.08539132571568989, "grad_norm": 0.4270416101287537, "learning_rate": 2.5614250614250617e-05, "loss": 0.4454, "step": 695 }, { "epoch": 0.085514190932547, "grad_norm": 0.46277096944468804, "learning_rate": 2.565110565110565e-05, "loss": 0.398, "step": 696 }, { "epoch": 0.0856370561494041, "grad_norm": 0.4747979269650483, "learning_rate": 2.5687960687960688e-05, "loss": 0.4127, "step": 697 }, { "epoch": 0.0857599213662612, "grad_norm": 0.3423172224865879, "learning_rate": 2.5724815724815728e-05, "loss": 0.35, "step": 698 }, { "epoch": 0.08588278658311832, "grad_norm": 0.3940417612129314, "learning_rate": 2.576167076167076e-05, "loss": 0.486, "step": 699 }, { "epoch": 0.08600565179997542, "grad_norm": 0.3712345973318008, "learning_rate": 2.57985257985258e-05, "loss": 0.433, "step": 700 }, { "epoch": 0.08612851701683254, "grad_norm": 0.4775890258808309, "learning_rate": 2.583538083538084e-05, "loss": 0.4208, "step": 701 }, { "epoch": 0.08625138223368964, "grad_norm": 0.3666221727931046, "learning_rate": 2.5872235872235872e-05, "loss": 0.3647, "step": 702 }, { "epoch": 0.08637424745054675, "grad_norm": 0.4289515811492861, "learning_rate": 2.590909090909091e-05, "loss": 0.4317, "step": 703 }, { "epoch": 0.08649711266740386, "grad_norm": 0.36921622495285555, "learning_rate": 2.594594594594595e-05, "loss": 0.3799, "step": 704 }, { "epoch": 0.08661997788426097, "grad_norm": 0.5250281822484069, "learning_rate": 2.5982800982800983e-05, "loss": 0.5028, "step": 705 }, { "epoch": 0.08674284310111807, "grad_norm": 0.4588406643540372, "learning_rate": 2.601965601965602e-05, "loss": 0.4297, "step": 706 }, { "epoch": 0.08686570831797519, "grad_norm": 0.3922113707056867, "learning_rate": 2.6056511056511057e-05, "loss": 0.4205, "step": 707 }, { "epoch": 0.08698857353483229, "grad_norm": 0.3814310339313689, "learning_rate": 2.6093366093366094e-05, "loss": 0.3312, "step": 708 }, { "epoch": 0.08711143875168939, "grad_norm": 0.44841631891007233, "learning_rate": 2.613022113022113e-05, "loss": 0.4693, "step": 709 }, { "epoch": 0.0872343039685465, "grad_norm": 0.44913090949351764, "learning_rate": 2.6167076167076168e-05, "loss": 0.4285, "step": 710 }, { "epoch": 0.08735716918540361, "grad_norm": 0.3826781762135045, "learning_rate": 2.6203931203931205e-05, "loss": 0.4293, "step": 711 }, { "epoch": 0.08748003440226072, "grad_norm": 0.43116862102980086, "learning_rate": 2.6240786240786242e-05, "loss": 0.4326, "step": 712 }, { "epoch": 0.08760289961911782, "grad_norm": 0.4437246002769442, "learning_rate": 2.6277641277641275e-05, "loss": 0.4493, "step": 713 }, { "epoch": 0.08772576483597494, "grad_norm": 0.417962104764186, "learning_rate": 2.6314496314496316e-05, "loss": 0.4164, "step": 714 }, { "epoch": 0.08784863005283204, "grad_norm": 0.40701295514060326, "learning_rate": 2.6351351351351353e-05, "loss": 0.3716, "step": 715 }, { "epoch": 0.08797149526968916, "grad_norm": 0.47870272192015473, "learning_rate": 2.6388206388206386e-05, "loss": 0.4223, "step": 716 }, { "epoch": 0.08809436048654626, "grad_norm": 0.6563832566885048, "learning_rate": 2.6425061425061427e-05, "loss": 0.427, "step": 717 }, { "epoch": 0.08821722570340336, "grad_norm": 0.4924690240440309, "learning_rate": 2.6461916461916464e-05, "loss": 0.3772, "step": 718 }, { "epoch": 0.08834009092026048, "grad_norm": 0.4146207007432604, "learning_rate": 2.6498771498771497e-05, "loss": 0.3691, "step": 719 }, { "epoch": 0.08846295613711758, "grad_norm": 0.39004438764897487, "learning_rate": 2.6535626535626537e-05, "loss": 0.3894, "step": 720 }, { "epoch": 0.08858582135397469, "grad_norm": 0.39785188088988344, "learning_rate": 2.6572481572481574e-05, "loss": 0.3431, "step": 721 }, { "epoch": 0.0887086865708318, "grad_norm": 0.4806115018172493, "learning_rate": 2.6609336609336608e-05, "loss": 0.4278, "step": 722 }, { "epoch": 0.08883155178768891, "grad_norm": 0.4781359515574856, "learning_rate": 2.6646191646191645e-05, "loss": 0.3498, "step": 723 }, { "epoch": 0.08895441700454601, "grad_norm": 0.5992080053230083, "learning_rate": 2.6683046683046685e-05, "loss": 0.4143, "step": 724 }, { "epoch": 0.08907728222140313, "grad_norm": 0.399103373510207, "learning_rate": 2.671990171990172e-05, "loss": 0.4592, "step": 725 }, { "epoch": 0.08920014743826023, "grad_norm": 0.38281988848579496, "learning_rate": 2.6756756756756756e-05, "loss": 0.3888, "step": 726 }, { "epoch": 0.08932301265511734, "grad_norm": 0.6230174844514709, "learning_rate": 2.6793611793611796e-05, "loss": 0.4261, "step": 727 }, { "epoch": 0.08944587787197444, "grad_norm": 0.3606233915373861, "learning_rate": 2.683046683046683e-05, "loss": 0.362, "step": 728 }, { "epoch": 0.08956874308883155, "grad_norm": 0.5125980244232305, "learning_rate": 2.6867321867321867e-05, "loss": 0.5203, "step": 729 }, { "epoch": 0.08969160830568866, "grad_norm": 0.38856181409221247, "learning_rate": 2.6904176904176907e-05, "loss": 0.3843, "step": 730 }, { "epoch": 0.08981447352254576, "grad_norm": 0.4362252113301355, "learning_rate": 2.694103194103194e-05, "loss": 0.3844, "step": 731 }, { "epoch": 0.08993733873940288, "grad_norm": 0.5118714500137854, "learning_rate": 2.6977886977886977e-05, "loss": 0.4739, "step": 732 }, { "epoch": 0.09006020395625998, "grad_norm": 0.427403871640443, "learning_rate": 2.7014742014742018e-05, "loss": 0.334, "step": 733 }, { "epoch": 0.0901830691731171, "grad_norm": 0.45507938419346594, "learning_rate": 2.705159705159705e-05, "loss": 0.4095, "step": 734 }, { "epoch": 0.0903059343899742, "grad_norm": 0.47850572027332217, "learning_rate": 2.708845208845209e-05, "loss": 0.4042, "step": 735 }, { "epoch": 0.09042879960683131, "grad_norm": 0.3903998425571494, "learning_rate": 2.712530712530713e-05, "loss": 0.3869, "step": 736 }, { "epoch": 0.09055166482368841, "grad_norm": 0.4346209349511018, "learning_rate": 2.7162162162162162e-05, "loss": 0.4636, "step": 737 }, { "epoch": 0.09067453004054553, "grad_norm": 0.3629282753876749, "learning_rate": 2.71990171990172e-05, "loss": 0.368, "step": 738 }, { "epoch": 0.09079739525740263, "grad_norm": 0.4180803774852463, "learning_rate": 2.7235872235872236e-05, "loss": 0.3694, "step": 739 }, { "epoch": 0.09092026047425973, "grad_norm": 0.47533055160362253, "learning_rate": 2.7272727272727273e-05, "loss": 0.4365, "step": 740 }, { "epoch": 0.09104312569111685, "grad_norm": 0.5194230715721446, "learning_rate": 2.730958230958231e-05, "loss": 0.4279, "step": 741 }, { "epoch": 0.09116599090797395, "grad_norm": 0.4783044144335658, "learning_rate": 2.7346437346437347e-05, "loss": 0.4496, "step": 742 }, { "epoch": 0.09128885612483106, "grad_norm": 0.390526586399212, "learning_rate": 2.7383292383292384e-05, "loss": 0.3343, "step": 743 }, { "epoch": 0.09141172134168817, "grad_norm": 0.3697505642279855, "learning_rate": 2.742014742014742e-05, "loss": 0.4074, "step": 744 }, { "epoch": 0.09153458655854528, "grad_norm": 0.4054667279576075, "learning_rate": 2.7457002457002458e-05, "loss": 0.3626, "step": 745 }, { "epoch": 0.09165745177540238, "grad_norm": 0.36835314490406823, "learning_rate": 2.7493857493857495e-05, "loss": 0.3377, "step": 746 }, { "epoch": 0.0917803169922595, "grad_norm": 0.4063333070205588, "learning_rate": 2.7530712530712532e-05, "loss": 0.3736, "step": 747 }, { "epoch": 0.0919031822091166, "grad_norm": 0.49504369544575383, "learning_rate": 2.756756756756757e-05, "loss": 0.4891, "step": 748 }, { "epoch": 0.0920260474259737, "grad_norm": 0.44684718416117686, "learning_rate": 2.7604422604422606e-05, "loss": 0.4661, "step": 749 }, { "epoch": 0.09214891264283082, "grad_norm": 0.4123499158971052, "learning_rate": 2.7641277641277643e-05, "loss": 0.5164, "step": 750 }, { "epoch": 0.09227177785968792, "grad_norm": 0.4553829042209028, "learning_rate": 2.767813267813268e-05, "loss": 0.498, "step": 751 }, { "epoch": 0.09239464307654503, "grad_norm": 0.42439133153802105, "learning_rate": 2.7714987714987717e-05, "loss": 0.4439, "step": 752 }, { "epoch": 0.09251750829340213, "grad_norm": 0.4139501039966014, "learning_rate": 2.7751842751842753e-05, "loss": 0.3225, "step": 753 }, { "epoch": 0.09264037351025925, "grad_norm": 0.4210229651147875, "learning_rate": 2.7788697788697787e-05, "loss": 0.4315, "step": 754 }, { "epoch": 0.09276323872711635, "grad_norm": 0.4198038415253811, "learning_rate": 2.7825552825552827e-05, "loss": 0.4298, "step": 755 }, { "epoch": 0.09288610394397347, "grad_norm": 0.6096915609024015, "learning_rate": 2.7862407862407864e-05, "loss": 0.4356, "step": 756 }, { "epoch": 0.09300896916083057, "grad_norm": 0.41953239497620864, "learning_rate": 2.7899262899262898e-05, "loss": 0.3814, "step": 757 }, { "epoch": 0.09313183437768768, "grad_norm": 0.477686417909286, "learning_rate": 2.7936117936117935e-05, "loss": 0.3828, "step": 758 }, { "epoch": 0.09325469959454478, "grad_norm": 0.39592474206052297, "learning_rate": 2.7972972972972975e-05, "loss": 0.4642, "step": 759 }, { "epoch": 0.09337756481140189, "grad_norm": 0.45939475667168006, "learning_rate": 2.800982800982801e-05, "loss": 0.442, "step": 760 }, { "epoch": 0.093500430028259, "grad_norm": 0.5384415037652743, "learning_rate": 2.8046683046683046e-05, "loss": 0.4633, "step": 761 }, { "epoch": 0.0936232952451161, "grad_norm": 0.4525596496406155, "learning_rate": 2.8083538083538086e-05, "loss": 0.3476, "step": 762 }, { "epoch": 0.09374616046197322, "grad_norm": 0.44039288561341355, "learning_rate": 2.812039312039312e-05, "loss": 0.3548, "step": 763 }, { "epoch": 0.09386902567883032, "grad_norm": 0.433411536445068, "learning_rate": 2.8157248157248157e-05, "loss": 0.4319, "step": 764 }, { "epoch": 0.09399189089568744, "grad_norm": 0.5242823571088789, "learning_rate": 2.8194103194103197e-05, "loss": 0.3752, "step": 765 }, { "epoch": 0.09411475611254454, "grad_norm": 0.425355560979627, "learning_rate": 2.823095823095823e-05, "loss": 0.4511, "step": 766 }, { "epoch": 0.09423762132940165, "grad_norm": 0.44541151355548275, "learning_rate": 2.8267813267813267e-05, "loss": 0.4332, "step": 767 }, { "epoch": 0.09436048654625875, "grad_norm": 0.44034267729369403, "learning_rate": 2.8304668304668308e-05, "loss": 0.5223, "step": 768 }, { "epoch": 0.09448335176311586, "grad_norm": 0.5653222920998356, "learning_rate": 2.834152334152334e-05, "loss": 0.4716, "step": 769 }, { "epoch": 0.09460621697997297, "grad_norm": 0.3969026183860036, "learning_rate": 2.8378378378378378e-05, "loss": 0.4045, "step": 770 }, { "epoch": 0.09472908219683007, "grad_norm": 0.3991132143013556, "learning_rate": 2.841523341523342e-05, "loss": 0.4443, "step": 771 }, { "epoch": 0.09485194741368719, "grad_norm": 0.3942126172923388, "learning_rate": 2.8452088452088452e-05, "loss": 0.339, "step": 772 }, { "epoch": 0.09497481263054429, "grad_norm": 0.36866971037305274, "learning_rate": 2.848894348894349e-05, "loss": 0.3746, "step": 773 }, { "epoch": 0.0950976778474014, "grad_norm": 0.45012968222170097, "learning_rate": 2.8525798525798526e-05, "loss": 0.375, "step": 774 }, { "epoch": 0.0952205430642585, "grad_norm": 0.33200841003386833, "learning_rate": 2.8562653562653563e-05, "loss": 0.3883, "step": 775 }, { "epoch": 0.09534340828111562, "grad_norm": 0.44043547055281823, "learning_rate": 2.85995085995086e-05, "loss": 0.4442, "step": 776 }, { "epoch": 0.09546627349797272, "grad_norm": 0.338449293202568, "learning_rate": 2.8636363636363637e-05, "loss": 0.4149, "step": 777 }, { "epoch": 0.09558913871482984, "grad_norm": 0.3972688359819994, "learning_rate": 2.8673218673218674e-05, "loss": 0.3658, "step": 778 }, { "epoch": 0.09571200393168694, "grad_norm": 0.3856445846954942, "learning_rate": 2.871007371007371e-05, "loss": 0.3761, "step": 779 }, { "epoch": 0.09583486914854404, "grad_norm": 0.4061569372889297, "learning_rate": 2.8746928746928748e-05, "loss": 0.3994, "step": 780 }, { "epoch": 0.09595773436540116, "grad_norm": 0.32657485789428226, "learning_rate": 2.8783783783783785e-05, "loss": 0.3857, "step": 781 }, { "epoch": 0.09608059958225826, "grad_norm": 0.37817238513794754, "learning_rate": 2.882063882063882e-05, "loss": 0.4214, "step": 782 }, { "epoch": 0.09620346479911537, "grad_norm": 0.4364214127717959, "learning_rate": 2.885749385749386e-05, "loss": 0.3685, "step": 783 }, { "epoch": 0.09632633001597248, "grad_norm": 0.4302016489630854, "learning_rate": 2.8894348894348896e-05, "loss": 0.3825, "step": 784 }, { "epoch": 0.09644919523282959, "grad_norm": 0.32552727498663503, "learning_rate": 2.8931203931203933e-05, "loss": 0.3396, "step": 785 }, { "epoch": 0.09657206044968669, "grad_norm": 0.39855503431833444, "learning_rate": 2.896805896805897e-05, "loss": 0.3562, "step": 786 }, { "epoch": 0.09669492566654381, "grad_norm": 0.4915428651046144, "learning_rate": 2.9004914004914006e-05, "loss": 0.415, "step": 787 }, { "epoch": 0.09681779088340091, "grad_norm": 0.44393601992191944, "learning_rate": 2.9041769041769043e-05, "loss": 0.4002, "step": 788 }, { "epoch": 0.09694065610025801, "grad_norm": 0.3745323936680244, "learning_rate": 2.907862407862408e-05, "loss": 0.4147, "step": 789 }, { "epoch": 0.09706352131711513, "grad_norm": 0.41669990945771435, "learning_rate": 2.9115479115479114e-05, "loss": 0.3894, "step": 790 }, { "epoch": 0.09718638653397223, "grad_norm": 0.372968441805735, "learning_rate": 2.9152334152334154e-05, "loss": 0.3981, "step": 791 }, { "epoch": 0.09730925175082934, "grad_norm": 0.4125020078332026, "learning_rate": 2.918918918918919e-05, "loss": 0.3885, "step": 792 }, { "epoch": 0.09743211696768644, "grad_norm": 0.3918898471410056, "learning_rate": 2.9226044226044225e-05, "loss": 0.4345, "step": 793 }, { "epoch": 0.09755498218454356, "grad_norm": 0.3837424511105669, "learning_rate": 2.9262899262899265e-05, "loss": 0.3347, "step": 794 }, { "epoch": 0.09767784740140066, "grad_norm": 0.3807484018292796, "learning_rate": 2.92997542997543e-05, "loss": 0.4686, "step": 795 }, { "epoch": 0.09780071261825778, "grad_norm": 0.3974248762638174, "learning_rate": 2.9336609336609336e-05, "loss": 0.3741, "step": 796 }, { "epoch": 0.09792357783511488, "grad_norm": 0.4881069523295057, "learning_rate": 2.9373464373464376e-05, "loss": 0.5001, "step": 797 }, { "epoch": 0.098046443051972, "grad_norm": 0.3702507685427788, "learning_rate": 2.941031941031941e-05, "loss": 0.3983, "step": 798 }, { "epoch": 0.0981693082688291, "grad_norm": 0.4051710391502045, "learning_rate": 2.9447174447174446e-05, "loss": 0.4231, "step": 799 }, { "epoch": 0.0982921734856862, "grad_norm": 0.45711216209509625, "learning_rate": 2.9484029484029487e-05, "loss": 0.3571, "step": 800 }, { "epoch": 0.09841503870254331, "grad_norm": 0.35236118052835386, "learning_rate": 2.952088452088452e-05, "loss": 0.4264, "step": 801 }, { "epoch": 0.09853790391940041, "grad_norm": 0.4889258996155437, "learning_rate": 2.9557739557739557e-05, "loss": 0.4032, "step": 802 }, { "epoch": 0.09866076913625753, "grad_norm": 0.415786742602552, "learning_rate": 2.9594594594594598e-05, "loss": 0.3303, "step": 803 }, { "epoch": 0.09878363435311463, "grad_norm": 0.37770298661882123, "learning_rate": 2.963144963144963e-05, "loss": 0.3761, "step": 804 }, { "epoch": 0.09890649956997175, "grad_norm": 0.4550452104512448, "learning_rate": 2.9668304668304668e-05, "loss": 0.344, "step": 805 }, { "epoch": 0.09902936478682885, "grad_norm": 0.4657801418789308, "learning_rate": 2.9705159705159705e-05, "loss": 0.447, "step": 806 }, { "epoch": 0.09915223000368596, "grad_norm": 0.4690063263019145, "learning_rate": 2.9742014742014742e-05, "loss": 0.4144, "step": 807 }, { "epoch": 0.09927509522054306, "grad_norm": 0.3429608663350365, "learning_rate": 2.977886977886978e-05, "loss": 0.3586, "step": 808 }, { "epoch": 0.09939796043740017, "grad_norm": 0.42241083366952437, "learning_rate": 2.9815724815724816e-05, "loss": 0.3943, "step": 809 }, { "epoch": 0.09952082565425728, "grad_norm": 0.4150218639707356, "learning_rate": 2.9852579852579853e-05, "loss": 0.3377, "step": 810 }, { "epoch": 0.09964369087111438, "grad_norm": 0.37644268373642376, "learning_rate": 2.988943488943489e-05, "loss": 0.3284, "step": 811 }, { "epoch": 0.0997665560879715, "grad_norm": 0.4399963421698676, "learning_rate": 2.9926289926289927e-05, "loss": 0.3769, "step": 812 }, { "epoch": 0.0998894213048286, "grad_norm": 0.46449813851024413, "learning_rate": 2.9963144963144964e-05, "loss": 0.4768, "step": 813 }, { "epoch": 0.10001228652168571, "grad_norm": 0.4079571464938467, "learning_rate": 3e-05, "loss": 0.3827, "step": 814 }, { "epoch": 0.10013515173854282, "grad_norm": 0.4133148963748298, "learning_rate": 2.999999862042364e-05, "loss": 0.3449, "step": 815 }, { "epoch": 0.10025801695539993, "grad_norm": 0.4321172538840111, "learning_rate": 2.999999448169481e-05, "loss": 0.4336, "step": 816 }, { "epoch": 0.10038088217225703, "grad_norm": 0.45094173536464044, "learning_rate": 2.9999987583814276e-05, "loss": 0.3964, "step": 817 }, { "epoch": 0.10050374738911415, "grad_norm": 0.4237343092050061, "learning_rate": 2.9999977926783303e-05, "loss": 0.4082, "step": 818 }, { "epoch": 0.10062661260597125, "grad_norm": 0.4335397955110797, "learning_rate": 2.999996551060367e-05, "loss": 0.3769, "step": 819 }, { "epoch": 0.10074947782282835, "grad_norm": 0.4453087642731216, "learning_rate": 2.999995033527766e-05, "loss": 0.375, "step": 820 }, { "epoch": 0.10087234303968547, "grad_norm": 0.40143451751265036, "learning_rate": 2.999993240080806e-05, "loss": 0.4314, "step": 821 }, { "epoch": 0.10099520825654257, "grad_norm": 0.42970727602485825, "learning_rate": 2.9999911707198176e-05, "loss": 0.443, "step": 822 }, { "epoch": 0.10111807347339968, "grad_norm": 0.4169127341145284, "learning_rate": 2.999988825445181e-05, "loss": 0.4225, "step": 823 }, { "epoch": 0.10124093869025678, "grad_norm": 0.4324422433369813, "learning_rate": 2.999986204257328e-05, "loss": 0.5048, "step": 824 }, { "epoch": 0.1013638039071139, "grad_norm": 0.37987677342878584, "learning_rate": 2.9999833071567397e-05, "loss": 0.4523, "step": 825 }, { "epoch": 0.101486669123971, "grad_norm": 0.453384148246915, "learning_rate": 2.9999801341439506e-05, "loss": 0.4058, "step": 826 }, { "epoch": 0.10160953434082812, "grad_norm": 0.412666582515205, "learning_rate": 2.999976685219543e-05, "loss": 0.4008, "step": 827 }, { "epoch": 0.10173239955768522, "grad_norm": 0.5174969058108763, "learning_rate": 2.9999729603841524e-05, "loss": 0.4771, "step": 828 }, { "epoch": 0.10185526477454233, "grad_norm": 0.4046488342122342, "learning_rate": 2.999968959638463e-05, "loss": 0.3748, "step": 829 }, { "epoch": 0.10197812999139944, "grad_norm": 0.4702751608924727, "learning_rate": 2.999964682983211e-05, "loss": 0.366, "step": 830 }, { "epoch": 0.10210099520825654, "grad_norm": 0.4349175645761593, "learning_rate": 2.9999601304191835e-05, "loss": 0.4175, "step": 831 }, { "epoch": 0.10222386042511365, "grad_norm": 0.7264076646285719, "learning_rate": 2.9999553019472177e-05, "loss": 0.5027, "step": 832 }, { "epoch": 0.10234672564197075, "grad_norm": 0.548656539111077, "learning_rate": 2.9999501975682015e-05, "loss": 0.3596, "step": 833 }, { "epoch": 0.10246959085882787, "grad_norm": 0.4904476950083573, "learning_rate": 2.9999448172830738e-05, "loss": 0.3974, "step": 834 }, { "epoch": 0.10259245607568497, "grad_norm": 0.40834316541842375, "learning_rate": 2.9999391610928247e-05, "loss": 0.4156, "step": 835 }, { "epoch": 0.10271532129254209, "grad_norm": 0.42081824600545037, "learning_rate": 2.999933228998494e-05, "loss": 0.3927, "step": 836 }, { "epoch": 0.10283818650939919, "grad_norm": 0.39763846380658385, "learning_rate": 2.9999270210011737e-05, "loss": 0.4202, "step": 837 }, { "epoch": 0.1029610517262563, "grad_norm": 0.4015380716938442, "learning_rate": 2.999920537102005e-05, "loss": 0.3876, "step": 838 }, { "epoch": 0.1030839169431134, "grad_norm": 0.4532573651713248, "learning_rate": 2.9999137773021807e-05, "loss": 0.3737, "step": 839 }, { "epoch": 0.1032067821599705, "grad_norm": 0.45091755982696363, "learning_rate": 2.9999067416029446e-05, "loss": 0.4431, "step": 840 }, { "epoch": 0.10332964737682762, "grad_norm": 0.32808541305407857, "learning_rate": 2.9998994300055905e-05, "loss": 0.4023, "step": 841 }, { "epoch": 0.10345251259368472, "grad_norm": 0.38568509428119685, "learning_rate": 2.9998918425114633e-05, "loss": 0.3486, "step": 842 }, { "epoch": 0.10357537781054184, "grad_norm": 0.46777363558088275, "learning_rate": 2.9998839791219593e-05, "loss": 0.441, "step": 843 }, { "epoch": 0.10369824302739894, "grad_norm": 0.376490860276276, "learning_rate": 2.999875839838524e-05, "loss": 0.3925, "step": 844 }, { "epoch": 0.10382110824425606, "grad_norm": 0.4038325123411972, "learning_rate": 2.999867424662655e-05, "loss": 0.3953, "step": 845 }, { "epoch": 0.10394397346111316, "grad_norm": 0.4480409911559731, "learning_rate": 2.9998587335959002e-05, "loss": 0.4301, "step": 846 }, { "epoch": 0.10406683867797027, "grad_norm": 0.41440476039624224, "learning_rate": 2.9998497666398586e-05, "loss": 0.4123, "step": 847 }, { "epoch": 0.10418970389482737, "grad_norm": 0.33615925790760415, "learning_rate": 2.999840523796179e-05, "loss": 0.3312, "step": 848 }, { "epoch": 0.10431256911168449, "grad_norm": 0.3707914896011657, "learning_rate": 2.9998310050665622e-05, "loss": 0.4317, "step": 849 }, { "epoch": 0.10443543432854159, "grad_norm": 0.3584382449776852, "learning_rate": 2.9998212104527582e-05, "loss": 0.3386, "step": 850 }, { "epoch": 0.10455829954539869, "grad_norm": 0.4181572784674837, "learning_rate": 2.9998111399565696e-05, "loss": 0.4275, "step": 851 }, { "epoch": 0.10468116476225581, "grad_norm": 0.42836599109544554, "learning_rate": 2.9998007935798486e-05, "loss": 0.3282, "step": 852 }, { "epoch": 0.10480402997911291, "grad_norm": 0.445719307232722, "learning_rate": 2.999790171324498e-05, "loss": 0.3637, "step": 853 }, { "epoch": 0.10492689519597002, "grad_norm": 0.3777051030826283, "learning_rate": 2.9997792731924718e-05, "loss": 0.4389, "step": 854 }, { "epoch": 0.10504976041282713, "grad_norm": 0.40879545039984744, "learning_rate": 2.9997680991857744e-05, "loss": 0.3294, "step": 855 }, { "epoch": 0.10517262562968424, "grad_norm": 0.4519993133034799, "learning_rate": 2.999756649306462e-05, "loss": 0.4026, "step": 856 }, { "epoch": 0.10529549084654134, "grad_norm": 0.3888812990062456, "learning_rate": 2.99974492355664e-05, "loss": 0.4234, "step": 857 }, { "epoch": 0.10541835606339846, "grad_norm": 0.406284978985191, "learning_rate": 2.9997329219384655e-05, "loss": 0.3444, "step": 858 }, { "epoch": 0.10554122128025556, "grad_norm": 0.412988330771367, "learning_rate": 2.999720644454146e-05, "loss": 0.4014, "step": 859 }, { "epoch": 0.10566408649711266, "grad_norm": 0.38083936930922624, "learning_rate": 2.9997080911059402e-05, "loss": 0.4065, "step": 860 }, { "epoch": 0.10578695171396978, "grad_norm": 0.35833018112368453, "learning_rate": 2.9996952618961567e-05, "loss": 0.3551, "step": 861 }, { "epoch": 0.10590981693082688, "grad_norm": 0.3500246921979098, "learning_rate": 2.9996821568271563e-05, "loss": 0.446, "step": 862 }, { "epoch": 0.106032682147684, "grad_norm": 0.40260795741956745, "learning_rate": 2.9996687759013483e-05, "loss": 0.4131, "step": 863 }, { "epoch": 0.1061555473645411, "grad_norm": 0.45018088897710573, "learning_rate": 2.9996551191211948e-05, "loss": 0.496, "step": 864 }, { "epoch": 0.10627841258139821, "grad_norm": 0.4002616109737674, "learning_rate": 2.9996411864892078e-05, "loss": 0.4745, "step": 865 }, { "epoch": 0.10640127779825531, "grad_norm": 0.4566595377852294, "learning_rate": 2.9996269780079497e-05, "loss": 0.4338, "step": 866 }, { "epoch": 0.10652414301511243, "grad_norm": 0.7686252072238472, "learning_rate": 2.999612493680035e-05, "loss": 0.4355, "step": 867 }, { "epoch": 0.10664700823196953, "grad_norm": 0.4240326736104535, "learning_rate": 2.9995977335081273e-05, "loss": 0.4492, "step": 868 }, { "epoch": 0.10676987344882664, "grad_norm": 0.40066208553930716, "learning_rate": 2.9995826974949413e-05, "loss": 0.3645, "step": 869 }, { "epoch": 0.10689273866568375, "grad_norm": 0.5184448655639319, "learning_rate": 2.9995673856432436e-05, "loss": 0.5267, "step": 870 }, { "epoch": 0.10701560388254085, "grad_norm": 0.4756735733479086, "learning_rate": 2.9995517979558503e-05, "loss": 0.4048, "step": 871 }, { "epoch": 0.10713846909939796, "grad_norm": 0.39681665608697453, "learning_rate": 2.9995359344356287e-05, "loss": 0.3732, "step": 872 }, { "epoch": 0.10726133431625506, "grad_norm": 0.4371042789072854, "learning_rate": 2.999519795085497e-05, "loss": 0.4006, "step": 873 }, { "epoch": 0.10738419953311218, "grad_norm": 0.4486680433272071, "learning_rate": 2.9995033799084232e-05, "loss": 0.4451, "step": 874 }, { "epoch": 0.10750706474996928, "grad_norm": 0.3658204519990498, "learning_rate": 2.999486688907428e-05, "loss": 0.4181, "step": 875 }, { "epoch": 0.1076299299668264, "grad_norm": 0.3785309539251781, "learning_rate": 2.9994697220855805e-05, "loss": 0.3546, "step": 876 }, { "epoch": 0.1077527951836835, "grad_norm": 0.4017010225883238, "learning_rate": 2.9994524794460016e-05, "loss": 0.435, "step": 877 }, { "epoch": 0.10787566040054061, "grad_norm": 0.44089111752671034, "learning_rate": 2.9994349609918643e-05, "loss": 0.3536, "step": 878 }, { "epoch": 0.10799852561739771, "grad_norm": 0.4917472045075793, "learning_rate": 2.999417166726389e-05, "loss": 0.4378, "step": 879 }, { "epoch": 0.10812139083425482, "grad_norm": 0.3879087680578922, "learning_rate": 2.999399096652851e-05, "loss": 0.3823, "step": 880 }, { "epoch": 0.10824425605111193, "grad_norm": 0.41126787569472445, "learning_rate": 2.9993807507745725e-05, "loss": 0.3918, "step": 881 }, { "epoch": 0.10836712126796903, "grad_norm": 0.411827003660391, "learning_rate": 2.999362129094929e-05, "loss": 0.4181, "step": 882 }, { "epoch": 0.10848998648482615, "grad_norm": 0.48952481722096997, "learning_rate": 2.9993432316173456e-05, "loss": 0.4961, "step": 883 }, { "epoch": 0.10861285170168325, "grad_norm": 0.4124418159727177, "learning_rate": 2.999324058345298e-05, "loss": 0.4162, "step": 884 }, { "epoch": 0.10873571691854036, "grad_norm": 0.49662450774129674, "learning_rate": 2.9993046092823137e-05, "loss": 0.4925, "step": 885 }, { "epoch": 0.10885858213539747, "grad_norm": 0.4571369493470264, "learning_rate": 2.9992848844319697e-05, "loss": 0.3905, "step": 886 }, { "epoch": 0.10898144735225458, "grad_norm": 0.47140164557700476, "learning_rate": 2.9992648837978944e-05, "loss": 0.4343, "step": 887 }, { "epoch": 0.10910431256911168, "grad_norm": 0.4823573858087624, "learning_rate": 2.9992446073837665e-05, "loss": 0.4053, "step": 888 }, { "epoch": 0.1092271777859688, "grad_norm": 0.3813317155110179, "learning_rate": 2.9992240551933163e-05, "loss": 0.37, "step": 889 }, { "epoch": 0.1093500430028259, "grad_norm": 0.3844831374423649, "learning_rate": 2.9992032272303238e-05, "loss": 0.4322, "step": 890 }, { "epoch": 0.109472908219683, "grad_norm": 0.4244542005752022, "learning_rate": 2.9991821234986205e-05, "loss": 0.4216, "step": 891 }, { "epoch": 0.10959577343654012, "grad_norm": 0.38813249860109084, "learning_rate": 2.9991607440020885e-05, "loss": 0.4442, "step": 892 }, { "epoch": 0.10971863865339722, "grad_norm": 0.43172986469842795, "learning_rate": 2.999139088744659e-05, "loss": 0.4389, "step": 893 }, { "epoch": 0.10984150387025433, "grad_norm": 0.5468652752580008, "learning_rate": 2.9991171577303175e-05, "loss": 0.4346, "step": 894 }, { "epoch": 0.10996436908711144, "grad_norm": 0.5083756427898927, "learning_rate": 2.9990949509630964e-05, "loss": 0.4037, "step": 895 }, { "epoch": 0.11008723430396855, "grad_norm": 0.47774619946610314, "learning_rate": 2.9990724684470814e-05, "loss": 0.4741, "step": 896 }, { "epoch": 0.11021009952082565, "grad_norm": 0.494657650155271, "learning_rate": 2.999049710186407e-05, "loss": 0.4534, "step": 897 }, { "epoch": 0.11033296473768277, "grad_norm": 0.4196270156258804, "learning_rate": 2.9990266761852607e-05, "loss": 0.395, "step": 898 }, { "epoch": 0.11045582995453987, "grad_norm": 0.4076766075878796, "learning_rate": 2.9990033664478786e-05, "loss": 0.3086, "step": 899 }, { "epoch": 0.11057869517139697, "grad_norm": 0.42021387833579765, "learning_rate": 2.9989797809785484e-05, "loss": 0.416, "step": 900 }, { "epoch": 0.11070156038825409, "grad_norm": 0.45723442654734675, "learning_rate": 2.998955919781609e-05, "loss": 0.4398, "step": 901 }, { "epoch": 0.11082442560511119, "grad_norm": 0.5032033228261324, "learning_rate": 2.998931782861449e-05, "loss": 0.4481, "step": 902 }, { "epoch": 0.1109472908219683, "grad_norm": 0.4326396194232661, "learning_rate": 2.998907370222509e-05, "loss": 0.4197, "step": 903 }, { "epoch": 0.1110701560388254, "grad_norm": 0.5433395835959789, "learning_rate": 2.9988826818692784e-05, "loss": 0.5015, "step": 904 }, { "epoch": 0.11119302125568252, "grad_norm": 0.4010174410354839, "learning_rate": 2.998857717806299e-05, "loss": 0.3608, "step": 905 }, { "epoch": 0.11131588647253962, "grad_norm": 0.4481029220234808, "learning_rate": 2.9988324780381633e-05, "loss": 0.3735, "step": 906 }, { "epoch": 0.11143875168939674, "grad_norm": 0.46659921537845395, "learning_rate": 2.9988069625695134e-05, "loss": 0.4074, "step": 907 }, { "epoch": 0.11156161690625384, "grad_norm": 0.4414622488686511, "learning_rate": 2.998781171405043e-05, "loss": 0.4002, "step": 908 }, { "epoch": 0.11168448212311095, "grad_norm": 0.3818685166681223, "learning_rate": 2.9987551045494956e-05, "loss": 0.3718, "step": 909 }, { "epoch": 0.11180734733996806, "grad_norm": 0.41570648515057695, "learning_rate": 2.998728762007667e-05, "loss": 0.3622, "step": 910 }, { "epoch": 0.11193021255682516, "grad_norm": 0.44806846390262756, "learning_rate": 2.998702143784402e-05, "loss": 0.4235, "step": 911 }, { "epoch": 0.11205307777368227, "grad_norm": 0.40709552486294465, "learning_rate": 2.998675249884597e-05, "loss": 0.4264, "step": 912 }, { "epoch": 0.11217594299053937, "grad_norm": 0.39922472223929295, "learning_rate": 2.998648080313199e-05, "loss": 0.3848, "step": 913 }, { "epoch": 0.11229880820739649, "grad_norm": 0.41117693469214533, "learning_rate": 2.9986206350752058e-05, "loss": 0.4108, "step": 914 }, { "epoch": 0.11242167342425359, "grad_norm": 0.4139782284980534, "learning_rate": 2.9985929141756655e-05, "loss": 0.3902, "step": 915 }, { "epoch": 0.1125445386411107, "grad_norm": 0.4760576426712186, "learning_rate": 2.998564917619678e-05, "loss": 0.4021, "step": 916 }, { "epoch": 0.11266740385796781, "grad_norm": 0.4599403257120455, "learning_rate": 2.9985366454123914e-05, "loss": 0.409, "step": 917 }, { "epoch": 0.11279026907482492, "grad_norm": 0.44579829539081234, "learning_rate": 2.9985080975590083e-05, "loss": 0.3619, "step": 918 }, { "epoch": 0.11291313429168202, "grad_norm": 0.405961776467687, "learning_rate": 2.998479274064778e-05, "loss": 0.3718, "step": 919 }, { "epoch": 0.11303599950853914, "grad_norm": 0.38537917989366544, "learning_rate": 2.9984501749350038e-05, "loss": 0.3538, "step": 920 }, { "epoch": 0.11315886472539624, "grad_norm": 0.36848479352094476, "learning_rate": 2.9984208001750372e-05, "loss": 0.4237, "step": 921 }, { "epoch": 0.11328172994225334, "grad_norm": 0.47252431205527284, "learning_rate": 2.9983911497902822e-05, "loss": 0.4337, "step": 922 }, { "epoch": 0.11340459515911046, "grad_norm": 0.41827998416958945, "learning_rate": 2.9983612237861927e-05, "loss": 0.4203, "step": 923 }, { "epoch": 0.11352746037596756, "grad_norm": 0.38954908956716067, "learning_rate": 2.998331022168273e-05, "loss": 0.4086, "step": 924 }, { "epoch": 0.11365032559282467, "grad_norm": 0.4038156280073093, "learning_rate": 2.9983005449420792e-05, "loss": 0.4354, "step": 925 }, { "epoch": 0.11377319080968178, "grad_norm": 0.5172162296385219, "learning_rate": 2.998269792113217e-05, "loss": 0.4795, "step": 926 }, { "epoch": 0.11389605602653889, "grad_norm": 0.4442449677381021, "learning_rate": 2.9982387636873428e-05, "loss": 0.3914, "step": 927 }, { "epoch": 0.114018921243396, "grad_norm": 0.39684272425457895, "learning_rate": 2.9982074596701644e-05, "loss": 0.4272, "step": 928 }, { "epoch": 0.11414178646025311, "grad_norm": 0.3207615151692819, "learning_rate": 2.9981758800674404e-05, "loss": 0.3788, "step": 929 }, { "epoch": 0.11426465167711021, "grad_norm": 0.506634933221788, "learning_rate": 2.9981440248849793e-05, "loss": 0.467, "step": 930 }, { "epoch": 0.11438751689396731, "grad_norm": 0.3639265012794569, "learning_rate": 2.9981118941286402e-05, "loss": 0.3687, "step": 931 }, { "epoch": 0.11451038211082443, "grad_norm": 0.49837727304437335, "learning_rate": 2.9980794878043338e-05, "loss": 0.4422, "step": 932 }, { "epoch": 0.11463324732768153, "grad_norm": 0.4863216653536042, "learning_rate": 2.9980468059180215e-05, "loss": 0.4767, "step": 933 }, { "epoch": 0.11475611254453864, "grad_norm": 0.37326713421984964, "learning_rate": 2.9980138484757137e-05, "loss": 0.3667, "step": 934 }, { "epoch": 0.11487897776139575, "grad_norm": 0.43999671217481207, "learning_rate": 2.9979806154834743e-05, "loss": 0.4062, "step": 935 }, { "epoch": 0.11500184297825286, "grad_norm": 0.36228592672689625, "learning_rate": 2.9979471069474148e-05, "loss": 0.3399, "step": 936 }, { "epoch": 0.11512470819510996, "grad_norm": 0.4995329651039893, "learning_rate": 2.9979133228736998e-05, "loss": 0.4092, "step": 937 }, { "epoch": 0.11524757341196708, "grad_norm": 0.3796370949718274, "learning_rate": 2.997879263268543e-05, "loss": 0.3337, "step": 938 }, { "epoch": 0.11537043862882418, "grad_norm": 0.37714675085066607, "learning_rate": 2.99784492813821e-05, "loss": 0.3612, "step": 939 }, { "epoch": 0.1154933038456813, "grad_norm": 0.48687116569151645, "learning_rate": 2.9978103174890167e-05, "loss": 0.4668, "step": 940 }, { "epoch": 0.1156161690625384, "grad_norm": 0.45673866129896123, "learning_rate": 2.9977754313273286e-05, "loss": 0.3685, "step": 941 }, { "epoch": 0.1157390342793955, "grad_norm": 0.42091058434234907, "learning_rate": 2.9977402696595638e-05, "loss": 0.3819, "step": 942 }, { "epoch": 0.11586189949625261, "grad_norm": 0.44943928750099504, "learning_rate": 2.9977048324921895e-05, "loss": 0.4574, "step": 943 }, { "epoch": 0.11598476471310971, "grad_norm": 0.5860942805121306, "learning_rate": 2.997669119831724e-05, "loss": 0.4688, "step": 944 }, { "epoch": 0.11610762992996683, "grad_norm": 0.3962351322336526, "learning_rate": 2.9976331316847366e-05, "loss": 0.3696, "step": 945 }, { "epoch": 0.11623049514682393, "grad_norm": 0.39396321449639987, "learning_rate": 2.9975968680578472e-05, "loss": 0.3751, "step": 946 }, { "epoch": 0.11635336036368105, "grad_norm": 0.41172403502665156, "learning_rate": 2.997560328957726e-05, "loss": 0.5371, "step": 947 }, { "epoch": 0.11647622558053815, "grad_norm": 0.40723253010741817, "learning_rate": 2.9975235143910945e-05, "loss": 0.3725, "step": 948 }, { "epoch": 0.11659909079739526, "grad_norm": 0.35512153677389974, "learning_rate": 2.9974864243647243e-05, "loss": 0.3834, "step": 949 }, { "epoch": 0.11672195601425236, "grad_norm": 0.48770151267259654, "learning_rate": 2.997449058885438e-05, "loss": 0.4261, "step": 950 }, { "epoch": 0.11684482123110947, "grad_norm": 0.45258262326205334, "learning_rate": 2.9974114179601085e-05, "loss": 0.4843, "step": 951 }, { "epoch": 0.11696768644796658, "grad_norm": 0.400379488712034, "learning_rate": 2.9973735015956596e-05, "loss": 0.3507, "step": 952 }, { "epoch": 0.11709055166482368, "grad_norm": 0.35006346804668936, "learning_rate": 2.9973353097990665e-05, "loss": 0.4209, "step": 953 }, { "epoch": 0.1172134168816808, "grad_norm": 0.3992321414046776, "learning_rate": 2.997296842577353e-05, "loss": 0.3857, "step": 954 }, { "epoch": 0.1173362820985379, "grad_norm": 0.4067982961624198, "learning_rate": 2.9972580999375957e-05, "loss": 0.5289, "step": 955 }, { "epoch": 0.11745914731539502, "grad_norm": 0.3817956892966253, "learning_rate": 2.997219081886921e-05, "loss": 0.4042, "step": 956 }, { "epoch": 0.11758201253225212, "grad_norm": 0.35087747669845976, "learning_rate": 2.9971797884325062e-05, "loss": 0.3888, "step": 957 }, { "epoch": 0.11770487774910923, "grad_norm": 0.38171557751972984, "learning_rate": 2.997140219581579e-05, "loss": 0.4113, "step": 958 }, { "epoch": 0.11782774296596633, "grad_norm": 0.46801115585737774, "learning_rate": 2.9971003753414173e-05, "loss": 0.461, "step": 959 }, { "epoch": 0.11795060818282345, "grad_norm": 0.46249552469451594, "learning_rate": 2.997060255719351e-05, "loss": 0.4529, "step": 960 }, { "epoch": 0.11807347339968055, "grad_norm": 0.4091821267288011, "learning_rate": 2.997019860722759e-05, "loss": 0.526, "step": 961 }, { "epoch": 0.11819633861653765, "grad_norm": 0.46127316662267875, "learning_rate": 2.9969791903590727e-05, "loss": 0.4845, "step": 962 }, { "epoch": 0.11831920383339477, "grad_norm": 0.40698539052683297, "learning_rate": 2.996938244635772e-05, "loss": 0.4028, "step": 963 }, { "epoch": 0.11844206905025187, "grad_norm": 0.41170428093532624, "learning_rate": 2.9968970235603897e-05, "loss": 0.4158, "step": 964 }, { "epoch": 0.11856493426710898, "grad_norm": 0.34122479442718967, "learning_rate": 2.996855527140507e-05, "loss": 0.454, "step": 965 }, { "epoch": 0.11868779948396609, "grad_norm": 0.3650779850447386, "learning_rate": 2.9968137553837578e-05, "loss": 0.3556, "step": 966 }, { "epoch": 0.1188106647008232, "grad_norm": 0.37916205818156656, "learning_rate": 2.996771708297826e-05, "loss": 0.3997, "step": 967 }, { "epoch": 0.1189335299176803, "grad_norm": 0.424585474606302, "learning_rate": 2.9967293858904447e-05, "loss": 0.4443, "step": 968 }, { "epoch": 0.11905639513453742, "grad_norm": 0.3554451221844901, "learning_rate": 2.9966867881693995e-05, "loss": 0.4282, "step": 969 }, { "epoch": 0.11917926035139452, "grad_norm": 0.36737925209098093, "learning_rate": 2.996643915142526e-05, "loss": 0.3706, "step": 970 }, { "epoch": 0.11930212556825162, "grad_norm": 0.39020314896007285, "learning_rate": 2.9966007668177112e-05, "loss": 0.4173, "step": 971 }, { "epoch": 0.11942499078510874, "grad_norm": 0.33197301198422347, "learning_rate": 2.9965573432028907e-05, "loss": 0.4876, "step": 972 }, { "epoch": 0.11954785600196584, "grad_norm": 0.4325920730037543, "learning_rate": 2.9965136443060523e-05, "loss": 0.4347, "step": 973 }, { "epoch": 0.11967072121882295, "grad_norm": 0.7195833691550371, "learning_rate": 2.9964696701352337e-05, "loss": 0.5101, "step": 974 }, { "epoch": 0.11979358643568006, "grad_norm": 0.4650867936747841, "learning_rate": 2.9964254206985248e-05, "loss": 0.4037, "step": 975 }, { "epoch": 0.11991645165253717, "grad_norm": 0.3451336846299479, "learning_rate": 2.9963808960040645e-05, "loss": 0.3768, "step": 976 }, { "epoch": 0.12003931686939427, "grad_norm": 0.43904035563009136, "learning_rate": 2.9963360960600427e-05, "loss": 0.3897, "step": 977 }, { "epoch": 0.12016218208625139, "grad_norm": 0.3549664050005311, "learning_rate": 2.9962910208747e-05, "loss": 0.3324, "step": 978 }, { "epoch": 0.12028504730310849, "grad_norm": 0.5104260139448664, "learning_rate": 2.996245670456328e-05, "loss": 0.4328, "step": 979 }, { "epoch": 0.1204079125199656, "grad_norm": 0.423441167810477, "learning_rate": 2.996200044813268e-05, "loss": 0.3654, "step": 980 }, { "epoch": 0.1205307777368227, "grad_norm": 0.3762775283273009, "learning_rate": 2.996154143953913e-05, "loss": 0.3219, "step": 981 }, { "epoch": 0.12065364295367981, "grad_norm": 0.3449858204323068, "learning_rate": 2.9961079678867064e-05, "loss": 0.4098, "step": 982 }, { "epoch": 0.12077650817053692, "grad_norm": 0.331013109238878, "learning_rate": 2.9960615166201417e-05, "loss": 0.3533, "step": 983 }, { "epoch": 0.12089937338739402, "grad_norm": 0.4181374748443255, "learning_rate": 2.996014790162763e-05, "loss": 0.3266, "step": 984 }, { "epoch": 0.12102223860425114, "grad_norm": 0.41535517686462303, "learning_rate": 2.995967788523166e-05, "loss": 0.4563, "step": 985 }, { "epoch": 0.12114510382110824, "grad_norm": 0.4313358709987006, "learning_rate": 2.995920511709996e-05, "loss": 0.3905, "step": 986 }, { "epoch": 0.12126796903796536, "grad_norm": 0.5326400811565671, "learning_rate": 2.995872959731949e-05, "loss": 0.4209, "step": 987 }, { "epoch": 0.12139083425482246, "grad_norm": 0.3935625800739947, "learning_rate": 2.9958251325977726e-05, "loss": 0.3729, "step": 988 }, { "epoch": 0.12151369947167957, "grad_norm": 0.4316356282112627, "learning_rate": 2.9957770303162634e-05, "loss": 0.4084, "step": 989 }, { "epoch": 0.12163656468853667, "grad_norm": 0.3232215893949876, "learning_rate": 2.99572865289627e-05, "loss": 0.3533, "step": 990 }, { "epoch": 0.12175942990539378, "grad_norm": 0.39884783046888744, "learning_rate": 2.995680000346691e-05, "loss": 0.366, "step": 991 }, { "epoch": 0.12188229512225089, "grad_norm": 0.41333549475302206, "learning_rate": 2.995631072676476e-05, "loss": 0.4187, "step": 992 }, { "epoch": 0.122005160339108, "grad_norm": 0.3854028087849847, "learning_rate": 2.9955818698946243e-05, "loss": 0.4442, "step": 993 }, { "epoch": 0.12212802555596511, "grad_norm": 0.4402202566477024, "learning_rate": 2.9955323920101872e-05, "loss": 0.3645, "step": 994 }, { "epoch": 0.12225089077282221, "grad_norm": 0.39196500951302443, "learning_rate": 2.9954826390322653e-05, "loss": 0.4007, "step": 995 }, { "epoch": 0.12237375598967933, "grad_norm": 0.39616322405645166, "learning_rate": 2.9954326109700104e-05, "loss": 0.3603, "step": 996 }, { "epoch": 0.12249662120653643, "grad_norm": 0.3596016553987534, "learning_rate": 2.9953823078326252e-05, "loss": 0.3031, "step": 997 }, { "epoch": 0.12261948642339354, "grad_norm": 0.35007286297875334, "learning_rate": 2.995331729629362e-05, "loss": 0.4336, "step": 998 }, { "epoch": 0.12274235164025064, "grad_norm": 0.4666247315546114, "learning_rate": 2.9952808763695247e-05, "loss": 0.4202, "step": 999 }, { "epoch": 0.12286521685710776, "grad_norm": 0.3501111004354163, "learning_rate": 2.9952297480624678e-05, "loss": 0.3957, "step": 1000 }, { "epoch": 0.12298808207396486, "grad_norm": 0.34816875838016687, "learning_rate": 2.9951783447175954e-05, "loss": 0.3635, "step": 1001 }, { "epoch": 0.12311094729082196, "grad_norm": 0.4901086445199873, "learning_rate": 2.9951266663443634e-05, "loss": 0.4428, "step": 1002 }, { "epoch": 0.12323381250767908, "grad_norm": 0.4269981038874794, "learning_rate": 2.995074712952277e-05, "loss": 0.3893, "step": 1003 }, { "epoch": 0.12335667772453618, "grad_norm": 0.504342771786635, "learning_rate": 2.995022484550893e-05, "loss": 0.4239, "step": 1004 }, { "epoch": 0.1234795429413933, "grad_norm": 0.41404520860386296, "learning_rate": 2.994969981149819e-05, "loss": 0.4464, "step": 1005 }, { "epoch": 0.1236024081582504, "grad_norm": 0.3628301534763645, "learning_rate": 2.9949172027587116e-05, "loss": 0.3777, "step": 1006 }, { "epoch": 0.12372527337510751, "grad_norm": 0.39164498656916097, "learning_rate": 2.99486414938728e-05, "loss": 0.4795, "step": 1007 }, { "epoch": 0.12384813859196461, "grad_norm": 0.4715137931923768, "learning_rate": 2.9948108210452824e-05, "loss": 0.3795, "step": 1008 }, { "epoch": 0.12397100380882173, "grad_norm": 0.4118016119826611, "learning_rate": 2.9947572177425285e-05, "loss": 0.342, "step": 1009 }, { "epoch": 0.12409386902567883, "grad_norm": 0.3492642665230072, "learning_rate": 2.9947033394888786e-05, "loss": 0.4455, "step": 1010 }, { "epoch": 0.12421673424253594, "grad_norm": 0.4004093289095699, "learning_rate": 2.9946491862942426e-05, "loss": 0.3923, "step": 1011 }, { "epoch": 0.12433959945939305, "grad_norm": 0.36317966654436035, "learning_rate": 2.994594758168582e-05, "loss": 0.3695, "step": 1012 }, { "epoch": 0.12446246467625015, "grad_norm": 0.6297442253988312, "learning_rate": 2.9945400551219084e-05, "loss": 0.4673, "step": 1013 }, { "epoch": 0.12458532989310726, "grad_norm": 0.4053447116510649, "learning_rate": 2.9944850771642843e-05, "loss": 0.4049, "step": 1014 }, { "epoch": 0.12470819510996436, "grad_norm": 0.3952233677525275, "learning_rate": 2.9944298243058217e-05, "loss": 0.4745, "step": 1015 }, { "epoch": 0.12483106032682148, "grad_norm": 0.4567164672094169, "learning_rate": 2.9943742965566854e-05, "loss": 0.4337, "step": 1016 }, { "epoch": 0.12495392554367858, "grad_norm": 0.4778579967969269, "learning_rate": 2.9943184939270882e-05, "loss": 0.4328, "step": 1017 }, { "epoch": 0.12507679076053568, "grad_norm": 0.38922341221781664, "learning_rate": 2.994262416427295e-05, "loss": 0.4631, "step": 1018 }, { "epoch": 0.1251996559773928, "grad_norm": 0.3988417370382264, "learning_rate": 2.994206064067621e-05, "loss": 0.4135, "step": 1019 }, { "epoch": 0.12532252119424991, "grad_norm": 0.42898778583192254, "learning_rate": 2.994149436858432e-05, "loss": 0.4484, "step": 1020 }, { "epoch": 0.12544538641110703, "grad_norm": 0.4626628002384725, "learning_rate": 2.994092534810144e-05, "loss": 0.3801, "step": 1021 }, { "epoch": 0.12556825162796412, "grad_norm": 0.4192443085678549, "learning_rate": 2.9940353579332233e-05, "loss": 0.3988, "step": 1022 }, { "epoch": 0.12569111684482123, "grad_norm": 0.4528299467160234, "learning_rate": 2.9939779062381876e-05, "loss": 0.431, "step": 1023 }, { "epoch": 0.12581398206167835, "grad_norm": 0.4309536014137613, "learning_rate": 2.9939201797356053e-05, "loss": 0.4007, "step": 1024 }, { "epoch": 0.12593684727853544, "grad_norm": 0.3836018804744661, "learning_rate": 2.993862178436094e-05, "loss": 0.3792, "step": 1025 }, { "epoch": 0.12605971249539255, "grad_norm": 0.38406990716010875, "learning_rate": 2.9938039023503233e-05, "loss": 0.4291, "step": 1026 }, { "epoch": 0.12618257771224967, "grad_norm": 0.43996939015337916, "learning_rate": 2.9937453514890123e-05, "loss": 0.472, "step": 1027 }, { "epoch": 0.12630544292910678, "grad_norm": 0.3509925009234385, "learning_rate": 2.9936865258629312e-05, "loss": 0.3546, "step": 1028 }, { "epoch": 0.12642830814596387, "grad_norm": 0.369484443607374, "learning_rate": 2.9936274254829007e-05, "loss": 0.3575, "step": 1029 }, { "epoch": 0.12655117336282098, "grad_norm": 0.47265067326831267, "learning_rate": 2.9935680503597917e-05, "loss": 0.3912, "step": 1030 }, { "epoch": 0.1266740385796781, "grad_norm": 0.4974121959433691, "learning_rate": 2.993508400504526e-05, "loss": 0.426, "step": 1031 }, { "epoch": 0.1267969037965352, "grad_norm": 0.37626082167786506, "learning_rate": 2.9934484759280756e-05, "loss": 0.4341, "step": 1032 }, { "epoch": 0.1269197690133923, "grad_norm": 0.3819609590041559, "learning_rate": 2.9933882766414634e-05, "loss": 0.3452, "step": 1033 }, { "epoch": 0.12704263423024942, "grad_norm": 0.48779462060790574, "learning_rate": 2.9933278026557627e-05, "loss": 0.5096, "step": 1034 }, { "epoch": 0.12716549944710653, "grad_norm": 0.3948736416011947, "learning_rate": 2.9932670539820975e-05, "loss": 0.3566, "step": 1035 }, { "epoch": 0.12728836466396362, "grad_norm": 0.46937069703772516, "learning_rate": 2.9932060306316416e-05, "loss": 0.447, "step": 1036 }, { "epoch": 0.12741122988082074, "grad_norm": 0.3881240684909998, "learning_rate": 2.9931447326156204e-05, "loss": 0.3414, "step": 1037 }, { "epoch": 0.12753409509767785, "grad_norm": 0.3968867077059252, "learning_rate": 2.9930831599453087e-05, "loss": 0.4355, "step": 1038 }, { "epoch": 0.12765696031453497, "grad_norm": 0.3601182120747664, "learning_rate": 2.9930213126320333e-05, "loss": 0.3777, "step": 1039 }, { "epoch": 0.12777982553139206, "grad_norm": 0.4078795950102352, "learning_rate": 2.9929591906871696e-05, "loss": 0.4573, "step": 1040 }, { "epoch": 0.12790269074824917, "grad_norm": 0.48136415368930974, "learning_rate": 2.992896794122145e-05, "loss": 0.4418, "step": 1041 }, { "epoch": 0.12802555596510629, "grad_norm": 0.37688086174605767, "learning_rate": 2.992834122948437e-05, "loss": 0.4618, "step": 1042 }, { "epoch": 0.12814842118196337, "grad_norm": 0.3956634833139999, "learning_rate": 2.9927711771775735e-05, "loss": 0.4217, "step": 1043 }, { "epoch": 0.1282712863988205, "grad_norm": 0.41381994152645724, "learning_rate": 2.9927079568211327e-05, "loss": 0.4084, "step": 1044 }, { "epoch": 0.1283941516156776, "grad_norm": 0.317086970635743, "learning_rate": 2.9926444618907438e-05, "loss": 0.359, "step": 1045 }, { "epoch": 0.12851701683253472, "grad_norm": 0.4472744569609632, "learning_rate": 2.9925806923980863e-05, "loss": 0.471, "step": 1046 }, { "epoch": 0.1286398820493918, "grad_norm": 0.4217386470202943, "learning_rate": 2.9925166483548903e-05, "loss": 0.4202, "step": 1047 }, { "epoch": 0.12876274726624892, "grad_norm": 0.30705407029834486, "learning_rate": 2.9924523297729358e-05, "loss": 0.3665, "step": 1048 }, { "epoch": 0.12888561248310604, "grad_norm": 0.3849939967317385, "learning_rate": 2.9923877366640543e-05, "loss": 0.3955, "step": 1049 }, { "epoch": 0.12900847769996315, "grad_norm": 0.455357348854226, "learning_rate": 2.9923228690401273e-05, "loss": 0.461, "step": 1050 }, { "epoch": 0.12913134291682024, "grad_norm": 0.43170795912661, "learning_rate": 2.9922577269130866e-05, "loss": 0.4585, "step": 1051 }, { "epoch": 0.12925420813367736, "grad_norm": 0.42809946038507557, "learning_rate": 2.9921923102949142e-05, "loss": 0.4098, "step": 1052 }, { "epoch": 0.12937707335053447, "grad_norm": 0.5011372314906338, "learning_rate": 2.992126619197644e-05, "loss": 0.4514, "step": 1053 }, { "epoch": 0.12949993856739156, "grad_norm": 0.3509995514340457, "learning_rate": 2.9920606536333587e-05, "loss": 0.3827, "step": 1054 }, { "epoch": 0.12962280378424867, "grad_norm": 0.36948051998458414, "learning_rate": 2.991994413614193e-05, "loss": 0.4235, "step": 1055 }, { "epoch": 0.1297456690011058, "grad_norm": 0.3828008981179216, "learning_rate": 2.9919278991523307e-05, "loss": 0.4434, "step": 1056 }, { "epoch": 0.1298685342179629, "grad_norm": 0.3802354324073408, "learning_rate": 2.9918611102600066e-05, "loss": 0.3331, "step": 1057 }, { "epoch": 0.12999139943482, "grad_norm": 0.3788149923379212, "learning_rate": 2.9917940469495065e-05, "loss": 0.4322, "step": 1058 }, { "epoch": 0.1301142646516771, "grad_norm": 0.3766681241794117, "learning_rate": 2.9917267092331655e-05, "loss": 0.3361, "step": 1059 }, { "epoch": 0.13023712986853422, "grad_norm": 0.3337701410311746, "learning_rate": 2.9916590971233714e-05, "loss": 0.3869, "step": 1060 }, { "epoch": 0.13035999508539134, "grad_norm": 0.4130702711886172, "learning_rate": 2.99159121063256e-05, "loss": 0.438, "step": 1061 }, { "epoch": 0.13048286030224843, "grad_norm": 0.3957995792373614, "learning_rate": 2.991523049773218e-05, "loss": 0.3673, "step": 1062 }, { "epoch": 0.13060572551910554, "grad_norm": 0.4104493510227939, "learning_rate": 2.9914546145578843e-05, "loss": 0.4541, "step": 1063 }, { "epoch": 0.13072859073596266, "grad_norm": 0.49961412282091683, "learning_rate": 2.9913859049991464e-05, "loss": 0.4414, "step": 1064 }, { "epoch": 0.13085145595281975, "grad_norm": 0.38608064212044263, "learning_rate": 2.991316921109644e-05, "loss": 0.4266, "step": 1065 }, { "epoch": 0.13097432116967686, "grad_norm": 0.34815939969238274, "learning_rate": 2.991247662902065e-05, "loss": 0.407, "step": 1066 }, { "epoch": 0.13109718638653398, "grad_norm": 0.414537190230124, "learning_rate": 2.9911781303891493e-05, "loss": 0.396, "step": 1067 }, { "epoch": 0.1312200516033911, "grad_norm": 0.43907451406488, "learning_rate": 2.9911083235836872e-05, "loss": 0.4323, "step": 1068 }, { "epoch": 0.13134291682024818, "grad_norm": 0.46139478598957606, "learning_rate": 2.9910382424985196e-05, "loss": 0.4506, "step": 1069 }, { "epoch": 0.1314657820371053, "grad_norm": 0.3857568332589837, "learning_rate": 2.9909678871465368e-05, "loss": 0.4552, "step": 1070 }, { "epoch": 0.1315886472539624, "grad_norm": 0.5428462088573083, "learning_rate": 2.9908972575406802e-05, "loss": 0.3766, "step": 1071 }, { "epoch": 0.1317115124708195, "grad_norm": 0.4030070229071064, "learning_rate": 2.990826353693942e-05, "loss": 0.4273, "step": 1072 }, { "epoch": 0.1318343776876766, "grad_norm": 0.3158790426676812, "learning_rate": 2.9907551756193645e-05, "loss": 0.3842, "step": 1073 }, { "epoch": 0.13195724290453373, "grad_norm": 0.43221601362987044, "learning_rate": 2.9906837233300403e-05, "loss": 0.4641, "step": 1074 }, { "epoch": 0.13208010812139084, "grad_norm": 0.4377628542156037, "learning_rate": 2.9906119968391125e-05, "loss": 0.4419, "step": 1075 }, { "epoch": 0.13220297333824793, "grad_norm": 0.374610562169411, "learning_rate": 2.990539996159775e-05, "loss": 0.4024, "step": 1076 }, { "epoch": 0.13232583855510505, "grad_norm": 0.382216160159782, "learning_rate": 2.9904677213052712e-05, "loss": 0.3496, "step": 1077 }, { "epoch": 0.13244870377196216, "grad_norm": 0.43040114245029776, "learning_rate": 2.990395172288897e-05, "loss": 0.4079, "step": 1078 }, { "epoch": 0.13257156898881928, "grad_norm": 0.39701770174518347, "learning_rate": 2.9903223491239958e-05, "loss": 0.4256, "step": 1079 }, { "epoch": 0.13269443420567636, "grad_norm": 0.3715998057693571, "learning_rate": 2.9902492518239638e-05, "loss": 0.3555, "step": 1080 }, { "epoch": 0.13281729942253348, "grad_norm": 0.4643520550626088, "learning_rate": 2.990175880402246e-05, "loss": 0.428, "step": 1081 }, { "epoch": 0.1329401646393906, "grad_norm": 0.36741211224339904, "learning_rate": 2.9901022348723397e-05, "loss": 0.4283, "step": 1082 }, { "epoch": 0.13306302985624768, "grad_norm": 0.3731579834436177, "learning_rate": 2.990028315247791e-05, "loss": 0.3875, "step": 1083 }, { "epoch": 0.1331858950731048, "grad_norm": 0.42398759438702877, "learning_rate": 2.9899541215421965e-05, "loss": 0.3911, "step": 1084 }, { "epoch": 0.13330876028996191, "grad_norm": 0.43425489911358595, "learning_rate": 2.9898796537692044e-05, "loss": 0.447, "step": 1085 }, { "epoch": 0.13343162550681903, "grad_norm": 0.3528253871332106, "learning_rate": 2.9898049119425118e-05, "loss": 0.4464, "step": 1086 }, { "epoch": 0.13355449072367612, "grad_norm": 0.49443217210693347, "learning_rate": 2.9897298960758674e-05, "loss": 0.4438, "step": 1087 }, { "epoch": 0.13367735594053323, "grad_norm": 0.41943782820963316, "learning_rate": 2.98965460618307e-05, "loss": 0.3679, "step": 1088 }, { "epoch": 0.13380022115739035, "grad_norm": 0.4083062345581697, "learning_rate": 2.989579042277969e-05, "loss": 0.4093, "step": 1089 }, { "epoch": 0.13392308637424746, "grad_norm": 0.35425270366238976, "learning_rate": 2.9895032043744625e-05, "loss": 0.4146, "step": 1090 }, { "epoch": 0.13404595159110455, "grad_norm": 0.42362636986838365, "learning_rate": 2.989427092486502e-05, "loss": 0.4693, "step": 1091 }, { "epoch": 0.13416881680796167, "grad_norm": 0.4053777708421201, "learning_rate": 2.989350706628087e-05, "loss": 0.3662, "step": 1092 }, { "epoch": 0.13429168202481878, "grad_norm": 0.3926152533889559, "learning_rate": 2.9892740468132683e-05, "loss": 0.4062, "step": 1093 }, { "epoch": 0.13441454724167587, "grad_norm": 0.33446730382809803, "learning_rate": 2.9891971130561467e-05, "loss": 0.391, "step": 1094 }, { "epoch": 0.13453741245853298, "grad_norm": 0.3342009188544942, "learning_rate": 2.9891199053708743e-05, "loss": 0.4405, "step": 1095 }, { "epoch": 0.1346602776753901, "grad_norm": 0.3560507003063666, "learning_rate": 2.9890424237716524e-05, "loss": 0.3718, "step": 1096 }, { "epoch": 0.13478314289224722, "grad_norm": 0.36633389290644836, "learning_rate": 2.9889646682727334e-05, "loss": 0.4105, "step": 1097 }, { "epoch": 0.1349060081091043, "grad_norm": 0.4524498299210035, "learning_rate": 2.98888663888842e-05, "loss": 0.3988, "step": 1098 }, { "epoch": 0.13502887332596142, "grad_norm": 0.33821493774202205, "learning_rate": 2.988808335633065e-05, "loss": 0.3774, "step": 1099 }, { "epoch": 0.13515173854281853, "grad_norm": 0.41552991020054275, "learning_rate": 2.988729758521072e-05, "loss": 0.3312, "step": 1100 }, { "epoch": 0.13527460375967565, "grad_norm": 0.762131225939438, "learning_rate": 2.9886509075668947e-05, "loss": 0.5283, "step": 1101 }, { "epoch": 0.13539746897653274, "grad_norm": 0.41735962877996347, "learning_rate": 2.9885717827850372e-05, "loss": 0.4786, "step": 1102 }, { "epoch": 0.13552033419338985, "grad_norm": 0.4322367384311565, "learning_rate": 2.988492384190054e-05, "loss": 0.3755, "step": 1103 }, { "epoch": 0.13564319941024697, "grad_norm": 0.33726371125178467, "learning_rate": 2.98841271179655e-05, "loss": 0.4466, "step": 1104 }, { "epoch": 0.13576606462710405, "grad_norm": 0.3543124150369191, "learning_rate": 2.9883327656191806e-05, "loss": 0.3298, "step": 1105 }, { "epoch": 0.13588892984396117, "grad_norm": 0.4136560662454279, "learning_rate": 2.9882525456726507e-05, "loss": 0.364, "step": 1106 }, { "epoch": 0.13601179506081829, "grad_norm": 0.4267546236540144, "learning_rate": 2.988172051971717e-05, "loss": 0.4136, "step": 1107 }, { "epoch": 0.1361346602776754, "grad_norm": 0.5690490590543782, "learning_rate": 2.988091284531185e-05, "loss": 0.4651, "step": 1108 }, { "epoch": 0.1362575254945325, "grad_norm": 0.3666426819069893, "learning_rate": 2.988010243365912e-05, "loss": 0.3603, "step": 1109 }, { "epoch": 0.1363803907113896, "grad_norm": 0.3997032230581559, "learning_rate": 2.9879289284908053e-05, "loss": 0.4301, "step": 1110 }, { "epoch": 0.13650325592824672, "grad_norm": 0.35764077370609915, "learning_rate": 2.9878473399208215e-05, "loss": 0.4458, "step": 1111 }, { "epoch": 0.13662612114510383, "grad_norm": 0.3333963817676452, "learning_rate": 2.9877654776709685e-05, "loss": 0.4227, "step": 1112 }, { "epoch": 0.13674898636196092, "grad_norm": 0.4767697787301997, "learning_rate": 2.9876833417563044e-05, "loss": 0.4707, "step": 1113 }, { "epoch": 0.13687185157881804, "grad_norm": 0.37208097011762176, "learning_rate": 2.9876009321919372e-05, "loss": 0.4258, "step": 1114 }, { "epoch": 0.13699471679567515, "grad_norm": 0.5123329969105193, "learning_rate": 2.9875182489930263e-05, "loss": 0.4028, "step": 1115 }, { "epoch": 0.13711758201253224, "grad_norm": 0.3667170285389533, "learning_rate": 2.98743529217478e-05, "loss": 0.405, "step": 1116 }, { "epoch": 0.13724044722938936, "grad_norm": 0.4283757470632006, "learning_rate": 2.9873520617524585e-05, "loss": 0.4349, "step": 1117 }, { "epoch": 0.13736331244624647, "grad_norm": 0.3086250499389357, "learning_rate": 2.9872685577413712e-05, "loss": 0.3497, "step": 1118 }, { "epoch": 0.1374861776631036, "grad_norm": 0.3736166410877637, "learning_rate": 2.987184780156878e-05, "loss": 0.4619, "step": 1119 }, { "epoch": 0.13760904287996067, "grad_norm": 0.34400883745482164, "learning_rate": 2.9871007290143884e-05, "loss": 0.4634, "step": 1120 }, { "epoch": 0.1377319080968178, "grad_norm": 0.4690634343405086, "learning_rate": 2.9870164043293645e-05, "loss": 0.4944, "step": 1121 }, { "epoch": 0.1378547733136749, "grad_norm": 0.4171584128135604, "learning_rate": 2.9869318061173168e-05, "loss": 0.4015, "step": 1122 }, { "epoch": 0.137977638530532, "grad_norm": 0.40914237270486215, "learning_rate": 2.9868469343938063e-05, "loss": 0.4516, "step": 1123 }, { "epoch": 0.1381005037473891, "grad_norm": 0.38510923357101823, "learning_rate": 2.9867617891744447e-05, "loss": 0.3388, "step": 1124 }, { "epoch": 0.13822336896424622, "grad_norm": 0.3924225443939752, "learning_rate": 2.9866763704748938e-05, "loss": 0.3856, "step": 1125 }, { "epoch": 0.13834623418110334, "grad_norm": 0.31990961646774574, "learning_rate": 2.9865906783108663e-05, "loss": 0.3613, "step": 1126 }, { "epoch": 0.13846909939796043, "grad_norm": 0.39304756017823217, "learning_rate": 2.9865047126981238e-05, "loss": 0.3761, "step": 1127 }, { "epoch": 0.13859196461481754, "grad_norm": 0.36686750138888136, "learning_rate": 2.9864184736524808e-05, "loss": 0.3658, "step": 1128 }, { "epoch": 0.13871482983167466, "grad_norm": 0.3384452494325055, "learning_rate": 2.9863319611897985e-05, "loss": 0.4344, "step": 1129 }, { "epoch": 0.13883769504853177, "grad_norm": 0.38432925889771974, "learning_rate": 2.9862451753259912e-05, "loss": 0.323, "step": 1130 }, { "epoch": 0.13896056026538886, "grad_norm": 0.3775417206811398, "learning_rate": 2.986158116077023e-05, "loss": 0.4045, "step": 1131 }, { "epoch": 0.13908342548224598, "grad_norm": 0.38464294673769106, "learning_rate": 2.986070783458907e-05, "loss": 0.3245, "step": 1132 }, { "epoch": 0.1392062906991031, "grad_norm": 0.42984147446763843, "learning_rate": 2.9859831774877077e-05, "loss": 0.4323, "step": 1133 }, { "epoch": 0.13932915591596018, "grad_norm": 0.33952812956423734, "learning_rate": 2.9858952981795407e-05, "loss": 0.4488, "step": 1134 }, { "epoch": 0.1394520211328173, "grad_norm": 0.384632092900927, "learning_rate": 2.985807145550569e-05, "loss": 0.3893, "step": 1135 }, { "epoch": 0.1395748863496744, "grad_norm": 0.48189321454221673, "learning_rate": 2.9857187196170093e-05, "loss": 0.436, "step": 1136 }, { "epoch": 0.13969775156653153, "grad_norm": 0.36195896915875236, "learning_rate": 2.985630020395126e-05, "loss": 0.4013, "step": 1137 }, { "epoch": 0.1398206167833886, "grad_norm": 0.5225198349212775, "learning_rate": 2.9855410479012354e-05, "loss": 0.4503, "step": 1138 }, { "epoch": 0.13994348200024573, "grad_norm": 0.3629279797875969, "learning_rate": 2.985451802151703e-05, "loss": 0.3699, "step": 1139 }, { "epoch": 0.14006634721710284, "grad_norm": 0.37365265947028053, "learning_rate": 2.9853622831629448e-05, "loss": 0.3803, "step": 1140 }, { "epoch": 0.14018921243395996, "grad_norm": 0.35402951192219223, "learning_rate": 2.985272490951428e-05, "loss": 0.3942, "step": 1141 }, { "epoch": 0.14031207765081705, "grad_norm": 0.3610508100530925, "learning_rate": 2.9851824255336686e-05, "loss": 0.3737, "step": 1142 }, { "epoch": 0.14043494286767416, "grad_norm": 0.4357901283928351, "learning_rate": 2.9850920869262338e-05, "loss": 0.4472, "step": 1143 }, { "epoch": 0.14055780808453128, "grad_norm": 0.42428141251918444, "learning_rate": 2.9850014751457407e-05, "loss": 0.4505, "step": 1144 }, { "epoch": 0.14068067330138836, "grad_norm": 0.43358866521221556, "learning_rate": 2.984910590208857e-05, "loss": 0.4147, "step": 1145 }, { "epoch": 0.14080353851824548, "grad_norm": 0.3317070558130978, "learning_rate": 2.9848194321322996e-05, "loss": 0.4362, "step": 1146 }, { "epoch": 0.1409264037351026, "grad_norm": 0.3634180603981268, "learning_rate": 2.9847280009328377e-05, "loss": 0.4124, "step": 1147 }, { "epoch": 0.1410492689519597, "grad_norm": 0.42973190728817723, "learning_rate": 2.9846362966272888e-05, "loss": 0.4675, "step": 1148 }, { "epoch": 0.1411721341688168, "grad_norm": 0.3458051897884604, "learning_rate": 2.984544319232521e-05, "loss": 0.4285, "step": 1149 }, { "epoch": 0.14129499938567391, "grad_norm": 0.3314862923748586, "learning_rate": 2.9844520687654537e-05, "loss": 0.4223, "step": 1150 }, { "epoch": 0.14141786460253103, "grad_norm": 0.45019301299034453, "learning_rate": 2.984359545243055e-05, "loss": 0.4869, "step": 1151 }, { "epoch": 0.14154072981938814, "grad_norm": 0.32697656381176843, "learning_rate": 2.9842667486823446e-05, "loss": 0.4062, "step": 1152 }, { "epoch": 0.14166359503624523, "grad_norm": 0.39003498580309626, "learning_rate": 2.9841736791003914e-05, "loss": 0.3871, "step": 1153 }, { "epoch": 0.14178646025310235, "grad_norm": 0.40748057354096406, "learning_rate": 2.9840803365143153e-05, "loss": 0.365, "step": 1154 }, { "epoch": 0.14190932546995946, "grad_norm": 0.3812802228310405, "learning_rate": 2.983986720941286e-05, "loss": 0.3465, "step": 1155 }, { "epoch": 0.14203219068681655, "grad_norm": 0.458091166762099, "learning_rate": 2.983892832398523e-05, "loss": 0.4209, "step": 1156 }, { "epoch": 0.14215505590367367, "grad_norm": 0.47842602441334536, "learning_rate": 2.983798670903297e-05, "loss": 0.5269, "step": 1157 }, { "epoch": 0.14227792112053078, "grad_norm": 0.3870669557665687, "learning_rate": 2.9837042364729284e-05, "loss": 0.4175, "step": 1158 }, { "epoch": 0.1424007863373879, "grad_norm": 0.391529706140356, "learning_rate": 2.9836095291247875e-05, "loss": 0.5067, "step": 1159 }, { "epoch": 0.14252365155424498, "grad_norm": 0.40644308426534653, "learning_rate": 2.9835145488762952e-05, "loss": 0.3898, "step": 1160 }, { "epoch": 0.1426465167711021, "grad_norm": 0.39231789718816756, "learning_rate": 2.983419295744923e-05, "loss": 0.4948, "step": 1161 }, { "epoch": 0.14276938198795922, "grad_norm": 0.38225816163474746, "learning_rate": 2.983323769748191e-05, "loss": 0.4311, "step": 1162 }, { "epoch": 0.1428922472048163, "grad_norm": 0.3700652359774395, "learning_rate": 2.983227970903672e-05, "loss": 0.4386, "step": 1163 }, { "epoch": 0.14301511242167342, "grad_norm": 0.3930896394728307, "learning_rate": 2.983131899228986e-05, "loss": 0.4417, "step": 1164 }, { "epoch": 0.14313797763853053, "grad_norm": 0.3774518556091981, "learning_rate": 2.983035554741806e-05, "loss": 0.414, "step": 1165 }, { "epoch": 0.14326084285538765, "grad_norm": 0.31225092911358066, "learning_rate": 2.9829389374598538e-05, "loss": 0.3103, "step": 1166 }, { "epoch": 0.14338370807224474, "grad_norm": 0.444672092067488, "learning_rate": 2.982842047400901e-05, "loss": 0.3913, "step": 1167 }, { "epoch": 0.14350657328910185, "grad_norm": 0.4267325982741409, "learning_rate": 2.9827448845827697e-05, "loss": 0.4318, "step": 1168 }, { "epoch": 0.14362943850595897, "grad_norm": 0.4596093384414418, "learning_rate": 2.9826474490233337e-05, "loss": 0.4445, "step": 1169 }, { "epoch": 0.14375230372281608, "grad_norm": 0.3969218527725183, "learning_rate": 2.9825497407405144e-05, "loss": 0.3682, "step": 1170 }, { "epoch": 0.14387516893967317, "grad_norm": 0.46458001367367663, "learning_rate": 2.982451759752285e-05, "loss": 0.3818, "step": 1171 }, { "epoch": 0.14399803415653029, "grad_norm": 0.45308343151028063, "learning_rate": 2.982353506076668e-05, "loss": 0.4517, "step": 1172 }, { "epoch": 0.1441208993733874, "grad_norm": 0.4098679806379063, "learning_rate": 2.9822549797317374e-05, "loss": 0.3693, "step": 1173 }, { "epoch": 0.1442437645902445, "grad_norm": 0.4435284980416306, "learning_rate": 2.9821561807356158e-05, "loss": 0.3967, "step": 1174 }, { "epoch": 0.1443666298071016, "grad_norm": 0.40273950982568785, "learning_rate": 2.9820571091064767e-05, "loss": 0.455, "step": 1175 }, { "epoch": 0.14448949502395872, "grad_norm": 0.6039834084453309, "learning_rate": 2.9819577648625442e-05, "loss": 0.4306, "step": 1176 }, { "epoch": 0.14461236024081583, "grad_norm": 0.4301774774723159, "learning_rate": 2.981858148022092e-05, "loss": 0.4136, "step": 1177 }, { "epoch": 0.14473522545767292, "grad_norm": 0.4241357348735733, "learning_rate": 2.9817582586034433e-05, "loss": 0.3457, "step": 1178 }, { "epoch": 0.14485809067453004, "grad_norm": 0.4139198613022733, "learning_rate": 2.981658096624972e-05, "loss": 0.3724, "step": 1179 }, { "epoch": 0.14498095589138715, "grad_norm": 0.368679461596687, "learning_rate": 2.9815576621051036e-05, "loss": 0.4554, "step": 1180 }, { "epoch": 0.14510382110824427, "grad_norm": 0.42370742225570907, "learning_rate": 2.9814569550623108e-05, "loss": 0.4001, "step": 1181 }, { "epoch": 0.14522668632510136, "grad_norm": 0.40039077275998786, "learning_rate": 2.981355975515119e-05, "loss": 0.3839, "step": 1182 }, { "epoch": 0.14534955154195847, "grad_norm": 0.46940950646924956, "learning_rate": 2.9812547234821024e-05, "loss": 0.4334, "step": 1183 }, { "epoch": 0.1454724167588156, "grad_norm": 0.3639548762131431, "learning_rate": 2.981153198981886e-05, "loss": 0.3346, "step": 1184 }, { "epoch": 0.14559528197567267, "grad_norm": 0.41717309997798513, "learning_rate": 2.9810514020331437e-05, "loss": 0.4241, "step": 1185 }, { "epoch": 0.1457181471925298, "grad_norm": 0.4509504038867809, "learning_rate": 2.980949332654601e-05, "loss": 0.4894, "step": 1186 }, { "epoch": 0.1458410124093869, "grad_norm": 0.4325379402109613, "learning_rate": 2.9808469908650335e-05, "loss": 0.3595, "step": 1187 }, { "epoch": 0.14596387762624402, "grad_norm": 0.3629532946727146, "learning_rate": 2.980744376683265e-05, "loss": 0.4213, "step": 1188 }, { "epoch": 0.1460867428431011, "grad_norm": 0.4387970660810725, "learning_rate": 2.9806414901281716e-05, "loss": 0.3745, "step": 1189 }, { "epoch": 0.14620960805995822, "grad_norm": 0.4215972314615007, "learning_rate": 2.9805383312186784e-05, "loss": 0.3855, "step": 1190 }, { "epoch": 0.14633247327681534, "grad_norm": 0.40852858566362654, "learning_rate": 2.980434899973761e-05, "loss": 0.4017, "step": 1191 }, { "epoch": 0.14645533849367245, "grad_norm": 0.35047312408090864, "learning_rate": 2.9803311964124444e-05, "loss": 0.3496, "step": 1192 }, { "epoch": 0.14657820371052954, "grad_norm": 0.31685417330586874, "learning_rate": 2.9802272205538045e-05, "loss": 0.3917, "step": 1193 }, { "epoch": 0.14670106892738666, "grad_norm": 0.4130175964921057, "learning_rate": 2.980122972416967e-05, "loss": 0.4627, "step": 1194 }, { "epoch": 0.14682393414424377, "grad_norm": 0.526636405551489, "learning_rate": 2.980018452021108e-05, "loss": 0.4935, "step": 1195 }, { "epoch": 0.14694679936110086, "grad_norm": 0.4276982292201359, "learning_rate": 2.9799136593854524e-05, "loss": 0.3404, "step": 1196 }, { "epoch": 0.14706966457795798, "grad_norm": 0.48406481329179224, "learning_rate": 2.979808594529277e-05, "loss": 0.4663, "step": 1197 }, { "epoch": 0.1471925297948151, "grad_norm": 0.38057794765757263, "learning_rate": 2.979703257471908e-05, "loss": 0.376, "step": 1198 }, { "epoch": 0.1473153950116722, "grad_norm": 0.3534069585956711, "learning_rate": 2.9795976482327206e-05, "loss": 0.3786, "step": 1199 }, { "epoch": 0.1474382602285293, "grad_norm": 0.3797322097594495, "learning_rate": 2.979491766831141e-05, "loss": 0.3415, "step": 1200 }, { "epoch": 0.1475611254453864, "grad_norm": 0.4809842628815004, "learning_rate": 2.9793856132866465e-05, "loss": 0.4093, "step": 1201 }, { "epoch": 0.14768399066224353, "grad_norm": 0.44896863933805725, "learning_rate": 2.979279187618762e-05, "loss": 0.4655, "step": 1202 }, { "epoch": 0.14780685587910064, "grad_norm": 0.3708159049863144, "learning_rate": 2.9791724898470646e-05, "loss": 0.4137, "step": 1203 }, { "epoch": 0.14792972109595773, "grad_norm": 0.5131759998120041, "learning_rate": 2.9790655199911803e-05, "loss": 0.4083, "step": 1204 }, { "epoch": 0.14805258631281484, "grad_norm": 0.3918059747498154, "learning_rate": 2.978958278070786e-05, "loss": 0.4345, "step": 1205 }, { "epoch": 0.14817545152967196, "grad_norm": 0.45652667305148337, "learning_rate": 2.9788507641056077e-05, "loss": 0.3688, "step": 1206 }, { "epoch": 0.14829831674652905, "grad_norm": 0.42303351420317287, "learning_rate": 2.9787429781154216e-05, "loss": 0.3825, "step": 1207 }, { "epoch": 0.14842118196338616, "grad_norm": 0.4268260949902638, "learning_rate": 2.9786349201200554e-05, "loss": 0.4098, "step": 1208 }, { "epoch": 0.14854404718024328, "grad_norm": 0.4464291615302899, "learning_rate": 2.9785265901393843e-05, "loss": 0.368, "step": 1209 }, { "epoch": 0.1486669123971004, "grad_norm": 0.36763816366649305, "learning_rate": 2.978417988193336e-05, "loss": 0.3619, "step": 1210 }, { "epoch": 0.14878977761395748, "grad_norm": 0.3445607281186868, "learning_rate": 2.9783091143018862e-05, "loss": 0.3358, "step": 1211 }, { "epoch": 0.1489126428308146, "grad_norm": 0.49146329781005627, "learning_rate": 2.9781999684850625e-05, "loss": 0.3965, "step": 1212 }, { "epoch": 0.1490355080476717, "grad_norm": 0.38809574811426434, "learning_rate": 2.9780905507629405e-05, "loss": 0.3516, "step": 1213 }, { "epoch": 0.1491583732645288, "grad_norm": 0.44611921604303234, "learning_rate": 2.9779808611556478e-05, "loss": 0.3768, "step": 1214 }, { "epoch": 0.14928123848138591, "grad_norm": 0.42023930641268586, "learning_rate": 2.977870899683361e-05, "loss": 0.4154, "step": 1215 }, { "epoch": 0.14940410369824303, "grad_norm": 0.3973378691312072, "learning_rate": 2.9777606663663058e-05, "loss": 0.3176, "step": 1216 }, { "epoch": 0.14952696891510014, "grad_norm": 0.43319039229137796, "learning_rate": 2.9776501612247603e-05, "loss": 0.455, "step": 1217 }, { "epoch": 0.14964983413195723, "grad_norm": 0.4610017104870566, "learning_rate": 2.97753938427905e-05, "loss": 0.4177, "step": 1218 }, { "epoch": 0.14977269934881435, "grad_norm": 0.4494075346753611, "learning_rate": 2.9774283355495527e-05, "loss": 0.3863, "step": 1219 }, { "epoch": 0.14989556456567146, "grad_norm": 0.4273895204814871, "learning_rate": 2.9773170150566943e-05, "loss": 0.4417, "step": 1220 }, { "epoch": 0.15001842978252858, "grad_norm": 0.4285768955256981, "learning_rate": 2.9772054228209514e-05, "loss": 0.421, "step": 1221 }, { "epoch": 0.15014129499938567, "grad_norm": 0.40336185919315476, "learning_rate": 2.9770935588628513e-05, "loss": 0.4096, "step": 1222 }, { "epoch": 0.15026416021624278, "grad_norm": 0.3782962528297601, "learning_rate": 2.9769814232029703e-05, "loss": 0.4419, "step": 1223 }, { "epoch": 0.1503870254330999, "grad_norm": 0.40499795921660214, "learning_rate": 2.976869015861935e-05, "loss": 0.4051, "step": 1224 }, { "epoch": 0.15050989064995698, "grad_norm": 0.3673388828637791, "learning_rate": 2.976756336860422e-05, "loss": 0.376, "step": 1225 }, { "epoch": 0.1506327558668141, "grad_norm": 0.4294143216007326, "learning_rate": 2.976643386219158e-05, "loss": 0.4367, "step": 1226 }, { "epoch": 0.15075562108367122, "grad_norm": 0.4010685482792183, "learning_rate": 2.97653016395892e-05, "loss": 0.3038, "step": 1227 }, { "epoch": 0.15087848630052833, "grad_norm": 0.3262962336831068, "learning_rate": 2.9764166701005334e-05, "loss": 0.3915, "step": 1228 }, { "epoch": 0.15100135151738542, "grad_norm": 0.3718152616476307, "learning_rate": 2.9763029046648753e-05, "loss": 0.3476, "step": 1229 }, { "epoch": 0.15112421673424253, "grad_norm": 0.3674584325058857, "learning_rate": 2.976188867672872e-05, "loss": 0.326, "step": 1230 }, { "epoch": 0.15124708195109965, "grad_norm": 0.4406978088910398, "learning_rate": 2.9760745591455e-05, "loss": 0.4279, "step": 1231 }, { "epoch": 0.15136994716795676, "grad_norm": 0.34566826723564, "learning_rate": 2.9759599791037855e-05, "loss": 0.3504, "step": 1232 }, { "epoch": 0.15149281238481385, "grad_norm": 0.4562396654779036, "learning_rate": 2.9758451275688044e-05, "loss": 0.4237, "step": 1233 }, { "epoch": 0.15161567760167097, "grad_norm": 0.38684068082274414, "learning_rate": 2.975730004561684e-05, "loss": 0.4296, "step": 1234 }, { "epoch": 0.15173854281852808, "grad_norm": 0.49956022823367885, "learning_rate": 2.9756146101035995e-05, "loss": 0.4327, "step": 1235 }, { "epoch": 0.15186140803538517, "grad_norm": 0.4259609174193928, "learning_rate": 2.9754989442157767e-05, "loss": 0.4428, "step": 1236 }, { "epoch": 0.15198427325224229, "grad_norm": 0.6013755470273378, "learning_rate": 2.9753830069194926e-05, "loss": 0.4433, "step": 1237 }, { "epoch": 0.1521071384690994, "grad_norm": 0.3344353676554677, "learning_rate": 2.9752667982360725e-05, "loss": 0.3612, "step": 1238 }, { "epoch": 0.15223000368595652, "grad_norm": 0.5038436394955765, "learning_rate": 2.975150318186892e-05, "loss": 0.4135, "step": 1239 }, { "epoch": 0.1523528689028136, "grad_norm": 0.3761532522744391, "learning_rate": 2.9750335667933775e-05, "loss": 0.449, "step": 1240 }, { "epoch": 0.15247573411967072, "grad_norm": 0.42527432585545144, "learning_rate": 2.9749165440770037e-05, "loss": 0.4227, "step": 1241 }, { "epoch": 0.15259859933652783, "grad_norm": 0.36452361669008493, "learning_rate": 2.9747992500592977e-05, "loss": 0.3675, "step": 1242 }, { "epoch": 0.15272146455338495, "grad_norm": 0.3519279470148422, "learning_rate": 2.9746816847618333e-05, "loss": 0.3643, "step": 1243 }, { "epoch": 0.15284432977024204, "grad_norm": 0.3784599615937289, "learning_rate": 2.974563848206237e-05, "loss": 0.4089, "step": 1244 }, { "epoch": 0.15296719498709915, "grad_norm": 0.3841028816277102, "learning_rate": 2.9744457404141837e-05, "loss": 0.4055, "step": 1245 }, { "epoch": 0.15309006020395627, "grad_norm": 0.3759748985075495, "learning_rate": 2.9743273614073987e-05, "loss": 0.3885, "step": 1246 }, { "epoch": 0.15321292542081336, "grad_norm": 0.3897369126365617, "learning_rate": 2.974208711207657e-05, "loss": 0.3548, "step": 1247 }, { "epoch": 0.15333579063767047, "grad_norm": 0.3355408970318744, "learning_rate": 2.9740897898367827e-05, "loss": 0.3795, "step": 1248 }, { "epoch": 0.1534586558545276, "grad_norm": 0.3620347257461029, "learning_rate": 2.973970597316652e-05, "loss": 0.4103, "step": 1249 }, { "epoch": 0.1535815210713847, "grad_norm": 0.3840073027475184, "learning_rate": 2.9738511336691887e-05, "loss": 0.4571, "step": 1250 }, { "epoch": 0.1537043862882418, "grad_norm": 0.3478586336732372, "learning_rate": 2.973731398916368e-05, "loss": 0.3707, "step": 1251 }, { "epoch": 0.1538272515050989, "grad_norm": 0.38870287518763624, "learning_rate": 2.9736113930802134e-05, "loss": 0.4507, "step": 1252 }, { "epoch": 0.15395011672195602, "grad_norm": 0.5018840797567286, "learning_rate": 2.9734911161828e-05, "loss": 0.4501, "step": 1253 }, { "epoch": 0.1540729819388131, "grad_norm": 0.3308317088216043, "learning_rate": 2.973370568246252e-05, "loss": 0.4725, "step": 1254 }, { "epoch": 0.15419584715567022, "grad_norm": 0.35800650293915826, "learning_rate": 2.9732497492927424e-05, "loss": 0.3569, "step": 1255 }, { "epoch": 0.15431871237252734, "grad_norm": 0.42517638886778486, "learning_rate": 2.9731286593444967e-05, "loss": 0.4199, "step": 1256 }, { "epoch": 0.15444157758938445, "grad_norm": 0.37919102532811133, "learning_rate": 2.973007298423787e-05, "loss": 0.4261, "step": 1257 }, { "epoch": 0.15456444280624154, "grad_norm": 0.3607157444251023, "learning_rate": 2.9728856665529378e-05, "loss": 0.4011, "step": 1258 }, { "epoch": 0.15468730802309866, "grad_norm": 0.37096861942106046, "learning_rate": 2.9727637637543225e-05, "loss": 0.4315, "step": 1259 }, { "epoch": 0.15481017323995577, "grad_norm": 0.3408222614883327, "learning_rate": 2.9726415900503635e-05, "loss": 0.3915, "step": 1260 }, { "epoch": 0.1549330384568129, "grad_norm": 0.42004583726806805, "learning_rate": 2.9725191454635346e-05, "loss": 0.3471, "step": 1261 }, { "epoch": 0.15505590367366998, "grad_norm": 0.42408133416258326, "learning_rate": 2.9723964300163584e-05, "loss": 0.4329, "step": 1262 }, { "epoch": 0.1551787688905271, "grad_norm": 0.33588040088640714, "learning_rate": 2.9722734437314084e-05, "loss": 0.4372, "step": 1263 }, { "epoch": 0.1553016341073842, "grad_norm": 0.36402655174789145, "learning_rate": 2.972150186631306e-05, "loss": 0.441, "step": 1264 }, { "epoch": 0.1554244993242413, "grad_norm": 0.3944043109307015, "learning_rate": 2.9720266587387236e-05, "loss": 0.38, "step": 1265 }, { "epoch": 0.1555473645410984, "grad_norm": 0.4321753887401295, "learning_rate": 2.971902860076384e-05, "loss": 0.3706, "step": 1266 }, { "epoch": 0.15567022975795552, "grad_norm": 0.3552980525107531, "learning_rate": 2.9717787906670592e-05, "loss": 0.3954, "step": 1267 }, { "epoch": 0.15579309497481264, "grad_norm": 0.36338196546212065, "learning_rate": 2.9716544505335705e-05, "loss": 0.3541, "step": 1268 }, { "epoch": 0.15591596019166973, "grad_norm": 0.37021356504633823, "learning_rate": 2.9715298396987898e-05, "loss": 0.4559, "step": 1269 }, { "epoch": 0.15603882540852684, "grad_norm": 0.47149531832124436, "learning_rate": 2.971404958185638e-05, "loss": 0.4099, "step": 1270 }, { "epoch": 0.15616169062538396, "grad_norm": 0.4094565958175947, "learning_rate": 2.9712798060170868e-05, "loss": 0.3757, "step": 1271 }, { "epoch": 0.15628455584224107, "grad_norm": 0.3412298676323027, "learning_rate": 2.9711543832161565e-05, "loss": 0.3717, "step": 1272 }, { "epoch": 0.15640742105909816, "grad_norm": 0.4369192938905562, "learning_rate": 2.9710286898059185e-05, "loss": 0.4652, "step": 1273 }, { "epoch": 0.15653028627595528, "grad_norm": 0.43149621374134606, "learning_rate": 2.970902725809493e-05, "loss": 0.5148, "step": 1274 }, { "epoch": 0.1566531514928124, "grad_norm": 0.41325022888311164, "learning_rate": 2.97077649125005e-05, "loss": 0.3809, "step": 1275 }, { "epoch": 0.15677601670966948, "grad_norm": 0.3756717099774089, "learning_rate": 2.9706499861508098e-05, "loss": 0.3278, "step": 1276 }, { "epoch": 0.1568988819265266, "grad_norm": 0.3947520415433905, "learning_rate": 2.9705232105350427e-05, "loss": 0.3536, "step": 1277 }, { "epoch": 0.1570217471433837, "grad_norm": 0.43488921882696935, "learning_rate": 2.970396164426067e-05, "loss": 0.3826, "step": 1278 }, { "epoch": 0.15714461236024083, "grad_norm": 0.4651824752265371, "learning_rate": 2.970268847847253e-05, "loss": 0.4582, "step": 1279 }, { "epoch": 0.1572674775770979, "grad_norm": 0.3761787615436489, "learning_rate": 2.9701412608220193e-05, "loss": 0.3344, "step": 1280 }, { "epoch": 0.15739034279395503, "grad_norm": 0.4044869047139004, "learning_rate": 2.970013403373835e-05, "loss": 0.3708, "step": 1281 }, { "epoch": 0.15751320801081214, "grad_norm": 0.3481452693606846, "learning_rate": 2.9698852755262186e-05, "loss": 0.3524, "step": 1282 }, { "epoch": 0.15763607322766926, "grad_norm": 0.433575720292433, "learning_rate": 2.9697568773027385e-05, "loss": 0.438, "step": 1283 }, { "epoch": 0.15775893844452635, "grad_norm": 0.3788695973316724, "learning_rate": 2.9696282087270116e-05, "loss": 0.492, "step": 1284 }, { "epoch": 0.15788180366138346, "grad_norm": 0.4344375495171853, "learning_rate": 2.9694992698227074e-05, "loss": 0.3792, "step": 1285 }, { "epoch": 0.15800466887824058, "grad_norm": 0.2999331060551982, "learning_rate": 2.9693700606135425e-05, "loss": 0.3862, "step": 1286 }, { "epoch": 0.15812753409509767, "grad_norm": 0.42506125423824886, "learning_rate": 2.969240581123284e-05, "loss": 0.4903, "step": 1287 }, { "epoch": 0.15825039931195478, "grad_norm": 0.4424996575253648, "learning_rate": 2.969110831375749e-05, "loss": 0.3653, "step": 1288 }, { "epoch": 0.1583732645288119, "grad_norm": 0.5015755578999119, "learning_rate": 2.968980811394804e-05, "loss": 0.422, "step": 1289 }, { "epoch": 0.158496129745669, "grad_norm": 0.3978046209237085, "learning_rate": 2.9688505212043656e-05, "loss": 0.4479, "step": 1290 }, { "epoch": 0.1586189949625261, "grad_norm": 0.4454667351322841, "learning_rate": 2.9687199608283992e-05, "loss": 0.394, "step": 1291 }, { "epoch": 0.15874186017938322, "grad_norm": 0.4469511938365453, "learning_rate": 2.9685891302909213e-05, "loss": 0.4269, "step": 1292 }, { "epoch": 0.15886472539624033, "grad_norm": 0.37530857151508, "learning_rate": 2.9684580296159973e-05, "loss": 0.3547, "step": 1293 }, { "epoch": 0.15898759061309745, "grad_norm": 0.4135354227320437, "learning_rate": 2.9683266588277417e-05, "loss": 0.4484, "step": 1294 }, { "epoch": 0.15911045582995453, "grad_norm": 0.3737181319097474, "learning_rate": 2.9681950179503196e-05, "loss": 0.4569, "step": 1295 }, { "epoch": 0.15923332104681165, "grad_norm": 0.39215684974430737, "learning_rate": 2.968063107007946e-05, "loss": 0.4177, "step": 1296 }, { "epoch": 0.15935618626366876, "grad_norm": 0.4057683599088259, "learning_rate": 2.967930926024884e-05, "loss": 0.3512, "step": 1297 }, { "epoch": 0.15947905148052585, "grad_norm": 0.36360211352716926, "learning_rate": 2.9677984750254482e-05, "loss": 0.3951, "step": 1298 }, { "epoch": 0.15960191669738297, "grad_norm": 0.4447243907001284, "learning_rate": 2.967665754034002e-05, "loss": 0.37, "step": 1299 }, { "epoch": 0.15972478191424008, "grad_norm": 0.31581446836880395, "learning_rate": 2.9675327630749587e-05, "loss": 0.4266, "step": 1300 }, { "epoch": 0.1598476471310972, "grad_norm": 0.3343217248565972, "learning_rate": 2.967399502172781e-05, "loss": 0.3713, "step": 1301 }, { "epoch": 0.15997051234795429, "grad_norm": 0.4338966181047856, "learning_rate": 2.9672659713519805e-05, "loss": 0.4396, "step": 1302 }, { "epoch": 0.1600933775648114, "grad_norm": 0.3369120186667099, "learning_rate": 2.9671321706371206e-05, "loss": 0.4468, "step": 1303 }, { "epoch": 0.16021624278166852, "grad_norm": 0.35109876856474354, "learning_rate": 2.966998100052813e-05, "loss": 0.405, "step": 1304 }, { "epoch": 0.1603391079985256, "grad_norm": 0.4122555964017112, "learning_rate": 2.966863759623718e-05, "loss": 0.4049, "step": 1305 }, { "epoch": 0.16046197321538272, "grad_norm": 0.3871058997623775, "learning_rate": 2.9667291493745478e-05, "loss": 0.4757, "step": 1306 }, { "epoch": 0.16058483843223983, "grad_norm": 0.367627626700369, "learning_rate": 2.9665942693300626e-05, "loss": 0.3468, "step": 1307 }, { "epoch": 0.16070770364909695, "grad_norm": 0.37338660009358615, "learning_rate": 2.9664591195150725e-05, "loss": 0.3658, "step": 1308 }, { "epoch": 0.16083056886595404, "grad_norm": 0.38313512522563975, "learning_rate": 2.966323699954438e-05, "loss": 0.3993, "step": 1309 }, { "epoch": 0.16095343408281115, "grad_norm": 0.4563788933440846, "learning_rate": 2.966188010673068e-05, "loss": 0.4568, "step": 1310 }, { "epoch": 0.16107629929966827, "grad_norm": 0.4589404981209039, "learning_rate": 2.9660520516959227e-05, "loss": 0.408, "step": 1311 }, { "epoch": 0.16119916451652538, "grad_norm": 0.40924432247226183, "learning_rate": 2.9659158230480098e-05, "loss": 0.4153, "step": 1312 }, { "epoch": 0.16132202973338247, "grad_norm": 0.3336538452213012, "learning_rate": 2.9657793247543875e-05, "loss": 0.3244, "step": 1313 }, { "epoch": 0.1614448949502396, "grad_norm": 0.32012393244642917, "learning_rate": 2.965642556840165e-05, "loss": 0.4445, "step": 1314 }, { "epoch": 0.1615677601670967, "grad_norm": 0.46008739476159555, "learning_rate": 2.9655055193304987e-05, "loss": 0.3885, "step": 1315 }, { "epoch": 0.1616906253839538, "grad_norm": 0.373879089940527, "learning_rate": 2.9653682122505966e-05, "loss": 0.4422, "step": 1316 }, { "epoch": 0.1618134906008109, "grad_norm": 0.3959582866011498, "learning_rate": 2.965230635625715e-05, "loss": 0.4731, "step": 1317 }, { "epoch": 0.16193635581766802, "grad_norm": 0.3427326507958463, "learning_rate": 2.9650927894811607e-05, "loss": 0.4527, "step": 1318 }, { "epoch": 0.16205922103452514, "grad_norm": 0.365213035955697, "learning_rate": 2.9649546738422887e-05, "loss": 0.408, "step": 1319 }, { "epoch": 0.16218208625138222, "grad_norm": 0.406685516367406, "learning_rate": 2.9648162887345052e-05, "loss": 0.4172, "step": 1320 }, { "epoch": 0.16230495146823934, "grad_norm": 0.43533871190356255, "learning_rate": 2.9646776341832648e-05, "loss": 0.3577, "step": 1321 }, { "epoch": 0.16242781668509645, "grad_norm": 0.3720663169708545, "learning_rate": 2.964538710214073e-05, "loss": 0.4005, "step": 1322 }, { "epoch": 0.16255068190195357, "grad_norm": 0.40941300855243207, "learning_rate": 2.9643995168524827e-05, "loss": 0.4233, "step": 1323 }, { "epoch": 0.16267354711881066, "grad_norm": 0.40490507255496605, "learning_rate": 2.964260054124098e-05, "loss": 0.4169, "step": 1324 }, { "epoch": 0.16279641233566777, "grad_norm": 0.338816217341077, "learning_rate": 2.964120322054573e-05, "loss": 0.4017, "step": 1325 }, { "epoch": 0.1629192775525249, "grad_norm": 0.38571820458651696, "learning_rate": 2.9639803206696102e-05, "loss": 0.4841, "step": 1326 }, { "epoch": 0.16304214276938198, "grad_norm": 0.34292641828537157, "learning_rate": 2.963840049994961e-05, "loss": 0.3514, "step": 1327 }, { "epoch": 0.1631650079862391, "grad_norm": 0.3859748780990678, "learning_rate": 2.9636995100564282e-05, "loss": 0.3974, "step": 1328 }, { "epoch": 0.1632878732030962, "grad_norm": 0.46338337236096055, "learning_rate": 2.9635587008798632e-05, "loss": 0.4821, "step": 1329 }, { "epoch": 0.16341073841995332, "grad_norm": 0.37866081738204493, "learning_rate": 2.9634176224911665e-05, "loss": 0.4225, "step": 1330 }, { "epoch": 0.1635336036368104, "grad_norm": 0.3600270902502686, "learning_rate": 2.9632762749162886e-05, "loss": 0.3672, "step": 1331 }, { "epoch": 0.16365646885366752, "grad_norm": 0.4045082068990034, "learning_rate": 2.9631346581812293e-05, "loss": 0.424, "step": 1332 }, { "epoch": 0.16377933407052464, "grad_norm": 0.4305978524634844, "learning_rate": 2.962992772312039e-05, "loss": 0.3922, "step": 1333 }, { "epoch": 0.16390219928738176, "grad_norm": 0.418764954721383, "learning_rate": 2.9628506173348158e-05, "loss": 0.3817, "step": 1334 }, { "epoch": 0.16402506450423884, "grad_norm": 0.33015384918599533, "learning_rate": 2.9627081932757084e-05, "loss": 0.3572, "step": 1335 }, { "epoch": 0.16414792972109596, "grad_norm": 0.4713800966062914, "learning_rate": 2.962565500160915e-05, "loss": 0.3951, "step": 1336 }, { "epoch": 0.16427079493795307, "grad_norm": 0.4095331815596944, "learning_rate": 2.9624225380166827e-05, "loss": 0.3341, "step": 1337 }, { "epoch": 0.16439366015481016, "grad_norm": 0.39540209916142943, "learning_rate": 2.962279306869309e-05, "loss": 0.3539, "step": 1338 }, { "epoch": 0.16451652537166728, "grad_norm": 0.39922681376414987, "learning_rate": 2.9621358067451398e-05, "loss": 0.4181, "step": 1339 }, { "epoch": 0.1646393905885244, "grad_norm": 0.36641787481610627, "learning_rate": 2.961992037670571e-05, "loss": 0.396, "step": 1340 }, { "epoch": 0.1647622558053815, "grad_norm": 0.4435218605804204, "learning_rate": 2.9618479996720488e-05, "loss": 0.4501, "step": 1341 }, { "epoch": 0.1648851210222386, "grad_norm": 0.4242932636217542, "learning_rate": 2.9617036927760672e-05, "loss": 0.4393, "step": 1342 }, { "epoch": 0.1650079862390957, "grad_norm": 0.39934599094733747, "learning_rate": 2.9615591170091707e-05, "loss": 0.3669, "step": 1343 }, { "epoch": 0.16513085145595283, "grad_norm": 0.3443121431747638, "learning_rate": 2.961414272397953e-05, "loss": 0.3683, "step": 1344 }, { "epoch": 0.1652537166728099, "grad_norm": 0.4523923054953027, "learning_rate": 2.961269158969058e-05, "loss": 0.4339, "step": 1345 }, { "epoch": 0.16537658188966703, "grad_norm": 0.35702315829747433, "learning_rate": 2.9611237767491776e-05, "loss": 0.3674, "step": 1346 }, { "epoch": 0.16549944710652414, "grad_norm": 0.4283129241340653, "learning_rate": 2.9609781257650543e-05, "loss": 0.3834, "step": 1347 }, { "epoch": 0.16562231232338126, "grad_norm": 0.412068005736469, "learning_rate": 2.960832206043479e-05, "loss": 0.3889, "step": 1348 }, { "epoch": 0.16574517754023835, "grad_norm": 0.35566040041818525, "learning_rate": 2.960686017611294e-05, "loss": 0.3939, "step": 1349 }, { "epoch": 0.16586804275709546, "grad_norm": 0.35828271027494035, "learning_rate": 2.9605395604953888e-05, "loss": 0.3653, "step": 1350 }, { "epoch": 0.16599090797395258, "grad_norm": 0.38892220675675115, "learning_rate": 2.960392834722703e-05, "loss": 0.433, "step": 1351 }, { "epoch": 0.1661137731908097, "grad_norm": 0.343513705176893, "learning_rate": 2.960245840320226e-05, "loss": 0.4019, "step": 1352 }, { "epoch": 0.16623663840766678, "grad_norm": 0.31510245553799326, "learning_rate": 2.9600985773149972e-05, "loss": 0.3413, "step": 1353 }, { "epoch": 0.1663595036245239, "grad_norm": 0.40132147589924116, "learning_rate": 2.959951045734104e-05, "loss": 0.3738, "step": 1354 }, { "epoch": 0.166482368841381, "grad_norm": 0.3880216391554565, "learning_rate": 2.9598032456046846e-05, "loss": 0.4176, "step": 1355 }, { "epoch": 0.1666052340582381, "grad_norm": 0.3896921651988522, "learning_rate": 2.9596551769539248e-05, "loss": 0.383, "step": 1356 }, { "epoch": 0.16672809927509522, "grad_norm": 0.34966118296653115, "learning_rate": 2.9595068398090614e-05, "loss": 0.3559, "step": 1357 }, { "epoch": 0.16685096449195233, "grad_norm": 0.4481997081491503, "learning_rate": 2.9593582341973803e-05, "loss": 0.4317, "step": 1358 }, { "epoch": 0.16697382970880945, "grad_norm": 0.3660566366669517, "learning_rate": 2.959209360146216e-05, "loss": 0.3669, "step": 1359 }, { "epoch": 0.16709669492566653, "grad_norm": 0.42706922484548654, "learning_rate": 2.9590602176829532e-05, "loss": 0.424, "step": 1360 }, { "epoch": 0.16721956014252365, "grad_norm": 0.3897679500270815, "learning_rate": 2.958910806835026e-05, "loss": 0.3921, "step": 1361 }, { "epoch": 0.16734242535938076, "grad_norm": 0.4224322671872921, "learning_rate": 2.958761127629917e-05, "loss": 0.504, "step": 1362 }, { "epoch": 0.16746529057623788, "grad_norm": 0.3179270205919647, "learning_rate": 2.9586111800951588e-05, "loss": 0.3533, "step": 1363 }, { "epoch": 0.16758815579309497, "grad_norm": 0.44874486482643083, "learning_rate": 2.9584609642583337e-05, "loss": 0.4509, "step": 1364 }, { "epoch": 0.16771102100995208, "grad_norm": 0.3673811806464266, "learning_rate": 2.958310480147073e-05, "loss": 0.3886, "step": 1365 }, { "epoch": 0.1678338862268092, "grad_norm": 0.30708000803216406, "learning_rate": 2.9581597277890565e-05, "loss": 0.3541, "step": 1366 }, { "epoch": 0.16795675144366629, "grad_norm": 0.3879014221053647, "learning_rate": 2.958008707212015e-05, "loss": 0.4524, "step": 1367 }, { "epoch": 0.1680796166605234, "grad_norm": 0.3908952346009626, "learning_rate": 2.9578574184437264e-05, "loss": 0.3367, "step": 1368 }, { "epoch": 0.16820248187738052, "grad_norm": 0.3605594586255655, "learning_rate": 2.9577058615120212e-05, "loss": 0.3763, "step": 1369 }, { "epoch": 0.16832534709423763, "grad_norm": 0.46730574137176795, "learning_rate": 2.9575540364447755e-05, "loss": 0.3845, "step": 1370 }, { "epoch": 0.16844821231109472, "grad_norm": 0.38131447331184515, "learning_rate": 2.9574019432699182e-05, "loss": 0.3912, "step": 1371 }, { "epoch": 0.16857107752795183, "grad_norm": 0.43552714483430693, "learning_rate": 2.9572495820154245e-05, "loss": 0.4063, "step": 1372 }, { "epoch": 0.16869394274480895, "grad_norm": 0.31275448234252196, "learning_rate": 2.957096952709321e-05, "loss": 0.3624, "step": 1373 }, { "epoch": 0.16881680796166607, "grad_norm": 0.3668206005547462, "learning_rate": 2.9569440553796824e-05, "loss": 0.3728, "step": 1374 }, { "epoch": 0.16893967317852315, "grad_norm": 0.42447395837187846, "learning_rate": 2.9567908900546335e-05, "loss": 0.4198, "step": 1375 }, { "epoch": 0.16906253839538027, "grad_norm": 0.44247904923473114, "learning_rate": 2.956637456762348e-05, "loss": 0.4067, "step": 1376 }, { "epoch": 0.16918540361223738, "grad_norm": 0.3684559943428574, "learning_rate": 2.9564837555310494e-05, "loss": 0.3564, "step": 1377 }, { "epoch": 0.16930826882909447, "grad_norm": 0.38959907794625237, "learning_rate": 2.9563297863890093e-05, "loss": 0.4094, "step": 1378 }, { "epoch": 0.1694311340459516, "grad_norm": 0.35059768772217376, "learning_rate": 2.956175549364549e-05, "loss": 0.4027, "step": 1379 }, { "epoch": 0.1695539992628087, "grad_norm": 0.34899782999515, "learning_rate": 2.956021044486041e-05, "loss": 0.4239, "step": 1380 }, { "epoch": 0.16967686447966582, "grad_norm": 0.44150616579693774, "learning_rate": 2.9558662717819038e-05, "loss": 0.4373, "step": 1381 }, { "epoch": 0.1697997296965229, "grad_norm": 0.39411894332389674, "learning_rate": 2.955711231280608e-05, "loss": 0.4212, "step": 1382 }, { "epoch": 0.16992259491338002, "grad_norm": 0.48171573394323364, "learning_rate": 2.955555923010672e-05, "loss": 0.4782, "step": 1383 }, { "epoch": 0.17004546013023714, "grad_norm": 0.35699501087966484, "learning_rate": 2.9554003470006633e-05, "loss": 0.4194, "step": 1384 }, { "epoch": 0.17016832534709425, "grad_norm": 0.41379447743072556, "learning_rate": 2.9552445032791988e-05, "loss": 0.4173, "step": 1385 }, { "epoch": 0.17029119056395134, "grad_norm": 0.3995586351490257, "learning_rate": 2.955088391874946e-05, "loss": 0.3714, "step": 1386 }, { "epoch": 0.17041405578080845, "grad_norm": 0.4291900793567121, "learning_rate": 2.9549320128166202e-05, "loss": 0.4393, "step": 1387 }, { "epoch": 0.17053692099766557, "grad_norm": 0.4577286719153799, "learning_rate": 2.954775366132986e-05, "loss": 0.4382, "step": 1388 }, { "epoch": 0.17065978621452266, "grad_norm": 0.42494679125421053, "learning_rate": 2.954618451852858e-05, "loss": 0.4265, "step": 1389 }, { "epoch": 0.17078265143137977, "grad_norm": 0.37782511241382327, "learning_rate": 2.9544612700050994e-05, "loss": 0.3456, "step": 1390 }, { "epoch": 0.1709055166482369, "grad_norm": 0.42951695739869944, "learning_rate": 2.9543038206186223e-05, "loss": 0.3642, "step": 1391 }, { "epoch": 0.171028381865094, "grad_norm": 0.3233861024729271, "learning_rate": 2.9541461037223888e-05, "loss": 0.3856, "step": 1392 }, { "epoch": 0.1711512470819511, "grad_norm": 0.3593684296661726, "learning_rate": 2.9539881193454105e-05, "loss": 0.438, "step": 1393 }, { "epoch": 0.1712741122988082, "grad_norm": 0.3759752522856755, "learning_rate": 2.953829867516747e-05, "loss": 0.3944, "step": 1394 }, { "epoch": 0.17139697751566532, "grad_norm": 0.43333520519139507, "learning_rate": 2.9536713482655074e-05, "loss": 0.4526, "step": 1395 }, { "epoch": 0.1715198427325224, "grad_norm": 0.3232391758510125, "learning_rate": 2.9535125616208507e-05, "loss": 0.3353, "step": 1396 }, { "epoch": 0.17164270794937952, "grad_norm": 0.41116588850608965, "learning_rate": 2.953353507611985e-05, "loss": 0.4394, "step": 1397 }, { "epoch": 0.17176557316623664, "grad_norm": 0.3765110163638843, "learning_rate": 2.9531941862681667e-05, "loss": 0.3505, "step": 1398 }, { "epoch": 0.17188843838309376, "grad_norm": 0.4216847524275927, "learning_rate": 2.953034597618702e-05, "loss": 0.4534, "step": 1399 }, { "epoch": 0.17201130359995084, "grad_norm": 0.4129642339447356, "learning_rate": 2.9528747416929467e-05, "loss": 0.5074, "step": 1400 }, { "epoch": 0.17213416881680796, "grad_norm": 0.42008412560430924, "learning_rate": 2.952714618520305e-05, "loss": 0.404, "step": 1401 }, { "epoch": 0.17225703403366507, "grad_norm": 0.37418334155774774, "learning_rate": 2.95255422813023e-05, "loss": 0.401, "step": 1402 }, { "epoch": 0.1723798992505222, "grad_norm": 0.38325810524704534, "learning_rate": 2.952393570552225e-05, "loss": 0.3278, "step": 1403 }, { "epoch": 0.17250276446737928, "grad_norm": 0.3925574832542034, "learning_rate": 2.9522326458158415e-05, "loss": 0.3924, "step": 1404 }, { "epoch": 0.1726256296842364, "grad_norm": 0.29165308911643895, "learning_rate": 2.9520714539506812e-05, "loss": 0.4239, "step": 1405 }, { "epoch": 0.1727484949010935, "grad_norm": 0.42589621584422976, "learning_rate": 2.951909994986394e-05, "loss": 0.4296, "step": 1406 }, { "epoch": 0.1728713601179506, "grad_norm": 0.3810496035650119, "learning_rate": 2.951748268952679e-05, "loss": 0.3634, "step": 1407 }, { "epoch": 0.1729942253348077, "grad_norm": 0.3734447025196176, "learning_rate": 2.951586275879285e-05, "loss": 0.501, "step": 1408 }, { "epoch": 0.17311709055166483, "grad_norm": 0.37364704146976613, "learning_rate": 2.9514240157960093e-05, "loss": 0.4358, "step": 1409 }, { "epoch": 0.17323995576852194, "grad_norm": 0.32844186375734413, "learning_rate": 2.951261488732699e-05, "loss": 0.4131, "step": 1410 }, { "epoch": 0.17336282098537903, "grad_norm": 0.4378924210272781, "learning_rate": 2.9510986947192494e-05, "loss": 0.3749, "step": 1411 }, { "epoch": 0.17348568620223614, "grad_norm": 0.4566626405645941, "learning_rate": 2.9509356337856054e-05, "loss": 0.4523, "step": 1412 }, { "epoch": 0.17360855141909326, "grad_norm": 0.4242634592681337, "learning_rate": 2.9507723059617616e-05, "loss": 0.4257, "step": 1413 }, { "epoch": 0.17373141663595038, "grad_norm": 0.5259070649915198, "learning_rate": 2.9506087112777602e-05, "loss": 0.4455, "step": 1414 }, { "epoch": 0.17385428185280746, "grad_norm": 0.5891989461654191, "learning_rate": 2.9504448497636945e-05, "loss": 0.4028, "step": 1415 }, { "epoch": 0.17397714706966458, "grad_norm": 0.3861946187068874, "learning_rate": 2.9502807214497047e-05, "loss": 0.422, "step": 1416 }, { "epoch": 0.1741000122865217, "grad_norm": 0.3767432934170536, "learning_rate": 2.9501163263659818e-05, "loss": 0.3755, "step": 1417 }, { "epoch": 0.17422287750337878, "grad_norm": 0.3929604300891876, "learning_rate": 2.949951664542765e-05, "loss": 0.3649, "step": 1418 }, { "epoch": 0.1743457427202359, "grad_norm": 0.3371334796412385, "learning_rate": 2.9497867360103427e-05, "loss": 0.4164, "step": 1419 }, { "epoch": 0.174468607937093, "grad_norm": 0.32876857235675094, "learning_rate": 2.9496215407990524e-05, "loss": 0.3859, "step": 1420 }, { "epoch": 0.17459147315395013, "grad_norm": 0.36489941374851775, "learning_rate": 2.949456078939281e-05, "loss": 0.3342, "step": 1421 }, { "epoch": 0.17471433837080722, "grad_norm": 0.38810948587626587, "learning_rate": 2.949290350461464e-05, "loss": 0.3372, "step": 1422 }, { "epoch": 0.17483720358766433, "grad_norm": 0.5181684560036562, "learning_rate": 2.9491243553960856e-05, "loss": 0.5083, "step": 1423 }, { "epoch": 0.17496006880452145, "grad_norm": 0.43499426088026066, "learning_rate": 2.9489580937736805e-05, "loss": 0.4294, "step": 1424 }, { "epoch": 0.17508293402137856, "grad_norm": 0.39899636875470373, "learning_rate": 2.94879156562483e-05, "loss": 0.4147, "step": 1425 }, { "epoch": 0.17520579923823565, "grad_norm": 0.40055110923751996, "learning_rate": 2.9486247709801674e-05, "loss": 0.357, "step": 1426 }, { "epoch": 0.17532866445509276, "grad_norm": 0.4448345042876189, "learning_rate": 2.948457709870373e-05, "loss": 0.4476, "step": 1427 }, { "epoch": 0.17545152967194988, "grad_norm": 0.3707567174208644, "learning_rate": 2.948290382326176e-05, "loss": 0.46, "step": 1428 }, { "epoch": 0.17557439488880697, "grad_norm": 0.5078627257668874, "learning_rate": 2.948122788378356e-05, "loss": 0.4721, "step": 1429 }, { "epoch": 0.17569726010566408, "grad_norm": 0.4170162570919047, "learning_rate": 2.9479549280577402e-05, "loss": 0.3969, "step": 1430 }, { "epoch": 0.1758201253225212, "grad_norm": 0.3658572660188734, "learning_rate": 2.947786801395206e-05, "loss": 0.4011, "step": 1431 }, { "epoch": 0.1759429905393783, "grad_norm": 0.4627445999162075, "learning_rate": 2.947618408421679e-05, "loss": 0.4733, "step": 1432 }, { "epoch": 0.1760658557562354, "grad_norm": 0.34146458972998567, "learning_rate": 2.9474497491681337e-05, "loss": 0.3458, "step": 1433 }, { "epoch": 0.17618872097309252, "grad_norm": 0.5656913234282659, "learning_rate": 2.947280823665594e-05, "loss": 0.4364, "step": 1434 }, { "epoch": 0.17631158618994963, "grad_norm": 0.346431461141425, "learning_rate": 2.9471116319451324e-05, "loss": 0.4116, "step": 1435 }, { "epoch": 0.17643445140680672, "grad_norm": 0.3225582327732717, "learning_rate": 2.9469421740378713e-05, "loss": 0.3498, "step": 1436 }, { "epoch": 0.17655731662366383, "grad_norm": 0.34193306340476165, "learning_rate": 2.9467724499749813e-05, "loss": 0.3727, "step": 1437 }, { "epoch": 0.17668018184052095, "grad_norm": 0.3673440434171748, "learning_rate": 2.9466024597876814e-05, "loss": 0.4184, "step": 1438 }, { "epoch": 0.17680304705737807, "grad_norm": 0.4234132641847153, "learning_rate": 2.9464322035072407e-05, "loss": 0.3714, "step": 1439 }, { "epoch": 0.17692591227423515, "grad_norm": 0.35299757340043475, "learning_rate": 2.9462616811649767e-05, "loss": 0.3568, "step": 1440 }, { "epoch": 0.17704877749109227, "grad_norm": 0.442801555047919, "learning_rate": 2.9460908927922557e-05, "loss": 0.4722, "step": 1441 }, { "epoch": 0.17717164270794938, "grad_norm": 0.35915672557724, "learning_rate": 2.945919838420493e-05, "loss": 0.3399, "step": 1442 }, { "epoch": 0.1772945079248065, "grad_norm": 0.310962038435407, "learning_rate": 2.9457485180811535e-05, "loss": 0.3715, "step": 1443 }, { "epoch": 0.1774173731416636, "grad_norm": 0.43375392348900177, "learning_rate": 2.94557693180575e-05, "loss": 0.422, "step": 1444 }, { "epoch": 0.1775402383585207, "grad_norm": 0.41199805721676697, "learning_rate": 2.9454050796258448e-05, "loss": 0.4, "step": 1445 }, { "epoch": 0.17766310357537782, "grad_norm": 0.4027720596873335, "learning_rate": 2.9452329615730488e-05, "loss": 0.3757, "step": 1446 }, { "epoch": 0.1777859687922349, "grad_norm": 0.40987818344956095, "learning_rate": 2.9450605776790225e-05, "loss": 0.4491, "step": 1447 }, { "epoch": 0.17790883400909202, "grad_norm": 0.3442500685775517, "learning_rate": 2.9448879279754743e-05, "loss": 0.4881, "step": 1448 }, { "epoch": 0.17803169922594914, "grad_norm": 0.4183177136791224, "learning_rate": 2.944715012494162e-05, "loss": 0.3673, "step": 1449 }, { "epoch": 0.17815456444280625, "grad_norm": 0.3439244532889935, "learning_rate": 2.9445418312668924e-05, "loss": 0.3568, "step": 1450 }, { "epoch": 0.17827742965966334, "grad_norm": 0.34465781262428535, "learning_rate": 2.944368384325522e-05, "loss": 0.4057, "step": 1451 }, { "epoch": 0.17840029487652045, "grad_norm": 0.395502662776808, "learning_rate": 2.9441946717019535e-05, "loss": 0.3344, "step": 1452 }, { "epoch": 0.17852316009337757, "grad_norm": 0.4664169553035116, "learning_rate": 2.9440206934281413e-05, "loss": 0.473, "step": 1453 }, { "epoch": 0.17864602531023469, "grad_norm": 0.38606940055930156, "learning_rate": 2.943846449536087e-05, "loss": 0.3643, "step": 1454 }, { "epoch": 0.17876889052709177, "grad_norm": 0.3534932775809874, "learning_rate": 2.9436719400578426e-05, "loss": 0.3622, "step": 1455 }, { "epoch": 0.1788917557439489, "grad_norm": 0.35947125778440664, "learning_rate": 2.9434971650255067e-05, "loss": 0.3598, "step": 1456 }, { "epoch": 0.179014620960806, "grad_norm": 0.45552481712622667, "learning_rate": 2.9433221244712293e-05, "loss": 0.4067, "step": 1457 }, { "epoch": 0.1791374861776631, "grad_norm": 0.3614031830864955, "learning_rate": 2.9431468184272072e-05, "loss": 0.3641, "step": 1458 }, { "epoch": 0.1792603513945202, "grad_norm": 0.3729738119845403, "learning_rate": 2.942971246925687e-05, "loss": 0.3855, "step": 1459 }, { "epoch": 0.17938321661137732, "grad_norm": 0.37557498657922966, "learning_rate": 2.942795409998964e-05, "loss": 0.33, "step": 1460 }, { "epoch": 0.17950608182823444, "grad_norm": 0.44042796096840525, "learning_rate": 2.9426193076793817e-05, "loss": 0.4128, "step": 1461 }, { "epoch": 0.17962894704509152, "grad_norm": 0.3772458306187124, "learning_rate": 2.942442939999334e-05, "loss": 0.3149, "step": 1462 }, { "epoch": 0.17975181226194864, "grad_norm": 0.34821563743420153, "learning_rate": 2.9422663069912616e-05, "loss": 0.4032, "step": 1463 }, { "epoch": 0.17987467747880576, "grad_norm": 0.4388415178762752, "learning_rate": 2.942089408687656e-05, "loss": 0.4294, "step": 1464 }, { "epoch": 0.17999754269566287, "grad_norm": 0.3776438082576206, "learning_rate": 2.9419122451210556e-05, "loss": 0.5152, "step": 1465 }, { "epoch": 0.18012040791251996, "grad_norm": 0.3218331521676603, "learning_rate": 2.941734816324049e-05, "loss": 0.4388, "step": 1466 }, { "epoch": 0.18024327312937707, "grad_norm": 0.2953618270440429, "learning_rate": 2.9415571223292726e-05, "loss": 0.3332, "step": 1467 }, { "epoch": 0.1803661383462342, "grad_norm": 0.6425958174568922, "learning_rate": 2.9413791631694128e-05, "loss": 0.4596, "step": 1468 }, { "epoch": 0.18048900356309128, "grad_norm": 0.3456204115253766, "learning_rate": 2.9412009388772033e-05, "loss": 0.3775, "step": 1469 }, { "epoch": 0.1806118687799484, "grad_norm": 0.3679209552624679, "learning_rate": 2.941022449485428e-05, "loss": 0.4464, "step": 1470 }, { "epoch": 0.1807347339968055, "grad_norm": 0.44994668981526276, "learning_rate": 2.940843695026918e-05, "loss": 0.44, "step": 1471 }, { "epoch": 0.18085759921366262, "grad_norm": 0.41198016870089266, "learning_rate": 2.9406646755345544e-05, "loss": 0.3798, "step": 1472 }, { "epoch": 0.1809804644305197, "grad_norm": 0.29522168261347304, "learning_rate": 2.9404853910412674e-05, "loss": 0.359, "step": 1473 }, { "epoch": 0.18110332964737683, "grad_norm": 0.3850504545765917, "learning_rate": 2.9403058415800344e-05, "loss": 0.3727, "step": 1474 }, { "epoch": 0.18122619486423394, "grad_norm": 0.43303881670225697, "learning_rate": 2.9401260271838822e-05, "loss": 0.4307, "step": 1475 }, { "epoch": 0.18134906008109106, "grad_norm": 0.7824879132257765, "learning_rate": 2.9399459478858872e-05, "loss": 0.516, "step": 1476 }, { "epoch": 0.18147192529794814, "grad_norm": 0.35925113002274844, "learning_rate": 2.939765603719173e-05, "loss": 0.3767, "step": 1477 }, { "epoch": 0.18159479051480526, "grad_norm": 0.3492525392188001, "learning_rate": 2.9395849947169136e-05, "loss": 0.4058, "step": 1478 }, { "epoch": 0.18171765573166238, "grad_norm": 0.3840290935907956, "learning_rate": 2.939404120912331e-05, "loss": 0.3412, "step": 1479 }, { "epoch": 0.18184052094851946, "grad_norm": 0.4347585300014263, "learning_rate": 2.9392229823386944e-05, "loss": 0.4026, "step": 1480 }, { "epoch": 0.18196338616537658, "grad_norm": 0.5275192145436797, "learning_rate": 2.9390415790293236e-05, "loss": 0.5873, "step": 1481 }, { "epoch": 0.1820862513822337, "grad_norm": 0.401527985229391, "learning_rate": 2.938859911017588e-05, "loss": 0.4323, "step": 1482 }, { "epoch": 0.1822091165990908, "grad_norm": 0.315560638764761, "learning_rate": 2.938677978336902e-05, "loss": 0.3922, "step": 1483 }, { "epoch": 0.1823319818159479, "grad_norm": 0.4206760248519214, "learning_rate": 2.9384957810207326e-05, "loss": 0.4061, "step": 1484 }, { "epoch": 0.182454847032805, "grad_norm": 0.3697561721430475, "learning_rate": 2.938313319102593e-05, "loss": 0.4387, "step": 1485 }, { "epoch": 0.18257771224966213, "grad_norm": 0.34528051520207786, "learning_rate": 2.9381305926160464e-05, "loss": 0.3912, "step": 1486 }, { "epoch": 0.18270057746651922, "grad_norm": 0.3324885591510252, "learning_rate": 2.9379476015947035e-05, "loss": 0.336, "step": 1487 }, { "epoch": 0.18282344268337633, "grad_norm": 0.41523146338860834, "learning_rate": 2.9377643460722256e-05, "loss": 0.4176, "step": 1488 }, { "epoch": 0.18294630790023345, "grad_norm": 0.3683525979117417, "learning_rate": 2.9375808260823192e-05, "loss": 0.3648, "step": 1489 }, { "epoch": 0.18306917311709056, "grad_norm": 0.48498781204622515, "learning_rate": 2.9373970416587437e-05, "loss": 0.4531, "step": 1490 }, { "epoch": 0.18319203833394765, "grad_norm": 0.42975793082651503, "learning_rate": 2.9372129928353042e-05, "loss": 0.4412, "step": 1491 }, { "epoch": 0.18331490355080476, "grad_norm": 0.4484209782325991, "learning_rate": 2.9370286796458552e-05, "loss": 0.4363, "step": 1492 }, { "epoch": 0.18343776876766188, "grad_norm": 0.483998785582023, "learning_rate": 2.9368441021243e-05, "loss": 0.4944, "step": 1493 }, { "epoch": 0.183560633984519, "grad_norm": 0.33739535174869173, "learning_rate": 2.9366592603045906e-05, "loss": 0.3784, "step": 1494 }, { "epoch": 0.18368349920137608, "grad_norm": 0.4009051791117732, "learning_rate": 2.936474154220727e-05, "loss": 0.4197, "step": 1495 }, { "epoch": 0.1838063644182332, "grad_norm": 0.40969234251256087, "learning_rate": 2.936288783906759e-05, "loss": 0.4672, "step": 1496 }, { "epoch": 0.1839292296350903, "grad_norm": 0.3126939513534763, "learning_rate": 2.936103149396784e-05, "loss": 0.3191, "step": 1497 }, { "epoch": 0.1840520948519474, "grad_norm": 0.38970831472603623, "learning_rate": 2.9359172507249477e-05, "loss": 0.371, "step": 1498 }, { "epoch": 0.18417496006880452, "grad_norm": 0.46346728504228185, "learning_rate": 2.935731087925445e-05, "loss": 0.5208, "step": 1499 }, { "epoch": 0.18429782528566163, "grad_norm": 0.37777449043336364, "learning_rate": 2.935544661032521e-05, "loss": 0.3428, "step": 1500 }, { "epoch": 0.18442069050251875, "grad_norm": 0.3588177869204624, "learning_rate": 2.935357970080465e-05, "loss": 0.4515, "step": 1501 }, { "epoch": 0.18454355571937583, "grad_norm": 0.4707832684433042, "learning_rate": 2.93517101510362e-05, "loss": 0.4097, "step": 1502 }, { "epoch": 0.18466642093623295, "grad_norm": 0.4314856614653687, "learning_rate": 2.9349837961363736e-05, "loss": 0.4607, "step": 1503 }, { "epoch": 0.18478928615309007, "grad_norm": 0.32983996054558845, "learning_rate": 2.9347963132131644e-05, "loss": 0.4124, "step": 1504 }, { "epoch": 0.18491215136994718, "grad_norm": 0.4437208054239879, "learning_rate": 2.9346085663684784e-05, "loss": 0.464, "step": 1505 }, { "epoch": 0.18503501658680427, "grad_norm": 0.4036851092650488, "learning_rate": 2.9344205556368502e-05, "loss": 0.418, "step": 1506 }, { "epoch": 0.18515788180366138, "grad_norm": 0.42212981106557046, "learning_rate": 2.9342322810528635e-05, "loss": 0.4745, "step": 1507 }, { "epoch": 0.1852807470205185, "grad_norm": 0.5879824805162495, "learning_rate": 2.93404374265115e-05, "loss": 0.5315, "step": 1508 }, { "epoch": 0.1854036122373756, "grad_norm": 0.4376794533378667, "learning_rate": 2.93385494046639e-05, "loss": 0.3586, "step": 1509 }, { "epoch": 0.1855264774542327, "grad_norm": 0.3806941738941298, "learning_rate": 2.933665874533313e-05, "loss": 0.3786, "step": 1510 }, { "epoch": 0.18564934267108982, "grad_norm": 0.4278889356962055, "learning_rate": 2.9334765448866953e-05, "loss": 0.4208, "step": 1511 }, { "epoch": 0.18577220788794693, "grad_norm": 0.45308306129929093, "learning_rate": 2.933286951561364e-05, "loss": 0.3939, "step": 1512 }, { "epoch": 0.18589507310480402, "grad_norm": 0.3522376800259322, "learning_rate": 2.9330970945921932e-05, "loss": 0.4237, "step": 1513 }, { "epoch": 0.18601793832166114, "grad_norm": 0.4042633729202196, "learning_rate": 2.9329069740141057e-05, "loss": 0.4913, "step": 1514 }, { "epoch": 0.18614080353851825, "grad_norm": 0.3862203408064831, "learning_rate": 2.9327165898620734e-05, "loss": 0.4254, "step": 1515 }, { "epoch": 0.18626366875537537, "grad_norm": 0.49052710110100045, "learning_rate": 2.9325259421711155e-05, "loss": 0.4121, "step": 1516 }, { "epoch": 0.18638653397223245, "grad_norm": 0.3650762352977382, "learning_rate": 2.9323350309763006e-05, "loss": 0.4255, "step": 1517 }, { "epoch": 0.18650939918908957, "grad_norm": 0.35699468997542444, "learning_rate": 2.9321438563127464e-05, "loss": 0.4293, "step": 1518 }, { "epoch": 0.18663226440594669, "grad_norm": 0.3499150008799107, "learning_rate": 2.931952418215617e-05, "loss": 0.3504, "step": 1519 }, { "epoch": 0.18675512962280377, "grad_norm": 0.45553551269220804, "learning_rate": 2.9317607167201273e-05, "loss": 0.4822, "step": 1520 }, { "epoch": 0.1868779948396609, "grad_norm": 0.4776835175127713, "learning_rate": 2.931568751861539e-05, "loss": 0.4468, "step": 1521 }, { "epoch": 0.187000860056518, "grad_norm": 0.3728536239565901, "learning_rate": 2.9313765236751626e-05, "loss": 0.401, "step": 1522 }, { "epoch": 0.18712372527337512, "grad_norm": 0.37994622631291736, "learning_rate": 2.9311840321963578e-05, "loss": 0.4264, "step": 1523 }, { "epoch": 0.1872465904902322, "grad_norm": 0.3620460784925427, "learning_rate": 2.9309912774605313e-05, "loss": 0.4348, "step": 1524 }, { "epoch": 0.18736945570708932, "grad_norm": 0.41909780722730666, "learning_rate": 2.9307982595031398e-05, "loss": 0.4613, "step": 1525 }, { "epoch": 0.18749232092394644, "grad_norm": 0.405023283989829, "learning_rate": 2.9306049783596875e-05, "loss": 0.4456, "step": 1526 }, { "epoch": 0.18761518614080352, "grad_norm": 0.3893642477495935, "learning_rate": 2.9304114340657272e-05, "loss": 0.3968, "step": 1527 }, { "epoch": 0.18773805135766064, "grad_norm": 0.39960408193705005, "learning_rate": 2.9302176266568607e-05, "loss": 0.4171, "step": 1528 }, { "epoch": 0.18786091657451776, "grad_norm": 0.38997507498711226, "learning_rate": 2.9300235561687368e-05, "loss": 0.4527, "step": 1529 }, { "epoch": 0.18798378179137487, "grad_norm": 0.3589678059535735, "learning_rate": 2.9298292226370533e-05, "loss": 0.3337, "step": 1530 }, { "epoch": 0.18810664700823196, "grad_norm": 0.340990584110546, "learning_rate": 2.9296346260975576e-05, "loss": 0.4228, "step": 1531 }, { "epoch": 0.18822951222508907, "grad_norm": 0.33392832336719513, "learning_rate": 2.9294397665860437e-05, "loss": 0.3607, "step": 1532 }, { "epoch": 0.1883523774419462, "grad_norm": 0.39715474327998707, "learning_rate": 2.929244644138355e-05, "loss": 0.4659, "step": 1533 }, { "epoch": 0.1884752426588033, "grad_norm": 0.3705054878097287, "learning_rate": 2.929049258790383e-05, "loss": 0.342, "step": 1534 }, { "epoch": 0.1885981078756604, "grad_norm": 0.3746637474113698, "learning_rate": 2.9288536105780674e-05, "loss": 0.3686, "step": 1535 }, { "epoch": 0.1887209730925175, "grad_norm": 0.46090601670912734, "learning_rate": 2.9286576995373966e-05, "loss": 0.5169, "step": 1536 }, { "epoch": 0.18884383830937462, "grad_norm": 0.3470070296973488, "learning_rate": 2.9284615257044076e-05, "loss": 0.4248, "step": 1537 }, { "epoch": 0.1889667035262317, "grad_norm": 0.49126372195605383, "learning_rate": 2.9282650891151844e-05, "loss": 0.4598, "step": 1538 }, { "epoch": 0.18908956874308883, "grad_norm": 0.3782458038358276, "learning_rate": 2.9280683898058608e-05, "loss": 0.4609, "step": 1539 }, { "epoch": 0.18921243395994594, "grad_norm": 0.3937255517548064, "learning_rate": 2.9278714278126182e-05, "loss": 0.4143, "step": 1540 }, { "epoch": 0.18933529917680306, "grad_norm": 0.34862852786608245, "learning_rate": 2.9276742031716866e-05, "loss": 0.4168, "step": 1541 }, { "epoch": 0.18945816439366014, "grad_norm": 0.4128087043265643, "learning_rate": 2.9274767159193438e-05, "loss": 0.4441, "step": 1542 }, { "epoch": 0.18958102961051726, "grad_norm": 0.361190507311468, "learning_rate": 2.927278966091917e-05, "loss": 0.3999, "step": 1543 }, { "epoch": 0.18970389482737438, "grad_norm": 0.39488261709162886, "learning_rate": 2.9270809537257805e-05, "loss": 0.4132, "step": 1544 }, { "epoch": 0.1898267600442315, "grad_norm": 0.3388171554217992, "learning_rate": 2.926882678857358e-05, "loss": 0.4233, "step": 1545 }, { "epoch": 0.18994962526108858, "grad_norm": 0.39733379328393176, "learning_rate": 2.92668414152312e-05, "loss": 0.4255, "step": 1546 }, { "epoch": 0.1900724904779457, "grad_norm": 0.3363434369713239, "learning_rate": 2.926485341759586e-05, "loss": 0.4073, "step": 1547 }, { "epoch": 0.1901953556948028, "grad_norm": 0.42125776962584116, "learning_rate": 2.926286279603325e-05, "loss": 0.3926, "step": 1548 }, { "epoch": 0.1903182209116599, "grad_norm": 0.3748402439725868, "learning_rate": 2.9260869550909526e-05, "loss": 0.3382, "step": 1549 }, { "epoch": 0.190441086128517, "grad_norm": 0.3958179599296675, "learning_rate": 2.9258873682591334e-05, "loss": 0.3347, "step": 1550 }, { "epoch": 0.19056395134537413, "grad_norm": 0.42155420359697476, "learning_rate": 2.9256875191445797e-05, "loss": 0.4676, "step": 1551 }, { "epoch": 0.19068681656223124, "grad_norm": 0.4390136955186095, "learning_rate": 2.925487407784053e-05, "loss": 0.3877, "step": 1552 }, { "epoch": 0.19080968177908833, "grad_norm": 0.44796145648799207, "learning_rate": 2.925287034214362e-05, "loss": 0.3927, "step": 1553 }, { "epoch": 0.19093254699594545, "grad_norm": 0.3484916532164758, "learning_rate": 2.925086398472365e-05, "loss": 0.3816, "step": 1554 }, { "epoch": 0.19105541221280256, "grad_norm": 0.4125890763611902, "learning_rate": 2.9248855005949665e-05, "loss": 0.4124, "step": 1555 }, { "epoch": 0.19117827742965968, "grad_norm": 0.36762852362101683, "learning_rate": 2.924684340619121e-05, "loss": 0.3867, "step": 1556 }, { "epoch": 0.19130114264651676, "grad_norm": 0.3741157318637638, "learning_rate": 2.92448291858183e-05, "loss": 0.4007, "step": 1557 }, { "epoch": 0.19142400786337388, "grad_norm": 0.3847363879762112, "learning_rate": 2.924281234520145e-05, "loss": 0.4549, "step": 1558 }, { "epoch": 0.191546873080231, "grad_norm": 0.3168397421334318, "learning_rate": 2.924079288471163e-05, "loss": 0.4272, "step": 1559 }, { "epoch": 0.19166973829708808, "grad_norm": 0.42743975493353104, "learning_rate": 2.9238770804720318e-05, "loss": 0.3895, "step": 1560 }, { "epoch": 0.1917926035139452, "grad_norm": 0.36705069723595474, "learning_rate": 2.923674610559946e-05, "loss": 0.3528, "step": 1561 }, { "epoch": 0.1919154687308023, "grad_norm": 0.36072765081037067, "learning_rate": 2.9234718787721477e-05, "loss": 0.4274, "step": 1562 }, { "epoch": 0.19203833394765943, "grad_norm": 0.3986381929647755, "learning_rate": 2.9232688851459293e-05, "loss": 0.3947, "step": 1563 }, { "epoch": 0.19216119916451652, "grad_norm": 0.4021911424708003, "learning_rate": 2.9230656297186298e-05, "loss": 0.4322, "step": 1564 }, { "epoch": 0.19228406438137363, "grad_norm": 0.3871794327654661, "learning_rate": 2.9228621125276363e-05, "loss": 0.3938, "step": 1565 }, { "epoch": 0.19240692959823075, "grad_norm": 0.4877977318682903, "learning_rate": 2.9226583336103855e-05, "loss": 0.5004, "step": 1566 }, { "epoch": 0.19252979481508786, "grad_norm": 0.36290503902006743, "learning_rate": 2.9224542930043595e-05, "loss": 0.3552, "step": 1567 }, { "epoch": 0.19265266003194495, "grad_norm": 0.41887538786995215, "learning_rate": 2.9222499907470917e-05, "loss": 0.3601, "step": 1568 }, { "epoch": 0.19277552524880207, "grad_norm": 0.37478003958192113, "learning_rate": 2.922045426876162e-05, "loss": 0.416, "step": 1569 }, { "epoch": 0.19289839046565918, "grad_norm": 0.3716326772953682, "learning_rate": 2.921840601429198e-05, "loss": 0.3405, "step": 1570 }, { "epoch": 0.19302125568251627, "grad_norm": 0.40458917163997166, "learning_rate": 2.9216355144438766e-05, "loss": 0.3901, "step": 1571 }, { "epoch": 0.19314412089937338, "grad_norm": 0.3973412404822327, "learning_rate": 2.9214301659579218e-05, "loss": 0.4629, "step": 1572 }, { "epoch": 0.1932669861162305, "grad_norm": 0.3847813431357089, "learning_rate": 2.921224556009106e-05, "loss": 0.462, "step": 1573 }, { "epoch": 0.19338985133308761, "grad_norm": 0.3499047412473859, "learning_rate": 2.9210186846352504e-05, "loss": 0.375, "step": 1574 }, { "epoch": 0.1935127165499447, "grad_norm": 0.39581581078715394, "learning_rate": 2.9208125518742232e-05, "loss": 0.404, "step": 1575 }, { "epoch": 0.19363558176680182, "grad_norm": 0.3296293210161265, "learning_rate": 2.9206061577639415e-05, "loss": 0.4217, "step": 1576 }, { "epoch": 0.19375844698365893, "grad_norm": 0.34188050915196955, "learning_rate": 2.9203995023423697e-05, "loss": 0.4607, "step": 1577 }, { "epoch": 0.19388131220051602, "grad_norm": 0.34933862880602334, "learning_rate": 2.9201925856475214e-05, "loss": 0.3653, "step": 1578 }, { "epoch": 0.19400417741737314, "grad_norm": 0.32109492926949235, "learning_rate": 2.9199854077174573e-05, "loss": 0.3997, "step": 1579 }, { "epoch": 0.19412704263423025, "grad_norm": 0.384601433956787, "learning_rate": 2.9197779685902862e-05, "loss": 0.4467, "step": 1580 }, { "epoch": 0.19424990785108737, "grad_norm": 0.3769323960002537, "learning_rate": 2.9195702683041657e-05, "loss": 0.4778, "step": 1581 }, { "epoch": 0.19437277306794445, "grad_norm": 0.41640525901076453, "learning_rate": 2.9193623068973003e-05, "loss": 0.4552, "step": 1582 }, { "epoch": 0.19449563828480157, "grad_norm": 0.437067590140436, "learning_rate": 2.919154084407943e-05, "loss": 0.403, "step": 1583 }, { "epoch": 0.19461850350165869, "grad_norm": 0.3911839726573372, "learning_rate": 2.9189456008743964e-05, "loss": 0.3842, "step": 1584 }, { "epoch": 0.1947413687185158, "grad_norm": 0.4003699264353185, "learning_rate": 2.918736856335008e-05, "loss": 0.4012, "step": 1585 }, { "epoch": 0.1948642339353729, "grad_norm": 0.37216718311004143, "learning_rate": 2.9185278508281757e-05, "loss": 0.3971, "step": 1586 }, { "epoch": 0.19498709915223, "grad_norm": 0.36888218264019906, "learning_rate": 2.9183185843923446e-05, "loss": 0.4062, "step": 1587 }, { "epoch": 0.19510996436908712, "grad_norm": 0.3785729445886331, "learning_rate": 2.9181090570660086e-05, "loss": 0.4, "step": 1588 }, { "epoch": 0.1952328295859442, "grad_norm": 0.3803713702613787, "learning_rate": 2.917899268887708e-05, "loss": 0.4387, "step": 1589 }, { "epoch": 0.19535569480280132, "grad_norm": 0.38902963006047925, "learning_rate": 2.9176892198960324e-05, "loss": 0.3611, "step": 1590 }, { "epoch": 0.19547856001965844, "grad_norm": 0.43345573207713, "learning_rate": 2.9174789101296186e-05, "loss": 0.3712, "step": 1591 }, { "epoch": 0.19560142523651555, "grad_norm": 0.37635650990150216, "learning_rate": 2.9172683396271523e-05, "loss": 0.364, "step": 1592 }, { "epoch": 0.19572429045337264, "grad_norm": 0.3240958557163446, "learning_rate": 2.917057508427366e-05, "loss": 0.3807, "step": 1593 }, { "epoch": 0.19584715567022976, "grad_norm": 0.42108408238275785, "learning_rate": 2.916846416569041e-05, "loss": 0.4179, "step": 1594 }, { "epoch": 0.19597002088708687, "grad_norm": 0.4154820123281875, "learning_rate": 2.916635064091006e-05, "loss": 0.4052, "step": 1595 }, { "epoch": 0.196092886103944, "grad_norm": 0.4243406841774199, "learning_rate": 2.9164234510321387e-05, "loss": 0.4359, "step": 1596 }, { "epoch": 0.19621575132080107, "grad_norm": 0.4542919470968189, "learning_rate": 2.9162115774313628e-05, "loss": 0.4042, "step": 1597 }, { "epoch": 0.1963386165376582, "grad_norm": 0.4098102431867592, "learning_rate": 2.9159994433276525e-05, "loss": 0.4648, "step": 1598 }, { "epoch": 0.1964614817545153, "grad_norm": 0.42868799669444385, "learning_rate": 2.9157870487600268e-05, "loss": 0.4245, "step": 1599 }, { "epoch": 0.1965843469713724, "grad_norm": 0.3786231505115621, "learning_rate": 2.9155743937675556e-05, "loss": 0.4472, "step": 1600 }, { "epoch": 0.1967072121882295, "grad_norm": 0.42373107948088307, "learning_rate": 2.915361478389355e-05, "loss": 0.4224, "step": 1601 }, { "epoch": 0.19683007740508662, "grad_norm": 0.5182291682577364, "learning_rate": 2.9151483026645895e-05, "loss": 0.4088, "step": 1602 }, { "epoch": 0.19695294262194374, "grad_norm": 0.3577348785903126, "learning_rate": 2.914934866632471e-05, "loss": 0.3962, "step": 1603 }, { "epoch": 0.19707580783880083, "grad_norm": 0.3336965452276358, "learning_rate": 2.91472117033226e-05, "loss": 0.4229, "step": 1604 }, { "epoch": 0.19719867305565794, "grad_norm": 0.5912030564156255, "learning_rate": 2.9145072138032648e-05, "loss": 0.4575, "step": 1605 }, { "epoch": 0.19732153827251506, "grad_norm": 0.4831371475409694, "learning_rate": 2.9142929970848406e-05, "loss": 0.4297, "step": 1606 }, { "epoch": 0.19744440348937217, "grad_norm": 0.3916246864756758, "learning_rate": 2.9140785202163918e-05, "loss": 0.403, "step": 1607 }, { "epoch": 0.19756726870622926, "grad_norm": 0.35982630965887225, "learning_rate": 2.91386378323737e-05, "loss": 0.3422, "step": 1608 }, { "epoch": 0.19769013392308638, "grad_norm": 0.45726695063297895, "learning_rate": 2.9136487861872743e-05, "loss": 0.4039, "step": 1609 }, { "epoch": 0.1978129991399435, "grad_norm": 0.4887997561822433, "learning_rate": 2.9134335291056522e-05, "loss": 0.4406, "step": 1610 }, { "epoch": 0.19793586435680058, "grad_norm": 0.44343264203573424, "learning_rate": 2.9132180120320987e-05, "loss": 0.4752, "step": 1611 }, { "epoch": 0.1980587295736577, "grad_norm": 0.48596918887074964, "learning_rate": 2.9130022350062573e-05, "loss": 0.4434, "step": 1612 }, { "epoch": 0.1981815947905148, "grad_norm": 0.485431555411544, "learning_rate": 2.9127861980678185e-05, "loss": 0.4404, "step": 1613 }, { "epoch": 0.19830446000737192, "grad_norm": 0.3650626891305087, "learning_rate": 2.9125699012565204e-05, "loss": 0.4582, "step": 1614 }, { "epoch": 0.198427325224229, "grad_norm": 0.42655168696252677, "learning_rate": 2.91235334461215e-05, "loss": 0.4706, "step": 1615 }, { "epoch": 0.19855019044108613, "grad_norm": 0.4648230441860458, "learning_rate": 2.912136528174541e-05, "loss": 0.4431, "step": 1616 }, { "epoch": 0.19867305565794324, "grad_norm": 0.37096753662237164, "learning_rate": 2.9119194519835762e-05, "loss": 0.3868, "step": 1617 }, { "epoch": 0.19879592087480033, "grad_norm": 0.3981780660194219, "learning_rate": 2.9117021160791844e-05, "loss": 0.3687, "step": 1618 }, { "epoch": 0.19891878609165745, "grad_norm": 0.40444831849846247, "learning_rate": 2.9114845205013436e-05, "loss": 0.4627, "step": 1619 }, { "epoch": 0.19904165130851456, "grad_norm": 0.36374168059075573, "learning_rate": 2.9112666652900796e-05, "loss": 0.4197, "step": 1620 }, { "epoch": 0.19916451652537168, "grad_norm": 0.4992409914585133, "learning_rate": 2.9110485504854643e-05, "loss": 0.5004, "step": 1621 }, { "epoch": 0.19928738174222876, "grad_norm": 0.4358129972817312, "learning_rate": 2.9108301761276194e-05, "loss": 0.373, "step": 1622 }, { "epoch": 0.19941024695908588, "grad_norm": 0.4062652219155636, "learning_rate": 2.910611542256713e-05, "loss": 0.4596, "step": 1623 }, { "epoch": 0.199533112175943, "grad_norm": 0.31971278836326444, "learning_rate": 2.9103926489129616e-05, "loss": 0.427, "step": 1624 }, { "epoch": 0.1996559773928001, "grad_norm": 0.33372551704778025, "learning_rate": 2.910173496136629e-05, "loss": 0.4081, "step": 1625 }, { "epoch": 0.1997788426096572, "grad_norm": 0.46886359132363764, "learning_rate": 2.9099540839680272e-05, "loss": 0.4772, "step": 1626 }, { "epoch": 0.1999017078265143, "grad_norm": 0.5170198991206173, "learning_rate": 2.9097344124475155e-05, "loss": 0.5074, "step": 1627 }, { "epoch": 0.20002457304337143, "grad_norm": 0.48110671796689713, "learning_rate": 2.909514481615501e-05, "loss": 0.4319, "step": 1628 }, { "epoch": 0.20014743826022852, "grad_norm": 0.45620915217744773, "learning_rate": 2.9092942915124386e-05, "loss": 0.4453, "step": 1629 }, { "epoch": 0.20027030347708563, "grad_norm": 0.3675788538879967, "learning_rate": 2.909073842178831e-05, "loss": 0.4904, "step": 1630 }, { "epoch": 0.20039316869394275, "grad_norm": 0.45959081336209734, "learning_rate": 2.9088531336552285e-05, "loss": 0.4761, "step": 1631 }, { "epoch": 0.20051603391079986, "grad_norm": 0.39361088945907363, "learning_rate": 2.9086321659822285e-05, "loss": 0.3737, "step": 1632 }, { "epoch": 0.20063889912765695, "grad_norm": 0.45073594787309, "learning_rate": 2.908410939200477e-05, "loss": 0.3963, "step": 1633 }, { "epoch": 0.20076176434451407, "grad_norm": 0.460134493203638, "learning_rate": 2.908189453350667e-05, "loss": 0.4504, "step": 1634 }, { "epoch": 0.20088462956137118, "grad_norm": 0.40454142925790193, "learning_rate": 2.9079677084735396e-05, "loss": 0.4063, "step": 1635 }, { "epoch": 0.2010074947782283, "grad_norm": 0.37301275463967287, "learning_rate": 2.9077457046098833e-05, "loss": 0.4149, "step": 1636 }, { "epoch": 0.20113035999508538, "grad_norm": 0.3241670943452751, "learning_rate": 2.9075234418005344e-05, "loss": 0.4527, "step": 1637 }, { "epoch": 0.2012532252119425, "grad_norm": 0.37857046790293025, "learning_rate": 2.907300920086376e-05, "loss": 0.3802, "step": 1638 }, { "epoch": 0.20137609042879961, "grad_norm": 0.33586602939866017, "learning_rate": 2.90707813950834e-05, "loss": 0.4459, "step": 1639 }, { "epoch": 0.2014989556456567, "grad_norm": 0.46927726579176193, "learning_rate": 2.906855100107406e-05, "loss": 0.4456, "step": 1640 }, { "epoch": 0.20162182086251382, "grad_norm": 0.3523051723569945, "learning_rate": 2.9066318019245994e-05, "loss": 0.454, "step": 1641 }, { "epoch": 0.20174468607937093, "grad_norm": 0.3369752680094132, "learning_rate": 2.9064082450009956e-05, "loss": 0.3603, "step": 1642 }, { "epoch": 0.20186755129622805, "grad_norm": 0.5261773918591574, "learning_rate": 2.9061844293777156e-05, "loss": 0.4972, "step": 1643 }, { "epoch": 0.20199041651308514, "grad_norm": 0.33370324691655623, "learning_rate": 2.9059603550959296e-05, "loss": 0.3584, "step": 1644 }, { "epoch": 0.20211328172994225, "grad_norm": 0.4096488560115729, "learning_rate": 2.9057360221968546e-05, "loss": 0.4466, "step": 1645 }, { "epoch": 0.20223614694679937, "grad_norm": 0.3547351784684172, "learning_rate": 2.9055114307217543e-05, "loss": 0.3688, "step": 1646 }, { "epoch": 0.20235901216365648, "grad_norm": 0.3000783835285307, "learning_rate": 2.9052865807119415e-05, "loss": 0.3981, "step": 1647 }, { "epoch": 0.20248187738051357, "grad_norm": 0.3495358694956799, "learning_rate": 2.905061472208776e-05, "loss": 0.4295, "step": 1648 }, { "epoch": 0.20260474259737069, "grad_norm": 0.49514034768888393, "learning_rate": 2.9048361052536644e-05, "loss": 0.5098, "step": 1649 }, { "epoch": 0.2027276078142278, "grad_norm": 0.4283496836390344, "learning_rate": 2.904610479888062e-05, "loss": 0.38, "step": 1650 }, { "epoch": 0.2028504730310849, "grad_norm": 0.3717924127600523, "learning_rate": 2.9043845961534713e-05, "loss": 0.394, "step": 1651 }, { "epoch": 0.202973338247942, "grad_norm": 0.35555141918806893, "learning_rate": 2.904158454091442e-05, "loss": 0.3775, "step": 1652 }, { "epoch": 0.20309620346479912, "grad_norm": 0.3554987234867664, "learning_rate": 2.9039320537435706e-05, "loss": 0.4132, "step": 1653 }, { "epoch": 0.20321906868165623, "grad_norm": 0.34334014205274255, "learning_rate": 2.9037053951515036e-05, "loss": 0.391, "step": 1654 }, { "epoch": 0.20334193389851332, "grad_norm": 0.36017003691606214, "learning_rate": 2.9034784783569324e-05, "loss": 0.4423, "step": 1655 }, { "epoch": 0.20346479911537044, "grad_norm": 0.39485020173738256, "learning_rate": 2.9032513034015965e-05, "loss": 0.4393, "step": 1656 }, { "epoch": 0.20358766433222755, "grad_norm": 0.41850998126780736, "learning_rate": 2.903023870327284e-05, "loss": 0.4598, "step": 1657 }, { "epoch": 0.20371052954908467, "grad_norm": 0.45482887983790443, "learning_rate": 2.90279617917583e-05, "loss": 0.4684, "step": 1658 }, { "epoch": 0.20383339476594176, "grad_norm": 0.37044106614485905, "learning_rate": 2.9025682299891154e-05, "loss": 0.382, "step": 1659 }, { "epoch": 0.20395625998279887, "grad_norm": 0.4293858566297992, "learning_rate": 2.902340022809071e-05, "loss": 0.4518, "step": 1660 }, { "epoch": 0.204079125199656, "grad_norm": 0.3067844763283167, "learning_rate": 2.9021115576776745e-05, "loss": 0.4207, "step": 1661 }, { "epoch": 0.20420199041651307, "grad_norm": 0.41702456901166085, "learning_rate": 2.9018828346369496e-05, "loss": 0.3543, "step": 1662 }, { "epoch": 0.2043248556333702, "grad_norm": 0.4248796867173025, "learning_rate": 2.9016538537289688e-05, "loss": 0.4305, "step": 1663 }, { "epoch": 0.2044477208502273, "grad_norm": 0.47988872383466474, "learning_rate": 2.901424614995852e-05, "loss": 0.4276, "step": 1664 }, { "epoch": 0.20457058606708442, "grad_norm": 0.446867154564427, "learning_rate": 2.901195118479765e-05, "loss": 0.4841, "step": 1665 }, { "epoch": 0.2046934512839415, "grad_norm": 0.34301730719374235, "learning_rate": 2.900965364222924e-05, "loss": 0.4414, "step": 1666 }, { "epoch": 0.20481631650079862, "grad_norm": 0.40345524646100434, "learning_rate": 2.900735352267589e-05, "loss": 0.4407, "step": 1667 }, { "epoch": 0.20493918171765574, "grad_norm": 0.33370320611278614, "learning_rate": 2.9005050826560704e-05, "loss": 0.3575, "step": 1668 }, { "epoch": 0.20506204693451283, "grad_norm": 0.42099269405987083, "learning_rate": 2.9002745554307247e-05, "loss": 0.4074, "step": 1669 }, { "epoch": 0.20518491215136994, "grad_norm": 0.34342659025886124, "learning_rate": 2.900043770633955e-05, "loss": 0.3973, "step": 1670 }, { "epoch": 0.20530777736822706, "grad_norm": 0.34130984664158287, "learning_rate": 2.8998127283082138e-05, "loss": 0.3874, "step": 1671 }, { "epoch": 0.20543064258508417, "grad_norm": 0.41853811321864, "learning_rate": 2.8995814284959992e-05, "loss": 0.3756, "step": 1672 }, { "epoch": 0.20555350780194126, "grad_norm": 0.4141799527877904, "learning_rate": 2.8993498712398575e-05, "loss": 0.427, "step": 1673 }, { "epoch": 0.20567637301879838, "grad_norm": 0.3428029186453999, "learning_rate": 2.8991180565823823e-05, "loss": 0.3938, "step": 1674 }, { "epoch": 0.2057992382356555, "grad_norm": 0.3469007481667497, "learning_rate": 2.8988859845662137e-05, "loss": 0.3916, "step": 1675 }, { "epoch": 0.2059221034525126, "grad_norm": 0.46855440793970293, "learning_rate": 2.8986536552340406e-05, "loss": 0.4251, "step": 1676 }, { "epoch": 0.2060449686693697, "grad_norm": 0.42897063969689997, "learning_rate": 2.8984210686285982e-05, "loss": 0.4878, "step": 1677 }, { "epoch": 0.2061678338862268, "grad_norm": 0.43400032286597057, "learning_rate": 2.8981882247926695e-05, "loss": 0.3986, "step": 1678 }, { "epoch": 0.20629069910308392, "grad_norm": 0.44637149155398365, "learning_rate": 2.897955123769084e-05, "loss": 0.4025, "step": 1679 }, { "epoch": 0.206413564319941, "grad_norm": 0.34129303160159613, "learning_rate": 2.8977217656007198e-05, "loss": 0.4274, "step": 1680 }, { "epoch": 0.20653642953679813, "grad_norm": 0.38902660972650405, "learning_rate": 2.897488150330501e-05, "loss": 0.3341, "step": 1681 }, { "epoch": 0.20665929475365524, "grad_norm": 0.4564436239033609, "learning_rate": 2.8972542780014002e-05, "loss": 0.4491, "step": 1682 }, { "epoch": 0.20678215997051236, "grad_norm": 0.4327685503465924, "learning_rate": 2.8970201486564367e-05, "loss": 0.466, "step": 1683 }, { "epoch": 0.20690502518736945, "grad_norm": 0.3881493017235871, "learning_rate": 2.896785762338677e-05, "loss": 0.4005, "step": 1684 }, { "epoch": 0.20702789040422656, "grad_norm": 0.34748162499636226, "learning_rate": 2.8965511190912342e-05, "loss": 0.3801, "step": 1685 }, { "epoch": 0.20715075562108368, "grad_norm": 0.42515585377135157, "learning_rate": 2.89631621895727e-05, "loss": 0.4787, "step": 1686 }, { "epoch": 0.2072736208379408, "grad_norm": 0.3192670403764972, "learning_rate": 2.8960810619799933e-05, "loss": 0.5004, "step": 1687 }, { "epoch": 0.20739648605479788, "grad_norm": 0.31451725792699736, "learning_rate": 2.8958456482026586e-05, "loss": 0.3579, "step": 1688 }, { "epoch": 0.207519351271655, "grad_norm": 0.5147745515592173, "learning_rate": 2.8956099776685695e-05, "loss": 0.444, "step": 1689 }, { "epoch": 0.2076422164885121, "grad_norm": 0.3551194779331285, "learning_rate": 2.895374050421076e-05, "loss": 0.3846, "step": 1690 }, { "epoch": 0.2077650817053692, "grad_norm": 0.37477729130701265, "learning_rate": 2.8951378665035754e-05, "loss": 0.3801, "step": 1691 }, { "epoch": 0.2078879469222263, "grad_norm": 0.3188826955029582, "learning_rate": 2.894901425959512e-05, "loss": 0.3478, "step": 1692 }, { "epoch": 0.20801081213908343, "grad_norm": 0.4581604536341526, "learning_rate": 2.894664728832377e-05, "loss": 0.4176, "step": 1693 }, { "epoch": 0.20813367735594054, "grad_norm": 0.4240032309705669, "learning_rate": 2.8944277751657106e-05, "loss": 0.3365, "step": 1694 }, { "epoch": 0.20825654257279763, "grad_norm": 0.3149588914617037, "learning_rate": 2.894190565003097e-05, "loss": 0.374, "step": 1695 }, { "epoch": 0.20837940778965475, "grad_norm": 0.35940718242789504, "learning_rate": 2.893953098388172e-05, "loss": 0.3619, "step": 1696 }, { "epoch": 0.20850227300651186, "grad_norm": 0.3903644169917172, "learning_rate": 2.8937153753646138e-05, "loss": 0.366, "step": 1697 }, { "epoch": 0.20862513822336898, "grad_norm": 0.3736994868092499, "learning_rate": 2.8934773959761512e-05, "loss": 0.371, "step": 1698 }, { "epoch": 0.20874800344022607, "grad_norm": 0.35389136800015397, "learning_rate": 2.8932391602665585e-05, "loss": 0.5463, "step": 1699 }, { "epoch": 0.20887086865708318, "grad_norm": 0.36827931100417777, "learning_rate": 2.8930006682796578e-05, "loss": 0.409, "step": 1700 }, { "epoch": 0.2089937338739403, "grad_norm": 0.38963683484327877, "learning_rate": 2.892761920059318e-05, "loss": 0.3601, "step": 1701 }, { "epoch": 0.20911659909079738, "grad_norm": 0.3847693455964251, "learning_rate": 2.8925229156494553e-05, "loss": 0.4006, "step": 1702 }, { "epoch": 0.2092394643076545, "grad_norm": 0.4124468597966599, "learning_rate": 2.892283655094033e-05, "loss": 0.4415, "step": 1703 }, { "epoch": 0.20936232952451161, "grad_norm": 0.395484753128147, "learning_rate": 2.892044138437062e-05, "loss": 0.3837, "step": 1704 }, { "epoch": 0.20948519474136873, "grad_norm": 0.38022929510152964, "learning_rate": 2.8918043657225994e-05, "loss": 0.4762, "step": 1705 }, { "epoch": 0.20960805995822582, "grad_norm": 0.44082775186102513, "learning_rate": 2.8915643369947497e-05, "loss": 0.377, "step": 1706 }, { "epoch": 0.20973092517508293, "grad_norm": 0.41359316617506686, "learning_rate": 2.891324052297665e-05, "loss": 0.394, "step": 1707 }, { "epoch": 0.20985379039194005, "grad_norm": 0.31915567265903777, "learning_rate": 2.891083511675544e-05, "loss": 0.4087, "step": 1708 }, { "epoch": 0.20997665560879714, "grad_norm": 0.3922185586557953, "learning_rate": 2.8908427151726325e-05, "loss": 0.4648, "step": 1709 }, { "epoch": 0.21009952082565425, "grad_norm": 0.3616669426650242, "learning_rate": 2.8906016628332233e-05, "loss": 0.4064, "step": 1710 }, { "epoch": 0.21022238604251137, "grad_norm": 0.4069192164347009, "learning_rate": 2.8903603547016565e-05, "loss": 0.3813, "step": 1711 }, { "epoch": 0.21034525125936848, "grad_norm": 0.39870680471951064, "learning_rate": 2.8901187908223195e-05, "loss": 0.366, "step": 1712 }, { "epoch": 0.21046811647622557, "grad_norm": 0.4301497941415775, "learning_rate": 2.8898769712396458e-05, "loss": 0.4948, "step": 1713 }, { "epoch": 0.21059098169308269, "grad_norm": 0.41525575449133717, "learning_rate": 2.8896348959981173e-05, "loss": 0.4069, "step": 1714 }, { "epoch": 0.2107138469099398, "grad_norm": 0.34368560784380153, "learning_rate": 2.8893925651422614e-05, "loss": 0.39, "step": 1715 }, { "epoch": 0.21083671212679692, "grad_norm": 0.37937303079865337, "learning_rate": 2.8891499787166542e-05, "loss": 0.4493, "step": 1716 }, { "epoch": 0.210959577343654, "grad_norm": 0.3542647340487318, "learning_rate": 2.8889071367659172e-05, "loss": 0.4019, "step": 1717 }, { "epoch": 0.21108244256051112, "grad_norm": 0.37635720147059737, "learning_rate": 2.8886640393347195e-05, "loss": 0.3845, "step": 1718 }, { "epoch": 0.21120530777736823, "grad_norm": 0.3540537477385094, "learning_rate": 2.888420686467778e-05, "loss": 0.4467, "step": 1719 }, { "epoch": 0.21132817299422532, "grad_norm": 0.3527369292950369, "learning_rate": 2.8881770782098547e-05, "loss": 0.4226, "step": 1720 }, { "epoch": 0.21145103821108244, "grad_norm": 0.3368554692510392, "learning_rate": 2.8879332146057612e-05, "loss": 0.3627, "step": 1721 }, { "epoch": 0.21157390342793955, "grad_norm": 0.3371425467459933, "learning_rate": 2.887689095700354e-05, "loss": 0.4143, "step": 1722 }, { "epoch": 0.21169676864479667, "grad_norm": 0.35093894835660605, "learning_rate": 2.8874447215385365e-05, "loss": 0.4331, "step": 1723 }, { "epoch": 0.21181963386165376, "grad_norm": 0.3222372025879762, "learning_rate": 2.8872000921652607e-05, "loss": 0.3443, "step": 1724 }, { "epoch": 0.21194249907851087, "grad_norm": 0.32713392666177527, "learning_rate": 2.8869552076255243e-05, "loss": 0.3648, "step": 1725 }, { "epoch": 0.212065364295368, "grad_norm": 0.38910661708493205, "learning_rate": 2.886710067964372e-05, "loss": 0.4177, "step": 1726 }, { "epoch": 0.2121882295122251, "grad_norm": 0.41500618338926887, "learning_rate": 2.8864646732268962e-05, "loss": 0.4357, "step": 1727 }, { "epoch": 0.2123110947290822, "grad_norm": 0.34288378301052186, "learning_rate": 2.8862190234582348e-05, "loss": 0.4514, "step": 1728 }, { "epoch": 0.2124339599459393, "grad_norm": 0.41465192611813756, "learning_rate": 2.8859731187035746e-05, "loss": 0.5141, "step": 1729 }, { "epoch": 0.21255682516279642, "grad_norm": 0.33911166558626227, "learning_rate": 2.8857269590081472e-05, "loss": 0.3383, "step": 1730 }, { "epoch": 0.2126796903796535, "grad_norm": 0.37486782700688515, "learning_rate": 2.885480544417232e-05, "loss": 0.3708, "step": 1731 }, { "epoch": 0.21280255559651062, "grad_norm": 0.38435092669737664, "learning_rate": 2.8852338749761566e-05, "loss": 0.4561, "step": 1732 }, { "epoch": 0.21292542081336774, "grad_norm": 0.4404641771959448, "learning_rate": 2.884986950730293e-05, "loss": 0.4307, "step": 1733 }, { "epoch": 0.21304828603022485, "grad_norm": 0.3880922593731356, "learning_rate": 2.884739771725062e-05, "loss": 0.428, "step": 1734 }, { "epoch": 0.21317115124708194, "grad_norm": 0.3882767038478529, "learning_rate": 2.88449233800593e-05, "loss": 0.3849, "step": 1735 }, { "epoch": 0.21329401646393906, "grad_norm": 0.44427860684435416, "learning_rate": 2.8842446496184114e-05, "loss": 0.4674, "step": 1736 }, { "epoch": 0.21341688168079617, "grad_norm": 0.31939519844864367, "learning_rate": 2.883996706608067e-05, "loss": 0.3667, "step": 1737 }, { "epoch": 0.2135397468976533, "grad_norm": 0.3751778674933865, "learning_rate": 2.8837485090205033e-05, "loss": 0.3735, "step": 1738 }, { "epoch": 0.21366261211451038, "grad_norm": 0.41667491528836725, "learning_rate": 2.883500056901376e-05, "loss": 0.4286, "step": 1739 }, { "epoch": 0.2137854773313675, "grad_norm": 0.34421424545366686, "learning_rate": 2.883251350296385e-05, "loss": 0.4389, "step": 1740 }, { "epoch": 0.2139083425482246, "grad_norm": 0.3394884791920741, "learning_rate": 2.8830023892512792e-05, "loss": 0.4603, "step": 1741 }, { "epoch": 0.2140312077650817, "grad_norm": 0.3377412654301812, "learning_rate": 2.8827531738118526e-05, "loss": 0.3938, "step": 1742 }, { "epoch": 0.2141540729819388, "grad_norm": 0.3406977974128251, "learning_rate": 2.8825037040239473e-05, "loss": 0.3478, "step": 1743 }, { "epoch": 0.21427693819879592, "grad_norm": 0.4946511318942286, "learning_rate": 2.8822539799334513e-05, "loss": 0.4314, "step": 1744 }, { "epoch": 0.21439980341565304, "grad_norm": 0.38804856150463607, "learning_rate": 2.8820040015863e-05, "loss": 0.4705, "step": 1745 }, { "epoch": 0.21452266863251013, "grad_norm": 0.3648323399129059, "learning_rate": 2.8817537690284755e-05, "loss": 0.3542, "step": 1746 }, { "epoch": 0.21464553384936724, "grad_norm": 0.3985897994743742, "learning_rate": 2.881503282306006e-05, "loss": 0.4188, "step": 1747 }, { "epoch": 0.21476839906622436, "grad_norm": 0.4394789877799237, "learning_rate": 2.881252541464967e-05, "loss": 0.4116, "step": 1748 }, { "epoch": 0.21489126428308147, "grad_norm": 0.4091801757038946, "learning_rate": 2.8810015465514808e-05, "loss": 0.4609, "step": 1749 }, { "epoch": 0.21501412949993856, "grad_norm": 0.4851856691522555, "learning_rate": 2.880750297611716e-05, "loss": 0.4042, "step": 1750 }, { "epoch": 0.21513699471679568, "grad_norm": 0.35714605170991753, "learning_rate": 2.8804987946918888e-05, "loss": 0.4033, "step": 1751 }, { "epoch": 0.2152598599336528, "grad_norm": 0.4719775138124527, "learning_rate": 2.880247037838261e-05, "loss": 0.4059, "step": 1752 }, { "epoch": 0.21538272515050988, "grad_norm": 0.37506350949691963, "learning_rate": 2.879995027097142e-05, "loss": 0.3764, "step": 1753 }, { "epoch": 0.215505590367367, "grad_norm": 0.3640268318922268, "learning_rate": 2.8797427625148872e-05, "loss": 0.4066, "step": 1754 }, { "epoch": 0.2156284555842241, "grad_norm": 0.4756584378899063, "learning_rate": 2.8794902441378992e-05, "loss": 0.4223, "step": 1755 }, { "epoch": 0.21575132080108123, "grad_norm": 0.47180689384462066, "learning_rate": 2.8792374720126268e-05, "loss": 0.4279, "step": 1756 }, { "epoch": 0.2158741860179383, "grad_norm": 0.43770478734777374, "learning_rate": 2.8789844461855665e-05, "loss": 0.4479, "step": 1757 }, { "epoch": 0.21599705123479543, "grad_norm": 0.40097420652429905, "learning_rate": 2.8787311667032603e-05, "loss": 0.4243, "step": 1758 }, { "epoch": 0.21611991645165254, "grad_norm": 0.36561109469287306, "learning_rate": 2.8784776336122975e-05, "loss": 0.4195, "step": 1759 }, { "epoch": 0.21624278166850963, "grad_norm": 0.4156496278473133, "learning_rate": 2.8782238469593136e-05, "loss": 0.4763, "step": 1760 }, { "epoch": 0.21636564688536675, "grad_norm": 0.38878853235553507, "learning_rate": 2.8779698067909907e-05, "loss": 0.3909, "step": 1761 }, { "epoch": 0.21648851210222386, "grad_norm": 0.4087471423581013, "learning_rate": 2.8777155131540588e-05, "loss": 0.3613, "step": 1762 }, { "epoch": 0.21661137731908098, "grad_norm": 0.33487814830417856, "learning_rate": 2.877460966095293e-05, "loss": 0.4078, "step": 1763 }, { "epoch": 0.21673424253593807, "grad_norm": 0.3386459166062808, "learning_rate": 2.8772061656615155e-05, "loss": 0.3973, "step": 1764 }, { "epoch": 0.21685710775279518, "grad_norm": 0.39495871542082067, "learning_rate": 2.8769511118995955e-05, "loss": 0.4091, "step": 1765 }, { "epoch": 0.2169799729696523, "grad_norm": 0.4584505701868716, "learning_rate": 2.8766958048564477e-05, "loss": 0.445, "step": 1766 }, { "epoch": 0.2171028381865094, "grad_norm": 0.3354368358008948, "learning_rate": 2.8764402445790358e-05, "loss": 0.4191, "step": 1767 }, { "epoch": 0.2172257034033665, "grad_norm": 0.3510027139958763, "learning_rate": 2.8761844311143665e-05, "loss": 0.4329, "step": 1768 }, { "epoch": 0.21734856862022361, "grad_norm": 0.49514697807052604, "learning_rate": 2.8759283645094964e-05, "loss": 0.4967, "step": 1769 }, { "epoch": 0.21747143383708073, "grad_norm": 0.42889983761570094, "learning_rate": 2.875672044811527e-05, "loss": 0.3872, "step": 1770 }, { "epoch": 0.21759429905393782, "grad_norm": 0.3337810189212921, "learning_rate": 2.8754154720676063e-05, "loss": 0.3693, "step": 1771 }, { "epoch": 0.21771716427079493, "grad_norm": 0.3810937995904149, "learning_rate": 2.8751586463249294e-05, "loss": 0.4223, "step": 1772 }, { "epoch": 0.21784002948765205, "grad_norm": 0.4033599627948427, "learning_rate": 2.8749015676307378e-05, "loss": 0.3885, "step": 1773 }, { "epoch": 0.21796289470450916, "grad_norm": 0.3308936588186546, "learning_rate": 2.8746442360323192e-05, "loss": 0.3796, "step": 1774 }, { "epoch": 0.21808575992136625, "grad_norm": 0.48480265092281294, "learning_rate": 2.8743866515770083e-05, "loss": 0.4448, "step": 1775 }, { "epoch": 0.21820862513822337, "grad_norm": 0.5345910516827965, "learning_rate": 2.8741288143121862e-05, "loss": 0.4351, "step": 1776 }, { "epoch": 0.21833149035508048, "grad_norm": 0.376537732688662, "learning_rate": 2.8738707242852802e-05, "loss": 0.3686, "step": 1777 }, { "epoch": 0.2184543555719376, "grad_norm": 0.4623087618405629, "learning_rate": 2.8736123815437646e-05, "loss": 0.4026, "step": 1778 }, { "epoch": 0.21857722078879468, "grad_norm": 0.3432867337710265, "learning_rate": 2.8733537861351592e-05, "loss": 0.4008, "step": 1779 }, { "epoch": 0.2187000860056518, "grad_norm": 0.37887469763368137, "learning_rate": 2.8730949381070315e-05, "loss": 0.3191, "step": 1780 }, { "epoch": 0.21882295122250892, "grad_norm": 0.382603604239866, "learning_rate": 2.8728358375069946e-05, "loss": 0.3142, "step": 1781 }, { "epoch": 0.218945816439366, "grad_norm": 0.412157984210304, "learning_rate": 2.8725764843827087e-05, "loss": 0.4778, "step": 1782 }, { "epoch": 0.21906868165622312, "grad_norm": 0.3906763528786389, "learning_rate": 2.8723168787818804e-05, "loss": 0.4154, "step": 1783 }, { "epoch": 0.21919154687308023, "grad_norm": 0.36939516963882857, "learning_rate": 2.8720570207522613e-05, "loss": 0.3562, "step": 1784 }, { "epoch": 0.21931441208993735, "grad_norm": 0.33936440886870306, "learning_rate": 2.8717969103416516e-05, "loss": 0.4045, "step": 1785 }, { "epoch": 0.21943727730679444, "grad_norm": 0.32190861446596275, "learning_rate": 2.871536547597897e-05, "loss": 0.4494, "step": 1786 }, { "epoch": 0.21956014252365155, "grad_norm": 0.39430434292244376, "learning_rate": 2.871275932568889e-05, "loss": 0.402, "step": 1787 }, { "epoch": 0.21968300774050867, "grad_norm": 0.3945238190641575, "learning_rate": 2.8710150653025656e-05, "loss": 0.4243, "step": 1788 }, { "epoch": 0.21980587295736578, "grad_norm": 0.3636867803372244, "learning_rate": 2.870753945846913e-05, "loss": 0.4273, "step": 1789 }, { "epoch": 0.21992873817422287, "grad_norm": 0.3764953580684033, "learning_rate": 2.8704925742499614e-05, "loss": 0.4132, "step": 1790 }, { "epoch": 0.22005160339108, "grad_norm": 0.34773558414046857, "learning_rate": 2.870230950559789e-05, "loss": 0.4275, "step": 1791 }, { "epoch": 0.2201744686079371, "grad_norm": 0.3675647924759336, "learning_rate": 2.8699690748245194e-05, "loss": 0.3463, "step": 1792 }, { "epoch": 0.2202973338247942, "grad_norm": 0.48841787880832677, "learning_rate": 2.8697069470923233e-05, "loss": 0.3819, "step": 1793 }, { "epoch": 0.2204201990416513, "grad_norm": 0.4257369816303052, "learning_rate": 2.8694445674114163e-05, "loss": 0.3899, "step": 1794 }, { "epoch": 0.22054306425850842, "grad_norm": 0.31405002483117933, "learning_rate": 2.8691819358300633e-05, "loss": 0.3701, "step": 1795 }, { "epoch": 0.22066592947536554, "grad_norm": 0.3977012490568089, "learning_rate": 2.868919052396572e-05, "loss": 0.4107, "step": 1796 }, { "epoch": 0.22078879469222262, "grad_norm": 0.3976463861321385, "learning_rate": 2.8686559171592987e-05, "loss": 0.4953, "step": 1797 }, { "epoch": 0.22091165990907974, "grad_norm": 0.3689205804304291, "learning_rate": 2.868392530166646e-05, "loss": 0.4671, "step": 1798 }, { "epoch": 0.22103452512593685, "grad_norm": 0.42739495274502687, "learning_rate": 2.8681288914670615e-05, "loss": 0.4218, "step": 1799 }, { "epoch": 0.22115739034279394, "grad_norm": 0.34898435728264504, "learning_rate": 2.86786500110904e-05, "loss": 0.4324, "step": 1800 }, { "epoch": 0.22128025555965106, "grad_norm": 0.3459865671902565, "learning_rate": 2.867600859141122e-05, "loss": 0.4295, "step": 1801 }, { "epoch": 0.22140312077650817, "grad_norm": 0.37820206534021866, "learning_rate": 2.8673364656118962e-05, "loss": 0.37, "step": 1802 }, { "epoch": 0.2215259859933653, "grad_norm": 0.41811589339118105, "learning_rate": 2.8670718205699944e-05, "loss": 0.3791, "step": 1803 }, { "epoch": 0.22164885121022238, "grad_norm": 0.4055984827907777, "learning_rate": 2.866806924064097e-05, "loss": 0.3705, "step": 1804 }, { "epoch": 0.2217717164270795, "grad_norm": 0.31964179899590406, "learning_rate": 2.86654177614293e-05, "loss": 0.3795, "step": 1805 }, { "epoch": 0.2218945816439366, "grad_norm": 0.33528425043956955, "learning_rate": 2.8662763768552656e-05, "loss": 0.4078, "step": 1806 }, { "epoch": 0.22201744686079372, "grad_norm": 0.3918495962820569, "learning_rate": 2.8660107262499223e-05, "loss": 0.3596, "step": 1807 }, { "epoch": 0.2221403120776508, "grad_norm": 0.3988776738164092, "learning_rate": 2.8657448243757646e-05, "loss": 0.3873, "step": 1808 }, { "epoch": 0.22226317729450792, "grad_norm": 0.414830082456442, "learning_rate": 2.865478671281704e-05, "loss": 0.4648, "step": 1809 }, { "epoch": 0.22238604251136504, "grad_norm": 0.3461654515036933, "learning_rate": 2.865212267016697e-05, "loss": 0.4256, "step": 1810 }, { "epoch": 0.22250890772822213, "grad_norm": 0.4103989171771081, "learning_rate": 2.8649456116297475e-05, "loss": 0.3829, "step": 1811 }, { "epoch": 0.22263177294507924, "grad_norm": 0.3643839532442663, "learning_rate": 2.8646787051699045e-05, "loss": 0.4198, "step": 1812 }, { "epoch": 0.22275463816193636, "grad_norm": 0.33155078790183273, "learning_rate": 2.8644115476862636e-05, "loss": 0.4542, "step": 1813 }, { "epoch": 0.22287750337879347, "grad_norm": 0.40205085538996366, "learning_rate": 2.8641441392279676e-05, "loss": 0.4439, "step": 1814 }, { "epoch": 0.22300036859565056, "grad_norm": 0.44277320910364437, "learning_rate": 2.8638764798442037e-05, "loss": 0.3929, "step": 1815 }, { "epoch": 0.22312323381250768, "grad_norm": 0.41097508909406877, "learning_rate": 2.8636085695842063e-05, "loss": 0.4439, "step": 1816 }, { "epoch": 0.2232460990293648, "grad_norm": 0.3877240807816728, "learning_rate": 2.863340408497256e-05, "loss": 0.423, "step": 1817 }, { "epoch": 0.2233689642462219, "grad_norm": 0.3482295038349041, "learning_rate": 2.8630719966326793e-05, "loss": 0.4823, "step": 1818 }, { "epoch": 0.223491829463079, "grad_norm": 0.3583406319403993, "learning_rate": 2.8628033340398484e-05, "loss": 0.3865, "step": 1819 }, { "epoch": 0.2236146946799361, "grad_norm": 0.4515860708830148, "learning_rate": 2.8625344207681822e-05, "loss": 0.4236, "step": 1820 }, { "epoch": 0.22373755989679323, "grad_norm": 0.5779165906699721, "learning_rate": 2.8622652568671458e-05, "loss": 0.4536, "step": 1821 }, { "epoch": 0.2238604251136503, "grad_norm": 0.36621512809655005, "learning_rate": 2.86199584238625e-05, "loss": 0.3826, "step": 1822 }, { "epoch": 0.22398329033050743, "grad_norm": 0.3890125283708797, "learning_rate": 2.861726177375052e-05, "loss": 0.479, "step": 1823 }, { "epoch": 0.22410615554736454, "grad_norm": 0.4381109419776825, "learning_rate": 2.8614562618831543e-05, "loss": 0.3834, "step": 1824 }, { "epoch": 0.22422902076422166, "grad_norm": 0.382893900776636, "learning_rate": 2.861186095960207e-05, "loss": 0.3727, "step": 1825 }, { "epoch": 0.22435188598107875, "grad_norm": 0.34660175855161557, "learning_rate": 2.860915679655905e-05, "loss": 0.33, "step": 1826 }, { "epoch": 0.22447475119793586, "grad_norm": 0.3696478946251683, "learning_rate": 2.8606450130199895e-05, "loss": 0.4261, "step": 1827 }, { "epoch": 0.22459761641479298, "grad_norm": 0.3268781473325427, "learning_rate": 2.8603740961022475e-05, "loss": 0.4032, "step": 1828 }, { "epoch": 0.2247204816316501, "grad_norm": 0.42363021865870387, "learning_rate": 2.8601029289525133e-05, "loss": 0.3954, "step": 1829 }, { "epoch": 0.22484334684850718, "grad_norm": 0.4360866358101477, "learning_rate": 2.8598315116206657e-05, "loss": 0.4867, "step": 1830 }, { "epoch": 0.2249662120653643, "grad_norm": 0.4048795860306504, "learning_rate": 2.8595598441566304e-05, "loss": 0.4043, "step": 1831 }, { "epoch": 0.2250890772822214, "grad_norm": 0.327608567946627, "learning_rate": 2.859287926610379e-05, "loss": 0.3693, "step": 1832 }, { "epoch": 0.2252119424990785, "grad_norm": 0.4558180521605417, "learning_rate": 2.859015759031929e-05, "loss": 0.3925, "step": 1833 }, { "epoch": 0.22533480771593561, "grad_norm": 0.430297503659917, "learning_rate": 2.8587433414713433e-05, "loss": 0.3849, "step": 1834 }, { "epoch": 0.22545767293279273, "grad_norm": 0.34474806669284774, "learning_rate": 2.8584706739787315e-05, "loss": 0.4591, "step": 1835 }, { "epoch": 0.22558053814964985, "grad_norm": 0.41101720225594573, "learning_rate": 2.8581977566042495e-05, "loss": 0.365, "step": 1836 }, { "epoch": 0.22570340336650693, "grad_norm": 0.39029431169437717, "learning_rate": 2.8579245893980984e-05, "loss": 0.4056, "step": 1837 }, { "epoch": 0.22582626858336405, "grad_norm": 0.3281082919773035, "learning_rate": 2.8576511724105255e-05, "loss": 0.4282, "step": 1838 }, { "epoch": 0.22594913380022116, "grad_norm": 0.3954517320388179, "learning_rate": 2.857377505691824e-05, "loss": 0.4079, "step": 1839 }, { "epoch": 0.22607199901707828, "grad_norm": 0.4334648990600565, "learning_rate": 2.8571035892923333e-05, "loss": 0.3952, "step": 1840 }, { "epoch": 0.22619486423393537, "grad_norm": 0.38449294786893196, "learning_rate": 2.8568294232624384e-05, "loss": 0.3644, "step": 1841 }, { "epoch": 0.22631772945079248, "grad_norm": 0.3353715400894475, "learning_rate": 2.856555007652571e-05, "loss": 0.4258, "step": 1842 }, { "epoch": 0.2264405946676496, "grad_norm": 0.4079011828205644, "learning_rate": 2.856280342513207e-05, "loss": 0.4162, "step": 1843 }, { "epoch": 0.22656345988450668, "grad_norm": 0.5285351700893794, "learning_rate": 2.8560054278948694e-05, "loss": 0.4865, "step": 1844 }, { "epoch": 0.2266863251013638, "grad_norm": 0.40030591422477835, "learning_rate": 2.855730263848128e-05, "loss": 0.4443, "step": 1845 }, { "epoch": 0.22680919031822092, "grad_norm": 0.4150983763306605, "learning_rate": 2.8554548504235963e-05, "loss": 0.4143, "step": 1846 }, { "epoch": 0.22693205553507803, "grad_norm": 0.39431345134763374, "learning_rate": 2.8551791876719357e-05, "loss": 0.3883, "step": 1847 }, { "epoch": 0.22705492075193512, "grad_norm": 0.38569015129519213, "learning_rate": 2.8549032756438523e-05, "loss": 0.4797, "step": 1848 }, { "epoch": 0.22717778596879223, "grad_norm": 0.31942839102528114, "learning_rate": 2.8546271143900976e-05, "loss": 0.3702, "step": 1849 }, { "epoch": 0.22730065118564935, "grad_norm": 0.3711880680257084, "learning_rate": 2.8543507039614706e-05, "loss": 0.3932, "step": 1850 }, { "epoch": 0.22742351640250644, "grad_norm": 0.3011687429493588, "learning_rate": 2.8540740444088148e-05, "loss": 0.3765, "step": 1851 }, { "epoch": 0.22754638161936355, "grad_norm": 0.5213861107079182, "learning_rate": 2.8537971357830198e-05, "loss": 0.4393, "step": 1852 }, { "epoch": 0.22766924683622067, "grad_norm": 0.4141165309771986, "learning_rate": 2.853519978135022e-05, "loss": 0.5172, "step": 1853 }, { "epoch": 0.22779211205307778, "grad_norm": 0.4132968201353999, "learning_rate": 2.8532425715158018e-05, "loss": 0.4231, "step": 1854 }, { "epoch": 0.22791497726993487, "grad_norm": 0.343876646070149, "learning_rate": 2.8529649159763868e-05, "loss": 0.361, "step": 1855 }, { "epoch": 0.228037842486792, "grad_norm": 0.39769284346579775, "learning_rate": 2.852687011567849e-05, "loss": 0.3827, "step": 1856 }, { "epoch": 0.2281607077036491, "grad_norm": 0.4353739297356592, "learning_rate": 2.852408858341309e-05, "loss": 0.5002, "step": 1857 }, { "epoch": 0.22828357292050622, "grad_norm": 0.34366480253855664, "learning_rate": 2.8521304563479295e-05, "loss": 0.422, "step": 1858 }, { "epoch": 0.2284064381373633, "grad_norm": 0.32230672982825675, "learning_rate": 2.8518518056389217e-05, "loss": 0.325, "step": 1859 }, { "epoch": 0.22852930335422042, "grad_norm": 0.4090324512767959, "learning_rate": 2.851572906265541e-05, "loss": 0.4627, "step": 1860 }, { "epoch": 0.22865216857107754, "grad_norm": 0.3413969153136419, "learning_rate": 2.8512937582790896e-05, "loss": 0.428, "step": 1861 }, { "epoch": 0.22877503378793462, "grad_norm": 0.39698215613979687, "learning_rate": 2.851014361730915e-05, "loss": 0.3736, "step": 1862 }, { "epoch": 0.22889789900479174, "grad_norm": 0.3629071264620721, "learning_rate": 2.85073471667241e-05, "loss": 0.449, "step": 1863 }, { "epoch": 0.22902076422164885, "grad_norm": 0.62324599235748, "learning_rate": 2.8504548231550143e-05, "loss": 0.5497, "step": 1864 }, { "epoch": 0.22914362943850597, "grad_norm": 0.34143897491368613, "learning_rate": 2.850174681230211e-05, "loss": 0.3985, "step": 1865 }, { "epoch": 0.22926649465536306, "grad_norm": 0.40034423108472567, "learning_rate": 2.8498942909495316e-05, "loss": 0.361, "step": 1866 }, { "epoch": 0.22938935987222017, "grad_norm": 0.7536725099010916, "learning_rate": 2.849613652364552e-05, "loss": 0.5814, "step": 1867 }, { "epoch": 0.2295122250890773, "grad_norm": 0.327710757000283, "learning_rate": 2.8493327655268934e-05, "loss": 0.4039, "step": 1868 }, { "epoch": 0.2296350903059344, "grad_norm": 0.4043593898542583, "learning_rate": 2.8490516304882233e-05, "loss": 0.5078, "step": 1869 }, { "epoch": 0.2297579555227915, "grad_norm": 0.3850400619792719, "learning_rate": 2.8487702473002548e-05, "loss": 0.4624, "step": 1870 }, { "epoch": 0.2298808207396486, "grad_norm": 0.7249880024359941, "learning_rate": 2.8484886160147463e-05, "loss": 0.4291, "step": 1871 }, { "epoch": 0.23000368595650572, "grad_norm": 0.3476798090461196, "learning_rate": 2.8482067366835017e-05, "loss": 0.4093, "step": 1872 }, { "epoch": 0.2301265511733628, "grad_norm": 0.44700680933979103, "learning_rate": 2.847924609358372e-05, "loss": 0.3633, "step": 1873 }, { "epoch": 0.23024941639021992, "grad_norm": 0.3250764572834534, "learning_rate": 2.8476422340912517e-05, "loss": 0.3518, "step": 1874 }, { "epoch": 0.23037228160707704, "grad_norm": 0.48818035540609245, "learning_rate": 2.8473596109340824e-05, "loss": 0.4898, "step": 1875 }, { "epoch": 0.23049514682393415, "grad_norm": 0.35528256116640405, "learning_rate": 2.8470767399388505e-05, "loss": 0.4411, "step": 1876 }, { "epoch": 0.23061801204079124, "grad_norm": 0.5249000497630724, "learning_rate": 2.846793621157588e-05, "loss": 0.5359, "step": 1877 }, { "epoch": 0.23074087725764836, "grad_norm": 0.6588381476897017, "learning_rate": 2.8465102546423734e-05, "loss": 0.4616, "step": 1878 }, { "epoch": 0.23086374247450547, "grad_norm": 0.3983287566268464, "learning_rate": 2.84622664044533e-05, "loss": 0.392, "step": 1879 }, { "epoch": 0.2309866076913626, "grad_norm": 0.34226317601844697, "learning_rate": 2.845942778618627e-05, "loss": 0.3016, "step": 1880 }, { "epoch": 0.23110947290821968, "grad_norm": 0.34337318667456374, "learning_rate": 2.8456586692144783e-05, "loss": 0.3731, "step": 1881 }, { "epoch": 0.2312323381250768, "grad_norm": 0.3963969477444451, "learning_rate": 2.845374312285144e-05, "loss": 0.4628, "step": 1882 }, { "epoch": 0.2313552033419339, "grad_norm": 0.42710063194729514, "learning_rate": 2.8450897078829305e-05, "loss": 0.4299, "step": 1883 }, { "epoch": 0.231478068558791, "grad_norm": 0.3838406927168319, "learning_rate": 2.8448048560601882e-05, "loss": 0.4091, "step": 1884 }, { "epoch": 0.2316009337756481, "grad_norm": 0.2857741083796205, "learning_rate": 2.844519756869314e-05, "loss": 0.375, "step": 1885 }, { "epoch": 0.23172379899250523, "grad_norm": 0.31452489126354294, "learning_rate": 2.8442344103627502e-05, "loss": 0.4032, "step": 1886 }, { "epoch": 0.23184666420936234, "grad_norm": 0.33191805925428325, "learning_rate": 2.843948816592984e-05, "loss": 0.3653, "step": 1887 }, { "epoch": 0.23196952942621943, "grad_norm": 0.45648672384089545, "learning_rate": 2.8436629756125486e-05, "loss": 0.4014, "step": 1888 }, { "epoch": 0.23209239464307654, "grad_norm": 0.30507726009256364, "learning_rate": 2.8433768874740236e-05, "loss": 0.3907, "step": 1889 }, { "epoch": 0.23221525985993366, "grad_norm": 0.39336472342193674, "learning_rate": 2.843090552230032e-05, "loss": 0.3901, "step": 1890 }, { "epoch": 0.23233812507679075, "grad_norm": 0.3784425819592001, "learning_rate": 2.8428039699332427e-05, "loss": 0.3863, "step": 1891 }, { "epoch": 0.23246099029364786, "grad_norm": 0.3168097489928488, "learning_rate": 2.8425171406363722e-05, "loss": 0.3297, "step": 1892 }, { "epoch": 0.23258385551050498, "grad_norm": 0.49638720765714567, "learning_rate": 2.8422300643921806e-05, "loss": 0.437, "step": 1893 }, { "epoch": 0.2327067207273621, "grad_norm": 0.3948244878354161, "learning_rate": 2.8419427412534727e-05, "loss": 0.4077, "step": 1894 }, { "epoch": 0.23282958594421918, "grad_norm": 0.3305109779619282, "learning_rate": 2.841655171273101e-05, "loss": 0.4235, "step": 1895 }, { "epoch": 0.2329524511610763, "grad_norm": 0.3578113370640016, "learning_rate": 2.8413673545039608e-05, "loss": 0.3704, "step": 1896 }, { "epoch": 0.2330753163779334, "grad_norm": 0.3831208686006629, "learning_rate": 2.841079290998995e-05, "loss": 0.4626, "step": 1897 }, { "epoch": 0.23319818159479053, "grad_norm": 0.3153115064261642, "learning_rate": 2.840790980811191e-05, "loss": 0.406, "step": 1898 }, { "epoch": 0.23332104681164761, "grad_norm": 0.3625345382393138, "learning_rate": 2.8405024239935813e-05, "loss": 0.4508, "step": 1899 }, { "epoch": 0.23344391202850473, "grad_norm": 0.411180847103771, "learning_rate": 2.840213620599244e-05, "loss": 0.3689, "step": 1900 }, { "epoch": 0.23356677724536185, "grad_norm": 0.3308469924171937, "learning_rate": 2.839924570681303e-05, "loss": 0.3523, "step": 1901 }, { "epoch": 0.23368964246221893, "grad_norm": 0.3861761317834472, "learning_rate": 2.839635274292927e-05, "loss": 0.4287, "step": 1902 }, { "epoch": 0.23381250767907605, "grad_norm": 0.35404494113913454, "learning_rate": 2.83934573148733e-05, "loss": 0.4023, "step": 1903 }, { "epoch": 0.23393537289593316, "grad_norm": 0.3460560534937293, "learning_rate": 2.8390559423177718e-05, "loss": 0.3888, "step": 1904 }, { "epoch": 0.23405823811279028, "grad_norm": 0.3661610642556469, "learning_rate": 2.8387659068375566e-05, "loss": 0.4382, "step": 1905 }, { "epoch": 0.23418110332964737, "grad_norm": 0.363719469870802, "learning_rate": 2.8384756251000354e-05, "loss": 0.4076, "step": 1906 }, { "epoch": 0.23430396854650448, "grad_norm": 0.42174041473437074, "learning_rate": 2.8381850971586035e-05, "loss": 0.4044, "step": 1907 }, { "epoch": 0.2344268337633616, "grad_norm": 0.35446934137995123, "learning_rate": 2.8378943230667012e-05, "loss": 0.3986, "step": 1908 }, { "epoch": 0.2345496989802187, "grad_norm": 0.3642202313157208, "learning_rate": 2.8376033028778145e-05, "loss": 0.4351, "step": 1909 }, { "epoch": 0.2346725641970758, "grad_norm": 0.3667784651932486, "learning_rate": 2.8373120366454753e-05, "loss": 0.387, "step": 1910 }, { "epoch": 0.23479542941393292, "grad_norm": 0.31621875567963503, "learning_rate": 2.8370205244232598e-05, "loss": 0.3494, "step": 1911 }, { "epoch": 0.23491829463079003, "grad_norm": 0.31975725723426207, "learning_rate": 2.8367287662647894e-05, "loss": 0.3785, "step": 1912 }, { "epoch": 0.23504115984764712, "grad_norm": 0.34668228410610924, "learning_rate": 2.8364367622237314e-05, "loss": 0.4325, "step": 1913 }, { "epoch": 0.23516402506450423, "grad_norm": 0.3869072483683326, "learning_rate": 2.8361445123537982e-05, "loss": 0.3221, "step": 1914 }, { "epoch": 0.23528689028136135, "grad_norm": 0.3878972335486069, "learning_rate": 2.8358520167087472e-05, "loss": 0.3937, "step": 1915 }, { "epoch": 0.23540975549821846, "grad_norm": 0.4316205508154163, "learning_rate": 2.835559275342381e-05, "loss": 0.4195, "step": 1916 }, { "epoch": 0.23553262071507555, "grad_norm": 0.3594424685316158, "learning_rate": 2.8352662883085475e-05, "loss": 0.4424, "step": 1917 }, { "epoch": 0.23565548593193267, "grad_norm": 0.3725580224039656, "learning_rate": 2.8349730556611394e-05, "loss": 0.405, "step": 1918 }, { "epoch": 0.23577835114878978, "grad_norm": 0.3967993976659829, "learning_rate": 2.8346795774540958e-05, "loss": 0.3974, "step": 1919 }, { "epoch": 0.2359012163656469, "grad_norm": 0.35118709486881833, "learning_rate": 2.8343858537414e-05, "loss": 0.3677, "step": 1920 }, { "epoch": 0.236024081582504, "grad_norm": 0.3692215276457503, "learning_rate": 2.8340918845770795e-05, "loss": 0.4879, "step": 1921 }, { "epoch": 0.2361469467993611, "grad_norm": 0.3294298567701966, "learning_rate": 2.8337976700152087e-05, "loss": 0.3871, "step": 1922 }, { "epoch": 0.23626981201621822, "grad_norm": 0.3852770052949507, "learning_rate": 2.833503210109907e-05, "loss": 0.3793, "step": 1923 }, { "epoch": 0.2363926772330753, "grad_norm": 0.3138157555098737, "learning_rate": 2.8332085049153374e-05, "loss": 0.4015, "step": 1924 }, { "epoch": 0.23651554244993242, "grad_norm": 0.3536754035830522, "learning_rate": 2.8329135544857096e-05, "loss": 0.3836, "step": 1925 }, { "epoch": 0.23663840766678954, "grad_norm": 0.3249626705853453, "learning_rate": 2.8326183588752778e-05, "loss": 0.3478, "step": 1926 }, { "epoch": 0.23676127288364665, "grad_norm": 0.4058707913345836, "learning_rate": 2.832322918138341e-05, "loss": 0.3658, "step": 1927 }, { "epoch": 0.23688413810050374, "grad_norm": 0.44780099327312284, "learning_rate": 2.8320272323292443e-05, "loss": 0.4063, "step": 1928 }, { "epoch": 0.23700700331736085, "grad_norm": 0.3642924983225071, "learning_rate": 2.8317313015023762e-05, "loss": 0.3967, "step": 1929 }, { "epoch": 0.23712986853421797, "grad_norm": 0.4516800065454726, "learning_rate": 2.8314351257121724e-05, "loss": 0.4397, "step": 1930 }, { "epoch": 0.23725273375107508, "grad_norm": 0.4306496485205513, "learning_rate": 2.8311387050131112e-05, "loss": 0.4199, "step": 1931 }, { "epoch": 0.23737559896793217, "grad_norm": 0.3695458619380457, "learning_rate": 2.830842039459718e-05, "loss": 0.4743, "step": 1932 }, { "epoch": 0.2374984641847893, "grad_norm": 0.41215293153479704, "learning_rate": 2.830545129106563e-05, "loss": 0.4023, "step": 1933 }, { "epoch": 0.2376213294016464, "grad_norm": 0.3455237390095082, "learning_rate": 2.83024797400826e-05, "loss": 0.3323, "step": 1934 }, { "epoch": 0.2377441946185035, "grad_norm": 0.4713141920003127, "learning_rate": 2.8299505742194693e-05, "loss": 0.3736, "step": 1935 }, { "epoch": 0.2378670598353606, "grad_norm": 0.4029365396833897, "learning_rate": 2.829652929794895e-05, "loss": 0.4452, "step": 1936 }, { "epoch": 0.23798992505221772, "grad_norm": 0.4090193849860802, "learning_rate": 2.829355040789288e-05, "loss": 0.4695, "step": 1937 }, { "epoch": 0.23811279026907484, "grad_norm": 0.37364318863683993, "learning_rate": 2.829056907257442e-05, "loss": 0.4902, "step": 1938 }, { "epoch": 0.23823565548593192, "grad_norm": 0.3080300700778998, "learning_rate": 2.8287585292541977e-05, "loss": 0.3739, "step": 1939 }, { "epoch": 0.23835852070278904, "grad_norm": 0.366957076175219, "learning_rate": 2.828459906834439e-05, "loss": 0.3658, "step": 1940 }, { "epoch": 0.23848138591964615, "grad_norm": 0.4090736367769831, "learning_rate": 2.828161040053096e-05, "loss": 0.4377, "step": 1941 }, { "epoch": 0.23860425113650324, "grad_norm": 0.38330916400718346, "learning_rate": 2.8278619289651423e-05, "loss": 0.4309, "step": 1942 }, { "epoch": 0.23872711635336036, "grad_norm": 0.3685538250750066, "learning_rate": 2.827562573625599e-05, "loss": 0.4302, "step": 1943 }, { "epoch": 0.23884998157021747, "grad_norm": 0.465774810790295, "learning_rate": 2.8272629740895294e-05, "loss": 0.4753, "step": 1944 }, { "epoch": 0.2389728467870746, "grad_norm": 0.40918425943710796, "learning_rate": 2.8269631304120433e-05, "loss": 0.4503, "step": 1945 }, { "epoch": 0.23909571200393168, "grad_norm": 0.319411173767567, "learning_rate": 2.8266630426482956e-05, "loss": 0.402, "step": 1946 }, { "epoch": 0.2392185772207888, "grad_norm": 0.5223371320122355, "learning_rate": 2.8263627108534843e-05, "loss": 0.3941, "step": 1947 }, { "epoch": 0.2393414424376459, "grad_norm": 0.3753508487222715, "learning_rate": 2.826062135082854e-05, "loss": 0.4174, "step": 1948 }, { "epoch": 0.23946430765450302, "grad_norm": 0.3930010518440563, "learning_rate": 2.8257613153916946e-05, "loss": 0.3877, "step": 1949 }, { "epoch": 0.2395871728713601, "grad_norm": 0.33399347617824243, "learning_rate": 2.8254602518353384e-05, "loss": 0.402, "step": 1950 }, { "epoch": 0.23971003808821723, "grad_norm": 0.29339865715592917, "learning_rate": 2.825158944469165e-05, "loss": 0.4139, "step": 1951 }, { "epoch": 0.23983290330507434, "grad_norm": 0.4134549199036834, "learning_rate": 2.8248573933485977e-05, "loss": 0.3771, "step": 1952 }, { "epoch": 0.23995576852193143, "grad_norm": 0.4193329046308112, "learning_rate": 2.824555598529105e-05, "loss": 0.3703, "step": 1953 }, { "epoch": 0.24007863373878854, "grad_norm": 0.39140819464884363, "learning_rate": 2.8242535600662e-05, "loss": 0.3436, "step": 1954 }, { "epoch": 0.24020149895564566, "grad_norm": 0.32488578146416436, "learning_rate": 2.8239512780154406e-05, "loss": 0.4182, "step": 1955 }, { "epoch": 0.24032436417250277, "grad_norm": 0.3464847648738566, "learning_rate": 2.8236487524324298e-05, "loss": 0.3916, "step": 1956 }, { "epoch": 0.24044722938935986, "grad_norm": 0.43633055284242867, "learning_rate": 2.823345983372815e-05, "loss": 0.496, "step": 1957 }, { "epoch": 0.24057009460621698, "grad_norm": 0.3765626912214035, "learning_rate": 2.8230429708922886e-05, "loss": 0.4399, "step": 1958 }, { "epoch": 0.2406929598230741, "grad_norm": 0.44363918640188915, "learning_rate": 2.8227397150465884e-05, "loss": 0.4225, "step": 1959 }, { "epoch": 0.2408158250399312, "grad_norm": 0.42093807519311205, "learning_rate": 2.8224362158914958e-05, "loss": 0.473, "step": 1960 }, { "epoch": 0.2409386902567883, "grad_norm": 0.30779972906446934, "learning_rate": 2.8221324734828376e-05, "loss": 0.4544, "step": 1961 }, { "epoch": 0.2410615554736454, "grad_norm": 0.39107277153900927, "learning_rate": 2.8218284878764848e-05, "loss": 0.4184, "step": 1962 }, { "epoch": 0.24118442069050253, "grad_norm": 0.3723441934811606, "learning_rate": 2.8215242591283545e-05, "loss": 0.4008, "step": 1963 }, { "epoch": 0.24130728590735961, "grad_norm": 0.33691493973551934, "learning_rate": 2.8212197872944067e-05, "loss": 0.4504, "step": 1964 }, { "epoch": 0.24143015112421673, "grad_norm": 0.4511876356577434, "learning_rate": 2.820915072430648e-05, "loss": 0.4585, "step": 1965 }, { "epoch": 0.24155301634107385, "grad_norm": 0.4735038682257266, "learning_rate": 2.8206101145931275e-05, "loss": 0.3711, "step": 1966 }, { "epoch": 0.24167588155793096, "grad_norm": 0.37874357389303864, "learning_rate": 2.8203049138379415e-05, "loss": 0.327, "step": 1967 }, { "epoch": 0.24179874677478805, "grad_norm": 0.36943307685407567, "learning_rate": 2.8199994702212287e-05, "loss": 0.4797, "step": 1968 }, { "epoch": 0.24192161199164516, "grad_norm": 0.35885889389984654, "learning_rate": 2.819693783799174e-05, "loss": 0.3984, "step": 1969 }, { "epoch": 0.24204447720850228, "grad_norm": 0.7475509819132273, "learning_rate": 2.8193878546280067e-05, "loss": 0.5438, "step": 1970 }, { "epoch": 0.2421673424253594, "grad_norm": 0.4227379543537814, "learning_rate": 2.8190816827639994e-05, "loss": 0.4419, "step": 1971 }, { "epoch": 0.24229020764221648, "grad_norm": 0.38142635470864034, "learning_rate": 2.8187752682634715e-05, "loss": 0.4304, "step": 1972 }, { "epoch": 0.2424130728590736, "grad_norm": 0.3815298423259299, "learning_rate": 2.818468611182785e-05, "loss": 0.4242, "step": 1973 }, { "epoch": 0.2425359380759307, "grad_norm": 0.30268032014075225, "learning_rate": 2.8181617115783483e-05, "loss": 0.3588, "step": 1974 }, { "epoch": 0.2426588032927878, "grad_norm": 0.36968444144365104, "learning_rate": 2.8178545695066137e-05, "loss": 0.4192, "step": 1975 }, { "epoch": 0.24278166850964492, "grad_norm": 0.48299018413099326, "learning_rate": 2.817547185024077e-05, "loss": 0.3811, "step": 1976 }, { "epoch": 0.24290453372650203, "grad_norm": 0.39734517362834, "learning_rate": 2.8172395581872808e-05, "loss": 0.3919, "step": 1977 }, { "epoch": 0.24302739894335915, "grad_norm": 0.43582347732207904, "learning_rate": 2.81693168905281e-05, "loss": 0.5636, "step": 1978 }, { "epoch": 0.24315026416021623, "grad_norm": 0.3477815227418544, "learning_rate": 2.816623577677296e-05, "loss": 0.3948, "step": 1979 }, { "epoch": 0.24327312937707335, "grad_norm": 0.553215158791964, "learning_rate": 2.8163152241174133e-05, "loss": 0.4591, "step": 1980 }, { "epoch": 0.24339599459393046, "grad_norm": 0.3037881361937377, "learning_rate": 2.816006628429882e-05, "loss": 0.3461, "step": 1981 }, { "epoch": 0.24351885981078755, "grad_norm": 0.4832276623177763, "learning_rate": 2.8156977906714657e-05, "loss": 0.3761, "step": 1982 }, { "epoch": 0.24364172502764467, "grad_norm": 0.3596333211954867, "learning_rate": 2.8153887108989734e-05, "loss": 0.4306, "step": 1983 }, { "epoch": 0.24376459024450178, "grad_norm": 0.34509692419554683, "learning_rate": 2.8150793891692582e-05, "loss": 0.3728, "step": 1984 }, { "epoch": 0.2438874554613589, "grad_norm": 0.33730121183248063, "learning_rate": 2.8147698255392183e-05, "loss": 0.3916, "step": 1985 }, { "epoch": 0.244010320678216, "grad_norm": 0.3522269280785066, "learning_rate": 2.8144600200657953e-05, "loss": 0.3814, "step": 1986 }, { "epoch": 0.2441331858950731, "grad_norm": 0.47929249600385926, "learning_rate": 2.8141499728059765e-05, "loss": 0.4404, "step": 1987 }, { "epoch": 0.24425605111193022, "grad_norm": 0.3010727373623807, "learning_rate": 2.8138396838167925e-05, "loss": 0.4334, "step": 1988 }, { "epoch": 0.24437891632878733, "grad_norm": 0.4482766304666789, "learning_rate": 2.8135291531553192e-05, "loss": 0.4257, "step": 1989 }, { "epoch": 0.24450178154564442, "grad_norm": 0.35266891871378375, "learning_rate": 2.8132183808786772e-05, "loss": 0.3252, "step": 1990 }, { "epoch": 0.24462464676250154, "grad_norm": 0.47070200647316524, "learning_rate": 2.8129073670440297e-05, "loss": 0.5016, "step": 1991 }, { "epoch": 0.24474751197935865, "grad_norm": 0.35121998908204477, "learning_rate": 2.812596111708587e-05, "loss": 0.4492, "step": 1992 }, { "epoch": 0.24487037719621574, "grad_norm": 0.39303020023210433, "learning_rate": 2.8122846149296025e-05, "loss": 0.3462, "step": 1993 }, { "epoch": 0.24499324241307285, "grad_norm": 0.3695084531540039, "learning_rate": 2.8119728767643725e-05, "loss": 0.3641, "step": 1994 }, { "epoch": 0.24511610762992997, "grad_norm": 0.2877183895312709, "learning_rate": 2.8116608972702414e-05, "loss": 0.4288, "step": 1995 }, { "epoch": 0.24523897284678708, "grad_norm": 0.35600654504591395, "learning_rate": 2.811348676504594e-05, "loss": 0.3744, "step": 1996 }, { "epoch": 0.24536183806364417, "grad_norm": 0.3788704416034678, "learning_rate": 2.8110362145248617e-05, "loss": 0.4108, "step": 1997 }, { "epoch": 0.2454847032805013, "grad_norm": 0.3171104628705505, "learning_rate": 2.8107235113885206e-05, "loss": 0.4519, "step": 1998 }, { "epoch": 0.2456075684973584, "grad_norm": 0.34506539121000795, "learning_rate": 2.81041056715309e-05, "loss": 0.4666, "step": 1999 }, { "epoch": 0.24573043371421552, "grad_norm": 0.35666038793726434, "learning_rate": 2.8100973818761332e-05, "loss": 0.4122, "step": 2000 }, { "epoch": 0.2458532989310726, "grad_norm": 0.33736882432206305, "learning_rate": 2.80978395561526e-05, "loss": 0.3401, "step": 2001 }, { "epoch": 0.24597616414792972, "grad_norm": 0.36109710961334646, "learning_rate": 2.8094702884281224e-05, "loss": 0.4371, "step": 2002 }, { "epoch": 0.24609902936478684, "grad_norm": 0.46246194417894987, "learning_rate": 2.8091563803724172e-05, "loss": 0.5119, "step": 2003 }, { "epoch": 0.24622189458164392, "grad_norm": 0.4188418973957365, "learning_rate": 2.808842231505886e-05, "loss": 0.4489, "step": 2004 }, { "epoch": 0.24634475979850104, "grad_norm": 0.35923096915928926, "learning_rate": 2.8085278418863142e-05, "loss": 0.396, "step": 2005 }, { "epoch": 0.24646762501535815, "grad_norm": 0.39471629455579704, "learning_rate": 2.8082132115715323e-05, "loss": 0.3767, "step": 2006 }, { "epoch": 0.24659049023221527, "grad_norm": 0.34380349223212825, "learning_rate": 2.8078983406194142e-05, "loss": 0.4158, "step": 2007 }, { "epoch": 0.24671335544907236, "grad_norm": 0.36697291273426513, "learning_rate": 2.8075832290878782e-05, "loss": 0.4299, "step": 2008 }, { "epoch": 0.24683622066592947, "grad_norm": 0.3768784267291757, "learning_rate": 2.8072678770348876e-05, "loss": 0.4201, "step": 2009 }, { "epoch": 0.2469590858827866, "grad_norm": 0.3956000893444817, "learning_rate": 2.8069522845184484e-05, "loss": 0.4573, "step": 2010 }, { "epoch": 0.2470819510996437, "grad_norm": 0.47563527512935694, "learning_rate": 2.8066364515966126e-05, "loss": 0.3569, "step": 2011 }, { "epoch": 0.2472048163165008, "grad_norm": 0.398498288895717, "learning_rate": 2.8063203783274755e-05, "loss": 0.4136, "step": 2012 }, { "epoch": 0.2473276815333579, "grad_norm": 0.33940934354472657, "learning_rate": 2.8060040647691765e-05, "loss": 0.4597, "step": 2013 }, { "epoch": 0.24745054675021502, "grad_norm": 0.4005334013012599, "learning_rate": 2.8056875109798994e-05, "loss": 0.4086, "step": 2014 }, { "epoch": 0.2475734119670721, "grad_norm": 0.3583192102034911, "learning_rate": 2.8053707170178726e-05, "loss": 0.3641, "step": 2015 }, { "epoch": 0.24769627718392923, "grad_norm": 0.49267376824574305, "learning_rate": 2.8050536829413676e-05, "loss": 0.4241, "step": 2016 }, { "epoch": 0.24781914240078634, "grad_norm": 0.39641141271463315, "learning_rate": 2.8047364088087015e-05, "loss": 0.405, "step": 2017 }, { "epoch": 0.24794200761764346, "grad_norm": 0.34577684647545737, "learning_rate": 2.8044188946782344e-05, "loss": 0.4154, "step": 2018 }, { "epoch": 0.24806487283450054, "grad_norm": 0.31733225510289936, "learning_rate": 2.804101140608371e-05, "loss": 0.4228, "step": 2019 }, { "epoch": 0.24818773805135766, "grad_norm": 0.3538863114586506, "learning_rate": 2.8037831466575603e-05, "loss": 0.3841, "step": 2020 }, { "epoch": 0.24831060326821477, "grad_norm": 0.3662888622349609, "learning_rate": 2.8034649128842952e-05, "loss": 0.3784, "step": 2021 }, { "epoch": 0.2484334684850719, "grad_norm": 0.4595427962442957, "learning_rate": 2.8031464393471126e-05, "loss": 0.4955, "step": 2022 }, { "epoch": 0.24855633370192898, "grad_norm": 0.3475869098680028, "learning_rate": 2.8028277261045933e-05, "loss": 0.3518, "step": 2023 }, { "epoch": 0.2486791989187861, "grad_norm": 0.3270305517115987, "learning_rate": 2.8025087732153634e-05, "loss": 0.3672, "step": 2024 }, { "epoch": 0.2488020641356432, "grad_norm": 0.48769705422998705, "learning_rate": 2.802189580738092e-05, "loss": 0.3927, "step": 2025 }, { "epoch": 0.2489249293525003, "grad_norm": 0.40760106281643715, "learning_rate": 2.8018701487314917e-05, "loss": 0.4498, "step": 2026 }, { "epoch": 0.2490477945693574, "grad_norm": 0.29495287178194296, "learning_rate": 2.8015504772543204e-05, "loss": 0.3487, "step": 2027 }, { "epoch": 0.24917065978621453, "grad_norm": 0.42774078817049666, "learning_rate": 2.8012305663653797e-05, "loss": 0.4306, "step": 2028 }, { "epoch": 0.24929352500307164, "grad_norm": 0.3032759116432842, "learning_rate": 2.800910416123515e-05, "loss": 0.4177, "step": 2029 }, { "epoch": 0.24941639021992873, "grad_norm": 0.34906916190722326, "learning_rate": 2.8005900265876167e-05, "loss": 0.4026, "step": 2030 }, { "epoch": 0.24953925543678585, "grad_norm": 0.3946976276342732, "learning_rate": 2.8002693978166166e-05, "loss": 0.4095, "step": 2031 }, { "epoch": 0.24966212065364296, "grad_norm": 0.4283483575197759, "learning_rate": 2.799948529869494e-05, "loss": 0.3881, "step": 2032 }, { "epoch": 0.24978498587050005, "grad_norm": 0.4386541937001283, "learning_rate": 2.7996274228052698e-05, "loss": 0.4013, "step": 2033 }, { "epoch": 0.24990785108735716, "grad_norm": 0.5116872347684306, "learning_rate": 2.7993060766830093e-05, "loss": 0.4443, "step": 2034 }, { "epoch": 0.25003071630421425, "grad_norm": 0.32912399567688727, "learning_rate": 2.7989844915618226e-05, "loss": 0.3765, "step": 2035 }, { "epoch": 0.25015358152107137, "grad_norm": 0.30670161096142357, "learning_rate": 2.7986626675008625e-05, "loss": 0.3406, "step": 2036 }, { "epoch": 0.2502764467379285, "grad_norm": 0.3649460076297443, "learning_rate": 2.7983406045593273e-05, "loss": 0.3753, "step": 2037 }, { "epoch": 0.2503993119547856, "grad_norm": 0.31056563470555304, "learning_rate": 2.7980183027964573e-05, "loss": 0.4016, "step": 2038 }, { "epoch": 0.2505221771716427, "grad_norm": 0.2899220745246993, "learning_rate": 2.797695762271539e-05, "loss": 0.3601, "step": 2039 }, { "epoch": 0.25064504238849983, "grad_norm": 0.3581039972036827, "learning_rate": 2.7973729830439008e-05, "loss": 0.4005, "step": 2040 }, { "epoch": 0.25076790760535694, "grad_norm": 0.3610836302582152, "learning_rate": 2.797049965172916e-05, "loss": 0.4229, "step": 2041 }, { "epoch": 0.25089077282221406, "grad_norm": 0.36570049582225816, "learning_rate": 2.7967267087180018e-05, "loss": 0.4216, "step": 2042 }, { "epoch": 0.2510136380390711, "grad_norm": 0.32524148559491567, "learning_rate": 2.7964032137386192e-05, "loss": 0.3978, "step": 2043 }, { "epoch": 0.25113650325592823, "grad_norm": 0.35327135884439287, "learning_rate": 2.796079480294273e-05, "loss": 0.4367, "step": 2044 }, { "epoch": 0.25125936847278535, "grad_norm": 0.3579090547321058, "learning_rate": 2.7957555084445114e-05, "loss": 0.3436, "step": 2045 }, { "epoch": 0.25138223368964246, "grad_norm": 0.3563564476406658, "learning_rate": 2.7954312982489278e-05, "loss": 0.3957, "step": 2046 }, { "epoch": 0.2515050989064996, "grad_norm": 0.35062521642926003, "learning_rate": 2.7951068497671582e-05, "loss": 0.3803, "step": 2047 }, { "epoch": 0.2516279641233567, "grad_norm": 0.478989341621803, "learning_rate": 2.794782163058882e-05, "loss": 0.4637, "step": 2048 }, { "epoch": 0.2517508293402138, "grad_norm": 0.3741153714180866, "learning_rate": 2.794457238183824e-05, "loss": 0.414, "step": 2049 }, { "epoch": 0.25187369455707087, "grad_norm": 0.3663239928093069, "learning_rate": 2.7941320752017522e-05, "loss": 0.3617, "step": 2050 }, { "epoch": 0.251996559773928, "grad_norm": 0.3921057642964173, "learning_rate": 2.793806674172478e-05, "loss": 0.4006, "step": 2051 }, { "epoch": 0.2521194249907851, "grad_norm": 0.35751605020365357, "learning_rate": 2.7934810351558565e-05, "loss": 0.4086, "step": 2052 }, { "epoch": 0.2522422902076422, "grad_norm": 0.3597730635572695, "learning_rate": 2.7931551582117868e-05, "loss": 0.4244, "step": 2053 }, { "epoch": 0.25236515542449933, "grad_norm": 0.42140034707530377, "learning_rate": 2.7928290434002122e-05, "loss": 0.4856, "step": 2054 }, { "epoch": 0.25248802064135645, "grad_norm": 0.36257879773818613, "learning_rate": 2.79250269078112e-05, "loss": 0.3979, "step": 2055 }, { "epoch": 0.25261088585821356, "grad_norm": 0.30549708156707117, "learning_rate": 2.7921761004145397e-05, "loss": 0.3779, "step": 2056 }, { "epoch": 0.2527337510750706, "grad_norm": 0.3207284721134734, "learning_rate": 2.7918492723605453e-05, "loss": 0.3811, "step": 2057 }, { "epoch": 0.25285661629192774, "grad_norm": 0.3983695986644132, "learning_rate": 2.791522206679256e-05, "loss": 0.4437, "step": 2058 }, { "epoch": 0.25297948150878485, "grad_norm": 0.3832393448695563, "learning_rate": 2.7911949034308318e-05, "loss": 0.3649, "step": 2059 }, { "epoch": 0.25310234672564197, "grad_norm": 0.33982460451509383, "learning_rate": 2.7908673626754794e-05, "loss": 0.3161, "step": 2060 }, { "epoch": 0.2532252119424991, "grad_norm": 0.3001829031610312, "learning_rate": 2.7905395844734468e-05, "loss": 0.3359, "step": 2061 }, { "epoch": 0.2533480771593562, "grad_norm": 0.537940420865946, "learning_rate": 2.7902115688850272e-05, "loss": 0.4237, "step": 2062 }, { "epoch": 0.2534709423762133, "grad_norm": 0.40080130116940305, "learning_rate": 2.789883315970557e-05, "loss": 0.4289, "step": 2063 }, { "epoch": 0.2535938075930704, "grad_norm": 0.32650280816433813, "learning_rate": 2.7895548257904157e-05, "loss": 0.3942, "step": 2064 }, { "epoch": 0.2537166728099275, "grad_norm": 0.38761041268167834, "learning_rate": 2.789226098405028e-05, "loss": 0.3897, "step": 2065 }, { "epoch": 0.2538395380267846, "grad_norm": 0.38858169038262685, "learning_rate": 2.7888971338748595e-05, "loss": 0.3829, "step": 2066 }, { "epoch": 0.2539624032436417, "grad_norm": 0.3558734067820138, "learning_rate": 2.7885679322604223e-05, "loss": 0.483, "step": 2067 }, { "epoch": 0.25408526846049884, "grad_norm": 0.3271947376552353, "learning_rate": 2.7882384936222703e-05, "loss": 0.44, "step": 2068 }, { "epoch": 0.25420813367735595, "grad_norm": 0.3785429726620261, "learning_rate": 2.787908818021003e-05, "loss": 0.3848, "step": 2069 }, { "epoch": 0.25433099889421307, "grad_norm": 0.4497481788120573, "learning_rate": 2.78757890551726e-05, "loss": 0.4482, "step": 2070 }, { "epoch": 0.2544538641110702, "grad_norm": 0.39495890438562237, "learning_rate": 2.7872487561717277e-05, "loss": 0.4706, "step": 2071 }, { "epoch": 0.25457672932792724, "grad_norm": 0.36785337299658233, "learning_rate": 2.7869183700451352e-05, "loss": 0.3699, "step": 2072 }, { "epoch": 0.25469959454478436, "grad_norm": 0.3588088045801959, "learning_rate": 2.7865877471982544e-05, "loss": 0.4914, "step": 2073 }, { "epoch": 0.2548224597616415, "grad_norm": 0.46543827451199155, "learning_rate": 2.786256887691901e-05, "loss": 0.5152, "step": 2074 }, { "epoch": 0.2549453249784986, "grad_norm": 0.4330220333009962, "learning_rate": 2.785925791586935e-05, "loss": 0.4324, "step": 2075 }, { "epoch": 0.2550681901953557, "grad_norm": 0.37275914674269245, "learning_rate": 2.785594458944259e-05, "loss": 0.3891, "step": 2076 }, { "epoch": 0.2551910554122128, "grad_norm": 0.2821903004036247, "learning_rate": 2.7852628898248203e-05, "loss": 0.3771, "step": 2077 }, { "epoch": 0.25531392062906993, "grad_norm": 0.4812171391098781, "learning_rate": 2.7849310842896074e-05, "loss": 0.4076, "step": 2078 }, { "epoch": 0.255436785845927, "grad_norm": 0.33606808487757484, "learning_rate": 2.7845990423996548e-05, "loss": 0.3797, "step": 2079 }, { "epoch": 0.2555596510627841, "grad_norm": 0.3468139762210836, "learning_rate": 2.7842667642160394e-05, "loss": 0.3619, "step": 2080 }, { "epoch": 0.2556825162796412, "grad_norm": 0.34554789676075026, "learning_rate": 2.7839342497998813e-05, "loss": 0.366, "step": 2081 }, { "epoch": 0.25580538149649834, "grad_norm": 0.36560106101010403, "learning_rate": 2.783601499212345e-05, "loss": 0.5418, "step": 2082 }, { "epoch": 0.25592824671335546, "grad_norm": 0.3269825409217582, "learning_rate": 2.783268512514637e-05, "loss": 0.3448, "step": 2083 }, { "epoch": 0.25605111193021257, "grad_norm": 0.340495868799829, "learning_rate": 2.7829352897680087e-05, "loss": 0.4171, "step": 2084 }, { "epoch": 0.2561739771470697, "grad_norm": 0.38600799346985426, "learning_rate": 2.782601831033754e-05, "loss": 0.4225, "step": 2085 }, { "epoch": 0.25629684236392675, "grad_norm": 0.687699599038792, "learning_rate": 2.7822681363732104e-05, "loss": 0.5299, "step": 2086 }, { "epoch": 0.25641970758078386, "grad_norm": 0.41911053198178877, "learning_rate": 2.7819342058477584e-05, "loss": 0.3739, "step": 2087 }, { "epoch": 0.256542572797641, "grad_norm": 0.3492517917550341, "learning_rate": 2.7816000395188232e-05, "loss": 0.3863, "step": 2088 }, { "epoch": 0.2566654380144981, "grad_norm": 0.4400888638288792, "learning_rate": 2.7812656374478723e-05, "loss": 0.4407, "step": 2089 }, { "epoch": 0.2567883032313552, "grad_norm": 0.3739986062864609, "learning_rate": 2.780930999696417e-05, "loss": 0.3721, "step": 2090 }, { "epoch": 0.2569111684482123, "grad_norm": 0.3718840008315654, "learning_rate": 2.7805961263260108e-05, "loss": 0.4242, "step": 2091 }, { "epoch": 0.25703403366506944, "grad_norm": 0.45375117031793577, "learning_rate": 2.7802610173982523e-05, "loss": 0.3665, "step": 2092 }, { "epoch": 0.2571568988819265, "grad_norm": 0.3532872433685177, "learning_rate": 2.7799256729747825e-05, "loss": 0.3566, "step": 2093 }, { "epoch": 0.2572797640987836, "grad_norm": 0.28049542294550084, "learning_rate": 2.7795900931172856e-05, "loss": 0.3317, "step": 2094 }, { "epoch": 0.25740262931564073, "grad_norm": 0.7056607495967341, "learning_rate": 2.7792542778874896e-05, "loss": 0.5279, "step": 2095 }, { "epoch": 0.25752549453249785, "grad_norm": 0.425762241099795, "learning_rate": 2.778918227347166e-05, "loss": 0.4518, "step": 2096 }, { "epoch": 0.25764835974935496, "grad_norm": 0.33057378230335244, "learning_rate": 2.778581941558128e-05, "loss": 0.3412, "step": 2097 }, { "epoch": 0.2577712249662121, "grad_norm": 0.4337123483735789, "learning_rate": 2.778245420582234e-05, "loss": 0.3932, "step": 2098 }, { "epoch": 0.2578940901830692, "grad_norm": 0.35973573199088693, "learning_rate": 2.777908664481384e-05, "loss": 0.4058, "step": 2099 }, { "epoch": 0.2580169553999263, "grad_norm": 0.3326237255614524, "learning_rate": 2.7775716733175232e-05, "loss": 0.3662, "step": 2100 }, { "epoch": 0.25813982061678337, "grad_norm": 0.3845399063989931, "learning_rate": 2.7772344471526385e-05, "loss": 0.4761, "step": 2101 }, { "epoch": 0.2582626858336405, "grad_norm": 0.4400584062235191, "learning_rate": 2.77689698604876e-05, "loss": 0.4415, "step": 2102 }, { "epoch": 0.2583855510504976, "grad_norm": 0.37265395454973704, "learning_rate": 2.7765592900679622e-05, "loss": 0.3454, "step": 2103 }, { "epoch": 0.2585084162673547, "grad_norm": 0.3647758602291026, "learning_rate": 2.7762213592723616e-05, "loss": 0.3488, "step": 2104 }, { "epoch": 0.25863128148421183, "grad_norm": 0.4046646869511428, "learning_rate": 2.7758831937241188e-05, "loss": 0.4268, "step": 2105 }, { "epoch": 0.25875414670106894, "grad_norm": 0.3753608208472984, "learning_rate": 2.775544793485437e-05, "loss": 0.4159, "step": 2106 }, { "epoch": 0.25887701191792606, "grad_norm": 0.37420257058643724, "learning_rate": 2.775206158618562e-05, "loss": 0.4935, "step": 2107 }, { "epoch": 0.2589998771347831, "grad_norm": 0.37114166788159403, "learning_rate": 2.7748672891857847e-05, "loss": 0.4036, "step": 2108 }, { "epoch": 0.25912274235164023, "grad_norm": 0.34682609664039643, "learning_rate": 2.7745281852494373e-05, "loss": 0.3822, "step": 2109 }, { "epoch": 0.25924560756849735, "grad_norm": 0.3989402808992689, "learning_rate": 2.7741888468718956e-05, "loss": 0.3777, "step": 2110 }, { "epoch": 0.25936847278535446, "grad_norm": 0.37203410634483575, "learning_rate": 2.773849274115579e-05, "loss": 0.4202, "step": 2111 }, { "epoch": 0.2594913380022116, "grad_norm": 0.3883131423439263, "learning_rate": 2.77350946704295e-05, "loss": 0.4568, "step": 2112 }, { "epoch": 0.2596142032190687, "grad_norm": 0.37197164227713386, "learning_rate": 2.7731694257165126e-05, "loss": 0.4327, "step": 2113 }, { "epoch": 0.2597370684359258, "grad_norm": 0.44869204326948464, "learning_rate": 2.7728291501988173e-05, "loss": 0.4462, "step": 2114 }, { "epoch": 0.25985993365278287, "grad_norm": 0.4762009081092052, "learning_rate": 2.7724886405524536e-05, "loss": 0.4712, "step": 2115 }, { "epoch": 0.25998279886964, "grad_norm": 0.3778701157926593, "learning_rate": 2.7721478968400573e-05, "loss": 0.3844, "step": 2116 }, { "epoch": 0.2601056640864971, "grad_norm": 0.35316097056056583, "learning_rate": 2.771806919124305e-05, "loss": 0.4503, "step": 2117 }, { "epoch": 0.2602285293033542, "grad_norm": 0.4285845343464826, "learning_rate": 2.771465707467918e-05, "loss": 0.4156, "step": 2118 }, { "epoch": 0.26035139452021133, "grad_norm": 0.37244104691466445, "learning_rate": 2.7711242619336605e-05, "loss": 0.3604, "step": 2119 }, { "epoch": 0.26047425973706845, "grad_norm": 0.4441543355304311, "learning_rate": 2.7707825825843382e-05, "loss": 0.4575, "step": 2120 }, { "epoch": 0.26059712495392556, "grad_norm": 0.3792268512586197, "learning_rate": 2.770440669482801e-05, "loss": 0.4669, "step": 2121 }, { "epoch": 0.2607199901707827, "grad_norm": 0.37080062927737245, "learning_rate": 2.7700985226919415e-05, "loss": 0.4905, "step": 2122 }, { "epoch": 0.26084285538763974, "grad_norm": 0.4023667400362161, "learning_rate": 2.769756142274696e-05, "loss": 0.4684, "step": 2123 }, { "epoch": 0.26096572060449685, "grad_norm": 0.38513303140637883, "learning_rate": 2.769413528294043e-05, "loss": 0.3776, "step": 2124 }, { "epoch": 0.26108858582135397, "grad_norm": 0.3728787654296303, "learning_rate": 2.7690706808130037e-05, "loss": 0.377, "step": 2125 }, { "epoch": 0.2612114510382111, "grad_norm": 0.39059800171255254, "learning_rate": 2.7687275998946426e-05, "loss": 0.3235, "step": 2126 }, { "epoch": 0.2613343162550682, "grad_norm": 0.430626810080421, "learning_rate": 2.768384285602068e-05, "loss": 0.4611, "step": 2127 }, { "epoch": 0.2614571814719253, "grad_norm": 0.3713020357185113, "learning_rate": 2.7680407379984292e-05, "loss": 0.3573, "step": 2128 }, { "epoch": 0.26158004668878243, "grad_norm": 0.4183774671441372, "learning_rate": 2.7676969571469207e-05, "loss": 0.4667, "step": 2129 }, { "epoch": 0.2617029119056395, "grad_norm": 0.31960696919981213, "learning_rate": 2.7673529431107777e-05, "loss": 0.3183, "step": 2130 }, { "epoch": 0.2618257771224966, "grad_norm": 0.33897741427878675, "learning_rate": 2.7670086959532807e-05, "loss": 0.4173, "step": 2131 }, { "epoch": 0.2619486423393537, "grad_norm": 0.4194969992632875, "learning_rate": 2.7666642157377504e-05, "loss": 0.4006, "step": 2132 }, { "epoch": 0.26207150755621084, "grad_norm": 0.4081096487958882, "learning_rate": 2.766319502527552e-05, "loss": 0.5315, "step": 2133 }, { "epoch": 0.26219437277306795, "grad_norm": 0.38417533981263746, "learning_rate": 2.765974556386094e-05, "loss": 0.4233, "step": 2134 }, { "epoch": 0.26231723798992507, "grad_norm": 0.3625980033740131, "learning_rate": 2.7656293773768262e-05, "loss": 0.3627, "step": 2135 }, { "epoch": 0.2624401032067822, "grad_norm": 0.3527448442380701, "learning_rate": 2.7652839655632423e-05, "loss": 0.3959, "step": 2136 }, { "epoch": 0.26256296842363924, "grad_norm": 0.32810517591687516, "learning_rate": 2.764938321008879e-05, "loss": 0.3618, "step": 2137 }, { "epoch": 0.26268583364049636, "grad_norm": 0.29830305058433465, "learning_rate": 2.7645924437773144e-05, "loss": 0.4029, "step": 2138 }, { "epoch": 0.2628086988573535, "grad_norm": 0.3552556059122855, "learning_rate": 2.764246333932171e-05, "loss": 0.4625, "step": 2139 }, { "epoch": 0.2629315640742106, "grad_norm": 0.47193391754308955, "learning_rate": 2.7638999915371137e-05, "loss": 0.4856, "step": 2140 }, { "epoch": 0.2630544292910677, "grad_norm": 0.36069580681756536, "learning_rate": 2.7635534166558495e-05, "loss": 0.4277, "step": 2141 }, { "epoch": 0.2631772945079248, "grad_norm": 0.28821610236841766, "learning_rate": 2.7632066093521283e-05, "loss": 0.3528, "step": 2142 }, { "epoch": 0.26330015972478193, "grad_norm": 0.3536493711238289, "learning_rate": 2.7628595696897443e-05, "loss": 0.3802, "step": 2143 }, { "epoch": 0.263423024941639, "grad_norm": 0.43526132415541235, "learning_rate": 2.7625122977325318e-05, "loss": 0.3418, "step": 2144 }, { "epoch": 0.2635458901584961, "grad_norm": 0.40688617870327426, "learning_rate": 2.76216479354437e-05, "loss": 0.4714, "step": 2145 }, { "epoch": 0.2636687553753532, "grad_norm": 0.37318045146765244, "learning_rate": 2.76181705718918e-05, "loss": 0.4063, "step": 2146 }, { "epoch": 0.26379162059221034, "grad_norm": 0.3835370911942022, "learning_rate": 2.7614690887309253e-05, "loss": 0.4182, "step": 2147 }, { "epoch": 0.26391448580906746, "grad_norm": 0.3310481938446071, "learning_rate": 2.7611208882336128e-05, "loss": 0.3561, "step": 2148 }, { "epoch": 0.26403735102592457, "grad_norm": 0.3783573244128056, "learning_rate": 2.760772455761291e-05, "loss": 0.3806, "step": 2149 }, { "epoch": 0.2641602162427817, "grad_norm": 0.4222987174971872, "learning_rate": 2.7604237913780533e-05, "loss": 0.5102, "step": 2150 }, { "epoch": 0.2642830814596388, "grad_norm": 0.3930208407869158, "learning_rate": 2.7600748951480325e-05, "loss": 0.3463, "step": 2151 }, { "epoch": 0.26440594667649586, "grad_norm": 0.3544028891568756, "learning_rate": 2.7597257671354072e-05, "loss": 0.4507, "step": 2152 }, { "epoch": 0.264528811893353, "grad_norm": 0.3431919566102782, "learning_rate": 2.7593764074043966e-05, "loss": 0.4571, "step": 2153 }, { "epoch": 0.2646516771102101, "grad_norm": 0.3850037940283426, "learning_rate": 2.759026816019263e-05, "loss": 0.4586, "step": 2154 }, { "epoch": 0.2647745423270672, "grad_norm": 0.359928328891065, "learning_rate": 2.7586769930443114e-05, "loss": 0.3922, "step": 2155 }, { "epoch": 0.2648974075439243, "grad_norm": 0.4003701613645168, "learning_rate": 2.7583269385438903e-05, "loss": 0.442, "step": 2156 }, { "epoch": 0.26502027276078144, "grad_norm": 0.38465653956112655, "learning_rate": 2.7579766525823888e-05, "loss": 0.4192, "step": 2157 }, { "epoch": 0.26514313797763855, "grad_norm": 0.39829319898474536, "learning_rate": 2.7576261352242407e-05, "loss": 0.3858, "step": 2158 }, { "epoch": 0.2652660031944956, "grad_norm": 0.3332105709409756, "learning_rate": 2.757275386533921e-05, "loss": 0.3639, "step": 2159 }, { "epoch": 0.26538886841135273, "grad_norm": 0.38238325757344194, "learning_rate": 2.7569244065759478e-05, "loss": 0.3699, "step": 2160 }, { "epoch": 0.26551173362820985, "grad_norm": 0.3872486247428541, "learning_rate": 2.756573195414881e-05, "loss": 0.3898, "step": 2161 }, { "epoch": 0.26563459884506696, "grad_norm": 0.34552523003848656, "learning_rate": 2.7562217531153248e-05, "loss": 0.3938, "step": 2162 }, { "epoch": 0.2657574640619241, "grad_norm": 0.44538501173494865, "learning_rate": 2.7558700797419233e-05, "loss": 0.3839, "step": 2163 }, { "epoch": 0.2658803292787812, "grad_norm": 0.4630088992215764, "learning_rate": 2.755518175359365e-05, "loss": 0.3942, "step": 2164 }, { "epoch": 0.2660031944956383, "grad_norm": 0.42223486334847726, "learning_rate": 2.7551660400323817e-05, "loss": 0.38, "step": 2165 }, { "epoch": 0.26612605971249537, "grad_norm": 0.4787580696725763, "learning_rate": 2.7548136738257447e-05, "loss": 0.4206, "step": 2166 }, { "epoch": 0.2662489249293525, "grad_norm": 0.3293750013052322, "learning_rate": 2.7544610768042698e-05, "loss": 0.4469, "step": 2167 }, { "epoch": 0.2663717901462096, "grad_norm": 0.3855776438093573, "learning_rate": 2.754108249032816e-05, "loss": 0.3658, "step": 2168 }, { "epoch": 0.2664946553630667, "grad_norm": 0.37382076532152164, "learning_rate": 2.753755190576283e-05, "loss": 0.4394, "step": 2169 }, { "epoch": 0.26661752057992383, "grad_norm": 0.4174834877655602, "learning_rate": 2.7534019014996132e-05, "loss": 0.461, "step": 2170 }, { "epoch": 0.26674038579678094, "grad_norm": 0.3389966394243485, "learning_rate": 2.753048381867792e-05, "loss": 0.4179, "step": 2171 }, { "epoch": 0.26686325101363806, "grad_norm": 0.37721347289726065, "learning_rate": 2.7526946317458474e-05, "loss": 0.3699, "step": 2172 }, { "epoch": 0.2669861162304952, "grad_norm": 0.4102101426494123, "learning_rate": 2.7523406511988497e-05, "loss": 0.4252, "step": 2173 }, { "epoch": 0.26710898144735223, "grad_norm": 0.32531863358630186, "learning_rate": 2.7519864402919108e-05, "loss": 0.3882, "step": 2174 }, { "epoch": 0.26723184666420935, "grad_norm": 0.3179790041113913, "learning_rate": 2.7516319990901857e-05, "loss": 0.3975, "step": 2175 }, { "epoch": 0.26735471188106646, "grad_norm": 0.47781535145730186, "learning_rate": 2.751277327658871e-05, "loss": 0.3845, "step": 2176 }, { "epoch": 0.2674775770979236, "grad_norm": 0.3396865465911544, "learning_rate": 2.750922426063207e-05, "loss": 0.3856, "step": 2177 }, { "epoch": 0.2676004423147807, "grad_norm": 0.29511935715542603, "learning_rate": 2.7505672943684753e-05, "loss": 0.3237, "step": 2178 }, { "epoch": 0.2677233075316378, "grad_norm": 0.3667301570235728, "learning_rate": 2.7502119326399997e-05, "loss": 0.4027, "step": 2179 }, { "epoch": 0.2678461727484949, "grad_norm": 0.41411773334917357, "learning_rate": 2.7498563409431475e-05, "loss": 0.4148, "step": 2180 }, { "epoch": 0.267969037965352, "grad_norm": 0.44140547693711285, "learning_rate": 2.7495005193433266e-05, "loss": 0.3617, "step": 2181 }, { "epoch": 0.2680919031822091, "grad_norm": 0.3657063410443397, "learning_rate": 2.749144467905989e-05, "loss": 0.3989, "step": 2182 }, { "epoch": 0.2682147683990662, "grad_norm": 0.3731278721878307, "learning_rate": 2.7487881866966268e-05, "loss": 0.3899, "step": 2183 }, { "epoch": 0.26833763361592333, "grad_norm": 0.40591777803329054, "learning_rate": 2.7484316757807768e-05, "loss": 0.4235, "step": 2184 }, { "epoch": 0.26846049883278045, "grad_norm": 0.37942447654826705, "learning_rate": 2.7480749352240166e-05, "loss": 0.3545, "step": 2185 }, { "epoch": 0.26858336404963756, "grad_norm": 0.3256341176387368, "learning_rate": 2.7477179650919654e-05, "loss": 0.3891, "step": 2186 }, { "epoch": 0.2687062292664947, "grad_norm": 0.31270073727756853, "learning_rate": 2.7473607654502866e-05, "loss": 0.3653, "step": 2187 }, { "epoch": 0.26882909448335174, "grad_norm": 0.36558431634185795, "learning_rate": 2.7470033363646846e-05, "loss": 0.3681, "step": 2188 }, { "epoch": 0.26895195970020885, "grad_norm": 0.4273099257437011, "learning_rate": 2.7466456779009058e-05, "loss": 0.4038, "step": 2189 }, { "epoch": 0.26907482491706597, "grad_norm": 0.39006847562637226, "learning_rate": 2.746287790124739e-05, "loss": 0.4398, "step": 2190 }, { "epoch": 0.2691976901339231, "grad_norm": 0.45630843022177653, "learning_rate": 2.7459296731020163e-05, "loss": 0.3894, "step": 2191 }, { "epoch": 0.2693205553507802, "grad_norm": 0.39647717076075883, "learning_rate": 2.7455713268986098e-05, "loss": 0.4053, "step": 2192 }, { "epoch": 0.2694434205676373, "grad_norm": 0.328924588231004, "learning_rate": 2.7452127515804357e-05, "loss": 0.3679, "step": 2193 }, { "epoch": 0.26956628578449443, "grad_norm": 0.41842828653581343, "learning_rate": 2.7448539472134518e-05, "loss": 0.4072, "step": 2194 }, { "epoch": 0.2696891510013515, "grad_norm": 0.40051146629781853, "learning_rate": 2.7444949138636576e-05, "loss": 0.3709, "step": 2195 }, { "epoch": 0.2698120162182086, "grad_norm": 0.3682938780583275, "learning_rate": 2.744135651597094e-05, "loss": 0.3818, "step": 2196 }, { "epoch": 0.2699348814350657, "grad_norm": 0.3602054666687116, "learning_rate": 2.7437761604798465e-05, "loss": 0.3692, "step": 2197 }, { "epoch": 0.27005774665192284, "grad_norm": 0.38328030916078726, "learning_rate": 2.74341644057804e-05, "loss": 0.4393, "step": 2198 }, { "epoch": 0.27018061186877995, "grad_norm": 0.3687903126671606, "learning_rate": 2.7430564919578432e-05, "loss": 0.3687, "step": 2199 }, { "epoch": 0.27030347708563707, "grad_norm": 0.34934487625730815, "learning_rate": 2.742696314685466e-05, "loss": 0.4624, "step": 2200 }, { "epoch": 0.2704263423024942, "grad_norm": 0.35766052793989483, "learning_rate": 2.7423359088271614e-05, "loss": 0.3563, "step": 2201 }, { "epoch": 0.2705492075193513, "grad_norm": 0.3440345161725449, "learning_rate": 2.741975274449223e-05, "loss": 0.3774, "step": 2202 }, { "epoch": 0.27067207273620836, "grad_norm": 0.4547805339647031, "learning_rate": 2.7416144116179876e-05, "loss": 0.4932, "step": 2203 }, { "epoch": 0.2707949379530655, "grad_norm": 0.38463495998282343, "learning_rate": 2.741253320399833e-05, "loss": 0.491, "step": 2204 }, { "epoch": 0.2709178031699226, "grad_norm": 0.37841975082893925, "learning_rate": 2.74089200086118e-05, "loss": 0.3732, "step": 2205 }, { "epoch": 0.2710406683867797, "grad_norm": 0.44590425702206865, "learning_rate": 2.740530453068491e-05, "loss": 0.3737, "step": 2206 }, { "epoch": 0.2711635336036368, "grad_norm": 0.3630654289113329, "learning_rate": 2.74016867708827e-05, "loss": 0.5244, "step": 2207 }, { "epoch": 0.27128639882049393, "grad_norm": 0.34194062736873987, "learning_rate": 2.7398066729870637e-05, "loss": 0.3624, "step": 2208 }, { "epoch": 0.27140926403735105, "grad_norm": 0.401170021134594, "learning_rate": 2.739444440831461e-05, "loss": 0.442, "step": 2209 }, { "epoch": 0.2715321292542081, "grad_norm": 0.39232581565697017, "learning_rate": 2.7390819806880906e-05, "loss": 0.3606, "step": 2210 }, { "epoch": 0.2716549944710652, "grad_norm": 0.36880836664007616, "learning_rate": 2.738719292623626e-05, "loss": 0.4013, "step": 2211 }, { "epoch": 0.27177785968792234, "grad_norm": 0.3743208299340842, "learning_rate": 2.7383563767047808e-05, "loss": 0.4395, "step": 2212 }, { "epoch": 0.27190072490477946, "grad_norm": 0.4614841470560727, "learning_rate": 2.7379932329983114e-05, "loss": 0.3626, "step": 2213 }, { "epoch": 0.27202359012163657, "grad_norm": 0.41393090929468856, "learning_rate": 2.737629861571015e-05, "loss": 0.4863, "step": 2214 }, { "epoch": 0.2721464553384937, "grad_norm": 0.333399166880551, "learning_rate": 2.737266262489732e-05, "loss": 0.3725, "step": 2215 }, { "epoch": 0.2722693205553508, "grad_norm": 0.36989178329345923, "learning_rate": 2.7369024358213436e-05, "loss": 0.5373, "step": 2216 }, { "epoch": 0.27239218577220786, "grad_norm": 0.37413792220538533, "learning_rate": 2.7365383816327746e-05, "loss": 0.4002, "step": 2217 }, { "epoch": 0.272515050989065, "grad_norm": 0.3648095685107889, "learning_rate": 2.736174099990989e-05, "loss": 0.4157, "step": 2218 }, { "epoch": 0.2726379162059221, "grad_norm": 0.38134852635817, "learning_rate": 2.7358095909629947e-05, "loss": 0.45, "step": 2219 }, { "epoch": 0.2727607814227792, "grad_norm": 0.39820998190238727, "learning_rate": 2.735444854615841e-05, "loss": 0.4144, "step": 2220 }, { "epoch": 0.2728836466396363, "grad_norm": 0.336063611891851, "learning_rate": 2.7350798910166176e-05, "loss": 0.4287, "step": 2221 }, { "epoch": 0.27300651185649344, "grad_norm": 0.3701655063321923, "learning_rate": 2.7347147002324587e-05, "loss": 0.3988, "step": 2222 }, { "epoch": 0.27312937707335055, "grad_norm": 0.3476800616365069, "learning_rate": 2.7343492823305377e-05, "loss": 0.4062, "step": 2223 }, { "epoch": 0.27325224229020767, "grad_norm": 0.3622846933452841, "learning_rate": 2.7339836373780712e-05, "loss": 0.3467, "step": 2224 }, { "epoch": 0.27337510750706473, "grad_norm": 0.32982503391027634, "learning_rate": 2.733617765442318e-05, "loss": 0.3386, "step": 2225 }, { "epoch": 0.27349797272392184, "grad_norm": 0.4580866192429457, "learning_rate": 2.7332516665905763e-05, "loss": 0.4593, "step": 2226 }, { "epoch": 0.27362083794077896, "grad_norm": 0.406469777901587, "learning_rate": 2.732885340890189e-05, "loss": 0.465, "step": 2227 }, { "epoch": 0.2737437031576361, "grad_norm": 0.3477535516076541, "learning_rate": 2.7325187884085385e-05, "loss": 0.4522, "step": 2228 }, { "epoch": 0.2738665683744932, "grad_norm": 0.33758660314537725, "learning_rate": 2.73215200921305e-05, "loss": 0.3552, "step": 2229 }, { "epoch": 0.2739894335913503, "grad_norm": 0.4172586480571998, "learning_rate": 2.7317850033711903e-05, "loss": 0.3707, "step": 2230 }, { "epoch": 0.2741122988082074, "grad_norm": 0.39545423098368493, "learning_rate": 2.7314177709504674e-05, "loss": 0.5045, "step": 2231 }, { "epoch": 0.2742351640250645, "grad_norm": 0.3619727991721719, "learning_rate": 2.7310503120184326e-05, "loss": 0.2955, "step": 2232 }, { "epoch": 0.2743580292419216, "grad_norm": 0.5117916904836272, "learning_rate": 2.730682626642675e-05, "loss": 0.5112, "step": 2233 }, { "epoch": 0.2744808944587787, "grad_norm": 0.39069394331488727, "learning_rate": 2.7303147148908305e-05, "loss": 0.3757, "step": 2234 }, { "epoch": 0.27460375967563583, "grad_norm": 0.4530939062549658, "learning_rate": 2.729946576830573e-05, "loss": 0.3744, "step": 2235 }, { "epoch": 0.27472662489249294, "grad_norm": 0.32278431606713076, "learning_rate": 2.7295782125296188e-05, "loss": 0.5236, "step": 2236 }, { "epoch": 0.27484949010935006, "grad_norm": 0.30812264522749894, "learning_rate": 2.7292096220557267e-05, "loss": 0.3561, "step": 2237 }, { "epoch": 0.2749723553262072, "grad_norm": 0.43028181965916723, "learning_rate": 2.728840805476696e-05, "loss": 0.3726, "step": 2238 }, { "epoch": 0.27509522054306423, "grad_norm": 0.35731816567232455, "learning_rate": 2.728471762860369e-05, "loss": 0.3691, "step": 2239 }, { "epoch": 0.27521808575992135, "grad_norm": 0.39072130140375266, "learning_rate": 2.728102494274628e-05, "loss": 0.3643, "step": 2240 }, { "epoch": 0.27534095097677846, "grad_norm": 0.39475031206985683, "learning_rate": 2.7277329997873974e-05, "loss": 0.4979, "step": 2241 }, { "epoch": 0.2754638161936356, "grad_norm": 0.36167870415458636, "learning_rate": 2.727363279466644e-05, "loss": 0.4065, "step": 2242 }, { "epoch": 0.2755866814104927, "grad_norm": 0.3730857468465115, "learning_rate": 2.726993333380375e-05, "loss": 0.3338, "step": 2243 }, { "epoch": 0.2757095466273498, "grad_norm": 0.3068875557282444, "learning_rate": 2.7266231615966396e-05, "loss": 0.4193, "step": 2244 }, { "epoch": 0.2758324118442069, "grad_norm": 0.3745845247251703, "learning_rate": 2.726252764183528e-05, "loss": 0.4265, "step": 2245 }, { "epoch": 0.275955277061064, "grad_norm": 0.3654470050762839, "learning_rate": 2.7258821412091735e-05, "loss": 0.3283, "step": 2246 }, { "epoch": 0.2760781422779211, "grad_norm": 0.33932415744913524, "learning_rate": 2.7255112927417494e-05, "loss": 0.3686, "step": 2247 }, { "epoch": 0.2762010074947782, "grad_norm": 0.35625785482478106, "learning_rate": 2.7251402188494704e-05, "loss": 0.4638, "step": 2248 }, { "epoch": 0.27632387271163533, "grad_norm": 0.5043751617207769, "learning_rate": 2.7247689196005935e-05, "loss": 0.3757, "step": 2249 }, { "epoch": 0.27644673792849245, "grad_norm": 0.4150007302697716, "learning_rate": 2.7243973950634165e-05, "loss": 0.387, "step": 2250 }, { "epoch": 0.27656960314534956, "grad_norm": 0.3492272566780636, "learning_rate": 2.7240256453062796e-05, "loss": 0.4021, "step": 2251 }, { "epoch": 0.2766924683622067, "grad_norm": 0.3559396002807903, "learning_rate": 2.7236536703975633e-05, "loss": 0.386, "step": 2252 }, { "epoch": 0.2768153335790638, "grad_norm": 0.34792527252304356, "learning_rate": 2.7232814704056902e-05, "loss": 0.3892, "step": 2253 }, { "epoch": 0.27693819879592085, "grad_norm": 0.32561641010589487, "learning_rate": 2.7229090453991238e-05, "loss": 0.443, "step": 2254 }, { "epoch": 0.27706106401277797, "grad_norm": 0.3535471811190515, "learning_rate": 2.722536395446369e-05, "loss": 0.3731, "step": 2255 }, { "epoch": 0.2771839292296351, "grad_norm": 0.35921282076438, "learning_rate": 2.7221635206159725e-05, "loss": 0.412, "step": 2256 }, { "epoch": 0.2773067944464922, "grad_norm": 0.36602836947151585, "learning_rate": 2.721790420976523e-05, "loss": 0.4386, "step": 2257 }, { "epoch": 0.2774296596633493, "grad_norm": 0.416365955938801, "learning_rate": 2.721417096596649e-05, "loss": 0.3974, "step": 2258 }, { "epoch": 0.27755252488020643, "grad_norm": 0.29672990444979425, "learning_rate": 2.7210435475450207e-05, "loss": 0.4098, "step": 2259 }, { "epoch": 0.27767539009706355, "grad_norm": 0.3959057939063712, "learning_rate": 2.7206697738903513e-05, "loss": 0.5319, "step": 2260 }, { "epoch": 0.2777982553139206, "grad_norm": 0.3515367341597082, "learning_rate": 2.720295775701393e-05, "loss": 0.3648, "step": 2261 }, { "epoch": 0.2779211205307777, "grad_norm": 0.3065911394467738, "learning_rate": 2.719921553046941e-05, "loss": 0.475, "step": 2262 }, { "epoch": 0.27804398574763484, "grad_norm": 0.431066353971205, "learning_rate": 2.71954710599583e-05, "loss": 0.4619, "step": 2263 }, { "epoch": 0.27816685096449195, "grad_norm": 0.35437047442544134, "learning_rate": 2.719172434616938e-05, "loss": 0.3477, "step": 2264 }, { "epoch": 0.27828971618134907, "grad_norm": 0.37952417066736766, "learning_rate": 2.718797538979184e-05, "loss": 0.4995, "step": 2265 }, { "epoch": 0.2784125813982062, "grad_norm": 0.3790997296018319, "learning_rate": 2.7184224191515263e-05, "loss": 0.4031, "step": 2266 }, { "epoch": 0.2785354466150633, "grad_norm": 0.3959738529432102, "learning_rate": 2.718047075202967e-05, "loss": 0.4602, "step": 2267 }, { "epoch": 0.27865831183192036, "grad_norm": 0.3542027459386924, "learning_rate": 2.717671507202547e-05, "loss": 0.3935, "step": 2268 }, { "epoch": 0.2787811770487775, "grad_norm": 0.48229619097377263, "learning_rate": 2.71729571521935e-05, "loss": 0.3883, "step": 2269 }, { "epoch": 0.2789040422656346, "grad_norm": 0.3821263267310959, "learning_rate": 2.716919699322501e-05, "loss": 0.439, "step": 2270 }, { "epoch": 0.2790269074824917, "grad_norm": 0.3784138274454273, "learning_rate": 2.716543459581165e-05, "loss": 0.3982, "step": 2271 }, { "epoch": 0.2791497726993488, "grad_norm": 0.3527992587130074, "learning_rate": 2.7161669960645493e-05, "loss": 0.4195, "step": 2272 }, { "epoch": 0.27927263791620593, "grad_norm": 0.4190052927165937, "learning_rate": 2.7157903088419016e-05, "loss": 0.4671, "step": 2273 }, { "epoch": 0.27939550313306305, "grad_norm": 0.41105362783782917, "learning_rate": 2.7154133979825116e-05, "loss": 0.4328, "step": 2274 }, { "epoch": 0.2795183683499201, "grad_norm": 0.3485322545515182, "learning_rate": 2.715036263555709e-05, "loss": 0.469, "step": 2275 }, { "epoch": 0.2796412335667772, "grad_norm": 0.3106259449535212, "learning_rate": 2.714658905630866e-05, "loss": 0.406, "step": 2276 }, { "epoch": 0.27976409878363434, "grad_norm": 0.41999166868882465, "learning_rate": 2.714281324277394e-05, "loss": 0.468, "step": 2277 }, { "epoch": 0.27988696400049146, "grad_norm": 0.31916865253841065, "learning_rate": 2.7139035195647475e-05, "loss": 0.3852, "step": 2278 }, { "epoch": 0.28000982921734857, "grad_norm": 0.4165777196277802, "learning_rate": 2.7135254915624213e-05, "loss": 0.377, "step": 2279 }, { "epoch": 0.2801326944342057, "grad_norm": 0.4102898582682893, "learning_rate": 2.7131472403399505e-05, "loss": 0.4102, "step": 2280 }, { "epoch": 0.2802555596510628, "grad_norm": 0.3432733702324749, "learning_rate": 2.7127687659669126e-05, "loss": 0.4091, "step": 2281 }, { "epoch": 0.2803784248679199, "grad_norm": 0.36313164326652225, "learning_rate": 2.7123900685129253e-05, "loss": 0.4389, "step": 2282 }, { "epoch": 0.280501290084777, "grad_norm": 0.3943539318934042, "learning_rate": 2.7120111480476476e-05, "loss": 0.3156, "step": 2283 }, { "epoch": 0.2806241553016341, "grad_norm": 0.42796246457365317, "learning_rate": 2.7116320046407795e-05, "loss": 0.3776, "step": 2284 }, { "epoch": 0.2807470205184912, "grad_norm": 0.3533433989574731, "learning_rate": 2.7112526383620615e-05, "loss": 0.4586, "step": 2285 }, { "epoch": 0.2808698857353483, "grad_norm": 0.30364961339834934, "learning_rate": 2.710873049281276e-05, "loss": 0.3448, "step": 2286 }, { "epoch": 0.28099275095220544, "grad_norm": 0.44075071479941247, "learning_rate": 2.7104932374682462e-05, "loss": 0.439, "step": 2287 }, { "epoch": 0.28111561616906255, "grad_norm": 0.4003863508089219, "learning_rate": 2.7101132029928352e-05, "loss": 0.4139, "step": 2288 }, { "epoch": 0.28123848138591967, "grad_norm": 0.37807801155367465, "learning_rate": 2.7097329459249485e-05, "loss": 0.4342, "step": 2289 }, { "epoch": 0.28136134660277673, "grad_norm": 0.3669066216400288, "learning_rate": 2.7093524663345318e-05, "loss": 0.5447, "step": 2290 }, { "epoch": 0.28148421181963384, "grad_norm": 0.40950164185157456, "learning_rate": 2.7089717642915723e-05, "loss": 0.3839, "step": 2291 }, { "epoch": 0.28160707703649096, "grad_norm": 0.35959990555584076, "learning_rate": 2.7085908398660966e-05, "loss": 0.4549, "step": 2292 }, { "epoch": 0.2817299422533481, "grad_norm": 0.37199005240956834, "learning_rate": 2.7082096931281743e-05, "loss": 0.4032, "step": 2293 }, { "epoch": 0.2818528074702052, "grad_norm": 0.358735288199906, "learning_rate": 2.707828324147914e-05, "loss": 0.3756, "step": 2294 }, { "epoch": 0.2819756726870623, "grad_norm": 0.3929122994072859, "learning_rate": 2.707446732995467e-05, "loss": 0.4302, "step": 2295 }, { "epoch": 0.2820985379039194, "grad_norm": 0.373014367150953, "learning_rate": 2.7070649197410236e-05, "loss": 0.4208, "step": 2296 }, { "epoch": 0.2822214031207765, "grad_norm": 0.38670225870926433, "learning_rate": 2.7066828844548166e-05, "loss": 0.3892, "step": 2297 }, { "epoch": 0.2823442683376336, "grad_norm": 0.31870902037135723, "learning_rate": 2.7063006272071185e-05, "loss": 0.3608, "step": 2298 }, { "epoch": 0.2824671335544907, "grad_norm": 0.3518314369528161, "learning_rate": 2.7059181480682434e-05, "loss": 0.3787, "step": 2299 }, { "epoch": 0.28258999877134783, "grad_norm": 0.3565038100687294, "learning_rate": 2.7055354471085454e-05, "loss": 0.4237, "step": 2300 }, { "epoch": 0.28271286398820494, "grad_norm": 0.3487835101249601, "learning_rate": 2.70515252439842e-05, "loss": 0.4228, "step": 2301 }, { "epoch": 0.28283572920506206, "grad_norm": 0.3809745788721226, "learning_rate": 2.704769380008304e-05, "loss": 0.3744, "step": 2302 }, { "epoch": 0.2829585944219192, "grad_norm": 0.4375295276129646, "learning_rate": 2.7043860140086728e-05, "loss": 0.3743, "step": 2303 }, { "epoch": 0.2830814596387763, "grad_norm": 0.4275111977641847, "learning_rate": 2.7040024264700457e-05, "loss": 0.4233, "step": 2304 }, { "epoch": 0.28320432485563335, "grad_norm": 0.4008958417395002, "learning_rate": 2.70361861746298e-05, "loss": 0.347, "step": 2305 }, { "epoch": 0.28332719007249046, "grad_norm": 0.3711426161085889, "learning_rate": 2.7032345870580756e-05, "loss": 0.4179, "step": 2306 }, { "epoch": 0.2834500552893476, "grad_norm": 0.35315858171083236, "learning_rate": 2.7028503353259728e-05, "loss": 0.3878, "step": 2307 }, { "epoch": 0.2835729205062047, "grad_norm": 0.34466141128454497, "learning_rate": 2.702465862337351e-05, "loss": 0.3822, "step": 2308 }, { "epoch": 0.2836957857230618, "grad_norm": 0.3502733446196268, "learning_rate": 2.7020811681629318e-05, "loss": 0.4113, "step": 2309 }, { "epoch": 0.2838186509399189, "grad_norm": 0.3704505118140841, "learning_rate": 2.701696252873478e-05, "loss": 0.443, "step": 2310 }, { "epoch": 0.28394151615677604, "grad_norm": 0.3853438122653751, "learning_rate": 2.7013111165397912e-05, "loss": 0.3681, "step": 2311 }, { "epoch": 0.2840643813736331, "grad_norm": 0.3404433696122941, "learning_rate": 2.700925759232716e-05, "loss": 0.3661, "step": 2312 }, { "epoch": 0.2841872465904902, "grad_norm": 0.347427060022441, "learning_rate": 2.700540181023135e-05, "loss": 0.3904, "step": 2313 }, { "epoch": 0.28431011180734733, "grad_norm": 0.33684795101234294, "learning_rate": 2.700154381981974e-05, "loss": 0.4467, "step": 2314 }, { "epoch": 0.28443297702420445, "grad_norm": 0.43000929648495134, "learning_rate": 2.699768362180197e-05, "loss": 0.4122, "step": 2315 }, { "epoch": 0.28455584224106156, "grad_norm": 0.4064397957619612, "learning_rate": 2.6993821216888115e-05, "loss": 0.4051, "step": 2316 }, { "epoch": 0.2846787074579187, "grad_norm": 0.45295570731277346, "learning_rate": 2.6989956605788623e-05, "loss": 0.489, "step": 2317 }, { "epoch": 0.2848015726747758, "grad_norm": 0.38264334550592466, "learning_rate": 2.6986089789214376e-05, "loss": 0.4483, "step": 2318 }, { "epoch": 0.28492443789163285, "grad_norm": 0.35048007348337534, "learning_rate": 2.698222076787664e-05, "loss": 0.3604, "step": 2319 }, { "epoch": 0.28504730310848997, "grad_norm": 0.3620565988516294, "learning_rate": 2.6978349542487102e-05, "loss": 0.3822, "step": 2320 }, { "epoch": 0.2851701683253471, "grad_norm": 0.45808933604055624, "learning_rate": 2.6974476113757855e-05, "loss": 0.4101, "step": 2321 }, { "epoch": 0.2852930335422042, "grad_norm": 0.3490723886037321, "learning_rate": 2.6970600482401373e-05, "loss": 0.4226, "step": 2322 }, { "epoch": 0.2854158987590613, "grad_norm": 0.3423238322259756, "learning_rate": 2.696672264913057e-05, "loss": 0.3322, "step": 2323 }, { "epoch": 0.28553876397591843, "grad_norm": 0.45014540716777635, "learning_rate": 2.6962842614658742e-05, "loss": 0.42, "step": 2324 }, { "epoch": 0.28566162919277555, "grad_norm": 0.4045753068225998, "learning_rate": 2.6958960379699596e-05, "loss": 0.4418, "step": 2325 }, { "epoch": 0.2857844944096326, "grad_norm": 0.45937086491865875, "learning_rate": 2.695507594496725e-05, "loss": 0.4467, "step": 2326 }, { "epoch": 0.2859073596264897, "grad_norm": 0.4055794353747232, "learning_rate": 2.695118931117621e-05, "loss": 0.4503, "step": 2327 }, { "epoch": 0.28603022484334684, "grad_norm": 0.2868062822361422, "learning_rate": 2.69473004790414e-05, "loss": 0.3618, "step": 2328 }, { "epoch": 0.28615309006020395, "grad_norm": 0.3742790403725525, "learning_rate": 2.6943409449278152e-05, "loss": 0.4128, "step": 2329 }, { "epoch": 0.28627595527706107, "grad_norm": 0.3389463203818989, "learning_rate": 2.693951622260219e-05, "loss": 0.4198, "step": 2330 }, { "epoch": 0.2863988204939182, "grad_norm": 0.35073483447663917, "learning_rate": 2.6935620799729652e-05, "loss": 0.3286, "step": 2331 }, { "epoch": 0.2865216857107753, "grad_norm": 0.3079737715766743, "learning_rate": 2.6931723181377067e-05, "loss": 0.4612, "step": 2332 }, { "epoch": 0.2866445509276324, "grad_norm": 0.2704363057026144, "learning_rate": 2.692782336826139e-05, "loss": 0.4093, "step": 2333 }, { "epoch": 0.2867674161444895, "grad_norm": 0.4317550974022478, "learning_rate": 2.6923921361099953e-05, "loss": 0.4899, "step": 2334 }, { "epoch": 0.2868902813613466, "grad_norm": 0.344759235196104, "learning_rate": 2.6920017160610514e-05, "loss": 0.3549, "step": 2335 }, { "epoch": 0.2870131465782037, "grad_norm": 0.3709895379358881, "learning_rate": 2.6916110767511223e-05, "loss": 0.435, "step": 2336 }, { "epoch": 0.2871360117950608, "grad_norm": 0.3102920512981143, "learning_rate": 2.6912202182520637e-05, "loss": 0.4056, "step": 2337 }, { "epoch": 0.28725887701191793, "grad_norm": 0.3750670013691792, "learning_rate": 2.6908291406357714e-05, "loss": 0.3617, "step": 2338 }, { "epoch": 0.28738174222877505, "grad_norm": 0.3326874685218836, "learning_rate": 2.6904378439741806e-05, "loss": 0.3671, "step": 2339 }, { "epoch": 0.28750460744563217, "grad_norm": 0.39139102310861046, "learning_rate": 2.690046328339269e-05, "loss": 0.4143, "step": 2340 }, { "epoch": 0.2876274726624892, "grad_norm": 0.37268310091074447, "learning_rate": 2.6896545938030532e-05, "loss": 0.3825, "step": 2341 }, { "epoch": 0.28775033787934634, "grad_norm": 0.3805225335988383, "learning_rate": 2.68926264043759e-05, "loss": 0.383, "step": 2342 }, { "epoch": 0.28787320309620346, "grad_norm": 0.46683709700927467, "learning_rate": 2.6888704683149768e-05, "loss": 0.4777, "step": 2343 }, { "epoch": 0.28799606831306057, "grad_norm": 0.30701232405091244, "learning_rate": 2.6884780775073513e-05, "loss": 0.3977, "step": 2344 }, { "epoch": 0.2881189335299177, "grad_norm": 0.3465812540179129, "learning_rate": 2.6880854680868905e-05, "loss": 0.5206, "step": 2345 }, { "epoch": 0.2882417987467748, "grad_norm": 0.36545419076730457, "learning_rate": 2.687692640125813e-05, "loss": 0.4337, "step": 2346 }, { "epoch": 0.2883646639636319, "grad_norm": 0.35551377110817967, "learning_rate": 2.687299593696377e-05, "loss": 0.4328, "step": 2347 }, { "epoch": 0.288487529180489, "grad_norm": 0.4366758863904531, "learning_rate": 2.6869063288708807e-05, "loss": 0.37, "step": 2348 }, { "epoch": 0.2886103943973461, "grad_norm": 0.38538705527561223, "learning_rate": 2.6865128457216623e-05, "loss": 0.4769, "step": 2349 }, { "epoch": 0.2887332596142032, "grad_norm": 0.3795533992987412, "learning_rate": 2.6861191443211008e-05, "loss": 0.3712, "step": 2350 }, { "epoch": 0.2888561248310603, "grad_norm": 0.5466342844617295, "learning_rate": 2.685725224741615e-05, "loss": 0.4632, "step": 2351 }, { "epoch": 0.28897899004791744, "grad_norm": 0.36632562258500095, "learning_rate": 2.6853310870556638e-05, "loss": 0.4224, "step": 2352 }, { "epoch": 0.28910185526477455, "grad_norm": 0.4113814114418391, "learning_rate": 2.6849367313357458e-05, "loss": 0.3895, "step": 2353 }, { "epoch": 0.28922472048163167, "grad_norm": 0.39689048503743385, "learning_rate": 2.6845421576544017e-05, "loss": 0.4611, "step": 2354 }, { "epoch": 0.2893475856984888, "grad_norm": 0.333346164771273, "learning_rate": 2.6841473660842088e-05, "loss": 0.3961, "step": 2355 }, { "epoch": 0.28947045091534584, "grad_norm": 0.37058262220218763, "learning_rate": 2.6837523566977876e-05, "loss": 0.3935, "step": 2356 }, { "epoch": 0.28959331613220296, "grad_norm": 0.366852986957973, "learning_rate": 2.6833571295677976e-05, "loss": 0.4679, "step": 2357 }, { "epoch": 0.2897161813490601, "grad_norm": 0.3253708687636887, "learning_rate": 2.6829616847669372e-05, "loss": 0.3711, "step": 2358 }, { "epoch": 0.2898390465659172, "grad_norm": 0.3976645539198819, "learning_rate": 2.6825660223679477e-05, "loss": 0.4823, "step": 2359 }, { "epoch": 0.2899619117827743, "grad_norm": 0.36556990110127474, "learning_rate": 2.682170142443607e-05, "loss": 0.5126, "step": 2360 }, { "epoch": 0.2900847769996314, "grad_norm": 0.4055853241816385, "learning_rate": 2.681774045066735e-05, "loss": 0.5519, "step": 2361 }, { "epoch": 0.29020764221648854, "grad_norm": 0.3645770441473277, "learning_rate": 2.6813777303101922e-05, "loss": 0.4473, "step": 2362 }, { "epoch": 0.2903305074333456, "grad_norm": 0.37567175050598595, "learning_rate": 2.6809811982468768e-05, "loss": 0.4224, "step": 2363 }, { "epoch": 0.2904533726502027, "grad_norm": 0.43405535932918804, "learning_rate": 2.680584448949729e-05, "loss": 0.5024, "step": 2364 }, { "epoch": 0.29057623786705983, "grad_norm": 0.3861632608716002, "learning_rate": 2.6801874824917287e-05, "loss": 0.4074, "step": 2365 }, { "epoch": 0.29069910308391694, "grad_norm": 0.5025563942812319, "learning_rate": 2.6797902989458944e-05, "loss": 0.3832, "step": 2366 }, { "epoch": 0.29082196830077406, "grad_norm": 0.29792833967524224, "learning_rate": 2.679392898385286e-05, "loss": 0.3957, "step": 2367 }, { "epoch": 0.2909448335176312, "grad_norm": 0.4307860043465109, "learning_rate": 2.678995280883002e-05, "loss": 0.3575, "step": 2368 }, { "epoch": 0.2910676987344883, "grad_norm": 0.3833191417015838, "learning_rate": 2.6785974465121827e-05, "loss": 0.3678, "step": 2369 }, { "epoch": 0.29119056395134535, "grad_norm": 0.4423983226255351, "learning_rate": 2.678199395346006e-05, "loss": 0.4442, "step": 2370 }, { "epoch": 0.29131342916820246, "grad_norm": 0.4285669632772031, "learning_rate": 2.677801127457692e-05, "loss": 0.368, "step": 2371 }, { "epoch": 0.2914362943850596, "grad_norm": 0.45499265614765866, "learning_rate": 2.6774026429204987e-05, "loss": 0.5271, "step": 2372 }, { "epoch": 0.2915591596019167, "grad_norm": 0.4042914097307531, "learning_rate": 2.677003941807725e-05, "loss": 0.4257, "step": 2373 }, { "epoch": 0.2916820248187738, "grad_norm": 0.41879034147518973, "learning_rate": 2.6766050241927095e-05, "loss": 0.3953, "step": 2374 }, { "epoch": 0.2918048900356309, "grad_norm": 0.42226362435881826, "learning_rate": 2.6762058901488303e-05, "loss": 0.5028, "step": 2375 }, { "epoch": 0.29192775525248804, "grad_norm": 0.34445540651167655, "learning_rate": 2.6758065397495057e-05, "loss": 0.3266, "step": 2376 }, { "epoch": 0.2920506204693451, "grad_norm": 0.4811812366005368, "learning_rate": 2.675406973068193e-05, "loss": 0.451, "step": 2377 }, { "epoch": 0.2921734856862022, "grad_norm": 0.3745777714096808, "learning_rate": 2.6750071901783907e-05, "loss": 0.386, "step": 2378 }, { "epoch": 0.29229635090305933, "grad_norm": 0.41068175420081315, "learning_rate": 2.6746071911536358e-05, "loss": 0.3797, "step": 2379 }, { "epoch": 0.29241921611991645, "grad_norm": 0.34691399970445597, "learning_rate": 2.674206976067506e-05, "loss": 0.4085, "step": 2380 }, { "epoch": 0.29254208133677356, "grad_norm": 0.3366370443898277, "learning_rate": 2.6738065449936178e-05, "loss": 0.3903, "step": 2381 }, { "epoch": 0.2926649465536307, "grad_norm": 0.3407795616277181, "learning_rate": 2.673405898005628e-05, "loss": 0.3512, "step": 2382 }, { "epoch": 0.2927878117704878, "grad_norm": 0.3884019556643177, "learning_rate": 2.673005035177233e-05, "loss": 0.4526, "step": 2383 }, { "epoch": 0.2929106769873449, "grad_norm": 0.37823027127318687, "learning_rate": 2.6726039565821686e-05, "loss": 0.4056, "step": 2384 }, { "epoch": 0.29303354220420197, "grad_norm": 0.34676233410777085, "learning_rate": 2.6722026622942118e-05, "loss": 0.4415, "step": 2385 }, { "epoch": 0.2931564074210591, "grad_norm": 0.3801637564808691, "learning_rate": 2.6718011523871766e-05, "loss": 0.3883, "step": 2386 }, { "epoch": 0.2932792726379162, "grad_norm": 0.31572333634145744, "learning_rate": 2.6713994269349195e-05, "loss": 0.398, "step": 2387 }, { "epoch": 0.2934021378547733, "grad_norm": 0.3433868575992696, "learning_rate": 2.670997486011334e-05, "loss": 0.4259, "step": 2388 }, { "epoch": 0.29352500307163043, "grad_norm": 0.4247628838533511, "learning_rate": 2.6705953296903554e-05, "loss": 0.4701, "step": 2389 }, { "epoch": 0.29364786828848755, "grad_norm": 0.3811902631175229, "learning_rate": 2.670192958045957e-05, "loss": 0.4209, "step": 2390 }, { "epoch": 0.29377073350534466, "grad_norm": 0.40833453343558385, "learning_rate": 2.669790371152154e-05, "loss": 0.4297, "step": 2391 }, { "epoch": 0.2938935987222017, "grad_norm": 0.4322761278122871, "learning_rate": 2.6693875690829982e-05, "loss": 0.3879, "step": 2392 }, { "epoch": 0.29401646393905884, "grad_norm": 0.5125505862780939, "learning_rate": 2.668984551912582e-05, "loss": 0.429, "step": 2393 }, { "epoch": 0.29413932915591595, "grad_norm": 0.39823270128624894, "learning_rate": 2.6685813197150395e-05, "loss": 0.4584, "step": 2394 }, { "epoch": 0.29426219437277307, "grad_norm": 0.3452501585659651, "learning_rate": 2.6681778725645414e-05, "loss": 0.4295, "step": 2395 }, { "epoch": 0.2943850595896302, "grad_norm": 0.42994658672791725, "learning_rate": 2.6677742105352994e-05, "loss": 0.381, "step": 2396 }, { "epoch": 0.2945079248064873, "grad_norm": 0.34830797501909067, "learning_rate": 2.667370333701565e-05, "loss": 0.4517, "step": 2397 }, { "epoch": 0.2946307900233444, "grad_norm": 0.42068412763182667, "learning_rate": 2.6669662421376278e-05, "loss": 0.4212, "step": 2398 }, { "epoch": 0.2947536552402015, "grad_norm": 0.3136712509636505, "learning_rate": 2.6665619359178192e-05, "loss": 0.355, "step": 2399 }, { "epoch": 0.2948765204570586, "grad_norm": 0.3629713289407376, "learning_rate": 2.6661574151165072e-05, "loss": 0.3617, "step": 2400 }, { "epoch": 0.2949993856739157, "grad_norm": 0.34839229190345494, "learning_rate": 2.665752679808102e-05, "loss": 0.3563, "step": 2401 }, { "epoch": 0.2951222508907728, "grad_norm": 0.3939701562258444, "learning_rate": 2.6653477300670515e-05, "loss": 0.3944, "step": 2402 }, { "epoch": 0.29524511610762993, "grad_norm": 0.3539997091223303, "learning_rate": 2.6649425659678435e-05, "loss": 0.3608, "step": 2403 }, { "epoch": 0.29536798132448705, "grad_norm": 0.3638821448415961, "learning_rate": 2.664537187585005e-05, "loss": 0.3793, "step": 2404 }, { "epoch": 0.29549084654134417, "grad_norm": 0.4817360603159778, "learning_rate": 2.6641315949931034e-05, "loss": 0.4478, "step": 2405 }, { "epoch": 0.2956137117582013, "grad_norm": 0.4065860494024764, "learning_rate": 2.6637257882667446e-05, "loss": 0.3613, "step": 2406 }, { "epoch": 0.29573657697505834, "grad_norm": 0.34939536698121254, "learning_rate": 2.663319767480574e-05, "loss": 0.3907, "step": 2407 }, { "epoch": 0.29585944219191546, "grad_norm": 0.38642048491050457, "learning_rate": 2.6629135327092772e-05, "loss": 0.3638, "step": 2408 }, { "epoch": 0.29598230740877257, "grad_norm": 0.33956719611472214, "learning_rate": 2.6625070840275767e-05, "loss": 0.3428, "step": 2409 }, { "epoch": 0.2961051726256297, "grad_norm": 0.361611333608016, "learning_rate": 2.662100421510238e-05, "loss": 0.3846, "step": 2410 }, { "epoch": 0.2962280378424868, "grad_norm": 0.31321557901701197, "learning_rate": 2.6616935452320634e-05, "loss": 0.4696, "step": 2411 }, { "epoch": 0.2963509030593439, "grad_norm": 0.42403627205881317, "learning_rate": 2.661286455267894e-05, "loss": 0.4207, "step": 2412 }, { "epoch": 0.29647376827620103, "grad_norm": 0.3580399158567739, "learning_rate": 2.6608791516926133e-05, "loss": 0.3842, "step": 2413 }, { "epoch": 0.2965966334930581, "grad_norm": 0.40994533768541563, "learning_rate": 2.660471634581141e-05, "loss": 0.4886, "step": 2414 }, { "epoch": 0.2967194987099152, "grad_norm": 0.37858849468131245, "learning_rate": 2.660063904008437e-05, "loss": 0.3685, "step": 2415 }, { "epoch": 0.2968423639267723, "grad_norm": 0.4431025851410872, "learning_rate": 2.659655960049502e-05, "loss": 0.4536, "step": 2416 }, { "epoch": 0.29696522914362944, "grad_norm": 0.3260986304132778, "learning_rate": 2.6592478027793732e-05, "loss": 0.4632, "step": 2417 }, { "epoch": 0.29708809436048655, "grad_norm": 0.39139558813352976, "learning_rate": 2.658839432273129e-05, "loss": 0.4129, "step": 2418 }, { "epoch": 0.29721095957734367, "grad_norm": 0.3135510808419369, "learning_rate": 2.6584308486058866e-05, "loss": 0.4429, "step": 2419 }, { "epoch": 0.2973338247942008, "grad_norm": 0.41971218056135984, "learning_rate": 2.6580220518528025e-05, "loss": 0.4701, "step": 2420 }, { "epoch": 0.29745669001105784, "grad_norm": 0.3919228548827828, "learning_rate": 2.657613042089072e-05, "loss": 0.3964, "step": 2421 }, { "epoch": 0.29757955522791496, "grad_norm": 0.3579328965759265, "learning_rate": 2.6572038193899296e-05, "loss": 0.3866, "step": 2422 }, { "epoch": 0.2977024204447721, "grad_norm": 0.358632299102405, "learning_rate": 2.6567943838306497e-05, "loss": 0.3843, "step": 2423 }, { "epoch": 0.2978252856616292, "grad_norm": 0.3367828229616208, "learning_rate": 2.6563847354865443e-05, "loss": 0.371, "step": 2424 }, { "epoch": 0.2979481508784863, "grad_norm": 0.4989134003361556, "learning_rate": 2.655974874432967e-05, "loss": 0.4054, "step": 2425 }, { "epoch": 0.2980710160953434, "grad_norm": 0.40221413699393954, "learning_rate": 2.655564800745308e-05, "loss": 0.366, "step": 2426 }, { "epoch": 0.29819388131220054, "grad_norm": 0.4271128513067987, "learning_rate": 2.655154514498998e-05, "loss": 0.3998, "step": 2427 }, { "epoch": 0.2983167465290576, "grad_norm": 0.4532952434272698, "learning_rate": 2.654744015769506e-05, "loss": 0.3878, "step": 2428 }, { "epoch": 0.2984396117459147, "grad_norm": 0.3614091121584845, "learning_rate": 2.6543333046323416e-05, "loss": 0.4053, "step": 2429 }, { "epoch": 0.29856247696277183, "grad_norm": 0.35184647455436957, "learning_rate": 2.653922381163052e-05, "loss": 0.3897, "step": 2430 }, { "epoch": 0.29868534217962894, "grad_norm": 0.3097735127222909, "learning_rate": 2.6535112454372236e-05, "loss": 0.3938, "step": 2431 }, { "epoch": 0.29880820739648606, "grad_norm": 0.363192730828218, "learning_rate": 2.6530998975304823e-05, "loss": 0.3995, "step": 2432 }, { "epoch": 0.2989310726133432, "grad_norm": 0.40830533004630976, "learning_rate": 2.652688337518493e-05, "loss": 0.322, "step": 2433 }, { "epoch": 0.2990539378302003, "grad_norm": 0.35883342417509806, "learning_rate": 2.65227656547696e-05, "loss": 0.4121, "step": 2434 }, { "epoch": 0.2991768030470574, "grad_norm": 0.3125968044398677, "learning_rate": 2.651864581481625e-05, "loss": 0.401, "step": 2435 }, { "epoch": 0.29929966826391446, "grad_norm": 0.46059536758504266, "learning_rate": 2.6514523856082703e-05, "loss": 0.4934, "step": 2436 }, { "epoch": 0.2994225334807716, "grad_norm": 0.39644310183171405, "learning_rate": 2.651039977932717e-05, "loss": 0.4008, "step": 2437 }, { "epoch": 0.2995453986976287, "grad_norm": 0.45793289448212665, "learning_rate": 2.6506273585308247e-05, "loss": 0.3559, "step": 2438 }, { "epoch": 0.2996682639144858, "grad_norm": 0.3716265881453556, "learning_rate": 2.6502145274784916e-05, "loss": 0.4443, "step": 2439 }, { "epoch": 0.2997911291313429, "grad_norm": 0.30726277374156286, "learning_rate": 2.6498014848516557e-05, "loss": 0.3479, "step": 2440 }, { "epoch": 0.29991399434820004, "grad_norm": 0.33659172981530844, "learning_rate": 2.649388230726293e-05, "loss": 0.3438, "step": 2441 }, { "epoch": 0.30003685956505716, "grad_norm": 0.38217974983190917, "learning_rate": 2.6489747651784196e-05, "loss": 0.5778, "step": 2442 }, { "epoch": 0.3001597247819142, "grad_norm": 0.3844134435861792, "learning_rate": 2.6485610882840892e-05, "loss": 0.447, "step": 2443 }, { "epoch": 0.30028258999877133, "grad_norm": 0.41964278347024525, "learning_rate": 2.6481472001193958e-05, "loss": 0.4764, "step": 2444 }, { "epoch": 0.30040545521562845, "grad_norm": 0.3993555540970638, "learning_rate": 2.647733100760471e-05, "loss": 0.3396, "step": 2445 }, { "epoch": 0.30052832043248556, "grad_norm": 0.34161710300327053, "learning_rate": 2.6473187902834848e-05, "loss": 0.4667, "step": 2446 }, { "epoch": 0.3006511856493427, "grad_norm": 0.37339811615138824, "learning_rate": 2.646904268764648e-05, "loss": 0.4043, "step": 2447 }, { "epoch": 0.3007740508661998, "grad_norm": 0.35687044710476273, "learning_rate": 2.6464895362802095e-05, "loss": 0.3848, "step": 2448 }, { "epoch": 0.3008969160830569, "grad_norm": 0.3708692388472157, "learning_rate": 2.6460745929064553e-05, "loss": 0.4021, "step": 2449 }, { "epoch": 0.30101978129991397, "grad_norm": 0.409885058740716, "learning_rate": 2.645659438719713e-05, "loss": 0.3649, "step": 2450 }, { "epoch": 0.3011426465167711, "grad_norm": 0.48636467252138027, "learning_rate": 2.6452440737963463e-05, "loss": 0.5345, "step": 2451 }, { "epoch": 0.3012655117336282, "grad_norm": 0.3129384433149311, "learning_rate": 2.6448284982127596e-05, "loss": 0.3996, "step": 2452 }, { "epoch": 0.3013883769504853, "grad_norm": 0.41990172359675604, "learning_rate": 2.6444127120453957e-05, "loss": 0.434, "step": 2453 }, { "epoch": 0.30151124216734243, "grad_norm": 0.3303002472352907, "learning_rate": 2.643996715370734e-05, "loss": 0.4453, "step": 2454 }, { "epoch": 0.30163410738419955, "grad_norm": 0.3524139677795965, "learning_rate": 2.6435805082652966e-05, "loss": 0.3376, "step": 2455 }, { "epoch": 0.30175697260105666, "grad_norm": 0.3322181367185029, "learning_rate": 2.6431640908056408e-05, "loss": 0.3761, "step": 2456 }, { "epoch": 0.3018798378179137, "grad_norm": 0.41114473739105384, "learning_rate": 2.6427474630683636e-05, "loss": 0.4684, "step": 2457 }, { "epoch": 0.30200270303477084, "grad_norm": 0.46562895592514886, "learning_rate": 2.642330625130102e-05, "loss": 0.4078, "step": 2458 }, { "epoch": 0.30212556825162795, "grad_norm": 0.3663033096687842, "learning_rate": 2.6419135770675304e-05, "loss": 0.3906, "step": 2459 }, { "epoch": 0.30224843346848507, "grad_norm": 0.3226012388369198, "learning_rate": 2.6414963189573616e-05, "loss": 0.3848, "step": 2460 }, { "epoch": 0.3023712986853422, "grad_norm": 0.398850230201826, "learning_rate": 2.641078850876348e-05, "loss": 0.4078, "step": 2461 }, { "epoch": 0.3024941639021993, "grad_norm": 0.4290021279302373, "learning_rate": 2.6406611729012796e-05, "loss": 0.4125, "step": 2462 }, { "epoch": 0.3026170291190564, "grad_norm": 0.34052036048952466, "learning_rate": 2.6402432851089863e-05, "loss": 0.4887, "step": 2463 }, { "epoch": 0.30273989433591353, "grad_norm": 0.2954982988087534, "learning_rate": 2.639825187576335e-05, "loss": 0.3438, "step": 2464 }, { "epoch": 0.3028627595527706, "grad_norm": 0.35567232265187965, "learning_rate": 2.6394068803802328e-05, "loss": 0.4074, "step": 2465 }, { "epoch": 0.3029856247696277, "grad_norm": 0.33683654097213117, "learning_rate": 2.6389883635976243e-05, "loss": 0.4293, "step": 2466 }, { "epoch": 0.3031084899864848, "grad_norm": 0.3076152210453901, "learning_rate": 2.6385696373054926e-05, "loss": 0.3376, "step": 2467 }, { "epoch": 0.30323135520334193, "grad_norm": 0.3909642545920544, "learning_rate": 2.6381507015808603e-05, "loss": 0.4965, "step": 2468 }, { "epoch": 0.30335422042019905, "grad_norm": 0.3116026975336749, "learning_rate": 2.6377315565007876e-05, "loss": 0.4329, "step": 2469 }, { "epoch": 0.30347708563705617, "grad_norm": 0.3667969268539343, "learning_rate": 2.6373122021423733e-05, "loss": 0.4255, "step": 2470 }, { "epoch": 0.3035999508539133, "grad_norm": 0.31183045913331964, "learning_rate": 2.6368926385827548e-05, "loss": 0.4396, "step": 2471 }, { "epoch": 0.30372281607077034, "grad_norm": 0.33820249963527843, "learning_rate": 2.6364728658991093e-05, "loss": 0.4152, "step": 2472 }, { "epoch": 0.30384568128762746, "grad_norm": 0.3535650369875115, "learning_rate": 2.63605288416865e-05, "loss": 0.431, "step": 2473 }, { "epoch": 0.30396854650448457, "grad_norm": 0.3983581281657848, "learning_rate": 2.6356326934686303e-05, "loss": 0.4603, "step": 2474 }, { "epoch": 0.3040914117213417, "grad_norm": 0.3823932137055082, "learning_rate": 2.6352122938763412e-05, "loss": 0.4224, "step": 2475 }, { "epoch": 0.3042142769381988, "grad_norm": 0.3391538991544692, "learning_rate": 2.634791685469113e-05, "loss": 0.4302, "step": 2476 }, { "epoch": 0.3043371421550559, "grad_norm": 0.3621398652527848, "learning_rate": 2.6343708683243137e-05, "loss": 0.375, "step": 2477 }, { "epoch": 0.30446000737191303, "grad_norm": 0.3660740724410321, "learning_rate": 2.6339498425193496e-05, "loss": 0.3372, "step": 2478 }, { "epoch": 0.3045828725887701, "grad_norm": 0.3371313742620008, "learning_rate": 2.633528608131666e-05, "loss": 0.3929, "step": 2479 }, { "epoch": 0.3047057378056272, "grad_norm": 0.3622749848717423, "learning_rate": 2.6331071652387463e-05, "loss": 0.3951, "step": 2480 }, { "epoch": 0.3048286030224843, "grad_norm": 0.34408634752692185, "learning_rate": 2.6326855139181117e-05, "loss": 0.4153, "step": 2481 }, { "epoch": 0.30495146823934144, "grad_norm": 0.38043528823455103, "learning_rate": 2.6322636542473228e-05, "loss": 0.4734, "step": 2482 }, { "epoch": 0.30507433345619855, "grad_norm": 0.3330099008455, "learning_rate": 2.631841586303978e-05, "loss": 0.348, "step": 2483 }, { "epoch": 0.30519719867305567, "grad_norm": 0.35388504479881866, "learning_rate": 2.6314193101657124e-05, "loss": 0.4178, "step": 2484 }, { "epoch": 0.3053200638899128, "grad_norm": 0.41668479926369895, "learning_rate": 2.6309968259102032e-05, "loss": 0.4514, "step": 2485 }, { "epoch": 0.3054429291067699, "grad_norm": 0.4796267276338137, "learning_rate": 2.630574133615163e-05, "loss": 0.4341, "step": 2486 }, { "epoch": 0.30556579432362696, "grad_norm": 0.31036717970150673, "learning_rate": 2.630151233358342e-05, "loss": 0.3865, "step": 2487 }, { "epoch": 0.3056886595404841, "grad_norm": 0.42482211359991995, "learning_rate": 2.6297281252175316e-05, "loss": 0.3423, "step": 2488 }, { "epoch": 0.3058115247573412, "grad_norm": 0.3052977786539593, "learning_rate": 2.6293048092705586e-05, "loss": 0.3034, "step": 2489 }, { "epoch": 0.3059343899741983, "grad_norm": 0.3733720760321401, "learning_rate": 2.62888128559529e-05, "loss": 0.505, "step": 2490 }, { "epoch": 0.3060572551910554, "grad_norm": 0.4118915919668245, "learning_rate": 2.6284575542696297e-05, "loss": 0.4221, "step": 2491 }, { "epoch": 0.30618012040791254, "grad_norm": 0.3631942571187445, "learning_rate": 2.628033615371521e-05, "loss": 0.3852, "step": 2492 }, { "epoch": 0.30630298562476965, "grad_norm": 0.43559538459804475, "learning_rate": 2.627609468978944e-05, "loss": 0.4133, "step": 2493 }, { "epoch": 0.3064258508416267, "grad_norm": 0.6799462698790493, "learning_rate": 2.6271851151699184e-05, "loss": 0.4683, "step": 2494 }, { "epoch": 0.30654871605848383, "grad_norm": 0.39532813827696545, "learning_rate": 2.626760554022501e-05, "loss": 0.3954, "step": 2495 }, { "epoch": 0.30667158127534094, "grad_norm": 0.39328473283064913, "learning_rate": 2.626335785614786e-05, "loss": 0.3973, "step": 2496 }, { "epoch": 0.30679444649219806, "grad_norm": 0.46924937008058076, "learning_rate": 2.6259108100249086e-05, "loss": 0.4577, "step": 2497 }, { "epoch": 0.3069173117090552, "grad_norm": 0.3755403861208075, "learning_rate": 2.6254856273310394e-05, "loss": 0.3237, "step": 2498 }, { "epoch": 0.3070401769259123, "grad_norm": 0.35585653547399443, "learning_rate": 2.6250602376113882e-05, "loss": 0.4577, "step": 2499 }, { "epoch": 0.3071630421427694, "grad_norm": 0.5037053630189562, "learning_rate": 2.6246346409442024e-05, "loss": 0.563, "step": 2500 }, { "epoch": 0.30728590735962646, "grad_norm": 0.36784401414601425, "learning_rate": 2.6242088374077676e-05, "loss": 0.369, "step": 2501 }, { "epoch": 0.3074087725764836, "grad_norm": 0.3661318859566203, "learning_rate": 2.623782827080408e-05, "loss": 0.4962, "step": 2502 }, { "epoch": 0.3075316377933407, "grad_norm": 0.3905053220236832, "learning_rate": 2.6233566100404856e-05, "loss": 0.403, "step": 2503 }, { "epoch": 0.3076545030101978, "grad_norm": 0.34425507950154943, "learning_rate": 2.6229301863664e-05, "loss": 0.3532, "step": 2504 }, { "epoch": 0.3077773682270549, "grad_norm": 0.4014739938185765, "learning_rate": 2.6225035561365888e-05, "loss": 0.3955, "step": 2505 }, { "epoch": 0.30790023344391204, "grad_norm": 0.3380498957022737, "learning_rate": 2.6220767194295285e-05, "loss": 0.4053, "step": 2506 }, { "epoch": 0.30802309866076916, "grad_norm": 0.40298245216640666, "learning_rate": 2.6216496763237324e-05, "loss": 0.5081, "step": 2507 }, { "epoch": 0.3081459638776262, "grad_norm": 0.3357133361825364, "learning_rate": 2.6212224268977527e-05, "loss": 0.3972, "step": 2508 }, { "epoch": 0.30826882909448333, "grad_norm": 0.4241599515281528, "learning_rate": 2.6207949712301787e-05, "loss": 0.4609, "step": 2509 }, { "epoch": 0.30839169431134045, "grad_norm": 0.32328936816123927, "learning_rate": 2.6203673093996385e-05, "loss": 0.4195, "step": 2510 }, { "epoch": 0.30851455952819756, "grad_norm": 0.35529830685977054, "learning_rate": 2.6199394414847975e-05, "loss": 0.4438, "step": 2511 }, { "epoch": 0.3086374247450547, "grad_norm": 0.31500231691327846, "learning_rate": 2.619511367564359e-05, "loss": 0.4236, "step": 2512 }, { "epoch": 0.3087602899619118, "grad_norm": 0.3094648860265501, "learning_rate": 2.6190830877170653e-05, "loss": 0.3674, "step": 2513 }, { "epoch": 0.3088831551787689, "grad_norm": 0.3570728779880329, "learning_rate": 2.618654602021695e-05, "loss": 0.4172, "step": 2514 }, { "epoch": 0.309006020395626, "grad_norm": 0.3167879679695941, "learning_rate": 2.6182259105570652e-05, "loss": 0.3246, "step": 2515 }, { "epoch": 0.3091288856124831, "grad_norm": 0.45610215635003437, "learning_rate": 2.6177970134020308e-05, "loss": 0.3897, "step": 2516 }, { "epoch": 0.3092517508293402, "grad_norm": 0.31374372950012425, "learning_rate": 2.6173679106354852e-05, "loss": 0.4363, "step": 2517 }, { "epoch": 0.3093746160461973, "grad_norm": 0.3397840456813493, "learning_rate": 2.616938602336359e-05, "loss": 0.4117, "step": 2518 }, { "epoch": 0.30949748126305443, "grad_norm": 0.3127545583225827, "learning_rate": 2.6165090885836208e-05, "loss": 0.3661, "step": 2519 }, { "epoch": 0.30962034647991155, "grad_norm": 0.3232795853370751, "learning_rate": 2.616079369456276e-05, "loss": 0.4665, "step": 2520 }, { "epoch": 0.30974321169676866, "grad_norm": 0.35091918603456007, "learning_rate": 2.6156494450333696e-05, "loss": 0.4132, "step": 2521 }, { "epoch": 0.3098660769136258, "grad_norm": 0.3624844602333186, "learning_rate": 2.6152193153939826e-05, "loss": 0.402, "step": 2522 }, { "epoch": 0.30998894213048284, "grad_norm": 0.3287969300552087, "learning_rate": 2.614788980617235e-05, "loss": 0.3981, "step": 2523 }, { "epoch": 0.31011180734733995, "grad_norm": 0.3721974770724443, "learning_rate": 2.6143584407822848e-05, "loss": 0.4225, "step": 2524 }, { "epoch": 0.31023467256419707, "grad_norm": 0.5232053219314503, "learning_rate": 2.6139276959683254e-05, "loss": 0.4744, "step": 2525 }, { "epoch": 0.3103575377810542, "grad_norm": 0.4086652313326835, "learning_rate": 2.6134967462545908e-05, "loss": 0.4641, "step": 2526 }, { "epoch": 0.3104804029979113, "grad_norm": 0.3719160086938702, "learning_rate": 2.6130655917203512e-05, "loss": 0.4514, "step": 2527 }, { "epoch": 0.3106032682147684, "grad_norm": 0.4441223845130375, "learning_rate": 2.6126342324449142e-05, "loss": 0.3987, "step": 2528 }, { "epoch": 0.31072613343162553, "grad_norm": 0.35164315055637696, "learning_rate": 2.6122026685076256e-05, "loss": 0.432, "step": 2529 }, { "epoch": 0.3108489986484826, "grad_norm": 0.4098876217843733, "learning_rate": 2.6117708999878695e-05, "loss": 0.4576, "step": 2530 }, { "epoch": 0.3109718638653397, "grad_norm": 0.4491777026857431, "learning_rate": 2.611338926965066e-05, "loss": 0.4774, "step": 2531 }, { "epoch": 0.3110947290821968, "grad_norm": 0.37944981304180586, "learning_rate": 2.6109067495186747e-05, "loss": 0.4471, "step": 2532 }, { "epoch": 0.31121759429905393, "grad_norm": 0.31694282113938677, "learning_rate": 2.6104743677281912e-05, "loss": 0.3334, "step": 2533 }, { "epoch": 0.31134045951591105, "grad_norm": 0.36382436225997666, "learning_rate": 2.610041781673149e-05, "loss": 0.4252, "step": 2534 }, { "epoch": 0.31146332473276817, "grad_norm": 0.3325620824118246, "learning_rate": 2.60960899143312e-05, "loss": 0.4065, "step": 2535 }, { "epoch": 0.3115861899496253, "grad_norm": 0.32275050540607164, "learning_rate": 2.6091759970877134e-05, "loss": 0.3948, "step": 2536 }, { "epoch": 0.3117090551664824, "grad_norm": 0.31687738148888484, "learning_rate": 2.6087427987165754e-05, "loss": 0.4142, "step": 2537 }, { "epoch": 0.31183192038333946, "grad_norm": 0.5042767918985579, "learning_rate": 2.6083093963993898e-05, "loss": 0.4594, "step": 2538 }, { "epoch": 0.31195478560019657, "grad_norm": 0.31362917433987775, "learning_rate": 2.6078757902158784e-05, "loss": 0.3663, "step": 2539 }, { "epoch": 0.3120776508170537, "grad_norm": 0.4279203819643912, "learning_rate": 2.6074419802458002e-05, "loss": 0.4236, "step": 2540 }, { "epoch": 0.3122005160339108, "grad_norm": 0.3258396857164655, "learning_rate": 2.6070079665689518e-05, "loss": 0.4126, "step": 2541 }, { "epoch": 0.3123233812507679, "grad_norm": 0.3707301728153853, "learning_rate": 2.6065737492651677e-05, "loss": 0.3328, "step": 2542 }, { "epoch": 0.31244624646762503, "grad_norm": 0.38525294314986563, "learning_rate": 2.606139328414318e-05, "loss": 0.4197, "step": 2543 }, { "epoch": 0.31256911168448215, "grad_norm": 0.35597611346532726, "learning_rate": 2.6057047040963127e-05, "loss": 0.5062, "step": 2544 }, { "epoch": 0.3126919769013392, "grad_norm": 0.31925373461045065, "learning_rate": 2.605269876391098e-05, "loss": 0.337, "step": 2545 }, { "epoch": 0.3128148421181963, "grad_norm": 0.3569907217905241, "learning_rate": 2.6048348453786576e-05, "loss": 0.4507, "step": 2546 }, { "epoch": 0.31293770733505344, "grad_norm": 0.3555192594639558, "learning_rate": 2.604399611139012e-05, "loss": 0.4062, "step": 2547 }, { "epoch": 0.31306057255191055, "grad_norm": 0.30489792181656045, "learning_rate": 2.60396417375222e-05, "loss": 0.3052, "step": 2548 }, { "epoch": 0.31318343776876767, "grad_norm": 0.3087084727527485, "learning_rate": 2.6035285332983783e-05, "loss": 0.3683, "step": 2549 }, { "epoch": 0.3133063029856248, "grad_norm": 0.35459725187643526, "learning_rate": 2.6030926898576196e-05, "loss": 0.3578, "step": 2550 }, { "epoch": 0.3134291682024819, "grad_norm": 0.3887742907242223, "learning_rate": 2.6026566435101143e-05, "loss": 0.412, "step": 2551 }, { "epoch": 0.31355203341933896, "grad_norm": 0.2957024195812244, "learning_rate": 2.60222039433607e-05, "loss": 0.3826, "step": 2552 }, { "epoch": 0.3136748986361961, "grad_norm": 0.4090172874499259, "learning_rate": 2.6017839424157322e-05, "loss": 0.4005, "step": 2553 }, { "epoch": 0.3137977638530532, "grad_norm": 0.40286945923698486, "learning_rate": 2.601347287829384e-05, "loss": 0.4724, "step": 2554 }, { "epoch": 0.3139206290699103, "grad_norm": 0.4201856373805626, "learning_rate": 2.6009104306573447e-05, "loss": 0.3938, "step": 2555 }, { "epoch": 0.3140434942867674, "grad_norm": 0.4293785258455791, "learning_rate": 2.600473370979971e-05, "loss": 0.3923, "step": 2556 }, { "epoch": 0.31416635950362454, "grad_norm": 0.34985794337632176, "learning_rate": 2.600036108877658e-05, "loss": 0.3374, "step": 2557 }, { "epoch": 0.31428922472048165, "grad_norm": 0.2980054791342795, "learning_rate": 2.5995986444308366e-05, "loss": 0.3546, "step": 2558 }, { "epoch": 0.3144120899373387, "grad_norm": 0.41177966244953057, "learning_rate": 2.5991609777199755e-05, "loss": 0.4517, "step": 2559 }, { "epoch": 0.3145349551541958, "grad_norm": 0.367767767326499, "learning_rate": 2.5987231088255807e-05, "loss": 0.3705, "step": 2560 }, { "epoch": 0.31465782037105294, "grad_norm": 0.42527577035384934, "learning_rate": 2.598285037828196e-05, "loss": 0.3646, "step": 2561 }, { "epoch": 0.31478068558791006, "grad_norm": 0.3248771909106964, "learning_rate": 2.5978467648084012e-05, "loss": 0.3767, "step": 2562 }, { "epoch": 0.3149035508047672, "grad_norm": 0.3199295280340809, "learning_rate": 2.5974082898468135e-05, "loss": 0.4097, "step": 2563 }, { "epoch": 0.3150264160216243, "grad_norm": 0.3986114884927368, "learning_rate": 2.5969696130240876e-05, "loss": 0.4594, "step": 2564 }, { "epoch": 0.3151492812384814, "grad_norm": 0.3761281170746406, "learning_rate": 2.596530734420916e-05, "loss": 0.3984, "step": 2565 }, { "epoch": 0.3152721464553385, "grad_norm": 0.36460729424679056, "learning_rate": 2.596091654118027e-05, "loss": 0.3948, "step": 2566 }, { "epoch": 0.3153950116721956, "grad_norm": 0.32107045143177926, "learning_rate": 2.5956523721961866e-05, "loss": 0.3359, "step": 2567 }, { "epoch": 0.3155178768890527, "grad_norm": 0.4015067686286183, "learning_rate": 2.5952128887361977e-05, "loss": 0.3825, "step": 2568 }, { "epoch": 0.3156407421059098, "grad_norm": 0.3638261794475553, "learning_rate": 2.5947732038189005e-05, "loss": 0.4246, "step": 2569 }, { "epoch": 0.3157636073227669, "grad_norm": 0.31017547055409467, "learning_rate": 2.5943333175251723e-05, "loss": 0.3282, "step": 2570 }, { "epoch": 0.31588647253962404, "grad_norm": 0.33016136065878776, "learning_rate": 2.5938932299359276e-05, "loss": 0.3352, "step": 2571 }, { "epoch": 0.31600933775648116, "grad_norm": 0.3302128676043254, "learning_rate": 2.5934529411321174e-05, "loss": 0.5025, "step": 2572 }, { "epoch": 0.31613220297333827, "grad_norm": 0.35293238991570025, "learning_rate": 2.59301245119473e-05, "loss": 0.4388, "step": 2573 }, { "epoch": 0.31625506819019533, "grad_norm": 0.2778220716747538, "learning_rate": 2.5925717602047903e-05, "loss": 0.3801, "step": 2574 }, { "epoch": 0.31637793340705245, "grad_norm": 0.4146390474493598, "learning_rate": 2.5921308682433613e-05, "loss": 0.3853, "step": 2575 }, { "epoch": 0.31650079862390956, "grad_norm": 0.3727896160343147, "learning_rate": 2.5916897753915415e-05, "loss": 0.4112, "step": 2576 }, { "epoch": 0.3166236638407667, "grad_norm": 0.3147110241807369, "learning_rate": 2.5912484817304675e-05, "loss": 0.4067, "step": 2577 }, { "epoch": 0.3167465290576238, "grad_norm": 0.32228401666496304, "learning_rate": 2.5908069873413123e-05, "loss": 0.4591, "step": 2578 }, { "epoch": 0.3168693942744809, "grad_norm": 0.32612172287663366, "learning_rate": 2.590365292305286e-05, "loss": 0.4049, "step": 2579 }, { "epoch": 0.316992259491338, "grad_norm": 0.33732522590697334, "learning_rate": 2.589923396703635e-05, "loss": 0.4437, "step": 2580 }, { "epoch": 0.3171151247081951, "grad_norm": 0.3969937073423355, "learning_rate": 2.5894813006176443e-05, "loss": 0.4099, "step": 2581 }, { "epoch": 0.3172379899250522, "grad_norm": 0.31955293203299506, "learning_rate": 2.5890390041286335e-05, "loss": 0.4045, "step": 2582 }, { "epoch": 0.3173608551419093, "grad_norm": 0.37288132510219096, "learning_rate": 2.5885965073179605e-05, "loss": 0.384, "step": 2583 }, { "epoch": 0.31748372035876643, "grad_norm": 0.3300361797836183, "learning_rate": 2.58815381026702e-05, "loss": 0.4384, "step": 2584 }, { "epoch": 0.31760658557562355, "grad_norm": 0.3601790115869198, "learning_rate": 2.5877109130572427e-05, "loss": 0.3428, "step": 2585 }, { "epoch": 0.31772945079248066, "grad_norm": 0.3220554579992287, "learning_rate": 2.587267815770097e-05, "loss": 0.4196, "step": 2586 }, { "epoch": 0.3178523160093378, "grad_norm": 0.3463619175574054, "learning_rate": 2.586824518487088e-05, "loss": 0.4763, "step": 2587 }, { "epoch": 0.3179751812261949, "grad_norm": 0.40226564306876794, "learning_rate": 2.586381021289757e-05, "loss": 0.4115, "step": 2588 }, { "epoch": 0.31809804644305195, "grad_norm": 0.3572006403817805, "learning_rate": 2.5859373242596827e-05, "loss": 0.3447, "step": 2589 }, { "epoch": 0.31822091165990907, "grad_norm": 0.303029479527945, "learning_rate": 2.58549342747848e-05, "loss": 0.3723, "step": 2590 }, { "epoch": 0.3183437768767662, "grad_norm": 0.3962972264278214, "learning_rate": 2.585049331027801e-05, "loss": 0.4137, "step": 2591 }, { "epoch": 0.3184666420936233, "grad_norm": 0.48441496359050856, "learning_rate": 2.5846050349893345e-05, "loss": 0.3976, "step": 2592 }, { "epoch": 0.3185895073104804, "grad_norm": 0.4050188330583398, "learning_rate": 2.584160539444806e-05, "loss": 0.397, "step": 2593 }, { "epoch": 0.31871237252733753, "grad_norm": 0.31857687932297307, "learning_rate": 2.5837158444759764e-05, "loss": 0.4465, "step": 2594 }, { "epoch": 0.31883523774419464, "grad_norm": 0.3516299149544377, "learning_rate": 2.583270950164646e-05, "loss": 0.3925, "step": 2595 }, { "epoch": 0.3189581029610517, "grad_norm": 0.3502128372599266, "learning_rate": 2.5828258565926497e-05, "loss": 0.4029, "step": 2596 }, { "epoch": 0.3190809681779088, "grad_norm": 0.38524524946349115, "learning_rate": 2.582380563841859e-05, "loss": 0.4101, "step": 2597 }, { "epoch": 0.31920383339476593, "grad_norm": 0.3930554149063493, "learning_rate": 2.5819350719941836e-05, "loss": 0.4658, "step": 2598 }, { "epoch": 0.31932669861162305, "grad_norm": 0.3633052245578209, "learning_rate": 2.5814893811315675e-05, "loss": 0.3814, "step": 2599 }, { "epoch": 0.31944956382848017, "grad_norm": 0.3883279368590656, "learning_rate": 2.5810434913359943e-05, "loss": 0.3911, "step": 2600 }, { "epoch": 0.3195724290453373, "grad_norm": 0.33440748801976145, "learning_rate": 2.580597402689481e-05, "loss": 0.3888, "step": 2601 }, { "epoch": 0.3196952942621944, "grad_norm": 0.3924283505138606, "learning_rate": 2.5801511152740837e-05, "loss": 0.3963, "step": 2602 }, { "epoch": 0.31981815947905146, "grad_norm": 0.4061306834871441, "learning_rate": 2.5797046291718943e-05, "loss": 0.4297, "step": 2603 }, { "epoch": 0.31994102469590857, "grad_norm": 0.4524253839427333, "learning_rate": 2.57925794446504e-05, "loss": 0.4698, "step": 2604 }, { "epoch": 0.3200638899127657, "grad_norm": 0.2983759732262898, "learning_rate": 2.578811061235686e-05, "loss": 0.362, "step": 2605 }, { "epoch": 0.3201867551296228, "grad_norm": 0.5091924229185497, "learning_rate": 2.5783639795660333e-05, "loss": 0.4418, "step": 2606 }, { "epoch": 0.3203096203464799, "grad_norm": 0.4482480716334209, "learning_rate": 2.57791669953832e-05, "loss": 0.496, "step": 2607 }, { "epoch": 0.32043248556333703, "grad_norm": 0.4474592977676661, "learning_rate": 2.577469221234821e-05, "loss": 0.3922, "step": 2608 }, { "epoch": 0.32055535078019415, "grad_norm": 0.555417366417709, "learning_rate": 2.5770215447378463e-05, "loss": 0.5244, "step": 2609 }, { "epoch": 0.3206782159970512, "grad_norm": 0.3824588502348266, "learning_rate": 2.5765736701297427e-05, "loss": 0.4245, "step": 2610 }, { "epoch": 0.3208010812139083, "grad_norm": 0.3753504404998376, "learning_rate": 2.576125597492895e-05, "loss": 0.4334, "step": 2611 }, { "epoch": 0.32092394643076544, "grad_norm": 0.3645629874615321, "learning_rate": 2.5756773269097217e-05, "loss": 0.365, "step": 2612 }, { "epoch": 0.32104681164762255, "grad_norm": 0.3504162356337515, "learning_rate": 2.5752288584626807e-05, "loss": 0.4197, "step": 2613 }, { "epoch": 0.32116967686447967, "grad_norm": 0.4003068175669233, "learning_rate": 2.574780192234264e-05, "loss": 0.4521, "step": 2614 }, { "epoch": 0.3212925420813368, "grad_norm": 0.36316516563233736, "learning_rate": 2.5743313283070015e-05, "loss": 0.3668, "step": 2615 }, { "epoch": 0.3214154072981939, "grad_norm": 0.32721809185726813, "learning_rate": 2.573882266763458e-05, "loss": 0.4179, "step": 2616 }, { "epoch": 0.321538272515051, "grad_norm": 0.4178601886844237, "learning_rate": 2.573433007686236e-05, "loss": 0.4453, "step": 2617 }, { "epoch": 0.3216611377319081, "grad_norm": 0.38388476103307007, "learning_rate": 2.572983551157974e-05, "loss": 0.3282, "step": 2618 }, { "epoch": 0.3217840029487652, "grad_norm": 0.31432408454094035, "learning_rate": 2.572533897261346e-05, "loss": 0.3432, "step": 2619 }, { "epoch": 0.3219068681656223, "grad_norm": 0.32168215272263256, "learning_rate": 2.5720840460790635e-05, "loss": 0.4315, "step": 2620 }, { "epoch": 0.3220297333824794, "grad_norm": 0.44068818545267163, "learning_rate": 2.571633997693873e-05, "loss": 0.382, "step": 2621 }, { "epoch": 0.32215259859933654, "grad_norm": 0.44009932298172355, "learning_rate": 2.571183752188559e-05, "loss": 0.3978, "step": 2622 }, { "epoch": 0.32227546381619365, "grad_norm": 0.37620014075044034, "learning_rate": 2.57073330964594e-05, "loss": 0.4149, "step": 2623 }, { "epoch": 0.32239832903305077, "grad_norm": 0.3826264931200861, "learning_rate": 2.5702826701488735e-05, "loss": 0.4746, "step": 2624 }, { "epoch": 0.3225211942499078, "grad_norm": 0.40844874526606517, "learning_rate": 2.56983183378025e-05, "loss": 0.4601, "step": 2625 }, { "epoch": 0.32264405946676494, "grad_norm": 0.5855033682203115, "learning_rate": 2.5693808006229988e-05, "loss": 0.4622, "step": 2626 }, { "epoch": 0.32276692468362206, "grad_norm": 0.3472410362969866, "learning_rate": 2.5689295707600853e-05, "loss": 0.3609, "step": 2627 }, { "epoch": 0.3228897899004792, "grad_norm": 0.2906375901047494, "learning_rate": 2.568478144274509e-05, "loss": 0.4123, "step": 2628 }, { "epoch": 0.3230126551173363, "grad_norm": 0.37366878170728285, "learning_rate": 2.568026521249307e-05, "loss": 0.5041, "step": 2629 }, { "epoch": 0.3231355203341934, "grad_norm": 0.41563642810777324, "learning_rate": 2.5675747017675535e-05, "loss": 0.4154, "step": 2630 }, { "epoch": 0.3232583855510505, "grad_norm": 0.42515101609210837, "learning_rate": 2.5671226859123567e-05, "loss": 0.4506, "step": 2631 }, { "epoch": 0.3233812507679076, "grad_norm": 0.3538381881972203, "learning_rate": 2.5666704737668627e-05, "loss": 0.4348, "step": 2632 }, { "epoch": 0.3235041159847647, "grad_norm": 0.29568344738040003, "learning_rate": 2.5662180654142523e-05, "loss": 0.3626, "step": 2633 }, { "epoch": 0.3236269812016218, "grad_norm": 0.3204235842760527, "learning_rate": 2.5657654609377438e-05, "loss": 0.4271, "step": 2634 }, { "epoch": 0.3237498464184789, "grad_norm": 0.40267476912819344, "learning_rate": 2.56531266042059e-05, "loss": 0.426, "step": 2635 }, { "epoch": 0.32387271163533604, "grad_norm": 0.33994515014412197, "learning_rate": 2.564859663946081e-05, "loss": 0.397, "step": 2636 }, { "epoch": 0.32399557685219316, "grad_norm": 0.4446888256349597, "learning_rate": 2.564406471597543e-05, "loss": 0.4282, "step": 2637 }, { "epoch": 0.32411844206905027, "grad_norm": 0.35810570074996223, "learning_rate": 2.563953083458338e-05, "loss": 0.3727, "step": 2638 }, { "epoch": 0.32424130728590733, "grad_norm": 0.43060453845662827, "learning_rate": 2.5634994996118625e-05, "loss": 0.3758, "step": 2639 }, { "epoch": 0.32436417250276445, "grad_norm": 0.3566162677365997, "learning_rate": 2.563045720141551e-05, "loss": 0.5283, "step": 2640 }, { "epoch": 0.32448703771962156, "grad_norm": 0.3434621822538365, "learning_rate": 2.562591745130874e-05, "loss": 0.3899, "step": 2641 }, { "epoch": 0.3246099029364787, "grad_norm": 0.33988464383874256, "learning_rate": 2.5621375746633363e-05, "loss": 0.4126, "step": 2642 }, { "epoch": 0.3247327681533358, "grad_norm": 0.3773528491631377, "learning_rate": 2.56168320882248e-05, "loss": 0.3937, "step": 2643 }, { "epoch": 0.3248556333701929, "grad_norm": 0.3967965359330622, "learning_rate": 2.561228647691883e-05, "loss": 0.4163, "step": 2644 }, { "epoch": 0.32497849858705, "grad_norm": 0.3844705053674616, "learning_rate": 2.560773891355158e-05, "loss": 0.4695, "step": 2645 }, { "epoch": 0.32510136380390714, "grad_norm": 0.33760935457817937, "learning_rate": 2.5603189398959554e-05, "loss": 0.4727, "step": 2646 }, { "epoch": 0.3252242290207642, "grad_norm": 0.30814723320727977, "learning_rate": 2.55986379339796e-05, "loss": 0.3229, "step": 2647 }, { "epoch": 0.3253470942376213, "grad_norm": 0.3360869914959563, "learning_rate": 2.5594084519448934e-05, "loss": 0.3518, "step": 2648 }, { "epoch": 0.32546995945447843, "grad_norm": 0.34565830459158625, "learning_rate": 2.5589529156205126e-05, "loss": 0.389, "step": 2649 }, { "epoch": 0.32559282467133555, "grad_norm": 0.3638092097041499, "learning_rate": 2.5584971845086107e-05, "loss": 0.3899, "step": 2650 }, { "epoch": 0.32571568988819266, "grad_norm": 0.3247265607866339, "learning_rate": 2.558041258693016e-05, "loss": 0.3328, "step": 2651 }, { "epoch": 0.3258385551050498, "grad_norm": 0.5068618736913006, "learning_rate": 2.5575851382575935e-05, "loss": 0.4519, "step": 2652 }, { "epoch": 0.3259614203219069, "grad_norm": 0.3339997077204509, "learning_rate": 2.5571288232862433e-05, "loss": 0.3533, "step": 2653 }, { "epoch": 0.32608428553876395, "grad_norm": 0.36937300497130854, "learning_rate": 2.556672313862902e-05, "loss": 0.4097, "step": 2654 }, { "epoch": 0.32620715075562107, "grad_norm": 0.42892222052508183, "learning_rate": 2.556215610071541e-05, "loss": 0.457, "step": 2655 }, { "epoch": 0.3263300159724782, "grad_norm": 0.4020634490935007, "learning_rate": 2.555758711996169e-05, "loss": 0.4744, "step": 2656 }, { "epoch": 0.3264528811893353, "grad_norm": 0.37234695290549824, "learning_rate": 2.5553016197208282e-05, "loss": 0.495, "step": 2657 }, { "epoch": 0.3265757464061924, "grad_norm": 0.44667060024796756, "learning_rate": 2.5548443333295984e-05, "loss": 0.4423, "step": 2658 }, { "epoch": 0.32669861162304953, "grad_norm": 0.32686939003339055, "learning_rate": 2.5543868529065944e-05, "loss": 0.3395, "step": 2659 }, { "epoch": 0.32682147683990664, "grad_norm": 0.39647130974235123, "learning_rate": 2.5539291785359672e-05, "loss": 0.4339, "step": 2660 }, { "epoch": 0.3269443420567637, "grad_norm": 0.41403502221068167, "learning_rate": 2.553471310301902e-05, "loss": 0.398, "step": 2661 }, { "epoch": 0.3270672072736208, "grad_norm": 0.38831204804811226, "learning_rate": 2.5530132482886215e-05, "loss": 0.4312, "step": 2662 }, { "epoch": 0.32719007249047793, "grad_norm": 0.3380216113278445, "learning_rate": 2.552554992580383e-05, "loss": 0.3908, "step": 2663 }, { "epoch": 0.32731293770733505, "grad_norm": 0.3761999017110098, "learning_rate": 2.55209654326148e-05, "loss": 0.4725, "step": 2664 }, { "epoch": 0.32743580292419217, "grad_norm": 0.30903854289563604, "learning_rate": 2.5516379004162402e-05, "loss": 0.3362, "step": 2665 }, { "epoch": 0.3275586681410493, "grad_norm": 0.38613618605758704, "learning_rate": 2.5511790641290292e-05, "loss": 0.3909, "step": 2666 }, { "epoch": 0.3276815333579064, "grad_norm": 0.30364820711004575, "learning_rate": 2.5507200344842466e-05, "loss": 0.3352, "step": 2667 }, { "epoch": 0.3278043985747635, "grad_norm": 0.3761903153549504, "learning_rate": 2.5502608115663275e-05, "loss": 0.361, "step": 2668 }, { "epoch": 0.32792726379162057, "grad_norm": 0.36914970418655507, "learning_rate": 2.5498013954597435e-05, "loss": 0.3417, "step": 2669 }, { "epoch": 0.3280501290084777, "grad_norm": 0.37112076391312104, "learning_rate": 2.5493417862490013e-05, "loss": 0.3274, "step": 2670 }, { "epoch": 0.3281729942253348, "grad_norm": 0.31526602878411525, "learning_rate": 2.548881984018642e-05, "loss": 0.4316, "step": 2671 }, { "epoch": 0.3282958594421919, "grad_norm": 0.3937856953686917, "learning_rate": 2.5484219888532443e-05, "loss": 0.3811, "step": 2672 }, { "epoch": 0.32841872465904903, "grad_norm": 0.3761698816820511, "learning_rate": 2.547961800837421e-05, "loss": 0.4371, "step": 2673 }, { "epoch": 0.32854158987590615, "grad_norm": 0.35788704852381825, "learning_rate": 2.547501420055821e-05, "loss": 0.4587, "step": 2674 }, { "epoch": 0.32866445509276326, "grad_norm": 0.32792415675080727, "learning_rate": 2.5470408465931277e-05, "loss": 0.3892, "step": 2675 }, { "epoch": 0.3287873203096203, "grad_norm": 0.4330831566419847, "learning_rate": 2.5465800805340613e-05, "loss": 0.4018, "step": 2676 }, { "epoch": 0.32891018552647744, "grad_norm": 0.3539746742810999, "learning_rate": 2.546119121963376e-05, "loss": 0.3692, "step": 2677 }, { "epoch": 0.32903305074333455, "grad_norm": 0.32857613417102793, "learning_rate": 2.5456579709658632e-05, "loss": 0.3584, "step": 2678 }, { "epoch": 0.32915591596019167, "grad_norm": 0.4225293416084444, "learning_rate": 2.5451966276263472e-05, "loss": 0.403, "step": 2679 }, { "epoch": 0.3292787811770488, "grad_norm": 0.3823871242267076, "learning_rate": 2.5447350920296902e-05, "loss": 0.4398, "step": 2680 }, { "epoch": 0.3294016463939059, "grad_norm": 0.42181512203225385, "learning_rate": 2.5442733642607888e-05, "loss": 0.4287, "step": 2681 }, { "epoch": 0.329524511610763, "grad_norm": 0.33704786812534504, "learning_rate": 2.5438114444045738e-05, "loss": 0.4094, "step": 2682 }, { "epoch": 0.3296473768276201, "grad_norm": 0.3236802873764095, "learning_rate": 2.543349332546013e-05, "loss": 0.361, "step": 2683 }, { "epoch": 0.3297702420444772, "grad_norm": 0.3984384387583954, "learning_rate": 2.5428870287701088e-05, "loss": 0.4616, "step": 2684 }, { "epoch": 0.3298931072613343, "grad_norm": 0.3956772242158181, "learning_rate": 2.5424245331618992e-05, "loss": 0.3295, "step": 2685 }, { "epoch": 0.3300159724781914, "grad_norm": 0.28637342418651407, "learning_rate": 2.541961845806457e-05, "loss": 0.3565, "step": 2686 }, { "epoch": 0.33013883769504854, "grad_norm": 0.45163815529946355, "learning_rate": 2.541498966788891e-05, "loss": 0.501, "step": 2687 }, { "epoch": 0.33026170291190565, "grad_norm": 0.33513828971587706, "learning_rate": 2.541035896194344e-05, "loss": 0.4192, "step": 2688 }, { "epoch": 0.33038456812876277, "grad_norm": 0.2962636063404572, "learning_rate": 2.5405726341079955e-05, "loss": 0.3442, "step": 2689 }, { "epoch": 0.3305074333456198, "grad_norm": 0.42084417894934606, "learning_rate": 2.540109180615059e-05, "loss": 0.3774, "step": 2690 }, { "epoch": 0.33063029856247694, "grad_norm": 0.3937002765634613, "learning_rate": 2.5396455358007843e-05, "loss": 0.4195, "step": 2691 }, { "epoch": 0.33075316377933406, "grad_norm": 0.3623444130845289, "learning_rate": 2.5391816997504552e-05, "loss": 0.4, "step": 2692 }, { "epoch": 0.3308760289961912, "grad_norm": 0.3261342708132, "learning_rate": 2.5387176725493922e-05, "loss": 0.3753, "step": 2693 }, { "epoch": 0.3309988942130483, "grad_norm": 0.3719805117286683, "learning_rate": 2.5382534542829497e-05, "loss": 0.3964, "step": 2694 }, { "epoch": 0.3311217594299054, "grad_norm": 0.34111232291053384, "learning_rate": 2.5377890450365174e-05, "loss": 0.4937, "step": 2695 }, { "epoch": 0.3312446246467625, "grad_norm": 0.3797353206481359, "learning_rate": 2.5373244448955207e-05, "loss": 0.4377, "step": 2696 }, { "epoch": 0.33136748986361964, "grad_norm": 0.3608313548973505, "learning_rate": 2.5368596539454195e-05, "loss": 0.3981, "step": 2697 }, { "epoch": 0.3314903550804767, "grad_norm": 0.41745228171238175, "learning_rate": 2.536394672271709e-05, "loss": 0.4161, "step": 2698 }, { "epoch": 0.3316132202973338, "grad_norm": 0.47059344538800285, "learning_rate": 2.5359294999599204e-05, "loss": 0.4528, "step": 2699 }, { "epoch": 0.3317360855141909, "grad_norm": 0.3829806804728833, "learning_rate": 2.5354641370956184e-05, "loss": 0.4182, "step": 2700 }, { "epoch": 0.33185895073104804, "grad_norm": 0.38021083357603885, "learning_rate": 2.5349985837644033e-05, "loss": 0.367, "step": 2701 }, { "epoch": 0.33198181594790516, "grad_norm": 0.3534401626843436, "learning_rate": 2.5345328400519112e-05, "loss": 0.4075, "step": 2702 }, { "epoch": 0.33210468116476227, "grad_norm": 0.31645377208688413, "learning_rate": 2.534066906043812e-05, "loss": 0.3827, "step": 2703 }, { "epoch": 0.3322275463816194, "grad_norm": 0.31520947070564453, "learning_rate": 2.533600781825812e-05, "loss": 0.457, "step": 2704 }, { "epoch": 0.33235041159847645, "grad_norm": 0.346837858923842, "learning_rate": 2.533134467483651e-05, "loss": 0.4607, "step": 2705 }, { "epoch": 0.33247327681533356, "grad_norm": 0.4428794223226603, "learning_rate": 2.532667963103105e-05, "loss": 0.4385, "step": 2706 }, { "epoch": 0.3325961420321907, "grad_norm": 0.3919762345527128, "learning_rate": 2.532201268769984e-05, "loss": 0.3512, "step": 2707 }, { "epoch": 0.3327190072490478, "grad_norm": 0.4417007054458725, "learning_rate": 2.531734384570134e-05, "loss": 0.4755, "step": 2708 }, { "epoch": 0.3328418724659049, "grad_norm": 0.43766150060974307, "learning_rate": 2.5312673105894347e-05, "loss": 0.372, "step": 2709 }, { "epoch": 0.332964737682762, "grad_norm": 0.344331499837332, "learning_rate": 2.530800046913802e-05, "loss": 0.3324, "step": 2710 }, { "epoch": 0.33308760289961914, "grad_norm": 0.359055479825419, "learning_rate": 2.5303325936291853e-05, "loss": 0.404, "step": 2711 }, { "epoch": 0.3332104681164762, "grad_norm": 0.3220368709244186, "learning_rate": 2.5298649508215702e-05, "loss": 0.3307, "step": 2712 }, { "epoch": 0.3333333333333333, "grad_norm": 0.36067813968446116, "learning_rate": 2.529397118576976e-05, "loss": 0.3691, "step": 2713 }, { "epoch": 0.33345619855019043, "grad_norm": 0.36392348366844796, "learning_rate": 2.5289290969814582e-05, "loss": 0.4244, "step": 2714 }, { "epoch": 0.33357906376704755, "grad_norm": 0.41845099294821053, "learning_rate": 2.5284608861211053e-05, "loss": 0.4751, "step": 2715 }, { "epoch": 0.33370192898390466, "grad_norm": 0.35569050448742234, "learning_rate": 2.527992486082042e-05, "loss": 0.3964, "step": 2716 }, { "epoch": 0.3338247942007618, "grad_norm": 0.35753941886578255, "learning_rate": 2.5275238969504288e-05, "loss": 0.3542, "step": 2717 }, { "epoch": 0.3339476594176189, "grad_norm": 0.40517623445017564, "learning_rate": 2.5270551188124572e-05, "loss": 0.4119, "step": 2718 }, { "epoch": 0.334070524634476, "grad_norm": 0.3484775035327179, "learning_rate": 2.526586151754358e-05, "loss": 0.4564, "step": 2719 }, { "epoch": 0.33419338985133307, "grad_norm": 0.32603407176706123, "learning_rate": 2.5261169958623937e-05, "loss": 0.3928, "step": 2720 }, { "epoch": 0.3343162550681902, "grad_norm": 0.46378830939860355, "learning_rate": 2.5256476512228625e-05, "loss": 0.4248, "step": 2721 }, { "epoch": 0.3344391202850473, "grad_norm": 0.3344255385334588, "learning_rate": 2.5251781179220973e-05, "loss": 0.369, "step": 2722 }, { "epoch": 0.3345619855019044, "grad_norm": 0.42350368045844966, "learning_rate": 2.524708396046466e-05, "loss": 0.4752, "step": 2723 }, { "epoch": 0.33468485071876153, "grad_norm": 0.28646891069303704, "learning_rate": 2.5242384856823703e-05, "loss": 0.4442, "step": 2724 }, { "epoch": 0.33480771593561864, "grad_norm": 0.3484089984100795, "learning_rate": 2.523768386916248e-05, "loss": 0.3054, "step": 2725 }, { "epoch": 0.33493058115247576, "grad_norm": 0.34743778990930124, "learning_rate": 2.5232980998345702e-05, "loss": 0.4096, "step": 2726 }, { "epoch": 0.3350534463693328, "grad_norm": 0.3815951886946314, "learning_rate": 2.522827624523844e-05, "loss": 0.4261, "step": 2727 }, { "epoch": 0.33517631158618993, "grad_norm": 0.3970456581713523, "learning_rate": 2.522356961070608e-05, "loss": 0.4147, "step": 2728 }, { "epoch": 0.33529917680304705, "grad_norm": 0.31304554174051474, "learning_rate": 2.5218861095614404e-05, "loss": 0.374, "step": 2729 }, { "epoch": 0.33542204201990417, "grad_norm": 0.3493907333446163, "learning_rate": 2.5214150700829497e-05, "loss": 0.3616, "step": 2730 }, { "epoch": 0.3355449072367613, "grad_norm": 0.35970102937121395, "learning_rate": 2.520943842721781e-05, "loss": 0.4237, "step": 2731 }, { "epoch": 0.3356677724536184, "grad_norm": 0.3543448982034817, "learning_rate": 2.5204724275646132e-05, "loss": 0.4457, "step": 2732 }, { "epoch": 0.3357906376704755, "grad_norm": 0.3899855001989281, "learning_rate": 2.5200008246981612e-05, "loss": 0.336, "step": 2733 }, { "epoch": 0.33591350288733257, "grad_norm": 0.5210440769831471, "learning_rate": 2.5195290342091717e-05, "loss": 0.3929, "step": 2734 }, { "epoch": 0.3360363681041897, "grad_norm": 0.3622868443647911, "learning_rate": 2.5190570561844283e-05, "loss": 0.3937, "step": 2735 }, { "epoch": 0.3361592333210468, "grad_norm": 0.3761862984305575, "learning_rate": 2.5185848907107485e-05, "loss": 0.4187, "step": 2736 }, { "epoch": 0.3362820985379039, "grad_norm": 0.41816581018471777, "learning_rate": 2.5181125378749834e-05, "loss": 0.3965, "step": 2737 }, { "epoch": 0.33640496375476103, "grad_norm": 0.33220945297851656, "learning_rate": 2.5176399977640202e-05, "loss": 0.4054, "step": 2738 }, { "epoch": 0.33652782897161815, "grad_norm": 0.34997431871354434, "learning_rate": 2.5171672704647785e-05, "loss": 0.4042, "step": 2739 }, { "epoch": 0.33665069418847526, "grad_norm": 0.3922109817766696, "learning_rate": 2.5166943560642145e-05, "loss": 0.3614, "step": 2740 }, { "epoch": 0.3367735594053323, "grad_norm": 0.3761708760437993, "learning_rate": 2.5162212546493166e-05, "loss": 0.3566, "step": 2741 }, { "epoch": 0.33689642462218944, "grad_norm": 0.36869862959560673, "learning_rate": 2.5157479663071096e-05, "loss": 0.4049, "step": 2742 }, { "epoch": 0.33701928983904655, "grad_norm": 0.3763283056543182, "learning_rate": 2.5152744911246516e-05, "loss": 0.4133, "step": 2743 }, { "epoch": 0.33714215505590367, "grad_norm": 0.3503690412607435, "learning_rate": 2.5148008291890358e-05, "loss": 0.3944, "step": 2744 }, { "epoch": 0.3372650202727608, "grad_norm": 0.3526998237195994, "learning_rate": 2.5143269805873877e-05, "loss": 0.3969, "step": 2745 }, { "epoch": 0.3373878854896179, "grad_norm": 0.3663421287259903, "learning_rate": 2.5138529454068704e-05, "loss": 0.3745, "step": 2746 }, { "epoch": 0.337510750706475, "grad_norm": 0.5234329499854311, "learning_rate": 2.513378723734678e-05, "loss": 0.444, "step": 2747 }, { "epoch": 0.33763361592333213, "grad_norm": 0.32802905603312166, "learning_rate": 2.512904315658042e-05, "loss": 0.3345, "step": 2748 }, { "epoch": 0.3377564811401892, "grad_norm": 0.3996875518517593, "learning_rate": 2.5124297212642263e-05, "loss": 0.4747, "step": 2749 }, { "epoch": 0.3378793463570463, "grad_norm": 0.41751840286281683, "learning_rate": 2.511954940640529e-05, "loss": 0.4161, "step": 2750 }, { "epoch": 0.3380022115739034, "grad_norm": 0.2963901222162024, "learning_rate": 2.5114799738742827e-05, "loss": 0.3696, "step": 2751 }, { "epoch": 0.33812507679076054, "grad_norm": 0.3435445897776779, "learning_rate": 2.511004821052855e-05, "loss": 0.4492, "step": 2752 }, { "epoch": 0.33824794200761765, "grad_norm": 0.3810273252716024, "learning_rate": 2.5105294822636476e-05, "loss": 0.4462, "step": 2753 }, { "epoch": 0.33837080722447477, "grad_norm": 0.3842737352526634, "learning_rate": 2.510053957594095e-05, "loss": 0.4788, "step": 2754 }, { "epoch": 0.3384936724413319, "grad_norm": 0.4109571348342879, "learning_rate": 2.5095782471316676e-05, "loss": 0.4401, "step": 2755 }, { "epoch": 0.33861653765818894, "grad_norm": 0.3940031379065786, "learning_rate": 2.5091023509638688e-05, "loss": 0.3453, "step": 2756 }, { "epoch": 0.33873940287504606, "grad_norm": 0.3137882077193894, "learning_rate": 2.5086262691782366e-05, "loss": 0.3863, "step": 2757 }, { "epoch": 0.3388622680919032, "grad_norm": 0.3840306464718552, "learning_rate": 2.5081500018623436e-05, "loss": 0.4722, "step": 2758 }, { "epoch": 0.3389851333087603, "grad_norm": 0.3690003238433632, "learning_rate": 2.5076735491037958e-05, "loss": 0.5065, "step": 2759 }, { "epoch": 0.3391079985256174, "grad_norm": 0.36231655841562427, "learning_rate": 2.5071969109902334e-05, "loss": 0.3708, "step": 2760 }, { "epoch": 0.3392308637424745, "grad_norm": 0.3247418485847422, "learning_rate": 2.5067200876093316e-05, "loss": 0.3807, "step": 2761 }, { "epoch": 0.33935372895933164, "grad_norm": 0.4129713155856541, "learning_rate": 2.506243079048798e-05, "loss": 0.4061, "step": 2762 }, { "epoch": 0.3394765941761887, "grad_norm": 0.4634034272743139, "learning_rate": 2.505765885396376e-05, "loss": 0.4262, "step": 2763 }, { "epoch": 0.3395994593930458, "grad_norm": 0.37732213923078645, "learning_rate": 2.5052885067398423e-05, "loss": 0.423, "step": 2764 }, { "epoch": 0.3397223246099029, "grad_norm": 0.3188880531788194, "learning_rate": 2.504810943167007e-05, "loss": 0.4561, "step": 2765 }, { "epoch": 0.33984518982676004, "grad_norm": 0.39341871518967364, "learning_rate": 2.5043331947657147e-05, "loss": 0.4381, "step": 2766 }, { "epoch": 0.33996805504361716, "grad_norm": 0.33902781859725745, "learning_rate": 2.503855261623845e-05, "loss": 0.4411, "step": 2767 }, { "epoch": 0.34009092026047427, "grad_norm": 0.35252037674396414, "learning_rate": 2.5033771438293104e-05, "loss": 0.3965, "step": 2768 }, { "epoch": 0.3402137854773314, "grad_norm": 0.43840987311705404, "learning_rate": 2.5028988414700573e-05, "loss": 0.435, "step": 2769 }, { "epoch": 0.3403366506941885, "grad_norm": 0.3796034118558244, "learning_rate": 2.5024203546340657e-05, "loss": 0.4423, "step": 2770 }, { "epoch": 0.34045951591104556, "grad_norm": 0.37241330963482183, "learning_rate": 2.5019416834093513e-05, "loss": 0.5005, "step": 2771 }, { "epoch": 0.3405823811279027, "grad_norm": 0.4080073216389217, "learning_rate": 2.5014628278839617e-05, "loss": 0.4031, "step": 2772 }, { "epoch": 0.3407052463447598, "grad_norm": 0.3671769323668464, "learning_rate": 2.5009837881459805e-05, "loss": 0.3882, "step": 2773 }, { "epoch": 0.3408281115616169, "grad_norm": 0.4299360854401009, "learning_rate": 2.5005045642835223e-05, "loss": 0.3951, "step": 2774 }, { "epoch": 0.340950976778474, "grad_norm": 0.3159746681814023, "learning_rate": 2.5000251563847378e-05, "loss": 0.352, "step": 2775 }, { "epoch": 0.34107384199533114, "grad_norm": 0.3118227428922029, "learning_rate": 2.4995455645378114e-05, "loss": 0.3954, "step": 2776 }, { "epoch": 0.34119670721218825, "grad_norm": 0.38060326744521056, "learning_rate": 2.499065788830961e-05, "loss": 0.4031, "step": 2777 }, { "epoch": 0.3413195724290453, "grad_norm": 0.3263529637649131, "learning_rate": 2.498585829352438e-05, "loss": 0.4217, "step": 2778 }, { "epoch": 0.34144243764590243, "grad_norm": 0.300764118954842, "learning_rate": 2.498105686190527e-05, "loss": 0.3795, "step": 2779 }, { "epoch": 0.34156530286275955, "grad_norm": 0.3107129219885787, "learning_rate": 2.4976253594335485e-05, "loss": 0.3674, "step": 2780 }, { "epoch": 0.34168816807961666, "grad_norm": 0.4515106740530756, "learning_rate": 2.497144849169855e-05, "loss": 0.4607, "step": 2781 }, { "epoch": 0.3418110332964738, "grad_norm": 0.3519487437194163, "learning_rate": 2.4966641554878332e-05, "loss": 0.3859, "step": 2782 }, { "epoch": 0.3419338985133309, "grad_norm": 0.49946824994771244, "learning_rate": 2.4961832784759037e-05, "loss": 0.3748, "step": 2783 }, { "epoch": 0.342056763730188, "grad_norm": 0.3738907141617204, "learning_rate": 2.49570221822252e-05, "loss": 0.4413, "step": 2784 }, { "epoch": 0.34217962894704507, "grad_norm": 0.3851457155091464, "learning_rate": 2.4952209748161708e-05, "loss": 0.4677, "step": 2785 }, { "epoch": 0.3423024941639022, "grad_norm": 0.321323327425752, "learning_rate": 2.494739548345378e-05, "loss": 0.3633, "step": 2786 }, { "epoch": 0.3424253593807593, "grad_norm": 0.3879678273078233, "learning_rate": 2.494257938898696e-05, "loss": 0.4061, "step": 2787 }, { "epoch": 0.3425482245976164, "grad_norm": 0.33198831647089566, "learning_rate": 2.4937761465647144e-05, "loss": 0.4166, "step": 2788 }, { "epoch": 0.34267108981447353, "grad_norm": 0.4002827018063276, "learning_rate": 2.4932941714320552e-05, "loss": 0.443, "step": 2789 }, { "epoch": 0.34279395503133064, "grad_norm": 0.37873131194156284, "learning_rate": 2.4928120135893752e-05, "loss": 0.4173, "step": 2790 }, { "epoch": 0.34291682024818776, "grad_norm": 0.3709658704598575, "learning_rate": 2.4923296731253635e-05, "loss": 0.3409, "step": 2791 }, { "epoch": 0.3430396854650448, "grad_norm": 0.3562278442682212, "learning_rate": 2.4918471501287447e-05, "loss": 0.3929, "step": 2792 }, { "epoch": 0.34316255068190193, "grad_norm": 0.33005820412505676, "learning_rate": 2.491364444688274e-05, "loss": 0.3577, "step": 2793 }, { "epoch": 0.34328541589875905, "grad_norm": 0.32267606076540223, "learning_rate": 2.4908815568927435e-05, "loss": 0.4168, "step": 2794 }, { "epoch": 0.34340828111561617, "grad_norm": 0.4092831041276183, "learning_rate": 2.4903984868309768e-05, "loss": 0.4186, "step": 2795 }, { "epoch": 0.3435311463324733, "grad_norm": 0.37173969225547526, "learning_rate": 2.489915234591831e-05, "loss": 0.3523, "step": 2796 }, { "epoch": 0.3436540115493304, "grad_norm": 0.2950569421148458, "learning_rate": 2.489431800264198e-05, "loss": 0.3523, "step": 2797 }, { "epoch": 0.3437768767661875, "grad_norm": 0.35831722097715807, "learning_rate": 2.488948183937002e-05, "loss": 0.3851, "step": 2798 }, { "epoch": 0.3438997419830446, "grad_norm": 0.3348141327913456, "learning_rate": 2.4884643856992008e-05, "loss": 0.4652, "step": 2799 }, { "epoch": 0.3440226071999017, "grad_norm": 0.3600077677980385, "learning_rate": 2.4879804056397865e-05, "loss": 0.4138, "step": 2800 }, { "epoch": 0.3441454724167588, "grad_norm": 0.3295353751171975, "learning_rate": 2.4874962438477838e-05, "loss": 0.4056, "step": 2801 }, { "epoch": 0.3442683376336159, "grad_norm": 0.3215985137751164, "learning_rate": 2.487011900412251e-05, "loss": 0.4278, "step": 2802 }, { "epoch": 0.34439120285047303, "grad_norm": 0.44331128574301565, "learning_rate": 2.4865273754222805e-05, "loss": 0.3673, "step": 2803 }, { "epoch": 0.34451406806733015, "grad_norm": 0.3580167001532478, "learning_rate": 2.4860426689669965e-05, "loss": 0.4436, "step": 2804 }, { "epoch": 0.34463693328418726, "grad_norm": 0.3617557285170264, "learning_rate": 2.485557781135559e-05, "loss": 0.4321, "step": 2805 }, { "epoch": 0.3447597985010444, "grad_norm": 0.42225937766058735, "learning_rate": 2.485072712017159e-05, "loss": 0.4079, "step": 2806 }, { "epoch": 0.34488266371790144, "grad_norm": 0.3866847506782678, "learning_rate": 2.4845874617010218e-05, "loss": 0.3862, "step": 2807 }, { "epoch": 0.34500552893475855, "grad_norm": 0.2952757929738372, "learning_rate": 2.4841020302764066e-05, "loss": 0.3436, "step": 2808 }, { "epoch": 0.34512839415161567, "grad_norm": 0.337321840101464, "learning_rate": 2.483616417832605e-05, "loss": 0.4277, "step": 2809 }, { "epoch": 0.3452512593684728, "grad_norm": 0.39119209741269834, "learning_rate": 2.483130624458942e-05, "loss": 0.4976, "step": 2810 }, { "epoch": 0.3453741245853299, "grad_norm": 0.5012819039634224, "learning_rate": 2.4826446502447767e-05, "loss": 0.423, "step": 2811 }, { "epoch": 0.345496989802187, "grad_norm": 0.6122902822223626, "learning_rate": 2.482158495279501e-05, "loss": 0.4039, "step": 2812 }, { "epoch": 0.34561985501904413, "grad_norm": 0.390917050943434, "learning_rate": 2.4816721596525392e-05, "loss": 0.4452, "step": 2813 }, { "epoch": 0.3457427202359012, "grad_norm": 0.3383364499977205, "learning_rate": 2.4811856434533497e-05, "loss": 0.3585, "step": 2814 }, { "epoch": 0.3458655854527583, "grad_norm": 0.37269755736529175, "learning_rate": 2.480698946771425e-05, "loss": 0.3704, "step": 2815 }, { "epoch": 0.3459884506696154, "grad_norm": 0.417924927222224, "learning_rate": 2.4802120696962886e-05, "loss": 0.4355, "step": 2816 }, { "epoch": 0.34611131588647254, "grad_norm": 0.4208657999546734, "learning_rate": 2.4797250123174993e-05, "loss": 0.4143, "step": 2817 }, { "epoch": 0.34623418110332965, "grad_norm": 0.46391058464839774, "learning_rate": 2.479237774724647e-05, "loss": 0.507, "step": 2818 }, { "epoch": 0.34635704632018677, "grad_norm": 0.31995160673398404, "learning_rate": 2.4787503570073574e-05, "loss": 0.3796, "step": 2819 }, { "epoch": 0.3464799115370439, "grad_norm": 0.39574172601642815, "learning_rate": 2.478262759255287e-05, "loss": 0.4483, "step": 2820 }, { "epoch": 0.34660277675390094, "grad_norm": 0.42208961365370257, "learning_rate": 2.4777749815581258e-05, "loss": 0.383, "step": 2821 }, { "epoch": 0.34672564197075806, "grad_norm": 0.386659401883785, "learning_rate": 2.477287024005598e-05, "loss": 0.3334, "step": 2822 }, { "epoch": 0.3468485071876152, "grad_norm": 0.3377837051992719, "learning_rate": 2.4767988866874604e-05, "loss": 0.4786, "step": 2823 }, { "epoch": 0.3469713724044723, "grad_norm": 0.36639918499556984, "learning_rate": 2.4763105696935016e-05, "loss": 0.3754, "step": 2824 }, { "epoch": 0.3470942376213294, "grad_norm": 0.29635337055045335, "learning_rate": 2.4758220731135456e-05, "loss": 0.3736, "step": 2825 }, { "epoch": 0.3472171028381865, "grad_norm": 0.3478532429134772, "learning_rate": 2.475333397037448e-05, "loss": 0.4429, "step": 2826 }, { "epoch": 0.34733996805504364, "grad_norm": 0.3587502406421325, "learning_rate": 2.4748445415550964e-05, "loss": 0.4035, "step": 2827 }, { "epoch": 0.34746283327190075, "grad_norm": 0.3524559680759411, "learning_rate": 2.4743555067564144e-05, "loss": 0.4387, "step": 2828 }, { "epoch": 0.3475856984887578, "grad_norm": 0.40260038098595036, "learning_rate": 2.473866292731355e-05, "loss": 0.4318, "step": 2829 }, { "epoch": 0.3477085637056149, "grad_norm": 0.39658377672883083, "learning_rate": 2.4733768995699077e-05, "loss": 0.4361, "step": 2830 }, { "epoch": 0.34783142892247204, "grad_norm": 0.3301225169319348, "learning_rate": 2.4728873273620918e-05, "loss": 0.4772, "step": 2831 }, { "epoch": 0.34795429413932916, "grad_norm": 0.3309215682972773, "learning_rate": 2.4723975761979615e-05, "loss": 0.441, "step": 2832 }, { "epoch": 0.34807715935618627, "grad_norm": 0.3676837890936484, "learning_rate": 2.4719076461676033e-05, "loss": 0.3679, "step": 2833 }, { "epoch": 0.3482000245730434, "grad_norm": 0.43234848461979664, "learning_rate": 2.4714175373611365e-05, "loss": 0.4721, "step": 2834 }, { "epoch": 0.3483228897899005, "grad_norm": 0.4023289105405871, "learning_rate": 2.4709272498687135e-05, "loss": 0.4556, "step": 2835 }, { "epoch": 0.34844575500675756, "grad_norm": 0.42321939901219935, "learning_rate": 2.47043678378052e-05, "loss": 0.3842, "step": 2836 }, { "epoch": 0.3485686202236147, "grad_norm": 0.40977875718725976, "learning_rate": 2.469946139186773e-05, "loss": 0.3788, "step": 2837 }, { "epoch": 0.3486914854404718, "grad_norm": 0.33305808209341214, "learning_rate": 2.4694553161777246e-05, "loss": 0.3755, "step": 2838 }, { "epoch": 0.3488143506573289, "grad_norm": 0.40545128524298557, "learning_rate": 2.4689643148436577e-05, "loss": 0.4172, "step": 2839 }, { "epoch": 0.348937215874186, "grad_norm": 0.3576260110724531, "learning_rate": 2.4684731352748893e-05, "loss": 0.3532, "step": 2840 }, { "epoch": 0.34906008109104314, "grad_norm": 0.36640339402791655, "learning_rate": 2.4679817775617675e-05, "loss": 0.3982, "step": 2841 }, { "epoch": 0.34918294630790025, "grad_norm": 0.43335441679421965, "learning_rate": 2.4674902417946763e-05, "loss": 0.5008, "step": 2842 }, { "epoch": 0.3493058115247573, "grad_norm": 0.35652213855912374, "learning_rate": 2.466998528064029e-05, "loss": 0.3899, "step": 2843 }, { "epoch": 0.34942867674161443, "grad_norm": 0.3559026488569518, "learning_rate": 2.4665066364602743e-05, "loss": 0.3444, "step": 2844 }, { "epoch": 0.34955154195847155, "grad_norm": 0.32842824546485383, "learning_rate": 2.4660145670738914e-05, "loss": 0.5059, "step": 2845 }, { "epoch": 0.34967440717532866, "grad_norm": 0.33349584186530423, "learning_rate": 2.4655223199953932e-05, "loss": 0.3721, "step": 2846 }, { "epoch": 0.3497972723921858, "grad_norm": 0.4003039115799348, "learning_rate": 2.4650298953153265e-05, "loss": 0.5278, "step": 2847 }, { "epoch": 0.3499201376090429, "grad_norm": 0.341734127619233, "learning_rate": 2.4645372931242692e-05, "loss": 0.3805, "step": 2848 }, { "epoch": 0.3500430028259, "grad_norm": 0.38543259659647305, "learning_rate": 2.4640445135128317e-05, "loss": 0.3678, "step": 2849 }, { "epoch": 0.3501658680427571, "grad_norm": 0.3458934534548992, "learning_rate": 2.4635515565716577e-05, "loss": 0.3576, "step": 2850 }, { "epoch": 0.3502887332596142, "grad_norm": 0.3045995975277125, "learning_rate": 2.463058422391424e-05, "loss": 0.4279, "step": 2851 }, { "epoch": 0.3504115984764713, "grad_norm": 0.364152228020887, "learning_rate": 2.4625651110628395e-05, "loss": 0.3458, "step": 2852 }, { "epoch": 0.3505344636933284, "grad_norm": 0.33129345163469676, "learning_rate": 2.4620716226766448e-05, "loss": 0.3878, "step": 2853 }, { "epoch": 0.35065732891018553, "grad_norm": 0.4894977756573255, "learning_rate": 2.4615779573236145e-05, "loss": 0.4006, "step": 2854 }, { "epoch": 0.35078019412704264, "grad_norm": 0.44992507281812644, "learning_rate": 2.461084115094555e-05, "loss": 0.3838, "step": 2855 }, { "epoch": 0.35090305934389976, "grad_norm": 0.39642342761192984, "learning_rate": 2.4605900960803056e-05, "loss": 0.3313, "step": 2856 }, { "epoch": 0.3510259245607569, "grad_norm": 0.27812070163329744, "learning_rate": 2.4600959003717375e-05, "loss": 0.3735, "step": 2857 }, { "epoch": 0.35114878977761393, "grad_norm": 0.38337802263124166, "learning_rate": 2.459601528059755e-05, "loss": 0.3997, "step": 2858 }, { "epoch": 0.35127165499447105, "grad_norm": 0.34098911782825464, "learning_rate": 2.4591069792352946e-05, "loss": 0.3843, "step": 2859 }, { "epoch": 0.35139452021132817, "grad_norm": 0.3586356879500431, "learning_rate": 2.4586122539893253e-05, "loss": 0.3889, "step": 2860 }, { "epoch": 0.3515173854281853, "grad_norm": 0.34182904141162535, "learning_rate": 2.458117352412849e-05, "loss": 0.3872, "step": 2861 }, { "epoch": 0.3516402506450424, "grad_norm": 0.3665485278862311, "learning_rate": 2.4576222745968988e-05, "loss": 0.4042, "step": 2862 }, { "epoch": 0.3517631158618995, "grad_norm": 0.44765097160323164, "learning_rate": 2.457127020632542e-05, "loss": 0.4532, "step": 2863 }, { "epoch": 0.3518859810787566, "grad_norm": 0.4795786332780668, "learning_rate": 2.4566315906108772e-05, "loss": 0.4412, "step": 2864 }, { "epoch": 0.3520088462956137, "grad_norm": 0.34991807088643617, "learning_rate": 2.4561359846230346e-05, "loss": 0.436, "step": 2865 }, { "epoch": 0.3521317115124708, "grad_norm": 0.36577707624004485, "learning_rate": 2.455640202760179e-05, "loss": 0.4083, "step": 2866 }, { "epoch": 0.3522545767293279, "grad_norm": 0.3873002597902908, "learning_rate": 2.4551442451135052e-05, "loss": 0.3765, "step": 2867 }, { "epoch": 0.35237744194618503, "grad_norm": 0.3486652783667364, "learning_rate": 2.4546481117742422e-05, "loss": 0.3765, "step": 2868 }, { "epoch": 0.35250030716304215, "grad_norm": 0.3509690288886916, "learning_rate": 2.4541518028336496e-05, "loss": 0.4008, "step": 2869 }, { "epoch": 0.35262317237989926, "grad_norm": 0.3905612999910376, "learning_rate": 2.453655318383021e-05, "loss": 0.376, "step": 2870 }, { "epoch": 0.3527460375967564, "grad_norm": 0.3476929340780962, "learning_rate": 2.4531586585136817e-05, "loss": 0.3847, "step": 2871 }, { "epoch": 0.35286890281361344, "grad_norm": 0.3733909727857693, "learning_rate": 2.452661823316988e-05, "loss": 0.4955, "step": 2872 }, { "epoch": 0.35299176803047055, "grad_norm": 0.380265393010261, "learning_rate": 2.4521648128843307e-05, "loss": 0.4229, "step": 2873 }, { "epoch": 0.35311463324732767, "grad_norm": 0.3773918794024539, "learning_rate": 2.451667627307131e-05, "loss": 0.4357, "step": 2874 }, { "epoch": 0.3532374984641848, "grad_norm": 0.39235536496142753, "learning_rate": 2.4511702666768422e-05, "loss": 0.4078, "step": 2875 }, { "epoch": 0.3533603636810419, "grad_norm": 0.4003938510521753, "learning_rate": 2.4506727310849525e-05, "loss": 0.4287, "step": 2876 }, { "epoch": 0.353483228897899, "grad_norm": 0.3682883750554175, "learning_rate": 2.4501750206229785e-05, "loss": 0.4263, "step": 2877 }, { "epoch": 0.35360609411475613, "grad_norm": 0.26523812240928935, "learning_rate": 2.449677135382472e-05, "loss": 0.3357, "step": 2878 }, { "epoch": 0.35372895933161325, "grad_norm": 0.42270189509156736, "learning_rate": 2.4491790754550154e-05, "loss": 0.4268, "step": 2879 }, { "epoch": 0.3538518245484703, "grad_norm": 0.32460149759128953, "learning_rate": 2.4486808409322234e-05, "loss": 0.3916, "step": 2880 }, { "epoch": 0.3539746897653274, "grad_norm": 0.38341451406823285, "learning_rate": 2.448182431905743e-05, "loss": 0.4377, "step": 2881 }, { "epoch": 0.35409755498218454, "grad_norm": 0.3365873522201041, "learning_rate": 2.4476838484672533e-05, "loss": 0.365, "step": 2882 }, { "epoch": 0.35422042019904165, "grad_norm": 0.30642272503283896, "learning_rate": 2.4471850907084658e-05, "loss": 0.4024, "step": 2883 }, { "epoch": 0.35434328541589877, "grad_norm": 0.37535054466322026, "learning_rate": 2.4466861587211233e-05, "loss": 0.4701, "step": 2884 }, { "epoch": 0.3544661506327559, "grad_norm": 0.3180522688176207, "learning_rate": 2.4461870525970013e-05, "loss": 0.3584, "step": 2885 }, { "epoch": 0.354589015849613, "grad_norm": 0.34936757185895145, "learning_rate": 2.4456877724279076e-05, "loss": 0.3524, "step": 2886 }, { "epoch": 0.35471188106647006, "grad_norm": 0.35149350728058676, "learning_rate": 2.4451883183056812e-05, "loss": 0.3121, "step": 2887 }, { "epoch": 0.3548347462833272, "grad_norm": 0.43353292214782596, "learning_rate": 2.4446886903221935e-05, "loss": 0.4273, "step": 2888 }, { "epoch": 0.3549576115001843, "grad_norm": 0.37942516622729383, "learning_rate": 2.4441888885693473e-05, "loss": 0.354, "step": 2889 }, { "epoch": 0.3550804767170414, "grad_norm": 0.5558049404108133, "learning_rate": 2.4436889131390788e-05, "loss": 0.5273, "step": 2890 }, { "epoch": 0.3552033419338985, "grad_norm": 0.36012683228586206, "learning_rate": 2.4431887641233543e-05, "loss": 0.368, "step": 2891 }, { "epoch": 0.35532620715075564, "grad_norm": 0.36499668351255726, "learning_rate": 2.442688441614174e-05, "loss": 0.4339, "step": 2892 }, { "epoch": 0.35544907236761275, "grad_norm": 0.40551893255382704, "learning_rate": 2.4421879457035678e-05, "loss": 0.4438, "step": 2893 }, { "epoch": 0.3555719375844698, "grad_norm": 0.3886063702454604, "learning_rate": 2.4416872764836e-05, "loss": 0.4494, "step": 2894 }, { "epoch": 0.3556948028013269, "grad_norm": 0.4198348016786747, "learning_rate": 2.441186434046364e-05, "loss": 0.4685, "step": 2895 }, { "epoch": 0.35581766801818404, "grad_norm": 0.41016620397990416, "learning_rate": 2.4406854184839875e-05, "loss": 0.4312, "step": 2896 }, { "epoch": 0.35594053323504116, "grad_norm": 0.366960404708312, "learning_rate": 2.440184229888629e-05, "loss": 0.4112, "step": 2897 }, { "epoch": 0.35606339845189827, "grad_norm": 0.34816110415762797, "learning_rate": 2.4396828683524787e-05, "loss": 0.3753, "step": 2898 }, { "epoch": 0.3561862636687554, "grad_norm": 0.3613902922681953, "learning_rate": 2.4391813339677588e-05, "loss": 0.5021, "step": 2899 }, { "epoch": 0.3563091288856125, "grad_norm": 0.39769720587482316, "learning_rate": 2.4386796268267227e-05, "loss": 0.3819, "step": 2900 }, { "epoch": 0.3564319941024696, "grad_norm": 0.3703302231146343, "learning_rate": 2.438177747021658e-05, "loss": 0.3905, "step": 2901 }, { "epoch": 0.3565548593193267, "grad_norm": 0.3564272912723834, "learning_rate": 2.43767569464488e-05, "loss": 0.3869, "step": 2902 }, { "epoch": 0.3566777245361838, "grad_norm": 0.39120054484224265, "learning_rate": 2.4371734697887395e-05, "loss": 0.3895, "step": 2903 }, { "epoch": 0.3568005897530409, "grad_norm": 0.34720029626871873, "learning_rate": 2.436671072545617e-05, "loss": 0.4135, "step": 2904 }, { "epoch": 0.356923454969898, "grad_norm": 0.321281935873273, "learning_rate": 2.436168503007925e-05, "loss": 0.3913, "step": 2905 }, { "epoch": 0.35704632018675514, "grad_norm": 0.35739754050764305, "learning_rate": 2.435665761268108e-05, "loss": 0.3903, "step": 2906 }, { "epoch": 0.35716918540361225, "grad_norm": 0.33632508008698486, "learning_rate": 2.4351628474186427e-05, "loss": 0.4313, "step": 2907 }, { "epoch": 0.35729205062046937, "grad_norm": 0.3496334555601173, "learning_rate": 2.434659761552036e-05, "loss": 0.4564, "step": 2908 }, { "epoch": 0.35741491583732643, "grad_norm": 0.3572407944859599, "learning_rate": 2.4341565037608278e-05, "loss": 0.3849, "step": 2909 }, { "epoch": 0.35753778105418355, "grad_norm": 0.30856536720552225, "learning_rate": 2.4336530741375892e-05, "loss": 0.4089, "step": 2910 }, { "epoch": 0.35766064627104066, "grad_norm": 0.3334278480283855, "learning_rate": 2.4331494727749223e-05, "loss": 0.4209, "step": 2911 }, { "epoch": 0.3577835114878978, "grad_norm": 0.3792981271794162, "learning_rate": 2.4326456997654617e-05, "loss": 0.4168, "step": 2912 }, { "epoch": 0.3579063767047549, "grad_norm": 0.3332319641629819, "learning_rate": 2.4321417552018728e-05, "loss": 0.3964, "step": 2913 }, { "epoch": 0.358029241921612, "grad_norm": 0.4405316095189461, "learning_rate": 2.4316376391768534e-05, "loss": 0.4675, "step": 2914 }, { "epoch": 0.3581521071384691, "grad_norm": 0.3607265297009631, "learning_rate": 2.431133351783132e-05, "loss": 0.4576, "step": 2915 }, { "epoch": 0.3582749723553262, "grad_norm": 0.2846626995302858, "learning_rate": 2.430628893113469e-05, "loss": 0.3548, "step": 2916 }, { "epoch": 0.3583978375721833, "grad_norm": 0.293402768676923, "learning_rate": 2.430124263260657e-05, "loss": 0.3769, "step": 2917 }, { "epoch": 0.3585207027890404, "grad_norm": 0.3535824493669414, "learning_rate": 2.4296194623175187e-05, "loss": 0.4743, "step": 2918 }, { "epoch": 0.35864356800589753, "grad_norm": 0.3665268718605146, "learning_rate": 2.4291144903769087e-05, "loss": 0.3727, "step": 2919 }, { "epoch": 0.35876643322275464, "grad_norm": 0.38986369116393854, "learning_rate": 2.4286093475317145e-05, "loss": 0.5193, "step": 2920 }, { "epoch": 0.35888929843961176, "grad_norm": 0.35571201372545536, "learning_rate": 2.428104033874852e-05, "loss": 0.3643, "step": 2921 }, { "epoch": 0.3590121636564689, "grad_norm": 0.37768268803348587, "learning_rate": 2.4275985494992724e-05, "loss": 0.4415, "step": 2922 }, { "epoch": 0.35913502887332593, "grad_norm": 0.46809617446689034, "learning_rate": 2.4270928944979546e-05, "loss": 0.4906, "step": 2923 }, { "epoch": 0.35925789409018305, "grad_norm": 0.3908994012210242, "learning_rate": 2.4265870689639113e-05, "loss": 0.4256, "step": 2924 }, { "epoch": 0.35938075930704017, "grad_norm": 0.44397130090140746, "learning_rate": 2.4260810729901857e-05, "loss": 0.4685, "step": 2925 }, { "epoch": 0.3595036245238973, "grad_norm": 0.43712234993395405, "learning_rate": 2.4255749066698535e-05, "loss": 0.4389, "step": 2926 }, { "epoch": 0.3596264897407544, "grad_norm": 0.44886327881480026, "learning_rate": 2.4250685700960188e-05, "loss": 0.4708, "step": 2927 }, { "epoch": 0.3597493549576115, "grad_norm": 0.3204714056055093, "learning_rate": 2.4245620633618207e-05, "loss": 0.3485, "step": 2928 }, { "epoch": 0.3598722201744686, "grad_norm": 0.31143175567446024, "learning_rate": 2.424055386560426e-05, "loss": 0.4398, "step": 2929 }, { "epoch": 0.35999508539132574, "grad_norm": 0.407996178810538, "learning_rate": 2.4235485397850363e-05, "loss": 0.4222, "step": 2930 }, { "epoch": 0.3601179506081828, "grad_norm": 0.3314500901773975, "learning_rate": 2.4230415231288823e-05, "loss": 0.3428, "step": 2931 }, { "epoch": 0.3602408158250399, "grad_norm": 0.373443049984383, "learning_rate": 2.422534336685226e-05, "loss": 0.3901, "step": 2932 }, { "epoch": 0.36036368104189703, "grad_norm": 0.397038982977027, "learning_rate": 2.4220269805473612e-05, "loss": 0.3449, "step": 2933 }, { "epoch": 0.36048654625875415, "grad_norm": 0.3447177025566012, "learning_rate": 2.421519454808613e-05, "loss": 0.3849, "step": 2934 }, { "epoch": 0.36060941147561126, "grad_norm": 0.3792521882186624, "learning_rate": 2.4210117595623377e-05, "loss": 0.3796, "step": 2935 }, { "epoch": 0.3607322766924684, "grad_norm": 0.41808862484325615, "learning_rate": 2.4205038949019218e-05, "loss": 0.375, "step": 2936 }, { "epoch": 0.3608551419093255, "grad_norm": 0.3286910994117108, "learning_rate": 2.419995860920784e-05, "loss": 0.3571, "step": 2937 }, { "epoch": 0.36097800712618255, "grad_norm": 0.38699652525641837, "learning_rate": 2.4194876577123746e-05, "loss": 0.4083, "step": 2938 }, { "epoch": 0.36110087234303967, "grad_norm": 0.3633611697201627, "learning_rate": 2.4189792853701734e-05, "loss": 0.4202, "step": 2939 }, { "epoch": 0.3612237375598968, "grad_norm": 0.38728656827427965, "learning_rate": 2.418470743987692e-05, "loss": 0.3623, "step": 2940 }, { "epoch": 0.3613466027767539, "grad_norm": 0.3456498534774211, "learning_rate": 2.4179620336584743e-05, "loss": 0.335, "step": 2941 }, { "epoch": 0.361469467993611, "grad_norm": 0.3192259100359807, "learning_rate": 2.417453154476093e-05, "loss": 0.4539, "step": 2942 }, { "epoch": 0.36159233321046813, "grad_norm": 0.3232869610469949, "learning_rate": 2.4169441065341546e-05, "loss": 0.3485, "step": 2943 }, { "epoch": 0.36171519842732525, "grad_norm": 0.383615616478675, "learning_rate": 2.4164348899262936e-05, "loss": 0.4246, "step": 2944 }, { "epoch": 0.3618380636441823, "grad_norm": 0.3646748863180278, "learning_rate": 2.4159255047461785e-05, "loss": 0.3996, "step": 2945 }, { "epoch": 0.3619609288610394, "grad_norm": 0.3605893072849074, "learning_rate": 2.4154159510875065e-05, "loss": 0.4207, "step": 2946 }, { "epoch": 0.36208379407789654, "grad_norm": 0.335109457742082, "learning_rate": 2.414906229044007e-05, "loss": 0.4472, "step": 2947 }, { "epoch": 0.36220665929475365, "grad_norm": 0.3769427936527378, "learning_rate": 2.4143963387094403e-05, "loss": 0.3654, "step": 2948 }, { "epoch": 0.36232952451161077, "grad_norm": 0.3099640325110777, "learning_rate": 2.4138862801775973e-05, "loss": 0.3604, "step": 2949 }, { "epoch": 0.3624523897284679, "grad_norm": 0.42243830598902277, "learning_rate": 2.4133760535422994e-05, "loss": 0.4162, "step": 2950 }, { "epoch": 0.362575254945325, "grad_norm": 0.3517457261588086, "learning_rate": 2.4128656588974e-05, "loss": 0.4298, "step": 2951 }, { "epoch": 0.3626981201621821, "grad_norm": 0.3081861855738365, "learning_rate": 2.4123550963367824e-05, "loss": 0.4148, "step": 2952 }, { "epoch": 0.3628209853790392, "grad_norm": 0.32972348058233936, "learning_rate": 2.411844365954362e-05, "loss": 0.4245, "step": 2953 }, { "epoch": 0.3629438505958963, "grad_norm": 0.353017593846627, "learning_rate": 2.4113334678440842e-05, "loss": 0.3812, "step": 2954 }, { "epoch": 0.3630667158127534, "grad_norm": 0.3972763775235703, "learning_rate": 2.410822402099925e-05, "loss": 0.421, "step": 2955 }, { "epoch": 0.3631895810296105, "grad_norm": 0.31078890120574126, "learning_rate": 2.4103111688158917e-05, "loss": 0.4192, "step": 2956 }, { "epoch": 0.36331244624646764, "grad_norm": 0.39694185758392686, "learning_rate": 2.4097997680860232e-05, "loss": 0.4515, "step": 2957 }, { "epoch": 0.36343531146332475, "grad_norm": 0.485311102712204, "learning_rate": 2.4092882000043868e-05, "loss": 0.4517, "step": 2958 }, { "epoch": 0.36355817668018187, "grad_norm": 0.3787029457101631, "learning_rate": 2.408776464665083e-05, "loss": 0.4695, "step": 2959 }, { "epoch": 0.3636810418970389, "grad_norm": 0.343906656727542, "learning_rate": 2.4082645621622425e-05, "loss": 0.3978, "step": 2960 }, { "epoch": 0.36380390711389604, "grad_norm": 0.2775026655867528, "learning_rate": 2.407752492590026e-05, "loss": 0.4006, "step": 2961 }, { "epoch": 0.36392677233075316, "grad_norm": 0.3002077495537357, "learning_rate": 2.4072402560426253e-05, "loss": 0.426, "step": 2962 }, { "epoch": 0.36404963754761027, "grad_norm": 0.3788735872420444, "learning_rate": 2.4067278526142635e-05, "loss": 0.4564, "step": 2963 }, { "epoch": 0.3641725027644674, "grad_norm": 0.4034618457771136, "learning_rate": 2.4062152823991933e-05, "loss": 0.4737, "step": 2964 }, { "epoch": 0.3642953679813245, "grad_norm": 0.36076423678226166, "learning_rate": 2.405702545491699e-05, "loss": 0.3664, "step": 2965 }, { "epoch": 0.3644182331981816, "grad_norm": 0.426557921625023, "learning_rate": 2.405189641986095e-05, "loss": 0.4175, "step": 2966 }, { "epoch": 0.3645410984150387, "grad_norm": 0.31317722875142057, "learning_rate": 2.404676571976727e-05, "loss": 0.3746, "step": 2967 }, { "epoch": 0.3646639636318958, "grad_norm": 0.3487647028879126, "learning_rate": 2.4041633355579705e-05, "loss": 0.4091, "step": 2968 }, { "epoch": 0.3647868288487529, "grad_norm": 0.39764020250666193, "learning_rate": 2.403649932824232e-05, "loss": 0.4549, "step": 2969 }, { "epoch": 0.36490969406561, "grad_norm": 0.3608317791769132, "learning_rate": 2.403136363869949e-05, "loss": 0.4639, "step": 2970 }, { "epoch": 0.36503255928246714, "grad_norm": 0.39195457316780885, "learning_rate": 2.4026226287895885e-05, "loss": 0.4034, "step": 2971 }, { "epoch": 0.36515542449932425, "grad_norm": 0.37671562046173335, "learning_rate": 2.4021087276776493e-05, "loss": 0.3956, "step": 2972 }, { "epoch": 0.36527828971618137, "grad_norm": 0.34721263279922526, "learning_rate": 2.40159466062866e-05, "loss": 0.3863, "step": 2973 }, { "epoch": 0.36540115493303843, "grad_norm": 0.43911284587679855, "learning_rate": 2.40108042773718e-05, "loss": 0.3852, "step": 2974 }, { "epoch": 0.36552402014989555, "grad_norm": 0.39080425894756515, "learning_rate": 2.400566029097799e-05, "loss": 0.3775, "step": 2975 }, { "epoch": 0.36564688536675266, "grad_norm": 0.3232289837130092, "learning_rate": 2.4000514648051372e-05, "loss": 0.4148, "step": 2976 }, { "epoch": 0.3657697505836098, "grad_norm": 0.3285720531637646, "learning_rate": 2.3995367349538456e-05, "loss": 0.3639, "step": 2977 }, { "epoch": 0.3658926158004669, "grad_norm": 0.3714573339489424, "learning_rate": 2.399021839638605e-05, "loss": 0.4075, "step": 2978 }, { "epoch": 0.366015481017324, "grad_norm": 0.3474654957810536, "learning_rate": 2.3985067789541285e-05, "loss": 0.3405, "step": 2979 }, { "epoch": 0.3661383462341811, "grad_norm": 0.34002981035147656, "learning_rate": 2.3979915529951562e-05, "loss": 0.3326, "step": 2980 }, { "epoch": 0.36626121145103824, "grad_norm": 0.35782125616123645, "learning_rate": 2.3974761618564613e-05, "loss": 0.3855, "step": 2981 }, { "epoch": 0.3663840766678953, "grad_norm": 0.3946710572781487, "learning_rate": 2.396960605632847e-05, "loss": 0.4009, "step": 2982 }, { "epoch": 0.3665069418847524, "grad_norm": 0.32384229824144023, "learning_rate": 2.396444884419146e-05, "loss": 0.3666, "step": 2983 }, { "epoch": 0.36662980710160953, "grad_norm": 0.30049871557146146, "learning_rate": 2.3959289983102223e-05, "loss": 0.3367, "step": 2984 }, { "epoch": 0.36675267231846664, "grad_norm": 0.3914769754275328, "learning_rate": 2.39541294740097e-05, "loss": 0.4148, "step": 2985 }, { "epoch": 0.36687553753532376, "grad_norm": 0.38301281038767815, "learning_rate": 2.3948967317863124e-05, "loss": 0.3915, "step": 2986 }, { "epoch": 0.3669984027521809, "grad_norm": 0.39293836400208637, "learning_rate": 2.3943803515612053e-05, "loss": 0.4436, "step": 2987 }, { "epoch": 0.367121267969038, "grad_norm": 0.34115860433422723, "learning_rate": 2.393863806820632e-05, "loss": 0.4039, "step": 2988 }, { "epoch": 0.36724413318589505, "grad_norm": 0.3761590206426997, "learning_rate": 2.3933470976596088e-05, "loss": 0.4013, "step": 2989 }, { "epoch": 0.36736699840275217, "grad_norm": 0.3710613090188482, "learning_rate": 2.3928302241731807e-05, "loss": 0.3884, "step": 2990 }, { "epoch": 0.3674898636196093, "grad_norm": 0.3841015096609876, "learning_rate": 2.3923131864564228e-05, "loss": 0.3743, "step": 2991 }, { "epoch": 0.3676127288364664, "grad_norm": 0.38654693455500255, "learning_rate": 2.391795984604441e-05, "loss": 0.3819, "step": 2992 }, { "epoch": 0.3677355940533235, "grad_norm": 0.36536140293788455, "learning_rate": 2.3912786187123714e-05, "loss": 0.4019, "step": 2993 }, { "epoch": 0.3678584592701806, "grad_norm": 0.4136326726598335, "learning_rate": 2.39076108887538e-05, "loss": 0.4503, "step": 2994 }, { "epoch": 0.36798132448703774, "grad_norm": 0.29985136773302307, "learning_rate": 2.3902433951886634e-05, "loss": 0.4109, "step": 2995 }, { "epoch": 0.3681041897038948, "grad_norm": 0.32143362035872824, "learning_rate": 2.3897255377474472e-05, "loss": 0.3619, "step": 2996 }, { "epoch": 0.3682270549207519, "grad_norm": 0.4125123977134526, "learning_rate": 2.389207516646989e-05, "loss": 0.3817, "step": 2997 }, { "epoch": 0.36834992013760903, "grad_norm": 0.3904555391229499, "learning_rate": 2.3886893319825747e-05, "loss": 0.3978, "step": 2998 }, { "epoch": 0.36847278535446615, "grad_norm": 0.3064376484274981, "learning_rate": 2.3881709838495208e-05, "loss": 0.3282, "step": 2999 }, { "epoch": 0.36859565057132326, "grad_norm": 0.4837825204462492, "learning_rate": 2.3876524723431748e-05, "loss": 0.4287, "step": 3000 }, { "epoch": 0.3687185157881804, "grad_norm": 0.4006413508482711, "learning_rate": 2.3871337975589124e-05, "loss": 0.4342, "step": 3001 }, { "epoch": 0.3688413810050375, "grad_norm": 0.3092214599661757, "learning_rate": 2.386614959592142e-05, "loss": 0.4272, "step": 3002 }, { "epoch": 0.36896424622189455, "grad_norm": 0.3044323519033855, "learning_rate": 2.3860959585382995e-05, "loss": 0.4098, "step": 3003 }, { "epoch": 0.36908711143875167, "grad_norm": 0.48447837014607387, "learning_rate": 2.385576794492852e-05, "loss": 0.5301, "step": 3004 }, { "epoch": 0.3692099766556088, "grad_norm": 0.3934424806622885, "learning_rate": 2.385057467551296e-05, "loss": 0.4704, "step": 3005 }, { "epoch": 0.3693328418724659, "grad_norm": 0.3430758499891149, "learning_rate": 2.3845379778091587e-05, "loss": 0.3552, "step": 3006 }, { "epoch": 0.369455707089323, "grad_norm": 0.3689961363433732, "learning_rate": 2.384018325361997e-05, "loss": 0.3709, "step": 3007 }, { "epoch": 0.36957857230618013, "grad_norm": 0.37666628641493277, "learning_rate": 2.3834985103053976e-05, "loss": 0.3785, "step": 3008 }, { "epoch": 0.36970143752303725, "grad_norm": 0.3885637559366536, "learning_rate": 2.3829785327349766e-05, "loss": 0.4044, "step": 3009 }, { "epoch": 0.36982430273989436, "grad_norm": 0.4421506136951007, "learning_rate": 2.382458392746381e-05, "loss": 0.4112, "step": 3010 }, { "epoch": 0.3699471679567514, "grad_norm": 0.3071588042839555, "learning_rate": 2.381938090435287e-05, "loss": 0.3846, "step": 3011 }, { "epoch": 0.37007003317360854, "grad_norm": 0.3452883473591838, "learning_rate": 2.3814176258974006e-05, "loss": 0.4051, "step": 3012 }, { "epoch": 0.37019289839046565, "grad_norm": 0.3482933925947291, "learning_rate": 2.380896999228458e-05, "loss": 0.4281, "step": 3013 }, { "epoch": 0.37031576360732277, "grad_norm": 0.37662960593891304, "learning_rate": 2.3803762105242255e-05, "loss": 0.4298, "step": 3014 }, { "epoch": 0.3704386288241799, "grad_norm": 0.30651247892577993, "learning_rate": 2.3798552598804987e-05, "loss": 0.3904, "step": 3015 }, { "epoch": 0.370561494041037, "grad_norm": 0.5069453492640575, "learning_rate": 2.3793341473931024e-05, "loss": 0.3797, "step": 3016 }, { "epoch": 0.3706843592578941, "grad_norm": 0.46813626240534073, "learning_rate": 2.3788128731578928e-05, "loss": 0.4129, "step": 3017 }, { "epoch": 0.3708072244747512, "grad_norm": 0.3417898933896125, "learning_rate": 2.378291437270754e-05, "loss": 0.3519, "step": 3018 }, { "epoch": 0.3709300896916083, "grad_norm": 0.34764668843786745, "learning_rate": 2.377769839827602e-05, "loss": 0.3686, "step": 3019 }, { "epoch": 0.3710529549084654, "grad_norm": 0.3440944730358752, "learning_rate": 2.3772480809243797e-05, "loss": 0.3368, "step": 3020 }, { "epoch": 0.3711758201253225, "grad_norm": 0.34758336955126073, "learning_rate": 2.3767261606570626e-05, "loss": 0.4185, "step": 3021 }, { "epoch": 0.37129868534217964, "grad_norm": 0.39803086683038175, "learning_rate": 2.376204079121654e-05, "loss": 0.4658, "step": 3022 }, { "epoch": 0.37142155055903675, "grad_norm": 0.42459519248203587, "learning_rate": 2.375681836414187e-05, "loss": 0.3594, "step": 3023 }, { "epoch": 0.37154441577589387, "grad_norm": 0.30591467982332887, "learning_rate": 2.3751594326307254e-05, "loss": 0.3966, "step": 3024 }, { "epoch": 0.3716672809927509, "grad_norm": 0.39551227491024965, "learning_rate": 2.374636867867362e-05, "loss": 0.4085, "step": 3025 }, { "epoch": 0.37179014620960804, "grad_norm": 0.34433966795899623, "learning_rate": 2.3741141422202188e-05, "loss": 0.4696, "step": 3026 }, { "epoch": 0.37191301142646516, "grad_norm": 0.3351562356995213, "learning_rate": 2.373591255785448e-05, "loss": 0.3245, "step": 3027 }, { "epoch": 0.37203587664332227, "grad_norm": 0.35630868683038286, "learning_rate": 2.373068208659231e-05, "loss": 0.339, "step": 3028 }, { "epoch": 0.3721587418601794, "grad_norm": 0.30816089141504777, "learning_rate": 2.3725450009377795e-05, "loss": 0.3318, "step": 3029 }, { "epoch": 0.3722816070770365, "grad_norm": 0.3417989691350655, "learning_rate": 2.3720216327173327e-05, "loss": 0.4263, "step": 3030 }, { "epoch": 0.3724044722938936, "grad_norm": 0.3790782930037238, "learning_rate": 2.371498104094163e-05, "loss": 0.4046, "step": 3031 }, { "epoch": 0.37252733751075073, "grad_norm": 0.3619526320956046, "learning_rate": 2.3709744151645686e-05, "loss": 0.4152, "step": 3032 }, { "epoch": 0.3726502027276078, "grad_norm": 0.34119591191646836, "learning_rate": 2.3704505660248786e-05, "loss": 0.3988, "step": 3033 }, { "epoch": 0.3727730679444649, "grad_norm": 0.33558712378950667, "learning_rate": 2.3699265567714522e-05, "loss": 0.3839, "step": 3034 }, { "epoch": 0.372895933161322, "grad_norm": 0.39452044264795744, "learning_rate": 2.3694023875006773e-05, "loss": 0.3622, "step": 3035 }, { "epoch": 0.37301879837817914, "grad_norm": 0.3619158224895691, "learning_rate": 2.368878058308972e-05, "loss": 0.4646, "step": 3036 }, { "epoch": 0.37314166359503625, "grad_norm": 0.42645619692070325, "learning_rate": 2.368353569292782e-05, "loss": 0.3779, "step": 3037 }, { "epoch": 0.37326452881189337, "grad_norm": 0.4569486735634603, "learning_rate": 2.367828920548585e-05, "loss": 0.4568, "step": 3038 }, { "epoch": 0.3733873940287505, "grad_norm": 0.30743780068002485, "learning_rate": 2.3673041121728857e-05, "loss": 0.3893, "step": 3039 }, { "epoch": 0.37351025924560755, "grad_norm": 0.4735474613475287, "learning_rate": 2.36677914426222e-05, "loss": 0.4668, "step": 3040 }, { "epoch": 0.37363312446246466, "grad_norm": 0.311634563240951, "learning_rate": 2.3662540169131516e-05, "loss": 0.3713, "step": 3041 }, { "epoch": 0.3737559896793218, "grad_norm": 0.3378750607968921, "learning_rate": 2.365728730222275e-05, "loss": 0.3413, "step": 3042 }, { "epoch": 0.3738788548961789, "grad_norm": 0.3574263170135598, "learning_rate": 2.3652032842862127e-05, "loss": 0.3999, "step": 3043 }, { "epoch": 0.374001720113036, "grad_norm": 0.32586544998172684, "learning_rate": 2.3646776792016175e-05, "loss": 0.402, "step": 3044 }, { "epoch": 0.3741245853298931, "grad_norm": 0.400006727438238, "learning_rate": 2.3641519150651707e-05, "loss": 0.3182, "step": 3045 }, { "epoch": 0.37424745054675024, "grad_norm": 0.3431964192455949, "learning_rate": 2.3636259919735835e-05, "loss": 0.3929, "step": 3046 }, { "epoch": 0.3743703157636073, "grad_norm": 0.36877528635648577, "learning_rate": 2.3630999100235956e-05, "loss": 0.3884, "step": 3047 }, { "epoch": 0.3744931809804644, "grad_norm": 0.3094752434142774, "learning_rate": 2.362573669311977e-05, "loss": 0.4439, "step": 3048 }, { "epoch": 0.37461604619732153, "grad_norm": 0.36663313265486713, "learning_rate": 2.3620472699355255e-05, "loss": 0.3859, "step": 3049 }, { "epoch": 0.37473891141417864, "grad_norm": 0.31829941549271684, "learning_rate": 2.3615207119910693e-05, "loss": 0.3519, "step": 3050 }, { "epoch": 0.37486177663103576, "grad_norm": 0.3737776161299803, "learning_rate": 2.3609939955754656e-05, "loss": 0.4462, "step": 3051 }, { "epoch": 0.3749846418478929, "grad_norm": 0.47441119112040414, "learning_rate": 2.3604671207856002e-05, "loss": 0.4429, "step": 3052 }, { "epoch": 0.37510750706475, "grad_norm": 0.37873712132785436, "learning_rate": 2.359940087718388e-05, "loss": 0.3897, "step": 3053 }, { "epoch": 0.37523037228160705, "grad_norm": 0.3175231538233573, "learning_rate": 2.3594128964707736e-05, "loss": 0.3238, "step": 3054 }, { "epoch": 0.37535323749846417, "grad_norm": 0.6345778070540993, "learning_rate": 2.3588855471397305e-05, "loss": 0.4828, "step": 3055 }, { "epoch": 0.3754761027153213, "grad_norm": 0.4351544922089573, "learning_rate": 2.358358039822261e-05, "loss": 0.4098, "step": 3056 }, { "epoch": 0.3755989679321784, "grad_norm": 0.4113077631991106, "learning_rate": 2.357830374615397e-05, "loss": 0.3348, "step": 3057 }, { "epoch": 0.3757218331490355, "grad_norm": 0.3213293419349442, "learning_rate": 2.3573025516161977e-05, "loss": 0.3866, "step": 3058 }, { "epoch": 0.3758446983658926, "grad_norm": 0.34985740580948316, "learning_rate": 2.356774570921755e-05, "loss": 0.4042, "step": 3059 }, { "epoch": 0.37596756358274974, "grad_norm": 0.7779895655768537, "learning_rate": 2.3562464326291862e-05, "loss": 0.5714, "step": 3060 }, { "epoch": 0.37609042879960686, "grad_norm": 0.3748241750145869, "learning_rate": 2.355718136835639e-05, "loss": 0.4406, "step": 3061 }, { "epoch": 0.3762132940164639, "grad_norm": 0.3986159785953062, "learning_rate": 2.35518968363829e-05, "loss": 0.4222, "step": 3062 }, { "epoch": 0.37633615923332103, "grad_norm": 0.41451388519554155, "learning_rate": 2.3546610731343446e-05, "loss": 0.4997, "step": 3063 }, { "epoch": 0.37645902445017815, "grad_norm": 0.35220278692012036, "learning_rate": 2.3541323054210374e-05, "loss": 0.3973, "step": 3064 }, { "epoch": 0.37658188966703526, "grad_norm": 0.4707516461281978, "learning_rate": 2.353603380595633e-05, "loss": 0.4124, "step": 3065 }, { "epoch": 0.3767047548838924, "grad_norm": 0.34980975713787127, "learning_rate": 2.353074298755421e-05, "loss": 0.4444, "step": 3066 }, { "epoch": 0.3768276201007495, "grad_norm": 0.571332578787613, "learning_rate": 2.352545059997725e-05, "loss": 0.4428, "step": 3067 }, { "epoch": 0.3769504853176066, "grad_norm": 0.3999564385500318, "learning_rate": 2.352015664419894e-05, "loss": 0.3727, "step": 3068 }, { "epoch": 0.37707335053446367, "grad_norm": 0.3609327529187728, "learning_rate": 2.3514861121193068e-05, "loss": 0.3862, "step": 3069 }, { "epoch": 0.3771962157513208, "grad_norm": 0.3319922789880446, "learning_rate": 2.3509564031933716e-05, "loss": 0.3705, "step": 3070 }, { "epoch": 0.3773190809681779, "grad_norm": 0.30635165351739196, "learning_rate": 2.3504265377395244e-05, "loss": 0.3302, "step": 3071 }, { "epoch": 0.377441946185035, "grad_norm": 0.38552652091831796, "learning_rate": 2.349896515855231e-05, "loss": 0.3644, "step": 3072 }, { "epoch": 0.37756481140189213, "grad_norm": 0.3667059133389535, "learning_rate": 2.3493663376379853e-05, "loss": 0.4019, "step": 3073 }, { "epoch": 0.37768767661874925, "grad_norm": 0.3716405719720763, "learning_rate": 2.3488360031853102e-05, "loss": 0.379, "step": 3074 }, { "epoch": 0.37781054183560636, "grad_norm": 0.33265030175128907, "learning_rate": 2.348305512594757e-05, "loss": 0.4286, "step": 3075 }, { "epoch": 0.3779334070524634, "grad_norm": 0.41464146661050444, "learning_rate": 2.3477748659639063e-05, "loss": 0.3645, "step": 3076 }, { "epoch": 0.37805627226932054, "grad_norm": 0.3684963243852746, "learning_rate": 2.347244063390367e-05, "loss": 0.3707, "step": 3077 }, { "epoch": 0.37817913748617765, "grad_norm": 0.32216159854828247, "learning_rate": 2.346713104971777e-05, "loss": 0.3353, "step": 3078 }, { "epoch": 0.37830200270303477, "grad_norm": 0.44705003038062757, "learning_rate": 2.3461819908058024e-05, "loss": 0.426, "step": 3079 }, { "epoch": 0.3784248679198919, "grad_norm": 0.3289283288657894, "learning_rate": 2.3456507209901382e-05, "loss": 0.3932, "step": 3080 }, { "epoch": 0.378547733136749, "grad_norm": 0.39132780557374125, "learning_rate": 2.345119295622508e-05, "loss": 0.5033, "step": 3081 }, { "epoch": 0.3786705983536061, "grad_norm": 0.36283339537343906, "learning_rate": 2.3445877148006643e-05, "loss": 0.4198, "step": 3082 }, { "epoch": 0.37879346357046323, "grad_norm": 0.3188069575225322, "learning_rate": 2.3440559786223878e-05, "loss": 0.3974, "step": 3083 }, { "epoch": 0.3789163287873203, "grad_norm": 0.37437310692054054, "learning_rate": 2.343524087185488e-05, "loss": 0.4557, "step": 3084 }, { "epoch": 0.3790391940041774, "grad_norm": 0.347678238777681, "learning_rate": 2.3429920405878024e-05, "loss": 0.3365, "step": 3085 }, { "epoch": 0.3791620592210345, "grad_norm": 0.31366779195073335, "learning_rate": 2.3424598389271986e-05, "loss": 0.432, "step": 3086 }, { "epoch": 0.37928492443789164, "grad_norm": 0.3686394336116379, "learning_rate": 2.3419274823015704e-05, "loss": 0.3507, "step": 3087 }, { "epoch": 0.37940778965474875, "grad_norm": 0.3250984081538782, "learning_rate": 2.3413949708088424e-05, "loss": 0.3691, "step": 3088 }, { "epoch": 0.37953065487160587, "grad_norm": 0.39248285065419986, "learning_rate": 2.3408623045469658e-05, "loss": 0.3899, "step": 3089 }, { "epoch": 0.379653520088463, "grad_norm": 0.3439794179442065, "learning_rate": 2.3403294836139216e-05, "loss": 0.3832, "step": 3090 }, { "epoch": 0.37977638530532004, "grad_norm": 0.4333187097889526, "learning_rate": 2.339796508107718e-05, "loss": 0.4382, "step": 3091 }, { "epoch": 0.37989925052217716, "grad_norm": 0.3379385671271643, "learning_rate": 2.339263378126394e-05, "loss": 0.387, "step": 3092 }, { "epoch": 0.38002211573903427, "grad_norm": 0.43926330469966196, "learning_rate": 2.338730093768014e-05, "loss": 0.4376, "step": 3093 }, { "epoch": 0.3801449809558914, "grad_norm": 0.42790229694233406, "learning_rate": 2.338196655130673e-05, "loss": 0.3816, "step": 3094 }, { "epoch": 0.3802678461727485, "grad_norm": 0.37869708678040587, "learning_rate": 2.3376630623124925e-05, "loss": 0.4239, "step": 3095 }, { "epoch": 0.3803907113896056, "grad_norm": 0.31536368885500743, "learning_rate": 2.3371293154116244e-05, "loss": 0.4331, "step": 3096 }, { "epoch": 0.38051357660646273, "grad_norm": 0.3862363871477553, "learning_rate": 2.3365954145262478e-05, "loss": 0.3927, "step": 3097 }, { "epoch": 0.3806364418233198, "grad_norm": 0.3992795185673279, "learning_rate": 2.3360613597545698e-05, "loss": 0.366, "step": 3098 }, { "epoch": 0.3807593070401769, "grad_norm": 0.5367136690600002, "learning_rate": 2.3355271511948272e-05, "loss": 0.4533, "step": 3099 }, { "epoch": 0.380882172257034, "grad_norm": 0.36391454249247623, "learning_rate": 2.3349927889452834e-05, "loss": 0.3519, "step": 3100 }, { "epoch": 0.38100503747389114, "grad_norm": 0.41114079704502193, "learning_rate": 2.3344582731042313e-05, "loss": 0.3748, "step": 3101 }, { "epoch": 0.38112790269074825, "grad_norm": 0.36927897764309814, "learning_rate": 2.3339236037699915e-05, "loss": 0.4787, "step": 3102 }, { "epoch": 0.38125076790760537, "grad_norm": 0.3685339839177811, "learning_rate": 2.333388781040913e-05, "loss": 0.3451, "step": 3103 }, { "epoch": 0.3813736331244625, "grad_norm": 0.3365698226251755, "learning_rate": 2.3328538050153735e-05, "loss": 0.4341, "step": 3104 }, { "epoch": 0.38149649834131955, "grad_norm": 0.3068686975573446, "learning_rate": 2.3323186757917772e-05, "loss": 0.3894, "step": 3105 }, { "epoch": 0.38161936355817666, "grad_norm": 0.3268901762503207, "learning_rate": 2.3317833934685583e-05, "loss": 0.3623, "step": 3106 }, { "epoch": 0.3817422287750338, "grad_norm": 0.3795043543232649, "learning_rate": 2.3312479581441786e-05, "loss": 0.4267, "step": 3107 }, { "epoch": 0.3818650939918909, "grad_norm": 0.42372429317694144, "learning_rate": 2.3307123699171277e-05, "loss": 0.3521, "step": 3108 }, { "epoch": 0.381987959208748, "grad_norm": 0.4127044203146912, "learning_rate": 2.330176628885924e-05, "loss": 0.4664, "step": 3109 }, { "epoch": 0.3821108244256051, "grad_norm": 0.4039316858339911, "learning_rate": 2.329640735149113e-05, "loss": 0.378, "step": 3110 }, { "epoch": 0.38223368964246224, "grad_norm": 0.48504607098685965, "learning_rate": 2.329104688805269e-05, "loss": 0.5003, "step": 3111 }, { "epoch": 0.38235655485931935, "grad_norm": 0.3550498460869196, "learning_rate": 2.3285684899529948e-05, "loss": 0.3535, "step": 3112 }, { "epoch": 0.3824794200761764, "grad_norm": 0.3120576051026991, "learning_rate": 2.3280321386909203e-05, "loss": 0.4723, "step": 3113 }, { "epoch": 0.38260228529303353, "grad_norm": 0.31448020859919856, "learning_rate": 2.3274956351177037e-05, "loss": 0.4092, "step": 3114 }, { "epoch": 0.38272515050989064, "grad_norm": 0.38972422441123566, "learning_rate": 2.326958979332032e-05, "loss": 0.436, "step": 3115 }, { "epoch": 0.38284801572674776, "grad_norm": 0.32802561114866563, "learning_rate": 2.3264221714326182e-05, "loss": 0.4472, "step": 3116 }, { "epoch": 0.3829708809436049, "grad_norm": 0.4722947603401179, "learning_rate": 2.325885211518206e-05, "loss": 0.4819, "step": 3117 }, { "epoch": 0.383093746160462, "grad_norm": 0.3166466995144076, "learning_rate": 2.3253480996875653e-05, "loss": 0.4156, "step": 3118 }, { "epoch": 0.3832166113773191, "grad_norm": 0.3244328732484131, "learning_rate": 2.3248108360394942e-05, "loss": 0.3711, "step": 3119 }, { "epoch": 0.38333947659417617, "grad_norm": 0.44522781202478273, "learning_rate": 2.3242734206728186e-05, "loss": 0.5042, "step": 3120 }, { "epoch": 0.3834623418110333, "grad_norm": 0.39213840578463327, "learning_rate": 2.323735853686393e-05, "loss": 0.4023, "step": 3121 }, { "epoch": 0.3835852070278904, "grad_norm": 0.3931550226266602, "learning_rate": 2.3231981351790993e-05, "loss": 0.4063, "step": 3122 }, { "epoch": 0.3837080722447475, "grad_norm": 0.3624993861157678, "learning_rate": 2.3226602652498473e-05, "loss": 0.3773, "step": 3123 }, { "epoch": 0.3838309374616046, "grad_norm": 0.33704040409649716, "learning_rate": 2.3221222439975748e-05, "loss": 0.4136, "step": 3124 }, { "epoch": 0.38395380267846174, "grad_norm": 0.3088268219135342, "learning_rate": 2.3215840715212467e-05, "loss": 0.389, "step": 3125 }, { "epoch": 0.38407666789531886, "grad_norm": 0.32593556336252905, "learning_rate": 2.3210457479198573e-05, "loss": 0.345, "step": 3126 }, { "epoch": 0.3841995331121759, "grad_norm": 0.4151187646607327, "learning_rate": 2.3205072732924266e-05, "loss": 0.4605, "step": 3127 }, { "epoch": 0.38432239832903303, "grad_norm": 0.3920477387340558, "learning_rate": 2.3199686477380047e-05, "loss": 0.4227, "step": 3128 }, { "epoch": 0.38444526354589015, "grad_norm": 0.3582396035790398, "learning_rate": 2.3194298713556676e-05, "loss": 0.384, "step": 3129 }, { "epoch": 0.38456812876274726, "grad_norm": 0.39622012030498716, "learning_rate": 2.3188909442445202e-05, "loss": 0.3577, "step": 3130 }, { "epoch": 0.3846909939796044, "grad_norm": 0.42223095814950173, "learning_rate": 2.318351866503694e-05, "loss": 0.4819, "step": 3131 }, { "epoch": 0.3848138591964615, "grad_norm": 0.33843008556348636, "learning_rate": 2.3178126382323488e-05, "loss": 0.404, "step": 3132 }, { "epoch": 0.3849367244133186, "grad_norm": 0.41878766544796275, "learning_rate": 2.3172732595296727e-05, "loss": 0.3671, "step": 3133 }, { "epoch": 0.3850595896301757, "grad_norm": 0.33366194752039235, "learning_rate": 2.316733730494881e-05, "loss": 0.4696, "step": 3134 }, { "epoch": 0.3851824548470328, "grad_norm": 0.5922877234959689, "learning_rate": 2.316194051227216e-05, "loss": 0.4228, "step": 3135 }, { "epoch": 0.3853053200638899, "grad_norm": 0.33970278807164805, "learning_rate": 2.3156542218259485e-05, "loss": 0.3999, "step": 3136 }, { "epoch": 0.385428185280747, "grad_norm": 0.3907364891441635, "learning_rate": 2.3151142423903765e-05, "loss": 0.3991, "step": 3137 }, { "epoch": 0.38555105049760413, "grad_norm": 0.34024314459664945, "learning_rate": 2.314574113019826e-05, "loss": 0.3967, "step": 3138 }, { "epoch": 0.38567391571446125, "grad_norm": 0.3174531953195686, "learning_rate": 2.3140338338136505e-05, "loss": 0.3719, "step": 3139 }, { "epoch": 0.38579678093131836, "grad_norm": 0.36994587587586886, "learning_rate": 2.31349340487123e-05, "loss": 0.3888, "step": 3140 }, { "epoch": 0.3859196461481755, "grad_norm": 0.3403468007625933, "learning_rate": 2.312952826291973e-05, "loss": 0.3583, "step": 3141 }, { "epoch": 0.38604251136503254, "grad_norm": 0.3495201428692366, "learning_rate": 2.3124120981753164e-05, "loss": 0.3902, "step": 3142 }, { "epoch": 0.38616537658188965, "grad_norm": 0.3509888003038599, "learning_rate": 2.311871220620723e-05, "loss": 0.449, "step": 3143 }, { "epoch": 0.38628824179874677, "grad_norm": 0.398400890522723, "learning_rate": 2.3113301937276834e-05, "loss": 0.4068, "step": 3144 }, { "epoch": 0.3864111070156039, "grad_norm": 0.33617552039026805, "learning_rate": 2.310789017595717e-05, "loss": 0.3762, "step": 3145 }, { "epoch": 0.386533972232461, "grad_norm": 0.34314865239655545, "learning_rate": 2.310247692324368e-05, "loss": 0.425, "step": 3146 }, { "epoch": 0.3866568374493181, "grad_norm": 0.30162823963322344, "learning_rate": 2.3097062180132113e-05, "loss": 0.464, "step": 3147 }, { "epoch": 0.38677970266617523, "grad_norm": 0.35543612714039463, "learning_rate": 2.3091645947618463e-05, "loss": 0.3863, "step": 3148 }, { "epoch": 0.3869025678830323, "grad_norm": 0.33281440749720387, "learning_rate": 2.3086228226699023e-05, "loss": 0.3979, "step": 3149 }, { "epoch": 0.3870254330998894, "grad_norm": 0.37669363307632, "learning_rate": 2.3080809018370338e-05, "loss": 0.4031, "step": 3150 }, { "epoch": 0.3871482983167465, "grad_norm": 0.33889301898706914, "learning_rate": 2.3075388323629242e-05, "loss": 0.4435, "step": 3151 }, { "epoch": 0.38727116353360364, "grad_norm": 0.3440716876447767, "learning_rate": 2.3069966143472837e-05, "loss": 0.4125, "step": 3152 }, { "epoch": 0.38739402875046075, "grad_norm": 0.3508629294383446, "learning_rate": 2.3064542478898494e-05, "loss": 0.3551, "step": 3153 }, { "epoch": 0.38751689396731787, "grad_norm": 0.3270051574392312, "learning_rate": 2.305911733090386e-05, "loss": 0.4098, "step": 3154 }, { "epoch": 0.387639759184175, "grad_norm": 0.4507356869143935, "learning_rate": 2.305369070048686e-05, "loss": 0.4305, "step": 3155 }, { "epoch": 0.38776262440103204, "grad_norm": 0.3483740145693897, "learning_rate": 2.304826258864569e-05, "loss": 0.4923, "step": 3156 }, { "epoch": 0.38788548961788916, "grad_norm": 0.37647502247512693, "learning_rate": 2.30428329963788e-05, "loss": 0.3442, "step": 3157 }, { "epoch": 0.38800835483474627, "grad_norm": 0.46489167652199354, "learning_rate": 2.303740192468495e-05, "loss": 0.3847, "step": 3158 }, { "epoch": 0.3881312200516034, "grad_norm": 0.4306251211561787, "learning_rate": 2.3031969374563137e-05, "loss": 0.3887, "step": 3159 }, { "epoch": 0.3882540852684605, "grad_norm": 0.3697401677139183, "learning_rate": 2.302653534701265e-05, "loss": 0.3514, "step": 3160 }, { "epoch": 0.3883769504853176, "grad_norm": 0.3423829988300334, "learning_rate": 2.3021099843033037e-05, "loss": 0.4588, "step": 3161 }, { "epoch": 0.38849981570217473, "grad_norm": 0.37402813776347166, "learning_rate": 2.3015662863624124e-05, "loss": 0.3856, "step": 3162 }, { "epoch": 0.38862268091903185, "grad_norm": 0.36937360502568056, "learning_rate": 2.3010224409786016e-05, "loss": 0.469, "step": 3163 }, { "epoch": 0.3887455461358889, "grad_norm": 0.28179357783679976, "learning_rate": 2.300478448251907e-05, "loss": 0.3806, "step": 3164 }, { "epoch": 0.388868411352746, "grad_norm": 0.31204384677985647, "learning_rate": 2.299934308282393e-05, "loss": 0.4124, "step": 3165 }, { "epoch": 0.38899127656960314, "grad_norm": 0.3685973564208309, "learning_rate": 2.2993900211701516e-05, "loss": 0.4367, "step": 3166 }, { "epoch": 0.38911414178646025, "grad_norm": 0.34394075718272354, "learning_rate": 2.2988455870152995e-05, "loss": 0.3962, "step": 3167 }, { "epoch": 0.38923700700331737, "grad_norm": 0.436586102495231, "learning_rate": 2.2983010059179824e-05, "loss": 0.4458, "step": 3168 }, { "epoch": 0.3893598722201745, "grad_norm": 1.0377987274235576, "learning_rate": 2.2977562779783726e-05, "loss": 0.4234, "step": 3169 }, { "epoch": 0.3894827374370316, "grad_norm": 0.33870136704064435, "learning_rate": 2.297211403296669e-05, "loss": 0.4579, "step": 3170 }, { "epoch": 0.38960560265388866, "grad_norm": 0.3355479320109159, "learning_rate": 2.296666381973098e-05, "loss": 0.4484, "step": 3171 }, { "epoch": 0.3897284678707458, "grad_norm": 0.4037520763875301, "learning_rate": 2.2961212141079123e-05, "loss": 0.4403, "step": 3172 }, { "epoch": 0.3898513330876029, "grad_norm": 0.3830848287637794, "learning_rate": 2.2955758998013924e-05, "loss": 0.4162, "step": 3173 }, { "epoch": 0.38997419830446, "grad_norm": 0.3785082907557491, "learning_rate": 2.2950304391538453e-05, "loss": 0.4279, "step": 3174 }, { "epoch": 0.3900970635213171, "grad_norm": 0.28499662760579203, "learning_rate": 2.2944848322656048e-05, "loss": 0.3887, "step": 3175 }, { "epoch": 0.39021992873817424, "grad_norm": 0.3390017128460267, "learning_rate": 2.2939390792370315e-05, "loss": 0.4284, "step": 3176 }, { "epoch": 0.39034279395503135, "grad_norm": 0.33644233839490006, "learning_rate": 2.2933931801685137e-05, "loss": 0.3372, "step": 3177 }, { "epoch": 0.3904656591718884, "grad_norm": 0.3977730241789634, "learning_rate": 2.292847135160466e-05, "loss": 0.3901, "step": 3178 }, { "epoch": 0.39058852438874553, "grad_norm": 0.47515362653884335, "learning_rate": 2.2923009443133294e-05, "loss": 0.4777, "step": 3179 }, { "epoch": 0.39071138960560264, "grad_norm": 0.3470895745709387, "learning_rate": 2.2917546077275725e-05, "loss": 0.4075, "step": 3180 }, { "epoch": 0.39083425482245976, "grad_norm": 0.3924881914160432, "learning_rate": 2.29120812550369e-05, "loss": 0.4129, "step": 3181 }, { "epoch": 0.3909571200393169, "grad_norm": 0.386128117412506, "learning_rate": 2.290661497742204e-05, "loss": 0.3905, "step": 3182 }, { "epoch": 0.391079985256174, "grad_norm": 0.3253964959183801, "learning_rate": 2.2901147245436635e-05, "loss": 0.3275, "step": 3183 }, { "epoch": 0.3912028504730311, "grad_norm": 0.3638375794747753, "learning_rate": 2.2895678060086432e-05, "loss": 0.5015, "step": 3184 }, { "epoch": 0.39132571568988817, "grad_norm": 0.35794747827650375, "learning_rate": 2.289020742237745e-05, "loss": 0.326, "step": 3185 }, { "epoch": 0.3914485809067453, "grad_norm": 0.36425626718285214, "learning_rate": 2.288473533331599e-05, "loss": 0.3708, "step": 3186 }, { "epoch": 0.3915714461236024, "grad_norm": 0.32936284797152565, "learning_rate": 2.2879261793908596e-05, "loss": 0.4061, "step": 3187 }, { "epoch": 0.3916943113404595, "grad_norm": 0.3477738627796885, "learning_rate": 2.2873786805162096e-05, "loss": 0.3801, "step": 3188 }, { "epoch": 0.3918171765573166, "grad_norm": 0.33473177787935887, "learning_rate": 2.2868310368083578e-05, "loss": 0.3494, "step": 3189 }, { "epoch": 0.39194004177417374, "grad_norm": 0.3095628740198901, "learning_rate": 2.2862832483680392e-05, "loss": 0.4146, "step": 3190 }, { "epoch": 0.39206290699103086, "grad_norm": 0.436730741403244, "learning_rate": 2.2857353152960165e-05, "loss": 0.522, "step": 3191 }, { "epoch": 0.392185772207888, "grad_norm": 0.3748595206299679, "learning_rate": 2.2851872376930777e-05, "loss": 0.3651, "step": 3192 }, { "epoch": 0.39230863742474503, "grad_norm": 0.32454139376335367, "learning_rate": 2.2846390156600395e-05, "loss": 0.4294, "step": 3193 }, { "epoch": 0.39243150264160215, "grad_norm": 0.4112156879565204, "learning_rate": 2.284090649297742e-05, "loss": 0.3724, "step": 3194 }, { "epoch": 0.39255436785845926, "grad_norm": 0.4001597276663543, "learning_rate": 2.2835421387070556e-05, "loss": 0.389, "step": 3195 }, { "epoch": 0.3926772330753164, "grad_norm": 0.3577976975524213, "learning_rate": 2.2829934839888732e-05, "loss": 0.4278, "step": 3196 }, { "epoch": 0.3928000982921735, "grad_norm": 0.3438584580310055, "learning_rate": 2.2824446852441182e-05, "loss": 0.4398, "step": 3197 }, { "epoch": 0.3929229635090306, "grad_norm": 0.40907031643390646, "learning_rate": 2.281895742573737e-05, "loss": 0.3838, "step": 3198 }, { "epoch": 0.3930458287258877, "grad_norm": 0.33642815168376483, "learning_rate": 2.281346656078705e-05, "loss": 0.3603, "step": 3199 }, { "epoch": 0.3931686939427448, "grad_norm": 0.305517392995928, "learning_rate": 2.2807974258600227e-05, "loss": 0.3588, "step": 3200 }, { "epoch": 0.3932915591596019, "grad_norm": 0.38595105707263194, "learning_rate": 2.280248052018718e-05, "loss": 0.421, "step": 3201 }, { "epoch": 0.393414424376459, "grad_norm": 0.35032191235486354, "learning_rate": 2.2796985346558436e-05, "loss": 0.3515, "step": 3202 }, { "epoch": 0.39353728959331613, "grad_norm": 0.3058732728301916, "learning_rate": 2.2791488738724807e-05, "loss": 0.3525, "step": 3203 }, { "epoch": 0.39366015481017325, "grad_norm": 0.309739267724655, "learning_rate": 2.2785990697697353e-05, "loss": 0.3305, "step": 3204 }, { "epoch": 0.39378302002703036, "grad_norm": 0.37821930614322097, "learning_rate": 2.2780491224487402e-05, "loss": 0.4039, "step": 3205 }, { "epoch": 0.3939058852438875, "grad_norm": 0.37665969472143085, "learning_rate": 2.2774990320106552e-05, "loss": 0.4147, "step": 3206 }, { "epoch": 0.39402875046074454, "grad_norm": 0.3868649715031774, "learning_rate": 2.2769487985566653e-05, "loss": 0.4295, "step": 3207 }, { "epoch": 0.39415161567760165, "grad_norm": 0.3234463840886637, "learning_rate": 2.2763984221879827e-05, "loss": 0.3946, "step": 3208 }, { "epoch": 0.39427448089445877, "grad_norm": 0.31731801445224533, "learning_rate": 2.2758479030058453e-05, "loss": 0.3858, "step": 3209 }, { "epoch": 0.3943973461113159, "grad_norm": 0.3480055733050938, "learning_rate": 2.275297241111518e-05, "loss": 0.4616, "step": 3210 }, { "epoch": 0.394520211328173, "grad_norm": 0.31117542620711386, "learning_rate": 2.274746436606291e-05, "loss": 0.3566, "step": 3211 }, { "epoch": 0.3946430765450301, "grad_norm": 0.39075445482630317, "learning_rate": 2.2741954895914813e-05, "loss": 0.4831, "step": 3212 }, { "epoch": 0.39476594176188723, "grad_norm": 0.3221577170721671, "learning_rate": 2.273644400168432e-05, "loss": 0.4253, "step": 3213 }, { "epoch": 0.39488880697874434, "grad_norm": 0.32660642068941403, "learning_rate": 2.273093168438513e-05, "loss": 0.3645, "step": 3214 }, { "epoch": 0.3950116721956014, "grad_norm": 0.3690866720471788, "learning_rate": 2.272541794503119e-05, "loss": 0.3794, "step": 3215 }, { "epoch": 0.3951345374124585, "grad_norm": 0.3507110164277769, "learning_rate": 2.271990278463672e-05, "loss": 0.4206, "step": 3216 }, { "epoch": 0.39525740262931564, "grad_norm": 0.3261631261063418, "learning_rate": 2.27143862042162e-05, "loss": 0.3687, "step": 3217 }, { "epoch": 0.39538026784617275, "grad_norm": 0.29140564910213773, "learning_rate": 2.270886820478437e-05, "loss": 0.4225, "step": 3218 }, { "epoch": 0.39550313306302987, "grad_norm": 0.41415195853274284, "learning_rate": 2.270334878735622e-05, "loss": 0.4112, "step": 3219 }, { "epoch": 0.395625998279887, "grad_norm": 0.31861417754155025, "learning_rate": 2.2697827952947023e-05, "loss": 0.3712, "step": 3220 }, { "epoch": 0.3957488634967441, "grad_norm": 0.36347228393843617, "learning_rate": 2.2692305702572295e-05, "loss": 0.3337, "step": 3221 }, { "epoch": 0.39587172871360116, "grad_norm": 0.33085562136891883, "learning_rate": 2.268678203724782e-05, "loss": 0.4322, "step": 3222 }, { "epoch": 0.39599459393045827, "grad_norm": 0.328151105706588, "learning_rate": 2.268125695798964e-05, "loss": 0.4022, "step": 3223 }, { "epoch": 0.3961174591473154, "grad_norm": 0.4202769616984738, "learning_rate": 2.2675730465814056e-05, "loss": 0.459, "step": 3224 }, { "epoch": 0.3962403243641725, "grad_norm": 0.3351082413239133, "learning_rate": 2.2670202561737635e-05, "loss": 0.4679, "step": 3225 }, { "epoch": 0.3963631895810296, "grad_norm": 0.36441942359017604, "learning_rate": 2.2664673246777197e-05, "loss": 0.4375, "step": 3226 }, { "epoch": 0.39648605479788673, "grad_norm": 0.41579897459307286, "learning_rate": 2.265914252194982e-05, "loss": 0.4905, "step": 3227 }, { "epoch": 0.39660892001474385, "grad_norm": 0.4326493054976862, "learning_rate": 2.2653610388272842e-05, "loss": 0.429, "step": 3228 }, { "epoch": 0.3967317852316009, "grad_norm": 0.43548373194239653, "learning_rate": 2.2648076846763877e-05, "loss": 0.408, "step": 3229 }, { "epoch": 0.396854650448458, "grad_norm": 0.39572592294312514, "learning_rate": 2.2642541898440764e-05, "loss": 0.4477, "step": 3230 }, { "epoch": 0.39697751566531514, "grad_norm": 0.37120479570813714, "learning_rate": 2.2637005544321645e-05, "loss": 0.4041, "step": 3231 }, { "epoch": 0.39710038088217225, "grad_norm": 0.39070310630045835, "learning_rate": 2.2631467785424875e-05, "loss": 0.33, "step": 3232 }, { "epoch": 0.39722324609902937, "grad_norm": 0.39687249959159654, "learning_rate": 2.2625928622769105e-05, "loss": 0.3592, "step": 3233 }, { "epoch": 0.3973461113158865, "grad_norm": 0.3614339945479748, "learning_rate": 2.2620388057373216e-05, "loss": 0.4232, "step": 3234 }, { "epoch": 0.3974689765327436, "grad_norm": 0.2905600305436742, "learning_rate": 2.2614846090256366e-05, "loss": 0.361, "step": 3235 }, { "epoch": 0.39759184174960066, "grad_norm": 0.3722090347857146, "learning_rate": 2.2609302722437958e-05, "loss": 0.396, "step": 3236 }, { "epoch": 0.3977147069664578, "grad_norm": 0.3846683635344256, "learning_rate": 2.2603757954937668e-05, "loss": 0.4459, "step": 3237 }, { "epoch": 0.3978375721833149, "grad_norm": 0.34498716447572153, "learning_rate": 2.259821178877541e-05, "loss": 0.3837, "step": 3238 }, { "epoch": 0.397960437400172, "grad_norm": 0.44675798033290104, "learning_rate": 2.259266422497137e-05, "loss": 0.4031, "step": 3239 }, { "epoch": 0.3980833026170291, "grad_norm": 0.3405937191667073, "learning_rate": 2.2587115264545984e-05, "loss": 0.4275, "step": 3240 }, { "epoch": 0.39820616783388624, "grad_norm": 0.3196717911456246, "learning_rate": 2.2581564908519952e-05, "loss": 0.4711, "step": 3241 }, { "epoch": 0.39832903305074335, "grad_norm": 0.31865838591289813, "learning_rate": 2.2576013157914224e-05, "loss": 0.4334, "step": 3242 }, { "epoch": 0.39845189826760047, "grad_norm": 0.3836684247292872, "learning_rate": 2.2570460013750012e-05, "loss": 0.3677, "step": 3243 }, { "epoch": 0.39857476348445753, "grad_norm": 0.3796858332125201, "learning_rate": 2.2564905477048768e-05, "loss": 0.4249, "step": 3244 }, { "epoch": 0.39869762870131464, "grad_norm": 0.37694485936471345, "learning_rate": 2.2559349548832227e-05, "loss": 0.4137, "step": 3245 }, { "epoch": 0.39882049391817176, "grad_norm": 0.3318558026381408, "learning_rate": 2.2553792230122357e-05, "loss": 0.4001, "step": 3246 }, { "epoch": 0.3989433591350289, "grad_norm": 0.34126093711540517, "learning_rate": 2.25482335219414e-05, "loss": 0.4066, "step": 3247 }, { "epoch": 0.399066224351886, "grad_norm": 0.354729505380283, "learning_rate": 2.2542673425311834e-05, "loss": 0.3923, "step": 3248 }, { "epoch": 0.3991890895687431, "grad_norm": 0.36519168509862515, "learning_rate": 2.2537111941256406e-05, "loss": 0.4362, "step": 3249 }, { "epoch": 0.3993119547856002, "grad_norm": 0.286184848429829, "learning_rate": 2.2531549070798117e-05, "loss": 0.3865, "step": 3250 }, { "epoch": 0.3994348200024573, "grad_norm": 0.462487052572291, "learning_rate": 2.252598481496022e-05, "loss": 0.3953, "step": 3251 }, { "epoch": 0.3995576852193144, "grad_norm": 0.40237341708906826, "learning_rate": 2.252041917476623e-05, "loss": 0.3934, "step": 3252 }, { "epoch": 0.3996805504361715, "grad_norm": 0.3326900189146537, "learning_rate": 2.2514852151239897e-05, "loss": 0.3585, "step": 3253 }, { "epoch": 0.3998034156530286, "grad_norm": 0.3230090445925788, "learning_rate": 2.250928374540525e-05, "loss": 0.3543, "step": 3254 }, { "epoch": 0.39992628086988574, "grad_norm": 0.3807814820954173, "learning_rate": 2.250371395828656e-05, "loss": 0.3607, "step": 3255 }, { "epoch": 0.40004914608674286, "grad_norm": 0.32337549330846904, "learning_rate": 2.2498142790908346e-05, "loss": 0.3894, "step": 3256 }, { "epoch": 0.4001720113036, "grad_norm": 0.33568002999270724, "learning_rate": 2.2492570244295395e-05, "loss": 0.3903, "step": 3257 }, { "epoch": 0.40029487652045703, "grad_norm": 0.3732896587030538, "learning_rate": 2.248699631947274e-05, "loss": 0.476, "step": 3258 }, { "epoch": 0.40041774173731415, "grad_norm": 0.37058117206468794, "learning_rate": 2.2481421017465662e-05, "loss": 0.36, "step": 3259 }, { "epoch": 0.40054060695417126, "grad_norm": 0.37120512159820734, "learning_rate": 2.2475844339299714e-05, "loss": 0.4015, "step": 3260 }, { "epoch": 0.4006634721710284, "grad_norm": 0.38505713615375925, "learning_rate": 2.2470266286000672e-05, "loss": 0.3618, "step": 3261 }, { "epoch": 0.4007863373878855, "grad_norm": 0.322519646490485, "learning_rate": 2.24646868585946e-05, "loss": 0.3949, "step": 3262 }, { "epoch": 0.4009092026047426, "grad_norm": 0.27835625749601123, "learning_rate": 2.2459106058107788e-05, "loss": 0.4039, "step": 3263 }, { "epoch": 0.4010320678215997, "grad_norm": 0.3784545932932979, "learning_rate": 2.2453523885566794e-05, "loss": 0.4076, "step": 3264 }, { "epoch": 0.40115493303845684, "grad_norm": 0.44388517540846134, "learning_rate": 2.244794034199842e-05, "loss": 0.4205, "step": 3265 }, { "epoch": 0.4012777982553139, "grad_norm": 0.46975899704278656, "learning_rate": 2.244235542842972e-05, "loss": 0.4648, "step": 3266 }, { "epoch": 0.401400663472171, "grad_norm": 0.3923855188108955, "learning_rate": 2.2436769145888e-05, "loss": 0.3308, "step": 3267 }, { "epoch": 0.40152352868902813, "grad_norm": 0.37757944583675834, "learning_rate": 2.243118149540083e-05, "loss": 0.3833, "step": 3268 }, { "epoch": 0.40164639390588525, "grad_norm": 0.29623636799899894, "learning_rate": 2.2425592477996012e-05, "loss": 0.4249, "step": 3269 }, { "epoch": 0.40176925912274236, "grad_norm": 0.4767129011019887, "learning_rate": 2.2420002094701615e-05, "loss": 0.4077, "step": 3270 }, { "epoch": 0.4018921243395995, "grad_norm": 0.33195455639436805, "learning_rate": 2.241441034654596e-05, "loss": 0.3645, "step": 3271 }, { "epoch": 0.4020149895564566, "grad_norm": 0.35206490713550215, "learning_rate": 2.24088172345576e-05, "loss": 0.4502, "step": 3272 }, { "epoch": 0.40213785477331365, "grad_norm": 0.3126717001842748, "learning_rate": 2.2403222759765358e-05, "loss": 0.374, "step": 3273 }, { "epoch": 0.40226071999017077, "grad_norm": 0.4404686835784195, "learning_rate": 2.23976269231983e-05, "loss": 0.4911, "step": 3274 }, { "epoch": 0.4023835852070279, "grad_norm": 0.39419564980069105, "learning_rate": 2.239202972588575e-05, "loss": 0.4303, "step": 3275 }, { "epoch": 0.402506450423885, "grad_norm": 0.37472463022817737, "learning_rate": 2.2386431168857263e-05, "loss": 0.3959, "step": 3276 }, { "epoch": 0.4026293156407421, "grad_norm": 0.36403189439636496, "learning_rate": 2.2380831253142673e-05, "loss": 0.4432, "step": 3277 }, { "epoch": 0.40275218085759923, "grad_norm": 0.30519824029181064, "learning_rate": 2.2375229979772034e-05, "loss": 0.3788, "step": 3278 }, { "epoch": 0.40287504607445634, "grad_norm": 0.3386288874612779, "learning_rate": 2.2369627349775673e-05, "loss": 0.3239, "step": 3279 }, { "epoch": 0.4029979112913134, "grad_norm": 0.340334596106095, "learning_rate": 2.2364023364184154e-05, "loss": 0.3402, "step": 3280 }, { "epoch": 0.4031207765081705, "grad_norm": 0.3646642672348313, "learning_rate": 2.2358418024028294e-05, "loss": 0.4161, "step": 3281 }, { "epoch": 0.40324364172502764, "grad_norm": 0.35874121103871504, "learning_rate": 2.2352811330339164e-05, "loss": 0.4278, "step": 3282 }, { "epoch": 0.40336650694188475, "grad_norm": 0.353402404953417, "learning_rate": 2.234720328414807e-05, "loss": 0.3513, "step": 3283 }, { "epoch": 0.40348937215874187, "grad_norm": 0.3558540994610797, "learning_rate": 2.2341593886486584e-05, "loss": 0.3986, "step": 3284 }, { "epoch": 0.403612237375599, "grad_norm": 0.3540504590482026, "learning_rate": 2.2335983138386513e-05, "loss": 0.4065, "step": 3285 }, { "epoch": 0.4037351025924561, "grad_norm": 0.3465831570899703, "learning_rate": 2.2330371040879914e-05, "loss": 0.3505, "step": 3286 }, { "epoch": 0.40385796780931316, "grad_norm": 0.3446370644856155, "learning_rate": 2.232475759499911e-05, "loss": 0.4748, "step": 3287 }, { "epoch": 0.40398083302617027, "grad_norm": 0.33961028590740894, "learning_rate": 2.2319142801776637e-05, "loss": 0.3932, "step": 3288 }, { "epoch": 0.4041036982430274, "grad_norm": 0.3161541330871344, "learning_rate": 2.2313526662245324e-05, "loss": 0.3505, "step": 3289 }, { "epoch": 0.4042265634598845, "grad_norm": 0.29525183254828297, "learning_rate": 2.2307909177438205e-05, "loss": 0.3635, "step": 3290 }, { "epoch": 0.4043494286767416, "grad_norm": 0.44062237768332735, "learning_rate": 2.230229034838859e-05, "loss": 0.4745, "step": 3291 }, { "epoch": 0.40447229389359873, "grad_norm": 0.5236902297371111, "learning_rate": 2.229667017613002e-05, "loss": 0.461, "step": 3292 }, { "epoch": 0.40459515911045585, "grad_norm": 0.4316872448595078, "learning_rate": 2.229104866169629e-05, "loss": 0.3644, "step": 3293 }, { "epoch": 0.40471802432731296, "grad_norm": 0.3548496132811826, "learning_rate": 2.2285425806121446e-05, "loss": 0.4727, "step": 3294 }, { "epoch": 0.40484088954417, "grad_norm": 0.4017189032386928, "learning_rate": 2.2279801610439768e-05, "loss": 0.4354, "step": 3295 }, { "epoch": 0.40496375476102714, "grad_norm": 0.3808085353565046, "learning_rate": 2.22741760756858e-05, "loss": 0.4469, "step": 3296 }, { "epoch": 0.40508661997788425, "grad_norm": 0.3447101999014027, "learning_rate": 2.2268549202894314e-05, "loss": 0.3702, "step": 3297 }, { "epoch": 0.40520948519474137, "grad_norm": 0.3953000200523304, "learning_rate": 2.2262920993100345e-05, "loss": 0.383, "step": 3298 }, { "epoch": 0.4053323504115985, "grad_norm": 0.39854216137803783, "learning_rate": 2.2257291447339157e-05, "loss": 0.3966, "step": 3299 }, { "epoch": 0.4054552156284556, "grad_norm": 0.4068555429004065, "learning_rate": 2.2251660566646275e-05, "loss": 0.4569, "step": 3300 }, { "epoch": 0.4055780808453127, "grad_norm": 0.41960856810354275, "learning_rate": 2.2246028352057457e-05, "loss": 0.4583, "step": 3301 }, { "epoch": 0.4057009460621698, "grad_norm": 0.6881829428012577, "learning_rate": 2.224039480460872e-05, "loss": 0.4064, "step": 3302 }, { "epoch": 0.4058238112790269, "grad_norm": 0.367815934259887, "learning_rate": 2.2234759925336312e-05, "loss": 0.4448, "step": 3303 }, { "epoch": 0.405946676495884, "grad_norm": 0.39216255219181284, "learning_rate": 2.222912371527674e-05, "loss": 0.4674, "step": 3304 }, { "epoch": 0.4060695417127411, "grad_norm": 0.4121813919357509, "learning_rate": 2.2223486175466734e-05, "loss": 0.3893, "step": 3305 }, { "epoch": 0.40619240692959824, "grad_norm": 0.32145977224419703, "learning_rate": 2.2217847306943298e-05, "loss": 0.4006, "step": 3306 }, { "epoch": 0.40631527214645535, "grad_norm": 0.38081147798335424, "learning_rate": 2.2212207110743655e-05, "loss": 0.3793, "step": 3307 }, { "epoch": 0.40643813736331247, "grad_norm": 0.31919095080686644, "learning_rate": 2.220656558790529e-05, "loss": 0.4418, "step": 3308 }, { "epoch": 0.40656100258016953, "grad_norm": 0.43046543463236864, "learning_rate": 2.2200922739465915e-05, "loss": 0.4549, "step": 3309 }, { "epoch": 0.40668386779702664, "grad_norm": 0.3978579685134739, "learning_rate": 2.219527856646351e-05, "loss": 0.4649, "step": 3310 }, { "epoch": 0.40680673301388376, "grad_norm": 0.4314381821949487, "learning_rate": 2.2189633069936273e-05, "loss": 0.4842, "step": 3311 }, { "epoch": 0.4069295982307409, "grad_norm": 0.32097400530916265, "learning_rate": 2.2183986250922663e-05, "loss": 0.361, "step": 3312 }, { "epoch": 0.407052463447598, "grad_norm": 0.3665046749909003, "learning_rate": 2.2178338110461365e-05, "loss": 0.3785, "step": 3313 }, { "epoch": 0.4071753286644551, "grad_norm": 0.4216556387741362, "learning_rate": 2.2172688649591325e-05, "loss": 0.4506, "step": 3314 }, { "epoch": 0.4072981938813122, "grad_norm": 0.38626430229844366, "learning_rate": 2.2167037869351728e-05, "loss": 0.4281, "step": 3315 }, { "epoch": 0.40742105909816934, "grad_norm": 0.33475576436204096, "learning_rate": 2.2161385770781994e-05, "loss": 0.3832, "step": 3316 }, { "epoch": 0.4075439243150264, "grad_norm": 0.42176387692794526, "learning_rate": 2.215573235492179e-05, "loss": 0.3841, "step": 3317 }, { "epoch": 0.4076667895318835, "grad_norm": 0.43603259025588564, "learning_rate": 2.2150077622811024e-05, "loss": 0.4464, "step": 3318 }, { "epoch": 0.4077896547487406, "grad_norm": 0.40891637205680625, "learning_rate": 2.2144421575489853e-05, "loss": 0.3428, "step": 3319 }, { "epoch": 0.40791251996559774, "grad_norm": 0.33507160459232455, "learning_rate": 2.2138764213998666e-05, "loss": 0.3509, "step": 3320 }, { "epoch": 0.40803538518245486, "grad_norm": 0.41095954441017296, "learning_rate": 2.2133105539378103e-05, "loss": 0.4528, "step": 3321 }, { "epoch": 0.408158250399312, "grad_norm": 0.39786497864809045, "learning_rate": 2.212744555266903e-05, "loss": 0.4768, "step": 3322 }, { "epoch": 0.4082811156161691, "grad_norm": 0.38815427542140263, "learning_rate": 2.2121784254912568e-05, "loss": 0.4573, "step": 3323 }, { "epoch": 0.40840398083302615, "grad_norm": 0.3010274715349251, "learning_rate": 2.211612164715008e-05, "loss": 0.3698, "step": 3324 }, { "epoch": 0.40852684604988326, "grad_norm": 0.3265797621336881, "learning_rate": 2.211045773042317e-05, "loss": 0.4088, "step": 3325 }, { "epoch": 0.4086497112667404, "grad_norm": 0.35426189238270467, "learning_rate": 2.2104792505773666e-05, "loss": 0.3772, "step": 3326 }, { "epoch": 0.4087725764835975, "grad_norm": 0.3510610695418674, "learning_rate": 2.209912597424366e-05, "loss": 0.4327, "step": 3327 }, { "epoch": 0.4088954417004546, "grad_norm": 0.4116627705754775, "learning_rate": 2.209345813687547e-05, "loss": 0.3476, "step": 3328 }, { "epoch": 0.4090183069173117, "grad_norm": 0.423943819589827, "learning_rate": 2.208778899471166e-05, "loss": 0.4738, "step": 3329 }, { "epoch": 0.40914117213416884, "grad_norm": 0.37840481174970736, "learning_rate": 2.2082118548795034e-05, "loss": 0.4352, "step": 3330 }, { "epoch": 0.4092640373510259, "grad_norm": 0.36045020291457264, "learning_rate": 2.2076446800168624e-05, "loss": 0.4153, "step": 3331 }, { "epoch": 0.409386902567883, "grad_norm": 0.3149586073383069, "learning_rate": 2.207077374987572e-05, "loss": 0.3209, "step": 3332 }, { "epoch": 0.40950976778474013, "grad_norm": 0.35515105974254174, "learning_rate": 2.2065099398959837e-05, "loss": 0.459, "step": 3333 }, { "epoch": 0.40963263300159725, "grad_norm": 0.39733000179303535, "learning_rate": 2.205942374846474e-05, "loss": 0.4944, "step": 3334 }, { "epoch": 0.40975549821845436, "grad_norm": 0.34691781929571064, "learning_rate": 2.205374679943443e-05, "loss": 0.3242, "step": 3335 }, { "epoch": 0.4098783634353115, "grad_norm": 0.3445330401264485, "learning_rate": 2.2048068552913136e-05, "loss": 0.3868, "step": 3336 }, { "epoch": 0.4100012286521686, "grad_norm": 0.35047100249822033, "learning_rate": 2.204238900994534e-05, "loss": 0.3605, "step": 3337 }, { "epoch": 0.41012409386902565, "grad_norm": 0.27557436536272784, "learning_rate": 2.2036708171575763e-05, "loss": 0.4047, "step": 3338 }, { "epoch": 0.41024695908588277, "grad_norm": 0.3992391540203913, "learning_rate": 2.2031026038849353e-05, "loss": 0.3665, "step": 3339 }, { "epoch": 0.4103698243027399, "grad_norm": 0.31940887972447685, "learning_rate": 2.2025342612811297e-05, "loss": 0.3636, "step": 3340 }, { "epoch": 0.410492689519597, "grad_norm": 0.3829001788089192, "learning_rate": 2.2019657894507027e-05, "loss": 0.3861, "step": 3341 }, { "epoch": 0.4106155547364541, "grad_norm": 0.3937180243258087, "learning_rate": 2.2013971884982212e-05, "loss": 0.4016, "step": 3342 }, { "epoch": 0.41073841995331123, "grad_norm": 0.3675095982646067, "learning_rate": 2.200828458528276e-05, "loss": 0.3653, "step": 3343 }, { "epoch": 0.41086128517016834, "grad_norm": 0.40976763351528955, "learning_rate": 2.2002595996454805e-05, "loss": 0.4387, "step": 3344 }, { "epoch": 0.41098415038702546, "grad_norm": 0.47857535787977207, "learning_rate": 2.199690611954473e-05, "loss": 0.4129, "step": 3345 }, { "epoch": 0.4111070156038825, "grad_norm": 0.3570511270106789, "learning_rate": 2.199121495559915e-05, "loss": 0.3952, "step": 3346 }, { "epoch": 0.41122988082073964, "grad_norm": 0.33928517824458276, "learning_rate": 2.198552250566492e-05, "loss": 0.4044, "step": 3347 }, { "epoch": 0.41135274603759675, "grad_norm": 0.3473905157493431, "learning_rate": 2.197982877078913e-05, "loss": 0.4678, "step": 3348 }, { "epoch": 0.41147561125445387, "grad_norm": 0.4007764667138836, "learning_rate": 2.19741337520191e-05, "loss": 0.4736, "step": 3349 }, { "epoch": 0.411598476471311, "grad_norm": 0.40379182071186814, "learning_rate": 2.19684374504024e-05, "loss": 0.4468, "step": 3350 }, { "epoch": 0.4117213416881681, "grad_norm": 0.3585978092867932, "learning_rate": 2.1962739866986816e-05, "loss": 0.359, "step": 3351 }, { "epoch": 0.4118442069050252, "grad_norm": 0.360779445201696, "learning_rate": 2.195704100282039e-05, "loss": 0.3369, "step": 3352 }, { "epoch": 0.41196707212188227, "grad_norm": 0.26737676640314084, "learning_rate": 2.1951340858951392e-05, "loss": 0.383, "step": 3353 }, { "epoch": 0.4120899373387394, "grad_norm": 0.42776607133428635, "learning_rate": 2.1945639436428324e-05, "loss": 0.459, "step": 3354 }, { "epoch": 0.4122128025555965, "grad_norm": 0.4251813908783074, "learning_rate": 2.1939936736299925e-05, "loss": 0.4139, "step": 3355 }, { "epoch": 0.4123356677724536, "grad_norm": 0.3551642011630559, "learning_rate": 2.1934232759615168e-05, "loss": 0.4254, "step": 3356 }, { "epoch": 0.41245853298931073, "grad_norm": 0.3858431560474538, "learning_rate": 2.192852750742327e-05, "loss": 0.4149, "step": 3357 }, { "epoch": 0.41258139820616785, "grad_norm": 0.4000751692902295, "learning_rate": 2.1922820980773667e-05, "loss": 0.3501, "step": 3358 }, { "epoch": 0.41270426342302496, "grad_norm": 0.3541734091993644, "learning_rate": 2.1917113180716044e-05, "loss": 0.3717, "step": 3359 }, { "epoch": 0.412827128639882, "grad_norm": 0.3942932722826358, "learning_rate": 2.1911404108300307e-05, "loss": 0.3658, "step": 3360 }, { "epoch": 0.41294999385673914, "grad_norm": 0.37243312814244167, "learning_rate": 2.1905693764576608e-05, "loss": 0.4132, "step": 3361 }, { "epoch": 0.41307285907359625, "grad_norm": 0.3413403712685136, "learning_rate": 2.1899982150595324e-05, "loss": 0.4569, "step": 3362 }, { "epoch": 0.41319572429045337, "grad_norm": 0.336534425948276, "learning_rate": 2.189426926740707e-05, "loss": 0.4129, "step": 3363 }, { "epoch": 0.4133185895073105, "grad_norm": 0.4024464209054173, "learning_rate": 2.18885551160627e-05, "loss": 0.4088, "step": 3364 }, { "epoch": 0.4134414547241676, "grad_norm": 0.39730458762869836, "learning_rate": 2.1882839697613286e-05, "loss": 0.405, "step": 3365 }, { "epoch": 0.4135643199410247, "grad_norm": 0.41626850462155424, "learning_rate": 2.1877123013110146e-05, "loss": 0.3845, "step": 3366 }, { "epoch": 0.4136871851578818, "grad_norm": 0.35312219041043846, "learning_rate": 2.187140506360483e-05, "loss": 0.3337, "step": 3367 }, { "epoch": 0.4138100503747389, "grad_norm": 0.5566679105928003, "learning_rate": 2.186568585014912e-05, "loss": 0.4043, "step": 3368 }, { "epoch": 0.413932915591596, "grad_norm": 0.3420321575651186, "learning_rate": 2.1859965373795018e-05, "loss": 0.3786, "step": 3369 }, { "epoch": 0.4140557808084531, "grad_norm": 0.3779634162352052, "learning_rate": 2.185424363559477e-05, "loss": 0.4407, "step": 3370 }, { "epoch": 0.41417864602531024, "grad_norm": 0.3558870668447191, "learning_rate": 2.1848520636600863e-05, "loss": 0.4417, "step": 3371 }, { "epoch": 0.41430151124216735, "grad_norm": 0.4240532892958189, "learning_rate": 2.1842796377865995e-05, "loss": 0.4251, "step": 3372 }, { "epoch": 0.41442437645902447, "grad_norm": 0.3477351387075269, "learning_rate": 2.1837070860443115e-05, "loss": 0.3407, "step": 3373 }, { "epoch": 0.4145472416758816, "grad_norm": 0.3387189359565126, "learning_rate": 2.1831344085385386e-05, "loss": 0.4474, "step": 3374 }, { "epoch": 0.41467010689273864, "grad_norm": 0.42498021033119454, "learning_rate": 2.182561605374622e-05, "loss": 0.4399, "step": 3375 }, { "epoch": 0.41479297210959576, "grad_norm": 0.3394531326828376, "learning_rate": 2.181988676657924e-05, "loss": 0.3745, "step": 3376 }, { "epoch": 0.4149158373264529, "grad_norm": 0.3407416655579041, "learning_rate": 2.1814156224938322e-05, "loss": 0.3817, "step": 3377 }, { "epoch": 0.41503870254331, "grad_norm": 0.3582545718827618, "learning_rate": 2.1808424429877557e-05, "loss": 0.3793, "step": 3378 }, { "epoch": 0.4151615677601671, "grad_norm": 0.32471871469308744, "learning_rate": 2.1802691382451272e-05, "loss": 0.376, "step": 3379 }, { "epoch": 0.4152844329770242, "grad_norm": 0.33605949663331375, "learning_rate": 2.1796957083714022e-05, "loss": 0.3631, "step": 3380 }, { "epoch": 0.41540729819388134, "grad_norm": 0.40163274883023015, "learning_rate": 2.17912215347206e-05, "loss": 0.5228, "step": 3381 }, { "epoch": 0.4155301634107384, "grad_norm": 0.3431228349064354, "learning_rate": 2.1785484736526017e-05, "loss": 0.3627, "step": 3382 }, { "epoch": 0.4156530286275955, "grad_norm": 0.5427848988905593, "learning_rate": 2.1779746690185522e-05, "loss": 0.4738, "step": 3383 }, { "epoch": 0.4157758938444526, "grad_norm": 0.37212667814393213, "learning_rate": 2.1774007396754594e-05, "loss": 0.3706, "step": 3384 }, { "epoch": 0.41589875906130974, "grad_norm": 0.35980458175781865, "learning_rate": 2.1768266857288934e-05, "loss": 0.3765, "step": 3385 }, { "epoch": 0.41602162427816686, "grad_norm": 0.4071359855926031, "learning_rate": 2.176252507284448e-05, "loss": 0.4348, "step": 3386 }, { "epoch": 0.416144489495024, "grad_norm": 0.38519513501020913, "learning_rate": 2.1756782044477397e-05, "loss": 0.4693, "step": 3387 }, { "epoch": 0.4162673547118811, "grad_norm": 0.3539512516965038, "learning_rate": 2.1751037773244075e-05, "loss": 0.3654, "step": 3388 }, { "epoch": 0.41639021992873815, "grad_norm": 0.3164901882381233, "learning_rate": 2.1745292260201137e-05, "loss": 0.3628, "step": 3389 }, { "epoch": 0.41651308514559526, "grad_norm": 0.32921823239663556, "learning_rate": 2.173954550640543e-05, "loss": 0.4033, "step": 3390 }, { "epoch": 0.4166359503624524, "grad_norm": 0.4395618232602935, "learning_rate": 2.1733797512914035e-05, "loss": 0.411, "step": 3391 }, { "epoch": 0.4167588155793095, "grad_norm": 0.33653420410911555, "learning_rate": 2.1728048280784264e-05, "loss": 0.4296, "step": 3392 }, { "epoch": 0.4168816807961666, "grad_norm": 0.4306718049848754, "learning_rate": 2.172229781107364e-05, "loss": 0.4602, "step": 3393 }, { "epoch": 0.4170045460130237, "grad_norm": 0.34563522354921633, "learning_rate": 2.1716546104839928e-05, "loss": 0.4334, "step": 3394 }, { "epoch": 0.41712741122988084, "grad_norm": 0.29457250013844005, "learning_rate": 2.1710793163141117e-05, "loss": 0.3827, "step": 3395 }, { "epoch": 0.41725027644673796, "grad_norm": 0.3669535933305933, "learning_rate": 2.170503898703543e-05, "loss": 0.3941, "step": 3396 }, { "epoch": 0.417373141663595, "grad_norm": 0.376234635601987, "learning_rate": 2.1699283577581302e-05, "loss": 0.3999, "step": 3397 }, { "epoch": 0.41749600688045213, "grad_norm": 0.30352522784725233, "learning_rate": 2.1693526935837405e-05, "loss": 0.47, "step": 3398 }, { "epoch": 0.41761887209730925, "grad_norm": 0.39988617715674507, "learning_rate": 2.168776906286264e-05, "loss": 0.3813, "step": 3399 }, { "epoch": 0.41774173731416636, "grad_norm": 0.36419273177179323, "learning_rate": 2.1682009959716127e-05, "loss": 0.3927, "step": 3400 }, { "epoch": 0.4178646025310235, "grad_norm": 0.339261176808415, "learning_rate": 2.1676249627457218e-05, "loss": 0.3698, "step": 3401 }, { "epoch": 0.4179874677478806, "grad_norm": 0.34620414598993904, "learning_rate": 2.167048806714548e-05, "loss": 0.3677, "step": 3402 }, { "epoch": 0.4181103329647377, "grad_norm": 0.4249540174692474, "learning_rate": 2.1664725279840727e-05, "loss": 0.3651, "step": 3403 }, { "epoch": 0.41823319818159477, "grad_norm": 0.6308950217382037, "learning_rate": 2.1658961266602984e-05, "loss": 0.3271, "step": 3404 }, { "epoch": 0.4183560633984519, "grad_norm": 0.4209290967799896, "learning_rate": 2.1653196028492495e-05, "loss": 0.4301, "step": 3405 }, { "epoch": 0.418478928615309, "grad_norm": 0.3661013061216894, "learning_rate": 2.1647429566569745e-05, "loss": 0.4868, "step": 3406 }, { "epoch": 0.4186017938321661, "grad_norm": 0.5060365835513152, "learning_rate": 2.164166188189544e-05, "loss": 0.4802, "step": 3407 }, { "epoch": 0.41872465904902323, "grad_norm": 0.4253179788957432, "learning_rate": 2.16358929755305e-05, "loss": 0.4429, "step": 3408 }, { "epoch": 0.41884752426588034, "grad_norm": 0.405675345428404, "learning_rate": 2.1630122848536087e-05, "loss": 0.3828, "step": 3409 }, { "epoch": 0.41897038948273746, "grad_norm": 0.3435272417963858, "learning_rate": 2.162435150197357e-05, "loss": 0.3705, "step": 3410 }, { "epoch": 0.4190932546995945, "grad_norm": 0.37385819653637076, "learning_rate": 2.1618578936904552e-05, "loss": 0.4694, "step": 3411 }, { "epoch": 0.41921611991645163, "grad_norm": 0.3398802865402088, "learning_rate": 2.1612805154390868e-05, "loss": 0.3472, "step": 3412 }, { "epoch": 0.41933898513330875, "grad_norm": 0.33371777066148967, "learning_rate": 2.160703015549456e-05, "loss": 0.4889, "step": 3413 }, { "epoch": 0.41946185035016587, "grad_norm": 0.34056143065268407, "learning_rate": 2.1601253941277906e-05, "loss": 0.4269, "step": 3414 }, { "epoch": 0.419584715567023, "grad_norm": 0.37053590309086065, "learning_rate": 2.1595476512803397e-05, "loss": 0.3576, "step": 3415 }, { "epoch": 0.4197075807838801, "grad_norm": 0.32338496951224416, "learning_rate": 2.158969787113375e-05, "loss": 0.3485, "step": 3416 }, { "epoch": 0.4198304460007372, "grad_norm": 0.34327194758224133, "learning_rate": 2.1583918017331925e-05, "loss": 0.4733, "step": 3417 }, { "epoch": 0.41995331121759427, "grad_norm": 0.35784042788711495, "learning_rate": 2.1578136952461073e-05, "loss": 0.3802, "step": 3418 }, { "epoch": 0.4200761764344514, "grad_norm": 0.37143350114672713, "learning_rate": 2.157235467758459e-05, "loss": 0.5786, "step": 3419 }, { "epoch": 0.4201990416513085, "grad_norm": 0.3236014004200892, "learning_rate": 2.156657119376609e-05, "loss": 0.3392, "step": 3420 }, { "epoch": 0.4203219068681656, "grad_norm": 0.3516673420771568, "learning_rate": 2.1560786502069398e-05, "loss": 0.416, "step": 3421 }, { "epoch": 0.42044477208502273, "grad_norm": 0.39257711813881846, "learning_rate": 2.1555000603558588e-05, "loss": 0.4844, "step": 3422 }, { "epoch": 0.42056763730187985, "grad_norm": 0.3557932739897038, "learning_rate": 2.154921349929792e-05, "loss": 0.5549, "step": 3423 }, { "epoch": 0.42069050251873696, "grad_norm": 0.3041169134283566, "learning_rate": 2.1543425190351908e-05, "loss": 0.4008, "step": 3424 }, { "epoch": 0.4208133677355941, "grad_norm": 0.35536605183309755, "learning_rate": 2.153763567778526e-05, "loss": 0.3984, "step": 3425 }, { "epoch": 0.42093623295245114, "grad_norm": 0.3746224563066622, "learning_rate": 2.1531844962662933e-05, "loss": 0.3957, "step": 3426 }, { "epoch": 0.42105909816930825, "grad_norm": 0.39886980878034334, "learning_rate": 2.152605304605008e-05, "loss": 0.4689, "step": 3427 }, { "epoch": 0.42118196338616537, "grad_norm": 0.39121515999653933, "learning_rate": 2.15202599290121e-05, "loss": 0.3702, "step": 3428 }, { "epoch": 0.4213048286030225, "grad_norm": 0.48462213078119476, "learning_rate": 2.1514465612614583e-05, "loss": 0.3395, "step": 3429 }, { "epoch": 0.4214276938198796, "grad_norm": 0.3791064580890338, "learning_rate": 2.150867009792337e-05, "loss": 0.3996, "step": 3430 }, { "epoch": 0.4215505590367367, "grad_norm": 0.5037808987039685, "learning_rate": 2.1502873386004498e-05, "loss": 0.4611, "step": 3431 }, { "epoch": 0.42167342425359383, "grad_norm": 0.33478245174888965, "learning_rate": 2.1497075477924245e-05, "loss": 0.4105, "step": 3432 }, { "epoch": 0.4217962894704509, "grad_norm": 0.3501077525669074, "learning_rate": 2.149127637474909e-05, "loss": 0.359, "step": 3433 }, { "epoch": 0.421919154687308, "grad_norm": 0.4383096042587205, "learning_rate": 2.1485476077545745e-05, "loss": 0.444, "step": 3434 }, { "epoch": 0.4220420199041651, "grad_norm": 0.3530816282192209, "learning_rate": 2.1479674587381136e-05, "loss": 0.4391, "step": 3435 }, { "epoch": 0.42216488512102224, "grad_norm": 0.4017401277884512, "learning_rate": 2.1473871905322406e-05, "loss": 0.3744, "step": 3436 }, { "epoch": 0.42228775033787935, "grad_norm": 0.377021087751376, "learning_rate": 2.146806803243692e-05, "loss": 0.4294, "step": 3437 }, { "epoch": 0.42241061555473647, "grad_norm": 0.48889521475720155, "learning_rate": 2.1462262969792272e-05, "loss": 0.4796, "step": 3438 }, { "epoch": 0.4225334807715936, "grad_norm": 0.4062413464556399, "learning_rate": 2.1456456718456256e-05, "loss": 0.4295, "step": 3439 }, { "epoch": 0.42265634598845064, "grad_norm": 0.33359852239482807, "learning_rate": 2.1450649279496903e-05, "loss": 0.4314, "step": 3440 }, { "epoch": 0.42277921120530776, "grad_norm": 0.3751629248136066, "learning_rate": 2.1444840653982447e-05, "loss": 0.4066, "step": 3441 }, { "epoch": 0.4229020764221649, "grad_norm": 0.38733124085208415, "learning_rate": 2.143903084298135e-05, "loss": 0.4428, "step": 3442 }, { "epoch": 0.423024941639022, "grad_norm": 0.307013786725959, "learning_rate": 2.1433219847562287e-05, "loss": 0.3254, "step": 3443 }, { "epoch": 0.4231478068558791, "grad_norm": 0.3646334858347636, "learning_rate": 2.1427407668794152e-05, "loss": 0.4327, "step": 3444 }, { "epoch": 0.4232706720727362, "grad_norm": 0.3546354811282887, "learning_rate": 2.1421594307746062e-05, "loss": 0.4746, "step": 3445 }, { "epoch": 0.42339353728959334, "grad_norm": 0.4616197996338231, "learning_rate": 2.1415779765487342e-05, "loss": 0.3969, "step": 3446 }, { "epoch": 0.42351640250645045, "grad_norm": 0.3269468447583885, "learning_rate": 2.1409964043087548e-05, "loss": 0.3917, "step": 3447 }, { "epoch": 0.4236392677233075, "grad_norm": 0.456375636926491, "learning_rate": 2.140414714161643e-05, "loss": 0.3806, "step": 3448 }, { "epoch": 0.4237621329401646, "grad_norm": 0.4081319340984538, "learning_rate": 2.1398329062143982e-05, "loss": 0.3602, "step": 3449 }, { "epoch": 0.42388499815702174, "grad_norm": 0.3623619611410681, "learning_rate": 2.1392509805740396e-05, "loss": 0.3851, "step": 3450 }, { "epoch": 0.42400786337387886, "grad_norm": 0.32278497457745964, "learning_rate": 2.138668937347609e-05, "loss": 0.4147, "step": 3451 }, { "epoch": 0.424130728590736, "grad_norm": 0.34245353784430443, "learning_rate": 2.1380867766421693e-05, "loss": 0.4128, "step": 3452 }, { "epoch": 0.4242535938075931, "grad_norm": 0.3295847978851934, "learning_rate": 2.137504498564805e-05, "loss": 0.3866, "step": 3453 }, { "epoch": 0.4243764590244502, "grad_norm": 0.41874721665808584, "learning_rate": 2.136922103222623e-05, "loss": 0.3972, "step": 3454 }, { "epoch": 0.42449932424130726, "grad_norm": 0.3947696931047465, "learning_rate": 2.1363395907227502e-05, "loss": 0.3518, "step": 3455 }, { "epoch": 0.4246221894581644, "grad_norm": 0.36095205452865137, "learning_rate": 2.1357569611723365e-05, "loss": 0.3867, "step": 3456 }, { "epoch": 0.4247450546750215, "grad_norm": 0.3501262004107321, "learning_rate": 2.135174214678553e-05, "loss": 0.3768, "step": 3457 }, { "epoch": 0.4248679198918786, "grad_norm": 0.36510602860036856, "learning_rate": 2.134591351348592e-05, "loss": 0.339, "step": 3458 }, { "epoch": 0.4249907851087357, "grad_norm": 0.3982505626832187, "learning_rate": 2.1340083712896674e-05, "loss": 0.3477, "step": 3459 }, { "epoch": 0.42511365032559284, "grad_norm": 0.379702862565367, "learning_rate": 2.1334252746090142e-05, "loss": 0.3577, "step": 3460 }, { "epoch": 0.42523651554244996, "grad_norm": 0.34600948113237945, "learning_rate": 2.1328420614138903e-05, "loss": 0.4029, "step": 3461 }, { "epoch": 0.425359380759307, "grad_norm": 0.4038514777835777, "learning_rate": 2.1322587318115728e-05, "loss": 0.4463, "step": 3462 }, { "epoch": 0.42548224597616413, "grad_norm": 0.39143337269635253, "learning_rate": 2.131675285909362e-05, "loss": 0.3538, "step": 3463 }, { "epoch": 0.42560511119302125, "grad_norm": 0.39179683380466784, "learning_rate": 2.1310917238145793e-05, "loss": 0.3811, "step": 3464 }, { "epoch": 0.42572797640987836, "grad_norm": 0.30709030317768354, "learning_rate": 2.130508045634566e-05, "loss": 0.3493, "step": 3465 }, { "epoch": 0.4258508416267355, "grad_norm": 0.34824223082181455, "learning_rate": 2.1299242514766875e-05, "loss": 0.469, "step": 3466 }, { "epoch": 0.4259737068435926, "grad_norm": 0.37193171378812606, "learning_rate": 2.1293403414483277e-05, "loss": 0.4085, "step": 3467 }, { "epoch": 0.4260965720604497, "grad_norm": 0.4302749655774069, "learning_rate": 2.128756315656894e-05, "loss": 0.4167, "step": 3468 }, { "epoch": 0.42621943727730677, "grad_norm": 0.3282609062425896, "learning_rate": 2.128172174209813e-05, "loss": 0.4348, "step": 3469 }, { "epoch": 0.4263423024941639, "grad_norm": 0.30586708710367905, "learning_rate": 2.127587917214535e-05, "loss": 0.3292, "step": 3470 }, { "epoch": 0.426465167711021, "grad_norm": 0.37435801444889477, "learning_rate": 2.127003544778529e-05, "loss": 0.3945, "step": 3471 }, { "epoch": 0.4265880329278781, "grad_norm": 0.37866780372601805, "learning_rate": 2.126419057009288e-05, "loss": 0.4734, "step": 3472 }, { "epoch": 0.42671089814473523, "grad_norm": 0.40183486443920907, "learning_rate": 2.1258344540143234e-05, "loss": 0.3612, "step": 3473 }, { "epoch": 0.42683376336159234, "grad_norm": 0.3140898933027728, "learning_rate": 2.1252497359011698e-05, "loss": 0.386, "step": 3474 }, { "epoch": 0.42695662857844946, "grad_norm": 0.43519503153485295, "learning_rate": 2.1246649027773815e-05, "loss": 0.4035, "step": 3475 }, { "epoch": 0.4270794937953066, "grad_norm": 0.32733197675383713, "learning_rate": 2.1240799547505365e-05, "loss": 0.3397, "step": 3476 }, { "epoch": 0.42720235901216363, "grad_norm": 0.37426550393363195, "learning_rate": 2.1234948919282303e-05, "loss": 0.4278, "step": 3477 }, { "epoch": 0.42732522422902075, "grad_norm": 0.3553844911976568, "learning_rate": 2.1229097144180832e-05, "loss": 0.4158, "step": 3478 }, { "epoch": 0.42744808944587787, "grad_norm": 0.3404673261301082, "learning_rate": 2.122324422327733e-05, "loss": 0.3361, "step": 3479 }, { "epoch": 0.427570954662735, "grad_norm": 0.3569236745616214, "learning_rate": 2.1217390157648414e-05, "loss": 0.3528, "step": 3480 }, { "epoch": 0.4276938198795921, "grad_norm": 0.37744442782265025, "learning_rate": 2.1211534948370903e-05, "loss": 0.4202, "step": 3481 }, { "epoch": 0.4278166850964492, "grad_norm": 0.41378943577045324, "learning_rate": 2.1205678596521817e-05, "loss": 0.4683, "step": 3482 }, { "epoch": 0.4279395503133063, "grad_norm": 0.40048052706961706, "learning_rate": 2.1199821103178402e-05, "loss": 0.3808, "step": 3483 }, { "epoch": 0.4280624155301634, "grad_norm": 0.3727945987467886, "learning_rate": 2.11939624694181e-05, "loss": 0.3435, "step": 3484 }, { "epoch": 0.4281852807470205, "grad_norm": 0.3783829267704779, "learning_rate": 2.1188102696318573e-05, "loss": 0.4425, "step": 3485 }, { "epoch": 0.4283081459638776, "grad_norm": 0.36936338493095017, "learning_rate": 2.118224178495768e-05, "loss": 0.4039, "step": 3486 }, { "epoch": 0.42843101118073473, "grad_norm": 0.33887694054714795, "learning_rate": 2.1176379736413513e-05, "loss": 0.3287, "step": 3487 }, { "epoch": 0.42855387639759185, "grad_norm": 0.3646759121692524, "learning_rate": 2.1170516551764343e-05, "loss": 0.3721, "step": 3488 }, { "epoch": 0.42867674161444896, "grad_norm": 0.4230658010518359, "learning_rate": 2.1164652232088674e-05, "loss": 0.418, "step": 3489 }, { "epoch": 0.4287996068313061, "grad_norm": 0.4074848005309181, "learning_rate": 2.1158786778465206e-05, "loss": 0.4748, "step": 3490 }, { "epoch": 0.42892247204816314, "grad_norm": 0.3680402357198204, "learning_rate": 2.1152920191972848e-05, "loss": 0.4461, "step": 3491 }, { "epoch": 0.42904533726502025, "grad_norm": 0.3191578969774342, "learning_rate": 2.1147052473690726e-05, "loss": 0.4097, "step": 3492 }, { "epoch": 0.42916820248187737, "grad_norm": 0.4116821190755898, "learning_rate": 2.1141183624698166e-05, "loss": 0.3932, "step": 3493 }, { "epoch": 0.4292910676987345, "grad_norm": 0.3941292911348446, "learning_rate": 2.1135313646074702e-05, "loss": 0.411, "step": 3494 }, { "epoch": 0.4294139329155916, "grad_norm": 0.3767624333508222, "learning_rate": 2.1129442538900087e-05, "loss": 0.4026, "step": 3495 }, { "epoch": 0.4295367981324487, "grad_norm": 0.3209350213225189, "learning_rate": 2.1123570304254265e-05, "loss": 0.3825, "step": 3496 }, { "epoch": 0.42965966334930583, "grad_norm": 0.29382458930169647, "learning_rate": 2.11176969432174e-05, "loss": 0.3713, "step": 3497 }, { "epoch": 0.42978252856616295, "grad_norm": 0.4291103493202486, "learning_rate": 2.1111822456869853e-05, "loss": 0.3861, "step": 3498 }, { "epoch": 0.42990539378302, "grad_norm": 0.3188190098967969, "learning_rate": 2.1105946846292207e-05, "loss": 0.4129, "step": 3499 }, { "epoch": 0.4300282589998771, "grad_norm": 0.3681704849063503, "learning_rate": 2.1100070112565237e-05, "loss": 0.4639, "step": 3500 }, { "epoch": 0.43015112421673424, "grad_norm": 0.4253703823272596, "learning_rate": 2.1094192256769927e-05, "loss": 0.4562, "step": 3501 }, { "epoch": 0.43027398943359135, "grad_norm": 0.3218973978419307, "learning_rate": 2.108831327998747e-05, "loss": 0.4409, "step": 3502 }, { "epoch": 0.43039685465044847, "grad_norm": 0.3741071371763972, "learning_rate": 2.108243318329928e-05, "loss": 0.3761, "step": 3503 }, { "epoch": 0.4305197198673056, "grad_norm": 0.35049409294115214, "learning_rate": 2.107655196778694e-05, "loss": 0.4, "step": 3504 }, { "epoch": 0.4306425850841627, "grad_norm": 0.43172987786003797, "learning_rate": 2.1070669634532276e-05, "loss": 0.447, "step": 3505 }, { "epoch": 0.43076545030101976, "grad_norm": 0.34073459112768734, "learning_rate": 2.1064786184617306e-05, "loss": 0.3446, "step": 3506 }, { "epoch": 0.4308883155178769, "grad_norm": 0.41368754154365994, "learning_rate": 2.1058901619124247e-05, "loss": 0.3816, "step": 3507 }, { "epoch": 0.431011180734734, "grad_norm": 0.43513861053507064, "learning_rate": 2.1053015939135533e-05, "loss": 0.476, "step": 3508 }, { "epoch": 0.4311340459515911, "grad_norm": 0.387842227874886, "learning_rate": 2.1047129145733787e-05, "loss": 0.4508, "step": 3509 }, { "epoch": 0.4312569111684482, "grad_norm": 0.34186504900612447, "learning_rate": 2.1041241240001856e-05, "loss": 0.4671, "step": 3510 }, { "epoch": 0.43137977638530534, "grad_norm": 0.3709283246514616, "learning_rate": 2.1035352223022773e-05, "loss": 0.4474, "step": 3511 }, { "epoch": 0.43150264160216245, "grad_norm": 0.4019391430752559, "learning_rate": 2.1029462095879795e-05, "loss": 0.3424, "step": 3512 }, { "epoch": 0.4316255068190195, "grad_norm": 0.3416699778841241, "learning_rate": 2.1023570859656358e-05, "loss": 0.3944, "step": 3513 }, { "epoch": 0.4317483720358766, "grad_norm": 0.29850042284221145, "learning_rate": 2.1017678515436134e-05, "loss": 0.3868, "step": 3514 }, { "epoch": 0.43187123725273374, "grad_norm": 0.3488161772566983, "learning_rate": 2.1011785064302967e-05, "loss": 0.348, "step": 3515 }, { "epoch": 0.43199410246959086, "grad_norm": 0.40144135249519286, "learning_rate": 2.100589050734093e-05, "loss": 0.4373, "step": 3516 }, { "epoch": 0.432116967686448, "grad_norm": 0.3805169605371665, "learning_rate": 2.0999994845634285e-05, "loss": 0.4009, "step": 3517 }, { "epoch": 0.4322398329033051, "grad_norm": 0.36390011324128124, "learning_rate": 2.0994098080267496e-05, "loss": 0.4011, "step": 3518 }, { "epoch": 0.4323626981201622, "grad_norm": 0.38860718161378766, "learning_rate": 2.0988200212325237e-05, "loss": 0.406, "step": 3519 }, { "epoch": 0.43248556333701926, "grad_norm": 0.2992369304919244, "learning_rate": 2.0982301242892386e-05, "loss": 0.3518, "step": 3520 }, { "epoch": 0.4326084285538764, "grad_norm": 0.37430075240476096, "learning_rate": 2.0976401173054016e-05, "loss": 0.4237, "step": 3521 }, { "epoch": 0.4327312937707335, "grad_norm": 0.37508152206647843, "learning_rate": 2.0970500003895408e-05, "loss": 0.4278, "step": 3522 }, { "epoch": 0.4328541589875906, "grad_norm": 0.32826102050193634, "learning_rate": 2.0964597736502043e-05, "loss": 0.2999, "step": 3523 }, { "epoch": 0.4329770242044477, "grad_norm": 0.3352656226722906, "learning_rate": 2.0958694371959614e-05, "loss": 0.4708, "step": 3524 }, { "epoch": 0.43309988942130484, "grad_norm": 0.36399750804482417, "learning_rate": 2.095278991135399e-05, "loss": 0.4499, "step": 3525 }, { "epoch": 0.43322275463816196, "grad_norm": 0.37559999014452167, "learning_rate": 2.0946884355771274e-05, "loss": 0.3998, "step": 3526 }, { "epoch": 0.43334561985501907, "grad_norm": 0.4024130924798178, "learning_rate": 2.0940977706297747e-05, "loss": 0.3903, "step": 3527 }, { "epoch": 0.43346848507187613, "grad_norm": 0.3361026246358812, "learning_rate": 2.0935069964019897e-05, "loss": 0.3922, "step": 3528 }, { "epoch": 0.43359135028873325, "grad_norm": 0.36561287449250424, "learning_rate": 2.0929161130024415e-05, "loss": 0.4033, "step": 3529 }, { "epoch": 0.43371421550559036, "grad_norm": 0.32242506954200506, "learning_rate": 2.0923251205398198e-05, "loss": 0.3747, "step": 3530 }, { "epoch": 0.4338370807224475, "grad_norm": 0.3976692022820012, "learning_rate": 2.0917340191228337e-05, "loss": 0.4161, "step": 3531 }, { "epoch": 0.4339599459393046, "grad_norm": 0.4464450858379828, "learning_rate": 2.091142808860212e-05, "loss": 0.4419, "step": 3532 }, { "epoch": 0.4340828111561617, "grad_norm": 0.3013332065637877, "learning_rate": 2.0905514898607045e-05, "loss": 0.3564, "step": 3533 }, { "epoch": 0.4342056763730188, "grad_norm": 0.385579124719164, "learning_rate": 2.0899600622330802e-05, "loss": 0.4073, "step": 3534 }, { "epoch": 0.4343285415898759, "grad_norm": 0.47246195645091277, "learning_rate": 2.0893685260861288e-05, "loss": 0.3294, "step": 3535 }, { "epoch": 0.434451406806733, "grad_norm": 0.37022176841538545, "learning_rate": 2.0887768815286585e-05, "loss": 0.5184, "step": 3536 }, { "epoch": 0.4345742720235901, "grad_norm": 0.3666290999405796, "learning_rate": 2.0881851286694998e-05, "loss": 0.3835, "step": 3537 }, { "epoch": 0.43469713724044723, "grad_norm": 0.3653645102713486, "learning_rate": 2.0875932676175013e-05, "loss": 0.4114, "step": 3538 }, { "epoch": 0.43482000245730434, "grad_norm": 0.3014623994434132, "learning_rate": 2.0870012984815312e-05, "loss": 0.3647, "step": 3539 }, { "epoch": 0.43494286767416146, "grad_norm": 0.39714853178559756, "learning_rate": 2.0864092213704797e-05, "loss": 0.4083, "step": 3540 }, { "epoch": 0.4350657328910186, "grad_norm": 0.3388074463026612, "learning_rate": 2.0858170363932545e-05, "loss": 0.3551, "step": 3541 }, { "epoch": 0.43518859810787563, "grad_norm": 0.5403764570922317, "learning_rate": 2.0852247436587847e-05, "loss": 0.4906, "step": 3542 }, { "epoch": 0.43531146332473275, "grad_norm": 0.38434116827782705, "learning_rate": 2.0846323432760192e-05, "loss": 0.4114, "step": 3543 }, { "epoch": 0.43543432854158987, "grad_norm": 0.3149387763666705, "learning_rate": 2.084039835353925e-05, "loss": 0.4137, "step": 3544 }, { "epoch": 0.435557193758447, "grad_norm": 0.2926309159413023, "learning_rate": 2.0834472200014906e-05, "loss": 0.4144, "step": 3545 }, { "epoch": 0.4356800589753041, "grad_norm": 0.30887253188757924, "learning_rate": 2.0828544973277244e-05, "loss": 0.3918, "step": 3546 }, { "epoch": 0.4358029241921612, "grad_norm": 0.3595644252770545, "learning_rate": 2.0822616674416533e-05, "loss": 0.3982, "step": 3547 }, { "epoch": 0.4359257894090183, "grad_norm": 0.3177510682624003, "learning_rate": 2.0816687304523243e-05, "loss": 0.411, "step": 3548 }, { "epoch": 0.4360486546258754, "grad_norm": 0.3932015199785359, "learning_rate": 2.0810756864688045e-05, "loss": 0.4879, "step": 3549 }, { "epoch": 0.4361715198427325, "grad_norm": 0.2741516357398603, "learning_rate": 2.080482535600181e-05, "loss": 0.355, "step": 3550 }, { "epoch": 0.4362943850595896, "grad_norm": 0.33215564745334086, "learning_rate": 2.0798892779555592e-05, "loss": 0.4176, "step": 3551 }, { "epoch": 0.43641725027644673, "grad_norm": 0.33180849532805057, "learning_rate": 2.079295913644066e-05, "loss": 0.3867, "step": 3552 }, { "epoch": 0.43654011549330385, "grad_norm": 0.33452283722283876, "learning_rate": 2.0787024427748455e-05, "loss": 0.3666, "step": 3553 }, { "epoch": 0.43666298071016096, "grad_norm": 0.3801129020594995, "learning_rate": 2.078108865457064e-05, "loss": 0.4344, "step": 3554 }, { "epoch": 0.4367858459270181, "grad_norm": 0.3474519904021553, "learning_rate": 2.0775151817999063e-05, "loss": 0.44, "step": 3555 }, { "epoch": 0.4369087111438752, "grad_norm": 0.2770261393347774, "learning_rate": 2.0769213919125764e-05, "loss": 0.3217, "step": 3556 }, { "epoch": 0.43703157636073225, "grad_norm": 0.3075241766025102, "learning_rate": 2.0763274959042972e-05, "loss": 0.341, "step": 3557 }, { "epoch": 0.43715444157758937, "grad_norm": 0.3407320059549603, "learning_rate": 2.0757334938843135e-05, "loss": 0.3897, "step": 3558 }, { "epoch": 0.4372773067944465, "grad_norm": 0.33031595084038184, "learning_rate": 2.075139385961886e-05, "loss": 0.3439, "step": 3559 }, { "epoch": 0.4374001720113036, "grad_norm": 0.2937683635524918, "learning_rate": 2.0745451722462996e-05, "loss": 0.4147, "step": 3560 }, { "epoch": 0.4375230372281607, "grad_norm": 0.3593834456364729, "learning_rate": 2.0739508528468544e-05, "loss": 0.4134, "step": 3561 }, { "epoch": 0.43764590244501783, "grad_norm": 0.40577850450845476, "learning_rate": 2.0733564278728723e-05, "loss": 0.4274, "step": 3562 }, { "epoch": 0.43776876766187495, "grad_norm": 0.40143740429303115, "learning_rate": 2.072761897433693e-05, "loss": 0.4306, "step": 3563 }, { "epoch": 0.437891632878732, "grad_norm": 0.3523757232750293, "learning_rate": 2.072167261638678e-05, "loss": 0.3294, "step": 3564 }, { "epoch": 0.4380144980955891, "grad_norm": 0.40594987383903003, "learning_rate": 2.0715725205972054e-05, "loss": 0.3698, "step": 3565 }, { "epoch": 0.43813736331244624, "grad_norm": 0.3185249391661298, "learning_rate": 2.070977674418675e-05, "loss": 0.4509, "step": 3566 }, { "epoch": 0.43826022852930335, "grad_norm": 0.4104012546800591, "learning_rate": 2.0703827232125033e-05, "loss": 0.4181, "step": 3567 }, { "epoch": 0.43838309374616047, "grad_norm": 0.32494553651592417, "learning_rate": 2.069787667088129e-05, "loss": 0.4454, "step": 3568 }, { "epoch": 0.4385059589630176, "grad_norm": 0.3749583801287379, "learning_rate": 2.069192506155009e-05, "loss": 0.3573, "step": 3569 }, { "epoch": 0.4386288241798747, "grad_norm": 0.45979616499825493, "learning_rate": 2.068597240522618e-05, "loss": 0.5132, "step": 3570 }, { "epoch": 0.43875168939673176, "grad_norm": 0.3201875731948442, "learning_rate": 2.068001870300453e-05, "loss": 0.3628, "step": 3571 }, { "epoch": 0.4388745546135889, "grad_norm": 0.3112269634701783, "learning_rate": 2.067406395598027e-05, "loss": 0.3752, "step": 3572 }, { "epoch": 0.438997419830446, "grad_norm": 0.33163125685536404, "learning_rate": 2.0668108165248747e-05, "loss": 0.3298, "step": 3573 }, { "epoch": 0.4391202850473031, "grad_norm": 0.36587401545019765, "learning_rate": 2.0662151331905486e-05, "loss": 0.4077, "step": 3574 }, { "epoch": 0.4392431502641602, "grad_norm": 0.38678082983408285, "learning_rate": 2.0656193457046206e-05, "loss": 0.3165, "step": 3575 }, { "epoch": 0.43936601548101734, "grad_norm": 0.37820763536173474, "learning_rate": 2.065023454176682e-05, "loss": 0.4453, "step": 3576 }, { "epoch": 0.43948888069787445, "grad_norm": 0.4289806694057173, "learning_rate": 2.064427458716344e-05, "loss": 0.4869, "step": 3577 }, { "epoch": 0.43961174591473157, "grad_norm": 0.4528634541267139, "learning_rate": 2.0638313594332344e-05, "loss": 0.3457, "step": 3578 }, { "epoch": 0.4397346111315886, "grad_norm": 0.33223010155445065, "learning_rate": 2.0632351564370035e-05, "loss": 0.3484, "step": 3579 }, { "epoch": 0.43985747634844574, "grad_norm": 0.29793082304411045, "learning_rate": 2.062638849837318e-05, "loss": 0.3912, "step": 3580 }, { "epoch": 0.43998034156530286, "grad_norm": 0.40141502402554935, "learning_rate": 2.0620424397438646e-05, "loss": 0.3656, "step": 3581 }, { "epoch": 0.44010320678216, "grad_norm": 0.3296250701091888, "learning_rate": 2.06144592626635e-05, "loss": 0.4037, "step": 3582 }, { "epoch": 0.4402260719990171, "grad_norm": 0.36879178446181726, "learning_rate": 2.060849309514498e-05, "loss": 0.4215, "step": 3583 }, { "epoch": 0.4403489372158742, "grad_norm": 0.43740241750089204, "learning_rate": 2.0602525895980528e-05, "loss": 0.4109, "step": 3584 }, { "epoch": 0.4404718024327313, "grad_norm": 0.34786713750751275, "learning_rate": 2.0596557666267776e-05, "loss": 0.3302, "step": 3585 }, { "epoch": 0.4405946676495884, "grad_norm": 0.34535851623069586, "learning_rate": 2.0590588407104532e-05, "loss": 0.3503, "step": 3586 }, { "epoch": 0.4407175328664455, "grad_norm": 0.3383069784628563, "learning_rate": 2.0584618119588806e-05, "loss": 0.3618, "step": 3587 }, { "epoch": 0.4408403980833026, "grad_norm": 0.4282532599692204, "learning_rate": 2.0578646804818793e-05, "loss": 0.423, "step": 3588 }, { "epoch": 0.4409632633001597, "grad_norm": 0.3650833509462641, "learning_rate": 2.0572674463892883e-05, "loss": 0.4019, "step": 3589 }, { "epoch": 0.44108612851701684, "grad_norm": 0.36502652342012304, "learning_rate": 2.0566701097909643e-05, "loss": 0.4131, "step": 3590 }, { "epoch": 0.44120899373387396, "grad_norm": 0.32232227005497843, "learning_rate": 2.0560726707967836e-05, "loss": 0.3462, "step": 3591 }, { "epoch": 0.44133185895073107, "grad_norm": 0.41742798689006577, "learning_rate": 2.0554751295166412e-05, "loss": 0.3577, "step": 3592 }, { "epoch": 0.44145472416758813, "grad_norm": 0.5125256544067036, "learning_rate": 2.054877486060452e-05, "loss": 0.4303, "step": 3593 }, { "epoch": 0.44157758938444525, "grad_norm": 0.39093654980709663, "learning_rate": 2.0542797405381476e-05, "loss": 0.4011, "step": 3594 }, { "epoch": 0.44170045460130236, "grad_norm": 0.38052740629115456, "learning_rate": 2.0536818930596785e-05, "loss": 0.3623, "step": 3595 }, { "epoch": 0.4418233198181595, "grad_norm": 0.43673708285238033, "learning_rate": 2.053083943735017e-05, "loss": 0.4473, "step": 3596 }, { "epoch": 0.4419461850350166, "grad_norm": 0.3290031810639264, "learning_rate": 2.0524858926741505e-05, "loss": 0.3089, "step": 3597 }, { "epoch": 0.4420690502518737, "grad_norm": 0.33772594915187903, "learning_rate": 2.051887739987087e-05, "loss": 0.3953, "step": 3598 }, { "epoch": 0.4421919154687308, "grad_norm": 0.3143660989336131, "learning_rate": 2.0512894857838528e-05, "loss": 0.3586, "step": 3599 }, { "epoch": 0.4423147806855879, "grad_norm": 0.5212846986382947, "learning_rate": 2.050691130174493e-05, "loss": 0.4547, "step": 3600 }, { "epoch": 0.442437645902445, "grad_norm": 0.4086073021706548, "learning_rate": 2.0500926732690713e-05, "loss": 0.5228, "step": 3601 }, { "epoch": 0.4425605111193021, "grad_norm": 0.4641921396506517, "learning_rate": 2.0494941151776698e-05, "loss": 0.4541, "step": 3602 }, { "epoch": 0.44268337633615923, "grad_norm": 0.35569851075153375, "learning_rate": 2.0488954560103895e-05, "loss": 0.3391, "step": 3603 }, { "epoch": 0.44280624155301634, "grad_norm": 0.34949818036820957, "learning_rate": 2.0482966958773494e-05, "loss": 0.5141, "step": 3604 }, { "epoch": 0.44292910676987346, "grad_norm": 0.3543837688179243, "learning_rate": 2.047697834888688e-05, "loss": 0.4095, "step": 3605 }, { "epoch": 0.4430519719867306, "grad_norm": 0.4417616971730573, "learning_rate": 2.047098873154562e-05, "loss": 0.4479, "step": 3606 }, { "epoch": 0.4431748372035877, "grad_norm": 0.38320782770448164, "learning_rate": 2.0464998107851464e-05, "loss": 0.4289, "step": 3607 }, { "epoch": 0.44329770242044475, "grad_norm": 0.371507962189336, "learning_rate": 2.0459006478906348e-05, "loss": 0.3561, "step": 3608 }, { "epoch": 0.44342056763730187, "grad_norm": 0.32921821291276715, "learning_rate": 2.045301384581239e-05, "loss": 0.4566, "step": 3609 }, { "epoch": 0.443543432854159, "grad_norm": 0.42063847770596235, "learning_rate": 2.0447020209671904e-05, "loss": 0.4011, "step": 3610 }, { "epoch": 0.4436662980710161, "grad_norm": 0.35359387817245175, "learning_rate": 2.044102557158737e-05, "loss": 0.3384, "step": 3611 }, { "epoch": 0.4437891632878732, "grad_norm": 0.38958951357409466, "learning_rate": 2.0435029932661472e-05, "loss": 0.4223, "step": 3612 }, { "epoch": 0.4439120285047303, "grad_norm": 0.41020625694464724, "learning_rate": 2.0429033293997066e-05, "loss": 0.4469, "step": 3613 }, { "epoch": 0.44403489372158744, "grad_norm": 0.4042554018185749, "learning_rate": 2.042303565669719e-05, "loss": 0.4259, "step": 3614 }, { "epoch": 0.4441577589384445, "grad_norm": 0.34624937077543694, "learning_rate": 2.0417037021865077e-05, "loss": 0.4015, "step": 3615 }, { "epoch": 0.4442806241553016, "grad_norm": 0.4013848306943411, "learning_rate": 2.0411037390604134e-05, "loss": 0.3847, "step": 3616 }, { "epoch": 0.44440348937215873, "grad_norm": 0.31365366765861985, "learning_rate": 2.0405036764017956e-05, "loss": 0.4403, "step": 3617 }, { "epoch": 0.44452635458901585, "grad_norm": 0.3942845157773175, "learning_rate": 2.0399035143210315e-05, "loss": 0.4227, "step": 3618 }, { "epoch": 0.44464921980587296, "grad_norm": 0.48461343213349833, "learning_rate": 2.039303252928518e-05, "loss": 0.4011, "step": 3619 }, { "epoch": 0.4447720850227301, "grad_norm": 0.32973718534185287, "learning_rate": 2.038702892334668e-05, "loss": 0.391, "step": 3620 }, { "epoch": 0.4448949502395872, "grad_norm": 0.3416211449081529, "learning_rate": 2.038102432649915e-05, "loss": 0.4672, "step": 3621 }, { "epoch": 0.44501781545644425, "grad_norm": 0.35620966032617923, "learning_rate": 2.0375018739847087e-05, "loss": 0.3724, "step": 3622 }, { "epoch": 0.44514068067330137, "grad_norm": 0.40331947736811874, "learning_rate": 2.0369012164495195e-05, "loss": 0.4344, "step": 3623 }, { "epoch": 0.4452635458901585, "grad_norm": 0.3399259340624066, "learning_rate": 2.036300460154832e-05, "loss": 0.3413, "step": 3624 }, { "epoch": 0.4453864111070156, "grad_norm": 0.3857229166250816, "learning_rate": 2.035699605211154e-05, "loss": 0.3546, "step": 3625 }, { "epoch": 0.4455092763238727, "grad_norm": 0.2754745330269242, "learning_rate": 2.0350986517290072e-05, "loss": 0.3975, "step": 3626 }, { "epoch": 0.44563214154072983, "grad_norm": 0.370877066768127, "learning_rate": 2.034497599818934e-05, "loss": 0.386, "step": 3627 }, { "epoch": 0.44575500675758695, "grad_norm": 0.38809812924548165, "learning_rate": 2.0338964495914932e-05, "loss": 0.4379, "step": 3628 }, { "epoch": 0.44587787197444406, "grad_norm": 0.34836619772022676, "learning_rate": 2.0332952011572634e-05, "loss": 0.4065, "step": 3629 }, { "epoch": 0.4460007371913011, "grad_norm": 0.4801317058859205, "learning_rate": 2.0326938546268398e-05, "loss": 0.4311, "step": 3630 }, { "epoch": 0.44612360240815824, "grad_norm": 0.39266695939064705, "learning_rate": 2.0320924101108364e-05, "loss": 0.4184, "step": 3631 }, { "epoch": 0.44624646762501535, "grad_norm": 0.34387107055791905, "learning_rate": 2.0314908677198846e-05, "loss": 0.4519, "step": 3632 }, { "epoch": 0.44636933284187247, "grad_norm": 0.34068063682508604, "learning_rate": 2.0308892275646343e-05, "loss": 0.3923, "step": 3633 }, { "epoch": 0.4464921980587296, "grad_norm": 0.2893007623735683, "learning_rate": 2.0302874897557545e-05, "loss": 0.379, "step": 3634 }, { "epoch": 0.4466150632755867, "grad_norm": 0.3339854789328649, "learning_rate": 2.029685654403929e-05, "loss": 0.389, "step": 3635 }, { "epoch": 0.4467379284924438, "grad_norm": 0.30898580304331696, "learning_rate": 2.029083721619863e-05, "loss": 0.3793, "step": 3636 }, { "epoch": 0.4468607937093009, "grad_norm": 0.40451681421885116, "learning_rate": 2.0284816915142775e-05, "loss": 0.527, "step": 3637 }, { "epoch": 0.446983658926158, "grad_norm": 0.30628415207885545, "learning_rate": 2.027879564197912e-05, "loss": 0.4231, "step": 3638 }, { "epoch": 0.4471065241430151, "grad_norm": 0.41225222710099374, "learning_rate": 2.0272773397815247e-05, "loss": 0.4895, "step": 3639 }, { "epoch": 0.4472293893598722, "grad_norm": 0.34412380894319655, "learning_rate": 2.02667501837589e-05, "loss": 0.4246, "step": 3640 }, { "epoch": 0.44735225457672934, "grad_norm": 0.4504582434011421, "learning_rate": 2.0260726000918006e-05, "loss": 0.4889, "step": 3641 }, { "epoch": 0.44747511979358645, "grad_norm": 0.4023512250755222, "learning_rate": 2.025470085040069e-05, "loss": 0.4173, "step": 3642 }, { "epoch": 0.44759798501044357, "grad_norm": 0.5604145366089796, "learning_rate": 2.0248674733315224e-05, "loss": 0.4087, "step": 3643 }, { "epoch": 0.4477208502273006, "grad_norm": 0.2737994874486205, "learning_rate": 2.0242647650770084e-05, "loss": 0.3389, "step": 3644 }, { "epoch": 0.44784371544415774, "grad_norm": 0.39500444080273284, "learning_rate": 2.0236619603873905e-05, "loss": 0.5004, "step": 3645 }, { "epoch": 0.44796658066101486, "grad_norm": 0.36357521685721356, "learning_rate": 2.0230590593735515e-05, "loss": 0.3679, "step": 3646 }, { "epoch": 0.448089445877872, "grad_norm": 0.42718975769478845, "learning_rate": 2.02245606214639e-05, "loss": 0.4934, "step": 3647 }, { "epoch": 0.4482123110947291, "grad_norm": 0.3846202839157917, "learning_rate": 2.0218529688168244e-05, "loss": 0.4137, "step": 3648 }, { "epoch": 0.4483351763115862, "grad_norm": 0.32961079180589825, "learning_rate": 2.02124977949579e-05, "loss": 0.3808, "step": 3649 }, { "epoch": 0.4484580415284433, "grad_norm": 0.3483789866589755, "learning_rate": 2.0206464942942388e-05, "loss": 0.3387, "step": 3650 }, { "epoch": 0.4485809067453004, "grad_norm": 0.30802311095042834, "learning_rate": 2.0200431133231414e-05, "loss": 0.3301, "step": 3651 }, { "epoch": 0.4487037719621575, "grad_norm": 0.39618457513543415, "learning_rate": 2.0194396366934863e-05, "loss": 0.4207, "step": 3652 }, { "epoch": 0.4488266371790146, "grad_norm": 0.31516949400772487, "learning_rate": 2.018836064516278e-05, "loss": 0.3265, "step": 3653 }, { "epoch": 0.4489495023958717, "grad_norm": 0.3753860810042433, "learning_rate": 2.018232396902541e-05, "loss": 0.3854, "step": 3654 }, { "epoch": 0.44907236761272884, "grad_norm": 0.35203199212304154, "learning_rate": 2.0176286339633148e-05, "loss": 0.3738, "step": 3655 }, { "epoch": 0.44919523282958596, "grad_norm": 0.40243315654642703, "learning_rate": 2.0170247758096586e-05, "loss": 0.4355, "step": 3656 }, { "epoch": 0.44931809804644307, "grad_norm": 0.37980566059321613, "learning_rate": 2.016420822552648e-05, "loss": 0.405, "step": 3657 }, { "epoch": 0.4494409632633002, "grad_norm": 0.38223992646514504, "learning_rate": 2.0158167743033764e-05, "loss": 0.3583, "step": 3658 }, { "epoch": 0.44956382848015725, "grad_norm": 0.35913658590095393, "learning_rate": 2.0152126311729542e-05, "loss": 0.3659, "step": 3659 }, { "epoch": 0.44968669369701436, "grad_norm": 0.3445220241330327, "learning_rate": 2.0146083932725096e-05, "loss": 0.4395, "step": 3660 }, { "epoch": 0.4498095589138715, "grad_norm": 0.3652612105126723, "learning_rate": 2.0140040607131888e-05, "loss": 0.3721, "step": 3661 }, { "epoch": 0.4499324241307286, "grad_norm": 0.32489535373960565, "learning_rate": 2.0133996336061538e-05, "loss": 0.3576, "step": 3662 }, { "epoch": 0.4500552893475857, "grad_norm": 0.377321596037567, "learning_rate": 2.0127951120625864e-05, "loss": 0.3583, "step": 3663 }, { "epoch": 0.4501781545644428, "grad_norm": 0.3856346212071296, "learning_rate": 2.0121904961936835e-05, "loss": 0.3834, "step": 3664 }, { "epoch": 0.45030101978129994, "grad_norm": 0.34622041457248065, "learning_rate": 2.0115857861106604e-05, "loss": 0.4582, "step": 3665 }, { "epoch": 0.450423884998157, "grad_norm": 0.3045064289828424, "learning_rate": 2.0109809819247498e-05, "loss": 0.4136, "step": 3666 }, { "epoch": 0.4505467502150141, "grad_norm": 0.3468274632878711, "learning_rate": 2.010376083747201e-05, "loss": 0.475, "step": 3667 }, { "epoch": 0.45066961543187123, "grad_norm": 0.41789972416356014, "learning_rate": 2.0097710916892823e-05, "loss": 0.4241, "step": 3668 }, { "epoch": 0.45079248064872834, "grad_norm": 0.3045481970177047, "learning_rate": 2.0091660058622767e-05, "loss": 0.4117, "step": 3669 }, { "epoch": 0.45091534586558546, "grad_norm": 0.3476732222835775, "learning_rate": 2.0085608263774864e-05, "loss": 0.3525, "step": 3670 }, { "epoch": 0.4510382110824426, "grad_norm": 0.43887268440581784, "learning_rate": 2.0079555533462306e-05, "loss": 0.4215, "step": 3671 }, { "epoch": 0.4511610762992997, "grad_norm": 0.39808002885214605, "learning_rate": 2.0073501868798444e-05, "loss": 0.464, "step": 3672 }, { "epoch": 0.45128394151615675, "grad_norm": 0.32366543993277047, "learning_rate": 2.0067447270896822e-05, "loss": 0.3454, "step": 3673 }, { "epoch": 0.45140680673301387, "grad_norm": 0.3234871915479347, "learning_rate": 2.0061391740871133e-05, "loss": 0.4199, "step": 3674 }, { "epoch": 0.451529671949871, "grad_norm": 0.34458950023759327, "learning_rate": 2.0055335279835257e-05, "loss": 0.3754, "step": 3675 }, { "epoch": 0.4516525371667281, "grad_norm": 0.3723916538256981, "learning_rate": 2.0049277888903244e-05, "loss": 0.3588, "step": 3676 }, { "epoch": 0.4517754023835852, "grad_norm": 0.4125967629197728, "learning_rate": 2.0043219569189312e-05, "loss": 0.4165, "step": 3677 }, { "epoch": 0.4518982676004423, "grad_norm": 0.37184144056081003, "learning_rate": 2.0037160321807846e-05, "loss": 0.5089, "step": 3678 }, { "epoch": 0.45202113281729944, "grad_norm": 0.31056829566079375, "learning_rate": 2.00311001478734e-05, "loss": 0.3514, "step": 3679 }, { "epoch": 0.45214399803415656, "grad_norm": 0.42184104978605147, "learning_rate": 2.0025039048500712e-05, "loss": 0.359, "step": 3680 }, { "epoch": 0.4522668632510136, "grad_norm": 0.4690473250473968, "learning_rate": 2.0018977024804682e-05, "loss": 0.4337, "step": 3681 }, { "epoch": 0.45238972846787073, "grad_norm": 0.4315463244667314, "learning_rate": 2.0012914077900374e-05, "loss": 0.3959, "step": 3682 }, { "epoch": 0.45251259368472785, "grad_norm": 0.3152936739185362, "learning_rate": 2.0006850208903034e-05, "loss": 0.3877, "step": 3683 }, { "epoch": 0.45263545890158496, "grad_norm": 0.32273620540056963, "learning_rate": 2.000078541892807e-05, "loss": 0.4408, "step": 3684 }, { "epoch": 0.4527583241184421, "grad_norm": 0.3282324819698989, "learning_rate": 1.9994719709091052e-05, "loss": 0.4691, "step": 3685 }, { "epoch": 0.4528811893352992, "grad_norm": 0.3739802885265564, "learning_rate": 1.9988653080507743e-05, "loss": 0.4333, "step": 3686 }, { "epoch": 0.4530040545521563, "grad_norm": 0.3815046081689773, "learning_rate": 1.9982585534294054e-05, "loss": 0.4191, "step": 3687 }, { "epoch": 0.45312691976901337, "grad_norm": 0.4076239446481136, "learning_rate": 1.9976517071566065e-05, "loss": 0.3894, "step": 3688 }, { "epoch": 0.4532497849858705, "grad_norm": 0.44096408938097703, "learning_rate": 1.9970447693440036e-05, "loss": 0.3997, "step": 3689 }, { "epoch": 0.4533726502027276, "grad_norm": 0.3545866105374145, "learning_rate": 1.9964377401032386e-05, "loss": 0.3812, "step": 3690 }, { "epoch": 0.4534955154195847, "grad_norm": 0.3640202045301879, "learning_rate": 1.9958306195459708e-05, "loss": 0.491, "step": 3691 }, { "epoch": 0.45361838063644183, "grad_norm": 0.4939516302403304, "learning_rate": 1.995223407783877e-05, "loss": 0.4422, "step": 3692 }, { "epoch": 0.45374124585329895, "grad_norm": 0.33228403882720864, "learning_rate": 1.9946161049286474e-05, "loss": 0.3961, "step": 3693 }, { "epoch": 0.45386411107015606, "grad_norm": 0.3840266187190393, "learning_rate": 1.994008711091994e-05, "loss": 0.3484, "step": 3694 }, { "epoch": 0.4539869762870131, "grad_norm": 0.35722074901079354, "learning_rate": 1.9934012263856417e-05, "loss": 0.3495, "step": 3695 }, { "epoch": 0.45410984150387024, "grad_norm": 0.3709788123988426, "learning_rate": 1.992793650921334e-05, "loss": 0.4037, "step": 3696 }, { "epoch": 0.45423270672072735, "grad_norm": 0.29389531593857204, "learning_rate": 1.99218598481083e-05, "loss": 0.3875, "step": 3697 }, { "epoch": 0.45435557193758447, "grad_norm": 0.40686031221309166, "learning_rate": 1.9915782281659052e-05, "loss": 0.4296, "step": 3698 }, { "epoch": 0.4544784371544416, "grad_norm": 0.2951119655052254, "learning_rate": 1.9909703810983542e-05, "loss": 0.4159, "step": 3699 }, { "epoch": 0.4546013023712987, "grad_norm": 0.3259067711420106, "learning_rate": 1.9903624437199853e-05, "loss": 0.3838, "step": 3700 }, { "epoch": 0.4547241675881558, "grad_norm": 0.4290132182110817, "learning_rate": 1.9897544161426252e-05, "loss": 0.4643, "step": 3701 }, { "epoch": 0.4548470328050129, "grad_norm": 0.346548482422272, "learning_rate": 1.9891462984781162e-05, "loss": 0.4205, "step": 3702 }, { "epoch": 0.45496989802187, "grad_norm": 0.32395797805038873, "learning_rate": 1.988538090838318e-05, "loss": 0.3755, "step": 3703 }, { "epoch": 0.4550927632387271, "grad_norm": 0.3845144762232551, "learning_rate": 1.987929793335106e-05, "loss": 0.3862, "step": 3704 }, { "epoch": 0.4552156284555842, "grad_norm": 0.3259978524827263, "learning_rate": 1.987321406080373e-05, "loss": 0.3992, "step": 3705 }, { "epoch": 0.45533849367244134, "grad_norm": 0.4922137403059752, "learning_rate": 1.9867129291860283e-05, "loss": 0.4275, "step": 3706 }, { "epoch": 0.45546135888929845, "grad_norm": 0.37722731992806197, "learning_rate": 1.986104362763996e-05, "loss": 0.3839, "step": 3707 }, { "epoch": 0.45558422410615557, "grad_norm": 0.33862763064408885, "learning_rate": 1.985495706926219e-05, "loss": 0.3956, "step": 3708 }, { "epoch": 0.4557070893230127, "grad_norm": 0.3010650247353648, "learning_rate": 1.984886961784655e-05, "loss": 0.4331, "step": 3709 }, { "epoch": 0.45582995453986974, "grad_norm": 0.3747362758667823, "learning_rate": 1.984278127451279e-05, "loss": 0.4335, "step": 3710 }, { "epoch": 0.45595281975672686, "grad_norm": 0.3506843378396547, "learning_rate": 1.9836692040380826e-05, "loss": 0.4097, "step": 3711 }, { "epoch": 0.456075684973584, "grad_norm": 0.40313935251821664, "learning_rate": 1.9830601916570722e-05, "loss": 0.3723, "step": 3712 }, { "epoch": 0.4561985501904411, "grad_norm": 0.37524989418683274, "learning_rate": 1.9824510904202725e-05, "loss": 0.4671, "step": 3713 }, { "epoch": 0.4563214154072982, "grad_norm": 0.32471651023202136, "learning_rate": 1.9818419004397234e-05, "loss": 0.3569, "step": 3714 }, { "epoch": 0.4564442806241553, "grad_norm": 0.3380667382145599, "learning_rate": 1.981232621827482e-05, "loss": 0.4138, "step": 3715 }, { "epoch": 0.45656714584101243, "grad_norm": 0.31866063532881006, "learning_rate": 1.980623254695621e-05, "loss": 0.5468, "step": 3716 }, { "epoch": 0.4566900110578695, "grad_norm": 0.32938322304506606, "learning_rate": 1.9800137991562286e-05, "loss": 0.3394, "step": 3717 }, { "epoch": 0.4568128762747266, "grad_norm": 0.37786059293677093, "learning_rate": 1.9794042553214106e-05, "loss": 0.4596, "step": 3718 }, { "epoch": 0.4569357414915837, "grad_norm": 0.39268658552703817, "learning_rate": 1.9787946233032896e-05, "loss": 0.4557, "step": 3719 }, { "epoch": 0.45705860670844084, "grad_norm": 0.3214267210100699, "learning_rate": 1.978184903214002e-05, "loss": 0.3596, "step": 3720 }, { "epoch": 0.45718147192529796, "grad_norm": 0.2891492482660212, "learning_rate": 1.977575095165703e-05, "loss": 0.4007, "step": 3721 }, { "epoch": 0.45730433714215507, "grad_norm": 0.4368896394179181, "learning_rate": 1.9769651992705627e-05, "loss": 0.4604, "step": 3722 }, { "epoch": 0.4574272023590122, "grad_norm": 0.4277251801143732, "learning_rate": 1.9763552156407666e-05, "loss": 0.4616, "step": 3723 }, { "epoch": 0.45755006757586925, "grad_norm": 0.339186234286311, "learning_rate": 1.9757451443885184e-05, "loss": 0.3875, "step": 3724 }, { "epoch": 0.45767293279272636, "grad_norm": 0.3828075520847473, "learning_rate": 1.9751349856260357e-05, "loss": 0.382, "step": 3725 }, { "epoch": 0.4577957980095835, "grad_norm": 0.3231922777116697, "learning_rate": 1.9745247394655544e-05, "loss": 0.3551, "step": 3726 }, { "epoch": 0.4579186632264406, "grad_norm": 0.3437357418106624, "learning_rate": 1.973914406019324e-05, "loss": 0.3654, "step": 3727 }, { "epoch": 0.4580415284432977, "grad_norm": 0.32255195048968904, "learning_rate": 1.9733039853996126e-05, "loss": 0.3632, "step": 3728 }, { "epoch": 0.4581643936601548, "grad_norm": 0.35594171664815694, "learning_rate": 1.9726934777187023e-05, "loss": 0.3955, "step": 3729 }, { "epoch": 0.45828725887701194, "grad_norm": 0.41798050811481535, "learning_rate": 1.9720828830888922e-05, "loss": 0.4224, "step": 3730 }, { "epoch": 0.458410124093869, "grad_norm": 0.5022342523471217, "learning_rate": 1.9714722016224977e-05, "loss": 0.4182, "step": 3731 }, { "epoch": 0.4585329893107261, "grad_norm": 0.4194103166595794, "learning_rate": 1.970861433431849e-05, "loss": 0.3437, "step": 3732 }, { "epoch": 0.45865585452758323, "grad_norm": 0.3047200328144575, "learning_rate": 1.970250578629293e-05, "loss": 0.3661, "step": 3733 }, { "epoch": 0.45877871974444034, "grad_norm": 0.311049059525901, "learning_rate": 1.9696396373271935e-05, "loss": 0.3882, "step": 3734 }, { "epoch": 0.45890158496129746, "grad_norm": 0.3742522840676426, "learning_rate": 1.9690286096379277e-05, "loss": 0.3707, "step": 3735 }, { "epoch": 0.4590244501781546, "grad_norm": 0.32397040851297715, "learning_rate": 1.9684174956738912e-05, "loss": 0.4095, "step": 3736 }, { "epoch": 0.4591473153950117, "grad_norm": 0.3536995829383224, "learning_rate": 1.9678062955474943e-05, "loss": 0.324, "step": 3737 }, { "epoch": 0.4592701806118688, "grad_norm": 0.34278218236609426, "learning_rate": 1.9671950093711633e-05, "loss": 0.3796, "step": 3738 }, { "epoch": 0.45939304582872587, "grad_norm": 0.4324116395966203, "learning_rate": 1.9665836372573397e-05, "loss": 0.3727, "step": 3739 }, { "epoch": 0.459515911045583, "grad_norm": 0.32984220338597753, "learning_rate": 1.965972179318482e-05, "loss": 0.4215, "step": 3740 }, { "epoch": 0.4596387762624401, "grad_norm": 0.34245412614057813, "learning_rate": 1.965360635667064e-05, "loss": 0.3256, "step": 3741 }, { "epoch": 0.4597616414792972, "grad_norm": 0.39985409646044945, "learning_rate": 1.964749006415575e-05, "loss": 0.3859, "step": 3742 }, { "epoch": 0.4598845066961543, "grad_norm": 0.34282588211434367, "learning_rate": 1.9641372916765207e-05, "loss": 0.4053, "step": 3743 }, { "epoch": 0.46000737191301144, "grad_norm": 0.37141868083167673, "learning_rate": 1.963525491562421e-05, "loss": 0.4436, "step": 3744 }, { "epoch": 0.46013023712986856, "grad_norm": 0.3537768779051126, "learning_rate": 1.962913606185814e-05, "loss": 0.3969, "step": 3745 }, { "epoch": 0.4602531023467256, "grad_norm": 0.369471216701242, "learning_rate": 1.9623016356592504e-05, "loss": 0.3052, "step": 3746 }, { "epoch": 0.46037596756358273, "grad_norm": 0.29849804704082483, "learning_rate": 1.9616895800952994e-05, "loss": 0.3588, "step": 3747 }, { "epoch": 0.46049883278043985, "grad_norm": 0.33830990436077024, "learning_rate": 1.961077439606544e-05, "loss": 0.3834, "step": 3748 }, { "epoch": 0.46062169799729696, "grad_norm": 0.3636531308313358, "learning_rate": 1.9604652143055843e-05, "loss": 0.3899, "step": 3749 }, { "epoch": 0.4607445632141541, "grad_norm": 0.3898723505828375, "learning_rate": 1.9598529043050343e-05, "loss": 0.4576, "step": 3750 }, { "epoch": 0.4608674284310112, "grad_norm": 0.4896345453212454, "learning_rate": 1.9592405097175248e-05, "loss": 0.4588, "step": 3751 }, { "epoch": 0.4609902936478683, "grad_norm": 0.3881216335061201, "learning_rate": 1.958628030655702e-05, "loss": 0.357, "step": 3752 }, { "epoch": 0.46111315886472537, "grad_norm": 0.3642051804982982, "learning_rate": 1.958015467232227e-05, "loss": 0.4176, "step": 3753 }, { "epoch": 0.4612360240815825, "grad_norm": 0.3717492613426894, "learning_rate": 1.9574028195597776e-05, "loss": 0.4277, "step": 3754 }, { "epoch": 0.4613588892984396, "grad_norm": 0.3563425630371167, "learning_rate": 1.9567900877510456e-05, "loss": 0.4316, "step": 3755 }, { "epoch": 0.4614817545152967, "grad_norm": 0.3106684297264167, "learning_rate": 1.9561772719187394e-05, "loss": 0.3956, "step": 3756 }, { "epoch": 0.46160461973215383, "grad_norm": 0.4042141838650947, "learning_rate": 1.9555643721755826e-05, "loss": 0.493, "step": 3757 }, { "epoch": 0.46172748494901095, "grad_norm": 0.32944855261930384, "learning_rate": 1.9549513886343135e-05, "loss": 0.4206, "step": 3758 }, { "epoch": 0.46185035016586806, "grad_norm": 0.36957385528176206, "learning_rate": 1.9543383214076874e-05, "loss": 0.3773, "step": 3759 }, { "epoch": 0.4619732153827252, "grad_norm": 0.3434125942947918, "learning_rate": 1.9537251706084733e-05, "loss": 0.4191, "step": 3760 }, { "epoch": 0.46209608059958224, "grad_norm": 0.294905219799037, "learning_rate": 1.9531119363494566e-05, "loss": 0.4527, "step": 3761 }, { "epoch": 0.46221894581643935, "grad_norm": 0.3086906008575854, "learning_rate": 1.952498618743438e-05, "loss": 0.3197, "step": 3762 }, { "epoch": 0.46234181103329647, "grad_norm": 0.4258942843200486, "learning_rate": 1.9518852179032325e-05, "loss": 0.5021, "step": 3763 }, { "epoch": 0.4624646762501536, "grad_norm": 0.4114700193145706, "learning_rate": 1.9512717339416724e-05, "loss": 0.3687, "step": 3764 }, { "epoch": 0.4625875414670107, "grad_norm": 0.3735153081260466, "learning_rate": 1.950658166971603e-05, "loss": 0.4425, "step": 3765 }, { "epoch": 0.4627104066838678, "grad_norm": 0.27811633108582745, "learning_rate": 1.9500445171058866e-05, "loss": 0.3929, "step": 3766 }, { "epoch": 0.46283327190072493, "grad_norm": 0.3643295223963282, "learning_rate": 1.9494307844573997e-05, "loss": 0.4007, "step": 3767 }, { "epoch": 0.462956137117582, "grad_norm": 0.3483314882216492, "learning_rate": 1.9488169691390348e-05, "loss": 0.3512, "step": 3768 }, { "epoch": 0.4630790023344391, "grad_norm": 0.2706271544203541, "learning_rate": 1.948203071263699e-05, "loss": 0.3625, "step": 3769 }, { "epoch": 0.4632018675512962, "grad_norm": 0.37668012379048926, "learning_rate": 1.947589090944315e-05, "loss": 0.3863, "step": 3770 }, { "epoch": 0.46332473276815334, "grad_norm": 0.36313277180887404, "learning_rate": 1.9469750282938208e-05, "loss": 0.4734, "step": 3771 }, { "epoch": 0.46344759798501045, "grad_norm": 0.41118831593486294, "learning_rate": 1.9463608834251687e-05, "loss": 0.4542, "step": 3772 }, { "epoch": 0.46357046320186757, "grad_norm": 0.39933980656994383, "learning_rate": 1.9457466564513268e-05, "loss": 0.4266, "step": 3773 }, { "epoch": 0.4636933284187247, "grad_norm": 0.25945613010142116, "learning_rate": 1.945132347485278e-05, "loss": 0.2952, "step": 3774 }, { "epoch": 0.46381619363558174, "grad_norm": 0.4032902996991113, "learning_rate": 1.9445179566400206e-05, "loss": 0.4423, "step": 3775 }, { "epoch": 0.46393905885243886, "grad_norm": 0.37925427534764317, "learning_rate": 1.943903484028568e-05, "loss": 0.3921, "step": 3776 }, { "epoch": 0.464061924069296, "grad_norm": 0.39945142631991026, "learning_rate": 1.9432889297639485e-05, "loss": 0.4878, "step": 3777 }, { "epoch": 0.4641847892861531, "grad_norm": 0.38109953594743723, "learning_rate": 1.9426742939592052e-05, "loss": 0.4275, "step": 3778 }, { "epoch": 0.4643076545030102, "grad_norm": 0.44996503122243897, "learning_rate": 1.942059576727396e-05, "loss": 0.4202, "step": 3779 }, { "epoch": 0.4644305197198673, "grad_norm": 0.37723235631135454, "learning_rate": 1.941444778181595e-05, "loss": 0.4768, "step": 3780 }, { "epoch": 0.46455338493672443, "grad_norm": 0.3422485852689602, "learning_rate": 1.94082989843489e-05, "loss": 0.3129, "step": 3781 }, { "epoch": 0.4646762501535815, "grad_norm": 0.3952987399933602, "learning_rate": 1.9402149376003837e-05, "loss": 0.4353, "step": 3782 }, { "epoch": 0.4647991153704386, "grad_norm": 0.30096951957270895, "learning_rate": 1.9395998957911945e-05, "loss": 0.4075, "step": 3783 }, { "epoch": 0.4649219805872957, "grad_norm": 0.3690918941621688, "learning_rate": 1.938984773120455e-05, "loss": 0.3813, "step": 3784 }, { "epoch": 0.46504484580415284, "grad_norm": 0.3051586084915257, "learning_rate": 1.938369569701314e-05, "loss": 0.3928, "step": 3785 }, { "epoch": 0.46516771102100996, "grad_norm": 0.3284246417559814, "learning_rate": 1.9377542856469335e-05, "loss": 0.4452, "step": 3786 }, { "epoch": 0.46529057623786707, "grad_norm": 0.4952907686079838, "learning_rate": 1.937138921070491e-05, "loss": 0.3586, "step": 3787 }, { "epoch": 0.4654134414547242, "grad_norm": 0.371114070544621, "learning_rate": 1.9365234760851792e-05, "loss": 0.4692, "step": 3788 }, { "epoch": 0.4655363066715813, "grad_norm": 0.4213469228295087, "learning_rate": 1.9359079508042046e-05, "loss": 0.3953, "step": 3789 }, { "epoch": 0.46565917188843836, "grad_norm": 0.5037908532421708, "learning_rate": 1.9352923453407896e-05, "loss": 0.4791, "step": 3790 }, { "epoch": 0.4657820371052955, "grad_norm": 0.3530400914870453, "learning_rate": 1.934676659808171e-05, "loss": 0.4358, "step": 3791 }, { "epoch": 0.4659049023221526, "grad_norm": 0.32654878381005753, "learning_rate": 1.934060894319599e-05, "loss": 0.3511, "step": 3792 }, { "epoch": 0.4660277675390097, "grad_norm": 0.3064147624692356, "learning_rate": 1.933445048988341e-05, "loss": 0.4033, "step": 3793 }, { "epoch": 0.4661506327558668, "grad_norm": 0.4109641273894947, "learning_rate": 1.932829123927677e-05, "loss": 0.4067, "step": 3794 }, { "epoch": 0.46627349797272394, "grad_norm": 0.4463118901939152, "learning_rate": 1.9322131192509028e-05, "loss": 0.4344, "step": 3795 }, { "epoch": 0.46639636318958105, "grad_norm": 0.33809352333476755, "learning_rate": 1.9315970350713278e-05, "loss": 0.4546, "step": 3796 }, { "epoch": 0.4665192284064381, "grad_norm": 0.4093662784121025, "learning_rate": 1.930980871502278e-05, "loss": 0.3892, "step": 3797 }, { "epoch": 0.46664209362329523, "grad_norm": 0.3835490612886463, "learning_rate": 1.9303646286570913e-05, "loss": 0.429, "step": 3798 }, { "epoch": 0.46676495884015234, "grad_norm": 0.3694671351789465, "learning_rate": 1.9297483066491222e-05, "loss": 0.4134, "step": 3799 }, { "epoch": 0.46688782405700946, "grad_norm": 0.38072478741345417, "learning_rate": 1.9291319055917393e-05, "loss": 0.4013, "step": 3800 }, { "epoch": 0.4670106892738666, "grad_norm": 0.3543050740498428, "learning_rate": 1.9285154255983257e-05, "loss": 0.4233, "step": 3801 }, { "epoch": 0.4671335544907237, "grad_norm": 0.6260536113294074, "learning_rate": 1.927898866782278e-05, "loss": 0.4361, "step": 3802 }, { "epoch": 0.4672564197075808, "grad_norm": 0.36480826852606607, "learning_rate": 1.9272822292570092e-05, "loss": 0.3906, "step": 3803 }, { "epoch": 0.46737928492443787, "grad_norm": 0.34639773948152547, "learning_rate": 1.926665513135945e-05, "loss": 0.37, "step": 3804 }, { "epoch": 0.467502150141295, "grad_norm": 0.35669094506442944, "learning_rate": 1.9260487185325267e-05, "loss": 0.4012, "step": 3805 }, { "epoch": 0.4676250153581521, "grad_norm": 0.41250147482300037, "learning_rate": 1.92543184556021e-05, "loss": 0.3838, "step": 3806 }, { "epoch": 0.4677478805750092, "grad_norm": 0.49435294619188175, "learning_rate": 1.924814894332464e-05, "loss": 0.4746, "step": 3807 }, { "epoch": 0.4678707457918663, "grad_norm": 0.2900792477276627, "learning_rate": 1.9241978649627738e-05, "loss": 0.3954, "step": 3808 }, { "epoch": 0.46799361100872344, "grad_norm": 0.36869569251844236, "learning_rate": 1.9235807575646368e-05, "loss": 0.3926, "step": 3809 }, { "epoch": 0.46811647622558056, "grad_norm": 0.4036612680782122, "learning_rate": 1.9229635722515667e-05, "loss": 0.4753, "step": 3810 }, { "epoch": 0.4682393414424377, "grad_norm": 0.36503479289240537, "learning_rate": 1.9223463091370903e-05, "loss": 0.3668, "step": 3811 }, { "epoch": 0.46836220665929473, "grad_norm": 0.33022289406508676, "learning_rate": 1.9217289683347496e-05, "loss": 0.3975, "step": 3812 }, { "epoch": 0.46848507187615185, "grad_norm": 0.38286127839475975, "learning_rate": 1.9211115499580995e-05, "loss": 0.413, "step": 3813 }, { "epoch": 0.46860793709300896, "grad_norm": 0.341987792454679, "learning_rate": 1.9204940541207113e-05, "loss": 0.3879, "step": 3814 }, { "epoch": 0.4687308023098661, "grad_norm": 0.37440641502089067, "learning_rate": 1.919876480936169e-05, "loss": 0.429, "step": 3815 }, { "epoch": 0.4688536675267232, "grad_norm": 0.36496582801264554, "learning_rate": 1.919258830518071e-05, "loss": 0.3695, "step": 3816 }, { "epoch": 0.4689765327435803, "grad_norm": 0.41978745849740523, "learning_rate": 1.91864110298003e-05, "loss": 0.5331, "step": 3817 }, { "epoch": 0.4690993979604374, "grad_norm": 0.38960087081688843, "learning_rate": 1.918023298435673e-05, "loss": 0.4016, "step": 3818 }, { "epoch": 0.4692222631772945, "grad_norm": 0.3439747121214593, "learning_rate": 1.9174054169986415e-05, "loss": 0.3486, "step": 3819 }, { "epoch": 0.4693451283941516, "grad_norm": 0.37611737529666583, "learning_rate": 1.9167874587825902e-05, "loss": 0.3894, "step": 3820 }, { "epoch": 0.4694679936110087, "grad_norm": 0.37204206581578037, "learning_rate": 1.916169423901189e-05, "loss": 0.4424, "step": 3821 }, { "epoch": 0.46959085882786583, "grad_norm": 0.3470123007821094, "learning_rate": 1.9155513124681216e-05, "loss": 0.4696, "step": 3822 }, { "epoch": 0.46971372404472295, "grad_norm": 0.32107960394199714, "learning_rate": 1.9149331245970844e-05, "loss": 0.3669, "step": 3823 }, { "epoch": 0.46983658926158006, "grad_norm": 0.30780920926125865, "learning_rate": 1.91431486040179e-05, "loss": 0.3522, "step": 3824 }, { "epoch": 0.4699594544784372, "grad_norm": 0.3552002578329196, "learning_rate": 1.913696519995964e-05, "loss": 0.4273, "step": 3825 }, { "epoch": 0.47008231969529424, "grad_norm": 0.36593031704312906, "learning_rate": 1.9130781034933463e-05, "loss": 0.3731, "step": 3826 }, { "epoch": 0.47020518491215135, "grad_norm": 0.34105910629975694, "learning_rate": 1.9124596110076908e-05, "loss": 0.3844, "step": 3827 }, { "epoch": 0.47032805012900847, "grad_norm": 0.4032287788226979, "learning_rate": 1.911841042652764e-05, "loss": 0.4403, "step": 3828 }, { "epoch": 0.4704509153458656, "grad_norm": 0.4144649009504001, "learning_rate": 1.911222398542349e-05, "loss": 0.4535, "step": 3829 }, { "epoch": 0.4705737805627227, "grad_norm": 0.32023578206498843, "learning_rate": 1.91060367879024e-05, "loss": 0.4016, "step": 3830 }, { "epoch": 0.4706966457795798, "grad_norm": 0.31894281953737774, "learning_rate": 1.9099848835102476e-05, "loss": 0.388, "step": 3831 }, { "epoch": 0.47081951099643693, "grad_norm": 0.38727039949402126, "learning_rate": 1.9093660128161943e-05, "loss": 0.4121, "step": 3832 }, { "epoch": 0.470942376213294, "grad_norm": 0.38705496467813877, "learning_rate": 1.908747066821918e-05, "loss": 0.4099, "step": 3833 }, { "epoch": 0.4710652414301511, "grad_norm": 0.4935255967513543, "learning_rate": 1.908128045641269e-05, "loss": 0.4312, "step": 3834 }, { "epoch": 0.4711881066470082, "grad_norm": 0.316900923707705, "learning_rate": 1.9075089493881137e-05, "loss": 0.3756, "step": 3835 }, { "epoch": 0.47131097186386534, "grad_norm": 0.43364072608236076, "learning_rate": 1.9068897781763294e-05, "loss": 0.3837, "step": 3836 }, { "epoch": 0.47143383708072245, "grad_norm": 0.3120016280583626, "learning_rate": 1.9062705321198095e-05, "loss": 0.4844, "step": 3837 }, { "epoch": 0.47155670229757957, "grad_norm": 0.32266222860664073, "learning_rate": 1.90565121133246e-05, "loss": 0.358, "step": 3838 }, { "epoch": 0.4716795675144367, "grad_norm": 0.3296473605931329, "learning_rate": 1.905031815928201e-05, "loss": 0.3658, "step": 3839 }, { "epoch": 0.4718024327312938, "grad_norm": 0.3090657100631129, "learning_rate": 1.9044123460209655e-05, "loss": 0.3491, "step": 3840 }, { "epoch": 0.47192529794815086, "grad_norm": 0.33802753716470435, "learning_rate": 1.9037928017247023e-05, "loss": 0.4205, "step": 3841 }, { "epoch": 0.472048163165008, "grad_norm": 0.3183474446074638, "learning_rate": 1.9031731831533716e-05, "loss": 0.3412, "step": 3842 }, { "epoch": 0.4721710283818651, "grad_norm": 0.33543637072187565, "learning_rate": 1.902553490420949e-05, "loss": 0.3626, "step": 3843 }, { "epoch": 0.4722938935987222, "grad_norm": 1.4173868654653021, "learning_rate": 1.9019337236414218e-05, "loss": 0.4763, "step": 3844 }, { "epoch": 0.4724167588155793, "grad_norm": 0.39513553254621275, "learning_rate": 1.9013138829287932e-05, "loss": 0.3533, "step": 3845 }, { "epoch": 0.47253962403243643, "grad_norm": 0.6222541546555568, "learning_rate": 1.900693968397078e-05, "loss": 0.5388, "step": 3846 }, { "epoch": 0.47266248924929355, "grad_norm": 0.3295559323206521, "learning_rate": 1.9000739801603066e-05, "loss": 0.4272, "step": 3847 }, { "epoch": 0.4727853544661506, "grad_norm": 0.3624662831203322, "learning_rate": 1.8994539183325207e-05, "loss": 0.3283, "step": 3848 }, { "epoch": 0.4729082196830077, "grad_norm": 0.3757706276122071, "learning_rate": 1.8988337830277772e-05, "loss": 0.349, "step": 3849 }, { "epoch": 0.47303108489986484, "grad_norm": 0.3499804514563511, "learning_rate": 1.898213574360146e-05, "loss": 0.3599, "step": 3850 }, { "epoch": 0.47315395011672196, "grad_norm": 0.3407191162629826, "learning_rate": 1.8975932924437098e-05, "loss": 0.4552, "step": 3851 }, { "epoch": 0.47327681533357907, "grad_norm": 0.309982608041907, "learning_rate": 1.8969729373925668e-05, "loss": 0.3485, "step": 3852 }, { "epoch": 0.4733996805504362, "grad_norm": 0.3484408463613498, "learning_rate": 1.896352509320825e-05, "loss": 0.4421, "step": 3853 }, { "epoch": 0.4735225457672933, "grad_norm": 0.32636779638838903, "learning_rate": 1.8957320083426108e-05, "loss": 0.4597, "step": 3854 }, { "epoch": 0.47364541098415036, "grad_norm": 0.3753809125062488, "learning_rate": 1.8951114345720598e-05, "loss": 0.3535, "step": 3855 }, { "epoch": 0.4737682762010075, "grad_norm": 0.36654218794927945, "learning_rate": 1.8944907881233225e-05, "loss": 0.3804, "step": 3856 }, { "epoch": 0.4738911414178646, "grad_norm": 0.39084256252138483, "learning_rate": 1.8938700691105632e-05, "loss": 0.3934, "step": 3857 }, { "epoch": 0.4740140066347217, "grad_norm": 0.32377421402527085, "learning_rate": 1.8932492776479596e-05, "loss": 0.4138, "step": 3858 }, { "epoch": 0.4741368718515788, "grad_norm": 0.34297250690319914, "learning_rate": 1.892628413849701e-05, "loss": 0.512, "step": 3859 }, { "epoch": 0.47425973706843594, "grad_norm": 0.3201464227894041, "learning_rate": 1.892007477829992e-05, "loss": 0.402, "step": 3860 }, { "epoch": 0.47438260228529305, "grad_norm": 0.29002873816319075, "learning_rate": 1.8913864697030497e-05, "loss": 0.3686, "step": 3861 }, { "epoch": 0.47450546750215017, "grad_norm": 0.3353183706458693, "learning_rate": 1.8907653895831047e-05, "loss": 0.4226, "step": 3862 }, { "epoch": 0.47462833271900723, "grad_norm": 0.37291206825909107, "learning_rate": 1.8901442375844006e-05, "loss": 0.3847, "step": 3863 }, { "epoch": 0.47475119793586434, "grad_norm": 0.32251761648231436, "learning_rate": 1.8895230138211942e-05, "loss": 0.4659, "step": 3864 }, { "epoch": 0.47487406315272146, "grad_norm": 0.39220325029557934, "learning_rate": 1.8889017184077554e-05, "loss": 0.4322, "step": 3865 }, { "epoch": 0.4749969283695786, "grad_norm": 0.33215048078505766, "learning_rate": 1.8882803514583676e-05, "loss": 0.3757, "step": 3866 }, { "epoch": 0.4751197935864357, "grad_norm": 0.33519743584462924, "learning_rate": 1.8876589130873273e-05, "loss": 0.3926, "step": 3867 }, { "epoch": 0.4752426588032928, "grad_norm": 0.4350557104662903, "learning_rate": 1.8870374034089434e-05, "loss": 0.4256, "step": 3868 }, { "epoch": 0.4753655240201499, "grad_norm": 0.317423649331289, "learning_rate": 1.8864158225375403e-05, "loss": 0.303, "step": 3869 }, { "epoch": 0.475488389237007, "grad_norm": 0.3589062096040966, "learning_rate": 1.8857941705874514e-05, "loss": 0.4506, "step": 3870 }, { "epoch": 0.4756112544538641, "grad_norm": 0.32754671499341825, "learning_rate": 1.8851724476730275e-05, "loss": 0.3876, "step": 3871 }, { "epoch": 0.4757341196707212, "grad_norm": 0.27217171903630294, "learning_rate": 1.88455065390863e-05, "loss": 0.4052, "step": 3872 }, { "epoch": 0.4758569848875783, "grad_norm": 0.3862572543777813, "learning_rate": 1.8839287894086334e-05, "loss": 0.3995, "step": 3873 }, { "epoch": 0.47597985010443544, "grad_norm": 0.32841136834575996, "learning_rate": 1.8833068542874258e-05, "loss": 0.4077, "step": 3874 }, { "epoch": 0.47610271532129256, "grad_norm": 0.36870562959825776, "learning_rate": 1.882684848659408e-05, "loss": 0.4933, "step": 3875 }, { "epoch": 0.4762255805381497, "grad_norm": 0.34368450017405, "learning_rate": 1.8820627726389944e-05, "loss": 0.4042, "step": 3876 }, { "epoch": 0.47634844575500673, "grad_norm": 0.32956954651967124, "learning_rate": 1.8814406263406115e-05, "loss": 0.4235, "step": 3877 }, { "epoch": 0.47647131097186385, "grad_norm": 0.3524215536246748, "learning_rate": 1.880818409878699e-05, "loss": 0.3104, "step": 3878 }, { "epoch": 0.47659417618872096, "grad_norm": 0.3525988955259259, "learning_rate": 1.8801961233677095e-05, "loss": 0.4635, "step": 3879 }, { "epoch": 0.4767170414055781, "grad_norm": 0.2922569707465314, "learning_rate": 1.879573766922109e-05, "loss": 0.3296, "step": 3880 }, { "epoch": 0.4768399066224352, "grad_norm": 0.338908320256987, "learning_rate": 1.878951340656376e-05, "loss": 0.3628, "step": 3881 }, { "epoch": 0.4769627718392923, "grad_norm": 0.3335279619999204, "learning_rate": 1.8783288446850006e-05, "loss": 0.4147, "step": 3882 }, { "epoch": 0.4770856370561494, "grad_norm": 0.33799723440810786, "learning_rate": 1.8777062791224883e-05, "loss": 0.3883, "step": 3883 }, { "epoch": 0.4772085022730065, "grad_norm": 0.29739922965192006, "learning_rate": 1.877083644083356e-05, "loss": 0.2999, "step": 3884 }, { "epoch": 0.4773313674898636, "grad_norm": 0.30591883146151605, "learning_rate": 1.876460939682132e-05, "loss": 0.4362, "step": 3885 }, { "epoch": 0.4774542327067207, "grad_norm": 0.4018407689035628, "learning_rate": 1.8758381660333595e-05, "loss": 0.4731, "step": 3886 }, { "epoch": 0.47757709792357783, "grad_norm": 0.39363162749476716, "learning_rate": 1.8752153232515946e-05, "loss": 0.3702, "step": 3887 }, { "epoch": 0.47769996314043495, "grad_norm": 0.3293738408206716, "learning_rate": 1.874592411451404e-05, "loss": 0.3675, "step": 3888 }, { "epoch": 0.47782282835729206, "grad_norm": 0.33316243011755003, "learning_rate": 1.873969430747368e-05, "loss": 0.3535, "step": 3889 }, { "epoch": 0.4779456935741492, "grad_norm": 0.35326241591017277, "learning_rate": 1.8733463812540812e-05, "loss": 0.4482, "step": 3890 }, { "epoch": 0.4780685587910063, "grad_norm": 0.33350286168371934, "learning_rate": 1.8727232630861483e-05, "loss": 0.3499, "step": 3891 }, { "epoch": 0.47819142400786335, "grad_norm": 0.353638260280847, "learning_rate": 1.8721000763581888e-05, "loss": 0.4281, "step": 3892 }, { "epoch": 0.47831428922472047, "grad_norm": 0.3282033752698834, "learning_rate": 1.8714768211848336e-05, "loss": 0.3844, "step": 3893 }, { "epoch": 0.4784371544415776, "grad_norm": 0.3345901290319475, "learning_rate": 1.870853497680726e-05, "loss": 0.3739, "step": 3894 }, { "epoch": 0.4785600196584347, "grad_norm": 0.34876612082384867, "learning_rate": 1.8702301059605226e-05, "loss": 0.3498, "step": 3895 }, { "epoch": 0.4786828848752918, "grad_norm": 0.40870442218104225, "learning_rate": 1.869606646138892e-05, "loss": 0.4319, "step": 3896 }, { "epoch": 0.47880575009214893, "grad_norm": 0.3483351789443117, "learning_rate": 1.8689831183305157e-05, "loss": 0.3696, "step": 3897 }, { "epoch": 0.47892861530900604, "grad_norm": 0.2994477620958767, "learning_rate": 1.8683595226500884e-05, "loss": 0.4147, "step": 3898 }, { "epoch": 0.4790514805258631, "grad_norm": 0.3887528485871418, "learning_rate": 1.867735859212315e-05, "loss": 0.4106, "step": 3899 }, { "epoch": 0.4791743457427202, "grad_norm": 0.3492948301143615, "learning_rate": 1.8671121281319156e-05, "loss": 0.367, "step": 3900 }, { "epoch": 0.47929721095957734, "grad_norm": 0.3581680895854424, "learning_rate": 1.866488329523621e-05, "loss": 0.423, "step": 3901 }, { "epoch": 0.47942007617643445, "grad_norm": 0.44769361990416195, "learning_rate": 1.865864463502175e-05, "loss": 0.464, "step": 3902 }, { "epoch": 0.47954294139329157, "grad_norm": 0.3028866890265796, "learning_rate": 1.8652405301823333e-05, "loss": 0.375, "step": 3903 }, { "epoch": 0.4796658066101487, "grad_norm": 0.30927394306604444, "learning_rate": 1.8646165296788654e-05, "loss": 0.3253, "step": 3904 }, { "epoch": 0.4797886718270058, "grad_norm": 0.38404400201112426, "learning_rate": 1.863992462106551e-05, "loss": 0.367, "step": 3905 }, { "epoch": 0.47991153704386286, "grad_norm": 0.3853077171814125, "learning_rate": 1.863368327580184e-05, "loss": 0.4339, "step": 3906 }, { "epoch": 0.48003440226072, "grad_norm": 0.3771693699141375, "learning_rate": 1.8627441262145692e-05, "loss": 0.3364, "step": 3907 }, { "epoch": 0.4801572674775771, "grad_norm": 0.3559502358093738, "learning_rate": 1.8621198581245255e-05, "loss": 0.3321, "step": 3908 }, { "epoch": 0.4802801326944342, "grad_norm": 0.3801317924545894, "learning_rate": 1.8614955234248816e-05, "loss": 0.4094, "step": 3909 }, { "epoch": 0.4804029979112913, "grad_norm": 0.3532205712953907, "learning_rate": 1.8608711222304814e-05, "loss": 0.4075, "step": 3910 }, { "epoch": 0.48052586312814843, "grad_norm": 0.4057057331859042, "learning_rate": 1.8602466546561776e-05, "loss": 0.3626, "step": 3911 }, { "epoch": 0.48064872834500555, "grad_norm": 0.39164383077159487, "learning_rate": 1.859622120816839e-05, "loss": 0.4556, "step": 3912 }, { "epoch": 0.4807715935618626, "grad_norm": 0.42313650294240984, "learning_rate": 1.858997520827343e-05, "loss": 0.375, "step": 3913 }, { "epoch": 0.4808944587787197, "grad_norm": 0.38899852252151, "learning_rate": 1.858372854802581e-05, "loss": 0.3911, "step": 3914 }, { "epoch": 0.48101732399557684, "grad_norm": 0.40595948509607394, "learning_rate": 1.857748122857457e-05, "loss": 0.376, "step": 3915 }, { "epoch": 0.48114018921243396, "grad_norm": 0.3635195981290762, "learning_rate": 1.8571233251068853e-05, "loss": 0.4485, "step": 3916 }, { "epoch": 0.48126305442929107, "grad_norm": 0.4222797437757623, "learning_rate": 1.856498461665795e-05, "loss": 0.4622, "step": 3917 }, { "epoch": 0.4813859196461482, "grad_norm": 0.33039887637584414, "learning_rate": 1.8558735326491233e-05, "loss": 0.3887, "step": 3918 }, { "epoch": 0.4815087848630053, "grad_norm": 0.4008376550520748, "learning_rate": 1.855248538171824e-05, "loss": 0.3879, "step": 3919 }, { "epoch": 0.4816316500798624, "grad_norm": 0.33254732684440197, "learning_rate": 1.85462347834886e-05, "loss": 0.4631, "step": 3920 }, { "epoch": 0.4817545152967195, "grad_norm": 0.5199503951938451, "learning_rate": 1.8539983532952065e-05, "loss": 0.3958, "step": 3921 }, { "epoch": 0.4818773805135766, "grad_norm": 0.35818785512374995, "learning_rate": 1.853373163125852e-05, "loss": 0.3473, "step": 3922 }, { "epoch": 0.4820002457304337, "grad_norm": 0.38790243372905986, "learning_rate": 1.852747907955796e-05, "loss": 0.3821, "step": 3923 }, { "epoch": 0.4821231109472908, "grad_norm": 0.37450399451832744, "learning_rate": 1.8521225879000496e-05, "loss": 0.4343, "step": 3924 }, { "epoch": 0.48224597616414794, "grad_norm": 0.35628708584499835, "learning_rate": 1.851497203073637e-05, "loss": 0.4302, "step": 3925 }, { "epoch": 0.48236884138100505, "grad_norm": 0.3206652150276564, "learning_rate": 1.850871753591593e-05, "loss": 0.3956, "step": 3926 }, { "epoch": 0.48249170659786217, "grad_norm": 0.4448951898455375, "learning_rate": 1.8502462395689663e-05, "loss": 0.4616, "step": 3927 }, { "epoch": 0.48261457181471923, "grad_norm": 0.3987694133460359, "learning_rate": 1.8496206611208144e-05, "loss": 0.3764, "step": 3928 }, { "epoch": 0.48273743703157634, "grad_norm": 0.33316729752114577, "learning_rate": 1.8489950183622097e-05, "loss": 0.313, "step": 3929 }, { "epoch": 0.48286030224843346, "grad_norm": 0.3903288309304369, "learning_rate": 1.8483693114082346e-05, "loss": 0.3895, "step": 3930 }, { "epoch": 0.4829831674652906, "grad_norm": 0.3883553079270256, "learning_rate": 1.847743540373984e-05, "loss": 0.3736, "step": 3931 }, { "epoch": 0.4831060326821477, "grad_norm": 0.3399968824385403, "learning_rate": 1.8471177053745644e-05, "loss": 0.4086, "step": 3932 }, { "epoch": 0.4832288978990048, "grad_norm": 0.2888260273731495, "learning_rate": 1.8464918065250935e-05, "loss": 0.3247, "step": 3933 }, { "epoch": 0.4833517631158619, "grad_norm": 0.4428995489090141, "learning_rate": 1.8458658439407024e-05, "loss": 0.4667, "step": 3934 }, { "epoch": 0.483474628332719, "grad_norm": 0.319342411534796, "learning_rate": 1.845239817736532e-05, "loss": 0.4488, "step": 3935 }, { "epoch": 0.4835974935495761, "grad_norm": 0.39215218712339484, "learning_rate": 1.8446137280277362e-05, "loss": 0.3433, "step": 3936 }, { "epoch": 0.4837203587664332, "grad_norm": 0.32157272887748134, "learning_rate": 1.84398757492948e-05, "loss": 0.4869, "step": 3937 }, { "epoch": 0.4838432239832903, "grad_norm": 0.3410639703883864, "learning_rate": 1.8433613585569406e-05, "loss": 0.3794, "step": 3938 }, { "epoch": 0.48396608920014744, "grad_norm": 0.3782325596075135, "learning_rate": 1.8427350790253055e-05, "loss": 0.3881, "step": 3939 }, { "epoch": 0.48408895441700456, "grad_norm": 0.30830448740601757, "learning_rate": 1.8421087364497756e-05, "loss": 0.3491, "step": 3940 }, { "epoch": 0.4842118196338617, "grad_norm": 0.38907738289618177, "learning_rate": 1.8414823309455625e-05, "loss": 0.4059, "step": 3941 }, { "epoch": 0.4843346848507188, "grad_norm": 0.3358051095859802, "learning_rate": 1.8408558626278892e-05, "loss": 0.449, "step": 3942 }, { "epoch": 0.48445755006757585, "grad_norm": 0.39195030824800353, "learning_rate": 1.84022933161199e-05, "loss": 0.3851, "step": 3943 }, { "epoch": 0.48458041528443296, "grad_norm": 0.3413947902222491, "learning_rate": 1.8396027380131123e-05, "loss": 0.4703, "step": 3944 }, { "epoch": 0.4847032805012901, "grad_norm": 0.399735033655355, "learning_rate": 1.838976081946513e-05, "loss": 0.4714, "step": 3945 }, { "epoch": 0.4848261457181472, "grad_norm": 0.4341449344081107, "learning_rate": 1.8383493635274618e-05, "loss": 0.4411, "step": 3946 }, { "epoch": 0.4849490109350043, "grad_norm": 0.5839522330598493, "learning_rate": 1.8377225828712393e-05, "loss": 0.4722, "step": 3947 }, { "epoch": 0.4850718761518614, "grad_norm": 0.4156387827145216, "learning_rate": 1.8370957400931383e-05, "loss": 0.468, "step": 3948 }, { "epoch": 0.48519474136871854, "grad_norm": 0.3682128392857113, "learning_rate": 1.8364688353084614e-05, "loss": 0.4742, "step": 3949 }, { "epoch": 0.4853176065855756, "grad_norm": 0.3874079393347861, "learning_rate": 1.835841868632525e-05, "loss": 0.3932, "step": 3950 }, { "epoch": 0.4854404718024327, "grad_norm": 0.3652244995290719, "learning_rate": 1.8352148401806546e-05, "loss": 0.3805, "step": 3951 }, { "epoch": 0.48556333701928983, "grad_norm": 0.47829840366298565, "learning_rate": 1.8345877500681887e-05, "loss": 0.4486, "step": 3952 }, { "epoch": 0.48568620223614695, "grad_norm": 0.3514301251451787, "learning_rate": 1.8339605984104755e-05, "loss": 0.502, "step": 3953 }, { "epoch": 0.48580906745300406, "grad_norm": 0.3836740542153559, "learning_rate": 1.833333385322876e-05, "loss": 0.3586, "step": 3954 }, { "epoch": 0.4859319326698612, "grad_norm": 0.3552171621523195, "learning_rate": 1.8327061109207622e-05, "loss": 0.3691, "step": 3955 }, { "epoch": 0.4860547978867183, "grad_norm": 0.3839087450721339, "learning_rate": 1.8320787753195168e-05, "loss": 0.3329, "step": 3956 }, { "epoch": 0.48617766310357535, "grad_norm": 0.34395906856301334, "learning_rate": 1.8314513786345345e-05, "loss": 0.4098, "step": 3957 }, { "epoch": 0.48630052832043247, "grad_norm": 0.35194983311029326, "learning_rate": 1.8308239209812204e-05, "loss": 0.3162, "step": 3958 }, { "epoch": 0.4864233935372896, "grad_norm": 0.4748418269610896, "learning_rate": 1.8301964024749917e-05, "loss": 0.442, "step": 3959 }, { "epoch": 0.4865462587541467, "grad_norm": 0.36932698711307266, "learning_rate": 1.8295688232312764e-05, "loss": 0.4732, "step": 3960 }, { "epoch": 0.4866691239710038, "grad_norm": 0.3521656027830729, "learning_rate": 1.8289411833655134e-05, "loss": 0.3529, "step": 3961 }, { "epoch": 0.48679198918786093, "grad_norm": 0.37729444782270727, "learning_rate": 1.8283134829931526e-05, "loss": 0.4396, "step": 3962 }, { "epoch": 0.48691485440471804, "grad_norm": 0.37344200015140616, "learning_rate": 1.827685722229656e-05, "loss": 0.4608, "step": 3963 }, { "epoch": 0.4870377196215751, "grad_norm": 0.3060838996744568, "learning_rate": 1.8270579011904957e-05, "loss": 0.4019, "step": 3964 }, { "epoch": 0.4871605848384322, "grad_norm": 0.3467992555104117, "learning_rate": 1.8264300199911557e-05, "loss": 0.4425, "step": 3965 }, { "epoch": 0.48728345005528934, "grad_norm": 0.3810674054069561, "learning_rate": 1.8258020787471307e-05, "loss": 0.3802, "step": 3966 }, { "epoch": 0.48740631527214645, "grad_norm": 0.3547386765232057, "learning_rate": 1.8251740775739258e-05, "loss": 0.4211, "step": 3967 }, { "epoch": 0.48752918048900357, "grad_norm": 0.34252926980608606, "learning_rate": 1.824546016587058e-05, "loss": 0.3816, "step": 3968 }, { "epoch": 0.4876520457058607, "grad_norm": 0.3784184414065755, "learning_rate": 1.823917895902056e-05, "loss": 0.368, "step": 3969 }, { "epoch": 0.4877749109227178, "grad_norm": 0.43634792460902305, "learning_rate": 1.8232897156344574e-05, "loss": 0.4075, "step": 3970 }, { "epoch": 0.4878977761395749, "grad_norm": 0.30762092654577217, "learning_rate": 1.822661475899812e-05, "loss": 0.3845, "step": 3971 }, { "epoch": 0.488020641356432, "grad_norm": 0.2826284262967095, "learning_rate": 1.8220331768136806e-05, "loss": 0.3371, "step": 3972 }, { "epoch": 0.4881435065732891, "grad_norm": 0.37020900227587167, "learning_rate": 1.821404818491635e-05, "loss": 0.3716, "step": 3973 }, { "epoch": 0.4882663717901462, "grad_norm": 0.3167293553791776, "learning_rate": 1.820776401049257e-05, "loss": 0.4097, "step": 3974 }, { "epoch": 0.4883892370070033, "grad_norm": 0.32038153791767526, "learning_rate": 1.8201479246021405e-05, "loss": 0.4359, "step": 3975 }, { "epoch": 0.48851210222386043, "grad_norm": 0.37841876132705865, "learning_rate": 1.81951938926589e-05, "loss": 0.4822, "step": 3976 }, { "epoch": 0.48863496744071755, "grad_norm": 0.31304368026134693, "learning_rate": 1.8188907951561194e-05, "loss": 0.471, "step": 3977 }, { "epoch": 0.48875783265757466, "grad_norm": 0.4343115435791443, "learning_rate": 1.8182621423884555e-05, "loss": 0.4719, "step": 3978 }, { "epoch": 0.4888806978744317, "grad_norm": 0.36800100609045944, "learning_rate": 1.8176334310785344e-05, "loss": 0.3681, "step": 3979 }, { "epoch": 0.48900356309128884, "grad_norm": 0.4022027890992233, "learning_rate": 1.8170046613420037e-05, "loss": 0.421, "step": 3980 }, { "epoch": 0.48912642830814596, "grad_norm": 0.3700126885068853, "learning_rate": 1.8163758332945215e-05, "loss": 0.3858, "step": 3981 }, { "epoch": 0.48924929352500307, "grad_norm": 0.3269044911714711, "learning_rate": 1.815746947051756e-05, "loss": 0.3624, "step": 3982 }, { "epoch": 0.4893721587418602, "grad_norm": 0.3253902319405989, "learning_rate": 1.8151180027293877e-05, "loss": 0.3538, "step": 3983 }, { "epoch": 0.4894950239587173, "grad_norm": 0.38939450865644654, "learning_rate": 1.8144890004431066e-05, "loss": 0.494, "step": 3984 }, { "epoch": 0.4896178891755744, "grad_norm": 0.38486223247580037, "learning_rate": 1.8138599403086127e-05, "loss": 0.4656, "step": 3985 }, { "epoch": 0.4897407543924315, "grad_norm": 0.344670613824592, "learning_rate": 1.8132308224416186e-05, "loss": 0.4548, "step": 3986 }, { "epoch": 0.4898636196092886, "grad_norm": 0.44991115351494376, "learning_rate": 1.812601646957846e-05, "loss": 0.4351, "step": 3987 }, { "epoch": 0.4899864848261457, "grad_norm": 0.3449405473164424, "learning_rate": 1.811972413973028e-05, "loss": 0.449, "step": 3988 }, { "epoch": 0.4901093500430028, "grad_norm": 0.3541393355732402, "learning_rate": 1.8113431236029078e-05, "loss": 0.2939, "step": 3989 }, { "epoch": 0.49023221525985994, "grad_norm": 0.3165048086353164, "learning_rate": 1.8107137759632387e-05, "loss": 0.3944, "step": 3990 }, { "epoch": 0.49035508047671705, "grad_norm": 0.2843389160511553, "learning_rate": 1.8100843711697854e-05, "loss": 0.3123, "step": 3991 }, { "epoch": 0.49047794569357417, "grad_norm": 0.3446759677054385, "learning_rate": 1.8094549093383236e-05, "loss": 0.4481, "step": 3992 }, { "epoch": 0.4906008109104313, "grad_norm": 0.30596006222280847, "learning_rate": 1.8088253905846377e-05, "loss": 0.3555, "step": 3993 }, { "epoch": 0.49072367612728834, "grad_norm": 0.34058655256247156, "learning_rate": 1.8081958150245243e-05, "loss": 0.4259, "step": 3994 }, { "epoch": 0.49084654134414546, "grad_norm": 0.31622333087297316, "learning_rate": 1.807566182773789e-05, "loss": 0.4496, "step": 3995 }, { "epoch": 0.4909694065610026, "grad_norm": 0.467495794348401, "learning_rate": 1.8069364939482496e-05, "loss": 0.531, "step": 3996 }, { "epoch": 0.4910922717778597, "grad_norm": 0.35129748334024824, "learning_rate": 1.8063067486637324e-05, "loss": 0.4437, "step": 3997 }, { "epoch": 0.4912151369947168, "grad_norm": 0.3154669549010274, "learning_rate": 1.8056769470360748e-05, "loss": 0.4537, "step": 3998 }, { "epoch": 0.4913380022115739, "grad_norm": 0.39186937216232537, "learning_rate": 1.8050470891811257e-05, "loss": 0.3413, "step": 3999 }, { "epoch": 0.49146086742843104, "grad_norm": 0.3308139049623009, "learning_rate": 1.804417175214743e-05, "loss": 0.387, "step": 4000 }, { "epoch": 0.4915837326452881, "grad_norm": 0.40202894447346277, "learning_rate": 1.8037872052527948e-05, "loss": 0.3489, "step": 4001 }, { "epoch": 0.4917065978621452, "grad_norm": 0.28770520054875715, "learning_rate": 1.8031571794111602e-05, "loss": 0.4417, "step": 4002 }, { "epoch": 0.4918294630790023, "grad_norm": 0.3330377064645265, "learning_rate": 1.8025270978057285e-05, "loss": 0.3542, "step": 4003 }, { "epoch": 0.49195232829585944, "grad_norm": 0.3785801551786474, "learning_rate": 1.8018969605523996e-05, "loss": 0.4131, "step": 4004 }, { "epoch": 0.49207519351271656, "grad_norm": 0.3599095365193972, "learning_rate": 1.8012667677670825e-05, "loss": 0.373, "step": 4005 }, { "epoch": 0.4921980587295737, "grad_norm": 0.4116490075597935, "learning_rate": 1.8006365195656972e-05, "loss": 0.3622, "step": 4006 }, { "epoch": 0.4923209239464308, "grad_norm": 0.2701729736684711, "learning_rate": 1.8000062160641737e-05, "loss": 0.3613, "step": 4007 }, { "epoch": 0.49244378916328785, "grad_norm": 0.3264465326842336, "learning_rate": 1.7993758573784525e-05, "loss": 0.3893, "step": 4008 }, { "epoch": 0.49256665438014496, "grad_norm": 0.37531984378920374, "learning_rate": 1.798745443624484e-05, "loss": 0.3486, "step": 4009 }, { "epoch": 0.4926895195970021, "grad_norm": 0.3601189201371511, "learning_rate": 1.798114974918228e-05, "loss": 0.435, "step": 4010 }, { "epoch": 0.4928123848138592, "grad_norm": 0.4249028584912955, "learning_rate": 1.797484451375656e-05, "loss": 0.3783, "step": 4011 }, { "epoch": 0.4929352500307163, "grad_norm": 0.32761942920388737, "learning_rate": 1.7968538731127486e-05, "loss": 0.4138, "step": 4012 }, { "epoch": 0.4930581152475734, "grad_norm": 0.34865942682352735, "learning_rate": 1.7962232402454965e-05, "loss": 0.496, "step": 4013 }, { "epoch": 0.49318098046443054, "grad_norm": 0.37310551090988076, "learning_rate": 1.7955925528898997e-05, "loss": 0.3909, "step": 4014 }, { "epoch": 0.4933038456812876, "grad_norm": 0.3471795458982312, "learning_rate": 1.7949618111619706e-05, "loss": 0.4074, "step": 4015 }, { "epoch": 0.4934267108981447, "grad_norm": 0.3177409193052805, "learning_rate": 1.794331015177729e-05, "loss": 0.3372, "step": 4016 }, { "epoch": 0.49354957611500183, "grad_norm": 0.3605125484231337, "learning_rate": 1.793700165053206e-05, "loss": 0.4611, "step": 4017 }, { "epoch": 0.49367244133185895, "grad_norm": 0.364473272165005, "learning_rate": 1.793069260904442e-05, "loss": 0.4068, "step": 4018 }, { "epoch": 0.49379530654871606, "grad_norm": 0.39942807423384374, "learning_rate": 1.7924383028474884e-05, "loss": 0.4231, "step": 4019 }, { "epoch": 0.4939181717655732, "grad_norm": 0.36030875897936077, "learning_rate": 1.7918072909984057e-05, "loss": 0.4395, "step": 4020 }, { "epoch": 0.4940410369824303, "grad_norm": 0.45703909261458076, "learning_rate": 1.7911762254732636e-05, "loss": 0.3322, "step": 4021 }, { "epoch": 0.4941639021992874, "grad_norm": 0.3229305158684031, "learning_rate": 1.7905451063881435e-05, "loss": 0.3549, "step": 4022 }, { "epoch": 0.49428676741614447, "grad_norm": 0.3951931015548432, "learning_rate": 1.7899139338591354e-05, "loss": 0.492, "step": 4023 }, { "epoch": 0.4944096326330016, "grad_norm": 0.3140928415860295, "learning_rate": 1.7892827080023393e-05, "loss": 0.3946, "step": 4024 }, { "epoch": 0.4945324978498587, "grad_norm": 0.3805086337822165, "learning_rate": 1.7886514289338656e-05, "loss": 0.3907, "step": 4025 }, { "epoch": 0.4946553630667158, "grad_norm": 0.37673624941775413, "learning_rate": 1.7880200967698332e-05, "loss": 0.4952, "step": 4026 }, { "epoch": 0.49477822828357293, "grad_norm": 0.4611611136857537, "learning_rate": 1.7873887116263715e-05, "loss": 0.4547, "step": 4027 }, { "epoch": 0.49490109350043004, "grad_norm": 0.4184443068715193, "learning_rate": 1.7867572736196204e-05, "loss": 0.5271, "step": 4028 }, { "epoch": 0.49502395871728716, "grad_norm": 0.39196823583079016, "learning_rate": 1.7861257828657283e-05, "loss": 0.4468, "step": 4029 }, { "epoch": 0.4951468239341442, "grad_norm": 0.3320311837617882, "learning_rate": 1.785494239480854e-05, "loss": 0.3529, "step": 4030 }, { "epoch": 0.49526968915100134, "grad_norm": 0.5006846069173868, "learning_rate": 1.784862643581166e-05, "loss": 0.4571, "step": 4031 }, { "epoch": 0.49539255436785845, "grad_norm": 0.363419467110463, "learning_rate": 1.7842309952828424e-05, "loss": 0.4431, "step": 4032 }, { "epoch": 0.49551541958471557, "grad_norm": 0.3664645195376408, "learning_rate": 1.7835992947020702e-05, "loss": 0.3915, "step": 4033 }, { "epoch": 0.4956382848015727, "grad_norm": 0.4117416491924022, "learning_rate": 1.782967541955047e-05, "loss": 0.4535, "step": 4034 }, { "epoch": 0.4957611500184298, "grad_norm": 0.6740568879954986, "learning_rate": 1.7823357371579797e-05, "loss": 0.5948, "step": 4035 }, { "epoch": 0.4958840152352869, "grad_norm": 0.3347032310103473, "learning_rate": 1.7817038804270848e-05, "loss": 0.4483, "step": 4036 }, { "epoch": 0.496006880452144, "grad_norm": 0.3082480814188309, "learning_rate": 1.781071971878587e-05, "loss": 0.4037, "step": 4037 }, { "epoch": 0.4961297456690011, "grad_norm": 0.2885202240779538, "learning_rate": 1.7804400116287238e-05, "loss": 0.3794, "step": 4038 }, { "epoch": 0.4962526108858582, "grad_norm": 0.31421915698526554, "learning_rate": 1.7798079997937387e-05, "loss": 0.3689, "step": 4039 }, { "epoch": 0.4963754761027153, "grad_norm": 0.3303303181351468, "learning_rate": 1.7791759364898865e-05, "loss": 0.3603, "step": 4040 }, { "epoch": 0.49649834131957243, "grad_norm": 0.33696796019969805, "learning_rate": 1.7785438218334317e-05, "loss": 0.4017, "step": 4041 }, { "epoch": 0.49662120653642955, "grad_norm": 0.341349928197558, "learning_rate": 1.777911655940647e-05, "loss": 0.4172, "step": 4042 }, { "epoch": 0.49674407175328666, "grad_norm": 0.30950054248448394, "learning_rate": 1.7772794389278156e-05, "loss": 0.4278, "step": 4043 }, { "epoch": 0.4968669369701438, "grad_norm": 0.2839820974805423, "learning_rate": 1.77664717091123e-05, "loss": 0.3416, "step": 4044 }, { "epoch": 0.49698980218700084, "grad_norm": 0.3422939126760188, "learning_rate": 1.776014852007191e-05, "loss": 0.3087, "step": 4045 }, { "epoch": 0.49711266740385796, "grad_norm": 0.3427807178944057, "learning_rate": 1.77538248233201e-05, "loss": 0.3369, "step": 4046 }, { "epoch": 0.49723553262071507, "grad_norm": 0.3700420306536858, "learning_rate": 1.7747500620020076e-05, "loss": 0.5202, "step": 4047 }, { "epoch": 0.4973583978375722, "grad_norm": 0.37899865068024446, "learning_rate": 1.7741175911335125e-05, "loss": 0.3731, "step": 4048 }, { "epoch": 0.4974812630544293, "grad_norm": 0.47591654427759067, "learning_rate": 1.773485069842865e-05, "loss": 0.4517, "step": 4049 }, { "epoch": 0.4976041282712864, "grad_norm": 0.3569144120317667, "learning_rate": 1.772852498246412e-05, "loss": 0.3016, "step": 4050 }, { "epoch": 0.49772699348814353, "grad_norm": 0.34097399915778215, "learning_rate": 1.7722198764605114e-05, "loss": 0.4237, "step": 4051 }, { "epoch": 0.4978498587050006, "grad_norm": 0.3782880406259186, "learning_rate": 1.77158720460153e-05, "loss": 0.2995, "step": 4052 }, { "epoch": 0.4979727239218577, "grad_norm": 0.34256234021917914, "learning_rate": 1.770954482785844e-05, "loss": 0.355, "step": 4053 }, { "epoch": 0.4980955891387148, "grad_norm": 0.2939966528887178, "learning_rate": 1.770321711129838e-05, "loss": 0.3724, "step": 4054 }, { "epoch": 0.49821845435557194, "grad_norm": 0.30968547198038426, "learning_rate": 1.7696888897499062e-05, "loss": 0.415, "step": 4055 }, { "epoch": 0.49834131957242905, "grad_norm": 0.4008454256756699, "learning_rate": 1.769056018762452e-05, "loss": 0.3995, "step": 4056 }, { "epoch": 0.49846418478928617, "grad_norm": 0.43981011175311413, "learning_rate": 1.7684230982838883e-05, "loss": 0.4627, "step": 4057 }, { "epoch": 0.4985870500061433, "grad_norm": 0.38067999058286917, "learning_rate": 1.7677901284306363e-05, "loss": 0.4493, "step": 4058 }, { "epoch": 0.49870991522300034, "grad_norm": 0.32274120039921583, "learning_rate": 1.767157109319127e-05, "loss": 0.3613, "step": 4059 }, { "epoch": 0.49883278043985746, "grad_norm": 0.36547551646069537, "learning_rate": 1.7665240410657996e-05, "loss": 0.4457, "step": 4060 }, { "epoch": 0.4989556456567146, "grad_norm": 0.3343741406250635, "learning_rate": 1.7658909237871035e-05, "loss": 0.3349, "step": 4061 }, { "epoch": 0.4990785108735717, "grad_norm": 0.39918749854451535, "learning_rate": 1.7652577575994965e-05, "loss": 0.3553, "step": 4062 }, { "epoch": 0.4992013760904288, "grad_norm": 0.49903782312350303, "learning_rate": 1.7646245426194453e-05, "loss": 0.4259, "step": 4063 }, { "epoch": 0.4993242413072859, "grad_norm": 0.36299628178562415, "learning_rate": 1.7639912789634257e-05, "loss": 0.4576, "step": 4064 }, { "epoch": 0.49944710652414304, "grad_norm": 0.3097871682641119, "learning_rate": 1.763357966747922e-05, "loss": 0.3418, "step": 4065 }, { "epoch": 0.4995699717410001, "grad_norm": 0.3706060054430856, "learning_rate": 1.7627246060894285e-05, "loss": 0.3508, "step": 4066 }, { "epoch": 0.4996928369578572, "grad_norm": 0.3874363487626205, "learning_rate": 1.7620911971044472e-05, "loss": 0.3374, "step": 4067 }, { "epoch": 0.4998157021747143, "grad_norm": 0.3341075652265549, "learning_rate": 1.7614577399094904e-05, "loss": 0.4557, "step": 4068 }, { "epoch": 0.49993856739157144, "grad_norm": 0.3467860626830829, "learning_rate": 1.7608242346210775e-05, "loss": 0.43, "step": 4069 }, { "epoch": 0.5000614326084285, "grad_norm": 0.38946237900908576, "learning_rate": 1.7601906813557383e-05, "loss": 0.4344, "step": 4070 }, { "epoch": 0.5001842978252856, "grad_norm": 0.3339066613709426, "learning_rate": 1.7595570802300107e-05, "loss": 0.4084, "step": 4071 }, { "epoch": 0.5003071630421427, "grad_norm": 0.33137450416971564, "learning_rate": 1.758923431360442e-05, "loss": 0.3836, "step": 4072 }, { "epoch": 0.5004300282589998, "grad_norm": 0.42773108428903783, "learning_rate": 1.7582897348635867e-05, "loss": 0.4525, "step": 4073 }, { "epoch": 0.500552893475857, "grad_norm": 0.37036592211068114, "learning_rate": 1.7576559908560104e-05, "loss": 0.324, "step": 4074 }, { "epoch": 0.5006757586927141, "grad_norm": 0.3611327526907515, "learning_rate": 1.7570221994542845e-05, "loss": 0.4141, "step": 4075 }, { "epoch": 0.5007986239095712, "grad_norm": 0.36288058288360037, "learning_rate": 1.7563883607749927e-05, "loss": 0.4088, "step": 4076 }, { "epoch": 0.5009214891264283, "grad_norm": 0.320626130603874, "learning_rate": 1.755754474934724e-05, "loss": 0.4371, "step": 4077 }, { "epoch": 0.5010443543432854, "grad_norm": 0.3131380246448689, "learning_rate": 1.7551205420500785e-05, "loss": 0.3727, "step": 4078 }, { "epoch": 0.5011672195601425, "grad_norm": 0.3404743971531739, "learning_rate": 1.7544865622376638e-05, "loss": 0.3443, "step": 4079 }, { "epoch": 0.5012900847769997, "grad_norm": 0.4488119258361691, "learning_rate": 1.753852535614097e-05, "loss": 0.4463, "step": 4080 }, { "epoch": 0.5014129499938568, "grad_norm": 0.3603189154270178, "learning_rate": 1.7532184622960014e-05, "loss": 0.3574, "step": 4081 }, { "epoch": 0.5015358152107139, "grad_norm": 0.3605694362856266, "learning_rate": 1.7525843424000128e-05, "loss": 0.3375, "step": 4082 }, { "epoch": 0.501658680427571, "grad_norm": 0.34285341858816687, "learning_rate": 1.751950176042772e-05, "loss": 0.3568, "step": 4083 }, { "epoch": 0.5017815456444281, "grad_norm": 0.3261307220644031, "learning_rate": 1.7513159633409305e-05, "loss": 0.4454, "step": 4084 }, { "epoch": 0.5019044108612851, "grad_norm": 0.34048757326937407, "learning_rate": 1.7506817044111477e-05, "loss": 0.3562, "step": 4085 }, { "epoch": 0.5020272760781422, "grad_norm": 0.2957842551105179, "learning_rate": 1.75004739937009e-05, "loss": 0.3656, "step": 4086 }, { "epoch": 0.5021501412949994, "grad_norm": 0.47820851390811286, "learning_rate": 1.7494130483344357e-05, "loss": 0.4592, "step": 4087 }, { "epoch": 0.5022730065118565, "grad_norm": 0.32478324292979616, "learning_rate": 1.7487786514208685e-05, "loss": 0.3445, "step": 4088 }, { "epoch": 0.5023958717287136, "grad_norm": 0.3078076693983581, "learning_rate": 1.748144208746082e-05, "loss": 0.389, "step": 4089 }, { "epoch": 0.5025187369455707, "grad_norm": 0.3528772242389347, "learning_rate": 1.747509720426777e-05, "loss": 0.3677, "step": 4090 }, { "epoch": 0.5026416021624278, "grad_norm": 0.37909258666616974, "learning_rate": 1.7468751865796645e-05, "loss": 0.3503, "step": 4091 }, { "epoch": 0.5027644673792849, "grad_norm": 0.32318631573236895, "learning_rate": 1.746240607321462e-05, "loss": 0.4454, "step": 4092 }, { "epoch": 0.502887332596142, "grad_norm": 0.3349247734864289, "learning_rate": 1.7456059827688976e-05, "loss": 0.3989, "step": 4093 }, { "epoch": 0.5030101978129992, "grad_norm": 0.30902656266406436, "learning_rate": 1.744971313038705e-05, "loss": 0.3767, "step": 4094 }, { "epoch": 0.5031330630298563, "grad_norm": 0.39261522016058914, "learning_rate": 1.744336598247628e-05, "loss": 0.4464, "step": 4095 }, { "epoch": 0.5032559282467134, "grad_norm": 0.39130601788310826, "learning_rate": 1.7437018385124182e-05, "loss": 0.4025, "step": 4096 }, { "epoch": 0.5033787934635705, "grad_norm": 0.31494282380629035, "learning_rate": 1.7430670339498358e-05, "loss": 0.3783, "step": 4097 }, { "epoch": 0.5035016586804276, "grad_norm": 0.31652146111318075, "learning_rate": 1.7424321846766487e-05, "loss": 0.3535, "step": 4098 }, { "epoch": 0.5036245238972846, "grad_norm": 0.3992087893550707, "learning_rate": 1.7417972908096337e-05, "loss": 0.4044, "step": 4099 }, { "epoch": 0.5037473891141417, "grad_norm": 0.6857798984937534, "learning_rate": 1.741162352465575e-05, "loss": 0.4974, "step": 4100 }, { "epoch": 0.5038702543309989, "grad_norm": 0.3253877029956226, "learning_rate": 1.7405273697612656e-05, "loss": 0.3902, "step": 4101 }, { "epoch": 0.503993119547856, "grad_norm": 0.30925125339213, "learning_rate": 1.7398923428135066e-05, "loss": 0.4148, "step": 4102 }, { "epoch": 0.5041159847647131, "grad_norm": 0.31754506141121747, "learning_rate": 1.739257271739107e-05, "loss": 0.3716, "step": 4103 }, { "epoch": 0.5042388499815702, "grad_norm": 0.3213839265938943, "learning_rate": 1.7386221566548836e-05, "loss": 0.3934, "step": 4104 }, { "epoch": 0.5043617151984273, "grad_norm": 0.38068157908468675, "learning_rate": 1.7379869976776617e-05, "loss": 0.4501, "step": 4105 }, { "epoch": 0.5044845804152844, "grad_norm": 0.36174089790708375, "learning_rate": 1.7373517949242755e-05, "loss": 0.4533, "step": 4106 }, { "epoch": 0.5046074456321415, "grad_norm": 0.38831920489174465, "learning_rate": 1.7367165485115657e-05, "loss": 0.3763, "step": 4107 }, { "epoch": 0.5047303108489987, "grad_norm": 0.3783032470818448, "learning_rate": 1.736081258556382e-05, "loss": 0.4066, "step": 4108 }, { "epoch": 0.5048531760658558, "grad_norm": 0.34455218811511285, "learning_rate": 1.7354459251755816e-05, "loss": 0.3311, "step": 4109 }, { "epoch": 0.5049760412827129, "grad_norm": 0.3407818949805521, "learning_rate": 1.7348105484860305e-05, "loss": 0.3727, "step": 4110 }, { "epoch": 0.50509890649957, "grad_norm": 0.4005182264828349, "learning_rate": 1.7341751286046018e-05, "loss": 0.3551, "step": 4111 }, { "epoch": 0.5052217717164271, "grad_norm": 0.40190568125643444, "learning_rate": 1.733539665648177e-05, "loss": 0.4611, "step": 4112 }, { "epoch": 0.5053446369332842, "grad_norm": 0.34397104821574076, "learning_rate": 1.732904159733645e-05, "loss": 0.3452, "step": 4113 }, { "epoch": 0.5054675021501412, "grad_norm": 0.38643904068946, "learning_rate": 1.7322686109779032e-05, "loss": 0.4301, "step": 4114 }, { "epoch": 0.5055903673669984, "grad_norm": 0.4583056383658355, "learning_rate": 1.731633019497857e-05, "loss": 0.3837, "step": 4115 }, { "epoch": 0.5057132325838555, "grad_norm": 0.3642126156038298, "learning_rate": 1.7309973854104186e-05, "loss": 0.3579, "step": 4116 }, { "epoch": 0.5058360978007126, "grad_norm": 0.3781552032694182, "learning_rate": 1.7303617088325097e-05, "loss": 0.393, "step": 4117 }, { "epoch": 0.5059589630175697, "grad_norm": 0.36192243061580265, "learning_rate": 1.729725989881058e-05, "loss": 0.3275, "step": 4118 }, { "epoch": 0.5060818282344268, "grad_norm": 0.32779590828235666, "learning_rate": 1.7290902286730007e-05, "loss": 0.4619, "step": 4119 }, { "epoch": 0.5062046934512839, "grad_norm": 0.3489594191783829, "learning_rate": 1.7284544253252813e-05, "loss": 0.3614, "step": 4120 }, { "epoch": 0.506327558668141, "grad_norm": 0.26259753455481244, "learning_rate": 1.727818579954852e-05, "loss": 0.387, "step": 4121 }, { "epoch": 0.5064504238849982, "grad_norm": 0.4361957571185388, "learning_rate": 1.7271826926786724e-05, "loss": 0.4465, "step": 4122 }, { "epoch": 0.5065732891018553, "grad_norm": 0.3538275599660987, "learning_rate": 1.7265467636137097e-05, "loss": 0.3756, "step": 4123 }, { "epoch": 0.5066961543187124, "grad_norm": 0.44793029215026403, "learning_rate": 1.7259107928769392e-05, "loss": 0.4746, "step": 4124 }, { "epoch": 0.5068190195355695, "grad_norm": 0.3551586383004818, "learning_rate": 1.725274780585343e-05, "loss": 0.3928, "step": 4125 }, { "epoch": 0.5069418847524266, "grad_norm": 0.37522211175001374, "learning_rate": 1.724638726855912e-05, "loss": 0.4102, "step": 4126 }, { "epoch": 0.5070647499692837, "grad_norm": 0.39720017655788925, "learning_rate": 1.7240026318056446e-05, "loss": 0.393, "step": 4127 }, { "epoch": 0.5071876151861408, "grad_norm": 0.34614928193882183, "learning_rate": 1.7233664955515454e-05, "loss": 0.3754, "step": 4128 }, { "epoch": 0.5073104804029979, "grad_norm": 0.33020498527526065, "learning_rate": 1.722730318210628e-05, "loss": 0.3376, "step": 4129 }, { "epoch": 0.507433345619855, "grad_norm": 0.3196595245276729, "learning_rate": 1.722094099899913e-05, "loss": 0.4423, "step": 4130 }, { "epoch": 0.5075562108367121, "grad_norm": 0.2939358157284869, "learning_rate": 1.7214578407364286e-05, "loss": 0.4176, "step": 4131 }, { "epoch": 0.5076790760535692, "grad_norm": 0.42874359790362515, "learning_rate": 1.7208215408372107e-05, "loss": 0.3987, "step": 4132 }, { "epoch": 0.5078019412704263, "grad_norm": 0.3552096685414059, "learning_rate": 1.720185200319302e-05, "loss": 0.3311, "step": 4133 }, { "epoch": 0.5079248064872834, "grad_norm": 0.4273963197466469, "learning_rate": 1.7195488192997543e-05, "loss": 0.4571, "step": 4134 }, { "epoch": 0.5080476717041406, "grad_norm": 0.35225345403583075, "learning_rate": 1.7189123978956246e-05, "loss": 0.449, "step": 4135 }, { "epoch": 0.5081705369209977, "grad_norm": 0.2799438338561799, "learning_rate": 1.718275936223979e-05, "loss": 0.3733, "step": 4136 }, { "epoch": 0.5082934021378548, "grad_norm": 0.33704292182443457, "learning_rate": 1.7176394344018912e-05, "loss": 0.3324, "step": 4137 }, { "epoch": 0.5084162673547119, "grad_norm": 0.35578714313267146, "learning_rate": 1.7170028925464403e-05, "loss": 0.4161, "step": 4138 }, { "epoch": 0.508539132571569, "grad_norm": 0.32601755060564025, "learning_rate": 1.716366310774715e-05, "loss": 0.459, "step": 4139 }, { "epoch": 0.5086619977884261, "grad_norm": 0.36230215980467995, "learning_rate": 1.7157296892038096e-05, "loss": 0.3833, "step": 4140 }, { "epoch": 0.5087848630052832, "grad_norm": 0.38225408423385665, "learning_rate": 1.7150930279508273e-05, "loss": 0.4489, "step": 4141 }, { "epoch": 0.5089077282221404, "grad_norm": 0.3115296881671131, "learning_rate": 1.714456327132877e-05, "loss": 0.4145, "step": 4142 }, { "epoch": 0.5090305934389974, "grad_norm": 0.29710209813446176, "learning_rate": 1.7138195868670764e-05, "loss": 0.3308, "step": 4143 }, { "epoch": 0.5091534586558545, "grad_norm": 0.37913259105393743, "learning_rate": 1.7131828072705494e-05, "loss": 0.4493, "step": 4144 }, { "epoch": 0.5092763238727116, "grad_norm": 0.3016765129947813, "learning_rate": 1.7125459884604278e-05, "loss": 0.3439, "step": 4145 }, { "epoch": 0.5093991890895687, "grad_norm": 0.3491232400180506, "learning_rate": 1.7119091305538495e-05, "loss": 0.4589, "step": 4146 }, { "epoch": 0.5095220543064258, "grad_norm": 0.3994463559372828, "learning_rate": 1.711272233667961e-05, "loss": 0.5099, "step": 4147 }, { "epoch": 0.509644919523283, "grad_norm": 0.4079451107453317, "learning_rate": 1.710635297919916e-05, "loss": 0.4482, "step": 4148 }, { "epoch": 0.5097677847401401, "grad_norm": 0.4063788504149667, "learning_rate": 1.7099983234268733e-05, "loss": 0.4152, "step": 4149 }, { "epoch": 0.5098906499569972, "grad_norm": 0.40715543674417637, "learning_rate": 1.709361310306001e-05, "loss": 0.3629, "step": 4150 }, { "epoch": 0.5100135151738543, "grad_norm": 0.3039576097988044, "learning_rate": 1.7087242586744733e-05, "loss": 0.3387, "step": 4151 }, { "epoch": 0.5101363803907114, "grad_norm": 0.3492726633038822, "learning_rate": 1.708087168649472e-05, "loss": 0.4005, "step": 4152 }, { "epoch": 0.5102592456075685, "grad_norm": 0.3574572774707258, "learning_rate": 1.7074500403481855e-05, "loss": 0.3972, "step": 4153 }, { "epoch": 0.5103821108244256, "grad_norm": 0.4363711255446303, "learning_rate": 1.7068128738878095e-05, "loss": 0.5353, "step": 4154 }, { "epoch": 0.5105049760412828, "grad_norm": 0.36106971666957516, "learning_rate": 1.706175669385546e-05, "loss": 0.4617, "step": 4155 }, { "epoch": 0.5106278412581399, "grad_norm": 0.37495852010167374, "learning_rate": 1.7055384269586063e-05, "loss": 0.4003, "step": 4156 }, { "epoch": 0.5107507064749969, "grad_norm": 0.3979573545103336, "learning_rate": 1.7049011467242055e-05, "loss": 0.3343, "step": 4157 }, { "epoch": 0.510873571691854, "grad_norm": 0.35264073590829403, "learning_rate": 1.7042638287995673e-05, "loss": 0.4489, "step": 4158 }, { "epoch": 0.5109964369087111, "grad_norm": 0.37811898903906, "learning_rate": 1.7036264733019226e-05, "loss": 0.3745, "step": 4159 }, { "epoch": 0.5111193021255682, "grad_norm": 0.5030765451865503, "learning_rate": 1.702989080348509e-05, "loss": 0.5208, "step": 4160 }, { "epoch": 0.5112421673424253, "grad_norm": 0.5693916690085344, "learning_rate": 1.7023516500565702e-05, "loss": 0.3853, "step": 4161 }, { "epoch": 0.5113650325592825, "grad_norm": 0.3372859318698206, "learning_rate": 1.7017141825433576e-05, "loss": 0.4437, "step": 4162 }, { "epoch": 0.5114878977761396, "grad_norm": 0.3132181671654403, "learning_rate": 1.7010766779261292e-05, "loss": 0.3939, "step": 4163 }, { "epoch": 0.5116107629929967, "grad_norm": 0.37551054675685597, "learning_rate": 1.7004391363221502e-05, "loss": 0.3733, "step": 4164 }, { "epoch": 0.5117336282098538, "grad_norm": 0.3521874272768204, "learning_rate": 1.6998015578486918e-05, "loss": 0.3434, "step": 4165 }, { "epoch": 0.5118564934267109, "grad_norm": 0.35033542053498135, "learning_rate": 1.699163942623033e-05, "loss": 0.3621, "step": 4166 }, { "epoch": 0.511979358643568, "grad_norm": 0.37708493813279026, "learning_rate": 1.6985262907624583e-05, "loss": 0.4958, "step": 4167 }, { "epoch": 0.5121022238604251, "grad_norm": 0.340579624154558, "learning_rate": 1.6978886023842598e-05, "loss": 0.4203, "step": 4168 }, { "epoch": 0.5122250890772823, "grad_norm": 0.3253471885606933, "learning_rate": 1.6972508776057362e-05, "loss": 0.4416, "step": 4169 }, { "epoch": 0.5123479542941394, "grad_norm": 0.31289589251261696, "learning_rate": 1.6966131165441928e-05, "loss": 0.4191, "step": 4170 }, { "epoch": 0.5124708195109965, "grad_norm": 0.33785598617997337, "learning_rate": 1.6959753193169422e-05, "loss": 0.4267, "step": 4171 }, { "epoch": 0.5125936847278535, "grad_norm": 0.5084598320606314, "learning_rate": 1.695337486041302e-05, "loss": 0.434, "step": 4172 }, { "epoch": 0.5127165499447106, "grad_norm": 0.33811434793860323, "learning_rate": 1.694699616834598e-05, "loss": 0.3218, "step": 4173 }, { "epoch": 0.5128394151615677, "grad_norm": 0.3343526458736962, "learning_rate": 1.6940617118141626e-05, "loss": 0.3593, "step": 4174 }, { "epoch": 0.5129622803784248, "grad_norm": 0.34617859908667664, "learning_rate": 1.693423771097334e-05, "loss": 0.3998, "step": 4175 }, { "epoch": 0.513085145595282, "grad_norm": 0.38733695545280217, "learning_rate": 1.6927857948014565e-05, "loss": 0.435, "step": 4176 }, { "epoch": 0.5132080108121391, "grad_norm": 0.3084318684699951, "learning_rate": 1.6921477830438827e-05, "loss": 0.4004, "step": 4177 }, { "epoch": 0.5133308760289962, "grad_norm": 0.6311305943968963, "learning_rate": 1.6915097359419703e-05, "loss": 0.4312, "step": 4178 }, { "epoch": 0.5134537412458533, "grad_norm": 0.3759857059483885, "learning_rate": 1.690871653613084e-05, "loss": 0.4598, "step": 4179 }, { "epoch": 0.5135766064627104, "grad_norm": 0.37667964117913444, "learning_rate": 1.6902335361745944e-05, "loss": 0.4493, "step": 4180 }, { "epoch": 0.5136994716795675, "grad_norm": 0.4761069995263985, "learning_rate": 1.6895953837438802e-05, "loss": 0.4921, "step": 4181 }, { "epoch": 0.5138223368964246, "grad_norm": 0.3770763435453868, "learning_rate": 1.6889571964383242e-05, "loss": 0.3417, "step": 4182 }, { "epoch": 0.5139452021132818, "grad_norm": 0.3596787187835276, "learning_rate": 1.6883189743753174e-05, "loss": 0.4011, "step": 4183 }, { "epoch": 0.5140680673301389, "grad_norm": 0.3390047024528614, "learning_rate": 1.687680717672257e-05, "loss": 0.4318, "step": 4184 }, { "epoch": 0.514190932546996, "grad_norm": 0.37235983334717354, "learning_rate": 1.6870424264465454e-05, "loss": 0.4198, "step": 4185 }, { "epoch": 0.514313797763853, "grad_norm": 0.4244469897455483, "learning_rate": 1.6864041008155926e-05, "loss": 0.4167, "step": 4186 }, { "epoch": 0.5144366629807101, "grad_norm": 0.33029047979900106, "learning_rate": 1.6857657408968146e-05, "loss": 0.4245, "step": 4187 }, { "epoch": 0.5145595281975672, "grad_norm": 0.3758941147516442, "learning_rate": 1.6851273468076328e-05, "loss": 0.4214, "step": 4188 }, { "epoch": 0.5146823934144243, "grad_norm": 0.43599113695399533, "learning_rate": 1.6844889186654757e-05, "loss": 0.4251, "step": 4189 }, { "epoch": 0.5148052586312815, "grad_norm": 0.3507850243984452, "learning_rate": 1.6838504565877795e-05, "loss": 0.4114, "step": 4190 }, { "epoch": 0.5149281238481386, "grad_norm": 0.35344476423931354, "learning_rate": 1.6832119606919835e-05, "loss": 0.3974, "step": 4191 }, { "epoch": 0.5150509890649957, "grad_norm": 0.5585719015711476, "learning_rate": 1.6825734310955356e-05, "loss": 0.4591, "step": 4192 }, { "epoch": 0.5151738542818528, "grad_norm": 0.36382240840518537, "learning_rate": 1.681934867915889e-05, "loss": 0.397, "step": 4193 }, { "epoch": 0.5152967194987099, "grad_norm": 0.4186740716694509, "learning_rate": 1.6812962712705037e-05, "loss": 0.3753, "step": 4194 }, { "epoch": 0.515419584715567, "grad_norm": 0.5097753052814202, "learning_rate": 1.6806576412768446e-05, "loss": 0.4753, "step": 4195 }, { "epoch": 0.5155424499324242, "grad_norm": 0.37984471400073866, "learning_rate": 1.6800189780523844e-05, "loss": 0.3646, "step": 4196 }, { "epoch": 0.5156653151492813, "grad_norm": 0.3368374374046932, "learning_rate": 1.6793802817146003e-05, "loss": 0.3141, "step": 4197 }, { "epoch": 0.5157881803661384, "grad_norm": 0.37899438092889326, "learning_rate": 1.6787415523809775e-05, "loss": 0.3942, "step": 4198 }, { "epoch": 0.5159110455829955, "grad_norm": 0.3382082575808774, "learning_rate": 1.6781027901690043e-05, "loss": 0.4662, "step": 4199 }, { "epoch": 0.5160339107998526, "grad_norm": 0.31607174034420193, "learning_rate": 1.6774639951961783e-05, "loss": 0.4183, "step": 4200 }, { "epoch": 0.5161567760167096, "grad_norm": 0.3491077239019674, "learning_rate": 1.6768251675800012e-05, "loss": 0.362, "step": 4201 }, { "epoch": 0.5162796412335667, "grad_norm": 0.37134431733788414, "learning_rate": 1.6761863074379815e-05, "loss": 0.3508, "step": 4202 }, { "epoch": 0.5164025064504238, "grad_norm": 0.3226291633251699, "learning_rate": 1.6755474148876328e-05, "loss": 0.377, "step": 4203 }, { "epoch": 0.516525371667281, "grad_norm": 0.35502967858665835, "learning_rate": 1.674908490046476e-05, "loss": 0.3821, "step": 4204 }, { "epoch": 0.5166482368841381, "grad_norm": 0.3504963057016427, "learning_rate": 1.6742695330320367e-05, "loss": 0.4013, "step": 4205 }, { "epoch": 0.5167711021009952, "grad_norm": 0.4587857065491016, "learning_rate": 1.6736305439618466e-05, "loss": 0.4192, "step": 4206 }, { "epoch": 0.5168939673178523, "grad_norm": 0.3542022495691324, "learning_rate": 1.672991522953444e-05, "loss": 0.4258, "step": 4207 }, { "epoch": 0.5170168325347094, "grad_norm": 0.3493066774110276, "learning_rate": 1.672352470124373e-05, "loss": 0.3834, "step": 4208 }, { "epoch": 0.5171396977515665, "grad_norm": 0.29854097897422643, "learning_rate": 1.671713385592183e-05, "loss": 0.4085, "step": 4209 }, { "epoch": 0.5172625629684237, "grad_norm": 0.33824657181669937, "learning_rate": 1.6710742694744288e-05, "loss": 0.4303, "step": 4210 }, { "epoch": 0.5173854281852808, "grad_norm": 0.31977317917487874, "learning_rate": 1.6704351218886722e-05, "loss": 0.4651, "step": 4211 }, { "epoch": 0.5175082934021379, "grad_norm": 0.30138468255813955, "learning_rate": 1.6697959429524803e-05, "loss": 0.3046, "step": 4212 }, { "epoch": 0.517631158618995, "grad_norm": 0.2997271323132441, "learning_rate": 1.6691567327834264e-05, "loss": 0.3667, "step": 4213 }, { "epoch": 0.5177540238358521, "grad_norm": 0.345757449429801, "learning_rate": 1.668517491499088e-05, "loss": 0.4055, "step": 4214 }, { "epoch": 0.5178768890527092, "grad_norm": 0.5388011999555199, "learning_rate": 1.6678782192170503e-05, "loss": 0.5044, "step": 4215 }, { "epoch": 0.5179997542695662, "grad_norm": 0.4056699123157379, "learning_rate": 1.6672389160549027e-05, "loss": 0.4143, "step": 4216 }, { "epoch": 0.5181226194864234, "grad_norm": 0.3830533766867315, "learning_rate": 1.6665995821302413e-05, "loss": 0.424, "step": 4217 }, { "epoch": 0.5182454847032805, "grad_norm": 0.3309779243491511, "learning_rate": 1.6659602175606665e-05, "loss": 0.3406, "step": 4218 }, { "epoch": 0.5183683499201376, "grad_norm": 0.33733207350775646, "learning_rate": 1.6653208224637868e-05, "loss": 0.4639, "step": 4219 }, { "epoch": 0.5184912151369947, "grad_norm": 0.3382400362213712, "learning_rate": 1.6646813969572133e-05, "loss": 0.3835, "step": 4220 }, { "epoch": 0.5186140803538518, "grad_norm": 0.34854808154034606, "learning_rate": 1.664041941158565e-05, "loss": 0.46, "step": 4221 }, { "epoch": 0.5187369455707089, "grad_norm": 0.3687566381985606, "learning_rate": 1.6634024551854656e-05, "loss": 0.3994, "step": 4222 }, { "epoch": 0.518859810787566, "grad_norm": 0.30456629953778136, "learning_rate": 1.662762939155544e-05, "loss": 0.3483, "step": 4223 }, { "epoch": 0.5189826760044232, "grad_norm": 0.3023831758166842, "learning_rate": 1.6621233931864357e-05, "loss": 0.4014, "step": 4224 }, { "epoch": 0.5191055412212803, "grad_norm": 0.37693946279257406, "learning_rate": 1.661483817395781e-05, "loss": 0.4215, "step": 4225 }, { "epoch": 0.5192284064381374, "grad_norm": 0.3156832497644193, "learning_rate": 1.6608442119012242e-05, "loss": 0.4256, "step": 4226 }, { "epoch": 0.5193512716549945, "grad_norm": 0.32964390194028553, "learning_rate": 1.6602045768204186e-05, "loss": 0.3922, "step": 4227 }, { "epoch": 0.5194741368718516, "grad_norm": 0.8141381637360006, "learning_rate": 1.6595649122710197e-05, "loss": 0.5114, "step": 4228 }, { "epoch": 0.5195970020887087, "grad_norm": 0.32884694468758285, "learning_rate": 1.6589252183706904e-05, "loss": 0.383, "step": 4229 }, { "epoch": 0.5197198673055657, "grad_norm": 0.3549035925086357, "learning_rate": 1.6582854952370972e-05, "loss": 0.4442, "step": 4230 }, { "epoch": 0.5198427325224229, "grad_norm": 0.32368670957431445, "learning_rate": 1.657645742987914e-05, "loss": 0.3044, "step": 4231 }, { "epoch": 0.51996559773928, "grad_norm": 0.37645280612936144, "learning_rate": 1.6570059617408187e-05, "loss": 0.3782, "step": 4232 }, { "epoch": 0.5200884629561371, "grad_norm": 0.34180422154161777, "learning_rate": 1.656366151613495e-05, "loss": 0.418, "step": 4233 }, { "epoch": 0.5202113281729942, "grad_norm": 0.4833703448404737, "learning_rate": 1.6557263127236323e-05, "loss": 0.4766, "step": 4234 }, { "epoch": 0.5203341933898513, "grad_norm": 0.3455125207955852, "learning_rate": 1.6550864451889234e-05, "loss": 0.3953, "step": 4235 }, { "epoch": 0.5204570586067084, "grad_norm": 0.3511406014097645, "learning_rate": 1.654446549127069e-05, "loss": 0.4174, "step": 4236 }, { "epoch": 0.5205799238235655, "grad_norm": 0.36240741598530474, "learning_rate": 1.6538066246557735e-05, "loss": 0.4168, "step": 4237 }, { "epoch": 0.5207027890404227, "grad_norm": 0.28433875340217724, "learning_rate": 1.653166671892747e-05, "loss": 0.3276, "step": 4238 }, { "epoch": 0.5208256542572798, "grad_norm": 0.3511029404429295, "learning_rate": 1.6525266909557046e-05, "loss": 0.3568, "step": 4239 }, { "epoch": 0.5209485194741369, "grad_norm": 0.3099753445085167, "learning_rate": 1.6518866819623665e-05, "loss": 0.3784, "step": 4240 }, { "epoch": 0.521071384690994, "grad_norm": 0.29372759243806607, "learning_rate": 1.6512466450304584e-05, "loss": 0.4455, "step": 4241 }, { "epoch": 0.5211942499078511, "grad_norm": 0.4233803834717534, "learning_rate": 1.6506065802777107e-05, "loss": 0.3212, "step": 4242 }, { "epoch": 0.5213171151247082, "grad_norm": 0.4353610506452296, "learning_rate": 1.6499664878218592e-05, "loss": 0.3787, "step": 4243 }, { "epoch": 0.5214399803415654, "grad_norm": 0.3437770241065022, "learning_rate": 1.649326367780645e-05, "loss": 0.3841, "step": 4244 }, { "epoch": 0.5215628455584224, "grad_norm": 0.3974987789388931, "learning_rate": 1.6486862202718134e-05, "loss": 0.3944, "step": 4245 }, { "epoch": 0.5216857107752795, "grad_norm": 0.36283907465432286, "learning_rate": 1.6480460454131165e-05, "loss": 0.3545, "step": 4246 }, { "epoch": 0.5218085759921366, "grad_norm": 0.42790471709632766, "learning_rate": 1.6474058433223092e-05, "loss": 0.4232, "step": 4247 }, { "epoch": 0.5219314412089937, "grad_norm": 0.3925614836792061, "learning_rate": 1.646765614117153e-05, "loss": 0.3754, "step": 4248 }, { "epoch": 0.5220543064258508, "grad_norm": 0.41850623281112087, "learning_rate": 1.646125357915414e-05, "loss": 0.4227, "step": 4249 }, { "epoch": 0.5221771716427079, "grad_norm": 0.36727704006792267, "learning_rate": 1.645485074834863e-05, "loss": 0.3907, "step": 4250 }, { "epoch": 0.522300036859565, "grad_norm": 0.34194066028830405, "learning_rate": 1.6448447649932763e-05, "loss": 0.3899, "step": 4251 }, { "epoch": 0.5224229020764222, "grad_norm": 0.39720794772673373, "learning_rate": 1.644204428508434e-05, "loss": 0.3724, "step": 4252 }, { "epoch": 0.5225457672932793, "grad_norm": 0.3025174805609015, "learning_rate": 1.6435640654981225e-05, "loss": 0.3842, "step": 4253 }, { "epoch": 0.5226686325101364, "grad_norm": 0.3399773925526555, "learning_rate": 1.642923676080132e-05, "loss": 0.4476, "step": 4254 }, { "epoch": 0.5227914977269935, "grad_norm": 0.3019959847927942, "learning_rate": 1.6422832603722583e-05, "loss": 0.3111, "step": 4255 }, { "epoch": 0.5229143629438506, "grad_norm": 0.3694567421342746, "learning_rate": 1.6416428184923014e-05, "loss": 0.3431, "step": 4256 }, { "epoch": 0.5230372281607077, "grad_norm": 0.3087083100808296, "learning_rate": 1.641002350558067e-05, "loss": 0.3849, "step": 4257 }, { "epoch": 0.5231600933775649, "grad_norm": 0.36446368193497874, "learning_rate": 1.6403618566873645e-05, "loss": 0.3844, "step": 4258 }, { "epoch": 0.5232829585944219, "grad_norm": 0.31915216057417134, "learning_rate": 1.6397213369980087e-05, "loss": 0.3695, "step": 4259 }, { "epoch": 0.523405823811279, "grad_norm": 0.3880706234392669, "learning_rate": 1.6390807916078192e-05, "loss": 0.3824, "step": 4260 }, { "epoch": 0.5235286890281361, "grad_norm": 0.4013397533483165, "learning_rate": 1.6384402206346202e-05, "loss": 0.4236, "step": 4261 }, { "epoch": 0.5236515542449932, "grad_norm": 0.36762500204244575, "learning_rate": 1.6377996241962402e-05, "loss": 0.4147, "step": 4262 }, { "epoch": 0.5237744194618503, "grad_norm": 0.34337160414993606, "learning_rate": 1.6371590024105128e-05, "loss": 0.5026, "step": 4263 }, { "epoch": 0.5238972846787074, "grad_norm": 0.3679871997987526, "learning_rate": 1.6365183553952765e-05, "loss": 0.3669, "step": 4264 }, { "epoch": 0.5240201498955646, "grad_norm": 0.38952462844553604, "learning_rate": 1.6358776832683743e-05, "loss": 0.44, "step": 4265 }, { "epoch": 0.5241430151124217, "grad_norm": 0.2929038395771476, "learning_rate": 1.635236986147653e-05, "loss": 0.3347, "step": 4266 }, { "epoch": 0.5242658803292788, "grad_norm": 0.39509651809208596, "learning_rate": 1.6345962641509657e-05, "loss": 0.3393, "step": 4267 }, { "epoch": 0.5243887455461359, "grad_norm": 0.43613046727340243, "learning_rate": 1.633955517396168e-05, "loss": 0.4415, "step": 4268 }, { "epoch": 0.524511610762993, "grad_norm": 0.3982038947055134, "learning_rate": 1.6333147460011223e-05, "loss": 0.4058, "step": 4269 }, { "epoch": 0.5246344759798501, "grad_norm": 0.33590061386654907, "learning_rate": 1.6326739500836935e-05, "loss": 0.3817, "step": 4270 }, { "epoch": 0.5247573411967072, "grad_norm": 0.2979584567640762, "learning_rate": 1.6320331297617513e-05, "loss": 0.4205, "step": 4271 }, { "epoch": 0.5248802064135644, "grad_norm": 0.39018473967243555, "learning_rate": 1.631392285153172e-05, "loss": 0.4107, "step": 4272 }, { "epoch": 0.5250030716304215, "grad_norm": 0.32922471235183387, "learning_rate": 1.6307514163758334e-05, "loss": 0.3697, "step": 4273 }, { "epoch": 0.5251259368472785, "grad_norm": 0.4086341233328449, "learning_rate": 1.6301105235476195e-05, "loss": 0.4887, "step": 4274 }, { "epoch": 0.5252488020641356, "grad_norm": 0.3338488056539903, "learning_rate": 1.629469606786419e-05, "loss": 0.4036, "step": 4275 }, { "epoch": 0.5253716672809927, "grad_norm": 0.4914768720916862, "learning_rate": 1.628828666210124e-05, "loss": 0.4668, "step": 4276 }, { "epoch": 0.5254945324978498, "grad_norm": 0.3850294138408027, "learning_rate": 1.628187701936631e-05, "loss": 0.4195, "step": 4277 }, { "epoch": 0.525617397714707, "grad_norm": 0.4078570206324635, "learning_rate": 1.6275467140838418e-05, "loss": 0.427, "step": 4278 }, { "epoch": 0.5257402629315641, "grad_norm": 0.3467117693152213, "learning_rate": 1.6269057027696618e-05, "loss": 0.313, "step": 4279 }, { "epoch": 0.5258631281484212, "grad_norm": 0.37368502446613233, "learning_rate": 1.626264668112001e-05, "loss": 0.3696, "step": 4280 }, { "epoch": 0.5259859933652783, "grad_norm": 0.3803145992166544, "learning_rate": 1.625623610228773e-05, "loss": 0.4588, "step": 4281 }, { "epoch": 0.5261088585821354, "grad_norm": 0.4184206648045848, "learning_rate": 1.6249825292378965e-05, "loss": 0.4263, "step": 4282 }, { "epoch": 0.5262317237989925, "grad_norm": 0.39544899542736217, "learning_rate": 1.6243414252572946e-05, "loss": 0.3359, "step": 4283 }, { "epoch": 0.5263545890158496, "grad_norm": 0.47224208915632654, "learning_rate": 1.6237002984048935e-05, "loss": 0.4554, "step": 4284 }, { "epoch": 0.5264774542327068, "grad_norm": 0.3485666517859798, "learning_rate": 1.6230591487986247e-05, "loss": 0.4221, "step": 4285 }, { "epoch": 0.5266003194495639, "grad_norm": 0.34707528980788876, "learning_rate": 1.6224179765564243e-05, "loss": 0.3926, "step": 4286 }, { "epoch": 0.526723184666421, "grad_norm": 0.35981382106471677, "learning_rate": 1.6217767817962304e-05, "loss": 0.3553, "step": 4287 }, { "epoch": 0.526846049883278, "grad_norm": 0.45283024107967257, "learning_rate": 1.6211355646359877e-05, "loss": 0.4512, "step": 4288 }, { "epoch": 0.5269689151001351, "grad_norm": 0.3606942902200101, "learning_rate": 1.620494325193643e-05, "loss": 0.4431, "step": 4289 }, { "epoch": 0.5270917803169922, "grad_norm": 0.37740359985566735, "learning_rate": 1.619853063587149e-05, "loss": 0.4497, "step": 4290 }, { "epoch": 0.5272146455338493, "grad_norm": 0.34926905363950433, "learning_rate": 1.6192117799344606e-05, "loss": 0.429, "step": 4291 }, { "epoch": 0.5273375107507065, "grad_norm": 0.32055045624628625, "learning_rate": 1.6185704743535388e-05, "loss": 0.3492, "step": 4292 }, { "epoch": 0.5274603759675636, "grad_norm": 0.3861177442054672, "learning_rate": 1.6179291469623474e-05, "loss": 0.4156, "step": 4293 }, { "epoch": 0.5275832411844207, "grad_norm": 0.3752471669064736, "learning_rate": 1.617287797878854e-05, "loss": 0.3804, "step": 4294 }, { "epoch": 0.5277061064012778, "grad_norm": 0.3375566380001534, "learning_rate": 1.6166464272210304e-05, "loss": 0.3577, "step": 4295 }, { "epoch": 0.5278289716181349, "grad_norm": 0.3397029615051577, "learning_rate": 1.6160050351068534e-05, "loss": 0.4126, "step": 4296 }, { "epoch": 0.527951836834992, "grad_norm": 0.38101486036885746, "learning_rate": 1.6153636216543027e-05, "loss": 0.4035, "step": 4297 }, { "epoch": 0.5280747020518491, "grad_norm": 0.3229138539692241, "learning_rate": 1.6147221869813618e-05, "loss": 0.3885, "step": 4298 }, { "epoch": 0.5281975672687063, "grad_norm": 0.3362177326417246, "learning_rate": 1.6140807312060188e-05, "loss": 0.4485, "step": 4299 }, { "epoch": 0.5283204324855634, "grad_norm": 0.30998539361577615, "learning_rate": 1.613439254446265e-05, "loss": 0.3319, "step": 4300 }, { "epoch": 0.5284432977024205, "grad_norm": 0.33963581174209106, "learning_rate": 1.612797756820096e-05, "loss": 0.4027, "step": 4301 }, { "epoch": 0.5285661629192776, "grad_norm": 0.3000841443192372, "learning_rate": 1.612156238445511e-05, "loss": 0.3966, "step": 4302 }, { "epoch": 0.5286890281361346, "grad_norm": 0.39070483026433167, "learning_rate": 1.6115146994405133e-05, "loss": 0.4183, "step": 4303 }, { "epoch": 0.5288118933529917, "grad_norm": 0.35804153141509754, "learning_rate": 1.61087313992311e-05, "loss": 0.4051, "step": 4304 }, { "epoch": 0.5289347585698488, "grad_norm": 0.3935737129956302, "learning_rate": 1.6102315600113117e-05, "loss": 0.3459, "step": 4305 }, { "epoch": 0.529057623786706, "grad_norm": 0.32264078099691784, "learning_rate": 1.6095899598231324e-05, "loss": 0.3817, "step": 4306 }, { "epoch": 0.5291804890035631, "grad_norm": 0.36308829497210915, "learning_rate": 1.6089483394765908e-05, "loss": 0.4534, "step": 4307 }, { "epoch": 0.5293033542204202, "grad_norm": 0.3198120782436122, "learning_rate": 1.6083066990897094e-05, "loss": 0.4335, "step": 4308 }, { "epoch": 0.5294262194372773, "grad_norm": 0.3343572157172234, "learning_rate": 1.607665038780513e-05, "loss": 0.377, "step": 4309 }, { "epoch": 0.5295490846541344, "grad_norm": 0.5035321676880377, "learning_rate": 1.6070233586670297e-05, "loss": 0.4525, "step": 4310 }, { "epoch": 0.5296719498709915, "grad_norm": 0.5050905401908121, "learning_rate": 1.606381658867295e-05, "loss": 0.3879, "step": 4311 }, { "epoch": 0.5297948150878486, "grad_norm": 0.329729522054241, "learning_rate": 1.6057399394993432e-05, "loss": 0.4895, "step": 4312 }, { "epoch": 0.5299176803047058, "grad_norm": 0.32485910449837496, "learning_rate": 1.6050982006812158e-05, "loss": 0.335, "step": 4313 }, { "epoch": 0.5300405455215629, "grad_norm": 0.3741175846736955, "learning_rate": 1.6044564425309555e-05, "loss": 0.363, "step": 4314 }, { "epoch": 0.53016341073842, "grad_norm": 0.3605009914623345, "learning_rate": 1.6038146651666106e-05, "loss": 0.4112, "step": 4315 }, { "epoch": 0.5302862759552771, "grad_norm": 0.36788164395224976, "learning_rate": 1.603172868706231e-05, "loss": 0.3675, "step": 4316 }, { "epoch": 0.5304091411721342, "grad_norm": 0.35577177706097307, "learning_rate": 1.6025310532678713e-05, "loss": 0.3697, "step": 4317 }, { "epoch": 0.5305320063889912, "grad_norm": 0.3437992815659892, "learning_rate": 1.6018892189695893e-05, "loss": 0.2938, "step": 4318 }, { "epoch": 0.5306548716058483, "grad_norm": 0.366530937805977, "learning_rate": 1.6012473659294463e-05, "loss": 0.4236, "step": 4319 }, { "epoch": 0.5307777368227055, "grad_norm": 0.3550636102309216, "learning_rate": 1.6006054942655073e-05, "loss": 0.4483, "step": 4320 }, { "epoch": 0.5309006020395626, "grad_norm": 0.3521642326998801, "learning_rate": 1.5999636040958394e-05, "loss": 0.4029, "step": 4321 }, { "epoch": 0.5310234672564197, "grad_norm": 0.3330920140920521, "learning_rate": 1.5993216955385153e-05, "loss": 0.3423, "step": 4322 }, { "epoch": 0.5311463324732768, "grad_norm": 0.3635321660570284, "learning_rate": 1.598679768711609e-05, "loss": 0.3942, "step": 4323 }, { "epoch": 0.5312691976901339, "grad_norm": 0.3530380393348796, "learning_rate": 1.5980378237331995e-05, "loss": 0.4174, "step": 4324 }, { "epoch": 0.531392062906991, "grad_norm": 0.37208417322945025, "learning_rate": 1.597395860721368e-05, "loss": 0.3537, "step": 4325 }, { "epoch": 0.5315149281238482, "grad_norm": 0.3345485834959426, "learning_rate": 1.5967538797941997e-05, "loss": 0.4233, "step": 4326 }, { "epoch": 0.5316377933407053, "grad_norm": 0.31007398371499373, "learning_rate": 1.5961118810697824e-05, "loss": 0.4055, "step": 4327 }, { "epoch": 0.5317606585575624, "grad_norm": 0.3418666373579472, "learning_rate": 1.5954698646662085e-05, "loss": 0.4009, "step": 4328 }, { "epoch": 0.5318835237744195, "grad_norm": 0.33698025393755887, "learning_rate": 1.5948278307015715e-05, "loss": 0.3721, "step": 4329 }, { "epoch": 0.5320063889912766, "grad_norm": 0.3871464121204454, "learning_rate": 1.5941857792939702e-05, "loss": 0.3781, "step": 4330 }, { "epoch": 0.5321292542081337, "grad_norm": 0.44279943821001616, "learning_rate": 1.593543710561506e-05, "loss": 0.3747, "step": 4331 }, { "epoch": 0.5322521194249907, "grad_norm": 0.3450692436768984, "learning_rate": 1.592901624622282e-05, "loss": 0.3709, "step": 4332 }, { "epoch": 0.5323749846418478, "grad_norm": 0.3574872310162132, "learning_rate": 1.5922595215944072e-05, "loss": 0.3999, "step": 4333 }, { "epoch": 0.532497849858705, "grad_norm": 0.3294240055367606, "learning_rate": 1.591617401595992e-05, "loss": 0.4018, "step": 4334 }, { "epoch": 0.5326207150755621, "grad_norm": 0.37445716761412506, "learning_rate": 1.5909752647451494e-05, "loss": 0.4007, "step": 4335 }, { "epoch": 0.5327435802924192, "grad_norm": 0.4410909140709623, "learning_rate": 1.590333111159997e-05, "loss": 0.3715, "step": 4336 }, { "epoch": 0.5328664455092763, "grad_norm": 0.3198217266866804, "learning_rate": 1.589690940958655e-05, "loss": 0.353, "step": 4337 }, { "epoch": 0.5329893107261334, "grad_norm": 0.2701530526968492, "learning_rate": 1.5890487542592458e-05, "loss": 0.3063, "step": 4338 }, { "epoch": 0.5331121759429905, "grad_norm": 0.3330132664488213, "learning_rate": 1.5884065511798957e-05, "loss": 0.4089, "step": 4339 }, { "epoch": 0.5332350411598477, "grad_norm": 0.38666173288332956, "learning_rate": 1.5877643318387338e-05, "loss": 0.4283, "step": 4340 }, { "epoch": 0.5333579063767048, "grad_norm": 0.332355868175248, "learning_rate": 1.5871220963538927e-05, "loss": 0.3975, "step": 4341 }, { "epoch": 0.5334807715935619, "grad_norm": 0.329600273315027, "learning_rate": 1.5864798448435064e-05, "loss": 0.4266, "step": 4342 }, { "epoch": 0.533603636810419, "grad_norm": 0.3167081632305136, "learning_rate": 1.5858375774257136e-05, "loss": 0.4408, "step": 4343 }, { "epoch": 0.5337265020272761, "grad_norm": 0.3408027745286259, "learning_rate": 1.585195294218655e-05, "loss": 0.4211, "step": 4344 }, { "epoch": 0.5338493672441332, "grad_norm": 0.3406227058921208, "learning_rate": 1.584552995340475e-05, "loss": 0.4017, "step": 4345 }, { "epoch": 0.5339722324609903, "grad_norm": 0.3726490707133484, "learning_rate": 1.58391068090932e-05, "loss": 0.4407, "step": 4346 }, { "epoch": 0.5340950976778474, "grad_norm": 0.3847110357779319, "learning_rate": 1.5832683510433393e-05, "loss": 0.3734, "step": 4347 }, { "epoch": 0.5342179628947045, "grad_norm": 0.34874469049267043, "learning_rate": 1.582626005860685e-05, "loss": 0.339, "step": 4348 }, { "epoch": 0.5343408281115616, "grad_norm": 0.31239973089160783, "learning_rate": 1.581983645479513e-05, "loss": 0.3808, "step": 4349 }, { "epoch": 0.5344636933284187, "grad_norm": 0.40496677230555633, "learning_rate": 1.581341270017981e-05, "loss": 0.3685, "step": 4350 }, { "epoch": 0.5345865585452758, "grad_norm": 0.3483986757974261, "learning_rate": 1.5806988795942495e-05, "loss": 0.4381, "step": 4351 }, { "epoch": 0.5347094237621329, "grad_norm": 0.34146964443609046, "learning_rate": 1.580056474326483e-05, "loss": 0.3828, "step": 4352 }, { "epoch": 0.53483228897899, "grad_norm": 0.3609959538023517, "learning_rate": 1.5794140543328472e-05, "loss": 0.3585, "step": 4353 }, { "epoch": 0.5349551541958472, "grad_norm": 0.3658342421727669, "learning_rate": 1.5787716197315107e-05, "loss": 0.3907, "step": 4354 }, { "epoch": 0.5350780194127043, "grad_norm": 0.3635380569411021, "learning_rate": 1.578129170640646e-05, "loss": 0.4042, "step": 4355 }, { "epoch": 0.5352008846295614, "grad_norm": 0.31582391114934416, "learning_rate": 1.5774867071784274e-05, "loss": 0.3995, "step": 4356 }, { "epoch": 0.5353237498464185, "grad_norm": 0.36385725125699, "learning_rate": 1.5768442294630312e-05, "loss": 0.3192, "step": 4357 }, { "epoch": 0.5354466150632756, "grad_norm": 0.37769825818980796, "learning_rate": 1.5762017376126372e-05, "loss": 0.4229, "step": 4358 }, { "epoch": 0.5355694802801327, "grad_norm": 0.3367346577332724, "learning_rate": 1.5755592317454278e-05, "loss": 0.3657, "step": 4359 }, { "epoch": 0.5356923454969899, "grad_norm": 0.33482429907146555, "learning_rate": 1.5749167119795878e-05, "loss": 0.3751, "step": 4360 }, { "epoch": 0.5358152107138469, "grad_norm": 0.5036598239741914, "learning_rate": 1.574274178433304e-05, "loss": 0.4188, "step": 4361 }, { "epoch": 0.535938075930704, "grad_norm": 0.40280964223450455, "learning_rate": 1.5736316312247675e-05, "loss": 0.4183, "step": 4362 }, { "epoch": 0.5360609411475611, "grad_norm": 0.30569419924725894, "learning_rate": 1.5729890704721698e-05, "loss": 0.3531, "step": 4363 }, { "epoch": 0.5361838063644182, "grad_norm": 0.37214336959615896, "learning_rate": 1.572346496293706e-05, "loss": 0.3474, "step": 4364 }, { "epoch": 0.5363066715812753, "grad_norm": 0.32985644427785926, "learning_rate": 1.5717039088075728e-05, "loss": 0.353, "step": 4365 }, { "epoch": 0.5364295367981324, "grad_norm": 0.5006314144888484, "learning_rate": 1.5710613081319714e-05, "loss": 0.4492, "step": 4366 }, { "epoch": 0.5365524020149895, "grad_norm": 0.340068236593571, "learning_rate": 1.5704186943851025e-05, "loss": 0.414, "step": 4367 }, { "epoch": 0.5366752672318467, "grad_norm": 0.3295097225020102, "learning_rate": 1.5697760676851717e-05, "loss": 0.3499, "step": 4368 }, { "epoch": 0.5367981324487038, "grad_norm": 0.3707757116613846, "learning_rate": 1.5691334281503858e-05, "loss": 0.3732, "step": 4369 }, { "epoch": 0.5369209976655609, "grad_norm": 0.3663107638944122, "learning_rate": 1.5684907758989543e-05, "loss": 0.3582, "step": 4370 }, { "epoch": 0.537043862882418, "grad_norm": 0.3550842841175729, "learning_rate": 1.567848111049088e-05, "loss": 0.3396, "step": 4371 }, { "epoch": 0.5371667280992751, "grad_norm": 0.360425545528446, "learning_rate": 1.5672054337190026e-05, "loss": 0.3988, "step": 4372 }, { "epoch": 0.5372895933161322, "grad_norm": 0.3385139659773567, "learning_rate": 1.5665627440269134e-05, "loss": 0.3379, "step": 4373 }, { "epoch": 0.5374124585329894, "grad_norm": 0.3130240359735094, "learning_rate": 1.565920042091039e-05, "loss": 0.3425, "step": 4374 }, { "epoch": 0.5375353237498465, "grad_norm": 0.36345942009783266, "learning_rate": 1.5652773280296002e-05, "loss": 0.3633, "step": 4375 }, { "epoch": 0.5376581889667035, "grad_norm": 0.425039429563216, "learning_rate": 1.5646346019608205e-05, "loss": 0.3786, "step": 4376 }, { "epoch": 0.5377810541835606, "grad_norm": 0.31176462768513735, "learning_rate": 1.5639918640029247e-05, "loss": 0.4031, "step": 4377 }, { "epoch": 0.5379039194004177, "grad_norm": 0.4087549350234126, "learning_rate": 1.5633491142741403e-05, "loss": 0.4416, "step": 4378 }, { "epoch": 0.5380267846172748, "grad_norm": 0.35892609947684706, "learning_rate": 1.5627063528926973e-05, "loss": 0.4433, "step": 4379 }, { "epoch": 0.5381496498341319, "grad_norm": 0.3518036370611161, "learning_rate": 1.562063579976828e-05, "loss": 0.3819, "step": 4380 }, { "epoch": 0.538272515050989, "grad_norm": 0.35305715584798836, "learning_rate": 1.561420795644765e-05, "loss": 0.445, "step": 4381 }, { "epoch": 0.5383953802678462, "grad_norm": 0.4554864092093423, "learning_rate": 1.560778000014745e-05, "loss": 0.3975, "step": 4382 }, { "epoch": 0.5385182454847033, "grad_norm": 0.3521678643242537, "learning_rate": 1.5601351932050063e-05, "loss": 0.3693, "step": 4383 }, { "epoch": 0.5386411107015604, "grad_norm": 0.36898055033182986, "learning_rate": 1.5594923753337884e-05, "loss": 0.4343, "step": 4384 }, { "epoch": 0.5387639759184175, "grad_norm": 0.39205315226219906, "learning_rate": 1.5588495465193345e-05, "loss": 0.4168, "step": 4385 }, { "epoch": 0.5388868411352746, "grad_norm": 0.5316353323926366, "learning_rate": 1.5582067068798873e-05, "loss": 0.4322, "step": 4386 }, { "epoch": 0.5390097063521317, "grad_norm": 0.33721777894731786, "learning_rate": 1.557563856533695e-05, "loss": 0.3786, "step": 4387 }, { "epoch": 0.5391325715689889, "grad_norm": 0.3253889857506689, "learning_rate": 1.5569209955990036e-05, "loss": 0.4105, "step": 4388 }, { "epoch": 0.539255436785846, "grad_norm": 0.3509302848158799, "learning_rate": 1.5562781241940647e-05, "loss": 0.3613, "step": 4389 }, { "epoch": 0.539378302002703, "grad_norm": 0.32101552089245833, "learning_rate": 1.5556352424371294e-05, "loss": 0.4135, "step": 4390 }, { "epoch": 0.5395011672195601, "grad_norm": 0.37263537926172907, "learning_rate": 1.5549923504464527e-05, "loss": 0.4129, "step": 4391 }, { "epoch": 0.5396240324364172, "grad_norm": 0.28412772093451133, "learning_rate": 1.5543494483402894e-05, "loss": 0.3775, "step": 4392 }, { "epoch": 0.5397468976532743, "grad_norm": 0.3734894460223575, "learning_rate": 1.5537065362368977e-05, "loss": 0.4195, "step": 4393 }, { "epoch": 0.5398697628701314, "grad_norm": 0.444104820543159, "learning_rate": 1.553063614254537e-05, "loss": 0.4242, "step": 4394 }, { "epoch": 0.5399926280869886, "grad_norm": 0.3892572184483615, "learning_rate": 1.5524206825114685e-05, "loss": 0.3605, "step": 4395 }, { "epoch": 0.5401154933038457, "grad_norm": 0.45609649916949624, "learning_rate": 1.551777741125955e-05, "loss": 0.3781, "step": 4396 }, { "epoch": 0.5402383585207028, "grad_norm": 0.38491955733639566, "learning_rate": 1.5511347902162622e-05, "loss": 0.3595, "step": 4397 }, { "epoch": 0.5403612237375599, "grad_norm": 0.4663256801906011, "learning_rate": 1.5504918299006564e-05, "loss": 0.4262, "step": 4398 }, { "epoch": 0.540484088954417, "grad_norm": 0.39726433907649406, "learning_rate": 1.549848860297406e-05, "loss": 0.3614, "step": 4399 }, { "epoch": 0.5406069541712741, "grad_norm": 0.3366325688173977, "learning_rate": 1.5492058815247804e-05, "loss": 0.4895, "step": 4400 }, { "epoch": 0.5407298193881312, "grad_norm": 0.42173672898194486, "learning_rate": 1.548562893701053e-05, "loss": 0.4574, "step": 4401 }, { "epoch": 0.5408526846049884, "grad_norm": 0.40675645386774545, "learning_rate": 1.5479198969444956e-05, "loss": 0.4424, "step": 4402 }, { "epoch": 0.5409755498218455, "grad_norm": 0.38995387673169, "learning_rate": 1.547276891373384e-05, "loss": 0.4728, "step": 4403 }, { "epoch": 0.5410984150387026, "grad_norm": 0.36269237742314875, "learning_rate": 1.546633877105995e-05, "loss": 0.3311, "step": 4404 }, { "epoch": 0.5412212802555596, "grad_norm": 0.305598938620414, "learning_rate": 1.5459908542606066e-05, "loss": 0.386, "step": 4405 }, { "epoch": 0.5413441454724167, "grad_norm": 0.3998053708186553, "learning_rate": 1.545347822955499e-05, "loss": 0.4531, "step": 4406 }, { "epoch": 0.5414670106892738, "grad_norm": 0.40220046444636254, "learning_rate": 1.544704783308953e-05, "loss": 0.3991, "step": 4407 }, { "epoch": 0.541589875906131, "grad_norm": 0.38638785038206463, "learning_rate": 1.5440617354392526e-05, "loss": 0.3652, "step": 4408 }, { "epoch": 0.5417127411229881, "grad_norm": 0.42802121736010823, "learning_rate": 1.5434186794646813e-05, "loss": 0.4283, "step": 4409 }, { "epoch": 0.5418356063398452, "grad_norm": 0.36537962777293764, "learning_rate": 1.5427756155035257e-05, "loss": 0.36, "step": 4410 }, { "epoch": 0.5419584715567023, "grad_norm": 0.3155269667643227, "learning_rate": 1.5421325436740734e-05, "loss": 0.4035, "step": 4411 }, { "epoch": 0.5420813367735594, "grad_norm": 0.3229780465646929, "learning_rate": 1.5414894640946122e-05, "loss": 0.4004, "step": 4412 }, { "epoch": 0.5422042019904165, "grad_norm": 0.3377402446067495, "learning_rate": 1.5408463768834336e-05, "loss": 0.4025, "step": 4413 }, { "epoch": 0.5423270672072736, "grad_norm": 0.2827561864814251, "learning_rate": 1.5402032821588288e-05, "loss": 0.3284, "step": 4414 }, { "epoch": 0.5424499324241308, "grad_norm": 0.33927928965630544, "learning_rate": 1.5395601800390907e-05, "loss": 0.4308, "step": 4415 }, { "epoch": 0.5425727976409879, "grad_norm": 0.3368760935981452, "learning_rate": 1.5389170706425142e-05, "loss": 0.3546, "step": 4416 }, { "epoch": 0.542695662857845, "grad_norm": 0.2774058523429353, "learning_rate": 1.538273954087395e-05, "loss": 0.3792, "step": 4417 }, { "epoch": 0.5428185280747021, "grad_norm": 0.342574445235076, "learning_rate": 1.5376308304920303e-05, "loss": 0.3115, "step": 4418 }, { "epoch": 0.5429413932915591, "grad_norm": 0.32662090650099496, "learning_rate": 1.536987699974718e-05, "loss": 0.3784, "step": 4419 }, { "epoch": 0.5430642585084162, "grad_norm": 0.40810078145146994, "learning_rate": 1.536344562653759e-05, "loss": 0.3511, "step": 4420 }, { "epoch": 0.5431871237252733, "grad_norm": 0.35809198891998817, "learning_rate": 1.5357014186474527e-05, "loss": 0.3003, "step": 4421 }, { "epoch": 0.5433099889421305, "grad_norm": 0.3745499838079226, "learning_rate": 1.5350582680741022e-05, "loss": 0.3904, "step": 4422 }, { "epoch": 0.5434328541589876, "grad_norm": 0.38390957268276965, "learning_rate": 1.5344151110520104e-05, "loss": 0.4252, "step": 4423 }, { "epoch": 0.5435557193758447, "grad_norm": 0.41539340603500513, "learning_rate": 1.533771947699482e-05, "loss": 0.4328, "step": 4424 }, { "epoch": 0.5436785845927018, "grad_norm": 0.5212433206973731, "learning_rate": 1.5331287781348234e-05, "loss": 0.3809, "step": 4425 }, { "epoch": 0.5438014498095589, "grad_norm": 0.28901413400385756, "learning_rate": 1.53248560247634e-05, "loss": 0.4297, "step": 4426 }, { "epoch": 0.543924315026416, "grad_norm": 0.394553115550976, "learning_rate": 1.5318424208423415e-05, "loss": 0.3583, "step": 4427 }, { "epoch": 0.5440471802432731, "grad_norm": 0.3303378474636741, "learning_rate": 1.531199233351136e-05, "loss": 0.4493, "step": 4428 }, { "epoch": 0.5441700454601303, "grad_norm": 0.354219704614018, "learning_rate": 1.5305560401210337e-05, "loss": 0.3547, "step": 4429 }, { "epoch": 0.5442929106769874, "grad_norm": 0.3195395605534519, "learning_rate": 1.5299128412703465e-05, "loss": 0.3247, "step": 4430 }, { "epoch": 0.5444157758938445, "grad_norm": 0.32914541567657823, "learning_rate": 1.5292696369173858e-05, "loss": 0.4802, "step": 4431 }, { "epoch": 0.5445386411107016, "grad_norm": 0.3626331052194106, "learning_rate": 1.5286264271804648e-05, "loss": 0.3927, "step": 4432 }, { "epoch": 0.5446615063275587, "grad_norm": 0.3996309477041614, "learning_rate": 1.5279832121778987e-05, "loss": 0.3742, "step": 4433 }, { "epoch": 0.5447843715444157, "grad_norm": 0.3509631801614867, "learning_rate": 1.527339992028002e-05, "loss": 0.4074, "step": 4434 }, { "epoch": 0.5449072367612728, "grad_norm": 0.30148213082970643, "learning_rate": 1.5266967668490912e-05, "loss": 0.3557, "step": 4435 }, { "epoch": 0.54503010197813, "grad_norm": 0.3919687396744926, "learning_rate": 1.526053536759483e-05, "loss": 0.3909, "step": 4436 }, { "epoch": 0.5451529671949871, "grad_norm": 0.3860457408543303, "learning_rate": 1.525410301877496e-05, "loss": 0.3704, "step": 4437 }, { "epoch": 0.5452758324118442, "grad_norm": 0.3112948540064382, "learning_rate": 1.5247670623214484e-05, "loss": 0.3468, "step": 4438 }, { "epoch": 0.5453986976287013, "grad_norm": 0.3117276724067575, "learning_rate": 1.5241238182096606e-05, "loss": 0.3288, "step": 4439 }, { "epoch": 0.5455215628455584, "grad_norm": 0.33081321721807927, "learning_rate": 1.5234805696604531e-05, "loss": 0.4159, "step": 4440 }, { "epoch": 0.5456444280624155, "grad_norm": 0.42442018610697463, "learning_rate": 1.5228373167921469e-05, "loss": 0.3638, "step": 4441 }, { "epoch": 0.5457672932792726, "grad_norm": 0.44354529201018233, "learning_rate": 1.5221940597230639e-05, "loss": 0.4741, "step": 4442 }, { "epoch": 0.5458901584961298, "grad_norm": 0.30900080999086676, "learning_rate": 1.5215507985715283e-05, "loss": 0.3902, "step": 4443 }, { "epoch": 0.5460130237129869, "grad_norm": 0.3576614550351199, "learning_rate": 1.5209075334558625e-05, "loss": 0.3293, "step": 4444 }, { "epoch": 0.546135888929844, "grad_norm": 0.39555067790970766, "learning_rate": 1.5202642644943914e-05, "loss": 0.388, "step": 4445 }, { "epoch": 0.5462587541467011, "grad_norm": 0.3462294346786759, "learning_rate": 1.5196209918054408e-05, "loss": 0.4609, "step": 4446 }, { "epoch": 0.5463816193635582, "grad_norm": 0.32093615873935494, "learning_rate": 1.5189777155073354e-05, "loss": 0.3425, "step": 4447 }, { "epoch": 0.5465044845804153, "grad_norm": 0.34838256784605975, "learning_rate": 1.5183344357184032e-05, "loss": 0.4013, "step": 4448 }, { "epoch": 0.5466273497972723, "grad_norm": 0.4209650383966787, "learning_rate": 1.5176911525569699e-05, "loss": 0.4055, "step": 4449 }, { "epoch": 0.5467502150141295, "grad_norm": 0.3134698130039524, "learning_rate": 1.517047866141364e-05, "loss": 0.3398, "step": 4450 }, { "epoch": 0.5468730802309866, "grad_norm": 0.3332746210814233, "learning_rate": 1.5164045765899133e-05, "loss": 0.3562, "step": 4451 }, { "epoch": 0.5469959454478437, "grad_norm": 0.31964282933581223, "learning_rate": 1.5157612840209477e-05, "loss": 0.3716, "step": 4452 }, { "epoch": 0.5471188106647008, "grad_norm": 0.39096713142366574, "learning_rate": 1.5151179885527954e-05, "loss": 0.4106, "step": 4453 }, { "epoch": 0.5472416758815579, "grad_norm": 0.31271836828571603, "learning_rate": 1.5144746903037876e-05, "loss": 0.3808, "step": 4454 }, { "epoch": 0.547364541098415, "grad_norm": 0.3233751358127947, "learning_rate": 1.5138313893922542e-05, "loss": 0.4107, "step": 4455 }, { "epoch": 0.5474874063152722, "grad_norm": 0.3493639422193956, "learning_rate": 1.5131880859365268e-05, "loss": 0.4542, "step": 4456 }, { "epoch": 0.5476102715321293, "grad_norm": 0.39615394553815125, "learning_rate": 1.5125447800549357e-05, "loss": 0.3786, "step": 4457 }, { "epoch": 0.5477331367489864, "grad_norm": 0.31779683823559846, "learning_rate": 1.5119014718658147e-05, "loss": 0.3735, "step": 4458 }, { "epoch": 0.5478560019658435, "grad_norm": 0.309557007128385, "learning_rate": 1.5112581614874946e-05, "loss": 0.385, "step": 4459 }, { "epoch": 0.5479788671827006, "grad_norm": 0.3689206750834069, "learning_rate": 1.5106148490383091e-05, "loss": 0.4489, "step": 4460 }, { "epoch": 0.5481017323995577, "grad_norm": 0.4315110135101682, "learning_rate": 1.5099715346365902e-05, "loss": 0.4815, "step": 4461 }, { "epoch": 0.5482245976164148, "grad_norm": 0.4016503577332414, "learning_rate": 1.5093282184006728e-05, "loss": 0.4139, "step": 4462 }, { "epoch": 0.5483474628332718, "grad_norm": 0.34063806903382327, "learning_rate": 1.5086849004488897e-05, "loss": 0.4143, "step": 4463 }, { "epoch": 0.548470328050129, "grad_norm": 0.4091599176506104, "learning_rate": 1.508041580899576e-05, "loss": 0.4543, "step": 4464 }, { "epoch": 0.5485931932669861, "grad_norm": 0.33729724420634627, "learning_rate": 1.507398259871065e-05, "loss": 0.4743, "step": 4465 }, { "epoch": 0.5487160584838432, "grad_norm": 0.32098870997477086, "learning_rate": 1.5067549374816924e-05, "loss": 0.4582, "step": 4466 }, { "epoch": 0.5488389237007003, "grad_norm": 0.38092099913556615, "learning_rate": 1.506111613849793e-05, "loss": 0.3918, "step": 4467 }, { "epoch": 0.5489617889175574, "grad_norm": 0.2896715244902122, "learning_rate": 1.5054682890937019e-05, "loss": 0.3728, "step": 4468 }, { "epoch": 0.5490846541344145, "grad_norm": 0.3666800452132602, "learning_rate": 1.5048249633317546e-05, "loss": 0.3715, "step": 4469 }, { "epoch": 0.5492075193512717, "grad_norm": 0.4408358460298106, "learning_rate": 1.5041816366822859e-05, "loss": 0.3672, "step": 4470 }, { "epoch": 0.5493303845681288, "grad_norm": 0.3981019156978647, "learning_rate": 1.503538309263633e-05, "loss": 0.4073, "step": 4471 }, { "epoch": 0.5494532497849859, "grad_norm": 0.33284679100753767, "learning_rate": 1.5028949811941304e-05, "loss": 0.4125, "step": 4472 }, { "epoch": 0.549576115001843, "grad_norm": 0.32948577452837235, "learning_rate": 1.5022516525921152e-05, "loss": 0.3693, "step": 4473 }, { "epoch": 0.5496989802187001, "grad_norm": 0.35569877666876426, "learning_rate": 1.5016083235759227e-05, "loss": 0.4202, "step": 4474 }, { "epoch": 0.5498218454355572, "grad_norm": 0.33371941451769693, "learning_rate": 1.5009649942638901e-05, "loss": 0.3866, "step": 4475 }, { "epoch": 0.5499447106524143, "grad_norm": 0.38503308849755713, "learning_rate": 1.5003216647743528e-05, "loss": 0.3598, "step": 4476 }, { "epoch": 0.5500675758692715, "grad_norm": 0.3249055791968851, "learning_rate": 1.4996783352256473e-05, "loss": 0.3647, "step": 4477 }, { "epoch": 0.5501904410861285, "grad_norm": 0.3429929068043299, "learning_rate": 1.4990350057361101e-05, "loss": 0.3811, "step": 4478 }, { "epoch": 0.5503133063029856, "grad_norm": 0.3914917188345512, "learning_rate": 1.4983916764240773e-05, "loss": 0.4032, "step": 4479 }, { "epoch": 0.5504361715198427, "grad_norm": 0.4415361825359352, "learning_rate": 1.4977483474078852e-05, "loss": 0.3769, "step": 4480 }, { "epoch": 0.5505590367366998, "grad_norm": 0.31946360037558214, "learning_rate": 1.4971050188058697e-05, "loss": 0.3793, "step": 4481 }, { "epoch": 0.5506819019535569, "grad_norm": 0.3719507754413011, "learning_rate": 1.4964616907363675e-05, "loss": 0.355, "step": 4482 }, { "epoch": 0.550804767170414, "grad_norm": 0.36938490887052655, "learning_rate": 1.4958183633177142e-05, "loss": 0.3729, "step": 4483 }, { "epoch": 0.5509276323872712, "grad_norm": 0.3527792732441293, "learning_rate": 1.4951750366682462e-05, "loss": 0.3526, "step": 4484 }, { "epoch": 0.5510504976041283, "grad_norm": 0.35683094036937524, "learning_rate": 1.4945317109062985e-05, "loss": 0.3417, "step": 4485 }, { "epoch": 0.5511733628209854, "grad_norm": 0.33251806561271463, "learning_rate": 1.4938883861502073e-05, "loss": 0.4105, "step": 4486 }, { "epoch": 0.5512962280378425, "grad_norm": 0.4395408948407631, "learning_rate": 1.493245062518308e-05, "loss": 0.4806, "step": 4487 }, { "epoch": 0.5514190932546996, "grad_norm": 0.3594805985614231, "learning_rate": 1.4926017401289349e-05, "loss": 0.445, "step": 4488 }, { "epoch": 0.5515419584715567, "grad_norm": 0.3243185046785306, "learning_rate": 1.4919584191004244e-05, "loss": 0.4327, "step": 4489 }, { "epoch": 0.5516648236884139, "grad_norm": 0.3339991137167613, "learning_rate": 1.4913150995511104e-05, "loss": 0.427, "step": 4490 }, { "epoch": 0.551787688905271, "grad_norm": 0.32436240598102334, "learning_rate": 1.4906717815993278e-05, "loss": 0.4322, "step": 4491 }, { "epoch": 0.551910554122128, "grad_norm": 0.3532644726507897, "learning_rate": 1.4900284653634095e-05, "loss": 0.4201, "step": 4492 }, { "epoch": 0.5520334193389851, "grad_norm": 0.3522742922517666, "learning_rate": 1.4893851509616913e-05, "loss": 0.3963, "step": 4493 }, { "epoch": 0.5521562845558422, "grad_norm": 0.37815347529033316, "learning_rate": 1.4887418385125056e-05, "loss": 0.4301, "step": 4494 }, { "epoch": 0.5522791497726993, "grad_norm": 0.359724293685289, "learning_rate": 1.4880985281341855e-05, "loss": 0.4536, "step": 4495 }, { "epoch": 0.5524020149895564, "grad_norm": 0.34587755918343976, "learning_rate": 1.487455219945064e-05, "loss": 0.3519, "step": 4496 }, { "epoch": 0.5525248802064135, "grad_norm": 0.37581888075772996, "learning_rate": 1.4868119140634736e-05, "loss": 0.4014, "step": 4497 }, { "epoch": 0.5526477454232707, "grad_norm": 0.29922738152682693, "learning_rate": 1.4861686106077462e-05, "loss": 0.3805, "step": 4498 }, { "epoch": 0.5527706106401278, "grad_norm": 0.46009848935984987, "learning_rate": 1.485525309696213e-05, "loss": 0.4334, "step": 4499 }, { "epoch": 0.5528934758569849, "grad_norm": 0.4546956281824456, "learning_rate": 1.4848820114472045e-05, "loss": 0.4128, "step": 4500 }, { "epoch": 0.553016341073842, "grad_norm": 0.5238521253830808, "learning_rate": 1.4842387159790527e-05, "loss": 0.4923, "step": 4501 }, { "epoch": 0.5531392062906991, "grad_norm": 0.38382303302468873, "learning_rate": 1.483595423410087e-05, "loss": 0.3892, "step": 4502 }, { "epoch": 0.5532620715075562, "grad_norm": 0.37461693012361497, "learning_rate": 1.4829521338586367e-05, "loss": 0.3859, "step": 4503 }, { "epoch": 0.5533849367244134, "grad_norm": 0.35122233808787234, "learning_rate": 1.4823088474430304e-05, "loss": 0.3688, "step": 4504 }, { "epoch": 0.5535078019412705, "grad_norm": 0.42005848426173953, "learning_rate": 1.4816655642815972e-05, "loss": 0.4484, "step": 4505 }, { "epoch": 0.5536306671581276, "grad_norm": 0.3374088677636554, "learning_rate": 1.4810222844926647e-05, "loss": 0.3906, "step": 4506 }, { "epoch": 0.5537535323749846, "grad_norm": 0.4083628244345953, "learning_rate": 1.4803790081945597e-05, "loss": 0.4432, "step": 4507 }, { "epoch": 0.5538763975918417, "grad_norm": 0.33025727796911486, "learning_rate": 1.4797357355056085e-05, "loss": 0.3357, "step": 4508 }, { "epoch": 0.5539992628086988, "grad_norm": 0.42327262614804934, "learning_rate": 1.4790924665441379e-05, "loss": 0.4568, "step": 4509 }, { "epoch": 0.5541221280255559, "grad_norm": 0.333316550092702, "learning_rate": 1.4784492014284723e-05, "loss": 0.3702, "step": 4510 }, { "epoch": 0.554244993242413, "grad_norm": 0.3463427929671176, "learning_rate": 1.4778059402769358e-05, "loss": 0.4178, "step": 4511 }, { "epoch": 0.5543678584592702, "grad_norm": 0.36592452157249006, "learning_rate": 1.4771626832078534e-05, "loss": 0.3721, "step": 4512 }, { "epoch": 0.5544907236761273, "grad_norm": 0.3503810746336398, "learning_rate": 1.4765194303395473e-05, "loss": 0.4123, "step": 4513 }, { "epoch": 0.5546135888929844, "grad_norm": 0.3801410996141769, "learning_rate": 1.4758761817903396e-05, "loss": 0.405, "step": 4514 }, { "epoch": 0.5547364541098415, "grad_norm": 0.31846144963478296, "learning_rate": 1.4752329376785516e-05, "loss": 0.4074, "step": 4515 }, { "epoch": 0.5548593193266986, "grad_norm": 0.4082208891925979, "learning_rate": 1.4745896981225043e-05, "loss": 0.4471, "step": 4516 }, { "epoch": 0.5549821845435557, "grad_norm": 0.33288757571680233, "learning_rate": 1.4739464632405173e-05, "loss": 0.3642, "step": 4517 }, { "epoch": 0.5551050497604129, "grad_norm": 0.3697535449132459, "learning_rate": 1.4733032331509094e-05, "loss": 0.3863, "step": 4518 }, { "epoch": 0.55522791497727, "grad_norm": 0.3045606111231536, "learning_rate": 1.472660007971998e-05, "loss": 0.3812, "step": 4519 }, { "epoch": 0.5553507801941271, "grad_norm": 0.3077020917314486, "learning_rate": 1.4720167878221014e-05, "loss": 0.3756, "step": 4520 }, { "epoch": 0.5554736454109841, "grad_norm": 0.3118783173439604, "learning_rate": 1.4713735728195353e-05, "loss": 0.3664, "step": 4521 }, { "epoch": 0.5555965106278412, "grad_norm": 0.34505458869692024, "learning_rate": 1.4707303630826148e-05, "loss": 0.3435, "step": 4522 }, { "epoch": 0.5557193758446983, "grad_norm": 0.3217952107729692, "learning_rate": 1.4700871587296539e-05, "loss": 0.5053, "step": 4523 }, { "epoch": 0.5558422410615554, "grad_norm": 0.31916061526819944, "learning_rate": 1.4694439598789664e-05, "loss": 0.3817, "step": 4524 }, { "epoch": 0.5559651062784126, "grad_norm": 0.40397957028922554, "learning_rate": 1.4688007666488645e-05, "loss": 0.3984, "step": 4525 }, { "epoch": 0.5560879714952697, "grad_norm": 0.33333345662088854, "learning_rate": 1.468157579157659e-05, "loss": 0.3946, "step": 4526 }, { "epoch": 0.5562108367121268, "grad_norm": 0.34808896475107115, "learning_rate": 1.4675143975236599e-05, "loss": 0.3474, "step": 4527 }, { "epoch": 0.5563337019289839, "grad_norm": 0.3693277624805928, "learning_rate": 1.4668712218651772e-05, "loss": 0.4031, "step": 4528 }, { "epoch": 0.556456567145841, "grad_norm": 0.3829788080066468, "learning_rate": 1.4662280523005185e-05, "loss": 0.4224, "step": 4529 }, { "epoch": 0.5565794323626981, "grad_norm": 0.32530331195623263, "learning_rate": 1.4655848889479897e-05, "loss": 0.3586, "step": 4530 }, { "epoch": 0.5567022975795552, "grad_norm": 0.32105368941981927, "learning_rate": 1.4649417319258982e-05, "loss": 0.3341, "step": 4531 }, { "epoch": 0.5568251627964124, "grad_norm": 0.42650010719508064, "learning_rate": 1.4642985813525477e-05, "loss": 0.4858, "step": 4532 }, { "epoch": 0.5569480280132695, "grad_norm": 0.3755114448902635, "learning_rate": 1.4636554373462416e-05, "loss": 0.4732, "step": 4533 }, { "epoch": 0.5570708932301266, "grad_norm": 0.4877099575272311, "learning_rate": 1.463012300025282e-05, "loss": 0.5181, "step": 4534 }, { "epoch": 0.5571937584469837, "grad_norm": 0.4804922066663376, "learning_rate": 1.4623691695079698e-05, "loss": 0.4598, "step": 4535 }, { "epoch": 0.5573166236638407, "grad_norm": 0.3961892968588918, "learning_rate": 1.4617260459126053e-05, "loss": 0.4782, "step": 4536 }, { "epoch": 0.5574394888806978, "grad_norm": 0.29629942424551514, "learning_rate": 1.461082929357486e-05, "loss": 0.3548, "step": 4537 }, { "epoch": 0.557562354097555, "grad_norm": 0.3082523742825957, "learning_rate": 1.4604398199609092e-05, "loss": 0.3709, "step": 4538 }, { "epoch": 0.5576852193144121, "grad_norm": 0.3062877421942766, "learning_rate": 1.4597967178411715e-05, "loss": 0.3888, "step": 4539 }, { "epoch": 0.5578080845312692, "grad_norm": 0.3508937604702014, "learning_rate": 1.4591536231165668e-05, "loss": 0.4625, "step": 4540 }, { "epoch": 0.5579309497481263, "grad_norm": 0.36920124830675066, "learning_rate": 1.4585105359053882e-05, "loss": 0.3893, "step": 4541 }, { "epoch": 0.5580538149649834, "grad_norm": 0.24569185425493759, "learning_rate": 1.4578674563259272e-05, "loss": 0.425, "step": 4542 }, { "epoch": 0.5581766801818405, "grad_norm": 0.3726704548213319, "learning_rate": 1.4572243844964745e-05, "loss": 0.4128, "step": 4543 }, { "epoch": 0.5582995453986976, "grad_norm": 0.36365271899140983, "learning_rate": 1.4565813205353191e-05, "loss": 0.4408, "step": 4544 }, { "epoch": 0.5584224106155548, "grad_norm": 0.28069082206347495, "learning_rate": 1.455938264560748e-05, "loss": 0.3866, "step": 4545 }, { "epoch": 0.5585452758324119, "grad_norm": 0.41930441845650657, "learning_rate": 1.455295216691047e-05, "loss": 0.4502, "step": 4546 }, { "epoch": 0.558668141049269, "grad_norm": 0.37185663434878485, "learning_rate": 1.4546521770445014e-05, "loss": 0.3622, "step": 4547 }, { "epoch": 0.5587910062661261, "grad_norm": 0.41130977230601173, "learning_rate": 1.4540091457393938e-05, "loss": 0.3629, "step": 4548 }, { "epoch": 0.5589138714829832, "grad_norm": 0.3551112538896807, "learning_rate": 1.4533661228940056e-05, "loss": 0.3864, "step": 4549 }, { "epoch": 0.5590367366998402, "grad_norm": 0.3755864412216793, "learning_rate": 1.452723108626616e-05, "loss": 0.3722, "step": 4550 }, { "epoch": 0.5591596019166973, "grad_norm": 0.30475240857880864, "learning_rate": 1.4520801030555044e-05, "loss": 0.4373, "step": 4551 }, { "epoch": 0.5592824671335545, "grad_norm": 0.3721086907493918, "learning_rate": 1.4514371062989473e-05, "loss": 0.4245, "step": 4552 }, { "epoch": 0.5594053323504116, "grad_norm": 0.36109608648354496, "learning_rate": 1.4507941184752195e-05, "loss": 0.4046, "step": 4553 }, { "epoch": 0.5595281975672687, "grad_norm": 0.33184479206030537, "learning_rate": 1.4501511397025943e-05, "loss": 0.3897, "step": 4554 }, { "epoch": 0.5596510627841258, "grad_norm": 0.35071110760321306, "learning_rate": 1.449508170099344e-05, "loss": 0.3076, "step": 4555 }, { "epoch": 0.5597739280009829, "grad_norm": 0.3783176642674468, "learning_rate": 1.4488652097837384e-05, "loss": 0.4038, "step": 4556 }, { "epoch": 0.55989679321784, "grad_norm": 0.33227545017361165, "learning_rate": 1.4482222588740448e-05, "loss": 0.4502, "step": 4557 }, { "epoch": 0.5600196584346971, "grad_norm": 0.3692175922535993, "learning_rate": 1.447579317488532e-05, "loss": 0.4073, "step": 4558 }, { "epoch": 0.5601425236515543, "grad_norm": 0.4121152880488028, "learning_rate": 1.4469363857454635e-05, "loss": 0.4548, "step": 4559 }, { "epoch": 0.5602653888684114, "grad_norm": 0.3756178622803251, "learning_rate": 1.4462934637631027e-05, "loss": 0.3796, "step": 4560 }, { "epoch": 0.5603882540852685, "grad_norm": 0.39417169430339316, "learning_rate": 1.4456505516597107e-05, "loss": 0.3485, "step": 4561 }, { "epoch": 0.5605111193021256, "grad_norm": 0.3233230362396991, "learning_rate": 1.4450076495535477e-05, "loss": 0.4024, "step": 4562 }, { "epoch": 0.5606339845189827, "grad_norm": 0.3049138706336511, "learning_rate": 1.4443647575628707e-05, "loss": 0.3544, "step": 4563 }, { "epoch": 0.5607568497358398, "grad_norm": 0.5259945774789223, "learning_rate": 1.443721875805936e-05, "loss": 0.3734, "step": 4564 }, { "epoch": 0.5608797149526968, "grad_norm": 0.3245091991563892, "learning_rate": 1.4430790044009965e-05, "loss": 0.4372, "step": 4565 }, { "epoch": 0.561002580169554, "grad_norm": 0.35252853191591693, "learning_rate": 1.4424361434663057e-05, "loss": 0.3729, "step": 4566 }, { "epoch": 0.5611254453864111, "grad_norm": 0.39689263254914836, "learning_rate": 1.4417932931201126e-05, "loss": 0.4311, "step": 4567 }, { "epoch": 0.5612483106032682, "grad_norm": 0.45444130293524343, "learning_rate": 1.4411504534806662e-05, "loss": 0.3998, "step": 4568 }, { "epoch": 0.5613711758201253, "grad_norm": 0.3996524596000613, "learning_rate": 1.4405076246662113e-05, "loss": 0.4604, "step": 4569 }, { "epoch": 0.5614940410369824, "grad_norm": 0.35728566789060817, "learning_rate": 1.439864806794994e-05, "loss": 0.4147, "step": 4570 }, { "epoch": 0.5616169062538395, "grad_norm": 0.3799386638238981, "learning_rate": 1.4392219999852552e-05, "loss": 0.3246, "step": 4571 }, { "epoch": 0.5617397714706966, "grad_norm": 0.30980961034218835, "learning_rate": 1.4385792043552354e-05, "loss": 0.3244, "step": 4572 }, { "epoch": 0.5618626366875538, "grad_norm": 0.31398522942547125, "learning_rate": 1.4379364200231724e-05, "loss": 0.3998, "step": 4573 }, { "epoch": 0.5619855019044109, "grad_norm": 0.4112637029172515, "learning_rate": 1.4372936471073028e-05, "loss": 0.3665, "step": 4574 }, { "epoch": 0.562108367121268, "grad_norm": 0.32254076906023, "learning_rate": 1.43665088572586e-05, "loss": 0.4084, "step": 4575 }, { "epoch": 0.5622312323381251, "grad_norm": 0.33103552852957085, "learning_rate": 1.4360081359970755e-05, "loss": 0.338, "step": 4576 }, { "epoch": 0.5623540975549822, "grad_norm": 0.3297693792881429, "learning_rate": 1.4353653980391799e-05, "loss": 0.3697, "step": 4577 }, { "epoch": 0.5624769627718393, "grad_norm": 0.33916147464356805, "learning_rate": 1.4347226719704e-05, "loss": 0.3619, "step": 4578 }, { "epoch": 0.5625998279886965, "grad_norm": 0.4957409810914106, "learning_rate": 1.4340799579089615e-05, "loss": 0.418, "step": 4579 }, { "epoch": 0.5627226932055535, "grad_norm": 0.3474809911763493, "learning_rate": 1.4334372559730867e-05, "loss": 0.3717, "step": 4580 }, { "epoch": 0.5628455584224106, "grad_norm": 0.3452293717461288, "learning_rate": 1.4327945662809975e-05, "loss": 0.3544, "step": 4581 }, { "epoch": 0.5629684236392677, "grad_norm": 0.3499353359215728, "learning_rate": 1.4321518889509118e-05, "loss": 0.4572, "step": 4582 }, { "epoch": 0.5630912888561248, "grad_norm": 0.36573577147931097, "learning_rate": 1.4315092241010465e-05, "loss": 0.3903, "step": 4583 }, { "epoch": 0.5632141540729819, "grad_norm": 0.3475834219667151, "learning_rate": 1.4308665718496143e-05, "loss": 0.4365, "step": 4584 }, { "epoch": 0.563337019289839, "grad_norm": 0.3174304431912385, "learning_rate": 1.4302239323148284e-05, "loss": 0.3202, "step": 4585 }, { "epoch": 0.5634598845066962, "grad_norm": 0.3275432546005546, "learning_rate": 1.4295813056148979e-05, "loss": 0.3084, "step": 4586 }, { "epoch": 0.5635827497235533, "grad_norm": 0.4304033344296862, "learning_rate": 1.4289386918680294e-05, "loss": 0.4999, "step": 4587 }, { "epoch": 0.5637056149404104, "grad_norm": 0.28337589288100673, "learning_rate": 1.428296091192427e-05, "loss": 0.3766, "step": 4588 }, { "epoch": 0.5638284801572675, "grad_norm": 0.3936400513964984, "learning_rate": 1.4276535037062943e-05, "loss": 0.4606, "step": 4589 }, { "epoch": 0.5639513453741246, "grad_norm": 0.35999722215326235, "learning_rate": 1.4270109295278305e-05, "loss": 0.3879, "step": 4590 }, { "epoch": 0.5640742105909817, "grad_norm": 0.34105428974759255, "learning_rate": 1.4263683687752329e-05, "loss": 0.399, "step": 4591 }, { "epoch": 0.5641970758078388, "grad_norm": 0.3200119232315497, "learning_rate": 1.4257258215666957e-05, "loss": 0.4702, "step": 4592 }, { "epoch": 0.564319941024696, "grad_norm": 0.35897569657859263, "learning_rate": 1.4250832880204126e-05, "loss": 0.4838, "step": 4593 }, { "epoch": 0.564442806241553, "grad_norm": 0.3644370223362971, "learning_rate": 1.4244407682545728e-05, "loss": 0.4128, "step": 4594 }, { "epoch": 0.5645656714584101, "grad_norm": 0.46750438131143623, "learning_rate": 1.4237982623873629e-05, "loss": 0.4751, "step": 4595 }, { "epoch": 0.5646885366752672, "grad_norm": 0.35110180832179916, "learning_rate": 1.4231557705369689e-05, "loss": 0.4618, "step": 4596 }, { "epoch": 0.5648114018921243, "grad_norm": 0.33738580271116886, "learning_rate": 1.4225132928215729e-05, "loss": 0.4323, "step": 4597 }, { "epoch": 0.5649342671089814, "grad_norm": 0.3899190779564103, "learning_rate": 1.4218708293593539e-05, "loss": 0.3734, "step": 4598 }, { "epoch": 0.5650571323258385, "grad_norm": 0.30815339379623813, "learning_rate": 1.421228380268489e-05, "loss": 0.3772, "step": 4599 }, { "epoch": 0.5651799975426957, "grad_norm": 0.27800554306381103, "learning_rate": 1.420585945667153e-05, "loss": 0.3896, "step": 4600 }, { "epoch": 0.5653028627595528, "grad_norm": 0.3660288763339177, "learning_rate": 1.4199435256735172e-05, "loss": 0.381, "step": 4601 }, { "epoch": 0.5654257279764099, "grad_norm": 0.3654727754193103, "learning_rate": 1.4193011204057507e-05, "loss": 0.3491, "step": 4602 }, { "epoch": 0.565548593193267, "grad_norm": 0.31304637110751365, "learning_rate": 1.4186587299820193e-05, "loss": 0.4302, "step": 4603 }, { "epoch": 0.5656714584101241, "grad_norm": 0.3349885531090961, "learning_rate": 1.4180163545204875e-05, "loss": 0.4006, "step": 4604 }, { "epoch": 0.5657943236269812, "grad_norm": 0.390196751637754, "learning_rate": 1.4173739941393156e-05, "loss": 0.3991, "step": 4605 }, { "epoch": 0.5659171888438383, "grad_norm": 0.3326996923805415, "learning_rate": 1.4167316489566617e-05, "loss": 0.3621, "step": 4606 }, { "epoch": 0.5660400540606955, "grad_norm": 0.34210236593302806, "learning_rate": 1.4160893190906804e-05, "loss": 0.311, "step": 4607 }, { "epoch": 0.5661629192775526, "grad_norm": 0.31303053122410296, "learning_rate": 1.4154470046595251e-05, "loss": 0.3259, "step": 4608 }, { "epoch": 0.5662857844944096, "grad_norm": 0.35336703717739704, "learning_rate": 1.414804705781345e-05, "loss": 0.4009, "step": 4609 }, { "epoch": 0.5664086497112667, "grad_norm": 0.33955997265127463, "learning_rate": 1.4141624225742867e-05, "loss": 0.4208, "step": 4610 }, { "epoch": 0.5665315149281238, "grad_norm": 0.3616568052484809, "learning_rate": 1.4135201551564937e-05, "loss": 0.4766, "step": 4611 }, { "epoch": 0.5666543801449809, "grad_norm": 0.40564996901641587, "learning_rate": 1.4128779036461077e-05, "loss": 0.3967, "step": 4612 }, { "epoch": 0.566777245361838, "grad_norm": 0.3522429510647814, "learning_rate": 1.4122356681612664e-05, "loss": 0.4249, "step": 4613 }, { "epoch": 0.5669001105786952, "grad_norm": 0.41712236186759183, "learning_rate": 1.4115934488201047e-05, "loss": 0.446, "step": 4614 }, { "epoch": 0.5670229757955523, "grad_norm": 0.2716735212363015, "learning_rate": 1.4109512457407543e-05, "loss": 0.3394, "step": 4615 }, { "epoch": 0.5671458410124094, "grad_norm": 0.3036496406847069, "learning_rate": 1.4103090590413452e-05, "loss": 0.326, "step": 4616 }, { "epoch": 0.5672687062292665, "grad_norm": 0.34150160449013245, "learning_rate": 1.409666888840003e-05, "loss": 0.4393, "step": 4617 }, { "epoch": 0.5673915714461236, "grad_norm": 0.38084709015548984, "learning_rate": 1.4090247352548504e-05, "loss": 0.3249, "step": 4618 }, { "epoch": 0.5675144366629807, "grad_norm": 0.31351019710326145, "learning_rate": 1.4083825984040083e-05, "loss": 0.3736, "step": 4619 }, { "epoch": 0.5676373018798379, "grad_norm": 0.3321460687558929, "learning_rate": 1.407740478405593e-05, "loss": 0.3798, "step": 4620 }, { "epoch": 0.567760167096695, "grad_norm": 0.35897492622840504, "learning_rate": 1.4070983753777183e-05, "loss": 0.4534, "step": 4621 }, { "epoch": 0.5678830323135521, "grad_norm": 0.42408394327689247, "learning_rate": 1.4064562894384944e-05, "loss": 0.531, "step": 4622 }, { "epoch": 0.5680058975304091, "grad_norm": 0.3107301228827491, "learning_rate": 1.40581422070603e-05, "loss": 0.3988, "step": 4623 }, { "epoch": 0.5681287627472662, "grad_norm": 0.3091486529331593, "learning_rate": 1.4051721692984289e-05, "loss": 0.4298, "step": 4624 }, { "epoch": 0.5682516279641233, "grad_norm": 0.24608929185239925, "learning_rate": 1.4045301353337922e-05, "loss": 0.3469, "step": 4625 }, { "epoch": 0.5683744931809804, "grad_norm": 0.3560143843381713, "learning_rate": 1.4038881189302175e-05, "loss": 0.4312, "step": 4626 }, { "epoch": 0.5684973583978375, "grad_norm": 0.28819094227822295, "learning_rate": 1.4032461202058009e-05, "loss": 0.4296, "step": 4627 }, { "epoch": 0.5686202236146947, "grad_norm": 0.3381876713021278, "learning_rate": 1.4026041392786325e-05, "loss": 0.4009, "step": 4628 }, { "epoch": 0.5687430888315518, "grad_norm": 0.3816040748862209, "learning_rate": 1.4019621762668011e-05, "loss": 0.3919, "step": 4629 }, { "epoch": 0.5688659540484089, "grad_norm": 0.3398352317760337, "learning_rate": 1.4013202312883912e-05, "loss": 0.4189, "step": 4630 }, { "epoch": 0.568988819265266, "grad_norm": 0.2939936049850057, "learning_rate": 1.4006783044614853e-05, "loss": 0.4165, "step": 4631 }, { "epoch": 0.5691116844821231, "grad_norm": 0.38118319954342667, "learning_rate": 1.400036395904161e-05, "loss": 0.3625, "step": 4632 }, { "epoch": 0.5692345496989802, "grad_norm": 0.3622188344378798, "learning_rate": 1.3993945057344935e-05, "loss": 0.2992, "step": 4633 }, { "epoch": 0.5693574149158374, "grad_norm": 0.381931608968319, "learning_rate": 1.3987526340705538e-05, "loss": 0.3772, "step": 4634 }, { "epoch": 0.5694802801326945, "grad_norm": 0.3631599236598145, "learning_rate": 1.3981107810304106e-05, "loss": 0.3812, "step": 4635 }, { "epoch": 0.5696031453495516, "grad_norm": 0.34040780968539885, "learning_rate": 1.3974689467321289e-05, "loss": 0.4559, "step": 4636 }, { "epoch": 0.5697260105664087, "grad_norm": 0.3235734708911234, "learning_rate": 1.396827131293769e-05, "loss": 0.3484, "step": 4637 }, { "epoch": 0.5698488757832657, "grad_norm": 0.3187550717333074, "learning_rate": 1.3961853348333896e-05, "loss": 0.3914, "step": 4638 }, { "epoch": 0.5699717410001228, "grad_norm": 0.33651196252952426, "learning_rate": 1.3955435574690444e-05, "loss": 0.4816, "step": 4639 }, { "epoch": 0.5700946062169799, "grad_norm": 0.31830854553020715, "learning_rate": 1.3949017993187848e-05, "loss": 0.4537, "step": 4640 }, { "epoch": 0.570217471433837, "grad_norm": 0.5486519693881378, "learning_rate": 1.3942600605006565e-05, "loss": 0.4895, "step": 4641 }, { "epoch": 0.5703403366506942, "grad_norm": 0.4453711047650728, "learning_rate": 1.3936183411327054e-05, "loss": 0.4886, "step": 4642 }, { "epoch": 0.5704632018675513, "grad_norm": 0.32311247424581513, "learning_rate": 1.3929766413329702e-05, "loss": 0.3676, "step": 4643 }, { "epoch": 0.5705860670844084, "grad_norm": 0.2964237023335569, "learning_rate": 1.392334961219488e-05, "loss": 0.3381, "step": 4644 }, { "epoch": 0.5707089323012655, "grad_norm": 0.3615660064908533, "learning_rate": 1.391693300910291e-05, "loss": 0.3526, "step": 4645 }, { "epoch": 0.5708317975181226, "grad_norm": 0.3735880127280742, "learning_rate": 1.3910516605234091e-05, "loss": 0.3648, "step": 4646 }, { "epoch": 0.5709546627349797, "grad_norm": 0.3625178816523554, "learning_rate": 1.390410040176868e-05, "loss": 0.3926, "step": 4647 }, { "epoch": 0.5710775279518369, "grad_norm": 0.3857774046039519, "learning_rate": 1.3897684399886892e-05, "loss": 0.4409, "step": 4648 }, { "epoch": 0.571200393168694, "grad_norm": 0.3394500045389547, "learning_rate": 1.3891268600768902e-05, "loss": 0.4232, "step": 4649 }, { "epoch": 0.5713232583855511, "grad_norm": 0.3064490497169047, "learning_rate": 1.3884853005594869e-05, "loss": 0.3321, "step": 4650 }, { "epoch": 0.5714461236024082, "grad_norm": 0.26683707268596635, "learning_rate": 1.3878437615544896e-05, "loss": 0.3281, "step": 4651 }, { "epoch": 0.5715689888192652, "grad_norm": 0.2935767694233553, "learning_rate": 1.3872022431799047e-05, "loss": 0.4198, "step": 4652 }, { "epoch": 0.5716918540361223, "grad_norm": 0.30544130148694554, "learning_rate": 1.3865607455537352e-05, "loss": 0.3775, "step": 4653 }, { "epoch": 0.5718147192529794, "grad_norm": 0.34814929338812206, "learning_rate": 1.3859192687939813e-05, "loss": 0.4148, "step": 4654 }, { "epoch": 0.5719375844698366, "grad_norm": 0.3467828257374375, "learning_rate": 1.3852778130186384e-05, "loss": 0.3717, "step": 4655 }, { "epoch": 0.5720604496866937, "grad_norm": 0.3306673406968698, "learning_rate": 1.3846363783456976e-05, "loss": 0.4252, "step": 4656 }, { "epoch": 0.5721833149035508, "grad_norm": 0.44791373522854905, "learning_rate": 1.3839949648931465e-05, "loss": 0.4361, "step": 4657 }, { "epoch": 0.5723061801204079, "grad_norm": 0.381671465088474, "learning_rate": 1.3833535727789695e-05, "loss": 0.3489, "step": 4658 }, { "epoch": 0.572429045337265, "grad_norm": 0.37463343000384663, "learning_rate": 1.3827122021211465e-05, "loss": 0.4, "step": 4659 }, { "epoch": 0.5725519105541221, "grad_norm": 0.3320073966647188, "learning_rate": 1.3820708530376527e-05, "loss": 0.3814, "step": 4660 }, { "epoch": 0.5726747757709792, "grad_norm": 0.5080819472442688, "learning_rate": 1.3814295256464613e-05, "loss": 0.4013, "step": 4661 }, { "epoch": 0.5727976409878364, "grad_norm": 0.4570216164527147, "learning_rate": 1.3807882200655396e-05, "loss": 0.5345, "step": 4662 }, { "epoch": 0.5729205062046935, "grad_norm": 0.4055120249371116, "learning_rate": 1.3801469364128515e-05, "loss": 0.432, "step": 4663 }, { "epoch": 0.5730433714215506, "grad_norm": 0.34275733020412263, "learning_rate": 1.3795056748063574e-05, "loss": 0.3425, "step": 4664 }, { "epoch": 0.5731662366384077, "grad_norm": 0.31516365745314134, "learning_rate": 1.3788644353640129e-05, "loss": 0.3825, "step": 4665 }, { "epoch": 0.5732891018552648, "grad_norm": 0.4173892822820638, "learning_rate": 1.3782232182037701e-05, "loss": 0.4152, "step": 4666 }, { "epoch": 0.5734119670721218, "grad_norm": 0.33586008232235093, "learning_rate": 1.3775820234435764e-05, "loss": 0.5241, "step": 4667 }, { "epoch": 0.573534832288979, "grad_norm": 0.35722592395852265, "learning_rate": 1.3769408512013748e-05, "loss": 0.3754, "step": 4668 }, { "epoch": 0.5736576975058361, "grad_norm": 0.34733814909293453, "learning_rate": 1.3762997015951066e-05, "loss": 0.2915, "step": 4669 }, { "epoch": 0.5737805627226932, "grad_norm": 0.5903614479523445, "learning_rate": 1.375658574742706e-05, "loss": 0.5306, "step": 4670 }, { "epoch": 0.5739034279395503, "grad_norm": 0.362875026266888, "learning_rate": 1.375017470762104e-05, "loss": 0.3313, "step": 4671 }, { "epoch": 0.5740262931564074, "grad_norm": 0.36608057236615127, "learning_rate": 1.3743763897712271e-05, "loss": 0.3682, "step": 4672 }, { "epoch": 0.5741491583732645, "grad_norm": 0.3445536730663286, "learning_rate": 1.3737353318879993e-05, "loss": 0.4254, "step": 4673 }, { "epoch": 0.5742720235901216, "grad_norm": 0.3893777464628386, "learning_rate": 1.3730942972303383e-05, "loss": 0.4606, "step": 4674 }, { "epoch": 0.5743948888069788, "grad_norm": 0.3935613113501984, "learning_rate": 1.3724532859161583e-05, "loss": 0.3908, "step": 4675 }, { "epoch": 0.5745177540238359, "grad_norm": 0.34460922738428773, "learning_rate": 1.371812298063369e-05, "loss": 0.4165, "step": 4676 }, { "epoch": 0.574640619240693, "grad_norm": 0.4193313344109486, "learning_rate": 1.3711713337898763e-05, "loss": 0.4466, "step": 4677 }, { "epoch": 0.5747634844575501, "grad_norm": 0.3505461348698007, "learning_rate": 1.3705303932135813e-05, "loss": 0.3804, "step": 4678 }, { "epoch": 0.5748863496744072, "grad_norm": 0.36505414896773297, "learning_rate": 1.3698894764523809e-05, "loss": 0.3908, "step": 4679 }, { "epoch": 0.5750092148912643, "grad_norm": 0.29328460140101315, "learning_rate": 1.3692485836241668e-05, "loss": 0.3917, "step": 4680 }, { "epoch": 0.5751320801081214, "grad_norm": 0.3694363282622603, "learning_rate": 1.3686077148468285e-05, "loss": 0.396, "step": 4681 }, { "epoch": 0.5752549453249785, "grad_norm": 0.3216687776478617, "learning_rate": 1.367966870238249e-05, "loss": 0.405, "step": 4682 }, { "epoch": 0.5753778105418356, "grad_norm": 0.4161624497288167, "learning_rate": 1.367326049916307e-05, "loss": 0.4057, "step": 4683 }, { "epoch": 0.5755006757586927, "grad_norm": 0.3403440908253086, "learning_rate": 1.366685253998878e-05, "loss": 0.3987, "step": 4684 }, { "epoch": 0.5756235409755498, "grad_norm": 0.41292973637686836, "learning_rate": 1.3660444826038322e-05, "loss": 0.4292, "step": 4685 }, { "epoch": 0.5757464061924069, "grad_norm": 0.35137276904237724, "learning_rate": 1.3654037358490348e-05, "loss": 0.3892, "step": 4686 }, { "epoch": 0.575869271409264, "grad_norm": 0.34562528449500296, "learning_rate": 1.3647630138523467e-05, "loss": 0.391, "step": 4687 }, { "epoch": 0.5759921366261211, "grad_norm": 0.29849921606327606, "learning_rate": 1.364122316731626e-05, "loss": 0.364, "step": 4688 }, { "epoch": 0.5761150018429783, "grad_norm": 0.3192395963118813, "learning_rate": 1.3634816446047237e-05, "loss": 0.3779, "step": 4689 }, { "epoch": 0.5762378670598354, "grad_norm": 0.3546027114834717, "learning_rate": 1.3628409975894878e-05, "loss": 0.4332, "step": 4690 }, { "epoch": 0.5763607322766925, "grad_norm": 0.3273395956033755, "learning_rate": 1.36220037580376e-05, "loss": 0.3661, "step": 4691 }, { "epoch": 0.5764835974935496, "grad_norm": 0.33730274397114035, "learning_rate": 1.36155977936538e-05, "loss": 0.3674, "step": 4692 }, { "epoch": 0.5766064627104067, "grad_norm": 0.37882518435603574, "learning_rate": 1.360919208392181e-05, "loss": 0.3513, "step": 4693 }, { "epoch": 0.5767293279272638, "grad_norm": 0.32410826061606995, "learning_rate": 1.3602786630019914e-05, "loss": 0.3321, "step": 4694 }, { "epoch": 0.576852193144121, "grad_norm": 0.3941671233052485, "learning_rate": 1.3596381433126356e-05, "loss": 0.3471, "step": 4695 }, { "epoch": 0.576975058360978, "grad_norm": 0.4680308673123178, "learning_rate": 1.3589976494419333e-05, "loss": 0.3757, "step": 4696 }, { "epoch": 0.5770979235778351, "grad_norm": 0.46165077613636973, "learning_rate": 1.3583571815076988e-05, "loss": 0.4853, "step": 4697 }, { "epoch": 0.5772207887946922, "grad_norm": 0.3507999825597982, "learning_rate": 1.3577167396277421e-05, "loss": 0.424, "step": 4698 }, { "epoch": 0.5773436540115493, "grad_norm": 0.34732460105897317, "learning_rate": 1.357076323919868e-05, "loss": 0.3288, "step": 4699 }, { "epoch": 0.5774665192284064, "grad_norm": 0.29129722558733595, "learning_rate": 1.3564359345018777e-05, "loss": 0.399, "step": 4700 }, { "epoch": 0.5775893844452635, "grad_norm": 0.34688014645411935, "learning_rate": 1.3557955714915665e-05, "loss": 0.399, "step": 4701 }, { "epoch": 0.5777122496621206, "grad_norm": 0.3597017983082421, "learning_rate": 1.3551552350067241e-05, "loss": 0.4819, "step": 4702 }, { "epoch": 0.5778351148789778, "grad_norm": 0.3455805990305377, "learning_rate": 1.3545149251651372e-05, "loss": 0.4138, "step": 4703 }, { "epoch": 0.5779579800958349, "grad_norm": 0.32086191722215884, "learning_rate": 1.3538746420845866e-05, "loss": 0.3475, "step": 4704 }, { "epoch": 0.578080845312692, "grad_norm": 0.37559249720199606, "learning_rate": 1.3532343858828476e-05, "loss": 0.4639, "step": 4705 }, { "epoch": 0.5782037105295491, "grad_norm": 0.32328720593015126, "learning_rate": 1.3525941566776909e-05, "loss": 0.3859, "step": 4706 }, { "epoch": 0.5783265757464062, "grad_norm": 0.36382514373317587, "learning_rate": 1.351953954586884e-05, "loss": 0.379, "step": 4707 }, { "epoch": 0.5784494409632633, "grad_norm": 0.4338985375535547, "learning_rate": 1.3513137797281868e-05, "loss": 0.3915, "step": 4708 }, { "epoch": 0.5785723061801205, "grad_norm": 0.3423125979727923, "learning_rate": 1.3506736322193556e-05, "loss": 0.348, "step": 4709 }, { "epoch": 0.5786951713969776, "grad_norm": 0.3763613890354591, "learning_rate": 1.350033512178141e-05, "loss": 0.4477, "step": 4710 }, { "epoch": 0.5788180366138346, "grad_norm": 0.272521515026343, "learning_rate": 1.3493934197222893e-05, "loss": 0.4608, "step": 4711 }, { "epoch": 0.5789409018306917, "grad_norm": 0.33699276938979483, "learning_rate": 1.3487533549695417e-05, "loss": 0.3764, "step": 4712 }, { "epoch": 0.5790637670475488, "grad_norm": 0.35268931522831565, "learning_rate": 1.3481133180376336e-05, "loss": 0.3842, "step": 4713 }, { "epoch": 0.5791866322644059, "grad_norm": 0.34309871225798594, "learning_rate": 1.3474733090442953e-05, "loss": 0.3791, "step": 4714 }, { "epoch": 0.579309497481263, "grad_norm": 0.4738327862940407, "learning_rate": 1.3468333281072528e-05, "loss": 0.4056, "step": 4715 }, { "epoch": 0.5794323626981202, "grad_norm": 0.3463136485454795, "learning_rate": 1.3461933753442265e-05, "loss": 0.4247, "step": 4716 }, { "epoch": 0.5795552279149773, "grad_norm": 0.3598273070013587, "learning_rate": 1.3455534508729313e-05, "loss": 0.272, "step": 4717 }, { "epoch": 0.5796780931318344, "grad_norm": 0.3270815101840358, "learning_rate": 1.3449135548110763e-05, "loss": 0.4881, "step": 4718 }, { "epoch": 0.5798009583486915, "grad_norm": 0.3742378341886708, "learning_rate": 1.3442736872763681e-05, "loss": 0.4795, "step": 4719 }, { "epoch": 0.5799238235655486, "grad_norm": 0.393384974817101, "learning_rate": 1.343633848386505e-05, "loss": 0.3902, "step": 4720 }, { "epoch": 0.5800466887824057, "grad_norm": 0.41338405559967517, "learning_rate": 1.3429940382591815e-05, "loss": 0.3557, "step": 4721 }, { "epoch": 0.5801695539992628, "grad_norm": 0.40798494002644103, "learning_rate": 1.3423542570120861e-05, "loss": 0.39, "step": 4722 }, { "epoch": 0.58029241921612, "grad_norm": 0.32906464746339553, "learning_rate": 1.3417145047629029e-05, "loss": 0.3307, "step": 4723 }, { "epoch": 0.5804152844329771, "grad_norm": 0.3990730182414341, "learning_rate": 1.3410747816293102e-05, "loss": 0.3795, "step": 4724 }, { "epoch": 0.5805381496498341, "grad_norm": 0.35901374402395164, "learning_rate": 1.34043508772898e-05, "loss": 0.4554, "step": 4725 }, { "epoch": 0.5806610148666912, "grad_norm": 0.36734037039325146, "learning_rate": 1.3397954231795815e-05, "loss": 0.3518, "step": 4726 }, { "epoch": 0.5807838800835483, "grad_norm": 0.3164036494608469, "learning_rate": 1.3391557880987757e-05, "loss": 0.3682, "step": 4727 }, { "epoch": 0.5809067453004054, "grad_norm": 0.3560380867363539, "learning_rate": 1.3385161826042199e-05, "loss": 0.4071, "step": 4728 }, { "epoch": 0.5810296105172625, "grad_norm": 0.3030141336980799, "learning_rate": 1.3378766068135642e-05, "loss": 0.323, "step": 4729 }, { "epoch": 0.5811524757341197, "grad_norm": 0.43570865795385816, "learning_rate": 1.337237060844456e-05, "loss": 0.3527, "step": 4730 }, { "epoch": 0.5812753409509768, "grad_norm": 0.3675134565406862, "learning_rate": 1.3365975448145348e-05, "loss": 0.4131, "step": 4731 }, { "epoch": 0.5813982061678339, "grad_norm": 0.3283978654950152, "learning_rate": 1.3359580588414354e-05, "loss": 0.3435, "step": 4732 }, { "epoch": 0.581521071384691, "grad_norm": 0.3826057770444086, "learning_rate": 1.3353186030427868e-05, "loss": 0.4299, "step": 4733 }, { "epoch": 0.5816439366015481, "grad_norm": 0.2923803469328987, "learning_rate": 1.3346791775362136e-05, "loss": 0.4149, "step": 4734 }, { "epoch": 0.5817668018184052, "grad_norm": 0.3225492591689489, "learning_rate": 1.3340397824393337e-05, "loss": 0.3954, "step": 4735 }, { "epoch": 0.5818896670352623, "grad_norm": 0.3294617751702237, "learning_rate": 1.3334004178697595e-05, "loss": 0.3404, "step": 4736 }, { "epoch": 0.5820125322521195, "grad_norm": 0.32364538386168534, "learning_rate": 1.3327610839450972e-05, "loss": 0.3902, "step": 4737 }, { "epoch": 0.5821353974689766, "grad_norm": 0.35846690092956995, "learning_rate": 1.3321217807829498e-05, "loss": 0.3259, "step": 4738 }, { "epoch": 0.5822582626858337, "grad_norm": 0.41684787803919277, "learning_rate": 1.331482508500912e-05, "loss": 0.421, "step": 4739 }, { "epoch": 0.5823811279026907, "grad_norm": 0.35751558600024524, "learning_rate": 1.3308432672165738e-05, "loss": 0.3253, "step": 4740 }, { "epoch": 0.5825039931195478, "grad_norm": 0.3275753391840251, "learning_rate": 1.3302040570475194e-05, "loss": 0.3495, "step": 4741 }, { "epoch": 0.5826268583364049, "grad_norm": 0.3565441840907825, "learning_rate": 1.3295648781113277e-05, "loss": 0.4539, "step": 4742 }, { "epoch": 0.582749723553262, "grad_norm": 0.2839931719747165, "learning_rate": 1.3289257305255716e-05, "loss": 0.4258, "step": 4743 }, { "epoch": 0.5828725887701192, "grad_norm": 0.3609420663253216, "learning_rate": 1.3282866144078171e-05, "loss": 0.356, "step": 4744 }, { "epoch": 0.5829954539869763, "grad_norm": 0.31827630433347465, "learning_rate": 1.327647529875627e-05, "loss": 0.4592, "step": 4745 }, { "epoch": 0.5831183192038334, "grad_norm": 0.40984262637862484, "learning_rate": 1.327008477046556e-05, "loss": 0.4499, "step": 4746 }, { "epoch": 0.5832411844206905, "grad_norm": 0.32421404099426576, "learning_rate": 1.3263694560381538e-05, "loss": 0.3258, "step": 4747 }, { "epoch": 0.5833640496375476, "grad_norm": 0.3204682875091887, "learning_rate": 1.3257304669679637e-05, "loss": 0.3034, "step": 4748 }, { "epoch": 0.5834869148544047, "grad_norm": 0.3069044819049799, "learning_rate": 1.3250915099535245e-05, "loss": 0.4357, "step": 4749 }, { "epoch": 0.5836097800712619, "grad_norm": 0.3682844243690121, "learning_rate": 1.3244525851123676e-05, "loss": 0.3344, "step": 4750 }, { "epoch": 0.583732645288119, "grad_norm": 0.337611830253193, "learning_rate": 1.3238136925620191e-05, "loss": 0.4074, "step": 4751 }, { "epoch": 0.5838555105049761, "grad_norm": 0.3282739911332596, "learning_rate": 1.3231748324199989e-05, "loss": 0.4511, "step": 4752 }, { "epoch": 0.5839783757218332, "grad_norm": 0.32459003816706694, "learning_rate": 1.322536004803822e-05, "loss": 0.3554, "step": 4753 }, { "epoch": 0.5841012409386902, "grad_norm": 0.3903686906980373, "learning_rate": 1.321897209830996e-05, "loss": 0.3227, "step": 4754 }, { "epoch": 0.5842241061555473, "grad_norm": 0.30626962359837995, "learning_rate": 1.3212584476190233e-05, "loss": 0.433, "step": 4755 }, { "epoch": 0.5843469713724044, "grad_norm": 0.3954241320196068, "learning_rate": 1.3206197182853994e-05, "loss": 0.4189, "step": 4756 }, { "epoch": 0.5844698365892615, "grad_norm": 0.39287743685636023, "learning_rate": 1.3199810219476156e-05, "loss": 0.3836, "step": 4757 }, { "epoch": 0.5845927018061187, "grad_norm": 0.35137595643282604, "learning_rate": 1.3193423587231553e-05, "loss": 0.4258, "step": 4758 }, { "epoch": 0.5847155670229758, "grad_norm": 0.40107125172782954, "learning_rate": 1.3187037287294967e-05, "loss": 0.4428, "step": 4759 }, { "epoch": 0.5848384322398329, "grad_norm": 0.33932507855945543, "learning_rate": 1.318065132084111e-05, "loss": 0.4614, "step": 4760 }, { "epoch": 0.58496129745669, "grad_norm": 0.3269101054166342, "learning_rate": 1.3174265689044646e-05, "loss": 0.3942, "step": 4761 }, { "epoch": 0.5850841626735471, "grad_norm": 0.30840117876417417, "learning_rate": 1.3167880393080171e-05, "loss": 0.3633, "step": 4762 }, { "epoch": 0.5852070278904042, "grad_norm": 0.42121074559327726, "learning_rate": 1.3161495434122213e-05, "loss": 0.4603, "step": 4763 }, { "epoch": 0.5853298931072614, "grad_norm": 0.2839287834557967, "learning_rate": 1.315511081334524e-05, "loss": 0.3557, "step": 4764 }, { "epoch": 0.5854527583241185, "grad_norm": 0.3805974086181253, "learning_rate": 1.3148726531923677e-05, "loss": 0.3709, "step": 4765 }, { "epoch": 0.5855756235409756, "grad_norm": 0.4115843336861698, "learning_rate": 1.3142342591031862e-05, "loss": 0.4008, "step": 4766 }, { "epoch": 0.5856984887578327, "grad_norm": 0.28818352727551155, "learning_rate": 1.3135958991844076e-05, "loss": 0.3478, "step": 4767 }, { "epoch": 0.5858213539746898, "grad_norm": 0.3691259200004973, "learning_rate": 1.3129575735534548e-05, "loss": 0.3702, "step": 4768 }, { "epoch": 0.5859442191915468, "grad_norm": 0.3791096364836946, "learning_rate": 1.3123192823277435e-05, "loss": 0.3392, "step": 4769 }, { "epoch": 0.5860670844084039, "grad_norm": 0.3730652071926542, "learning_rate": 1.3116810256246828e-05, "loss": 0.3504, "step": 4770 }, { "epoch": 0.586189949625261, "grad_norm": 0.3523773707070132, "learning_rate": 1.3110428035616757e-05, "loss": 0.4387, "step": 4771 }, { "epoch": 0.5863128148421182, "grad_norm": 0.36035388020555725, "learning_rate": 1.31040461625612e-05, "loss": 0.3747, "step": 4772 }, { "epoch": 0.5864356800589753, "grad_norm": 0.30841310596401816, "learning_rate": 1.3097664638254057e-05, "loss": 0.4344, "step": 4773 }, { "epoch": 0.5865585452758324, "grad_norm": 0.36116473381139697, "learning_rate": 1.3091283463869167e-05, "loss": 0.3922, "step": 4774 }, { "epoch": 0.5866814104926895, "grad_norm": 0.3625310198777733, "learning_rate": 1.3084902640580297e-05, "loss": 0.4111, "step": 4775 }, { "epoch": 0.5868042757095466, "grad_norm": 0.32344710993586406, "learning_rate": 1.3078522169561172e-05, "loss": 0.3236, "step": 4776 }, { "epoch": 0.5869271409264037, "grad_norm": 0.44506434605921535, "learning_rate": 1.3072142051985436e-05, "loss": 0.4163, "step": 4777 }, { "epoch": 0.5870500061432609, "grad_norm": 0.3798119423000881, "learning_rate": 1.3065762289026665e-05, "loss": 0.4115, "step": 4778 }, { "epoch": 0.587172871360118, "grad_norm": 0.30381574564478553, "learning_rate": 1.3059382881858375e-05, "loss": 0.339, "step": 4779 }, { "epoch": 0.5872957365769751, "grad_norm": 0.34505775053295995, "learning_rate": 1.3053003831654019e-05, "loss": 0.4591, "step": 4780 }, { "epoch": 0.5874186017938322, "grad_norm": 0.36329383223401235, "learning_rate": 1.3046625139586984e-05, "loss": 0.4419, "step": 4781 }, { "epoch": 0.5875414670106893, "grad_norm": 0.32771786376680695, "learning_rate": 1.3040246806830585e-05, "loss": 0.3932, "step": 4782 }, { "epoch": 0.5876643322275463, "grad_norm": 0.32665992082934625, "learning_rate": 1.3033868834558071e-05, "loss": 0.3715, "step": 4783 }, { "epoch": 0.5877871974444034, "grad_norm": 0.3103193469578557, "learning_rate": 1.302749122394264e-05, "loss": 0.4044, "step": 4784 }, { "epoch": 0.5879100626612606, "grad_norm": 0.35738720336421, "learning_rate": 1.3021113976157408e-05, "loss": 0.3625, "step": 4785 }, { "epoch": 0.5880329278781177, "grad_norm": 0.3448624385113122, "learning_rate": 1.3014737092375423e-05, "loss": 0.3994, "step": 4786 }, { "epoch": 0.5881557930949748, "grad_norm": 0.3474198583835649, "learning_rate": 1.3008360573769676e-05, "loss": 0.4245, "step": 4787 }, { "epoch": 0.5882786583118319, "grad_norm": 0.33920651696452264, "learning_rate": 1.3001984421513085e-05, "loss": 0.3684, "step": 4788 }, { "epoch": 0.588401523528689, "grad_norm": 0.31226400313076963, "learning_rate": 1.2995608636778502e-05, "loss": 0.4122, "step": 4789 }, { "epoch": 0.5885243887455461, "grad_norm": 0.35299119526104217, "learning_rate": 1.2989233220738707e-05, "loss": 0.3975, "step": 4790 }, { "epoch": 0.5886472539624032, "grad_norm": 0.3693941516260295, "learning_rate": 1.2982858174566425e-05, "loss": 0.3722, "step": 4791 }, { "epoch": 0.5887701191792604, "grad_norm": 0.43505701490089643, "learning_rate": 1.2976483499434302e-05, "loss": 0.4162, "step": 4792 }, { "epoch": 0.5888929843961175, "grad_norm": 0.36976716476755955, "learning_rate": 1.2970109196514918e-05, "loss": 0.3667, "step": 4793 }, { "epoch": 0.5890158496129746, "grad_norm": 0.36707469001895576, "learning_rate": 1.2963735266980773e-05, "loss": 0.3625, "step": 4794 }, { "epoch": 0.5891387148298317, "grad_norm": 0.29655581139218623, "learning_rate": 1.2957361712004327e-05, "loss": 0.4518, "step": 4795 }, { "epoch": 0.5892615800466888, "grad_norm": 0.4118098282811761, "learning_rate": 1.295098853275795e-05, "loss": 0.3601, "step": 4796 }, { "epoch": 0.5893844452635459, "grad_norm": 0.28420569510857574, "learning_rate": 1.2944615730413941e-05, "loss": 0.3864, "step": 4797 }, { "epoch": 0.589507310480403, "grad_norm": 0.36707255019537743, "learning_rate": 1.2938243306144536e-05, "loss": 0.3906, "step": 4798 }, { "epoch": 0.5896301756972601, "grad_norm": 0.38101945742592486, "learning_rate": 1.2931871261121907e-05, "loss": 0.4188, "step": 4799 }, { "epoch": 0.5897530409141172, "grad_norm": 0.4211893520245812, "learning_rate": 1.292549959651815e-05, "loss": 0.4082, "step": 4800 }, { "epoch": 0.5898759061309743, "grad_norm": 0.43494492694421477, "learning_rate": 1.2919128313505286e-05, "loss": 0.3955, "step": 4801 }, { "epoch": 0.5899987713478314, "grad_norm": 0.38125550921898704, "learning_rate": 1.2912757413255266e-05, "loss": 0.3971, "step": 4802 }, { "epoch": 0.5901216365646885, "grad_norm": 0.312766722084985, "learning_rate": 1.2906386896939994e-05, "loss": 0.324, "step": 4803 }, { "epoch": 0.5902445017815456, "grad_norm": 0.5668657868872292, "learning_rate": 1.2900016765731271e-05, "loss": 0.5044, "step": 4804 }, { "epoch": 0.5903673669984028, "grad_norm": 0.39628205344618095, "learning_rate": 1.2893647020800847e-05, "loss": 0.4189, "step": 4805 }, { "epoch": 0.5904902322152599, "grad_norm": 0.4794410366396781, "learning_rate": 1.288727766332039e-05, "loss": 0.3739, "step": 4806 }, { "epoch": 0.590613097432117, "grad_norm": 0.35558296054893385, "learning_rate": 1.288090869446151e-05, "loss": 0.3136, "step": 4807 }, { "epoch": 0.5907359626489741, "grad_norm": 0.4385694572697661, "learning_rate": 1.287454011539573e-05, "loss": 0.4756, "step": 4808 }, { "epoch": 0.5908588278658312, "grad_norm": 0.3165096892616048, "learning_rate": 1.2868171927294507e-05, "loss": 0.3718, "step": 4809 }, { "epoch": 0.5909816930826883, "grad_norm": 0.349374368363766, "learning_rate": 1.2861804131329237e-05, "loss": 0.4043, "step": 4810 }, { "epoch": 0.5911045582995454, "grad_norm": 0.3185426570090293, "learning_rate": 1.2855436728671232e-05, "loss": 0.3819, "step": 4811 }, { "epoch": 0.5912274235164026, "grad_norm": 0.31205278885301563, "learning_rate": 1.2849069720491735e-05, "loss": 0.3707, "step": 4812 }, { "epoch": 0.5913502887332596, "grad_norm": 0.4340848605126974, "learning_rate": 1.2842703107961903e-05, "loss": 0.441, "step": 4813 }, { "epoch": 0.5914731539501167, "grad_norm": 0.37904460718279026, "learning_rate": 1.2836336892252851e-05, "loss": 0.4165, "step": 4814 }, { "epoch": 0.5915960191669738, "grad_norm": 0.3927838426353121, "learning_rate": 1.2829971074535597e-05, "loss": 0.3972, "step": 4815 }, { "epoch": 0.5917188843838309, "grad_norm": 0.3110065525156381, "learning_rate": 1.282360565598109e-05, "loss": 0.4184, "step": 4816 }, { "epoch": 0.591841749600688, "grad_norm": 0.42105560603865066, "learning_rate": 1.2817240637760206e-05, "loss": 0.4385, "step": 4817 }, { "epoch": 0.5919646148175451, "grad_norm": 0.48458556518163204, "learning_rate": 1.2810876021043753e-05, "loss": 0.364, "step": 4818 }, { "epoch": 0.5920874800344023, "grad_norm": 0.343543474111817, "learning_rate": 1.280451180700246e-05, "loss": 0.3689, "step": 4819 }, { "epoch": 0.5922103452512594, "grad_norm": 0.3758961109866574, "learning_rate": 1.2798147996806982e-05, "loss": 0.3963, "step": 4820 }, { "epoch": 0.5923332104681165, "grad_norm": 0.29310474376262174, "learning_rate": 1.2791784591627893e-05, "loss": 0.3735, "step": 4821 }, { "epoch": 0.5924560756849736, "grad_norm": 0.3340017326234202, "learning_rate": 1.2785421592635716e-05, "loss": 0.3206, "step": 4822 }, { "epoch": 0.5925789409018307, "grad_norm": 0.32134282630626126, "learning_rate": 1.2779059001000873e-05, "loss": 0.3312, "step": 4823 }, { "epoch": 0.5927018061186878, "grad_norm": 0.39140527433165634, "learning_rate": 1.2772696817893726e-05, "loss": 0.3774, "step": 4824 }, { "epoch": 0.592824671335545, "grad_norm": 0.34135454218953887, "learning_rate": 1.2766335044484548e-05, "loss": 0.3823, "step": 4825 }, { "epoch": 0.5929475365524021, "grad_norm": 0.3423318040646892, "learning_rate": 1.2759973681943559e-05, "loss": 0.4222, "step": 4826 }, { "epoch": 0.5930704017692591, "grad_norm": 0.34184271095455976, "learning_rate": 1.2753612731440882e-05, "loss": 0.3984, "step": 4827 }, { "epoch": 0.5931932669861162, "grad_norm": 0.3298230206049942, "learning_rate": 1.2747252194146575e-05, "loss": 0.4681, "step": 4828 }, { "epoch": 0.5933161322029733, "grad_norm": 0.33118252863480657, "learning_rate": 1.274089207123061e-05, "loss": 0.3657, "step": 4829 }, { "epoch": 0.5934389974198304, "grad_norm": 0.32973943218569657, "learning_rate": 1.2734532363862907e-05, "loss": 0.3571, "step": 4830 }, { "epoch": 0.5935618626366875, "grad_norm": 0.4048753822151617, "learning_rate": 1.2728173073213282e-05, "loss": 0.4132, "step": 4831 }, { "epoch": 0.5936847278535446, "grad_norm": 0.45958211343041494, "learning_rate": 1.2721814200451483e-05, "loss": 0.44, "step": 4832 }, { "epoch": 0.5938075930704018, "grad_norm": 0.320738139380405, "learning_rate": 1.2715455746747188e-05, "loss": 0.3636, "step": 4833 }, { "epoch": 0.5939304582872589, "grad_norm": 0.3364927970009936, "learning_rate": 1.2709097713269996e-05, "loss": 0.3277, "step": 4834 }, { "epoch": 0.594053323504116, "grad_norm": 0.42599390481321947, "learning_rate": 1.2702740101189423e-05, "loss": 0.3605, "step": 4835 }, { "epoch": 0.5941761887209731, "grad_norm": 0.33475734084026537, "learning_rate": 1.2696382911674905e-05, "loss": 0.4611, "step": 4836 }, { "epoch": 0.5942990539378302, "grad_norm": 0.38105319756717626, "learning_rate": 1.2690026145895814e-05, "loss": 0.4604, "step": 4837 }, { "epoch": 0.5944219191546873, "grad_norm": 0.33361202856863054, "learning_rate": 1.2683669805021437e-05, "loss": 0.3814, "step": 4838 }, { "epoch": 0.5945447843715445, "grad_norm": 0.4086164737540137, "learning_rate": 1.2677313890220974e-05, "loss": 0.44, "step": 4839 }, { "epoch": 0.5946676495884016, "grad_norm": 0.41711189911936836, "learning_rate": 1.2670958402663552e-05, "loss": 0.419, "step": 4840 }, { "epoch": 0.5947905148052587, "grad_norm": 0.38085246067249584, "learning_rate": 1.2664603343518232e-05, "loss": 0.4697, "step": 4841 }, { "epoch": 0.5949133800221157, "grad_norm": 0.3148171465787605, "learning_rate": 1.2658248713953983e-05, "loss": 0.3941, "step": 4842 }, { "epoch": 0.5950362452389728, "grad_norm": 0.3566880579382173, "learning_rate": 1.2651894515139697e-05, "loss": 0.3918, "step": 4843 }, { "epoch": 0.5951591104558299, "grad_norm": 0.3689879205238114, "learning_rate": 1.2645540748244183e-05, "loss": 0.3096, "step": 4844 }, { "epoch": 0.595281975672687, "grad_norm": 0.31793833377496955, "learning_rate": 1.2639187414436182e-05, "loss": 0.363, "step": 4845 }, { "epoch": 0.5954048408895442, "grad_norm": 0.31536298327945245, "learning_rate": 1.2632834514884347e-05, "loss": 0.3253, "step": 4846 }, { "epoch": 0.5955277061064013, "grad_norm": 0.37821048358676124, "learning_rate": 1.2626482050757251e-05, "loss": 0.3709, "step": 4847 }, { "epoch": 0.5956505713232584, "grad_norm": 0.3762572684727129, "learning_rate": 1.2620130023223382e-05, "loss": 0.4239, "step": 4848 }, { "epoch": 0.5957734365401155, "grad_norm": 0.3379445057738818, "learning_rate": 1.2613778433451168e-05, "loss": 0.4276, "step": 4849 }, { "epoch": 0.5958963017569726, "grad_norm": 0.32047226440664145, "learning_rate": 1.2607427282608936e-05, "loss": 0.3571, "step": 4850 }, { "epoch": 0.5960191669738297, "grad_norm": 0.2776413593058544, "learning_rate": 1.2601076571864934e-05, "loss": 0.4387, "step": 4851 }, { "epoch": 0.5961420321906868, "grad_norm": 0.3193456699213906, "learning_rate": 1.2594726302387345e-05, "loss": 0.3583, "step": 4852 }, { "epoch": 0.596264897407544, "grad_norm": 0.3976733179888361, "learning_rate": 1.2588376475344252e-05, "loss": 0.3946, "step": 4853 }, { "epoch": 0.5963877626244011, "grad_norm": 0.3096301259578248, "learning_rate": 1.2582027091903667e-05, "loss": 0.3515, "step": 4854 }, { "epoch": 0.5965106278412582, "grad_norm": 0.3043882120451896, "learning_rate": 1.2575678153233512e-05, "loss": 0.327, "step": 4855 }, { "epoch": 0.5966334930581152, "grad_norm": 0.3448321318811927, "learning_rate": 1.2569329660501643e-05, "loss": 0.3659, "step": 4856 }, { "epoch": 0.5967563582749723, "grad_norm": 0.36190908214853856, "learning_rate": 1.256298161487582e-05, "loss": 0.483, "step": 4857 }, { "epoch": 0.5968792234918294, "grad_norm": 0.37017696733584693, "learning_rate": 1.2556634017523727e-05, "loss": 0.4383, "step": 4858 }, { "epoch": 0.5970020887086865, "grad_norm": 0.44725930957098153, "learning_rate": 1.255028686961295e-05, "loss": 0.4709, "step": 4859 }, { "epoch": 0.5971249539255437, "grad_norm": 0.30973065220369966, "learning_rate": 1.2543940172311026e-05, "loss": 0.3901, "step": 4860 }, { "epoch": 0.5972478191424008, "grad_norm": 0.3422092862419657, "learning_rate": 1.2537593926785378e-05, "loss": 0.396, "step": 4861 }, { "epoch": 0.5973706843592579, "grad_norm": 0.3141016713784206, "learning_rate": 1.2531248134203357e-05, "loss": 0.4605, "step": 4862 }, { "epoch": 0.597493549576115, "grad_norm": 0.42493745418374856, "learning_rate": 1.252490279573223e-05, "loss": 0.4215, "step": 4863 }, { "epoch": 0.5976164147929721, "grad_norm": 0.36099190590208385, "learning_rate": 1.2518557912539185e-05, "loss": 0.3321, "step": 4864 }, { "epoch": 0.5977392800098292, "grad_norm": 0.32797717827208683, "learning_rate": 1.2512213485791318e-05, "loss": 0.3507, "step": 4865 }, { "epoch": 0.5978621452266863, "grad_norm": 0.3721368399841488, "learning_rate": 1.2505869516655647e-05, "loss": 0.419, "step": 4866 }, { "epoch": 0.5979850104435435, "grad_norm": 0.42318647222480604, "learning_rate": 1.2499526006299097e-05, "loss": 0.423, "step": 4867 }, { "epoch": 0.5981078756604006, "grad_norm": 0.3888113874392465, "learning_rate": 1.249318295588853e-05, "loss": 0.3911, "step": 4868 }, { "epoch": 0.5982307408772577, "grad_norm": 0.2947161327933036, "learning_rate": 1.2486840366590698e-05, "loss": 0.3891, "step": 4869 }, { "epoch": 0.5983536060941148, "grad_norm": 0.35794988941824896, "learning_rate": 1.2480498239572285e-05, "loss": 0.4318, "step": 4870 }, { "epoch": 0.5984764713109718, "grad_norm": 0.522190795485326, "learning_rate": 1.2474156575999875e-05, "loss": 0.4377, "step": 4871 }, { "epoch": 0.5985993365278289, "grad_norm": 0.2942299866003092, "learning_rate": 1.2467815377039988e-05, "loss": 0.334, "step": 4872 }, { "epoch": 0.598722201744686, "grad_norm": 0.35404396801469623, "learning_rate": 1.246147464385904e-05, "loss": 0.4634, "step": 4873 }, { "epoch": 0.5988450669615432, "grad_norm": 0.3296495723175549, "learning_rate": 1.2455134377623361e-05, "loss": 0.4418, "step": 4874 }, { "epoch": 0.5989679321784003, "grad_norm": 0.49805311226883814, "learning_rate": 1.2448794579499216e-05, "loss": 0.4813, "step": 4875 }, { "epoch": 0.5990907973952574, "grad_norm": 0.34606354939255773, "learning_rate": 1.2442455250652763e-05, "loss": 0.3958, "step": 4876 }, { "epoch": 0.5992136626121145, "grad_norm": 0.36853523663581067, "learning_rate": 1.243611639225008e-05, "loss": 0.463, "step": 4877 }, { "epoch": 0.5993365278289716, "grad_norm": 0.3195540233024828, "learning_rate": 1.2429778005457154e-05, "loss": 0.39, "step": 4878 }, { "epoch": 0.5994593930458287, "grad_norm": 0.5125843347801651, "learning_rate": 1.2423440091439902e-05, "loss": 0.4171, "step": 4879 }, { "epoch": 0.5995822582626859, "grad_norm": 0.3237042089934817, "learning_rate": 1.2417102651364134e-05, "loss": 0.4005, "step": 4880 }, { "epoch": 0.599705123479543, "grad_norm": 0.41435040778741133, "learning_rate": 1.2410765686395584e-05, "loss": 0.4379, "step": 4881 }, { "epoch": 0.5998279886964001, "grad_norm": 0.38720293559370095, "learning_rate": 1.240442919769989e-05, "loss": 0.4146, "step": 4882 }, { "epoch": 0.5999508539132572, "grad_norm": 0.4023172096624101, "learning_rate": 1.2398093186442616e-05, "loss": 0.3981, "step": 4883 }, { "epoch": 0.6000737191301143, "grad_norm": 0.33522789485555654, "learning_rate": 1.2391757653789227e-05, "loss": 0.353, "step": 4884 }, { "epoch": 0.6001965843469713, "grad_norm": 0.2903902190380116, "learning_rate": 1.2385422600905102e-05, "loss": 0.3814, "step": 4885 }, { "epoch": 0.6003194495638284, "grad_norm": 0.4614439319063805, "learning_rate": 1.2379088028955525e-05, "loss": 0.4426, "step": 4886 }, { "epoch": 0.6004423147806855, "grad_norm": 0.3328897441132973, "learning_rate": 1.2372753939105716e-05, "loss": 0.333, "step": 4887 }, { "epoch": 0.6005651799975427, "grad_norm": 0.2897193649400806, "learning_rate": 1.2366420332520783e-05, "loss": 0.4136, "step": 4888 }, { "epoch": 0.6006880452143998, "grad_norm": 0.4424121914035639, "learning_rate": 1.236008721036575e-05, "loss": 0.4513, "step": 4889 }, { "epoch": 0.6008109104312569, "grad_norm": 0.3849323003927797, "learning_rate": 1.2353754573805549e-05, "loss": 0.4173, "step": 4890 }, { "epoch": 0.600933775648114, "grad_norm": 0.4114655125376129, "learning_rate": 1.2347422424005039e-05, "loss": 0.3572, "step": 4891 }, { "epoch": 0.6010566408649711, "grad_norm": 0.3389908514664158, "learning_rate": 1.2341090762128969e-05, "loss": 0.4413, "step": 4892 }, { "epoch": 0.6011795060818282, "grad_norm": 0.34949150477189106, "learning_rate": 1.2334759589342003e-05, "loss": 0.3798, "step": 4893 }, { "epoch": 0.6013023712986854, "grad_norm": 0.3387179231945924, "learning_rate": 1.2328428906808734e-05, "loss": 0.383, "step": 4894 }, { "epoch": 0.6014252365155425, "grad_norm": 0.35419401560902686, "learning_rate": 1.232209871569364e-05, "loss": 0.3582, "step": 4895 }, { "epoch": 0.6015481017323996, "grad_norm": 0.3352744132043871, "learning_rate": 1.2315769017161121e-05, "loss": 0.396, "step": 4896 }, { "epoch": 0.6016709669492567, "grad_norm": 0.2797620313505778, "learning_rate": 1.2309439812375479e-05, "loss": 0.3148, "step": 4897 }, { "epoch": 0.6017938321661138, "grad_norm": 0.3405552733982116, "learning_rate": 1.2303111102500938e-05, "loss": 0.3968, "step": 4898 }, { "epoch": 0.6019166973829709, "grad_norm": 0.30857704271894587, "learning_rate": 1.2296782888701621e-05, "loss": 0.3421, "step": 4899 }, { "epoch": 0.6020395625998279, "grad_norm": 0.3411601940740562, "learning_rate": 1.2290455172141563e-05, "loss": 0.3995, "step": 4900 }, { "epoch": 0.602162427816685, "grad_norm": 0.3938456568755591, "learning_rate": 1.2284127953984698e-05, "loss": 0.3964, "step": 4901 }, { "epoch": 0.6022852930335422, "grad_norm": 0.339482789209131, "learning_rate": 1.2277801235394885e-05, "loss": 0.3795, "step": 4902 }, { "epoch": 0.6024081582503993, "grad_norm": 0.37023475012763085, "learning_rate": 1.2271475017535884e-05, "loss": 0.3855, "step": 4903 }, { "epoch": 0.6025310234672564, "grad_norm": 0.3824723579251162, "learning_rate": 1.2265149301571357e-05, "loss": 0.3799, "step": 4904 }, { "epoch": 0.6026538886841135, "grad_norm": 0.3585219370480286, "learning_rate": 1.2258824088664874e-05, "loss": 0.4208, "step": 4905 }, { "epoch": 0.6027767539009706, "grad_norm": 0.37859216783918814, "learning_rate": 1.2252499379979928e-05, "loss": 0.4911, "step": 4906 }, { "epoch": 0.6028996191178277, "grad_norm": 0.3419184170622758, "learning_rate": 1.2246175176679902e-05, "loss": 0.3882, "step": 4907 }, { "epoch": 0.6030224843346849, "grad_norm": 0.3209400906709372, "learning_rate": 1.2239851479928096e-05, "loss": 0.4077, "step": 4908 }, { "epoch": 0.603145349551542, "grad_norm": 0.34945075658475233, "learning_rate": 1.2233528290887705e-05, "loss": 0.3777, "step": 4909 }, { "epoch": 0.6032682147683991, "grad_norm": 0.3842039482589539, "learning_rate": 1.2227205610721848e-05, "loss": 0.3508, "step": 4910 }, { "epoch": 0.6033910799852562, "grad_norm": 0.30957448342254196, "learning_rate": 1.2220883440593536e-05, "loss": 0.3875, "step": 4911 }, { "epoch": 0.6035139452021133, "grad_norm": 0.3876590518163434, "learning_rate": 1.221456178166569e-05, "loss": 0.3911, "step": 4912 }, { "epoch": 0.6036368104189704, "grad_norm": 0.38601987806344945, "learning_rate": 1.2208240635101137e-05, "loss": 0.4674, "step": 4913 }, { "epoch": 0.6037596756358274, "grad_norm": 0.35134998628196085, "learning_rate": 1.2201920002062617e-05, "loss": 0.3839, "step": 4914 }, { "epoch": 0.6038825408526846, "grad_norm": 0.36130389465138085, "learning_rate": 1.2195599883712768e-05, "loss": 0.444, "step": 4915 }, { "epoch": 0.6040054060695417, "grad_norm": 0.386162622008532, "learning_rate": 1.2189280281214128e-05, "loss": 0.4725, "step": 4916 }, { "epoch": 0.6041282712863988, "grad_norm": 0.35018826937405995, "learning_rate": 1.2182961195729158e-05, "loss": 0.4008, "step": 4917 }, { "epoch": 0.6042511365032559, "grad_norm": 0.35255993609086944, "learning_rate": 1.2176642628420206e-05, "loss": 0.4471, "step": 4918 }, { "epoch": 0.604374001720113, "grad_norm": 0.26595242109660594, "learning_rate": 1.2170324580449534e-05, "loss": 0.3934, "step": 4919 }, { "epoch": 0.6044968669369701, "grad_norm": 0.31416256249009433, "learning_rate": 1.2164007052979299e-05, "loss": 0.3879, "step": 4920 }, { "epoch": 0.6046197321538272, "grad_norm": 0.3498442926199312, "learning_rate": 1.2157690047171578e-05, "loss": 0.3271, "step": 4921 }, { "epoch": 0.6047425973706844, "grad_norm": 0.3806264601997422, "learning_rate": 1.215137356418834e-05, "loss": 0.4341, "step": 4922 }, { "epoch": 0.6048654625875415, "grad_norm": 0.3807524861547718, "learning_rate": 1.2145057605191462e-05, "loss": 0.4947, "step": 4923 }, { "epoch": 0.6049883278043986, "grad_norm": 0.3811660149728518, "learning_rate": 1.2138742171342716e-05, "loss": 0.3373, "step": 4924 }, { "epoch": 0.6051111930212557, "grad_norm": 0.3039140296856914, "learning_rate": 1.2132427263803797e-05, "loss": 0.4126, "step": 4925 }, { "epoch": 0.6052340582381128, "grad_norm": 0.333285269724002, "learning_rate": 1.2126112883736288e-05, "loss": 0.3983, "step": 4926 }, { "epoch": 0.6053569234549699, "grad_norm": 0.29662954681515885, "learning_rate": 1.2119799032301675e-05, "loss": 0.3178, "step": 4927 }, { "epoch": 0.6054797886718271, "grad_norm": 0.41738952748637975, "learning_rate": 1.2113485710661348e-05, "loss": 0.4369, "step": 4928 }, { "epoch": 0.6056026538886841, "grad_norm": 0.35965149541697955, "learning_rate": 1.2107172919976607e-05, "loss": 0.4021, "step": 4929 }, { "epoch": 0.6057255191055412, "grad_norm": 0.29691584538920296, "learning_rate": 1.2100860661408648e-05, "loss": 0.4177, "step": 4930 }, { "epoch": 0.6058483843223983, "grad_norm": 0.3077722354857694, "learning_rate": 1.2094548936118567e-05, "loss": 0.3685, "step": 4931 }, { "epoch": 0.6059712495392554, "grad_norm": 0.3858812227560357, "learning_rate": 1.2088237745267363e-05, "loss": 0.4599, "step": 4932 }, { "epoch": 0.6060941147561125, "grad_norm": 0.4290812869634005, "learning_rate": 1.2081927090015949e-05, "loss": 0.4066, "step": 4933 }, { "epoch": 0.6062169799729696, "grad_norm": 0.3182749579109412, "learning_rate": 1.2075616971525119e-05, "loss": 0.4139, "step": 4934 }, { "epoch": 0.6063398451898268, "grad_norm": 0.3248713582430178, "learning_rate": 1.2069307390955584e-05, "loss": 0.4051, "step": 4935 }, { "epoch": 0.6064627104066839, "grad_norm": 0.2927059223802141, "learning_rate": 1.2062998349467941e-05, "loss": 0.3502, "step": 4936 }, { "epoch": 0.606585575623541, "grad_norm": 0.2928988008108695, "learning_rate": 1.2056689848222713e-05, "loss": 0.4139, "step": 4937 }, { "epoch": 0.6067084408403981, "grad_norm": 0.3263828176449239, "learning_rate": 1.2050381888380297e-05, "loss": 0.4803, "step": 4938 }, { "epoch": 0.6068313060572552, "grad_norm": 0.38599422384059795, "learning_rate": 1.2044074471101e-05, "loss": 0.4614, "step": 4939 }, { "epoch": 0.6069541712741123, "grad_norm": 0.29431005342190053, "learning_rate": 1.2037767597545039e-05, "loss": 0.3055, "step": 4940 }, { "epoch": 0.6070770364909694, "grad_norm": 0.3546627924715086, "learning_rate": 1.2031461268872518e-05, "loss": 0.3551, "step": 4941 }, { "epoch": 0.6071999017078266, "grad_norm": 0.34474914106156485, "learning_rate": 1.2025155486243444e-05, "loss": 0.3946, "step": 4942 }, { "epoch": 0.6073227669246837, "grad_norm": 0.43223210433612164, "learning_rate": 1.2018850250817719e-05, "loss": 0.4619, "step": 4943 }, { "epoch": 0.6074456321415407, "grad_norm": 0.4397165823972759, "learning_rate": 1.2012545563755165e-05, "loss": 0.3952, "step": 4944 }, { "epoch": 0.6075684973583978, "grad_norm": 0.33148439750620146, "learning_rate": 1.2006241426215479e-05, "loss": 0.3747, "step": 4945 }, { "epoch": 0.6076913625752549, "grad_norm": 0.37395203713778535, "learning_rate": 1.1999937839358268e-05, "loss": 0.4609, "step": 4946 }, { "epoch": 0.607814227792112, "grad_norm": 0.38961469251353587, "learning_rate": 1.1993634804343032e-05, "loss": 0.3835, "step": 4947 }, { "epoch": 0.6079370930089691, "grad_norm": 0.352811311489878, "learning_rate": 1.198733232232918e-05, "loss": 0.4156, "step": 4948 }, { "epoch": 0.6080599582258263, "grad_norm": 0.3204566093852573, "learning_rate": 1.198103039447601e-05, "loss": 0.4062, "step": 4949 }, { "epoch": 0.6081828234426834, "grad_norm": 0.3142026229895584, "learning_rate": 1.1974729021942717e-05, "loss": 0.3469, "step": 4950 }, { "epoch": 0.6083056886595405, "grad_norm": 3.463631366050983, "learning_rate": 1.1968428205888397e-05, "loss": 0.4875, "step": 4951 }, { "epoch": 0.6084285538763976, "grad_norm": 0.3156390337117551, "learning_rate": 1.1962127947472055e-05, "loss": 0.3475, "step": 4952 }, { "epoch": 0.6085514190932547, "grad_norm": 0.4620963181196171, "learning_rate": 1.1955828247852576e-05, "loss": 0.4503, "step": 4953 }, { "epoch": 0.6086742843101118, "grad_norm": 0.3495085122657262, "learning_rate": 1.1949529108188746e-05, "loss": 0.4001, "step": 4954 }, { "epoch": 0.608797149526969, "grad_norm": 0.28652467510194585, "learning_rate": 1.1943230529639251e-05, "loss": 0.3633, "step": 4955 }, { "epoch": 0.6089200147438261, "grad_norm": 0.35606416149529885, "learning_rate": 1.193693251336268e-05, "loss": 0.329, "step": 4956 }, { "epoch": 0.6090428799606832, "grad_norm": 0.3620694490576209, "learning_rate": 1.1930635060517509e-05, "loss": 0.3953, "step": 4957 }, { "epoch": 0.6091657451775402, "grad_norm": 0.30135995978292507, "learning_rate": 1.192433817226211e-05, "loss": 0.395, "step": 4958 }, { "epoch": 0.6092886103943973, "grad_norm": 0.38651420036132744, "learning_rate": 1.191804184975476e-05, "loss": 0.4246, "step": 4959 }, { "epoch": 0.6094114756112544, "grad_norm": 0.34706703048208537, "learning_rate": 1.1911746094153627e-05, "loss": 0.3125, "step": 4960 }, { "epoch": 0.6095343408281115, "grad_norm": 0.30004492265277005, "learning_rate": 1.190545090661677e-05, "loss": 0.3732, "step": 4961 }, { "epoch": 0.6096572060449686, "grad_norm": 0.3257837183853238, "learning_rate": 1.1899156288302144e-05, "loss": 0.4155, "step": 4962 }, { "epoch": 0.6097800712618258, "grad_norm": 0.38709243193349485, "learning_rate": 1.1892862240367615e-05, "loss": 0.4292, "step": 4963 }, { "epoch": 0.6099029364786829, "grad_norm": 0.3827747436924763, "learning_rate": 1.1886568763970928e-05, "loss": 0.4218, "step": 4964 }, { "epoch": 0.61002580169554, "grad_norm": 0.36068335920192135, "learning_rate": 1.1880275860269723e-05, "loss": 0.346, "step": 4965 }, { "epoch": 0.6101486669123971, "grad_norm": 0.3277959059347873, "learning_rate": 1.1873983530421539e-05, "loss": 0.2982, "step": 4966 }, { "epoch": 0.6102715321292542, "grad_norm": 0.31619122315883635, "learning_rate": 1.1867691775583816e-05, "loss": 0.371, "step": 4967 }, { "epoch": 0.6103943973461113, "grad_norm": 0.30561620625237845, "learning_rate": 1.1861400596913877e-05, "loss": 0.352, "step": 4968 }, { "epoch": 0.6105172625629685, "grad_norm": 0.3461131309627027, "learning_rate": 1.1855109995568944e-05, "loss": 0.4822, "step": 4969 }, { "epoch": 0.6106401277798256, "grad_norm": 0.3951197968009017, "learning_rate": 1.1848819972706124e-05, "loss": 0.424, "step": 4970 }, { "epoch": 0.6107629929966827, "grad_norm": 0.38190318710681564, "learning_rate": 1.1842530529482441e-05, "loss": 0.382, "step": 4971 }, { "epoch": 0.6108858582135398, "grad_norm": 0.32151489548741835, "learning_rate": 1.183624166705479e-05, "loss": 0.3678, "step": 4972 }, { "epoch": 0.6110087234303968, "grad_norm": 0.33306957637072665, "learning_rate": 1.1829953386579967e-05, "loss": 0.3976, "step": 4973 }, { "epoch": 0.6111315886472539, "grad_norm": 0.3315889454380374, "learning_rate": 1.1823665689214657e-05, "loss": 0.3752, "step": 4974 }, { "epoch": 0.611254453864111, "grad_norm": 0.33734484358016353, "learning_rate": 1.1817378576115447e-05, "loss": 0.5074, "step": 4975 }, { "epoch": 0.6113773190809682, "grad_norm": 0.35159187872240594, "learning_rate": 1.1811092048438808e-05, "loss": 0.3549, "step": 4976 }, { "epoch": 0.6115001842978253, "grad_norm": 0.38138009502575915, "learning_rate": 1.1804806107341106e-05, "loss": 0.3856, "step": 4977 }, { "epoch": 0.6116230495146824, "grad_norm": 0.32883054142259194, "learning_rate": 1.1798520753978592e-05, "loss": 0.3455, "step": 4978 }, { "epoch": 0.6117459147315395, "grad_norm": 0.3646873288788655, "learning_rate": 1.179223598950743e-05, "loss": 0.441, "step": 4979 }, { "epoch": 0.6118687799483966, "grad_norm": 0.3578822740440125, "learning_rate": 1.1785951815083655e-05, "loss": 0.3785, "step": 4980 }, { "epoch": 0.6119916451652537, "grad_norm": 0.7173390684007126, "learning_rate": 1.1779668231863193e-05, "loss": 0.4862, "step": 4981 }, { "epoch": 0.6121145103821108, "grad_norm": 0.3885361145033683, "learning_rate": 1.1773385241001882e-05, "loss": 0.4166, "step": 4982 }, { "epoch": 0.612237375598968, "grad_norm": 0.4011203535122146, "learning_rate": 1.176710284365543e-05, "loss": 0.4036, "step": 4983 }, { "epoch": 0.6123602408158251, "grad_norm": 0.33602048536299084, "learning_rate": 1.1760821040979446e-05, "loss": 0.3704, "step": 4984 }, { "epoch": 0.6124831060326822, "grad_norm": 0.37122162194107006, "learning_rate": 1.1754539834129417e-05, "loss": 0.3612, "step": 4985 }, { "epoch": 0.6126059712495393, "grad_norm": 0.33187025694811634, "learning_rate": 1.1748259224260745e-05, "loss": 0.4511, "step": 4986 }, { "epoch": 0.6127288364663963, "grad_norm": 0.3446114907464896, "learning_rate": 1.1741979212528698e-05, "loss": 0.3025, "step": 4987 }, { "epoch": 0.6128517016832534, "grad_norm": 0.39389401204875896, "learning_rate": 1.1735699800088447e-05, "loss": 0.389, "step": 4988 }, { "epoch": 0.6129745669001105, "grad_norm": 0.3555932306602919, "learning_rate": 1.1729420988095042e-05, "loss": 0.3463, "step": 4989 }, { "epoch": 0.6130974321169677, "grad_norm": 0.33505159852350985, "learning_rate": 1.1723142777703442e-05, "loss": 0.3679, "step": 4990 }, { "epoch": 0.6132202973338248, "grad_norm": 0.4586680939774062, "learning_rate": 1.1716865170068475e-05, "loss": 0.4124, "step": 4991 }, { "epoch": 0.6133431625506819, "grad_norm": 0.39974634421743055, "learning_rate": 1.1710588166344872e-05, "loss": 0.3936, "step": 4992 }, { "epoch": 0.613466027767539, "grad_norm": 0.3240037266095869, "learning_rate": 1.1704311767687237e-05, "loss": 0.3863, "step": 4993 }, { "epoch": 0.6135888929843961, "grad_norm": 0.39214455618428296, "learning_rate": 1.1698035975250082e-05, "loss": 0.4038, "step": 4994 }, { "epoch": 0.6137117582012532, "grad_norm": 0.4456911223321744, "learning_rate": 1.1691760790187798e-05, "loss": 0.4543, "step": 4995 }, { "epoch": 0.6138346234181103, "grad_norm": 0.46346661996210015, "learning_rate": 1.168548621365466e-05, "loss": 0.4993, "step": 4996 }, { "epoch": 0.6139574886349675, "grad_norm": 0.4532488487965926, "learning_rate": 1.1679212246804831e-05, "loss": 0.3679, "step": 4997 }, { "epoch": 0.6140803538518246, "grad_norm": 0.7915827081335021, "learning_rate": 1.167293889079238e-05, "loss": 0.4981, "step": 4998 }, { "epoch": 0.6142032190686817, "grad_norm": 0.4258037444749209, "learning_rate": 1.1666666146771243e-05, "loss": 0.3751, "step": 4999 }, { "epoch": 0.6143260842855388, "grad_norm": 0.36806820208661684, "learning_rate": 1.1660394015895245e-05, "loss": 0.3196, "step": 5000 }, { "epoch": 0.6144489495023959, "grad_norm": 0.3432974420609852, "learning_rate": 1.1654122499318117e-05, "loss": 0.3863, "step": 5001 }, { "epoch": 0.6145718147192529, "grad_norm": 0.3636020044006925, "learning_rate": 1.1647851598193456e-05, "loss": 0.3917, "step": 5002 }, { "epoch": 0.61469467993611, "grad_norm": 0.3416368617994892, "learning_rate": 1.1641581313674752e-05, "loss": 0.3522, "step": 5003 }, { "epoch": 0.6148175451529672, "grad_norm": 0.3268966865801337, "learning_rate": 1.1635311646915385e-05, "loss": 0.3928, "step": 5004 }, { "epoch": 0.6149404103698243, "grad_norm": 0.4204379255680297, "learning_rate": 1.162904259906862e-05, "loss": 0.4705, "step": 5005 }, { "epoch": 0.6150632755866814, "grad_norm": 0.29246877279564676, "learning_rate": 1.162277417128761e-05, "loss": 0.3722, "step": 5006 }, { "epoch": 0.6151861408035385, "grad_norm": 0.34157124253028, "learning_rate": 1.1616506364725388e-05, "loss": 0.3728, "step": 5007 }, { "epoch": 0.6153090060203956, "grad_norm": 0.36773386987116685, "learning_rate": 1.1610239180534872e-05, "loss": 0.4073, "step": 5008 }, { "epoch": 0.6154318712372527, "grad_norm": 0.3394738474156739, "learning_rate": 1.1603972619868881e-05, "loss": 0.4746, "step": 5009 }, { "epoch": 0.6155547364541099, "grad_norm": 0.3568643833356791, "learning_rate": 1.15977066838801e-05, "loss": 0.3869, "step": 5010 }, { "epoch": 0.615677601670967, "grad_norm": 0.31624641411327936, "learning_rate": 1.1591441373721115e-05, "loss": 0.3671, "step": 5011 }, { "epoch": 0.6158004668878241, "grad_norm": 0.44694311328543784, "learning_rate": 1.1585176690544377e-05, "loss": 0.3882, "step": 5012 }, { "epoch": 0.6159233321046812, "grad_norm": 0.3328806546698004, "learning_rate": 1.1578912635502245e-05, "loss": 0.4232, "step": 5013 }, { "epoch": 0.6160461973215383, "grad_norm": 0.28111162978762627, "learning_rate": 1.1572649209746948e-05, "loss": 0.3543, "step": 5014 }, { "epoch": 0.6161690625383954, "grad_norm": 0.3316616078765195, "learning_rate": 1.1566386414430602e-05, "loss": 0.4602, "step": 5015 }, { "epoch": 0.6162919277552524, "grad_norm": 0.43319495243088363, "learning_rate": 1.1560124250705198e-05, "loss": 0.4626, "step": 5016 }, { "epoch": 0.6164147929721095, "grad_norm": 0.3743770132133581, "learning_rate": 1.1553862719722639e-05, "loss": 0.3797, "step": 5017 }, { "epoch": 0.6165376581889667, "grad_norm": 0.4320035390066381, "learning_rate": 1.1547601822634684e-05, "loss": 0.4398, "step": 5018 }, { "epoch": 0.6166605234058238, "grad_norm": 0.39283999841143546, "learning_rate": 1.1541341560592982e-05, "loss": 0.4723, "step": 5019 }, { "epoch": 0.6167833886226809, "grad_norm": 0.4453025137382944, "learning_rate": 1.1535081934749064e-05, "loss": 0.3838, "step": 5020 }, { "epoch": 0.616906253839538, "grad_norm": 0.3170388005715293, "learning_rate": 1.152882294625436e-05, "loss": 0.4007, "step": 5021 }, { "epoch": 0.6170291190563951, "grad_norm": 0.392745476958903, "learning_rate": 1.1522564596260165e-05, "loss": 0.3541, "step": 5022 }, { "epoch": 0.6171519842732522, "grad_norm": 0.3322243167452183, "learning_rate": 1.1516306885917656e-05, "loss": 0.3784, "step": 5023 }, { "epoch": 0.6172748494901094, "grad_norm": 0.3694993673141008, "learning_rate": 1.1510049816377904e-05, "loss": 0.481, "step": 5024 }, { "epoch": 0.6173977147069665, "grad_norm": 0.4365350945683974, "learning_rate": 1.1503793388791859e-05, "loss": 0.3949, "step": 5025 }, { "epoch": 0.6175205799238236, "grad_norm": 0.4363902666011406, "learning_rate": 1.1497537604310343e-05, "loss": 0.4829, "step": 5026 }, { "epoch": 0.6176434451406807, "grad_norm": 0.3803327715588811, "learning_rate": 1.1491282464084067e-05, "loss": 0.368, "step": 5027 }, { "epoch": 0.6177663103575378, "grad_norm": 0.3420427891024224, "learning_rate": 1.1485027969263632e-05, "loss": 0.4589, "step": 5028 }, { "epoch": 0.6178891755743949, "grad_norm": 0.38663987110452497, "learning_rate": 1.1478774120999507e-05, "loss": 0.4036, "step": 5029 }, { "epoch": 0.618012040791252, "grad_norm": 0.3632280665132752, "learning_rate": 1.1472520920442044e-05, "loss": 0.371, "step": 5030 }, { "epoch": 0.618134906008109, "grad_norm": 0.4190289392501705, "learning_rate": 1.146626836874148e-05, "loss": 0.442, "step": 5031 }, { "epoch": 0.6182577712249662, "grad_norm": 0.3896407963521837, "learning_rate": 1.1460016467047937e-05, "loss": 0.4966, "step": 5032 }, { "epoch": 0.6183806364418233, "grad_norm": 0.6290998424795223, "learning_rate": 1.1453765216511408e-05, "loss": 0.4218, "step": 5033 }, { "epoch": 0.6185035016586804, "grad_norm": 0.32095150119136334, "learning_rate": 1.1447514618281768e-05, "loss": 0.4247, "step": 5034 }, { "epoch": 0.6186263668755375, "grad_norm": 0.3556268063984005, "learning_rate": 1.1441264673508766e-05, "loss": 0.4092, "step": 5035 }, { "epoch": 0.6187492320923946, "grad_norm": 0.3134284379977821, "learning_rate": 1.1435015383342058e-05, "loss": 0.3707, "step": 5036 }, { "epoch": 0.6188720973092517, "grad_norm": 0.3128359397802778, "learning_rate": 1.1428766748931148e-05, "loss": 0.3944, "step": 5037 }, { "epoch": 0.6189949625261089, "grad_norm": 0.35314812556385, "learning_rate": 1.1422518771425435e-05, "loss": 0.4805, "step": 5038 }, { "epoch": 0.619117827742966, "grad_norm": 0.4083493407211913, "learning_rate": 1.1416271451974187e-05, "loss": 0.3625, "step": 5039 }, { "epoch": 0.6192406929598231, "grad_norm": 0.3442453253267225, "learning_rate": 1.1410024791726573e-05, "loss": 0.3894, "step": 5040 }, { "epoch": 0.6193635581766802, "grad_norm": 0.4665818319431293, "learning_rate": 1.1403778791831614e-05, "loss": 0.3899, "step": 5041 }, { "epoch": 0.6194864233935373, "grad_norm": 0.4127035361941586, "learning_rate": 1.1397533453438223e-05, "loss": 0.4621, "step": 5042 }, { "epoch": 0.6196092886103944, "grad_norm": 0.45048253275777866, "learning_rate": 1.139128877769519e-05, "loss": 0.4406, "step": 5043 }, { "epoch": 0.6197321538272516, "grad_norm": 0.330379237184144, "learning_rate": 1.1385044765751185e-05, "loss": 0.3822, "step": 5044 }, { "epoch": 0.6198550190441087, "grad_norm": 0.38504599848768895, "learning_rate": 1.1378801418754752e-05, "loss": 0.4166, "step": 5045 }, { "epoch": 0.6199778842609657, "grad_norm": 0.35237175712807306, "learning_rate": 1.1372558737854307e-05, "loss": 0.3828, "step": 5046 }, { "epoch": 0.6201007494778228, "grad_norm": 0.37254263086807377, "learning_rate": 1.1366316724198163e-05, "loss": 0.4196, "step": 5047 }, { "epoch": 0.6202236146946799, "grad_norm": 0.37493722190204903, "learning_rate": 1.1360075378934492e-05, "loss": 0.4999, "step": 5048 }, { "epoch": 0.620346479911537, "grad_norm": 0.3458755166965034, "learning_rate": 1.1353834703211351e-05, "loss": 0.3875, "step": 5049 }, { "epoch": 0.6204693451283941, "grad_norm": 0.32159552398069485, "learning_rate": 1.1347594698176666e-05, "loss": 0.382, "step": 5050 }, { "epoch": 0.6205922103452512, "grad_norm": 0.34914556910143874, "learning_rate": 1.1341355364978253e-05, "loss": 0.3737, "step": 5051 }, { "epoch": 0.6207150755621084, "grad_norm": 0.38015765672697605, "learning_rate": 1.1335116704763794e-05, "loss": 0.4192, "step": 5052 }, { "epoch": 0.6208379407789655, "grad_norm": 0.40030975708969657, "learning_rate": 1.132887871868085e-05, "loss": 0.4048, "step": 5053 }, { "epoch": 0.6209608059958226, "grad_norm": 0.3037315424821055, "learning_rate": 1.132264140787685e-05, "loss": 0.3561, "step": 5054 }, { "epoch": 0.6210836712126797, "grad_norm": 0.38539961970682296, "learning_rate": 1.1316404773499122e-05, "loss": 0.3878, "step": 5055 }, { "epoch": 0.6212065364295368, "grad_norm": 0.3511997956059637, "learning_rate": 1.1310168816694846e-05, "loss": 0.3748, "step": 5056 }, { "epoch": 0.6213294016463939, "grad_norm": 0.36329458202988224, "learning_rate": 1.1303933538611086e-05, "loss": 0.3784, "step": 5057 }, { "epoch": 0.6214522668632511, "grad_norm": 0.33082985014726624, "learning_rate": 1.1297698940394777e-05, "loss": 0.394, "step": 5058 }, { "epoch": 0.6215751320801082, "grad_norm": 0.4242807942574778, "learning_rate": 1.1291465023192742e-05, "loss": 0.4642, "step": 5059 }, { "epoch": 0.6216979972969652, "grad_norm": 0.34895868280144526, "learning_rate": 1.1285231788151667e-05, "loss": 0.442, "step": 5060 }, { "epoch": 0.6218208625138223, "grad_norm": 0.30384290153283317, "learning_rate": 1.1278999236418113e-05, "loss": 0.3937, "step": 5061 }, { "epoch": 0.6219437277306794, "grad_norm": 0.36683986705575944, "learning_rate": 1.1272767369138515e-05, "loss": 0.3447, "step": 5062 }, { "epoch": 0.6220665929475365, "grad_norm": 0.2847476475615627, "learning_rate": 1.126653618745919e-05, "loss": 0.3713, "step": 5063 }, { "epoch": 0.6221894581643936, "grad_norm": 0.4230960223944224, "learning_rate": 1.1260305692526321e-05, "loss": 0.4646, "step": 5064 }, { "epoch": 0.6223123233812508, "grad_norm": 0.40797851171698707, "learning_rate": 1.1254075885485962e-05, "loss": 0.3312, "step": 5065 }, { "epoch": 0.6224351885981079, "grad_norm": 0.31421631598956945, "learning_rate": 1.1247846767484057e-05, "loss": 0.3371, "step": 5066 }, { "epoch": 0.622558053814965, "grad_norm": 0.3105952190442728, "learning_rate": 1.1241618339666404e-05, "loss": 0.3769, "step": 5067 }, { "epoch": 0.6226809190318221, "grad_norm": 0.4282802282760476, "learning_rate": 1.1235390603178684e-05, "loss": 0.3731, "step": 5068 }, { "epoch": 0.6228037842486792, "grad_norm": 0.3554078909951909, "learning_rate": 1.1229163559166445e-05, "loss": 0.4023, "step": 5069 }, { "epoch": 0.6229266494655363, "grad_norm": 0.3458105818003198, "learning_rate": 1.1222937208775117e-05, "loss": 0.3699, "step": 5070 }, { "epoch": 0.6230495146823934, "grad_norm": 0.3685107916750809, "learning_rate": 1.1216711553149995e-05, "loss": 0.4106, "step": 5071 }, { "epoch": 0.6231723798992506, "grad_norm": 0.3922815028531289, "learning_rate": 1.1210486593436249e-05, "loss": 0.332, "step": 5072 }, { "epoch": 0.6232952451161077, "grad_norm": 0.3046187753788558, "learning_rate": 1.1204262330778912e-05, "loss": 0.3519, "step": 5073 }, { "epoch": 0.6234181103329648, "grad_norm": 0.2988552647724795, "learning_rate": 1.1198038766322907e-05, "loss": 0.4852, "step": 5074 }, { "epoch": 0.6235409755498218, "grad_norm": 0.39333172433993163, "learning_rate": 1.1191815901213015e-05, "loss": 0.3906, "step": 5075 }, { "epoch": 0.6236638407666789, "grad_norm": 0.36984140231987406, "learning_rate": 1.118559373659389e-05, "loss": 0.4165, "step": 5076 }, { "epoch": 0.623786705983536, "grad_norm": 0.28071937750668513, "learning_rate": 1.117937227361006e-05, "loss": 0.4212, "step": 5077 }, { "epoch": 0.6239095712003931, "grad_norm": 0.36112741803432885, "learning_rate": 1.1173151513405923e-05, "loss": 0.396, "step": 5078 }, { "epoch": 0.6240324364172503, "grad_norm": 0.3041491416866328, "learning_rate": 1.1166931457125744e-05, "loss": 0.4031, "step": 5079 }, { "epoch": 0.6241553016341074, "grad_norm": 0.5263450629018569, "learning_rate": 1.116071210591367e-05, "loss": 0.3678, "step": 5080 }, { "epoch": 0.6242781668509645, "grad_norm": 0.38328666515565396, "learning_rate": 1.1154493460913702e-05, "loss": 0.4044, "step": 5081 }, { "epoch": 0.6244010320678216, "grad_norm": 0.30188069953836516, "learning_rate": 1.1148275523269724e-05, "loss": 0.4536, "step": 5082 }, { "epoch": 0.6245238972846787, "grad_norm": 0.36091524184710055, "learning_rate": 1.1142058294125486e-05, "loss": 0.3918, "step": 5083 }, { "epoch": 0.6246467625015358, "grad_norm": 0.35250863875659, "learning_rate": 1.1135841774624605e-05, "loss": 0.3754, "step": 5084 }, { "epoch": 0.624769627718393, "grad_norm": 0.3785262275458326, "learning_rate": 1.1129625965910563e-05, "loss": 0.5092, "step": 5085 }, { "epoch": 0.6248924929352501, "grad_norm": 0.309370284431266, "learning_rate": 1.1123410869126731e-05, "loss": 0.3938, "step": 5086 }, { "epoch": 0.6250153581521072, "grad_norm": 0.4619937768446565, "learning_rate": 1.1117196485416328e-05, "loss": 0.5139, "step": 5087 }, { "epoch": 0.6251382233689643, "grad_norm": 0.40658399231343423, "learning_rate": 1.1110982815922449e-05, "loss": 0.3798, "step": 5088 }, { "epoch": 0.6252610885858213, "grad_norm": 0.3995768431580591, "learning_rate": 1.1104769861788062e-05, "loss": 0.4953, "step": 5089 }, { "epoch": 0.6253839538026784, "grad_norm": 0.36506468486258536, "learning_rate": 1.1098557624155997e-05, "loss": 0.3461, "step": 5090 }, { "epoch": 0.6255068190195355, "grad_norm": 0.44296435949408813, "learning_rate": 1.1092346104168955e-05, "loss": 0.3427, "step": 5091 }, { "epoch": 0.6256296842363926, "grad_norm": 0.35580719681791456, "learning_rate": 1.10861353029695e-05, "loss": 0.3979, "step": 5092 }, { "epoch": 0.6257525494532498, "grad_norm": 0.36701859629114936, "learning_rate": 1.107992522170008e-05, "loss": 0.4101, "step": 5093 }, { "epoch": 0.6258754146701069, "grad_norm": 0.3422717503430283, "learning_rate": 1.1073715861502994e-05, "loss": 0.4048, "step": 5094 }, { "epoch": 0.625998279886964, "grad_norm": 0.3887890920499324, "learning_rate": 1.106750722352041e-05, "loss": 0.3379, "step": 5095 }, { "epoch": 0.6261211451038211, "grad_norm": 0.36823408779786293, "learning_rate": 1.1061299308894367e-05, "loss": 0.4186, "step": 5096 }, { "epoch": 0.6262440103206782, "grad_norm": 0.3058215556325282, "learning_rate": 1.1055092118766776e-05, "loss": 0.4304, "step": 5097 }, { "epoch": 0.6263668755375353, "grad_norm": 0.3329634970662744, "learning_rate": 1.1048885654279407e-05, "loss": 0.3426, "step": 5098 }, { "epoch": 0.6264897407543925, "grad_norm": 0.3492722688021027, "learning_rate": 1.1042679916573898e-05, "loss": 0.3868, "step": 5099 }, { "epoch": 0.6266126059712496, "grad_norm": 0.3617538812777011, "learning_rate": 1.1036474906791746e-05, "loss": 0.3955, "step": 5100 }, { "epoch": 0.6267354711881067, "grad_norm": 0.37131418145943274, "learning_rate": 1.1030270626074338e-05, "loss": 0.4571, "step": 5101 }, { "epoch": 0.6268583364049638, "grad_norm": 0.3708756397058841, "learning_rate": 1.1024067075562903e-05, "loss": 0.3108, "step": 5102 }, { "epoch": 0.6269812016218209, "grad_norm": 0.4114566726618978, "learning_rate": 1.1017864256398547e-05, "loss": 0.4256, "step": 5103 }, { "epoch": 0.6271040668386779, "grad_norm": 0.3486998106320647, "learning_rate": 1.1011662169722227e-05, "loss": 0.3703, "step": 5104 }, { "epoch": 0.627226932055535, "grad_norm": 0.3326833585489006, "learning_rate": 1.1005460816674792e-05, "loss": 0.4513, "step": 5105 }, { "epoch": 0.6273497972723922, "grad_norm": 0.332278679029723, "learning_rate": 1.0999260198396936e-05, "loss": 0.4068, "step": 5106 }, { "epoch": 0.6274726624892493, "grad_norm": 0.29845618830005016, "learning_rate": 1.0993060316029216e-05, "loss": 0.4182, "step": 5107 }, { "epoch": 0.6275955277061064, "grad_norm": 0.3003682086587509, "learning_rate": 1.098686117071207e-05, "loss": 0.35, "step": 5108 }, { "epoch": 0.6277183929229635, "grad_norm": 0.3518820798891859, "learning_rate": 1.0980662763585783e-05, "loss": 0.3786, "step": 5109 }, { "epoch": 0.6278412581398206, "grad_norm": 0.29655393117946305, "learning_rate": 1.0974465095790516e-05, "loss": 0.3412, "step": 5110 }, { "epoch": 0.6279641233566777, "grad_norm": 0.32039956230410543, "learning_rate": 1.0968268168466282e-05, "loss": 0.4335, "step": 5111 }, { "epoch": 0.6280869885735348, "grad_norm": 0.3333828022109496, "learning_rate": 1.0962071982752977e-05, "loss": 0.322, "step": 5112 }, { "epoch": 0.628209853790392, "grad_norm": 0.3461715493768236, "learning_rate": 1.0955876539790344e-05, "loss": 0.3873, "step": 5113 }, { "epoch": 0.6283327190072491, "grad_norm": 0.3784956427114625, "learning_rate": 1.0949681840717997e-05, "loss": 0.4311, "step": 5114 }, { "epoch": 0.6284555842241062, "grad_norm": 0.3002867619807721, "learning_rate": 1.0943487886675401e-05, "loss": 0.3789, "step": 5115 }, { "epoch": 0.6285784494409633, "grad_norm": 0.33196481332249816, "learning_rate": 1.0937294678801905e-05, "loss": 0.3406, "step": 5116 }, { "epoch": 0.6287013146578204, "grad_norm": 0.3637054219874131, "learning_rate": 1.0931102218236707e-05, "loss": 0.4079, "step": 5117 }, { "epoch": 0.6288241798746774, "grad_norm": 0.39795079799763594, "learning_rate": 1.0924910506118868e-05, "loss": 0.4504, "step": 5118 }, { "epoch": 0.6289470450915345, "grad_norm": 0.41192428180015356, "learning_rate": 1.0918719543587307e-05, "loss": 0.4821, "step": 5119 }, { "epoch": 0.6290699103083917, "grad_norm": 0.36306166068111606, "learning_rate": 1.0912529331780824e-05, "loss": 0.3373, "step": 5120 }, { "epoch": 0.6291927755252488, "grad_norm": 0.3471824876682485, "learning_rate": 1.090633987183806e-05, "loss": 0.471, "step": 5121 }, { "epoch": 0.6293156407421059, "grad_norm": 0.35923215357014215, "learning_rate": 1.0900151164897532e-05, "loss": 0.33, "step": 5122 }, { "epoch": 0.629438505958963, "grad_norm": 0.3952439731816436, "learning_rate": 1.08939632120976e-05, "loss": 0.3832, "step": 5123 }, { "epoch": 0.6295613711758201, "grad_norm": 0.42771885473431165, "learning_rate": 1.0887776014576514e-05, "loss": 0.3837, "step": 5124 }, { "epoch": 0.6296842363926772, "grad_norm": 0.34697297033463426, "learning_rate": 1.088158957347236e-05, "loss": 0.5474, "step": 5125 }, { "epoch": 0.6298071016095343, "grad_norm": 0.3740013094450192, "learning_rate": 1.0875403889923098e-05, "loss": 0.3978, "step": 5126 }, { "epoch": 0.6299299668263915, "grad_norm": 0.33280279095948406, "learning_rate": 1.0869218965066536e-05, "loss": 0.3841, "step": 5127 }, { "epoch": 0.6300528320432486, "grad_norm": 0.31293151946843356, "learning_rate": 1.086303480004036e-05, "loss": 0.3631, "step": 5128 }, { "epoch": 0.6301756972601057, "grad_norm": 0.3524642474994389, "learning_rate": 1.0856851395982103e-05, "loss": 0.389, "step": 5129 }, { "epoch": 0.6302985624769628, "grad_norm": 0.3367532399204151, "learning_rate": 1.0850668754029157e-05, "loss": 0.4183, "step": 5130 }, { "epoch": 0.6304214276938199, "grad_norm": 0.3719580254710229, "learning_rate": 1.084448687531879e-05, "loss": 0.4136, "step": 5131 }, { "epoch": 0.630544292910677, "grad_norm": 0.31216028642797994, "learning_rate": 1.0838305760988113e-05, "loss": 0.3952, "step": 5132 }, { "epoch": 0.630667158127534, "grad_norm": 0.3338601915965321, "learning_rate": 1.0832125412174102e-05, "loss": 0.3622, "step": 5133 }, { "epoch": 0.6307900233443912, "grad_norm": 0.3516850393467012, "learning_rate": 1.0825945830013588e-05, "loss": 0.3388, "step": 5134 }, { "epoch": 0.6309128885612483, "grad_norm": 0.34728023014824855, "learning_rate": 1.0819767015643273e-05, "loss": 0.4089, "step": 5135 }, { "epoch": 0.6310357537781054, "grad_norm": 0.3660471826903317, "learning_rate": 1.0813588970199705e-05, "loss": 0.3749, "step": 5136 }, { "epoch": 0.6311586189949625, "grad_norm": 0.3550732501300865, "learning_rate": 1.0807411694819295e-05, "loss": 0.3539, "step": 5137 }, { "epoch": 0.6312814842118196, "grad_norm": 0.24596123405360812, "learning_rate": 1.0801235190638309e-05, "loss": 0.4045, "step": 5138 }, { "epoch": 0.6314043494286767, "grad_norm": 0.368958251554663, "learning_rate": 1.0795059458792886e-05, "loss": 0.4241, "step": 5139 }, { "epoch": 0.6315272146455339, "grad_norm": 0.3205686841939237, "learning_rate": 1.0788884500419005e-05, "loss": 0.3903, "step": 5140 }, { "epoch": 0.631650079862391, "grad_norm": 0.32689975477363203, "learning_rate": 1.0782710316652512e-05, "loss": 0.3794, "step": 5141 }, { "epoch": 0.6317729450792481, "grad_norm": 0.30237570131480923, "learning_rate": 1.0776536908629098e-05, "loss": 0.3618, "step": 5142 }, { "epoch": 0.6318958102961052, "grad_norm": 0.3519882169358943, "learning_rate": 1.0770364277484335e-05, "loss": 0.381, "step": 5143 }, { "epoch": 0.6320186755129623, "grad_norm": 0.39771475927756383, "learning_rate": 1.0764192424353634e-05, "loss": 0.496, "step": 5144 }, { "epoch": 0.6321415407298194, "grad_norm": 0.35129747731820465, "learning_rate": 1.0758021350372268e-05, "loss": 0.3489, "step": 5145 }, { "epoch": 0.6322644059466765, "grad_norm": 0.3176161438476041, "learning_rate": 1.0751851056675358e-05, "loss": 0.3457, "step": 5146 }, { "epoch": 0.6323872711635335, "grad_norm": 0.2894880258645306, "learning_rate": 1.0745681544397902e-05, "loss": 0.3416, "step": 5147 }, { "epoch": 0.6325101363803907, "grad_norm": 0.37849207927683, "learning_rate": 1.0739512814674734e-05, "loss": 0.3483, "step": 5148 }, { "epoch": 0.6326330015972478, "grad_norm": 0.34014104926744004, "learning_rate": 1.0733344868640556e-05, "loss": 0.3618, "step": 5149 }, { "epoch": 0.6327558668141049, "grad_norm": 0.3270593603243725, "learning_rate": 1.072717770742991e-05, "loss": 0.3663, "step": 5150 }, { "epoch": 0.632878732030962, "grad_norm": 0.3122496468564147, "learning_rate": 1.0721011332177223e-05, "loss": 0.3746, "step": 5151 }, { "epoch": 0.6330015972478191, "grad_norm": 0.3663270687053174, "learning_rate": 1.0714845744016749e-05, "loss": 0.4889, "step": 5152 }, { "epoch": 0.6331244624646762, "grad_norm": 0.34108332501764776, "learning_rate": 1.0708680944082608e-05, "loss": 0.417, "step": 5153 }, { "epoch": 0.6332473276815334, "grad_norm": 0.27772178912682055, "learning_rate": 1.0702516933508779e-05, "loss": 0.3576, "step": 5154 }, { "epoch": 0.6333701928983905, "grad_norm": 0.3282652732822789, "learning_rate": 1.0696353713429092e-05, "loss": 0.3226, "step": 5155 }, { "epoch": 0.6334930581152476, "grad_norm": 0.3886117670317126, "learning_rate": 1.0690191284977229e-05, "loss": 0.3769, "step": 5156 }, { "epoch": 0.6336159233321047, "grad_norm": 0.38707436556637287, "learning_rate": 1.0684029649286721e-05, "loss": 0.429, "step": 5157 }, { "epoch": 0.6337387885489618, "grad_norm": 0.3253094759518557, "learning_rate": 1.0677868807490977e-05, "loss": 0.3655, "step": 5158 }, { "epoch": 0.6338616537658189, "grad_norm": 0.40078681927815807, "learning_rate": 1.0671708760723236e-05, "loss": 0.3963, "step": 5159 }, { "epoch": 0.633984518982676, "grad_norm": 0.30411564273044633, "learning_rate": 1.0665549510116597e-05, "loss": 0.3265, "step": 5160 }, { "epoch": 0.6341073841995332, "grad_norm": 0.28619529288026113, "learning_rate": 1.065939105680401e-05, "loss": 0.3098, "step": 5161 }, { "epoch": 0.6342302494163902, "grad_norm": 0.4008285623316501, "learning_rate": 1.0653233401918296e-05, "loss": 0.4292, "step": 5162 }, { "epoch": 0.6343531146332473, "grad_norm": 0.3472409238750848, "learning_rate": 1.0647076546592105e-05, "loss": 0.4209, "step": 5163 }, { "epoch": 0.6344759798501044, "grad_norm": 0.3134333143493678, "learning_rate": 1.0640920491957957e-05, "loss": 0.43, "step": 5164 }, { "epoch": 0.6345988450669615, "grad_norm": 0.37260950785407726, "learning_rate": 1.063476523914821e-05, "loss": 0.3625, "step": 5165 }, { "epoch": 0.6347217102838186, "grad_norm": 0.37057161845581826, "learning_rate": 1.062861078929509e-05, "loss": 0.428, "step": 5166 }, { "epoch": 0.6348445755006757, "grad_norm": 0.34236778958155595, "learning_rate": 1.0622457143530666e-05, "loss": 0.3424, "step": 5167 }, { "epoch": 0.6349674407175329, "grad_norm": 0.3547738786647238, "learning_rate": 1.0616304302986863e-05, "loss": 0.3136, "step": 5168 }, { "epoch": 0.63509030593439, "grad_norm": 0.3097738144922763, "learning_rate": 1.0610152268795446e-05, "loss": 0.3825, "step": 5169 }, { "epoch": 0.6352131711512471, "grad_norm": 0.3300957928564699, "learning_rate": 1.0604001042088057e-05, "loss": 0.3683, "step": 5170 }, { "epoch": 0.6353360363681042, "grad_norm": 0.37006196847157746, "learning_rate": 1.0597850623996169e-05, "loss": 0.3857, "step": 5171 }, { "epoch": 0.6354589015849613, "grad_norm": 0.3660981475113656, "learning_rate": 1.0591701015651104e-05, "loss": 0.3445, "step": 5172 }, { "epoch": 0.6355817668018184, "grad_norm": 0.3202088669197015, "learning_rate": 1.0585552218184054e-05, "loss": 0.4562, "step": 5173 }, { "epoch": 0.6357046320186756, "grad_norm": 0.3520514925633884, "learning_rate": 1.0579404232726041e-05, "loss": 0.331, "step": 5174 }, { "epoch": 0.6358274972355327, "grad_norm": 0.35940898818035766, "learning_rate": 1.0573257060407955e-05, "loss": 0.3643, "step": 5175 }, { "epoch": 0.6359503624523898, "grad_norm": 0.4482812600431493, "learning_rate": 1.0567110702360514e-05, "loss": 0.4133, "step": 5176 }, { "epoch": 0.6360732276692468, "grad_norm": 0.3336496046409743, "learning_rate": 1.056096515971432e-05, "loss": 0.4033, "step": 5177 }, { "epoch": 0.6361960928861039, "grad_norm": 0.35040993294701406, "learning_rate": 1.0554820433599797e-05, "loss": 0.3685, "step": 5178 }, { "epoch": 0.636318958102961, "grad_norm": 0.3013640360430862, "learning_rate": 1.0548676525147226e-05, "loss": 0.39, "step": 5179 }, { "epoch": 0.6364418233198181, "grad_norm": 0.32981661582724253, "learning_rate": 1.0542533435486734e-05, "loss": 0.3672, "step": 5180 }, { "epoch": 0.6365646885366752, "grad_norm": 0.32134738529957685, "learning_rate": 1.0536391165748315e-05, "loss": 0.3686, "step": 5181 }, { "epoch": 0.6366875537535324, "grad_norm": 0.40930601205331024, "learning_rate": 1.0530249717061795e-05, "loss": 0.3906, "step": 5182 }, { "epoch": 0.6368104189703895, "grad_norm": 0.3099885453943226, "learning_rate": 1.052410909055685e-05, "loss": 0.4036, "step": 5183 }, { "epoch": 0.6369332841872466, "grad_norm": 0.3728948589887258, "learning_rate": 1.051796928736301e-05, "loss": 0.3583, "step": 5184 }, { "epoch": 0.6370561494041037, "grad_norm": 0.3118262876321732, "learning_rate": 1.0511830308609655e-05, "loss": 0.4039, "step": 5185 }, { "epoch": 0.6371790146209608, "grad_norm": 0.3454001565020762, "learning_rate": 1.0505692155426007e-05, "loss": 0.3802, "step": 5186 }, { "epoch": 0.6373018798378179, "grad_norm": 0.47684665308591073, "learning_rate": 1.049955482894114e-05, "loss": 0.4198, "step": 5187 }, { "epoch": 0.6374247450546751, "grad_norm": 0.36630239416575094, "learning_rate": 1.049341833028397e-05, "loss": 0.4069, "step": 5188 }, { "epoch": 0.6375476102715322, "grad_norm": 0.3937305145404306, "learning_rate": 1.0487282660583278e-05, "loss": 0.3747, "step": 5189 }, { "epoch": 0.6376704754883893, "grad_norm": 0.37462478969128526, "learning_rate": 1.0481147820967677e-05, "loss": 0.385, "step": 5190 }, { "epoch": 0.6377933407052463, "grad_norm": 0.46136951057709064, "learning_rate": 1.0475013812565628e-05, "loss": 0.3377, "step": 5191 }, { "epoch": 0.6379162059221034, "grad_norm": 0.3937464854111679, "learning_rate": 1.0468880636505437e-05, "loss": 0.4017, "step": 5192 }, { "epoch": 0.6380390711389605, "grad_norm": 0.33299081428980754, "learning_rate": 1.0462748293915271e-05, "loss": 0.3994, "step": 5193 }, { "epoch": 0.6381619363558176, "grad_norm": 0.3881572503310139, "learning_rate": 1.0456616785923131e-05, "loss": 0.3545, "step": 5194 }, { "epoch": 0.6382848015726748, "grad_norm": 0.4496420936067343, "learning_rate": 1.0450486113656862e-05, "loss": 0.3621, "step": 5195 }, { "epoch": 0.6384076667895319, "grad_norm": 0.40272716767766686, "learning_rate": 1.0444356278244178e-05, "loss": 0.4167, "step": 5196 }, { "epoch": 0.638530532006389, "grad_norm": 0.39587868507384405, "learning_rate": 1.0438227280812608e-05, "loss": 0.3513, "step": 5197 }, { "epoch": 0.6386533972232461, "grad_norm": 0.34147281505313676, "learning_rate": 1.0432099122489547e-05, "loss": 0.3551, "step": 5198 }, { "epoch": 0.6387762624401032, "grad_norm": 0.3574021640710382, "learning_rate": 1.0425971804402227e-05, "loss": 0.4212, "step": 5199 }, { "epoch": 0.6388991276569603, "grad_norm": 0.34685267686921273, "learning_rate": 1.0419845327677731e-05, "loss": 0.3394, "step": 5200 }, { "epoch": 0.6390219928738174, "grad_norm": 0.3258546428439126, "learning_rate": 1.0413719693442984e-05, "loss": 0.4408, "step": 5201 }, { "epoch": 0.6391448580906746, "grad_norm": 0.3014437249296628, "learning_rate": 1.0407594902824751e-05, "loss": 0.362, "step": 5202 }, { "epoch": 0.6392677233075317, "grad_norm": 0.3124451983691304, "learning_rate": 1.0401470956949656e-05, "loss": 0.3528, "step": 5203 }, { "epoch": 0.6393905885243888, "grad_norm": 0.3358004496804436, "learning_rate": 1.0395347856944158e-05, "loss": 0.3822, "step": 5204 }, { "epoch": 0.6395134537412459, "grad_norm": 0.40104971536893824, "learning_rate": 1.0389225603934561e-05, "loss": 0.3592, "step": 5205 }, { "epoch": 0.6396363189581029, "grad_norm": 0.34046945576108995, "learning_rate": 1.038310419904701e-05, "loss": 0.3804, "step": 5206 }, { "epoch": 0.63975918417496, "grad_norm": 0.3076060205538603, "learning_rate": 1.0376983643407497e-05, "loss": 0.3397, "step": 5207 }, { "epoch": 0.6398820493918171, "grad_norm": 0.40051311387626576, "learning_rate": 1.0370863938141864e-05, "loss": 0.3961, "step": 5208 }, { "epoch": 0.6400049146086743, "grad_norm": 0.466556582692077, "learning_rate": 1.036474508437579e-05, "loss": 0.4063, "step": 5209 }, { "epoch": 0.6401277798255314, "grad_norm": 0.3563135908111393, "learning_rate": 1.0358627083234797e-05, "loss": 0.3777, "step": 5210 }, { "epoch": 0.6402506450423885, "grad_norm": 0.27622765991425763, "learning_rate": 1.0352509935844248e-05, "loss": 0.3887, "step": 5211 }, { "epoch": 0.6403735102592456, "grad_norm": 0.41951760166285285, "learning_rate": 1.0346393643329359e-05, "loss": 0.4467, "step": 5212 }, { "epoch": 0.6404963754761027, "grad_norm": 0.3496374309075019, "learning_rate": 1.0340278206815183e-05, "loss": 0.3273, "step": 5213 }, { "epoch": 0.6406192406929598, "grad_norm": 0.4115247983256326, "learning_rate": 1.0334163627426603e-05, "loss": 0.4081, "step": 5214 }, { "epoch": 0.640742105909817, "grad_norm": 0.3736908510088163, "learning_rate": 1.0328049906288371e-05, "loss": 0.3406, "step": 5215 }, { "epoch": 0.6408649711266741, "grad_norm": 0.3587930858137993, "learning_rate": 1.0321937044525059e-05, "loss": 0.3321, "step": 5216 }, { "epoch": 0.6409878363435312, "grad_norm": 0.39737164947924697, "learning_rate": 1.031582504326109e-05, "loss": 0.4197, "step": 5217 }, { "epoch": 0.6411107015603883, "grad_norm": 0.34803922735992304, "learning_rate": 1.0309713903620723e-05, "loss": 0.3724, "step": 5218 }, { "epoch": 0.6412335667772454, "grad_norm": 0.3606595407435514, "learning_rate": 1.0303603626728069e-05, "loss": 0.4512, "step": 5219 }, { "epoch": 0.6413564319941024, "grad_norm": 0.33359346275887675, "learning_rate": 1.0297494213707073e-05, "loss": 0.4316, "step": 5220 }, { "epoch": 0.6414792972109595, "grad_norm": 0.38176979654972637, "learning_rate": 1.0291385665681516e-05, "loss": 0.4116, "step": 5221 }, { "epoch": 0.6416021624278166, "grad_norm": 0.41756046186219464, "learning_rate": 1.0285277983775026e-05, "loss": 0.4026, "step": 5222 }, { "epoch": 0.6417250276446738, "grad_norm": 0.3902539976702111, "learning_rate": 1.0279171169111079e-05, "loss": 0.3741, "step": 5223 }, { "epoch": 0.6418478928615309, "grad_norm": 0.3587766453763497, "learning_rate": 1.0273065222812982e-05, "loss": 0.3643, "step": 5224 }, { "epoch": 0.641970758078388, "grad_norm": 0.2828822448151576, "learning_rate": 1.0266960146003878e-05, "loss": 0.3253, "step": 5225 }, { "epoch": 0.6420936232952451, "grad_norm": 0.33565644393528105, "learning_rate": 1.0260855939806759e-05, "loss": 0.328, "step": 5226 }, { "epoch": 0.6422164885121022, "grad_norm": 0.33139409767350736, "learning_rate": 1.0254752605344458e-05, "loss": 0.3852, "step": 5227 }, { "epoch": 0.6423393537289593, "grad_norm": 0.3456440954888656, "learning_rate": 1.0248650143739643e-05, "loss": 0.2726, "step": 5228 }, { "epoch": 0.6424622189458165, "grad_norm": 0.3701526965654287, "learning_rate": 1.024254855611482e-05, "loss": 0.4219, "step": 5229 }, { "epoch": 0.6425850841626736, "grad_norm": 0.4586119827008059, "learning_rate": 1.0236447843592334e-05, "loss": 0.3783, "step": 5230 }, { "epoch": 0.6427079493795307, "grad_norm": 0.3331262646639299, "learning_rate": 1.0230348007294377e-05, "loss": 0.4441, "step": 5231 }, { "epoch": 0.6428308145963878, "grad_norm": 0.374707475464431, "learning_rate": 1.0224249048342974e-05, "loss": 0.3803, "step": 5232 }, { "epoch": 0.6429536798132449, "grad_norm": 0.3796209862830334, "learning_rate": 1.0218150967859984e-05, "loss": 0.3948, "step": 5233 }, { "epoch": 0.643076545030102, "grad_norm": 0.5290148138098247, "learning_rate": 1.0212053766967107e-05, "loss": 0.3345, "step": 5234 }, { "epoch": 0.643199410246959, "grad_norm": 0.3237354758096236, "learning_rate": 1.0205957446785894e-05, "loss": 0.4459, "step": 5235 }, { "epoch": 0.6433222754638162, "grad_norm": 0.3396269650250772, "learning_rate": 1.0199862008437718e-05, "loss": 0.4481, "step": 5236 }, { "epoch": 0.6434451406806733, "grad_norm": 0.3356642762682053, "learning_rate": 1.0193767453043795e-05, "loss": 0.4016, "step": 5237 }, { "epoch": 0.6435680058975304, "grad_norm": 0.35939235436258504, "learning_rate": 1.0187673781725181e-05, "loss": 0.3459, "step": 5238 }, { "epoch": 0.6436908711143875, "grad_norm": 0.37150779395762296, "learning_rate": 1.0181580995602766e-05, "loss": 0.338, "step": 5239 }, { "epoch": 0.6438137363312446, "grad_norm": 0.4761156644320343, "learning_rate": 1.0175489095797278e-05, "loss": 0.4357, "step": 5240 }, { "epoch": 0.6439366015481017, "grad_norm": 0.3566977673574394, "learning_rate": 1.0169398083429277e-05, "loss": 0.3506, "step": 5241 }, { "epoch": 0.6440594667649588, "grad_norm": 0.3757783769798699, "learning_rate": 1.0163307959619176e-05, "loss": 0.4914, "step": 5242 }, { "epoch": 0.644182331981816, "grad_norm": 0.43876413831447864, "learning_rate": 1.015721872548721e-05, "loss": 0.5158, "step": 5243 }, { "epoch": 0.6443051971986731, "grad_norm": 0.3948902634558596, "learning_rate": 1.0151130382153453e-05, "loss": 0.4497, "step": 5244 }, { "epoch": 0.6444280624155302, "grad_norm": 0.35135512443619765, "learning_rate": 1.014504293073781e-05, "loss": 0.4046, "step": 5245 }, { "epoch": 0.6445509276323873, "grad_norm": 0.30579345987151235, "learning_rate": 1.0138956372360041e-05, "loss": 0.4397, "step": 5246 }, { "epoch": 0.6446737928492444, "grad_norm": 0.30216696148159666, "learning_rate": 1.013287070813972e-05, "loss": 0.3707, "step": 5247 }, { "epoch": 0.6447966580661015, "grad_norm": 0.3228788161210449, "learning_rate": 1.012678593919627e-05, "loss": 0.374, "step": 5248 }, { "epoch": 0.6449195232829585, "grad_norm": 0.36409061166830387, "learning_rate": 1.0120702066648938e-05, "loss": 0.3441, "step": 5249 }, { "epoch": 0.6450423884998157, "grad_norm": 0.3844985251292455, "learning_rate": 1.0114619091616822e-05, "loss": 0.4009, "step": 5250 }, { "epoch": 0.6451652537166728, "grad_norm": 0.4277721892496507, "learning_rate": 1.010853701521884e-05, "loss": 0.4441, "step": 5251 }, { "epoch": 0.6452881189335299, "grad_norm": 0.33232169500929026, "learning_rate": 1.0102455838573753e-05, "loss": 0.3291, "step": 5252 }, { "epoch": 0.645410984150387, "grad_norm": 0.37041365867927106, "learning_rate": 1.0096375562800146e-05, "loss": 0.3703, "step": 5253 }, { "epoch": 0.6455338493672441, "grad_norm": 0.327510666167101, "learning_rate": 1.0090296189016459e-05, "loss": 0.4527, "step": 5254 }, { "epoch": 0.6456567145841012, "grad_norm": 0.3844291889696476, "learning_rate": 1.0084217718340949e-05, "loss": 0.4379, "step": 5255 }, { "epoch": 0.6457795798009583, "grad_norm": 0.311828352554933, "learning_rate": 1.0078140151891705e-05, "loss": 0.371, "step": 5256 }, { "epoch": 0.6459024450178155, "grad_norm": 0.3175664359157761, "learning_rate": 1.0072063490786665e-05, "loss": 0.3564, "step": 5257 }, { "epoch": 0.6460253102346726, "grad_norm": 0.2883722965792918, "learning_rate": 1.0065987736143586e-05, "loss": 0.3877, "step": 5258 }, { "epoch": 0.6461481754515297, "grad_norm": 0.34861835920081374, "learning_rate": 1.0059912889080064e-05, "loss": 0.4279, "step": 5259 }, { "epoch": 0.6462710406683868, "grad_norm": 0.38572914613149023, "learning_rate": 1.0053838950713523e-05, "loss": 0.3641, "step": 5260 }, { "epoch": 0.6463939058852439, "grad_norm": 0.3191140729331245, "learning_rate": 1.0047765922161237e-05, "loss": 0.3604, "step": 5261 }, { "epoch": 0.646516771102101, "grad_norm": 0.3799791023639264, "learning_rate": 1.0041693804540293e-05, "loss": 0.4427, "step": 5262 }, { "epoch": 0.6466396363189582, "grad_norm": 0.4117500195936766, "learning_rate": 1.0035622598967618e-05, "loss": 0.3804, "step": 5263 }, { "epoch": 0.6467625015358152, "grad_norm": 0.27720300789925395, "learning_rate": 1.0029552306559965e-05, "loss": 0.3551, "step": 5264 }, { "epoch": 0.6468853667526723, "grad_norm": 0.44285919144731495, "learning_rate": 1.0023482928433934e-05, "loss": 0.45, "step": 5265 }, { "epoch": 0.6470082319695294, "grad_norm": 0.37306705171552845, "learning_rate": 1.0017414465705948e-05, "loss": 0.3892, "step": 5266 }, { "epoch": 0.6471310971863865, "grad_norm": 0.3925620267914454, "learning_rate": 1.0011346919492256e-05, "loss": 0.4499, "step": 5267 }, { "epoch": 0.6472539624032436, "grad_norm": 0.38187540934264735, "learning_rate": 1.0005280290908943e-05, "loss": 0.3687, "step": 5268 }, { "epoch": 0.6473768276201007, "grad_norm": 0.3405798153660756, "learning_rate": 9.999214581071933e-06, "loss": 0.46, "step": 5269 }, { "epoch": 0.6474996928369579, "grad_norm": 0.3580949102817832, "learning_rate": 9.993149791096968e-06, "loss": 0.3459, "step": 5270 }, { "epoch": 0.647622558053815, "grad_norm": 0.39106950777367566, "learning_rate": 9.987085922099628e-06, "loss": 0.4107, "step": 5271 }, { "epoch": 0.6477454232706721, "grad_norm": 0.35738523332055705, "learning_rate": 9.981022975195319e-06, "loss": 0.4212, "step": 5272 }, { "epoch": 0.6478682884875292, "grad_norm": 0.30571082849124137, "learning_rate": 9.974960951499288e-06, "loss": 0.4939, "step": 5273 }, { "epoch": 0.6479911537043863, "grad_norm": 0.3626688073893643, "learning_rate": 9.968899852126605e-06, "loss": 0.3446, "step": 5274 }, { "epoch": 0.6481140189212434, "grad_norm": 0.3765352982524763, "learning_rate": 9.962839678192163e-06, "loss": 0.351, "step": 5275 }, { "epoch": 0.6482368841381005, "grad_norm": 0.4072774908692263, "learning_rate": 9.956780430810692e-06, "loss": 0.429, "step": 5276 }, { "epoch": 0.6483597493549577, "grad_norm": 0.35422973538049757, "learning_rate": 9.950722111096758e-06, "loss": 0.3555, "step": 5277 }, { "epoch": 0.6484826145718147, "grad_norm": 0.4079140660084252, "learning_rate": 9.944664720164745e-06, "loss": 0.5024, "step": 5278 }, { "epoch": 0.6486054797886718, "grad_norm": 0.286417124312777, "learning_rate": 9.938608259128866e-06, "loss": 0.3815, "step": 5279 }, { "epoch": 0.6487283450055289, "grad_norm": 0.3179553677368645, "learning_rate": 9.932552729103183e-06, "loss": 0.3387, "step": 5280 }, { "epoch": 0.648851210222386, "grad_norm": 0.38843778503164805, "learning_rate": 9.926498131201556e-06, "loss": 0.4138, "step": 5281 }, { "epoch": 0.6489740754392431, "grad_norm": 0.3092093785952463, "learning_rate": 9.9204444665377e-06, "loss": 0.408, "step": 5282 }, { "epoch": 0.6490969406561002, "grad_norm": 0.3413759132693015, "learning_rate": 9.914391736225134e-06, "loss": 0.4261, "step": 5283 }, { "epoch": 0.6492198058729574, "grad_norm": 0.3328717037485162, "learning_rate": 9.908339941377232e-06, "loss": 0.3444, "step": 5284 }, { "epoch": 0.6493426710898145, "grad_norm": 0.3976590504071228, "learning_rate": 9.902289083107181e-06, "loss": 0.4658, "step": 5285 }, { "epoch": 0.6494655363066716, "grad_norm": 0.3374027308860944, "learning_rate": 9.89623916252799e-06, "loss": 0.3045, "step": 5286 }, { "epoch": 0.6495884015235287, "grad_norm": 0.28108362659183783, "learning_rate": 9.890190180752503e-06, "loss": 0.4219, "step": 5287 }, { "epoch": 0.6497112667403858, "grad_norm": 0.4123211508944347, "learning_rate": 9.884142138893399e-06, "loss": 0.4638, "step": 5288 }, { "epoch": 0.6498341319572429, "grad_norm": 0.4245251107297531, "learning_rate": 9.87809503806317e-06, "loss": 0.4343, "step": 5289 }, { "epoch": 0.6499569971741, "grad_norm": 0.3362353961840522, "learning_rate": 9.87204887937414e-06, "loss": 0.3026, "step": 5290 }, { "epoch": 0.6500798623909572, "grad_norm": 0.3441542808654465, "learning_rate": 9.86600366393846e-06, "loss": 0.2866, "step": 5291 }, { "epoch": 0.6502027276078143, "grad_norm": 0.3561106246566509, "learning_rate": 9.859959392868114e-06, "loss": 0.344, "step": 5292 }, { "epoch": 0.6503255928246713, "grad_norm": 0.2834623707338869, "learning_rate": 9.853916067274905e-06, "loss": 0.3696, "step": 5293 }, { "epoch": 0.6504484580415284, "grad_norm": 0.3364773519506986, "learning_rate": 9.847873688270462e-06, "loss": 0.4099, "step": 5294 }, { "epoch": 0.6505713232583855, "grad_norm": 0.3198169253374253, "learning_rate": 9.841832256966239e-06, "loss": 0.3378, "step": 5295 }, { "epoch": 0.6506941884752426, "grad_norm": 0.4062689350104478, "learning_rate": 9.835791774473522e-06, "loss": 0.4165, "step": 5296 }, { "epoch": 0.6508170536920997, "grad_norm": 0.3750443293578648, "learning_rate": 9.829752241903418e-06, "loss": 0.3668, "step": 5297 }, { "epoch": 0.6509399189089569, "grad_norm": 0.380738020663698, "learning_rate": 9.823713660366858e-06, "loss": 0.4228, "step": 5298 }, { "epoch": 0.651062784125814, "grad_norm": 0.3205568120839836, "learning_rate": 9.817676030974596e-06, "loss": 0.3091, "step": 5299 }, { "epoch": 0.6511856493426711, "grad_norm": 0.3346551335747322, "learning_rate": 9.811639354837224e-06, "loss": 0.4183, "step": 5300 }, { "epoch": 0.6513085145595282, "grad_norm": 0.34799434049558337, "learning_rate": 9.805603633065145e-06, "loss": 0.3557, "step": 5301 }, { "epoch": 0.6514313797763853, "grad_norm": 0.32978532748823075, "learning_rate": 9.799568866768584e-06, "loss": 0.3388, "step": 5302 }, { "epoch": 0.6515542449932424, "grad_norm": 0.3303236967369983, "learning_rate": 9.793535057057614e-06, "loss": 0.4196, "step": 5303 }, { "epoch": 0.6516771102100996, "grad_norm": 0.36882810295276863, "learning_rate": 9.787502205042102e-06, "loss": 0.4346, "step": 5304 }, { "epoch": 0.6517999754269567, "grad_norm": 0.3874996981243445, "learning_rate": 9.781470311831755e-06, "loss": 0.3912, "step": 5305 }, { "epoch": 0.6519228406438138, "grad_norm": 0.37838469026408394, "learning_rate": 9.7754393785361e-06, "loss": 0.3186, "step": 5306 }, { "epoch": 0.6520457058606709, "grad_norm": 0.4449226701469871, "learning_rate": 9.76940940626449e-06, "loss": 0.4166, "step": 5307 }, { "epoch": 0.6521685710775279, "grad_norm": 0.5370101345525976, "learning_rate": 9.763380396126099e-06, "loss": 0.4809, "step": 5308 }, { "epoch": 0.652291436294385, "grad_norm": 0.37850872018552784, "learning_rate": 9.757352349229922e-06, "loss": 0.363, "step": 5309 }, { "epoch": 0.6524143015112421, "grad_norm": 0.328082447598406, "learning_rate": 9.751325266684775e-06, "loss": 0.4567, "step": 5310 }, { "epoch": 0.6525371667280992, "grad_norm": 0.33413980199335586, "learning_rate": 9.745299149599314e-06, "loss": 0.3387, "step": 5311 }, { "epoch": 0.6526600319449564, "grad_norm": 0.34981869652033976, "learning_rate": 9.739273999081995e-06, "loss": 0.4578, "step": 5312 }, { "epoch": 0.6527828971618135, "grad_norm": 0.3688199499830948, "learning_rate": 9.733249816241108e-06, "loss": 0.4567, "step": 5313 }, { "epoch": 0.6529057623786706, "grad_norm": 0.411988139610438, "learning_rate": 9.727226602184759e-06, "loss": 0.3888, "step": 5314 }, { "epoch": 0.6530286275955277, "grad_norm": 0.3303345444986706, "learning_rate": 9.721204358020881e-06, "loss": 0.4919, "step": 5315 }, { "epoch": 0.6531514928123848, "grad_norm": 0.3432262262463667, "learning_rate": 9.71518308485723e-06, "loss": 0.3593, "step": 5316 }, { "epoch": 0.6532743580292419, "grad_norm": 0.3209480009492807, "learning_rate": 9.709162783801375e-06, "loss": 0.3864, "step": 5317 }, { "epoch": 0.6533972232460991, "grad_norm": 0.34072904598410997, "learning_rate": 9.70314345596071e-06, "loss": 0.4382, "step": 5318 }, { "epoch": 0.6535200884629562, "grad_norm": 0.48164403427916475, "learning_rate": 9.697125102442461e-06, "loss": 0.4493, "step": 5319 }, { "epoch": 0.6536429536798133, "grad_norm": 0.32655760091451735, "learning_rate": 9.691107724353656e-06, "loss": 0.3466, "step": 5320 }, { "epoch": 0.6537658188966704, "grad_norm": 0.37967650833637706, "learning_rate": 9.685091322801155e-06, "loss": 0.5348, "step": 5321 }, { "epoch": 0.6538886841135274, "grad_norm": 0.4133021744060527, "learning_rate": 9.67907589889164e-06, "loss": 0.4484, "step": 5322 }, { "epoch": 0.6540115493303845, "grad_norm": 0.3447038801151448, "learning_rate": 9.673061453731605e-06, "loss": 0.3787, "step": 5323 }, { "epoch": 0.6541344145472416, "grad_norm": 0.36771749627289213, "learning_rate": 9.66704798842737e-06, "loss": 0.3904, "step": 5324 }, { "epoch": 0.6542572797640988, "grad_norm": 0.35488486227645694, "learning_rate": 9.661035504085065e-06, "loss": 0.3712, "step": 5325 }, { "epoch": 0.6543801449809559, "grad_norm": 0.34640103200537964, "learning_rate": 9.655024001810662e-06, "loss": 0.4028, "step": 5326 }, { "epoch": 0.654503010197813, "grad_norm": 0.35377193329782913, "learning_rate": 9.64901348270993e-06, "loss": 0.3482, "step": 5327 }, { "epoch": 0.6546258754146701, "grad_norm": 0.37142513417956124, "learning_rate": 9.643003947888465e-06, "loss": 0.445, "step": 5328 }, { "epoch": 0.6547487406315272, "grad_norm": 0.3308856497895485, "learning_rate": 9.636995398451677e-06, "loss": 0.4998, "step": 5329 }, { "epoch": 0.6548716058483843, "grad_norm": 0.34790668927164137, "learning_rate": 9.630987835504811e-06, "loss": 0.3697, "step": 5330 }, { "epoch": 0.6549944710652414, "grad_norm": 0.3869055254428737, "learning_rate": 9.624981260152914e-06, "loss": 0.4192, "step": 5331 }, { "epoch": 0.6551173362820986, "grad_norm": 0.6579903606373341, "learning_rate": 9.618975673500856e-06, "loss": 0.5145, "step": 5332 }, { "epoch": 0.6552402014989557, "grad_norm": 0.39253305511330316, "learning_rate": 9.61297107665332e-06, "loss": 0.463, "step": 5333 }, { "epoch": 0.6553630667158128, "grad_norm": 0.3657448431853525, "learning_rate": 9.606967470714826e-06, "loss": 0.3883, "step": 5334 }, { "epoch": 0.6554859319326699, "grad_norm": 0.33143827355671585, "learning_rate": 9.600964856789688e-06, "loss": 0.5204, "step": 5335 }, { "epoch": 0.655608797149527, "grad_norm": 0.33743518923710986, "learning_rate": 9.59496323598205e-06, "loss": 0.3244, "step": 5336 }, { "epoch": 0.655731662366384, "grad_norm": 0.30265951616225134, "learning_rate": 9.588962609395867e-06, "loss": 0.4741, "step": 5337 }, { "epoch": 0.6558545275832411, "grad_norm": 0.4272755266946942, "learning_rate": 9.582962978134924e-06, "loss": 0.4179, "step": 5338 }, { "epoch": 0.6559773928000983, "grad_norm": 0.3142215847260652, "learning_rate": 9.576964343302812e-06, "loss": 0.3482, "step": 5339 }, { "epoch": 0.6561002580169554, "grad_norm": 0.3197158801981731, "learning_rate": 9.570966706002941e-06, "loss": 0.4043, "step": 5340 }, { "epoch": 0.6562231232338125, "grad_norm": 0.3381378625342979, "learning_rate": 9.564970067338532e-06, "loss": 0.3299, "step": 5341 }, { "epoch": 0.6563459884506696, "grad_norm": 0.3639708478051216, "learning_rate": 9.558974428412634e-06, "loss": 0.3623, "step": 5342 }, { "epoch": 0.6564688536675267, "grad_norm": 0.3896933643759065, "learning_rate": 9.552979790328105e-06, "loss": 0.4713, "step": 5343 }, { "epoch": 0.6565917188843838, "grad_norm": 0.3663765617909762, "learning_rate": 9.54698615418761e-06, "loss": 0.3704, "step": 5344 }, { "epoch": 0.656714584101241, "grad_norm": 0.33863528556805716, "learning_rate": 9.540993521093654e-06, "loss": 0.4372, "step": 5345 }, { "epoch": 0.6568374493180981, "grad_norm": 0.3586167449784262, "learning_rate": 9.535001892148538e-06, "loss": 0.403, "step": 5346 }, { "epoch": 0.6569603145349552, "grad_norm": 0.35910426302771803, "learning_rate": 9.529011268454384e-06, "loss": 0.353, "step": 5347 }, { "epoch": 0.6570831797518123, "grad_norm": 0.36352154425703426, "learning_rate": 9.523021651113118e-06, "loss": 0.4261, "step": 5348 }, { "epoch": 0.6572060449686694, "grad_norm": 0.2875870153668895, "learning_rate": 9.517033041226506e-06, "loss": 0.4204, "step": 5349 }, { "epoch": 0.6573289101855265, "grad_norm": 0.4111847637404555, "learning_rate": 9.51104543989611e-06, "loss": 0.3911, "step": 5350 }, { "epoch": 0.6574517754023835, "grad_norm": 0.32615808144941255, "learning_rate": 9.505058848223306e-06, "loss": 0.451, "step": 5351 }, { "epoch": 0.6575746406192406, "grad_norm": 0.3753535790892371, "learning_rate": 9.49907326730929e-06, "loss": 0.4472, "step": 5352 }, { "epoch": 0.6576975058360978, "grad_norm": 0.29519597038928364, "learning_rate": 9.49308869825507e-06, "loss": 0.4056, "step": 5353 }, { "epoch": 0.6578203710529549, "grad_norm": 0.33338066266803174, "learning_rate": 9.487105142161475e-06, "loss": 0.3572, "step": 5354 }, { "epoch": 0.657943236269812, "grad_norm": 0.35373093107749753, "learning_rate": 9.481122600129137e-06, "loss": 0.3466, "step": 5355 }, { "epoch": 0.6580661014866691, "grad_norm": 0.30736534567874385, "learning_rate": 9.475141073258498e-06, "loss": 0.3186, "step": 5356 }, { "epoch": 0.6581889667035262, "grad_norm": 0.3894336165624811, "learning_rate": 9.469160562649832e-06, "loss": 0.527, "step": 5357 }, { "epoch": 0.6583118319203833, "grad_norm": 0.34015419235703276, "learning_rate": 9.463181069403216e-06, "loss": 0.3796, "step": 5358 }, { "epoch": 0.6584346971372405, "grad_norm": 0.3624930775601776, "learning_rate": 9.457202594618532e-06, "loss": 0.3449, "step": 5359 }, { "epoch": 0.6585575623540976, "grad_norm": 0.2902465573927758, "learning_rate": 9.451225139395482e-06, "loss": 0.3816, "step": 5360 }, { "epoch": 0.6586804275709547, "grad_norm": 0.4414277846823068, "learning_rate": 9.445248704833587e-06, "loss": 0.3739, "step": 5361 }, { "epoch": 0.6588032927878118, "grad_norm": 0.38064423189828284, "learning_rate": 9.439273292032168e-06, "loss": 0.4894, "step": 5362 }, { "epoch": 0.6589261580046689, "grad_norm": 0.3547883508921758, "learning_rate": 9.43329890209036e-06, "loss": 0.3913, "step": 5363 }, { "epoch": 0.659049023221526, "grad_norm": 0.3273729727142288, "learning_rate": 9.42732553610712e-06, "loss": 0.3554, "step": 5364 }, { "epoch": 0.6591718884383831, "grad_norm": 0.3145945401460033, "learning_rate": 9.42135319518121e-06, "loss": 0.3978, "step": 5365 }, { "epoch": 0.6592947536552402, "grad_norm": 0.45354168457725713, "learning_rate": 9.4153818804112e-06, "loss": 0.4643, "step": 5366 }, { "epoch": 0.6594176188720973, "grad_norm": 0.3219620094423895, "learning_rate": 9.409411592895469e-06, "loss": 0.456, "step": 5367 }, { "epoch": 0.6595404840889544, "grad_norm": 0.4164228094739154, "learning_rate": 9.403442333732227e-06, "loss": 0.4357, "step": 5368 }, { "epoch": 0.6596633493058115, "grad_norm": 0.32994604487038887, "learning_rate": 9.397474104019471e-06, "loss": 0.3502, "step": 5369 }, { "epoch": 0.6597862145226686, "grad_norm": 0.3888916695068164, "learning_rate": 9.391506904855022e-06, "loss": 0.3341, "step": 5370 }, { "epoch": 0.6599090797395257, "grad_norm": 0.3430431174352198, "learning_rate": 9.385540737336502e-06, "loss": 0.4437, "step": 5371 }, { "epoch": 0.6600319449563828, "grad_norm": 0.33458557838862363, "learning_rate": 9.379575602561355e-06, "loss": 0.4236, "step": 5372 }, { "epoch": 0.66015481017324, "grad_norm": 0.37996662490371863, "learning_rate": 9.373611501626826e-06, "loss": 0.3568, "step": 5373 }, { "epoch": 0.6602776753900971, "grad_norm": 0.40376323899981204, "learning_rate": 9.367648435629973e-06, "loss": 0.468, "step": 5374 }, { "epoch": 0.6604005406069542, "grad_norm": 0.35582092048698166, "learning_rate": 9.361686405667657e-06, "loss": 0.4128, "step": 5375 }, { "epoch": 0.6605234058238113, "grad_norm": 0.43205132716619277, "learning_rate": 9.355725412836565e-06, "loss": 0.4377, "step": 5376 }, { "epoch": 0.6606462710406684, "grad_norm": 0.35840741787267494, "learning_rate": 9.349765458233182e-06, "loss": 0.382, "step": 5377 }, { "epoch": 0.6607691362575255, "grad_norm": 0.3322207302943976, "learning_rate": 9.343806542953798e-06, "loss": 0.4077, "step": 5378 }, { "epoch": 0.6608920014743827, "grad_norm": 0.26555944318167635, "learning_rate": 9.337848668094517e-06, "loss": 0.3694, "step": 5379 }, { "epoch": 0.6610148666912397, "grad_norm": 0.47183052740153913, "learning_rate": 9.331891834751254e-06, "loss": 0.4549, "step": 5380 }, { "epoch": 0.6611377319080968, "grad_norm": 0.3598333704410755, "learning_rate": 9.32593604401973e-06, "loss": 0.3468, "step": 5381 }, { "epoch": 0.6612605971249539, "grad_norm": 0.35798304010431614, "learning_rate": 9.319981296995474e-06, "loss": 0.3007, "step": 5382 }, { "epoch": 0.661383462341811, "grad_norm": 0.36250411248163344, "learning_rate": 9.314027594773816e-06, "loss": 0.342, "step": 5383 }, { "epoch": 0.6615063275586681, "grad_norm": 0.3516578080386877, "learning_rate": 9.308074938449914e-06, "loss": 0.3802, "step": 5384 }, { "epoch": 0.6616291927755252, "grad_norm": 0.4215883083488663, "learning_rate": 9.302123329118712e-06, "loss": 0.3745, "step": 5385 }, { "epoch": 0.6617520579923823, "grad_norm": 0.4361240190128167, "learning_rate": 9.296172767874966e-06, "loss": 0.4376, "step": 5386 }, { "epoch": 0.6618749232092395, "grad_norm": 0.35280511415638555, "learning_rate": 9.290223255813256e-06, "loss": 0.3599, "step": 5387 }, { "epoch": 0.6619977884260966, "grad_norm": 0.3375392492507405, "learning_rate": 9.284274794027947e-06, "loss": 0.4079, "step": 5388 }, { "epoch": 0.6621206536429537, "grad_norm": 0.3640055140256486, "learning_rate": 9.278327383613224e-06, "loss": 0.3497, "step": 5389 }, { "epoch": 0.6622435188598108, "grad_norm": 0.32114194678491476, "learning_rate": 9.272381025663068e-06, "loss": 0.3382, "step": 5390 }, { "epoch": 0.6623663840766679, "grad_norm": 0.39619928206327254, "learning_rate": 9.26643572127128e-06, "loss": 0.3949, "step": 5391 }, { "epoch": 0.662489249293525, "grad_norm": 0.28575942432858864, "learning_rate": 9.260491471531459e-06, "loss": 0.3389, "step": 5392 }, { "epoch": 0.6626121145103822, "grad_norm": 0.2831515640706857, "learning_rate": 9.254548277537008e-06, "loss": 0.2886, "step": 5393 }, { "epoch": 0.6627349797272393, "grad_norm": 0.4386241051013211, "learning_rate": 9.248606140381135e-06, "loss": 0.3077, "step": 5394 }, { "epoch": 0.6628578449440963, "grad_norm": 0.37741898966740217, "learning_rate": 9.242665061156871e-06, "loss": 0.4396, "step": 5395 }, { "epoch": 0.6629807101609534, "grad_norm": 0.2944642828679692, "learning_rate": 9.236725040957032e-06, "loss": 0.3739, "step": 5396 }, { "epoch": 0.6631035753778105, "grad_norm": 0.2896169673191949, "learning_rate": 9.230786080874243e-06, "loss": 0.3407, "step": 5397 }, { "epoch": 0.6632264405946676, "grad_norm": 0.3529261906068363, "learning_rate": 9.224848182000937e-06, "loss": 0.3541, "step": 5398 }, { "epoch": 0.6633493058115247, "grad_norm": 0.41866448459577166, "learning_rate": 9.21891134542936e-06, "loss": 0.3753, "step": 5399 }, { "epoch": 0.6634721710283819, "grad_norm": 0.2664113422865977, "learning_rate": 9.212975572251547e-06, "loss": 0.405, "step": 5400 }, { "epoch": 0.663595036245239, "grad_norm": 0.38859870695336285, "learning_rate": 9.207040863559349e-06, "loss": 0.3388, "step": 5401 }, { "epoch": 0.6637179014620961, "grad_norm": 0.32655822183539013, "learning_rate": 9.201107220444407e-06, "loss": 0.3492, "step": 5402 }, { "epoch": 0.6638407666789532, "grad_norm": 0.32828949630086407, "learning_rate": 9.195174643998193e-06, "loss": 0.4283, "step": 5403 }, { "epoch": 0.6639636318958103, "grad_norm": 0.3731023033305997, "learning_rate": 9.189243135311957e-06, "loss": 0.4168, "step": 5404 }, { "epoch": 0.6640864971126674, "grad_norm": 0.4190239306901573, "learning_rate": 9.183312695476762e-06, "loss": 0.3998, "step": 5405 }, { "epoch": 0.6642093623295245, "grad_norm": 0.4328893367741152, "learning_rate": 9.17738332558347e-06, "loss": 0.4261, "step": 5406 }, { "epoch": 0.6643322275463817, "grad_norm": 0.37855346707214055, "learning_rate": 9.171455026722757e-06, "loss": 0.3796, "step": 5407 }, { "epoch": 0.6644550927632388, "grad_norm": 0.35841719779836706, "learning_rate": 9.165527799985095e-06, "loss": 0.463, "step": 5408 }, { "epoch": 0.6645779579800959, "grad_norm": 0.3356431848323819, "learning_rate": 9.159601646460752e-06, "loss": 0.4349, "step": 5409 }, { "epoch": 0.6647008231969529, "grad_norm": 0.3154326192633924, "learning_rate": 9.153676567239812e-06, "loss": 0.4184, "step": 5410 }, { "epoch": 0.66482368841381, "grad_norm": 0.3304212919131426, "learning_rate": 9.147752563412155e-06, "loss": 0.3094, "step": 5411 }, { "epoch": 0.6649465536306671, "grad_norm": 0.33844083613019443, "learning_rate": 9.141829636067458e-06, "loss": 0.3771, "step": 5412 }, { "epoch": 0.6650694188475242, "grad_norm": 0.3556151471750582, "learning_rate": 9.135907786295204e-06, "loss": 0.3394, "step": 5413 }, { "epoch": 0.6651922840643814, "grad_norm": 0.3082188220445296, "learning_rate": 9.129987015184687e-06, "loss": 0.477, "step": 5414 }, { "epoch": 0.6653151492812385, "grad_norm": 0.30570339063711793, "learning_rate": 9.124067323824993e-06, "loss": 0.4162, "step": 5415 }, { "epoch": 0.6654380144980956, "grad_norm": 0.3708091980223649, "learning_rate": 9.118148713305006e-06, "loss": 0.4243, "step": 5416 }, { "epoch": 0.6655608797149527, "grad_norm": 0.3394425397297569, "learning_rate": 9.112231184713415e-06, "loss": 0.3311, "step": 5417 }, { "epoch": 0.6656837449318098, "grad_norm": 0.3149001520830076, "learning_rate": 9.106314739138718e-06, "loss": 0.3812, "step": 5418 }, { "epoch": 0.6658066101486669, "grad_norm": 0.34360053699445214, "learning_rate": 9.100399377669203e-06, "loss": 0.3872, "step": 5419 }, { "epoch": 0.665929475365524, "grad_norm": 0.3905915749590072, "learning_rate": 9.09448510139296e-06, "loss": 0.3754, "step": 5420 }, { "epoch": 0.6660523405823812, "grad_norm": 0.3424781856232055, "learning_rate": 9.088571911397882e-06, "loss": 0.3688, "step": 5421 }, { "epoch": 0.6661752057992383, "grad_norm": 0.3410406765193141, "learning_rate": 9.082659808771666e-06, "loss": 0.3826, "step": 5422 }, { "epoch": 0.6662980710160954, "grad_norm": 0.36723195739198417, "learning_rate": 9.076748794601803e-06, "loss": 0.3186, "step": 5423 }, { "epoch": 0.6664209362329524, "grad_norm": 0.42738359003422055, "learning_rate": 9.070838869975587e-06, "loss": 0.51, "step": 5424 }, { "epoch": 0.6665438014498095, "grad_norm": 0.33368146959524037, "learning_rate": 9.064930035980104e-06, "loss": 0.3997, "step": 5425 }, { "epoch": 0.6666666666666666, "grad_norm": 0.39700045102250187, "learning_rate": 9.059022293702257e-06, "loss": 0.4017, "step": 5426 }, { "epoch": 0.6667895318835237, "grad_norm": 0.33012927081857774, "learning_rate": 9.053115644228729e-06, "loss": 0.4065, "step": 5427 }, { "epoch": 0.6669123971003809, "grad_norm": 0.589391795721933, "learning_rate": 9.047210088646005e-06, "loss": 0.4262, "step": 5428 }, { "epoch": 0.667035262317238, "grad_norm": 0.3865379714212973, "learning_rate": 9.04130562804039e-06, "loss": 0.404, "step": 5429 }, { "epoch": 0.6671581275340951, "grad_norm": 0.3906165467114895, "learning_rate": 9.035402263497956e-06, "loss": 0.3899, "step": 5430 }, { "epoch": 0.6672809927509522, "grad_norm": 0.3220361075119075, "learning_rate": 9.029499996104594e-06, "loss": 0.3719, "step": 5431 }, { "epoch": 0.6674038579678093, "grad_norm": 0.39070539428157136, "learning_rate": 9.023598826945983e-06, "loss": 0.4261, "step": 5432 }, { "epoch": 0.6675267231846664, "grad_norm": 0.3325605099761942, "learning_rate": 9.017698757107618e-06, "loss": 0.3758, "step": 5433 }, { "epoch": 0.6676495884015236, "grad_norm": 0.4251132023236607, "learning_rate": 9.011799787674767e-06, "loss": 0.3755, "step": 5434 }, { "epoch": 0.6677724536183807, "grad_norm": 0.34964945683464094, "learning_rate": 9.00590191973251e-06, "loss": 0.452, "step": 5435 }, { "epoch": 0.6678953188352378, "grad_norm": 0.4121321777319094, "learning_rate": 9.00000515436572e-06, "loss": 0.3696, "step": 5436 }, { "epoch": 0.6680181840520949, "grad_norm": 0.30450268389267665, "learning_rate": 8.994109492659072e-06, "loss": 0.3006, "step": 5437 }, { "epoch": 0.668141049268952, "grad_norm": 0.30413238925149305, "learning_rate": 8.988214935697036e-06, "loss": 0.3723, "step": 5438 }, { "epoch": 0.668263914485809, "grad_norm": 0.32502269940203277, "learning_rate": 8.982321484563872e-06, "loss": 0.3092, "step": 5439 }, { "epoch": 0.6683867797026661, "grad_norm": 0.36460825807611513, "learning_rate": 8.976429140343639e-06, "loss": 0.3909, "step": 5440 }, { "epoch": 0.6685096449195232, "grad_norm": 0.3846997711940175, "learning_rate": 8.970537904120211e-06, "loss": 0.4475, "step": 5441 }, { "epoch": 0.6686325101363804, "grad_norm": 0.35324944644941836, "learning_rate": 8.96464777697723e-06, "loss": 0.3848, "step": 5442 }, { "epoch": 0.6687553753532375, "grad_norm": 0.37208747103968937, "learning_rate": 8.95875875999815e-06, "loss": 0.4116, "step": 5443 }, { "epoch": 0.6688782405700946, "grad_norm": 0.31594773219661426, "learning_rate": 8.952870854266214e-06, "loss": 0.3554, "step": 5444 }, { "epoch": 0.6690011057869517, "grad_norm": 0.3935713744926109, "learning_rate": 8.946984060864471e-06, "loss": 0.4028, "step": 5445 }, { "epoch": 0.6691239710038088, "grad_norm": 0.36547104958250864, "learning_rate": 8.941098380875754e-06, "loss": 0.4571, "step": 5446 }, { "epoch": 0.6692468362206659, "grad_norm": 0.35598709747457286, "learning_rate": 8.935213815382698e-06, "loss": 0.3415, "step": 5447 }, { "epoch": 0.6693697014375231, "grad_norm": 0.3812801921324694, "learning_rate": 8.929330365467722e-06, "loss": 0.3583, "step": 5448 }, { "epoch": 0.6694925666543802, "grad_norm": 0.3245492872070572, "learning_rate": 8.923448032213062e-06, "loss": 0.3686, "step": 5449 }, { "epoch": 0.6696154318712373, "grad_norm": 0.3684050573716814, "learning_rate": 8.917566816700729e-06, "loss": 0.3995, "step": 5450 }, { "epoch": 0.6697382970880944, "grad_norm": 0.34035402282854377, "learning_rate": 8.911686720012527e-06, "loss": 0.395, "step": 5451 }, { "epoch": 0.6698611623049515, "grad_norm": 0.3854606699169082, "learning_rate": 8.905807743230075e-06, "loss": 0.4664, "step": 5452 }, { "epoch": 0.6699840275218085, "grad_norm": 0.24811454038680755, "learning_rate": 8.899929887434767e-06, "loss": 0.2996, "step": 5453 }, { "epoch": 0.6701068927386656, "grad_norm": 0.41611196109060133, "learning_rate": 8.894053153707798e-06, "loss": 0.3434, "step": 5454 }, { "epoch": 0.6702297579555228, "grad_norm": 0.34438768390672814, "learning_rate": 8.888177543130144e-06, "loss": 0.3412, "step": 5455 }, { "epoch": 0.6703526231723799, "grad_norm": 0.3449871248562618, "learning_rate": 8.882303056782603e-06, "loss": 0.3687, "step": 5456 }, { "epoch": 0.670475488389237, "grad_norm": 0.390501151925174, "learning_rate": 8.876429695745739e-06, "loss": 0.4386, "step": 5457 }, { "epoch": 0.6705983536060941, "grad_norm": 0.3426161581218515, "learning_rate": 8.870557461099917e-06, "loss": 0.3532, "step": 5458 }, { "epoch": 0.6707212188229512, "grad_norm": 0.3733125055896179, "learning_rate": 8.864686353925295e-06, "loss": 0.3778, "step": 5459 }, { "epoch": 0.6708440840398083, "grad_norm": 0.3782467408915593, "learning_rate": 8.858816375301836e-06, "loss": 0.3371, "step": 5460 }, { "epoch": 0.6709669492566654, "grad_norm": 0.31265395533650575, "learning_rate": 8.852947526309278e-06, "loss": 0.3939, "step": 5461 }, { "epoch": 0.6710898144735226, "grad_norm": 0.36541357046560846, "learning_rate": 8.847079808027156e-06, "loss": 0.4283, "step": 5462 }, { "epoch": 0.6712126796903797, "grad_norm": 0.33207569251075436, "learning_rate": 8.841213221534798e-06, "loss": 0.4291, "step": 5463 }, { "epoch": 0.6713355449072368, "grad_norm": 0.31078683288028613, "learning_rate": 8.835347767911329e-06, "loss": 0.3671, "step": 5464 }, { "epoch": 0.6714584101240939, "grad_norm": 0.32340327779578765, "learning_rate": 8.829483448235659e-06, "loss": 0.3469, "step": 5465 }, { "epoch": 0.671581275340951, "grad_norm": 0.31651329911821346, "learning_rate": 8.823620263586493e-06, "loss": 0.394, "step": 5466 }, { "epoch": 0.6717041405578081, "grad_norm": 0.41355502242366665, "learning_rate": 8.817758215042316e-06, "loss": 0.3947, "step": 5467 }, { "epoch": 0.6718270057746651, "grad_norm": 0.42894657938206726, "learning_rate": 8.81189730368143e-06, "loss": 0.3873, "step": 5468 }, { "epoch": 0.6719498709915223, "grad_norm": 0.3583492779714938, "learning_rate": 8.806037530581904e-06, "loss": 0.3667, "step": 5469 }, { "epoch": 0.6720727362083794, "grad_norm": 0.29251973007475157, "learning_rate": 8.800178896821597e-06, "loss": 0.4034, "step": 5470 }, { "epoch": 0.6721956014252365, "grad_norm": 0.40136476452991515, "learning_rate": 8.794321403478182e-06, "loss": 0.3446, "step": 5471 }, { "epoch": 0.6723184666420936, "grad_norm": 0.37023170232871566, "learning_rate": 8.788465051629101e-06, "loss": 0.3661, "step": 5472 }, { "epoch": 0.6724413318589507, "grad_norm": 0.27907409223418145, "learning_rate": 8.782609842351587e-06, "loss": 0.3735, "step": 5473 }, { "epoch": 0.6725641970758078, "grad_norm": 0.3645225643598499, "learning_rate": 8.77675577672267e-06, "loss": 0.4209, "step": 5474 }, { "epoch": 0.672687062292665, "grad_norm": 0.7893243347624038, "learning_rate": 8.770902855819174e-06, "loss": 0.4911, "step": 5475 }, { "epoch": 0.6728099275095221, "grad_norm": 0.3248617182006278, "learning_rate": 8.765051080717696e-06, "loss": 0.3638, "step": 5476 }, { "epoch": 0.6729327927263792, "grad_norm": 0.3062459800930977, "learning_rate": 8.75920045249464e-06, "loss": 0.3636, "step": 5477 }, { "epoch": 0.6730556579432363, "grad_norm": 0.3297389500673032, "learning_rate": 8.75335097222618e-06, "loss": 0.3868, "step": 5478 }, { "epoch": 0.6731785231600934, "grad_norm": 0.40911980415774, "learning_rate": 8.74750264098831e-06, "loss": 0.4641, "step": 5479 }, { "epoch": 0.6733013883769505, "grad_norm": 0.32152661135288696, "learning_rate": 8.74165545985677e-06, "loss": 0.4134, "step": 5480 }, { "epoch": 0.6734242535938076, "grad_norm": 0.34217520490436854, "learning_rate": 8.73580942990713e-06, "loss": 0.3986, "step": 5481 }, { "epoch": 0.6735471188106646, "grad_norm": 0.4478279617900395, "learning_rate": 8.729964552214708e-06, "loss": 0.3541, "step": 5482 }, { "epoch": 0.6736699840275218, "grad_norm": 0.2886147668367577, "learning_rate": 8.724120827854657e-06, "loss": 0.4033, "step": 5483 }, { "epoch": 0.6737928492443789, "grad_norm": 0.409073604930986, "learning_rate": 8.718278257901872e-06, "loss": 0.4349, "step": 5484 }, { "epoch": 0.673915714461236, "grad_norm": 0.35641797898229466, "learning_rate": 8.712436843431068e-06, "loss": 0.3585, "step": 5485 }, { "epoch": 0.6740385796780931, "grad_norm": 0.3313149426475946, "learning_rate": 8.70659658551672e-06, "loss": 0.3619, "step": 5486 }, { "epoch": 0.6741614448949502, "grad_norm": 0.41077077804628764, "learning_rate": 8.700757485233126e-06, "loss": 0.4277, "step": 5487 }, { "epoch": 0.6742843101118073, "grad_norm": 0.28583618221547, "learning_rate": 8.694919543654337e-06, "loss": 0.345, "step": 5488 }, { "epoch": 0.6744071753286645, "grad_norm": 0.3485127732721125, "learning_rate": 8.689082761854213e-06, "loss": 0.4265, "step": 5489 }, { "epoch": 0.6745300405455216, "grad_norm": 0.3919167603566419, "learning_rate": 8.683247140906382e-06, "loss": 0.3892, "step": 5490 }, { "epoch": 0.6746529057623787, "grad_norm": 0.327837176579767, "learning_rate": 8.677412681884273e-06, "loss": 0.3756, "step": 5491 }, { "epoch": 0.6747757709792358, "grad_norm": 0.3528797267580021, "learning_rate": 8.671579385861105e-06, "loss": 0.4261, "step": 5492 }, { "epoch": 0.6748986361960929, "grad_norm": 0.38547199467239196, "learning_rate": 8.665747253909855e-06, "loss": 0.3693, "step": 5493 }, { "epoch": 0.67502150141295, "grad_norm": 0.34839075462205704, "learning_rate": 8.659916287103329e-06, "loss": 0.4273, "step": 5494 }, { "epoch": 0.6751443666298071, "grad_norm": 0.4755379181792254, "learning_rate": 8.65408648651408e-06, "loss": 0.4555, "step": 5495 }, { "epoch": 0.6752672318466643, "grad_norm": 0.3648703074571601, "learning_rate": 8.648257853214474e-06, "loss": 0.366, "step": 5496 }, { "epoch": 0.6753900970635213, "grad_norm": 0.4266656059108737, "learning_rate": 8.642430388276638e-06, "loss": 0.4132, "step": 5497 }, { "epoch": 0.6755129622803784, "grad_norm": 0.3471849748643649, "learning_rate": 8.6366040927725e-06, "loss": 0.3591, "step": 5498 }, { "epoch": 0.6756358274972355, "grad_norm": 0.35372494499009993, "learning_rate": 8.630778967773777e-06, "loss": 0.4852, "step": 5499 }, { "epoch": 0.6757586927140926, "grad_norm": 0.39422344462412395, "learning_rate": 8.624955014351953e-06, "loss": 0.3697, "step": 5500 }, { "epoch": 0.6758815579309497, "grad_norm": 0.3385252036113506, "learning_rate": 8.619132233578308e-06, "loss": 0.3631, "step": 5501 }, { "epoch": 0.6760044231478068, "grad_norm": 0.34276123062191055, "learning_rate": 8.61331062652391e-06, "loss": 0.4098, "step": 5502 }, { "epoch": 0.676127288364664, "grad_norm": 0.3270030816621705, "learning_rate": 8.607490194259606e-06, "loss": 0.3829, "step": 5503 }, { "epoch": 0.6762501535815211, "grad_norm": 0.3586318336172493, "learning_rate": 8.60167093785602e-06, "loss": 0.369, "step": 5504 }, { "epoch": 0.6763730187983782, "grad_norm": 0.35004080867090975, "learning_rate": 8.59585285838357e-06, "loss": 0.3781, "step": 5505 }, { "epoch": 0.6764958840152353, "grad_norm": 0.42043194168103376, "learning_rate": 8.590035956912461e-06, "loss": 0.4174, "step": 5506 }, { "epoch": 0.6766187492320924, "grad_norm": 0.3321468973261497, "learning_rate": 8.58422023451266e-06, "loss": 0.3653, "step": 5507 }, { "epoch": 0.6767416144489495, "grad_norm": 0.3416752244843571, "learning_rate": 8.578405692253945e-06, "loss": 0.3241, "step": 5508 }, { "epoch": 0.6768644796658067, "grad_norm": 0.3288288271532132, "learning_rate": 8.572592331205849e-06, "loss": 0.3645, "step": 5509 }, { "epoch": 0.6769873448826638, "grad_norm": 0.48130514821597836, "learning_rate": 8.566780152437717e-06, "loss": 0.4355, "step": 5510 }, { "epoch": 0.6771102100995208, "grad_norm": 0.3626852076956057, "learning_rate": 8.560969157018655e-06, "loss": 0.3684, "step": 5511 }, { "epoch": 0.6772330753163779, "grad_norm": 0.3565695933557286, "learning_rate": 8.555159346017559e-06, "loss": 0.3829, "step": 5512 }, { "epoch": 0.677355940533235, "grad_norm": 0.3060437639464468, "learning_rate": 8.549350720503094e-06, "loss": 0.3505, "step": 5513 }, { "epoch": 0.6774788057500921, "grad_norm": 0.29230398337300756, "learning_rate": 8.543543281543745e-06, "loss": 0.3236, "step": 5514 }, { "epoch": 0.6776016709669492, "grad_norm": 0.4139299417663928, "learning_rate": 8.537737030207728e-06, "loss": 0.3682, "step": 5515 }, { "epoch": 0.6777245361838063, "grad_norm": 0.37792768418154793, "learning_rate": 8.531931967563078e-06, "loss": 0.3044, "step": 5516 }, { "epoch": 0.6778474014006635, "grad_norm": 0.37804002022933875, "learning_rate": 8.5261280946776e-06, "loss": 0.3647, "step": 5517 }, { "epoch": 0.6779702666175206, "grad_norm": 0.30709732043565685, "learning_rate": 8.520325412618868e-06, "loss": 0.4131, "step": 5518 }, { "epoch": 0.6780931318343777, "grad_norm": 0.3310469456887949, "learning_rate": 8.514523922454263e-06, "loss": 0.3301, "step": 5519 }, { "epoch": 0.6782159970512348, "grad_norm": 0.34903220775646376, "learning_rate": 8.508723625250907e-06, "loss": 0.4047, "step": 5520 }, { "epoch": 0.6783388622680919, "grad_norm": 0.46510960724290645, "learning_rate": 8.502924522075757e-06, "loss": 0.3591, "step": 5521 }, { "epoch": 0.678461727484949, "grad_norm": 0.32883870614457544, "learning_rate": 8.4971266139955e-06, "loss": 0.4334, "step": 5522 }, { "epoch": 0.6785845927018062, "grad_norm": 0.3260570889478819, "learning_rate": 8.491329902076635e-06, "loss": 0.3516, "step": 5523 }, { "epoch": 0.6787074579186633, "grad_norm": 0.28882634413701996, "learning_rate": 8.48553438738542e-06, "loss": 0.3948, "step": 5524 }, { "epoch": 0.6788303231355204, "grad_norm": 0.29358022599466305, "learning_rate": 8.479740070987904e-06, "loss": 0.3241, "step": 5525 }, { "epoch": 0.6789531883523774, "grad_norm": 0.3911404151345638, "learning_rate": 8.473946953949924e-06, "loss": 0.3655, "step": 5526 }, { "epoch": 0.6790760535692345, "grad_norm": 0.33872486180432093, "learning_rate": 8.468155037337072e-06, "loss": 0.4049, "step": 5527 }, { "epoch": 0.6791989187860916, "grad_norm": 0.3302384180211296, "learning_rate": 8.462364322214742e-06, "loss": 0.318, "step": 5528 }, { "epoch": 0.6793217840029487, "grad_norm": 0.3253637853820291, "learning_rate": 8.456574809648096e-06, "loss": 0.4119, "step": 5529 }, { "epoch": 0.6794446492198059, "grad_norm": 0.3393123520782617, "learning_rate": 8.450786500702084e-06, "loss": 0.3985, "step": 5530 }, { "epoch": 0.679567514436663, "grad_norm": 0.38735884068262094, "learning_rate": 8.444999396441416e-06, "loss": 0.3575, "step": 5531 }, { "epoch": 0.6796903796535201, "grad_norm": 0.38852130800596885, "learning_rate": 8.439213497930598e-06, "loss": 0.348, "step": 5532 }, { "epoch": 0.6798132448703772, "grad_norm": 0.2890787069604063, "learning_rate": 8.43342880623391e-06, "loss": 0.3689, "step": 5533 }, { "epoch": 0.6799361100872343, "grad_norm": 0.3392494944516379, "learning_rate": 8.427645322415412e-06, "loss": 0.4066, "step": 5534 }, { "epoch": 0.6800589753040914, "grad_norm": 0.3802383368432346, "learning_rate": 8.42186304753893e-06, "loss": 0.4813, "step": 5535 }, { "epoch": 0.6801818405209485, "grad_norm": 0.3528136067609718, "learning_rate": 8.41608198266808e-06, "loss": 0.4097, "step": 5536 }, { "epoch": 0.6803047057378057, "grad_norm": 0.3248369456417584, "learning_rate": 8.410302128866253e-06, "loss": 0.4436, "step": 5537 }, { "epoch": 0.6804275709546628, "grad_norm": 0.33836771256771253, "learning_rate": 8.40452348719661e-06, "loss": 0.3644, "step": 5538 }, { "epoch": 0.6805504361715199, "grad_norm": 0.7292896767635856, "learning_rate": 8.3987460587221e-06, "loss": 0.5392, "step": 5539 }, { "epoch": 0.680673301388377, "grad_norm": 0.33089096306206006, "learning_rate": 8.392969844505441e-06, "loss": 0.3543, "step": 5540 }, { "epoch": 0.680796166605234, "grad_norm": 0.32379122672080834, "learning_rate": 8.387194845609134e-06, "loss": 0.3877, "step": 5541 }, { "epoch": 0.6809190318220911, "grad_norm": 0.3259557558698197, "learning_rate": 8.381421063095447e-06, "loss": 0.385, "step": 5542 }, { "epoch": 0.6810418970389482, "grad_norm": 0.3570411691606814, "learning_rate": 8.375648498026431e-06, "loss": 0.4702, "step": 5543 }, { "epoch": 0.6811647622558054, "grad_norm": 0.3371427263686637, "learning_rate": 8.36987715146392e-06, "loss": 0.3868, "step": 5544 }, { "epoch": 0.6812876274726625, "grad_norm": 0.2997378749439503, "learning_rate": 8.364107024469502e-06, "loss": 0.3378, "step": 5545 }, { "epoch": 0.6814104926895196, "grad_norm": 0.4325983289125251, "learning_rate": 8.358338118104568e-06, "loss": 0.3953, "step": 5546 }, { "epoch": 0.6815333579063767, "grad_norm": 0.37837583273702324, "learning_rate": 8.352570433430254e-06, "loss": 0.4606, "step": 5547 }, { "epoch": 0.6816562231232338, "grad_norm": 0.3804906948972089, "learning_rate": 8.346803971507508e-06, "loss": 0.3685, "step": 5548 }, { "epoch": 0.6817790883400909, "grad_norm": 0.3384304949716393, "learning_rate": 8.34103873339702e-06, "loss": 0.311, "step": 5549 }, { "epoch": 0.681901953556948, "grad_norm": 0.3294152408994042, "learning_rate": 8.335274720159279e-06, "loss": 0.3276, "step": 5550 }, { "epoch": 0.6820248187738052, "grad_norm": 0.34758529592557486, "learning_rate": 8.329511932854517e-06, "loss": 0.3615, "step": 5551 }, { "epoch": 0.6821476839906623, "grad_norm": 0.2797404481210146, "learning_rate": 8.323750372542788e-06, "loss": 0.3885, "step": 5552 }, { "epoch": 0.6822705492075194, "grad_norm": 0.3562223974351125, "learning_rate": 8.317990040283876e-06, "loss": 0.3741, "step": 5553 }, { "epoch": 0.6823934144243765, "grad_norm": 0.35568087028396966, "learning_rate": 8.312230937137365e-06, "loss": 0.323, "step": 5554 }, { "epoch": 0.6825162796412335, "grad_norm": 0.38035215354252466, "learning_rate": 8.306473064162597e-06, "loss": 0.3981, "step": 5555 }, { "epoch": 0.6826391448580906, "grad_norm": 0.4346335310257729, "learning_rate": 8.300716422418699e-06, "loss": 0.4202, "step": 5556 }, { "epoch": 0.6827620100749477, "grad_norm": 0.3611214504187955, "learning_rate": 8.294961012964576e-06, "loss": 0.461, "step": 5557 }, { "epoch": 0.6828848752918049, "grad_norm": 0.36641315114218503, "learning_rate": 8.289206836858879e-06, "loss": 0.3905, "step": 5558 }, { "epoch": 0.683007740508662, "grad_norm": 0.3720119780798097, "learning_rate": 8.283453895160075e-06, "loss": 0.4061, "step": 5559 }, { "epoch": 0.6831306057255191, "grad_norm": 0.4127635952518569, "learning_rate": 8.277702188926363e-06, "loss": 0.3355, "step": 5560 }, { "epoch": 0.6832534709423762, "grad_norm": 0.5215907544579219, "learning_rate": 8.27195171921574e-06, "loss": 0.475, "step": 5561 }, { "epoch": 0.6833763361592333, "grad_norm": 0.31411703002568214, "learning_rate": 8.266202487085964e-06, "loss": 0.3534, "step": 5562 }, { "epoch": 0.6834992013760904, "grad_norm": 0.3615866657597441, "learning_rate": 8.26045449359457e-06, "loss": 0.3151, "step": 5563 }, { "epoch": 0.6836220665929476, "grad_norm": 0.40673600258724085, "learning_rate": 8.25470773979887e-06, "loss": 0.3697, "step": 5564 }, { "epoch": 0.6837449318098047, "grad_norm": 0.45088711475718424, "learning_rate": 8.248962226755929e-06, "loss": 0.3646, "step": 5565 }, { "epoch": 0.6838677970266618, "grad_norm": 0.3865295395882883, "learning_rate": 8.243217955522605e-06, "loss": 0.4191, "step": 5566 }, { "epoch": 0.6839906622435189, "grad_norm": 0.361021358862172, "learning_rate": 8.237474927155517e-06, "loss": 0.3906, "step": 5567 }, { "epoch": 0.684113527460376, "grad_norm": 0.3146682781440391, "learning_rate": 8.23173314271107e-06, "loss": 0.308, "step": 5568 }, { "epoch": 0.6842363926772331, "grad_norm": 0.3195862485903557, "learning_rate": 8.225992603245408e-06, "loss": 0.4343, "step": 5569 }, { "epoch": 0.6843592578940901, "grad_norm": 0.40955356952114663, "learning_rate": 8.220253309814479e-06, "loss": 0.3573, "step": 5570 }, { "epoch": 0.6844821231109472, "grad_norm": 0.348094350590273, "learning_rate": 8.214515263473983e-06, "loss": 0.3352, "step": 5571 }, { "epoch": 0.6846049883278044, "grad_norm": 0.30597551904843334, "learning_rate": 8.208778465279404e-06, "loss": 0.3665, "step": 5572 }, { "epoch": 0.6847278535446615, "grad_norm": 0.31525576725506527, "learning_rate": 8.203042916285977e-06, "loss": 0.3884, "step": 5573 }, { "epoch": 0.6848507187615186, "grad_norm": 0.4064111102517134, "learning_rate": 8.19730861754873e-06, "loss": 0.4607, "step": 5574 }, { "epoch": 0.6849735839783757, "grad_norm": 0.3191713682117283, "learning_rate": 8.191575570122449e-06, "loss": 0.4179, "step": 5575 }, { "epoch": 0.6850964491952328, "grad_norm": 0.33371713217072896, "learning_rate": 8.185843775061682e-06, "loss": 0.3123, "step": 5576 }, { "epoch": 0.6852193144120899, "grad_norm": 0.31914840114889365, "learning_rate": 8.180113233420761e-06, "loss": 0.3536, "step": 5577 }, { "epoch": 0.6853421796289471, "grad_norm": 0.367146609807945, "learning_rate": 8.174383946253783e-06, "loss": 0.4187, "step": 5578 }, { "epoch": 0.6854650448458042, "grad_norm": 0.34854717488520826, "learning_rate": 8.168655914614617e-06, "loss": 0.3961, "step": 5579 }, { "epoch": 0.6855879100626613, "grad_norm": 0.43312192959949586, "learning_rate": 8.162929139556888e-06, "loss": 0.3871, "step": 5580 }, { "epoch": 0.6857107752795184, "grad_norm": 0.35258270667884367, "learning_rate": 8.157203622134004e-06, "loss": 0.3728, "step": 5581 }, { "epoch": 0.6858336404963755, "grad_norm": 0.4097471738502769, "learning_rate": 8.151479363399143e-06, "loss": 0.4831, "step": 5582 }, { "epoch": 0.6859565057132326, "grad_norm": 0.2757699118657028, "learning_rate": 8.14575636440523e-06, "loss": 0.3894, "step": 5583 }, { "epoch": 0.6860793709300896, "grad_norm": 0.36379212709021547, "learning_rate": 8.14003462620499e-06, "loss": 0.3628, "step": 5584 }, { "epoch": 0.6862022361469468, "grad_norm": 0.3312720673855911, "learning_rate": 8.134314149850882e-06, "loss": 0.3568, "step": 5585 }, { "epoch": 0.6863251013638039, "grad_norm": 0.29587968238387274, "learning_rate": 8.12859493639517e-06, "loss": 0.3816, "step": 5586 }, { "epoch": 0.686447966580661, "grad_norm": 0.3414885038828933, "learning_rate": 8.122876986889853e-06, "loss": 0.4109, "step": 5587 }, { "epoch": 0.6865708317975181, "grad_norm": 0.35428540010290444, "learning_rate": 8.117160302386718e-06, "loss": 0.4429, "step": 5588 }, { "epoch": 0.6866936970143752, "grad_norm": 0.39165108138988597, "learning_rate": 8.111444883937299e-06, "loss": 0.4123, "step": 5589 }, { "epoch": 0.6868165622312323, "grad_norm": 0.49975695063744957, "learning_rate": 8.105730732592931e-06, "loss": 0.5085, "step": 5590 }, { "epoch": 0.6869394274480894, "grad_norm": 0.345291221848607, "learning_rate": 8.100017849404677e-06, "loss": 0.3669, "step": 5591 }, { "epoch": 0.6870622926649466, "grad_norm": 0.32067336529322377, "learning_rate": 8.094306235423398e-06, "loss": 0.4443, "step": 5592 }, { "epoch": 0.6871851578818037, "grad_norm": 0.3083450353272738, "learning_rate": 8.088595891699695e-06, "loss": 0.3307, "step": 5593 }, { "epoch": 0.6873080230986608, "grad_norm": 0.34267835894482696, "learning_rate": 8.082886819283958e-06, "loss": 0.3889, "step": 5594 }, { "epoch": 0.6874308883155179, "grad_norm": 0.3854046089953149, "learning_rate": 8.077179019226335e-06, "loss": 0.4314, "step": 5595 }, { "epoch": 0.687553753532375, "grad_norm": 0.3093138796464489, "learning_rate": 8.07147249257673e-06, "loss": 0.3665, "step": 5596 }, { "epoch": 0.6876766187492321, "grad_norm": 0.3978668921489297, "learning_rate": 8.06576724038483e-06, "loss": 0.3232, "step": 5597 }, { "epoch": 0.6877994839660893, "grad_norm": 0.3106780624202122, "learning_rate": 8.060063263700074e-06, "loss": 0.4186, "step": 5598 }, { "epoch": 0.6879223491829463, "grad_norm": 0.3986491396025336, "learning_rate": 8.054360563571678e-06, "loss": 0.3284, "step": 5599 }, { "epoch": 0.6880452143998034, "grad_norm": 0.3179716492089914, "learning_rate": 8.048659141048608e-06, "loss": 0.4251, "step": 5600 }, { "epoch": 0.6881680796166605, "grad_norm": 0.3580308651846004, "learning_rate": 8.042958997179608e-06, "loss": 0.4001, "step": 5601 }, { "epoch": 0.6882909448335176, "grad_norm": 0.3754333896099267, "learning_rate": 8.037260133013188e-06, "loss": 0.4145, "step": 5602 }, { "epoch": 0.6884138100503747, "grad_norm": 0.374597646902786, "learning_rate": 8.031562549597606e-06, "loss": 0.3557, "step": 5603 }, { "epoch": 0.6885366752672318, "grad_norm": 0.36529132732799663, "learning_rate": 8.025866247980902e-06, "loss": 0.3674, "step": 5604 }, { "epoch": 0.688659540484089, "grad_norm": 0.3233646984424778, "learning_rate": 8.02017122921087e-06, "loss": 0.4349, "step": 5605 }, { "epoch": 0.6887824057009461, "grad_norm": 0.3274545830412016, "learning_rate": 8.014477494335082e-06, "loss": 0.4602, "step": 5606 }, { "epoch": 0.6889052709178032, "grad_norm": 0.3351150964033654, "learning_rate": 8.00878504440085e-06, "loss": 0.4483, "step": 5607 }, { "epoch": 0.6890281361346603, "grad_norm": 0.3258672820054115, "learning_rate": 8.00309388045527e-06, "loss": 0.3478, "step": 5608 }, { "epoch": 0.6891510013515174, "grad_norm": 0.5000336330409653, "learning_rate": 7.997404003545195e-06, "loss": 0.4864, "step": 5609 }, { "epoch": 0.6892738665683745, "grad_norm": 0.33655873067469827, "learning_rate": 7.991715414717246e-06, "loss": 0.4069, "step": 5610 }, { "epoch": 0.6893967317852316, "grad_norm": 0.33017632677485464, "learning_rate": 7.986028115017788e-06, "loss": 0.3557, "step": 5611 }, { "epoch": 0.6895195970020888, "grad_norm": 0.3476873912666114, "learning_rate": 7.980342105492973e-06, "loss": 0.3879, "step": 5612 }, { "epoch": 0.6896424622189458, "grad_norm": 0.48926588715742614, "learning_rate": 7.97465738718871e-06, "loss": 0.3759, "step": 5613 }, { "epoch": 0.6897653274358029, "grad_norm": 0.34766366892118467, "learning_rate": 7.968973961150653e-06, "loss": 0.4323, "step": 5614 }, { "epoch": 0.68988819265266, "grad_norm": 0.3742685583944588, "learning_rate": 7.963291828424242e-06, "loss": 0.3054, "step": 5615 }, { "epoch": 0.6900110578695171, "grad_norm": 0.3725118070671125, "learning_rate": 7.957610990054654e-06, "loss": 0.4013, "step": 5616 }, { "epoch": 0.6901339230863742, "grad_norm": 0.3214323150164074, "learning_rate": 7.951931447086864e-06, "loss": 0.3497, "step": 5617 }, { "epoch": 0.6902567883032313, "grad_norm": 0.33562659372236897, "learning_rate": 7.946253200565572e-06, "loss": 0.4299, "step": 5618 }, { "epoch": 0.6903796535200885, "grad_norm": 0.39981121851195023, "learning_rate": 7.940576251535264e-06, "loss": 0.431, "step": 5619 }, { "epoch": 0.6905025187369456, "grad_norm": 0.3312368184488546, "learning_rate": 7.934900601040165e-06, "loss": 0.3858, "step": 5620 }, { "epoch": 0.6906253839538027, "grad_norm": 0.3248830706022737, "learning_rate": 7.929226250124284e-06, "loss": 0.3093, "step": 5621 }, { "epoch": 0.6907482491706598, "grad_norm": 0.3945189385453632, "learning_rate": 7.923553199831384e-06, "loss": 0.3835, "step": 5622 }, { "epoch": 0.6908711143875169, "grad_norm": 0.4158372876735479, "learning_rate": 7.917881451204966e-06, "loss": 0.4335, "step": 5623 }, { "epoch": 0.690993979604374, "grad_norm": 0.3811051253965677, "learning_rate": 7.912211005288342e-06, "loss": 0.4292, "step": 5624 }, { "epoch": 0.6911168448212311, "grad_norm": 0.3504486758927372, "learning_rate": 7.906541863124529e-06, "loss": 0.3073, "step": 5625 }, { "epoch": 0.6912397100380883, "grad_norm": 0.3212699703088723, "learning_rate": 7.900874025756344e-06, "loss": 0.3792, "step": 5626 }, { "epoch": 0.6913625752549454, "grad_norm": 0.40665990396196855, "learning_rate": 7.895207494226338e-06, "loss": 0.3568, "step": 5627 }, { "epoch": 0.6914854404718024, "grad_norm": 0.30410441934476795, "learning_rate": 7.889542269576836e-06, "loss": 0.4094, "step": 5628 }, { "epoch": 0.6916083056886595, "grad_norm": 0.3413825148174719, "learning_rate": 7.883878352849925e-06, "loss": 0.4065, "step": 5629 }, { "epoch": 0.6917311709055166, "grad_norm": 0.28534251675753025, "learning_rate": 7.878215745087438e-06, "loss": 0.3304, "step": 5630 }, { "epoch": 0.6918540361223737, "grad_norm": 0.3200209507236127, "learning_rate": 7.872554447330977e-06, "loss": 0.3693, "step": 5631 }, { "epoch": 0.6919769013392308, "grad_norm": 0.40774267732258485, "learning_rate": 7.866894460621903e-06, "loss": 0.4813, "step": 5632 }, { "epoch": 0.692099766556088, "grad_norm": 0.43375755548629874, "learning_rate": 7.861235786001338e-06, "loss": 0.4643, "step": 5633 }, { "epoch": 0.6922226317729451, "grad_norm": 0.38937474301167774, "learning_rate": 7.855578424510146e-06, "loss": 0.3323, "step": 5634 }, { "epoch": 0.6923454969898022, "grad_norm": 0.3541374680693346, "learning_rate": 7.849922377188973e-06, "loss": 0.3818, "step": 5635 }, { "epoch": 0.6924683622066593, "grad_norm": 0.3395237144395506, "learning_rate": 7.844267645078209e-06, "loss": 0.4241, "step": 5636 }, { "epoch": 0.6925912274235164, "grad_norm": 0.4132132304515679, "learning_rate": 7.83861422921801e-06, "loss": 0.4184, "step": 5637 }, { "epoch": 0.6927140926403735, "grad_norm": 0.3128649115129203, "learning_rate": 7.832962130648273e-06, "loss": 0.3642, "step": 5638 }, { "epoch": 0.6928369578572307, "grad_norm": 0.4248892638404526, "learning_rate": 7.827311350408674e-06, "loss": 0.4424, "step": 5639 }, { "epoch": 0.6929598230740878, "grad_norm": 0.3053626545608794, "learning_rate": 7.821661889538641e-06, "loss": 0.3448, "step": 5640 }, { "epoch": 0.6930826882909449, "grad_norm": 0.333644828346323, "learning_rate": 7.816013749077344e-06, "loss": 0.3407, "step": 5641 }, { "epoch": 0.6932055535078019, "grad_norm": 0.3676220291629427, "learning_rate": 7.810366930063729e-06, "loss": 0.4361, "step": 5642 }, { "epoch": 0.693328418724659, "grad_norm": 0.32596201513205947, "learning_rate": 7.80472143353649e-06, "loss": 0.3739, "step": 5643 }, { "epoch": 0.6934512839415161, "grad_norm": 0.32281551406580056, "learning_rate": 7.799077260534085e-06, "loss": 0.3711, "step": 5644 }, { "epoch": 0.6935741491583732, "grad_norm": 0.3523120268311154, "learning_rate": 7.793434412094714e-06, "loss": 0.3012, "step": 5645 }, { "epoch": 0.6936970143752303, "grad_norm": 0.29272106475755394, "learning_rate": 7.787792889256347e-06, "loss": 0.3571, "step": 5646 }, { "epoch": 0.6938198795920875, "grad_norm": 0.3986632178086879, "learning_rate": 7.782152693056711e-06, "loss": 0.4143, "step": 5647 }, { "epoch": 0.6939427448089446, "grad_norm": 0.3811166590294957, "learning_rate": 7.776513824533272e-06, "loss": 0.4246, "step": 5648 }, { "epoch": 0.6940656100258017, "grad_norm": 0.32715531175612517, "learning_rate": 7.770876284723272e-06, "loss": 0.4313, "step": 5649 }, { "epoch": 0.6941884752426588, "grad_norm": 0.32667798403237075, "learning_rate": 7.765240074663689e-06, "loss": 0.3763, "step": 5650 }, { "epoch": 0.6943113404595159, "grad_norm": 0.37692143143882506, "learning_rate": 7.759605195391285e-06, "loss": 0.3816, "step": 5651 }, { "epoch": 0.694434205676373, "grad_norm": 0.34885293401179523, "learning_rate": 7.753971647942543e-06, "loss": 0.3438, "step": 5652 }, { "epoch": 0.6945570708932302, "grad_norm": 0.3501649242343321, "learning_rate": 7.748339433353731e-06, "loss": 0.3446, "step": 5653 }, { "epoch": 0.6946799361100873, "grad_norm": 0.3041544704359312, "learning_rate": 7.74270855266084e-06, "loss": 0.3945, "step": 5654 }, { "epoch": 0.6948028013269444, "grad_norm": 0.34799087050017896, "learning_rate": 7.737079006899658e-06, "loss": 0.3798, "step": 5655 }, { "epoch": 0.6949256665438015, "grad_norm": 0.3383906220716193, "learning_rate": 7.731450797105687e-06, "loss": 0.3543, "step": 5656 }, { "epoch": 0.6950485317606585, "grad_norm": 0.3261078575813438, "learning_rate": 7.725823924314203e-06, "loss": 0.358, "step": 5657 }, { "epoch": 0.6951713969775156, "grad_norm": 0.3834199137580372, "learning_rate": 7.720198389560233e-06, "loss": 0.466, "step": 5658 }, { "epoch": 0.6952942621943727, "grad_norm": 0.2772392185609728, "learning_rate": 7.714574193878557e-06, "loss": 0.3421, "step": 5659 }, { "epoch": 0.6954171274112299, "grad_norm": 0.33153523873649726, "learning_rate": 7.708951338303715e-06, "loss": 0.3724, "step": 5660 }, { "epoch": 0.695539992628087, "grad_norm": 0.45376196414813497, "learning_rate": 7.703329823869987e-06, "loss": 0.3855, "step": 5661 }, { "epoch": 0.6956628578449441, "grad_norm": 0.3594457111588356, "learning_rate": 7.697709651611415e-06, "loss": 0.4274, "step": 5662 }, { "epoch": 0.6957857230618012, "grad_norm": 0.3145468882309744, "learning_rate": 7.692090822561796e-06, "loss": 0.3487, "step": 5663 }, { "epoch": 0.6959085882786583, "grad_norm": 0.35015899993393035, "learning_rate": 7.686473337754682e-06, "loss": 0.3908, "step": 5664 }, { "epoch": 0.6960314534955154, "grad_norm": 0.3191610171116798, "learning_rate": 7.680857198223364e-06, "loss": 0.3954, "step": 5665 }, { "epoch": 0.6961543187123725, "grad_norm": 0.3415340077995901, "learning_rate": 7.675242405000896e-06, "loss": 0.443, "step": 5666 }, { "epoch": 0.6962771839292297, "grad_norm": 0.39894279428693735, "learning_rate": 7.66962895912009e-06, "loss": 0.3952, "step": 5667 }, { "epoch": 0.6964000491460868, "grad_norm": 0.3217357285556794, "learning_rate": 7.664016861613495e-06, "loss": 0.3627, "step": 5668 }, { "epoch": 0.6965229143629439, "grad_norm": 0.3257287824587221, "learning_rate": 7.65840611351342e-06, "loss": 0.3212, "step": 5669 }, { "epoch": 0.696645779579801, "grad_norm": 0.4021620533701508, "learning_rate": 7.65279671585193e-06, "loss": 0.4397, "step": 5670 }, { "epoch": 0.6967686447966581, "grad_norm": 0.30897360594361506, "learning_rate": 7.647188669660842e-06, "loss": 0.3467, "step": 5671 }, { "epoch": 0.6968915100135151, "grad_norm": 0.35402688678961813, "learning_rate": 7.641581975971705e-06, "loss": 0.4196, "step": 5672 }, { "epoch": 0.6970143752303722, "grad_norm": 0.40489516838276884, "learning_rate": 7.635976635815845e-06, "loss": 0.3748, "step": 5673 }, { "epoch": 0.6971372404472294, "grad_norm": 0.3270039371592765, "learning_rate": 7.630372650224326e-06, "loss": 0.379, "step": 5674 }, { "epoch": 0.6972601056640865, "grad_norm": 0.3940449154562426, "learning_rate": 7.624770020227968e-06, "loss": 0.4298, "step": 5675 }, { "epoch": 0.6973829708809436, "grad_norm": 0.37763137075596853, "learning_rate": 7.619168746857331e-06, "loss": 0.41, "step": 5676 }, { "epoch": 0.6975058360978007, "grad_norm": 0.28892730602427436, "learning_rate": 7.6135688311427364e-06, "loss": 0.3687, "step": 5677 }, { "epoch": 0.6976287013146578, "grad_norm": 0.2864126839098085, "learning_rate": 7.607970274114257e-06, "loss": 0.4232, "step": 5678 }, { "epoch": 0.6977515665315149, "grad_norm": 0.35197963600313226, "learning_rate": 7.602373076801701e-06, "loss": 0.4245, "step": 5679 }, { "epoch": 0.697874431748372, "grad_norm": 0.366584442631703, "learning_rate": 7.596777240234649e-06, "loss": 0.3615, "step": 5680 }, { "epoch": 0.6979972969652292, "grad_norm": 0.3667392924487922, "learning_rate": 7.5911827654424005e-06, "loss": 0.447, "step": 5681 }, { "epoch": 0.6981201621820863, "grad_norm": 0.3830878683814853, "learning_rate": 7.585589653454045e-06, "loss": 0.4869, "step": 5682 }, { "epoch": 0.6982430273989434, "grad_norm": 0.3637771909298648, "learning_rate": 7.579997905298382e-06, "loss": 0.3872, "step": 5683 }, { "epoch": 0.6983658926158005, "grad_norm": 0.3201942803600973, "learning_rate": 7.574407522003988e-06, "loss": 0.4161, "step": 5684 }, { "epoch": 0.6984887578326576, "grad_norm": 0.32189063396115947, "learning_rate": 7.568818504599175e-06, "loss": 0.3363, "step": 5685 }, { "epoch": 0.6986116230495146, "grad_norm": 0.381839994150521, "learning_rate": 7.563230854112002e-06, "loss": 0.4628, "step": 5686 }, { "epoch": 0.6987344882663717, "grad_norm": 0.33162007964989904, "learning_rate": 7.557644571570289e-06, "loss": 0.3909, "step": 5687 }, { "epoch": 0.6988573534832289, "grad_norm": 0.30828817070425724, "learning_rate": 7.55205965800158e-06, "loss": 0.3575, "step": 5688 }, { "epoch": 0.698980218700086, "grad_norm": 0.38013958836868217, "learning_rate": 7.5464761144332074e-06, "loss": 0.3746, "step": 5689 }, { "epoch": 0.6991030839169431, "grad_norm": 0.4007246078735954, "learning_rate": 7.5408939418922095e-06, "loss": 0.4926, "step": 5690 }, { "epoch": 0.6992259491338002, "grad_norm": 0.32155483456634587, "learning_rate": 7.5353131414054025e-06, "loss": 0.3979, "step": 5691 }, { "epoch": 0.6993488143506573, "grad_norm": 0.3850744081205093, "learning_rate": 7.529733713999323e-06, "loss": 0.3932, "step": 5692 }, { "epoch": 0.6994716795675144, "grad_norm": 0.35033912682332435, "learning_rate": 7.52415566070029e-06, "loss": 0.4278, "step": 5693 }, { "epoch": 0.6995945447843716, "grad_norm": 0.3488507987005712, "learning_rate": 7.518578982534336e-06, "loss": 0.4958, "step": 5694 }, { "epoch": 0.6997174100012287, "grad_norm": 0.3192726892757572, "learning_rate": 7.513003680527265e-06, "loss": 0.2954, "step": 5695 }, { "epoch": 0.6998402752180858, "grad_norm": 0.29093070230528856, "learning_rate": 7.507429755704606e-06, "loss": 0.38, "step": 5696 }, { "epoch": 0.6999631404349429, "grad_norm": 0.36802262832673205, "learning_rate": 7.5018572090916526e-06, "loss": 0.3555, "step": 5697 }, { "epoch": 0.7000860056518, "grad_norm": 0.3980509283529663, "learning_rate": 7.496286041713444e-06, "loss": 0.3727, "step": 5698 }, { "epoch": 0.7002088708686571, "grad_norm": 0.34212280928178, "learning_rate": 7.490716254594751e-06, "loss": 0.4551, "step": 5699 }, { "epoch": 0.7003317360855142, "grad_norm": 0.4219992092042029, "learning_rate": 7.485147848760102e-06, "loss": 0.3993, "step": 5700 }, { "epoch": 0.7004546013023712, "grad_norm": 0.4200327894547572, "learning_rate": 7.47958082523377e-06, "loss": 0.381, "step": 5701 }, { "epoch": 0.7005774665192284, "grad_norm": 0.44355056931597125, "learning_rate": 7.47401518503978e-06, "loss": 0.3677, "step": 5702 }, { "epoch": 0.7007003317360855, "grad_norm": 0.3882130398357408, "learning_rate": 7.468450929201882e-06, "loss": 0.4283, "step": 5703 }, { "epoch": 0.7008231969529426, "grad_norm": 0.41619809999732604, "learning_rate": 7.462888058743593e-06, "loss": 0.473, "step": 5704 }, { "epoch": 0.7009460621697997, "grad_norm": 0.36756912480284354, "learning_rate": 7.457326574688172e-06, "loss": 0.339, "step": 5705 }, { "epoch": 0.7010689273866568, "grad_norm": 0.31350875985871707, "learning_rate": 7.451766478058605e-06, "loss": 0.4572, "step": 5706 }, { "epoch": 0.7011917926035139, "grad_norm": 0.32224038401331456, "learning_rate": 7.446207769877642e-06, "loss": 0.3848, "step": 5707 }, { "epoch": 0.7013146578203711, "grad_norm": 0.37785707637219035, "learning_rate": 7.440650451167772e-06, "loss": 0.3767, "step": 5708 }, { "epoch": 0.7014375230372282, "grad_norm": 0.3535038616674032, "learning_rate": 7.435094522951234e-06, "loss": 0.3346, "step": 5709 }, { "epoch": 0.7015603882540853, "grad_norm": 0.3165846874350855, "learning_rate": 7.429539986249992e-06, "loss": 0.4307, "step": 5710 }, { "epoch": 0.7016832534709424, "grad_norm": 0.43158622664415247, "learning_rate": 7.423986842085774e-06, "loss": 0.4032, "step": 5711 }, { "epoch": 0.7018061186877995, "grad_norm": 0.41489707876674703, "learning_rate": 7.4184350914800435e-06, "loss": 0.4486, "step": 5712 }, { "epoch": 0.7019289839046566, "grad_norm": 0.37346915626260924, "learning_rate": 7.412884735454016e-06, "loss": 0.3551, "step": 5713 }, { "epoch": 0.7020518491215137, "grad_norm": 0.3485300970291306, "learning_rate": 7.407335775028631e-06, "loss": 0.3868, "step": 5714 }, { "epoch": 0.7021747143383708, "grad_norm": 0.37261618793187595, "learning_rate": 7.401788211224589e-06, "loss": 0.417, "step": 5715 }, { "epoch": 0.7022975795552279, "grad_norm": 0.33902428329178846, "learning_rate": 7.396242045062336e-06, "loss": 0.3657, "step": 5716 }, { "epoch": 0.702420444772085, "grad_norm": 0.3761184406574658, "learning_rate": 7.3906972775620415e-06, "loss": 0.364, "step": 5717 }, { "epoch": 0.7025433099889421, "grad_norm": 0.3781437117469123, "learning_rate": 7.385153909743641e-06, "loss": 0.3471, "step": 5718 }, { "epoch": 0.7026661752057992, "grad_norm": 0.300213317562397, "learning_rate": 7.3796119426267815e-06, "loss": 0.4684, "step": 5719 }, { "epoch": 0.7027890404226563, "grad_norm": 0.3020585521629813, "learning_rate": 7.374071377230898e-06, "loss": 0.3538, "step": 5720 }, { "epoch": 0.7029119056395134, "grad_norm": 0.30241682147269655, "learning_rate": 7.3685322145751235e-06, "loss": 0.4138, "step": 5721 }, { "epoch": 0.7030347708563706, "grad_norm": 0.29920416400925054, "learning_rate": 7.36299445567836e-06, "loss": 0.3937, "step": 5722 }, { "epoch": 0.7031576360732277, "grad_norm": 0.33960597152555294, "learning_rate": 7.3574581015592355e-06, "loss": 0.4579, "step": 5723 }, { "epoch": 0.7032805012900848, "grad_norm": 0.32673753684396145, "learning_rate": 7.351923153236128e-06, "loss": 0.3835, "step": 5724 }, { "epoch": 0.7034033665069419, "grad_norm": 0.32896083989780095, "learning_rate": 7.346389611727163e-06, "loss": 0.3585, "step": 5725 }, { "epoch": 0.703526231723799, "grad_norm": 0.39521819524013285, "learning_rate": 7.340857478050183e-06, "loss": 0.4104, "step": 5726 }, { "epoch": 0.7036490969406561, "grad_norm": 0.3783644517221416, "learning_rate": 7.335326753222808e-06, "loss": 0.4381, "step": 5727 }, { "epoch": 0.7037719621575133, "grad_norm": 0.37699394620373305, "learning_rate": 7.329797438262366e-06, "loss": 0.4487, "step": 5728 }, { "epoch": 0.7038948273743704, "grad_norm": 0.3430920953296556, "learning_rate": 7.324269534185947e-06, "loss": 0.373, "step": 5729 }, { "epoch": 0.7040176925912274, "grad_norm": 0.3368851811830969, "learning_rate": 7.318743042010361e-06, "loss": 0.3648, "step": 5730 }, { "epoch": 0.7041405578080845, "grad_norm": 0.3783413679260017, "learning_rate": 7.313217962752179e-06, "loss": 0.3576, "step": 5731 }, { "epoch": 0.7042634230249416, "grad_norm": 0.3432698388581226, "learning_rate": 7.307694297427704e-06, "loss": 0.4206, "step": 5732 }, { "epoch": 0.7043862882417987, "grad_norm": 0.3620170602631493, "learning_rate": 7.3021720470529794e-06, "loss": 0.3506, "step": 5733 }, { "epoch": 0.7045091534586558, "grad_norm": 0.4090022246734171, "learning_rate": 7.296651212643781e-06, "loss": 0.3424, "step": 5734 }, { "epoch": 0.704632018675513, "grad_norm": 0.32501701443791536, "learning_rate": 7.291131795215632e-06, "loss": 0.4389, "step": 5735 }, { "epoch": 0.7047548838923701, "grad_norm": 0.4413806906231627, "learning_rate": 7.285613795783803e-06, "loss": 0.3495, "step": 5736 }, { "epoch": 0.7048777491092272, "grad_norm": 0.3072359466515262, "learning_rate": 7.28009721536328e-06, "loss": 0.3799, "step": 5737 }, { "epoch": 0.7050006143260843, "grad_norm": 0.4157708565182623, "learning_rate": 7.274582054968811e-06, "loss": 0.3567, "step": 5738 }, { "epoch": 0.7051234795429414, "grad_norm": 0.35612686711915875, "learning_rate": 7.2690683156148705e-06, "loss": 0.3915, "step": 5739 }, { "epoch": 0.7052463447597985, "grad_norm": 0.42170523789445236, "learning_rate": 7.2635559983156825e-06, "loss": 0.3996, "step": 5740 }, { "epoch": 0.7053692099766556, "grad_norm": 0.3667074992500355, "learning_rate": 7.258045104085189e-06, "loss": 0.4688, "step": 5741 }, { "epoch": 0.7054920751935128, "grad_norm": 0.3685286969431141, "learning_rate": 7.252535633937092e-06, "loss": 0.4288, "step": 5742 }, { "epoch": 0.7056149404103699, "grad_norm": 0.4598953688532703, "learning_rate": 7.247027588884825e-06, "loss": 0.3975, "step": 5743 }, { "epoch": 0.7057378056272269, "grad_norm": 0.27424158146217176, "learning_rate": 7.2415209699415485e-06, "loss": 0.4039, "step": 5744 }, { "epoch": 0.705860670844084, "grad_norm": 0.3477785785717108, "learning_rate": 7.23601577812018e-06, "loss": 0.4251, "step": 5745 }, { "epoch": 0.7059835360609411, "grad_norm": 0.30493259161023745, "learning_rate": 7.2305120144333465e-06, "loss": 0.3912, "step": 5746 }, { "epoch": 0.7061064012777982, "grad_norm": 0.3570683694713157, "learning_rate": 7.225009679893452e-06, "loss": 0.4309, "step": 5747 }, { "epoch": 0.7062292664946553, "grad_norm": 0.3371246123923936, "learning_rate": 7.2195087755125975e-06, "loss": 0.4118, "step": 5748 }, { "epoch": 0.7063521317115125, "grad_norm": 0.34930030833392706, "learning_rate": 7.214009302302648e-06, "loss": 0.4101, "step": 5749 }, { "epoch": 0.7064749969283696, "grad_norm": 0.38390764648515285, "learning_rate": 7.208511261275198e-06, "loss": 0.4057, "step": 5750 }, { "epoch": 0.7065978621452267, "grad_norm": 0.33973487120891543, "learning_rate": 7.203014653441567e-06, "loss": 0.3689, "step": 5751 }, { "epoch": 0.7067207273620838, "grad_norm": 0.4081500271588326, "learning_rate": 7.197519479812828e-06, "loss": 0.4136, "step": 5752 }, { "epoch": 0.7068435925789409, "grad_norm": 0.37795639473912246, "learning_rate": 7.192025741399771e-06, "loss": 0.3994, "step": 5753 }, { "epoch": 0.706966457795798, "grad_norm": 0.38137598423912755, "learning_rate": 7.186533439212953e-06, "loss": 0.3604, "step": 5754 }, { "epoch": 0.7070893230126551, "grad_norm": 0.3060525283422416, "learning_rate": 7.181042574262633e-06, "loss": 0.2858, "step": 5755 }, { "epoch": 0.7072121882295123, "grad_norm": 0.39187575936424984, "learning_rate": 7.1755531475588265e-06, "loss": 0.4005, "step": 5756 }, { "epoch": 0.7073350534463694, "grad_norm": 0.38201835346361496, "learning_rate": 7.1700651601112646e-06, "loss": 0.4014, "step": 5757 }, { "epoch": 0.7074579186632265, "grad_norm": 0.3886487712844083, "learning_rate": 7.16457861292945e-06, "loss": 0.3581, "step": 5758 }, { "epoch": 0.7075807838800835, "grad_norm": 0.39818684526490883, "learning_rate": 7.159093507022579e-06, "loss": 0.3789, "step": 5759 }, { "epoch": 0.7077036490969406, "grad_norm": 0.3438292491583801, "learning_rate": 7.153609843399613e-06, "loss": 0.3795, "step": 5760 }, { "epoch": 0.7078265143137977, "grad_norm": 0.6479196214336383, "learning_rate": 7.148127623069225e-06, "loss": 0.4192, "step": 5761 }, { "epoch": 0.7079493795306548, "grad_norm": 0.32204938718465886, "learning_rate": 7.14264684703984e-06, "loss": 0.4897, "step": 5762 }, { "epoch": 0.708072244747512, "grad_norm": 0.28879142779742295, "learning_rate": 7.137167516319615e-06, "loss": 0.3085, "step": 5763 }, { "epoch": 0.7081951099643691, "grad_norm": 0.36963525232885325, "learning_rate": 7.131689631916427e-06, "loss": 0.3071, "step": 5764 }, { "epoch": 0.7083179751812262, "grad_norm": 0.31038770695565016, "learning_rate": 7.126213194837905e-06, "loss": 0.3851, "step": 5765 }, { "epoch": 0.7084408403980833, "grad_norm": 0.37347754465641175, "learning_rate": 7.120738206091403e-06, "loss": 0.3685, "step": 5766 }, { "epoch": 0.7085637056149404, "grad_norm": 0.6689663274006594, "learning_rate": 7.115264666684013e-06, "loss": 0.5719, "step": 5767 }, { "epoch": 0.7086865708317975, "grad_norm": 0.35253689781131886, "learning_rate": 7.1097925776225495e-06, "loss": 0.3864, "step": 5768 }, { "epoch": 0.7088094360486547, "grad_norm": 0.3050262716234201, "learning_rate": 7.10432193991357e-06, "loss": 0.3505, "step": 5769 }, { "epoch": 0.7089323012655118, "grad_norm": 0.28429711799858687, "learning_rate": 7.098852754563371e-06, "loss": 0.3761, "step": 5770 }, { "epoch": 0.7090551664823689, "grad_norm": 0.39589227071845995, "learning_rate": 7.09338502257796e-06, "loss": 0.4551, "step": 5771 }, { "epoch": 0.709178031699226, "grad_norm": 0.5246314739041976, "learning_rate": 7.0879187449631e-06, "loss": 0.504, "step": 5772 }, { "epoch": 0.7093008969160831, "grad_norm": 0.3267219219244797, "learning_rate": 7.082453922724275e-06, "loss": 0.3422, "step": 5773 }, { "epoch": 0.7094237621329401, "grad_norm": 0.42592054901595394, "learning_rate": 7.076990556866708e-06, "loss": 0.3883, "step": 5774 }, { "epoch": 0.7095466273497972, "grad_norm": 0.43860002639526574, "learning_rate": 7.0715286483953405e-06, "loss": 0.4762, "step": 5775 }, { "epoch": 0.7096694925666543, "grad_norm": 0.44312547452230566, "learning_rate": 7.06606819831486e-06, "loss": 0.5191, "step": 5776 }, { "epoch": 0.7097923577835115, "grad_norm": 0.4060535050616636, "learning_rate": 7.060609207629682e-06, "loss": 0.3926, "step": 5777 }, { "epoch": 0.7099152230003686, "grad_norm": 0.3406150934499126, "learning_rate": 7.055151677343955e-06, "loss": 0.3338, "step": 5778 }, { "epoch": 0.7100380882172257, "grad_norm": 0.4096908254343643, "learning_rate": 7.04969560846155e-06, "loss": 0.4972, "step": 5779 }, { "epoch": 0.7101609534340828, "grad_norm": 0.37139598556173853, "learning_rate": 7.044241001986076e-06, "loss": 0.3474, "step": 5780 }, { "epoch": 0.7102838186509399, "grad_norm": 0.33718025962114573, "learning_rate": 7.038787858920881e-06, "loss": 0.4117, "step": 5781 }, { "epoch": 0.710406683867797, "grad_norm": 0.43171937063336585, "learning_rate": 7.033336180269024e-06, "loss": 0.4058, "step": 5782 }, { "epoch": 0.7105295490846542, "grad_norm": 0.3693209803050969, "learning_rate": 7.027885967033316e-06, "loss": 0.4067, "step": 5783 }, { "epoch": 0.7106524143015113, "grad_norm": 0.3592977667069764, "learning_rate": 7.022437220216273e-06, "loss": 0.367, "step": 5784 }, { "epoch": 0.7107752795183684, "grad_norm": 0.38898454936834836, "learning_rate": 7.016989940820178e-06, "loss": 0.4566, "step": 5785 }, { "epoch": 0.7108981447352255, "grad_norm": 0.3561339353865683, "learning_rate": 7.011544129847006e-06, "loss": 0.3971, "step": 5786 }, { "epoch": 0.7110210099520826, "grad_norm": 0.3572138210795876, "learning_rate": 7.00609978829849e-06, "loss": 0.3599, "step": 5787 }, { "epoch": 0.7111438751689396, "grad_norm": 0.32497555317576743, "learning_rate": 7.000656917176069e-06, "loss": 0.4659, "step": 5788 }, { "epoch": 0.7112667403857967, "grad_norm": 0.3780285302830039, "learning_rate": 6.995215517480932e-06, "loss": 0.3083, "step": 5789 }, { "epoch": 0.7113896056026539, "grad_norm": 0.27710655305421156, "learning_rate": 6.9897755902139946e-06, "loss": 0.3699, "step": 5790 }, { "epoch": 0.711512470819511, "grad_norm": 0.3338289744842717, "learning_rate": 6.984337136375875e-06, "loss": 0.4578, "step": 5791 }, { "epoch": 0.7116353360363681, "grad_norm": 0.33033967280089604, "learning_rate": 6.978900156966968e-06, "loss": 0.4163, "step": 5792 }, { "epoch": 0.7117582012532252, "grad_norm": 0.3149467221395675, "learning_rate": 6.973464652987353e-06, "loss": 0.3675, "step": 5793 }, { "epoch": 0.7118810664700823, "grad_norm": 0.33714564305680184, "learning_rate": 6.968030625436867e-06, "loss": 0.371, "step": 5794 }, { "epoch": 0.7120039316869394, "grad_norm": 0.39422882532414044, "learning_rate": 6.962598075315047e-06, "loss": 0.4003, "step": 5795 }, { "epoch": 0.7121267969037965, "grad_norm": 0.3356276587570646, "learning_rate": 6.957167003621199e-06, "loss": 0.5064, "step": 5796 }, { "epoch": 0.7122496621206537, "grad_norm": 0.3519626984599877, "learning_rate": 6.951737411354313e-06, "loss": 0.4154, "step": 5797 }, { "epoch": 0.7123725273375108, "grad_norm": 0.33150107286095504, "learning_rate": 6.9463092995131426e-06, "loss": 0.4284, "step": 5798 }, { "epoch": 0.7124953925543679, "grad_norm": 0.38597900546320785, "learning_rate": 6.94088266909614e-06, "loss": 0.4148, "step": 5799 }, { "epoch": 0.712618257771225, "grad_norm": 0.31367233011077134, "learning_rate": 6.935457521101507e-06, "loss": 0.4035, "step": 5800 }, { "epoch": 0.7127411229880821, "grad_norm": 0.31914237518519456, "learning_rate": 6.930033856527167e-06, "loss": 0.4178, "step": 5801 }, { "epoch": 0.7128639882049392, "grad_norm": 0.3082878277074375, "learning_rate": 6.9246116763707575e-06, "loss": 0.3507, "step": 5802 }, { "epoch": 0.7129868534217962, "grad_norm": 0.35067856817820225, "learning_rate": 6.91919098162966e-06, "loss": 0.4739, "step": 5803 }, { "epoch": 0.7131097186386534, "grad_norm": 0.37156275270481726, "learning_rate": 6.913771773300975e-06, "loss": 0.3755, "step": 5804 }, { "epoch": 0.7132325838555105, "grad_norm": 0.35527046956210395, "learning_rate": 6.908354052381538e-06, "loss": 0.3816, "step": 5805 }, { "epoch": 0.7133554490723676, "grad_norm": 0.31227792559533923, "learning_rate": 6.902937819867891e-06, "loss": 0.4359, "step": 5806 }, { "epoch": 0.7134783142892247, "grad_norm": 0.7661048421773059, "learning_rate": 6.897523076756319e-06, "loss": 0.5741, "step": 5807 }, { "epoch": 0.7136011795060818, "grad_norm": 0.33969274360252816, "learning_rate": 6.892109824042838e-06, "loss": 0.3799, "step": 5808 }, { "epoch": 0.7137240447229389, "grad_norm": 0.3506235086935714, "learning_rate": 6.886698062723167e-06, "loss": 0.3727, "step": 5809 }, { "epoch": 0.713846909939796, "grad_norm": 0.33499821746334946, "learning_rate": 6.881287793792777e-06, "loss": 0.2688, "step": 5810 }, { "epoch": 0.7139697751566532, "grad_norm": 0.34922805232094556, "learning_rate": 6.875879018246835e-06, "loss": 0.3839, "step": 5811 }, { "epoch": 0.7140926403735103, "grad_norm": 0.36762325277888885, "learning_rate": 6.87047173708027e-06, "loss": 0.3996, "step": 5812 }, { "epoch": 0.7142155055903674, "grad_norm": 0.4402487435838597, "learning_rate": 6.865065951287703e-06, "loss": 0.3795, "step": 5813 }, { "epoch": 0.7143383708072245, "grad_norm": 0.2836863993284565, "learning_rate": 6.859661661863497e-06, "loss": 0.4253, "step": 5814 }, { "epoch": 0.7144612360240816, "grad_norm": 0.38033358700065306, "learning_rate": 6.854258869801736e-06, "loss": 0.3699, "step": 5815 }, { "epoch": 0.7145841012409387, "grad_norm": 0.33493589181339173, "learning_rate": 6.848857576096235e-06, "loss": 0.4369, "step": 5816 }, { "epoch": 0.7147069664577957, "grad_norm": 0.3725124958515529, "learning_rate": 6.843457781740516e-06, "loss": 0.364, "step": 5817 }, { "epoch": 0.7148298316746529, "grad_norm": 0.359647621895218, "learning_rate": 6.83805948772784e-06, "loss": 0.3975, "step": 5818 }, { "epoch": 0.71495269689151, "grad_norm": 0.35934844448580416, "learning_rate": 6.832662695051195e-06, "loss": 0.4, "step": 5819 }, { "epoch": 0.7150755621083671, "grad_norm": 0.32797052814141825, "learning_rate": 6.827267404703274e-06, "loss": 0.4698, "step": 5820 }, { "epoch": 0.7151984273252242, "grad_norm": 0.32239412648831434, "learning_rate": 6.821873617676519e-06, "loss": 0.3623, "step": 5821 }, { "epoch": 0.7153212925420813, "grad_norm": 0.33125179762800266, "learning_rate": 6.816481334963061e-06, "loss": 0.3971, "step": 5822 }, { "epoch": 0.7154441577589384, "grad_norm": 0.32808321133681173, "learning_rate": 6.811090557554803e-06, "loss": 0.2964, "step": 5823 }, { "epoch": 0.7155670229757956, "grad_norm": 0.30584854818659435, "learning_rate": 6.805701286443323e-06, "loss": 0.4169, "step": 5824 }, { "epoch": 0.7156898881926527, "grad_norm": 0.2924389717519499, "learning_rate": 6.800313522619957e-06, "loss": 0.3564, "step": 5825 }, { "epoch": 0.7158127534095098, "grad_norm": 0.4185231948813614, "learning_rate": 6.794927267075735e-06, "loss": 0.4025, "step": 5826 }, { "epoch": 0.7159356186263669, "grad_norm": 0.3911900875979098, "learning_rate": 6.7895425208014304e-06, "loss": 0.3785, "step": 5827 }, { "epoch": 0.716058483843224, "grad_norm": 0.38698251772404607, "learning_rate": 6.784159284787537e-06, "loss": 0.4053, "step": 5828 }, { "epoch": 0.7161813490600811, "grad_norm": 0.3212971860065086, "learning_rate": 6.7787775600242575e-06, "loss": 0.3604, "step": 5829 }, { "epoch": 0.7163042142769382, "grad_norm": 0.3748273703982955, "learning_rate": 6.773397347501529e-06, "loss": 0.4635, "step": 5830 }, { "epoch": 0.7164270794937954, "grad_norm": 0.37148433990950813, "learning_rate": 6.768018648209008e-06, "loss": 0.3693, "step": 5831 }, { "epoch": 0.7165499447106524, "grad_norm": 0.2860187894681748, "learning_rate": 6.762641463136074e-06, "loss": 0.4244, "step": 5832 }, { "epoch": 0.7166728099275095, "grad_norm": 0.40770154376934326, "learning_rate": 6.757265793271811e-06, "loss": 0.4595, "step": 5833 }, { "epoch": 0.7167956751443666, "grad_norm": 0.2948827831884152, "learning_rate": 6.7518916396050606e-06, "loss": 0.3201, "step": 5834 }, { "epoch": 0.7169185403612237, "grad_norm": 0.380493631919749, "learning_rate": 6.746519003124347e-06, "loss": 0.4418, "step": 5835 }, { "epoch": 0.7170414055780808, "grad_norm": 0.360135413298959, "learning_rate": 6.7411478848179435e-06, "loss": 0.3616, "step": 5836 }, { "epoch": 0.7171642707949379, "grad_norm": 0.3493414172312952, "learning_rate": 6.73577828567382e-06, "loss": 0.3785, "step": 5837 }, { "epoch": 0.7172871360117951, "grad_norm": 0.36449940806821923, "learning_rate": 6.730410206679684e-06, "loss": 0.3693, "step": 5838 }, { "epoch": 0.7174100012286522, "grad_norm": 0.34075421460411104, "learning_rate": 6.725043648822967e-06, "loss": 0.4165, "step": 5839 }, { "epoch": 0.7175328664455093, "grad_norm": 0.3672115430699732, "learning_rate": 6.719678613090801e-06, "loss": 0.3305, "step": 5840 }, { "epoch": 0.7176557316623664, "grad_norm": 0.36303002563757375, "learning_rate": 6.714315100470053e-06, "loss": 0.4128, "step": 5841 }, { "epoch": 0.7177785968792235, "grad_norm": 0.3557059691756564, "learning_rate": 6.708953111947308e-06, "loss": 0.3765, "step": 5842 }, { "epoch": 0.7179014620960806, "grad_norm": 0.42909587587290304, "learning_rate": 6.703592648508875e-06, "loss": 0.4033, "step": 5843 }, { "epoch": 0.7180243273129377, "grad_norm": 0.3648662555123228, "learning_rate": 6.698233711140764e-06, "loss": 0.3834, "step": 5844 }, { "epoch": 0.7181471925297949, "grad_norm": 0.37976322164877974, "learning_rate": 6.692876300828723e-06, "loss": 0.3035, "step": 5845 }, { "epoch": 0.7182700577466519, "grad_norm": 0.3093597248498299, "learning_rate": 6.687520418558219e-06, "loss": 0.4081, "step": 5846 }, { "epoch": 0.718392922963509, "grad_norm": 0.37801328987652333, "learning_rate": 6.68216606531442e-06, "loss": 0.3606, "step": 5847 }, { "epoch": 0.7185157881803661, "grad_norm": 0.37859404038742805, "learning_rate": 6.676813242082236e-06, "loss": 0.4329, "step": 5848 }, { "epoch": 0.7186386533972232, "grad_norm": 0.34027460509834023, "learning_rate": 6.671461949846265e-06, "loss": 0.3703, "step": 5849 }, { "epoch": 0.7187615186140803, "grad_norm": 0.3399266685804644, "learning_rate": 6.6661121895908695e-06, "loss": 0.3849, "step": 5850 }, { "epoch": 0.7188843838309374, "grad_norm": 0.3278283511497498, "learning_rate": 6.660763962300084e-06, "loss": 0.4449, "step": 5851 }, { "epoch": 0.7190072490477946, "grad_norm": 0.286663978291073, "learning_rate": 6.6554172689576896e-06, "loss": 0.4771, "step": 5852 }, { "epoch": 0.7191301142646517, "grad_norm": 0.3955754620044851, "learning_rate": 6.650072110547169e-06, "loss": 0.3895, "step": 5853 }, { "epoch": 0.7192529794815088, "grad_norm": 0.4552465134954286, "learning_rate": 6.64472848805173e-06, "loss": 0.46, "step": 5854 }, { "epoch": 0.7193758446983659, "grad_norm": 0.43732296664437315, "learning_rate": 6.639386402454302e-06, "loss": 0.3996, "step": 5855 }, { "epoch": 0.719498709915223, "grad_norm": 0.40515689844422104, "learning_rate": 6.634045854737523e-06, "loss": 0.4244, "step": 5856 }, { "epoch": 0.7196215751320801, "grad_norm": 0.3527260877776804, "learning_rate": 6.628706845883759e-06, "loss": 0.3805, "step": 5857 }, { "epoch": 0.7197444403489373, "grad_norm": 0.286682318700576, "learning_rate": 6.623369376875077e-06, "loss": 0.4014, "step": 5858 }, { "epoch": 0.7198673055657944, "grad_norm": 0.3060779933185839, "learning_rate": 6.618033448693279e-06, "loss": 0.302, "step": 5859 }, { "epoch": 0.7199901707826515, "grad_norm": 0.40056589872767356, "learning_rate": 6.612699062319858e-06, "loss": 0.3881, "step": 5860 }, { "epoch": 0.7201130359995085, "grad_norm": 0.35725029765562505, "learning_rate": 6.607366218736062e-06, "loss": 0.4082, "step": 5861 }, { "epoch": 0.7202359012163656, "grad_norm": 0.4479715065922711, "learning_rate": 6.602034918922816e-06, "loss": 0.4318, "step": 5862 }, { "epoch": 0.7203587664332227, "grad_norm": 0.40561513916518227, "learning_rate": 6.59670516386079e-06, "loss": 0.3622, "step": 5863 }, { "epoch": 0.7204816316500798, "grad_norm": 0.40331600280274077, "learning_rate": 6.591376954530345e-06, "loss": 0.4269, "step": 5864 }, { "epoch": 0.720604496866937, "grad_norm": 0.29405684582404107, "learning_rate": 6.586050291911579e-06, "loss": 0.3351, "step": 5865 }, { "epoch": 0.7207273620837941, "grad_norm": 0.3837181025926267, "learning_rate": 6.5807251769843e-06, "loss": 0.4028, "step": 5866 }, { "epoch": 0.7208502273006512, "grad_norm": 0.3073179429545199, "learning_rate": 6.575401610728019e-06, "loss": 0.3405, "step": 5867 }, { "epoch": 0.7209730925175083, "grad_norm": 0.3992075935764549, "learning_rate": 6.570079594121976e-06, "loss": 0.3176, "step": 5868 }, { "epoch": 0.7210959577343654, "grad_norm": 0.4605861937544197, "learning_rate": 6.5647591281451215e-06, "loss": 0.4837, "step": 5869 }, { "epoch": 0.7212188229512225, "grad_norm": 0.37236702687798967, "learning_rate": 6.559440213776126e-06, "loss": 0.4769, "step": 5870 }, { "epoch": 0.7213416881680796, "grad_norm": 0.42677365100796744, "learning_rate": 6.554122851993359e-06, "loss": 0.4019, "step": 5871 }, { "epoch": 0.7214645533849368, "grad_norm": 0.3949985943757925, "learning_rate": 6.54880704377492e-06, "loss": 0.4888, "step": 5872 }, { "epoch": 0.7215874186017939, "grad_norm": 0.3151261767907563, "learning_rate": 6.543492790098623e-06, "loss": 0.387, "step": 5873 }, { "epoch": 0.721710283818651, "grad_norm": 0.3368292221878858, "learning_rate": 6.5381800919419805e-06, "loss": 0.3009, "step": 5874 }, { "epoch": 0.721833149035508, "grad_norm": 0.3903413533638625, "learning_rate": 6.532868950282237e-06, "loss": 0.3663, "step": 5875 }, { "epoch": 0.7219560142523651, "grad_norm": 0.28883284103944756, "learning_rate": 6.527559366096328e-06, "loss": 0.3937, "step": 5876 }, { "epoch": 0.7220788794692222, "grad_norm": 0.3381628500572816, "learning_rate": 6.5222513403609405e-06, "loss": 0.3634, "step": 5877 }, { "epoch": 0.7222017446860793, "grad_norm": 0.34392971560003427, "learning_rate": 6.5169448740524315e-06, "loss": 0.4039, "step": 5878 }, { "epoch": 0.7223246099029365, "grad_norm": 0.4880188972849361, "learning_rate": 6.511639968146898e-06, "loss": 0.3988, "step": 5879 }, { "epoch": 0.7224474751197936, "grad_norm": 0.37630276126977596, "learning_rate": 6.506336623620145e-06, "loss": 0.3954, "step": 5880 }, { "epoch": 0.7225703403366507, "grad_norm": 0.29693831607753945, "learning_rate": 6.501034841447692e-06, "loss": 0.3209, "step": 5881 }, { "epoch": 0.7226932055535078, "grad_norm": 0.4472100517628728, "learning_rate": 6.495734622604757e-06, "loss": 0.453, "step": 5882 }, { "epoch": 0.7228160707703649, "grad_norm": 0.39586358026053503, "learning_rate": 6.490435968066284e-06, "loss": 0.4069, "step": 5883 }, { "epoch": 0.722938935987222, "grad_norm": 0.41477017536688066, "learning_rate": 6.485138878806937e-06, "loss": 0.4415, "step": 5884 }, { "epoch": 0.7230618012040791, "grad_norm": 0.28759049560832955, "learning_rate": 6.479843355801064e-06, "loss": 0.4095, "step": 5885 }, { "epoch": 0.7231846664209363, "grad_norm": 0.36314018754749605, "learning_rate": 6.474549400022757e-06, "loss": 0.3448, "step": 5886 }, { "epoch": 0.7233075316377934, "grad_norm": 0.30507060486121934, "learning_rate": 6.469257012445788e-06, "loss": 0.318, "step": 5887 }, { "epoch": 0.7234303968546505, "grad_norm": 0.416284208537538, "learning_rate": 6.463966194043678e-06, "loss": 0.3825, "step": 5888 }, { "epoch": 0.7235532620715076, "grad_norm": 0.3808002032148582, "learning_rate": 6.458676945789624e-06, "loss": 0.4754, "step": 5889 }, { "epoch": 0.7236761272883646, "grad_norm": 0.3343862417542423, "learning_rate": 6.453389268656558e-06, "loss": 0.4067, "step": 5890 }, { "epoch": 0.7237989925052217, "grad_norm": 0.37103859733143807, "learning_rate": 6.448103163617103e-06, "loss": 0.3584, "step": 5891 }, { "epoch": 0.7239218577220788, "grad_norm": 0.3904439106796482, "learning_rate": 6.442818631643612e-06, "loss": 0.4143, "step": 5892 }, { "epoch": 0.724044722938936, "grad_norm": 0.37260870734692403, "learning_rate": 6.437535673708143e-06, "loss": 0.3907, "step": 5893 }, { "epoch": 0.7241675881557931, "grad_norm": 0.2890265534236589, "learning_rate": 6.432254290782452e-06, "loss": 0.3346, "step": 5894 }, { "epoch": 0.7242904533726502, "grad_norm": 0.3607720113062382, "learning_rate": 6.42697448383802e-06, "loss": 0.3733, "step": 5895 }, { "epoch": 0.7244133185895073, "grad_norm": 0.38938980481906843, "learning_rate": 6.421696253846033e-06, "loss": 0.3433, "step": 5896 }, { "epoch": 0.7245361838063644, "grad_norm": 0.31360918375860297, "learning_rate": 6.416419601777395e-06, "loss": 0.3961, "step": 5897 }, { "epoch": 0.7246590490232215, "grad_norm": 0.33879046107343297, "learning_rate": 6.411144528602693e-06, "loss": 0.4151, "step": 5898 }, { "epoch": 0.7247819142400787, "grad_norm": 0.3972257018561146, "learning_rate": 6.405871035292266e-06, "loss": 0.3938, "step": 5899 }, { "epoch": 0.7249047794569358, "grad_norm": 0.3768775897553548, "learning_rate": 6.40059912281612e-06, "loss": 0.3765, "step": 5900 }, { "epoch": 0.7250276446737929, "grad_norm": 0.3390569592037857, "learning_rate": 6.395328792144003e-06, "loss": 0.2943, "step": 5901 }, { "epoch": 0.72515050989065, "grad_norm": 0.3528442164051358, "learning_rate": 6.390060044245345e-06, "loss": 0.4769, "step": 5902 }, { "epoch": 0.7252733751075071, "grad_norm": 0.39451216513633763, "learning_rate": 6.384792880089306e-06, "loss": 0.3939, "step": 5903 }, { "epoch": 0.7253962403243642, "grad_norm": 0.3473069599671532, "learning_rate": 6.3795273006447505e-06, "loss": 0.3959, "step": 5904 }, { "epoch": 0.7255191055412212, "grad_norm": 0.3827564796885304, "learning_rate": 6.3742633068802356e-06, "loss": 0.3295, "step": 5905 }, { "epoch": 0.7256419707580783, "grad_norm": 0.3129256056463625, "learning_rate": 6.369000899764046e-06, "loss": 0.4254, "step": 5906 }, { "epoch": 0.7257648359749355, "grad_norm": 0.3316847261458268, "learning_rate": 6.363740080264166e-06, "loss": 0.3717, "step": 5907 }, { "epoch": 0.7258877011917926, "grad_norm": 0.30636053938124513, "learning_rate": 6.358480849348296e-06, "loss": 0.3904, "step": 5908 }, { "epoch": 0.7260105664086497, "grad_norm": 0.40312076339396413, "learning_rate": 6.3532232079838275e-06, "loss": 0.3989, "step": 5909 }, { "epoch": 0.7261334316255068, "grad_norm": 0.5730508160435211, "learning_rate": 6.347967157137873e-06, "loss": 0.5095, "step": 5910 }, { "epoch": 0.7262562968423639, "grad_norm": 0.41769516846688143, "learning_rate": 6.342712697777254e-06, "loss": 0.379, "step": 5911 }, { "epoch": 0.726379162059221, "grad_norm": 0.36742227336462624, "learning_rate": 6.337459830868486e-06, "loss": 0.4007, "step": 5912 }, { "epoch": 0.7265020272760782, "grad_norm": 0.32670714845411497, "learning_rate": 6.332208557377807e-06, "loss": 0.3669, "step": 5913 }, { "epoch": 0.7266248924929353, "grad_norm": 0.4320281083744567, "learning_rate": 6.326958878271143e-06, "loss": 0.3716, "step": 5914 }, { "epoch": 0.7267477577097924, "grad_norm": 0.3104667734519495, "learning_rate": 6.321710794514154e-06, "loss": 0.3728, "step": 5915 }, { "epoch": 0.7268706229266495, "grad_norm": 0.3493225641054481, "learning_rate": 6.3164643070721806e-06, "loss": 0.4225, "step": 5916 }, { "epoch": 0.7269934881435066, "grad_norm": 0.39956197396253856, "learning_rate": 6.3112194169102885e-06, "loss": 0.3156, "step": 5917 }, { "epoch": 0.7271163533603637, "grad_norm": 0.36418590143064, "learning_rate": 6.305976124993225e-06, "loss": 0.3932, "step": 5918 }, { "epoch": 0.7272392185772207, "grad_norm": 0.333154673798653, "learning_rate": 6.3007344322854815e-06, "loss": 0.3492, "step": 5919 }, { "epoch": 0.7273620837940779, "grad_norm": 0.39308002431649813, "learning_rate": 6.295494339751217e-06, "loss": 0.4, "step": 5920 }, { "epoch": 0.727484949010935, "grad_norm": 0.37746537158240856, "learning_rate": 6.290255848354316e-06, "loss": 0.3554, "step": 5921 }, { "epoch": 0.7276078142277921, "grad_norm": 0.5223220032371367, "learning_rate": 6.285018959058376e-06, "loss": 0.4593, "step": 5922 }, { "epoch": 0.7277306794446492, "grad_norm": 0.32412520020116403, "learning_rate": 6.279783672826672e-06, "loss": 0.4376, "step": 5923 }, { "epoch": 0.7278535446615063, "grad_norm": 0.39038774518747893, "learning_rate": 6.2745499906222136e-06, "loss": 0.408, "step": 5924 }, { "epoch": 0.7279764098783634, "grad_norm": 0.3536731191978655, "learning_rate": 6.269317913407688e-06, "loss": 0.442, "step": 5925 }, { "epoch": 0.7280992750952205, "grad_norm": 0.36122597726976313, "learning_rate": 6.264087442145524e-06, "loss": 0.3886, "step": 5926 }, { "epoch": 0.7282221403120777, "grad_norm": 0.3500028463598804, "learning_rate": 6.258858577797815e-06, "loss": 0.3968, "step": 5927 }, { "epoch": 0.7283450055289348, "grad_norm": 0.3404701421430843, "learning_rate": 6.253631321326386e-06, "loss": 0.378, "step": 5928 }, { "epoch": 0.7284678707457919, "grad_norm": 0.3281655006961608, "learning_rate": 6.248405673692748e-06, "loss": 0.4187, "step": 5929 }, { "epoch": 0.728590735962649, "grad_norm": 0.36903865657025414, "learning_rate": 6.243181635858131e-06, "loss": 0.3879, "step": 5930 }, { "epoch": 0.7287136011795061, "grad_norm": 0.3471540056905186, "learning_rate": 6.237959208783468e-06, "loss": 0.4455, "step": 5931 }, { "epoch": 0.7288364663963632, "grad_norm": 0.3598564663648279, "learning_rate": 6.232738393429378e-06, "loss": 0.3658, "step": 5932 }, { "epoch": 0.7289593316132204, "grad_norm": 0.3026116705631571, "learning_rate": 6.227519190756204e-06, "loss": 0.3668, "step": 5933 }, { "epoch": 0.7290821968300774, "grad_norm": 0.3974049295962387, "learning_rate": 6.2223016017239835e-06, "loss": 0.3977, "step": 5934 }, { "epoch": 0.7292050620469345, "grad_norm": 0.3058511873365636, "learning_rate": 6.217085627292463e-06, "loss": 0.361, "step": 5935 }, { "epoch": 0.7293279272637916, "grad_norm": 0.31642035408846797, "learning_rate": 6.2118712684210755e-06, "loss": 0.3504, "step": 5936 }, { "epoch": 0.7294507924806487, "grad_norm": 0.30944983387046293, "learning_rate": 6.206658526068976e-06, "loss": 0.3955, "step": 5937 }, { "epoch": 0.7295736576975058, "grad_norm": 0.3154061166229297, "learning_rate": 6.201447401195015e-06, "loss": 0.3871, "step": 5938 }, { "epoch": 0.7296965229143629, "grad_norm": 0.3748261364813196, "learning_rate": 6.1962378947577486e-06, "loss": 0.4142, "step": 5939 }, { "epoch": 0.72981938813122, "grad_norm": 0.3363428708637857, "learning_rate": 6.191030007715422e-06, "loss": 0.4125, "step": 5940 }, { "epoch": 0.7299422533480772, "grad_norm": 0.3873752743056491, "learning_rate": 6.185823741025995e-06, "loss": 0.4376, "step": 5941 }, { "epoch": 0.7300651185649343, "grad_norm": 0.3488730807744903, "learning_rate": 6.180619095647137e-06, "loss": 0.3816, "step": 5942 }, { "epoch": 0.7301879837817914, "grad_norm": 0.3673650822388114, "learning_rate": 6.175416072536194e-06, "loss": 0.3784, "step": 5943 }, { "epoch": 0.7303108489986485, "grad_norm": 0.3377145353794722, "learning_rate": 6.170214672650236e-06, "loss": 0.3997, "step": 5944 }, { "epoch": 0.7304337142155056, "grad_norm": 0.32484445135105455, "learning_rate": 6.165014896946024e-06, "loss": 0.3177, "step": 5945 }, { "epoch": 0.7305565794323627, "grad_norm": 0.4162706872439664, "learning_rate": 6.159816746380033e-06, "loss": 0.4135, "step": 5946 }, { "epoch": 0.7306794446492199, "grad_norm": 0.4311343049826909, "learning_rate": 6.154620221908414e-06, "loss": 0.3836, "step": 5947 }, { "epoch": 0.7308023098660769, "grad_norm": 0.429144645804844, "learning_rate": 6.149425324487039e-06, "loss": 0.4256, "step": 5948 }, { "epoch": 0.730925175082934, "grad_norm": 0.459599704268188, "learning_rate": 6.144232055071485e-06, "loss": 0.4151, "step": 5949 }, { "epoch": 0.7310480402997911, "grad_norm": 0.3778470193771058, "learning_rate": 6.139040414617006e-06, "loss": 0.3193, "step": 5950 }, { "epoch": 0.7311709055166482, "grad_norm": 0.32378020431774956, "learning_rate": 6.133850404078585e-06, "loss": 0.4098, "step": 5951 }, { "epoch": 0.7312937707335053, "grad_norm": 0.36897346799769354, "learning_rate": 6.128662024410871e-06, "loss": 0.3554, "step": 5952 }, { "epoch": 0.7314166359503624, "grad_norm": 0.3678931284083277, "learning_rate": 6.123475276568257e-06, "loss": 0.4214, "step": 5953 }, { "epoch": 0.7315395011672196, "grad_norm": 0.3547915329843866, "learning_rate": 6.118290161504792e-06, "loss": 0.4154, "step": 5954 }, { "epoch": 0.7316623663840767, "grad_norm": 0.3456483520620347, "learning_rate": 6.113106680174259e-06, "loss": 0.4838, "step": 5955 }, { "epoch": 0.7317852316009338, "grad_norm": 0.35540444029008655, "learning_rate": 6.107924833530107e-06, "loss": 0.4098, "step": 5956 }, { "epoch": 0.7319080968177909, "grad_norm": 0.320015867342296, "learning_rate": 6.102744622525527e-06, "loss": 0.3465, "step": 5957 }, { "epoch": 0.732030962034648, "grad_norm": 0.3792820531718298, "learning_rate": 6.097566048113365e-06, "loss": 0.3642, "step": 5958 }, { "epoch": 0.7321538272515051, "grad_norm": 0.3245238335517669, "learning_rate": 6.092389111246201e-06, "loss": 0.3311, "step": 5959 }, { "epoch": 0.7322766924683622, "grad_norm": 0.3570904849474924, "learning_rate": 6.0872138128762866e-06, "loss": 0.3524, "step": 5960 }, { "epoch": 0.7323995576852194, "grad_norm": 0.3474494936867414, "learning_rate": 6.08204015395559e-06, "loss": 0.3271, "step": 5961 }, { "epoch": 0.7325224229020765, "grad_norm": 0.3969714690762138, "learning_rate": 6.076868135435778e-06, "loss": 0.4063, "step": 5962 }, { "epoch": 0.7326452881189335, "grad_norm": 0.41847163516929525, "learning_rate": 6.071697758268192e-06, "loss": 0.398, "step": 5963 }, { "epoch": 0.7327681533357906, "grad_norm": 0.33831422310874537, "learning_rate": 6.066529023403913e-06, "loss": 0.3186, "step": 5964 }, { "epoch": 0.7328910185526477, "grad_norm": 0.38491298904806936, "learning_rate": 6.061361931793679e-06, "loss": 0.33, "step": 5965 }, { "epoch": 0.7330138837695048, "grad_norm": 0.3660646963985384, "learning_rate": 6.056196484387954e-06, "loss": 0.4427, "step": 5966 }, { "epoch": 0.7331367489863619, "grad_norm": 0.6449612655954563, "learning_rate": 6.051032682136877e-06, "loss": 0.5369, "step": 5967 }, { "epoch": 0.7332596142032191, "grad_norm": 0.3380996332166708, "learning_rate": 6.0458705259903015e-06, "loss": 0.2746, "step": 5968 }, { "epoch": 0.7333824794200762, "grad_norm": 0.41953948315896245, "learning_rate": 6.04071001689778e-06, "loss": 0.3843, "step": 5969 }, { "epoch": 0.7335053446369333, "grad_norm": 0.3799236775343064, "learning_rate": 6.035551155808542e-06, "loss": 0.4742, "step": 5970 }, { "epoch": 0.7336282098537904, "grad_norm": 0.3014101335232219, "learning_rate": 6.0303939436715324e-06, "loss": 0.3325, "step": 5971 }, { "epoch": 0.7337510750706475, "grad_norm": 0.3520170424238807, "learning_rate": 6.025238381435387e-06, "loss": 0.3454, "step": 5972 }, { "epoch": 0.7338739402875046, "grad_norm": 0.34920917858899864, "learning_rate": 6.020084470048444e-06, "loss": 0.3564, "step": 5973 }, { "epoch": 0.7339968055043617, "grad_norm": 0.3368288153116504, "learning_rate": 6.01493221045872e-06, "loss": 0.3954, "step": 5974 }, { "epoch": 0.7341196707212189, "grad_norm": 0.31250678852690766, "learning_rate": 6.0097816036139455e-06, "loss": 0.4207, "step": 5975 }, { "epoch": 0.734242535938076, "grad_norm": 0.4072642902218502, "learning_rate": 6.004632650461542e-06, "loss": 0.3767, "step": 5976 }, { "epoch": 0.734365401154933, "grad_norm": 0.35381604148197554, "learning_rate": 5.9994853519486284e-06, "loss": 0.3909, "step": 5977 }, { "epoch": 0.7344882663717901, "grad_norm": 0.3244314355739053, "learning_rate": 5.994339709022012e-06, "loss": 0.4549, "step": 5978 }, { "epoch": 0.7346111315886472, "grad_norm": 0.4497299783372453, "learning_rate": 5.9891957226282e-06, "loss": 0.3838, "step": 5979 }, { "epoch": 0.7347339968055043, "grad_norm": 0.374525522005313, "learning_rate": 5.984053393713405e-06, "loss": 0.3751, "step": 5980 }, { "epoch": 0.7348568620223614, "grad_norm": 0.338742559976313, "learning_rate": 5.97891272322351e-06, "loss": 0.4208, "step": 5981 }, { "epoch": 0.7349797272392186, "grad_norm": 0.3342072006734691, "learning_rate": 5.973773712104122e-06, "loss": 0.4499, "step": 5982 }, { "epoch": 0.7351025924560757, "grad_norm": 0.3986093308921654, "learning_rate": 5.968636361300512e-06, "loss": 0.3792, "step": 5983 }, { "epoch": 0.7352254576729328, "grad_norm": 0.38844666486437107, "learning_rate": 5.963500671757684e-06, "loss": 0.4326, "step": 5984 }, { "epoch": 0.7353483228897899, "grad_norm": 0.38230821982809177, "learning_rate": 5.958366644420298e-06, "loss": 0.485, "step": 5985 }, { "epoch": 0.735471188106647, "grad_norm": 0.2819054153615162, "learning_rate": 5.9532342802327315e-06, "loss": 0.3881, "step": 5986 }, { "epoch": 0.7355940533235041, "grad_norm": 0.3338922228523045, "learning_rate": 5.948103580139052e-06, "loss": 0.3686, "step": 5987 }, { "epoch": 0.7357169185403613, "grad_norm": 0.3631590490877879, "learning_rate": 5.942974545083013e-06, "loss": 0.3386, "step": 5988 }, { "epoch": 0.7358397837572184, "grad_norm": 0.34503353611866083, "learning_rate": 5.937847176008072e-06, "loss": 0.3258, "step": 5989 }, { "epoch": 0.7359626489740755, "grad_norm": 0.40630585218634074, "learning_rate": 5.9327214738573645e-06, "loss": 0.3258, "step": 5990 }, { "epoch": 0.7360855141909326, "grad_norm": 0.2997231326930532, "learning_rate": 5.927597439573748e-06, "loss": 0.3483, "step": 5991 }, { "epoch": 0.7362083794077896, "grad_norm": 0.3414582766904079, "learning_rate": 5.92247507409974e-06, "loss": 0.3786, "step": 5992 }, { "epoch": 0.7363312446246467, "grad_norm": 0.3582166662973648, "learning_rate": 5.917354378377579e-06, "loss": 0.4058, "step": 5993 }, { "epoch": 0.7364541098415038, "grad_norm": 0.3550334222532756, "learning_rate": 5.912235353349171e-06, "loss": 0.3858, "step": 5994 }, { "epoch": 0.736576975058361, "grad_norm": 0.3355774227365862, "learning_rate": 5.907117999956134e-06, "loss": 0.4189, "step": 5995 }, { "epoch": 0.7366998402752181, "grad_norm": 0.34384954897137654, "learning_rate": 5.9020023191397766e-06, "loss": 0.414, "step": 5996 }, { "epoch": 0.7368227054920752, "grad_norm": 0.37491221160721977, "learning_rate": 5.896888311841084e-06, "loss": 0.3737, "step": 5997 }, { "epoch": 0.7369455707089323, "grad_norm": 0.33357366048323317, "learning_rate": 5.891775979000752e-06, "loss": 0.3528, "step": 5998 }, { "epoch": 0.7370684359257894, "grad_norm": 0.3574009017694032, "learning_rate": 5.886665321559158e-06, "loss": 0.4489, "step": 5999 }, { "epoch": 0.7371913011426465, "grad_norm": 0.30599920420570814, "learning_rate": 5.881556340456382e-06, "loss": 0.3899, "step": 6000 }, { "epoch": 0.7373141663595036, "grad_norm": 0.3874012969064186, "learning_rate": 5.876449036632177e-06, "loss": 0.4329, "step": 6001 }, { "epoch": 0.7374370315763608, "grad_norm": 0.3082363218633781, "learning_rate": 5.871343411026004e-06, "loss": 0.4452, "step": 6002 }, { "epoch": 0.7375598967932179, "grad_norm": 0.4227058906338993, "learning_rate": 5.866239464577008e-06, "loss": 0.4028, "step": 6003 }, { "epoch": 0.737682762010075, "grad_norm": 0.3527443858183526, "learning_rate": 5.8611371982240344e-06, "loss": 0.3644, "step": 6004 }, { "epoch": 0.7378056272269321, "grad_norm": 0.37485734612650157, "learning_rate": 5.856036612905598e-06, "loss": 0.4183, "step": 6005 }, { "epoch": 0.7379284924437891, "grad_norm": 0.38280543635042347, "learning_rate": 5.850937709559929e-06, "loss": 0.37, "step": 6006 }, { "epoch": 0.7380513576606462, "grad_norm": 0.35093696315279727, "learning_rate": 5.845840489124939e-06, "loss": 0.3845, "step": 6007 }, { "epoch": 0.7381742228775033, "grad_norm": 0.4012782634286917, "learning_rate": 5.840744952538218e-06, "loss": 0.3348, "step": 6008 }, { "epoch": 0.7382970880943605, "grad_norm": 0.3892576408014288, "learning_rate": 5.835651100737064e-06, "loss": 0.4014, "step": 6009 }, { "epoch": 0.7384199533112176, "grad_norm": 0.3981973552718461, "learning_rate": 5.8305589346584555e-06, "loss": 0.3767, "step": 6010 }, { "epoch": 0.7385428185280747, "grad_norm": 0.3372296623599361, "learning_rate": 5.825468455239073e-06, "loss": 0.469, "step": 6011 }, { "epoch": 0.7386656837449318, "grad_norm": 0.408181638931458, "learning_rate": 5.820379663415262e-06, "loss": 0.3435, "step": 6012 }, { "epoch": 0.7387885489617889, "grad_norm": 0.32652934546170204, "learning_rate": 5.81529256012308e-06, "loss": 0.377, "step": 6013 }, { "epoch": 0.738911414178646, "grad_norm": 0.36300340344850474, "learning_rate": 5.810207146298273e-06, "loss": 0.4142, "step": 6014 }, { "epoch": 0.7390342793955031, "grad_norm": 0.29621433861496765, "learning_rate": 5.8051234228762574e-06, "loss": 0.3331, "step": 6015 }, { "epoch": 0.7391571446123603, "grad_norm": 0.38148823468472365, "learning_rate": 5.800041390792163e-06, "loss": 0.4626, "step": 6016 }, { "epoch": 0.7392800098292174, "grad_norm": 0.34540759341779875, "learning_rate": 5.79496105098078e-06, "loss": 0.4483, "step": 6017 }, { "epoch": 0.7394028750460745, "grad_norm": 0.36245113520217803, "learning_rate": 5.789882404376626e-06, "loss": 0.3283, "step": 6018 }, { "epoch": 0.7395257402629316, "grad_norm": 0.43095166697396203, "learning_rate": 5.7848054519138686e-06, "loss": 0.44, "step": 6019 }, { "epoch": 0.7396486054797887, "grad_norm": 0.35729695307581893, "learning_rate": 5.77973019452639e-06, "loss": 0.3561, "step": 6020 }, { "epoch": 0.7397714706966457, "grad_norm": 0.45701379996716435, "learning_rate": 5.7746566331477375e-06, "loss": 0.4046, "step": 6021 }, { "epoch": 0.7398943359135028, "grad_norm": 0.3479150462840983, "learning_rate": 5.769584768711178e-06, "loss": 0.3748, "step": 6022 }, { "epoch": 0.74001720113036, "grad_norm": 0.3431460565077398, "learning_rate": 5.764514602149634e-06, "loss": 0.433, "step": 6023 }, { "epoch": 0.7401400663472171, "grad_norm": 0.28681609255786733, "learning_rate": 5.7594461343957416e-06, "loss": 0.3119, "step": 6024 }, { "epoch": 0.7402629315640742, "grad_norm": 0.3677327087481217, "learning_rate": 5.7543793663817995e-06, "loss": 0.4434, "step": 6025 }, { "epoch": 0.7403857967809313, "grad_norm": 0.4041499084566263, "learning_rate": 5.749314299039813e-06, "loss": 0.4272, "step": 6026 }, { "epoch": 0.7405086619977884, "grad_norm": 0.390595997059998, "learning_rate": 5.744250933301473e-06, "loss": 0.3925, "step": 6027 }, { "epoch": 0.7406315272146455, "grad_norm": 0.2995560973915198, "learning_rate": 5.739189270098137e-06, "loss": 0.3958, "step": 6028 }, { "epoch": 0.7407543924315027, "grad_norm": 0.35678342876230074, "learning_rate": 5.734129310360889e-06, "loss": 0.3528, "step": 6029 }, { "epoch": 0.7408772576483598, "grad_norm": 0.43280861777592006, "learning_rate": 5.729071055020456e-06, "loss": 0.3883, "step": 6030 }, { "epoch": 0.7410001228652169, "grad_norm": 0.375535819070019, "learning_rate": 5.724014505007285e-06, "loss": 0.3618, "step": 6031 }, { "epoch": 0.741122988082074, "grad_norm": 0.3578469363514859, "learning_rate": 5.7189596612514814e-06, "loss": 0.3921, "step": 6032 }, { "epoch": 0.7412458532989311, "grad_norm": 0.36660551842559763, "learning_rate": 5.71390652468286e-06, "loss": 0.4103, "step": 6033 }, { "epoch": 0.7413687185157882, "grad_norm": 0.37870664482645167, "learning_rate": 5.7088550962309175e-06, "loss": 0.3551, "step": 6034 }, { "epoch": 0.7414915837326453, "grad_norm": 0.29918376301278227, "learning_rate": 5.703805376824817e-06, "loss": 0.3529, "step": 6035 }, { "epoch": 0.7416144489495023, "grad_norm": 0.42393416584630983, "learning_rate": 5.69875736739343e-06, "loss": 0.4288, "step": 6036 }, { "epoch": 0.7417373141663595, "grad_norm": 0.38190827255085336, "learning_rate": 5.693711068865307e-06, "loss": 0.4397, "step": 6037 }, { "epoch": 0.7418601793832166, "grad_norm": 0.32835988490359264, "learning_rate": 5.688666482168682e-06, "loss": 0.374, "step": 6038 }, { "epoch": 0.7419830446000737, "grad_norm": 0.2967231737285458, "learning_rate": 5.683623608231467e-06, "loss": 0.3846, "step": 6039 }, { "epoch": 0.7421059098169308, "grad_norm": 0.3716993517948996, "learning_rate": 5.678582447981271e-06, "loss": 0.4215, "step": 6040 }, { "epoch": 0.7422287750337879, "grad_norm": 0.3591847362248036, "learning_rate": 5.673543002345383e-06, "loss": 0.4028, "step": 6041 }, { "epoch": 0.742351640250645, "grad_norm": 0.4436866265582736, "learning_rate": 5.66850527225078e-06, "loss": 0.4611, "step": 6042 }, { "epoch": 0.7424745054675022, "grad_norm": 0.4300369593376293, "learning_rate": 5.663469258624109e-06, "loss": 0.4639, "step": 6043 }, { "epoch": 0.7425973706843593, "grad_norm": 0.3002097351223872, "learning_rate": 5.658434962391719e-06, "loss": 0.3863, "step": 6044 }, { "epoch": 0.7427202359012164, "grad_norm": 0.36073919322022924, "learning_rate": 5.653402384479642e-06, "loss": 0.3946, "step": 6045 }, { "epoch": 0.7428431011180735, "grad_norm": 0.3707749397298706, "learning_rate": 5.648371525813575e-06, "loss": 0.4362, "step": 6046 }, { "epoch": 0.7429659663349306, "grad_norm": 0.3754375032838958, "learning_rate": 5.6433423873189184e-06, "loss": 0.3619, "step": 6047 }, { "epoch": 0.7430888315517877, "grad_norm": 0.3699771879071182, "learning_rate": 5.638314969920749e-06, "loss": 0.3433, "step": 6048 }, { "epoch": 0.7432116967686448, "grad_norm": 0.5512550717807956, "learning_rate": 5.633289274543835e-06, "loss": 0.419, "step": 6049 }, { "epoch": 0.7433345619855019, "grad_norm": 0.3183996282886081, "learning_rate": 5.628265302112607e-06, "loss": 0.3462, "step": 6050 }, { "epoch": 0.743457427202359, "grad_norm": 0.36519477535746214, "learning_rate": 5.623243053551199e-06, "loss": 0.395, "step": 6051 }, { "epoch": 0.7435802924192161, "grad_norm": 0.2936392489833197, "learning_rate": 5.618222529783428e-06, "loss": 0.3581, "step": 6052 }, { "epoch": 0.7437031576360732, "grad_norm": 0.31117293011797365, "learning_rate": 5.613203731732772e-06, "loss": 0.4064, "step": 6053 }, { "epoch": 0.7438260228529303, "grad_norm": 0.3944362900918956, "learning_rate": 5.608186660322421e-06, "loss": 0.4191, "step": 6054 }, { "epoch": 0.7439488880697874, "grad_norm": 0.35836898317138877, "learning_rate": 5.603171316475213e-06, "loss": 0.4059, "step": 6055 }, { "epoch": 0.7440717532866445, "grad_norm": 0.4152948841220696, "learning_rate": 5.598157701113714e-06, "loss": 0.3883, "step": 6056 }, { "epoch": 0.7441946185035017, "grad_norm": 0.3674152207112403, "learning_rate": 5.593145815160127e-06, "loss": 0.3983, "step": 6057 }, { "epoch": 0.7443174837203588, "grad_norm": 0.3083822247901158, "learning_rate": 5.588135659536366e-06, "loss": 0.4743, "step": 6058 }, { "epoch": 0.7444403489372159, "grad_norm": 0.3559701425120208, "learning_rate": 5.583127235164003e-06, "loss": 0.3843, "step": 6059 }, { "epoch": 0.744563214154073, "grad_norm": 0.4562214236500804, "learning_rate": 5.578120542964324e-06, "loss": 0.4236, "step": 6060 }, { "epoch": 0.7446860793709301, "grad_norm": 0.44489635641211744, "learning_rate": 5.573115583858262e-06, "loss": 0.3998, "step": 6061 }, { "epoch": 0.7448089445877872, "grad_norm": 0.3811482038455336, "learning_rate": 5.568112358766461e-06, "loss": 0.3923, "step": 6062 }, { "epoch": 0.7449318098046444, "grad_norm": 0.36593560409161047, "learning_rate": 5.563110868609215e-06, "loss": 0.4344, "step": 6063 }, { "epoch": 0.7450546750215015, "grad_norm": 0.33561597960241296, "learning_rate": 5.5581111143065265e-06, "loss": 0.3694, "step": 6064 }, { "epoch": 0.7451775402383585, "grad_norm": 0.39501733243689596, "learning_rate": 5.55311309677807e-06, "loss": 0.5, "step": 6065 }, { "epoch": 0.7453004054552156, "grad_norm": 0.2886124813680191, "learning_rate": 5.548116816943191e-06, "loss": 0.3458, "step": 6066 }, { "epoch": 0.7454232706720727, "grad_norm": 0.28425293339285973, "learning_rate": 5.543122275720922e-06, "loss": 0.3254, "step": 6067 }, { "epoch": 0.7455461358889298, "grad_norm": 0.330864487635686, "learning_rate": 5.538129474029984e-06, "loss": 0.431, "step": 6068 }, { "epoch": 0.7456690011057869, "grad_norm": 0.4027695834555939, "learning_rate": 5.533138412788771e-06, "loss": 0.3702, "step": 6069 }, { "epoch": 0.745791866322644, "grad_norm": 0.4272711979127558, "learning_rate": 5.528149092915346e-06, "loss": 0.4209, "step": 6070 }, { "epoch": 0.7459147315395012, "grad_norm": 0.3968203844134727, "learning_rate": 5.523161515327469e-06, "loss": 0.4381, "step": 6071 }, { "epoch": 0.7460375967563583, "grad_norm": 0.3487084698403199, "learning_rate": 5.518175680942577e-06, "loss": 0.3153, "step": 6072 }, { "epoch": 0.7461604619732154, "grad_norm": 0.3888046731200097, "learning_rate": 5.513191590677772e-06, "loss": 0.3868, "step": 6073 }, { "epoch": 0.7462833271900725, "grad_norm": 0.38276836169272144, "learning_rate": 5.508209245449849e-06, "loss": 0.3574, "step": 6074 }, { "epoch": 0.7464061924069296, "grad_norm": 0.3424048018875607, "learning_rate": 5.503228646175278e-06, "loss": 0.4305, "step": 6075 }, { "epoch": 0.7465290576237867, "grad_norm": 0.36380279932054566, "learning_rate": 5.498249793770216e-06, "loss": 0.3911, "step": 6076 }, { "epoch": 0.7466519228406439, "grad_norm": 0.2748656888464537, "learning_rate": 5.493272689150478e-06, "loss": 0.3626, "step": 6077 }, { "epoch": 0.746774788057501, "grad_norm": 0.3656867536642334, "learning_rate": 5.4882973332315746e-06, "loss": 0.4172, "step": 6078 }, { "epoch": 0.746897653274358, "grad_norm": 0.3432981849355081, "learning_rate": 5.4833237269286915e-06, "loss": 0.2696, "step": 6079 }, { "epoch": 0.7470205184912151, "grad_norm": 0.36042236567490366, "learning_rate": 5.478351871156696e-06, "loss": 0.4275, "step": 6080 }, { "epoch": 0.7471433837080722, "grad_norm": 0.4495920164934837, "learning_rate": 5.473381766830119e-06, "loss": 0.3353, "step": 6081 }, { "epoch": 0.7472662489249293, "grad_norm": 0.3318699155363495, "learning_rate": 5.468413414863184e-06, "loss": 0.5856, "step": 6082 }, { "epoch": 0.7473891141417864, "grad_norm": 0.3638179791188276, "learning_rate": 5.463446816169792e-06, "loss": 0.4031, "step": 6083 }, { "epoch": 0.7475119793586436, "grad_norm": 0.3899692108018746, "learning_rate": 5.458481971663505e-06, "loss": 0.3807, "step": 6084 }, { "epoch": 0.7476348445755007, "grad_norm": 0.3900318758824112, "learning_rate": 5.453518882257586e-06, "loss": 0.4531, "step": 6085 }, { "epoch": 0.7477577097923578, "grad_norm": 0.3569294436516303, "learning_rate": 5.448557548864948e-06, "loss": 0.3464, "step": 6086 }, { "epoch": 0.7478805750092149, "grad_norm": 0.42712042877829787, "learning_rate": 5.4435979723982145e-06, "loss": 0.4477, "step": 6087 }, { "epoch": 0.748003440226072, "grad_norm": 0.38339915300753163, "learning_rate": 5.438640153769654e-06, "loss": 0.4251, "step": 6088 }, { "epoch": 0.7481263054429291, "grad_norm": 0.3897532728166376, "learning_rate": 5.433684093891231e-06, "loss": 0.3874, "step": 6089 }, { "epoch": 0.7482491706597862, "grad_norm": 0.29776718340117864, "learning_rate": 5.428729793674582e-06, "loss": 0.4389, "step": 6090 }, { "epoch": 0.7483720358766434, "grad_norm": 0.35455856860158147, "learning_rate": 5.423777254031013e-06, "loss": 0.3717, "step": 6091 }, { "epoch": 0.7484949010935005, "grad_norm": 0.4743450118724182, "learning_rate": 5.4188264758715165e-06, "loss": 0.4251, "step": 6092 }, { "epoch": 0.7486177663103576, "grad_norm": 0.3940224840150728, "learning_rate": 5.4138774601067456e-06, "loss": 0.3626, "step": 6093 }, { "epoch": 0.7487406315272146, "grad_norm": 0.43883631218206864, "learning_rate": 5.408930207647057e-06, "loss": 0.4322, "step": 6094 }, { "epoch": 0.7488634967440717, "grad_norm": 0.42534048240720096, "learning_rate": 5.403984719402452e-06, "loss": 0.4489, "step": 6095 }, { "epoch": 0.7489863619609288, "grad_norm": 0.32836376854707144, "learning_rate": 5.399040996282631e-06, "loss": 0.4228, "step": 6096 }, { "epoch": 0.7491092271777859, "grad_norm": 0.285030590990854, "learning_rate": 5.394099039196947e-06, "loss": 0.4014, "step": 6097 }, { "epoch": 0.7492320923946431, "grad_norm": 0.3775247641414592, "learning_rate": 5.38915884905445e-06, "loss": 0.3866, "step": 6098 }, { "epoch": 0.7493549576115002, "grad_norm": 0.3863791671744741, "learning_rate": 5.384220426763854e-06, "loss": 0.4234, "step": 6099 }, { "epoch": 0.7494778228283573, "grad_norm": 0.38897371416606435, "learning_rate": 5.379283773233556e-06, "loss": 0.447, "step": 6100 }, { "epoch": 0.7496006880452144, "grad_norm": 0.2981549612255735, "learning_rate": 5.374348889371608e-06, "loss": 0.3767, "step": 6101 }, { "epoch": 0.7497235532620715, "grad_norm": 0.3341558121380502, "learning_rate": 5.369415776085759e-06, "loss": 0.396, "step": 6102 }, { "epoch": 0.7498464184789286, "grad_norm": 0.7626127444592439, "learning_rate": 5.364484434283427e-06, "loss": 0.5366, "step": 6103 }, { "epoch": 0.7499692836957857, "grad_norm": 0.38347547132766324, "learning_rate": 5.3595548648716884e-06, "loss": 0.4248, "step": 6104 }, { "epoch": 0.7500921489126429, "grad_norm": 0.36438034207543823, "learning_rate": 5.354627068757311e-06, "loss": 0.311, "step": 6105 }, { "epoch": 0.7502150141295, "grad_norm": 0.44376473647330883, "learning_rate": 5.349701046846734e-06, "loss": 0.4397, "step": 6106 }, { "epoch": 0.7503378793463571, "grad_norm": 0.29073032677140614, "learning_rate": 5.344776800046068e-06, "loss": 0.3775, "step": 6107 }, { "epoch": 0.7504607445632141, "grad_norm": 0.38406192899369507, "learning_rate": 5.33985432926109e-06, "loss": 0.4114, "step": 6108 }, { "epoch": 0.7505836097800712, "grad_norm": 0.2986014140405129, "learning_rate": 5.334933635397261e-06, "loss": 0.4333, "step": 6109 }, { "epoch": 0.7507064749969283, "grad_norm": 0.33491315763159274, "learning_rate": 5.330014719359712e-06, "loss": 0.4145, "step": 6110 }, { "epoch": 0.7508293402137854, "grad_norm": 0.3546998737642342, "learning_rate": 5.325097582053239e-06, "loss": 0.3745, "step": 6111 }, { "epoch": 0.7509522054306426, "grad_norm": 0.311651598584578, "learning_rate": 5.320182224382322e-06, "loss": 0.476, "step": 6112 }, { "epoch": 0.7510750706474997, "grad_norm": 0.3410798983069756, "learning_rate": 5.315268647251109e-06, "loss": 0.3517, "step": 6113 }, { "epoch": 0.7511979358643568, "grad_norm": 0.42382951574035665, "learning_rate": 5.310356851563427e-06, "loss": 0.4051, "step": 6114 }, { "epoch": 0.7513208010812139, "grad_norm": 0.447768018138447, "learning_rate": 5.305446838222757e-06, "loss": 0.4036, "step": 6115 }, { "epoch": 0.751443666298071, "grad_norm": 0.4784578762336049, "learning_rate": 5.300538608132269e-06, "loss": 0.3605, "step": 6116 }, { "epoch": 0.7515665315149281, "grad_norm": 0.33611288207734, "learning_rate": 5.295632162194806e-06, "loss": 0.3922, "step": 6117 }, { "epoch": 0.7516893967317853, "grad_norm": 0.34526991349748815, "learning_rate": 5.290727501312867e-06, "loss": 0.2935, "step": 6118 }, { "epoch": 0.7518122619486424, "grad_norm": 0.3153484049521922, "learning_rate": 5.285824626388641e-06, "loss": 0.422, "step": 6119 }, { "epoch": 0.7519351271654995, "grad_norm": 0.3736948860885919, "learning_rate": 5.280923538323967e-06, "loss": 0.4252, "step": 6120 }, { "epoch": 0.7520579923823566, "grad_norm": 0.34110471577060836, "learning_rate": 5.276024238020389e-06, "loss": 0.3036, "step": 6121 }, { "epoch": 0.7521808575992137, "grad_norm": 0.35130657759236467, "learning_rate": 5.2711267263790845e-06, "loss": 0.3738, "step": 6122 }, { "epoch": 0.7523037228160707, "grad_norm": 0.36762089376476675, "learning_rate": 5.2662310043009295e-06, "loss": 0.3822, "step": 6123 }, { "epoch": 0.7524265880329278, "grad_norm": 0.3983573158976961, "learning_rate": 5.2613370726864445e-06, "loss": 0.3381, "step": 6124 }, { "epoch": 0.752549453249785, "grad_norm": 0.3408143808153673, "learning_rate": 5.256444932435859e-06, "loss": 0.4312, "step": 6125 }, { "epoch": 0.7526723184666421, "grad_norm": 0.33872198941580073, "learning_rate": 5.251554584449034e-06, "loss": 0.423, "step": 6126 }, { "epoch": 0.7527951836834992, "grad_norm": 0.45136561593914726, "learning_rate": 5.246666029625527e-06, "loss": 0.3478, "step": 6127 }, { "epoch": 0.7529180489003563, "grad_norm": 0.3545921695846939, "learning_rate": 5.241779268864546e-06, "loss": 0.3965, "step": 6128 }, { "epoch": 0.7530409141172134, "grad_norm": 0.37923997349124206, "learning_rate": 5.2368943030649835e-06, "loss": 0.4168, "step": 6129 }, { "epoch": 0.7531637793340705, "grad_norm": 0.3475050313775172, "learning_rate": 5.2320111331254054e-06, "loss": 0.38, "step": 6130 }, { "epoch": 0.7532866445509276, "grad_norm": 0.3449229964074002, "learning_rate": 5.227129759944024e-06, "loss": 0.3759, "step": 6131 }, { "epoch": 0.7534095097677848, "grad_norm": 0.37559042177597646, "learning_rate": 5.2222501844187465e-06, "loss": 0.362, "step": 6132 }, { "epoch": 0.7535323749846419, "grad_norm": 0.4251871184135119, "learning_rate": 5.217372407447135e-06, "loss": 0.3979, "step": 6133 }, { "epoch": 0.753655240201499, "grad_norm": 0.30001517380366804, "learning_rate": 5.212496429926432e-06, "loss": 0.3215, "step": 6134 }, { "epoch": 0.7537781054183561, "grad_norm": 0.43215339861948576, "learning_rate": 5.2076222527535296e-06, "loss": 0.4681, "step": 6135 }, { "epoch": 0.7539009706352132, "grad_norm": 0.3488921497751562, "learning_rate": 5.202749876825011e-06, "loss": 0.3636, "step": 6136 }, { "epoch": 0.7540238358520703, "grad_norm": 0.3147529907236149, "learning_rate": 5.197879303037119e-06, "loss": 0.4355, "step": 6137 }, { "epoch": 0.7541467010689273, "grad_norm": 0.3712574433419498, "learning_rate": 5.193010532285755e-06, "loss": 0.3845, "step": 6138 }, { "epoch": 0.7542695662857845, "grad_norm": 0.30786107573549437, "learning_rate": 5.188143565466503e-06, "loss": 0.298, "step": 6139 }, { "epoch": 0.7543924315026416, "grad_norm": 0.4199218345005405, "learning_rate": 5.183278403474611e-06, "loss": 0.4986, "step": 6140 }, { "epoch": 0.7545152967194987, "grad_norm": 0.44585325361686595, "learning_rate": 5.1784150472049975e-06, "loss": 0.4313, "step": 6141 }, { "epoch": 0.7546381619363558, "grad_norm": 0.4521901209367899, "learning_rate": 5.173553497552235e-06, "loss": 0.4467, "step": 6142 }, { "epoch": 0.7547610271532129, "grad_norm": 0.430395068491835, "learning_rate": 5.168693755410581e-06, "loss": 0.3363, "step": 6143 }, { "epoch": 0.75488389237007, "grad_norm": 0.34102517905720453, "learning_rate": 5.163835821673952e-06, "loss": 0.3604, "step": 6144 }, { "epoch": 0.7550067575869271, "grad_norm": 0.3236119111832728, "learning_rate": 5.158979697235938e-06, "loss": 0.4255, "step": 6145 }, { "epoch": 0.7551296228037843, "grad_norm": 0.3292853561740901, "learning_rate": 5.154125382989783e-06, "loss": 0.4395, "step": 6146 }, { "epoch": 0.7552524880206414, "grad_norm": 0.3153474158828406, "learning_rate": 5.149272879828411e-06, "loss": 0.3409, "step": 6147 }, { "epoch": 0.7553753532374985, "grad_norm": 0.35662537729199995, "learning_rate": 5.144422188644414e-06, "loss": 0.4407, "step": 6148 }, { "epoch": 0.7554982184543556, "grad_norm": 0.3220495253704688, "learning_rate": 5.139573310330035e-06, "loss": 0.3922, "step": 6149 }, { "epoch": 0.7556210836712127, "grad_norm": 0.5315596950617989, "learning_rate": 5.134726245777202e-06, "loss": 0.4116, "step": 6150 }, { "epoch": 0.7557439488880698, "grad_norm": 0.391370667015126, "learning_rate": 5.1298809958774884e-06, "loss": 0.354, "step": 6151 }, { "epoch": 0.7558668141049268, "grad_norm": 0.27864350967312723, "learning_rate": 5.125037561522166e-06, "loss": 0.3926, "step": 6152 }, { "epoch": 0.755989679321784, "grad_norm": 0.4339329763214615, "learning_rate": 5.120195943602138e-06, "loss": 0.4391, "step": 6153 }, { "epoch": 0.7561125445386411, "grad_norm": 0.3491322137544811, "learning_rate": 5.115356143007993e-06, "loss": 0.3733, "step": 6154 }, { "epoch": 0.7562354097554982, "grad_norm": 0.3206148323289685, "learning_rate": 5.110518160629987e-06, "loss": 0.3824, "step": 6155 }, { "epoch": 0.7563582749723553, "grad_norm": 0.27199920134929706, "learning_rate": 5.105681997358023e-06, "loss": 0.4306, "step": 6156 }, { "epoch": 0.7564811401892124, "grad_norm": 0.34197078787685886, "learning_rate": 5.100847654081695e-06, "loss": 0.3932, "step": 6157 }, { "epoch": 0.7566040054060695, "grad_norm": 0.3145381072140107, "learning_rate": 5.096015131690233e-06, "loss": 0.4202, "step": 6158 }, { "epoch": 0.7567268706229267, "grad_norm": 0.2616824786972446, "learning_rate": 5.091184431072567e-06, "loss": 0.3739, "step": 6159 }, { "epoch": 0.7568497358397838, "grad_norm": 0.4728272884008911, "learning_rate": 5.086355553117259e-06, "loss": 0.4505, "step": 6160 }, { "epoch": 0.7569726010566409, "grad_norm": 0.3339786584098611, "learning_rate": 5.08152849871256e-06, "loss": 0.3312, "step": 6161 }, { "epoch": 0.757095466273498, "grad_norm": 0.3497142027566144, "learning_rate": 5.07670326874636e-06, "loss": 0.3799, "step": 6162 }, { "epoch": 0.7572183314903551, "grad_norm": 0.31136946468054116, "learning_rate": 5.07187986410625e-06, "loss": 0.4143, "step": 6163 }, { "epoch": 0.7573411967072122, "grad_norm": 0.43604338985065383, "learning_rate": 5.067058285679448e-06, "loss": 0.3945, "step": 6164 }, { "epoch": 0.7574640619240693, "grad_norm": 0.34317364634280173, "learning_rate": 5.06223853435286e-06, "loss": 0.339, "step": 6165 }, { "epoch": 0.7575869271409265, "grad_norm": 0.355260560510427, "learning_rate": 5.057420611013041e-06, "loss": 0.4394, "step": 6166 }, { "epoch": 0.7577097923577835, "grad_norm": 0.36889047234757427, "learning_rate": 5.052604516546221e-06, "loss": 0.3577, "step": 6167 }, { "epoch": 0.7578326575746406, "grad_norm": 0.4722455438000802, "learning_rate": 5.047790251838293e-06, "loss": 0.4218, "step": 6168 }, { "epoch": 0.7579555227914977, "grad_norm": 0.39100635641785636, "learning_rate": 5.042977817774802e-06, "loss": 0.3438, "step": 6169 }, { "epoch": 0.7580783880083548, "grad_norm": 0.3997360111018277, "learning_rate": 5.038167215240967e-06, "loss": 0.3794, "step": 6170 }, { "epoch": 0.7582012532252119, "grad_norm": 0.44904096977349744, "learning_rate": 5.033358445121669e-06, "loss": 0.3728, "step": 6171 }, { "epoch": 0.758324118442069, "grad_norm": 0.3496190600602258, "learning_rate": 5.028551508301453e-06, "loss": 0.3623, "step": 6172 }, { "epoch": 0.7584469836589262, "grad_norm": 0.26946101197470096, "learning_rate": 5.0237464056645155e-06, "loss": 0.4007, "step": 6173 }, { "epoch": 0.7585698488757833, "grad_norm": 0.3383482945032429, "learning_rate": 5.0189431380947295e-06, "loss": 0.3974, "step": 6174 }, { "epoch": 0.7586927140926404, "grad_norm": 0.2949422164488736, "learning_rate": 5.014141706475626e-06, "loss": 0.368, "step": 6175 }, { "epoch": 0.7588155793094975, "grad_norm": 0.3938519360642704, "learning_rate": 5.009342111690393e-06, "loss": 0.3777, "step": 6176 }, { "epoch": 0.7589384445263546, "grad_norm": 0.31977834318356513, "learning_rate": 5.0045443546218855e-06, "loss": 0.444, "step": 6177 }, { "epoch": 0.7590613097432117, "grad_norm": 0.2974932909930261, "learning_rate": 4.999748436152621e-06, "loss": 0.3397, "step": 6178 }, { "epoch": 0.7591841749600688, "grad_norm": 0.4079214392452578, "learning_rate": 4.9949543571647834e-06, "loss": 0.332, "step": 6179 }, { "epoch": 0.759307040176926, "grad_norm": 0.36710982597816233, "learning_rate": 4.9901621185402005e-06, "loss": 0.3702, "step": 6180 }, { "epoch": 0.759429905393783, "grad_norm": 0.3245020827421684, "learning_rate": 4.985371721160381e-06, "loss": 0.376, "step": 6181 }, { "epoch": 0.7595527706106401, "grad_norm": 0.3760681002079718, "learning_rate": 4.980583165906486e-06, "loss": 0.4362, "step": 6182 }, { "epoch": 0.7596756358274972, "grad_norm": 0.3209506389208336, "learning_rate": 4.9757964536593444e-06, "loss": 0.3818, "step": 6183 }, { "epoch": 0.7597985010443543, "grad_norm": 0.3392323862012653, "learning_rate": 4.971011585299431e-06, "loss": 0.398, "step": 6184 }, { "epoch": 0.7599213662612114, "grad_norm": 0.5231686375154018, "learning_rate": 4.966228561706895e-06, "loss": 0.4873, "step": 6185 }, { "epoch": 0.7600442314780685, "grad_norm": 0.38258812632145744, "learning_rate": 4.9614473837615505e-06, "loss": 0.3671, "step": 6186 }, { "epoch": 0.7601670966949257, "grad_norm": 0.3301188805494917, "learning_rate": 4.956668052342852e-06, "loss": 0.3413, "step": 6187 }, { "epoch": 0.7602899619117828, "grad_norm": 0.31850554126897285, "learning_rate": 4.951890568329937e-06, "loss": 0.3629, "step": 6188 }, { "epoch": 0.7604128271286399, "grad_norm": 0.3635101975647412, "learning_rate": 4.947114932601577e-06, "loss": 0.3813, "step": 6189 }, { "epoch": 0.760535692345497, "grad_norm": 0.33324025691368736, "learning_rate": 4.94234114603624e-06, "loss": 0.4008, "step": 6190 }, { "epoch": 0.7606585575623541, "grad_norm": 0.3443243853234103, "learning_rate": 4.937569209512019e-06, "loss": 0.4232, "step": 6191 }, { "epoch": 0.7607814227792112, "grad_norm": 0.3898139946469685, "learning_rate": 4.9327991239066885e-06, "loss": 0.4252, "step": 6192 }, { "epoch": 0.7609042879960684, "grad_norm": 0.3218485664044362, "learning_rate": 4.928030890097666e-06, "loss": 0.3332, "step": 6193 }, { "epoch": 0.7610271532129255, "grad_norm": 0.38669023309704986, "learning_rate": 4.923264508962044e-06, "loss": 0.4845, "step": 6194 }, { "epoch": 0.7611500184297826, "grad_norm": 0.3639845580166072, "learning_rate": 4.91849998137657e-06, "loss": 0.4141, "step": 6195 }, { "epoch": 0.7612728836466396, "grad_norm": 0.3433477893220173, "learning_rate": 4.9137373082176336e-06, "loss": 0.4819, "step": 6196 }, { "epoch": 0.7613957488634967, "grad_norm": 0.367451041478638, "learning_rate": 4.908976490361316e-06, "loss": 0.44, "step": 6197 }, { "epoch": 0.7615186140803538, "grad_norm": 0.3927973757837423, "learning_rate": 4.904217528683327e-06, "loss": 0.4173, "step": 6198 }, { "epoch": 0.7616414792972109, "grad_norm": 0.35600756746078205, "learning_rate": 4.899460424059056e-06, "loss": 0.3512, "step": 6199 }, { "epoch": 0.761764344514068, "grad_norm": 0.39917187219441824, "learning_rate": 4.894705177363523e-06, "loss": 0.4132, "step": 6200 }, { "epoch": 0.7618872097309252, "grad_norm": 0.4021057638044734, "learning_rate": 4.88995178947145e-06, "loss": 0.3768, "step": 6201 }, { "epoch": 0.7620100749477823, "grad_norm": 0.4273716977412925, "learning_rate": 4.885200261257172e-06, "loss": 0.4037, "step": 6202 }, { "epoch": 0.7621329401646394, "grad_norm": 0.3675841796965548, "learning_rate": 4.880450593594717e-06, "loss": 0.4078, "step": 6203 }, { "epoch": 0.7622558053814965, "grad_norm": 0.35379091719680283, "learning_rate": 4.87570278735774e-06, "loss": 0.3469, "step": 6204 }, { "epoch": 0.7623786705983536, "grad_norm": 0.33609166271046037, "learning_rate": 4.870956843419579e-06, "loss": 0.3515, "step": 6205 }, { "epoch": 0.7625015358152107, "grad_norm": 0.3681421178053491, "learning_rate": 4.866212762653221e-06, "loss": 0.341, "step": 6206 }, { "epoch": 0.7626244010320679, "grad_norm": 0.40804324225184707, "learning_rate": 4.861470545931302e-06, "loss": 0.3868, "step": 6207 }, { "epoch": 0.762747266248925, "grad_norm": 0.3125684259216847, "learning_rate": 4.856730194126124e-06, "loss": 0.3907, "step": 6208 }, { "epoch": 0.7628701314657821, "grad_norm": 0.3711485321989131, "learning_rate": 4.851991708109646e-06, "loss": 0.3664, "step": 6209 }, { "epoch": 0.7629929966826391, "grad_norm": 0.34417586700958674, "learning_rate": 4.8472550887534865e-06, "loss": 0.3359, "step": 6210 }, { "epoch": 0.7631158618994962, "grad_norm": 0.37745273866834345, "learning_rate": 4.842520336928904e-06, "loss": 0.4884, "step": 6211 }, { "epoch": 0.7632387271163533, "grad_norm": 0.34519643507775044, "learning_rate": 4.837787453506833e-06, "loss": 0.4115, "step": 6212 }, { "epoch": 0.7633615923332104, "grad_norm": 0.37343781759554123, "learning_rate": 4.83305643935786e-06, "loss": 0.4176, "step": 6213 }, { "epoch": 0.7634844575500676, "grad_norm": 0.33217327059900553, "learning_rate": 4.828327295352217e-06, "loss": 0.3632, "step": 6214 }, { "epoch": 0.7636073227669247, "grad_norm": 0.3593581406778232, "learning_rate": 4.8236000223598045e-06, "loss": 0.3365, "step": 6215 }, { "epoch": 0.7637301879837818, "grad_norm": 0.338099121438429, "learning_rate": 4.8188746212501634e-06, "loss": 0.3616, "step": 6216 }, { "epoch": 0.7638530532006389, "grad_norm": 0.4342543346610297, "learning_rate": 4.814151092892518e-06, "loss": 0.4723, "step": 6217 }, { "epoch": 0.763975918417496, "grad_norm": 0.31190545688081717, "learning_rate": 4.809429438155717e-06, "loss": 0.3986, "step": 6218 }, { "epoch": 0.7640987836343531, "grad_norm": 0.35025410829617987, "learning_rate": 4.804709657908283e-06, "loss": 0.4499, "step": 6219 }, { "epoch": 0.7642216488512102, "grad_norm": 0.4156540556758283, "learning_rate": 4.799991753018393e-06, "loss": 0.4477, "step": 6220 }, { "epoch": 0.7643445140680674, "grad_norm": 0.3207806837171755, "learning_rate": 4.795275724353867e-06, "loss": 0.4367, "step": 6221 }, { "epoch": 0.7644673792849245, "grad_norm": 0.4101037756913017, "learning_rate": 4.790561572782192e-06, "loss": 0.4498, "step": 6222 }, { "epoch": 0.7645902445017816, "grad_norm": 0.3810683865336674, "learning_rate": 4.785849299170502e-06, "loss": 0.5124, "step": 6223 }, { "epoch": 0.7647131097186387, "grad_norm": 0.3320834045345639, "learning_rate": 4.7811389043856e-06, "loss": 0.3732, "step": 6224 }, { "epoch": 0.7648359749354957, "grad_norm": 0.35780393188429804, "learning_rate": 4.776430389293919e-06, "loss": 0.341, "step": 6225 }, { "epoch": 0.7649588401523528, "grad_norm": 0.39553083003171613, "learning_rate": 4.77172375476157e-06, "loss": 0.4159, "step": 6226 }, { "epoch": 0.7650817053692099, "grad_norm": 0.3574340955627426, "learning_rate": 4.767019001654295e-06, "loss": 0.4595, "step": 6227 }, { "epoch": 0.7652045705860671, "grad_norm": 0.3209568301772379, "learning_rate": 4.762316130837522e-06, "loss": 0.3969, "step": 6228 }, { "epoch": 0.7653274358029242, "grad_norm": 0.3609493334202166, "learning_rate": 4.757615143176296e-06, "loss": 0.4649, "step": 6229 }, { "epoch": 0.7654503010197813, "grad_norm": 0.3759638873953825, "learning_rate": 4.752916039535345e-06, "loss": 0.3282, "step": 6230 }, { "epoch": 0.7655731662366384, "grad_norm": 0.32285057725512745, "learning_rate": 4.74821882077903e-06, "loss": 0.3173, "step": 6231 }, { "epoch": 0.7656960314534955, "grad_norm": 0.35765161039285354, "learning_rate": 4.743523487771378e-06, "loss": 0.3167, "step": 6232 }, { "epoch": 0.7658188966703526, "grad_norm": 0.3275096552125981, "learning_rate": 4.73883004137607e-06, "loss": 0.3083, "step": 6233 }, { "epoch": 0.7659417618872097, "grad_norm": 0.3755939151340565, "learning_rate": 4.7341384824564235e-06, "loss": 0.3945, "step": 6234 }, { "epoch": 0.7660646271040669, "grad_norm": 0.35335093559498676, "learning_rate": 4.729448811875428e-06, "loss": 0.3754, "step": 6235 }, { "epoch": 0.766187492320924, "grad_norm": 0.36731433041291567, "learning_rate": 4.724761030495716e-06, "loss": 0.433, "step": 6236 }, { "epoch": 0.7663103575377811, "grad_norm": 0.4392239248843756, "learning_rate": 4.72007513917958e-06, "loss": 0.3395, "step": 6237 }, { "epoch": 0.7664332227546382, "grad_norm": 0.3635462706931041, "learning_rate": 4.71539113878895e-06, "loss": 0.3708, "step": 6238 }, { "epoch": 0.7665560879714952, "grad_norm": 0.3376668523864839, "learning_rate": 4.710709030185422e-06, "loss": 0.334, "step": 6239 }, { "epoch": 0.7666789531883523, "grad_norm": 0.2652810626812866, "learning_rate": 4.706028814230245e-06, "loss": 0.3658, "step": 6240 }, { "epoch": 0.7668018184052094, "grad_norm": 0.3184470001985646, "learning_rate": 4.701350491784302e-06, "loss": 0.3608, "step": 6241 }, { "epoch": 0.7669246836220666, "grad_norm": 0.31566274969757757, "learning_rate": 4.696674063708148e-06, "loss": 0.4114, "step": 6242 }, { "epoch": 0.7670475488389237, "grad_norm": 0.37026176705390057, "learning_rate": 4.691999530861981e-06, "loss": 0.3583, "step": 6243 }, { "epoch": 0.7671704140557808, "grad_norm": 0.5152498635817025, "learning_rate": 4.687326894105657e-06, "loss": 0.4629, "step": 6244 }, { "epoch": 0.7672932792726379, "grad_norm": 0.3184692403328827, "learning_rate": 4.682656154298662e-06, "loss": 0.4139, "step": 6245 }, { "epoch": 0.767416144489495, "grad_norm": 0.348264037606802, "learning_rate": 4.67798731230016e-06, "loss": 0.364, "step": 6246 }, { "epoch": 0.7675390097063521, "grad_norm": 0.3908434271942013, "learning_rate": 4.673320368968951e-06, "loss": 0.4453, "step": 6247 }, { "epoch": 0.7676618749232093, "grad_norm": 0.365882304966584, "learning_rate": 4.668655325163493e-06, "loss": 0.392, "step": 6248 }, { "epoch": 0.7677847401400664, "grad_norm": 0.4579462389777439, "learning_rate": 4.663992181741883e-06, "loss": 0.3354, "step": 6249 }, { "epoch": 0.7679076053569235, "grad_norm": 0.29350622699261675, "learning_rate": 4.659330939561879e-06, "loss": 0.3571, "step": 6250 }, { "epoch": 0.7680304705737806, "grad_norm": 0.3062456345480231, "learning_rate": 4.654671599480893e-06, "loss": 0.3959, "step": 6251 }, { "epoch": 0.7681533357906377, "grad_norm": 0.315984287075518, "learning_rate": 4.650014162355969e-06, "loss": 0.3376, "step": 6252 }, { "epoch": 0.7682762010074948, "grad_norm": 0.46501216899025266, "learning_rate": 4.6453586290438214e-06, "loss": 0.4208, "step": 6253 }, { "epoch": 0.7683990662243518, "grad_norm": 0.3121992632119754, "learning_rate": 4.640705000400795e-06, "loss": 0.4029, "step": 6254 }, { "epoch": 0.768521931441209, "grad_norm": 0.29253251250364953, "learning_rate": 4.636053277282909e-06, "loss": 0.4059, "step": 6255 }, { "epoch": 0.7686447966580661, "grad_norm": 0.3201726830579793, "learning_rate": 4.631403460545806e-06, "loss": 0.3196, "step": 6256 }, { "epoch": 0.7687676618749232, "grad_norm": 0.326470570506357, "learning_rate": 4.626755551044798e-06, "loss": 0.4186, "step": 6257 }, { "epoch": 0.7688905270917803, "grad_norm": 0.3333465065726669, "learning_rate": 4.622109549634829e-06, "loss": 0.333, "step": 6258 }, { "epoch": 0.7690133923086374, "grad_norm": 0.35675355129647784, "learning_rate": 4.617465457170504e-06, "loss": 0.3571, "step": 6259 }, { "epoch": 0.7691362575254945, "grad_norm": 0.34256476511367717, "learning_rate": 4.6128232745060815e-06, "loss": 0.3672, "step": 6260 }, { "epoch": 0.7692591227423516, "grad_norm": 0.30769479388606374, "learning_rate": 4.608183002495445e-06, "loss": 0.3945, "step": 6261 }, { "epoch": 0.7693819879592088, "grad_norm": 0.3459871569230709, "learning_rate": 4.603544641992161e-06, "loss": 0.3861, "step": 6262 }, { "epoch": 0.7695048531760659, "grad_norm": 0.31014993860336965, "learning_rate": 4.598908193849412e-06, "loss": 0.3479, "step": 6263 }, { "epoch": 0.769627718392923, "grad_norm": 0.3805192663184487, "learning_rate": 4.594273658920052e-06, "loss": 0.3205, "step": 6264 }, { "epoch": 0.7697505836097801, "grad_norm": 0.380686157693044, "learning_rate": 4.58964103805656e-06, "loss": 0.3996, "step": 6265 }, { "epoch": 0.7698734488266372, "grad_norm": 0.3401177787265533, "learning_rate": 4.585010332111093e-06, "loss": 0.3468, "step": 6266 }, { "epoch": 0.7699963140434943, "grad_norm": 0.3887375443150434, "learning_rate": 4.580381541935429e-06, "loss": 0.4236, "step": 6267 }, { "epoch": 0.7701191792603514, "grad_norm": 0.4225486039652506, "learning_rate": 4.575754668381011e-06, "loss": 0.391, "step": 6268 }, { "epoch": 0.7702420444772085, "grad_norm": 0.30847073398499464, "learning_rate": 4.571129712298913e-06, "loss": 0.3429, "step": 6269 }, { "epoch": 0.7703649096940656, "grad_norm": 0.4988281377281875, "learning_rate": 4.5665066745398705e-06, "loss": 0.3972, "step": 6270 }, { "epoch": 0.7704877749109227, "grad_norm": 0.320266628820296, "learning_rate": 4.561885555954269e-06, "loss": 0.3965, "step": 6271 }, { "epoch": 0.7706106401277798, "grad_norm": 0.3547904295834574, "learning_rate": 4.557266357392119e-06, "loss": 0.3824, "step": 6272 }, { "epoch": 0.7707335053446369, "grad_norm": 0.32359340046938406, "learning_rate": 4.552649079703099e-06, "loss": 0.3956, "step": 6273 }, { "epoch": 0.770856370561494, "grad_norm": 0.399797117960718, "learning_rate": 4.548033723736527e-06, "loss": 0.3872, "step": 6274 }, { "epoch": 0.7709792357783511, "grad_norm": 0.4457173006628377, "learning_rate": 4.543420290341374e-06, "loss": 0.417, "step": 6275 }, { "epoch": 0.7711021009952083, "grad_norm": 0.3395219518764458, "learning_rate": 4.538808780366239e-06, "loss": 0.3707, "step": 6276 }, { "epoch": 0.7712249662120654, "grad_norm": 0.4756218308385745, "learning_rate": 4.534199194659387e-06, "loss": 0.4611, "step": 6277 }, { "epoch": 0.7713478314289225, "grad_norm": 0.3380952713994836, "learning_rate": 4.5295915340687255e-06, "loss": 0.4944, "step": 6278 }, { "epoch": 0.7714706966457796, "grad_norm": 0.3897186175963282, "learning_rate": 4.524985799441792e-06, "loss": 0.4586, "step": 6279 }, { "epoch": 0.7715935618626367, "grad_norm": 0.5245692694780636, "learning_rate": 4.520381991625794e-06, "loss": 0.4554, "step": 6280 }, { "epoch": 0.7717164270794938, "grad_norm": 0.47502988149919406, "learning_rate": 4.515780111467555e-06, "loss": 0.4341, "step": 6281 }, { "epoch": 0.771839292296351, "grad_norm": 0.38497721999975804, "learning_rate": 4.511180159813582e-06, "loss": 0.3884, "step": 6282 }, { "epoch": 0.771962157513208, "grad_norm": 0.37393580754451133, "learning_rate": 4.506582137509992e-06, "loss": 0.4312, "step": 6283 }, { "epoch": 0.7720850227300651, "grad_norm": 0.3343310126387896, "learning_rate": 4.501986045402565e-06, "loss": 0.3673, "step": 6284 }, { "epoch": 0.7722078879469222, "grad_norm": 0.39136937787529197, "learning_rate": 4.497391884336722e-06, "loss": 0.4034, "step": 6285 }, { "epoch": 0.7723307531637793, "grad_norm": 0.3484006308361976, "learning_rate": 4.492799655157538e-06, "loss": 0.4344, "step": 6286 }, { "epoch": 0.7724536183806364, "grad_norm": 0.2946305598659508, "learning_rate": 4.488209358709708e-06, "loss": 0.3001, "step": 6287 }, { "epoch": 0.7725764835974935, "grad_norm": 0.39179120728911443, "learning_rate": 4.483620995837597e-06, "loss": 0.4194, "step": 6288 }, { "epoch": 0.7726993488143507, "grad_norm": 0.3855390729570975, "learning_rate": 4.4790345673852055e-06, "loss": 0.3773, "step": 6289 }, { "epoch": 0.7728222140312078, "grad_norm": 0.40491290842446687, "learning_rate": 4.474450074196171e-06, "loss": 0.3818, "step": 6290 }, { "epoch": 0.7729450792480649, "grad_norm": 0.45948345123858075, "learning_rate": 4.4698675171137895e-06, "loss": 0.436, "step": 6291 }, { "epoch": 0.773067944464922, "grad_norm": 0.38951247695075136, "learning_rate": 4.465286896980979e-06, "loss": 0.3768, "step": 6292 }, { "epoch": 0.7731908096817791, "grad_norm": 0.35840898551249956, "learning_rate": 4.460708214640331e-06, "loss": 0.3619, "step": 6293 }, { "epoch": 0.7733136748986362, "grad_norm": 0.44731840390269045, "learning_rate": 4.456131470934053e-06, "loss": 0.4269, "step": 6294 }, { "epoch": 0.7734365401154933, "grad_norm": 0.43691227442080616, "learning_rate": 4.451556666704018e-06, "loss": 0.4323, "step": 6295 }, { "epoch": 0.7735594053323505, "grad_norm": 0.3710913714505824, "learning_rate": 4.44698380279172e-06, "loss": 0.4384, "step": 6296 }, { "epoch": 0.7736822705492076, "grad_norm": 0.3695191304891716, "learning_rate": 4.442412880038312e-06, "loss": 0.423, "step": 6297 }, { "epoch": 0.7738051357660646, "grad_norm": 0.3365556818430672, "learning_rate": 4.437843899284592e-06, "loss": 0.4271, "step": 6298 }, { "epoch": 0.7739280009829217, "grad_norm": 0.25424157100450134, "learning_rate": 4.433276861370984e-06, "loss": 0.3865, "step": 6299 }, { "epoch": 0.7740508661997788, "grad_norm": 0.31257001053124406, "learning_rate": 4.428711767137568e-06, "loss": 0.3472, "step": 6300 }, { "epoch": 0.7741737314166359, "grad_norm": 0.3599248726288611, "learning_rate": 4.424148617424066e-06, "loss": 0.4597, "step": 6301 }, { "epoch": 0.774296596633493, "grad_norm": 0.3976888503656521, "learning_rate": 4.4195874130698455e-06, "loss": 0.3377, "step": 6302 }, { "epoch": 0.7744194618503502, "grad_norm": 0.38667217704957685, "learning_rate": 4.415028154913892e-06, "loss": 0.4249, "step": 6303 }, { "epoch": 0.7745423270672073, "grad_norm": 0.34946881816560366, "learning_rate": 4.410470843794876e-06, "loss": 0.3867, "step": 6304 }, { "epoch": 0.7746651922840644, "grad_norm": 0.3351694470016228, "learning_rate": 4.405915480551065e-06, "loss": 0.3998, "step": 6305 }, { "epoch": 0.7747880575009215, "grad_norm": 0.34571560228809906, "learning_rate": 4.401362066020402e-06, "loss": 0.4313, "step": 6306 }, { "epoch": 0.7749109227177786, "grad_norm": 0.3351057965634517, "learning_rate": 4.396810601040448e-06, "loss": 0.318, "step": 6307 }, { "epoch": 0.7750337879346357, "grad_norm": 0.37538283050150073, "learning_rate": 4.39226108644842e-06, "loss": 0.421, "step": 6308 }, { "epoch": 0.7751566531514928, "grad_norm": 0.4106611640598702, "learning_rate": 4.387713523081176e-06, "loss": 0.3618, "step": 6309 }, { "epoch": 0.77527951836835, "grad_norm": 0.338431473041252, "learning_rate": 4.383167911775201e-06, "loss": 0.4478, "step": 6310 }, { "epoch": 0.7754023835852071, "grad_norm": 0.3586259257738573, "learning_rate": 4.378624253366636e-06, "loss": 0.3678, "step": 6311 }, { "epoch": 0.7755252488020641, "grad_norm": 0.3345467291558332, "learning_rate": 4.3740825486912585e-06, "loss": 0.3694, "step": 6312 }, { "epoch": 0.7756481140189212, "grad_norm": 0.4656975899384154, "learning_rate": 4.36954279858449e-06, "loss": 0.3832, "step": 6313 }, { "epoch": 0.7757709792357783, "grad_norm": 0.32408963916841915, "learning_rate": 4.365005003881377e-06, "loss": 0.3171, "step": 6314 }, { "epoch": 0.7758938444526354, "grad_norm": 0.3536935948081498, "learning_rate": 4.360469165416623e-06, "loss": 0.4396, "step": 6315 }, { "epoch": 0.7760167096694925, "grad_norm": 0.4230746534169351, "learning_rate": 4.355935284024571e-06, "loss": 0.3289, "step": 6316 }, { "epoch": 0.7761395748863497, "grad_norm": 0.3451291821104032, "learning_rate": 4.35140336053919e-06, "loss": 0.4186, "step": 6317 }, { "epoch": 0.7762624401032068, "grad_norm": 0.44697157912593344, "learning_rate": 4.346873395794107e-06, "loss": 0.4434, "step": 6318 }, { "epoch": 0.7763853053200639, "grad_norm": 0.3329340630700091, "learning_rate": 4.342345390622564e-06, "loss": 0.3243, "step": 6319 }, { "epoch": 0.776508170536921, "grad_norm": 0.3775993683456621, "learning_rate": 4.33781934585748e-06, "loss": 0.3607, "step": 6320 }, { "epoch": 0.7766310357537781, "grad_norm": 0.3777845838529421, "learning_rate": 4.333295262331375e-06, "loss": 0.3733, "step": 6321 }, { "epoch": 0.7767539009706352, "grad_norm": 0.35675322916036084, "learning_rate": 4.328773140876436e-06, "loss": 0.3499, "step": 6322 }, { "epoch": 0.7768767661874924, "grad_norm": 0.41749114379229907, "learning_rate": 4.324252982324465e-06, "loss": 0.3938, "step": 6323 }, { "epoch": 0.7769996314043495, "grad_norm": 0.39956590424438304, "learning_rate": 4.3197347875069285e-06, "loss": 0.3719, "step": 6324 }, { "epoch": 0.7771224966212066, "grad_norm": 0.32315157344697665, "learning_rate": 4.315218557254912e-06, "loss": 0.3737, "step": 6325 }, { "epoch": 0.7772453618380637, "grad_norm": 0.3368952874370542, "learning_rate": 4.310704292399147e-06, "loss": 0.3779, "step": 6326 }, { "epoch": 0.7773682270549207, "grad_norm": 0.32972633867260137, "learning_rate": 4.306191993770011e-06, "loss": 0.4132, "step": 6327 }, { "epoch": 0.7774910922717778, "grad_norm": 0.31627037094116967, "learning_rate": 4.3016816621975006e-06, "loss": 0.359, "step": 6328 }, { "epoch": 0.7776139574886349, "grad_norm": 0.313573260738055, "learning_rate": 4.297173298511273e-06, "loss": 0.3647, "step": 6329 }, { "epoch": 0.777736822705492, "grad_norm": 0.4156428940459043, "learning_rate": 4.292666903540597e-06, "loss": 0.4159, "step": 6330 }, { "epoch": 0.7778596879223492, "grad_norm": 0.301562277989684, "learning_rate": 4.288162478114413e-06, "loss": 0.3717, "step": 6331 }, { "epoch": 0.7779825531392063, "grad_norm": 0.24326823553037358, "learning_rate": 4.283660023061268e-06, "loss": 0.3249, "step": 6332 }, { "epoch": 0.7781054183560634, "grad_norm": 0.3207045960079175, "learning_rate": 4.27915953920937e-06, "loss": 0.3021, "step": 6333 }, { "epoch": 0.7782282835729205, "grad_norm": 0.36297613527055345, "learning_rate": 4.274661027386542e-06, "loss": 0.3905, "step": 6334 }, { "epoch": 0.7783511487897776, "grad_norm": 0.287195171653174, "learning_rate": 4.270164488420262e-06, "loss": 0.439, "step": 6335 }, { "epoch": 0.7784740140066347, "grad_norm": 0.32176049552849145, "learning_rate": 4.265669923137642e-06, "loss": 0.3618, "step": 6336 }, { "epoch": 0.7785968792234919, "grad_norm": 0.36434691023040644, "learning_rate": 4.261177332365422e-06, "loss": 0.4481, "step": 6337 }, { "epoch": 0.778719744440349, "grad_norm": 0.360646851423279, "learning_rate": 4.256686716929989e-06, "loss": 0.3214, "step": 6338 }, { "epoch": 0.7788426096572061, "grad_norm": 0.5839595553138128, "learning_rate": 4.25219807765736e-06, "loss": 0.4708, "step": 6339 }, { "epoch": 0.7789654748740632, "grad_norm": 0.29906314291150593, "learning_rate": 4.247711415373198e-06, "loss": 0.3857, "step": 6340 }, { "epoch": 0.7790883400909202, "grad_norm": 0.3938185204910539, "learning_rate": 4.243226730902785e-06, "loss": 0.4554, "step": 6341 }, { "epoch": 0.7792112053077773, "grad_norm": 0.34758956347602915, "learning_rate": 4.238744025071055e-06, "loss": 0.3944, "step": 6342 }, { "epoch": 0.7793340705246344, "grad_norm": 0.3456778290737519, "learning_rate": 4.234263298702576e-06, "loss": 0.4193, "step": 6343 }, { "epoch": 0.7794569357414916, "grad_norm": 0.3750394696263331, "learning_rate": 4.229784552621541e-06, "loss": 0.437, "step": 6344 }, { "epoch": 0.7795798009583487, "grad_norm": 0.337261611229215, "learning_rate": 4.2253077876517914e-06, "loss": 0.3691, "step": 6345 }, { "epoch": 0.7797026661752058, "grad_norm": 0.34595154443388254, "learning_rate": 4.220833004616796e-06, "loss": 0.3964, "step": 6346 }, { "epoch": 0.7798255313920629, "grad_norm": 0.30633137920445225, "learning_rate": 4.2163602043396696e-06, "loss": 0.4123, "step": 6347 }, { "epoch": 0.77994839660892, "grad_norm": 0.35771502576848024, "learning_rate": 4.211889387643145e-06, "loss": 0.4256, "step": 6348 }, { "epoch": 0.7800712618257771, "grad_norm": 0.3270513345823396, "learning_rate": 4.207420555349603e-06, "loss": 0.3925, "step": 6349 }, { "epoch": 0.7801941270426342, "grad_norm": 0.4176232336240313, "learning_rate": 4.202953708281059e-06, "loss": 0.4156, "step": 6350 }, { "epoch": 0.7803169922594914, "grad_norm": 0.4237907661279625, "learning_rate": 4.198488847259163e-06, "loss": 0.4179, "step": 6351 }, { "epoch": 0.7804398574763485, "grad_norm": 0.30556818222655, "learning_rate": 4.19402597310519e-06, "loss": 0.3147, "step": 6352 }, { "epoch": 0.7805627226932056, "grad_norm": 0.3140974698034795, "learning_rate": 4.189565086640057e-06, "loss": 0.3995, "step": 6353 }, { "epoch": 0.7806855879100627, "grad_norm": 0.40886879745997134, "learning_rate": 4.185106188684325e-06, "loss": 0.3801, "step": 6354 }, { "epoch": 0.7808084531269198, "grad_norm": 0.3359811335008091, "learning_rate": 4.180649280058168e-06, "loss": 0.3765, "step": 6355 }, { "epoch": 0.7809313183437768, "grad_norm": 0.7928915077692336, "learning_rate": 4.176194361581414e-06, "loss": 0.5407, "step": 6356 }, { "epoch": 0.7810541835606339, "grad_norm": 0.3392573147492974, "learning_rate": 4.1717414340735025e-06, "loss": 0.3194, "step": 6357 }, { "epoch": 0.7811770487774911, "grad_norm": 0.5016220943758273, "learning_rate": 4.167290498353541e-06, "loss": 0.4174, "step": 6358 }, { "epoch": 0.7812999139943482, "grad_norm": 0.4040351485403887, "learning_rate": 4.162841555240234e-06, "loss": 0.32, "step": 6359 }, { "epoch": 0.7814227792112053, "grad_norm": 0.4030790892327359, "learning_rate": 4.158394605551946e-06, "loss": 0.4647, "step": 6360 }, { "epoch": 0.7815456444280624, "grad_norm": 0.34033228741918026, "learning_rate": 4.153949650106658e-06, "loss": 0.3997, "step": 6361 }, { "epoch": 0.7816685096449195, "grad_norm": 0.3255944649207002, "learning_rate": 4.149506689721989e-06, "loss": 0.3916, "step": 6362 }, { "epoch": 0.7817913748617766, "grad_norm": 0.36652954172880514, "learning_rate": 4.1450657252152035e-06, "loss": 0.3803, "step": 6363 }, { "epoch": 0.7819142400786337, "grad_norm": 0.34811329065302354, "learning_rate": 4.140626757403176e-06, "loss": 0.3309, "step": 6364 }, { "epoch": 0.7820371052954909, "grad_norm": 0.36759192218083275, "learning_rate": 4.1361897871024315e-06, "loss": 0.3814, "step": 6365 }, { "epoch": 0.782159970512348, "grad_norm": 0.33341134326101607, "learning_rate": 4.13175481512912e-06, "loss": 0.3434, "step": 6366 }, { "epoch": 0.7822828357292051, "grad_norm": 0.32337892590750494, "learning_rate": 4.127321842299034e-06, "loss": 0.4109, "step": 6367 }, { "epoch": 0.7824057009460622, "grad_norm": 0.40092571571974234, "learning_rate": 4.122890869427572e-06, "loss": 0.3581, "step": 6368 }, { "epoch": 0.7825285661629193, "grad_norm": 0.39845908136853153, "learning_rate": 4.118461897329804e-06, "loss": 0.3501, "step": 6369 }, { "epoch": 0.7826514313797763, "grad_norm": 0.33202582188902197, "learning_rate": 4.114034926820396e-06, "loss": 0.3527, "step": 6370 }, { "epoch": 0.7827742965966334, "grad_norm": 0.40535451958377394, "learning_rate": 4.10960995871367e-06, "loss": 0.4066, "step": 6371 }, { "epoch": 0.7828971618134906, "grad_norm": 0.35084178592924564, "learning_rate": 4.10518699382356e-06, "loss": 0.4422, "step": 6372 }, { "epoch": 0.7830200270303477, "grad_norm": 0.31896734345543254, "learning_rate": 4.1007660329636484e-06, "loss": 0.3505, "step": 6373 }, { "epoch": 0.7831428922472048, "grad_norm": 0.28101292367755876, "learning_rate": 4.096347076947145e-06, "loss": 0.3781, "step": 6374 }, { "epoch": 0.7832657574640619, "grad_norm": 0.3169104380518277, "learning_rate": 4.091930126586879e-06, "loss": 0.3815, "step": 6375 }, { "epoch": 0.783388622680919, "grad_norm": 0.3118759906682291, "learning_rate": 4.087515182695326e-06, "loss": 0.3901, "step": 6376 }, { "epoch": 0.7835114878977761, "grad_norm": 0.3750088639143561, "learning_rate": 4.083102246084584e-06, "loss": 0.4438, "step": 6377 }, { "epoch": 0.7836343531146333, "grad_norm": 0.3618259754935621, "learning_rate": 4.078691317566392e-06, "loss": 0.4661, "step": 6378 }, { "epoch": 0.7837572183314904, "grad_norm": 0.34277225535221123, "learning_rate": 4.074282397952097e-06, "loss": 0.412, "step": 6379 }, { "epoch": 0.7838800835483475, "grad_norm": 0.3008011603510034, "learning_rate": 4.069875488052702e-06, "loss": 0.3693, "step": 6380 }, { "epoch": 0.7840029487652046, "grad_norm": 0.3370153038144426, "learning_rate": 4.06547058867883e-06, "loss": 0.3734, "step": 6381 }, { "epoch": 0.7841258139820617, "grad_norm": 0.3447508556950831, "learning_rate": 4.061067700640726e-06, "loss": 0.3905, "step": 6382 }, { "epoch": 0.7842486791989188, "grad_norm": 0.3545563270053051, "learning_rate": 4.056666824748282e-06, "loss": 0.3999, "step": 6383 }, { "epoch": 0.784371544415776, "grad_norm": 0.33963263646770947, "learning_rate": 4.052267961810995e-06, "loss": 0.4666, "step": 6384 }, { "epoch": 0.784494409632633, "grad_norm": 0.3320910720767599, "learning_rate": 4.047871112638029e-06, "loss": 0.3607, "step": 6385 }, { "epoch": 0.7846172748494901, "grad_norm": 0.32953282467691064, "learning_rate": 4.043476278038139e-06, "loss": 0.3598, "step": 6386 }, { "epoch": 0.7847401400663472, "grad_norm": 0.35234212165207796, "learning_rate": 4.039083458819736e-06, "loss": 0.3833, "step": 6387 }, { "epoch": 0.7848630052832043, "grad_norm": 0.36456317049388115, "learning_rate": 4.034692655790839e-06, "loss": 0.3591, "step": 6388 }, { "epoch": 0.7849858705000614, "grad_norm": 0.426584365430464, "learning_rate": 4.030303869759124e-06, "loss": 0.4386, "step": 6389 }, { "epoch": 0.7851087357169185, "grad_norm": 0.2931902804312158, "learning_rate": 4.025917101531866e-06, "loss": 0.3818, "step": 6390 }, { "epoch": 0.7852316009337756, "grad_norm": 0.2972419297055811, "learning_rate": 4.0215323519159896e-06, "loss": 0.3321, "step": 6391 }, { "epoch": 0.7853544661506328, "grad_norm": 0.4139923882073758, "learning_rate": 4.017149621718043e-06, "loss": 0.3776, "step": 6392 }, { "epoch": 0.7854773313674899, "grad_norm": 0.403164424097863, "learning_rate": 4.012768911744192e-06, "loss": 0.3735, "step": 6393 }, { "epoch": 0.785600196584347, "grad_norm": 0.31751574917800574, "learning_rate": 4.0083902228002495e-06, "loss": 0.3422, "step": 6394 }, { "epoch": 0.7857230618012041, "grad_norm": 0.31070209403920424, "learning_rate": 4.004013555691633e-06, "loss": 0.3572, "step": 6395 }, { "epoch": 0.7858459270180612, "grad_norm": 0.3283983497598443, "learning_rate": 3.999638911223422e-06, "loss": 0.4527, "step": 6396 }, { "epoch": 0.7859687922349183, "grad_norm": 0.31457333132192083, "learning_rate": 3.9952662902002886e-06, "loss": 0.4096, "step": 6397 }, { "epoch": 0.7860916574517754, "grad_norm": 0.3524003272983343, "learning_rate": 3.990895693426557e-06, "loss": 0.4757, "step": 6398 }, { "epoch": 0.7862145226686326, "grad_norm": 0.39000129559143226, "learning_rate": 3.98652712170616e-06, "loss": 0.3834, "step": 6399 }, { "epoch": 0.7863373878854896, "grad_norm": 0.33726767285607623, "learning_rate": 3.982160575842675e-06, "loss": 0.4006, "step": 6400 }, { "epoch": 0.7864602531023467, "grad_norm": 0.35789111191593226, "learning_rate": 3.977796056639304e-06, "loss": 0.3132, "step": 6401 }, { "epoch": 0.7865831183192038, "grad_norm": 0.4175697642814684, "learning_rate": 3.973433564898863e-06, "loss": 0.4596, "step": 6402 }, { "epoch": 0.7867059835360609, "grad_norm": 0.2917221800430368, "learning_rate": 3.9690731014238066e-06, "loss": 0.3953, "step": 6403 }, { "epoch": 0.786828848752918, "grad_norm": 0.33655209848463274, "learning_rate": 3.964714667016216e-06, "loss": 0.3897, "step": 6404 }, { "epoch": 0.7869517139697751, "grad_norm": 0.3246659826382805, "learning_rate": 3.960358262477801e-06, "loss": 0.4512, "step": 6405 }, { "epoch": 0.7870745791866323, "grad_norm": 0.3236661005541945, "learning_rate": 3.956003888609883e-06, "loss": 0.3523, "step": 6406 }, { "epoch": 0.7871974444034894, "grad_norm": 0.28443083230995103, "learning_rate": 3.951651546213428e-06, "loss": 0.4063, "step": 6407 }, { "epoch": 0.7873203096203465, "grad_norm": 0.4031691923128781, "learning_rate": 3.94730123608902e-06, "loss": 0.3389, "step": 6408 }, { "epoch": 0.7874431748372036, "grad_norm": 0.4015976735129274, "learning_rate": 3.942952959036874e-06, "loss": 0.4596, "step": 6409 }, { "epoch": 0.7875660400540607, "grad_norm": 0.38757886537906344, "learning_rate": 3.938606715856821e-06, "loss": 0.3915, "step": 6410 }, { "epoch": 0.7876889052709178, "grad_norm": 0.37316764056264784, "learning_rate": 3.934262507348325e-06, "loss": 0.4138, "step": 6411 }, { "epoch": 0.787811770487775, "grad_norm": 0.3469383026971661, "learning_rate": 3.929920334310481e-06, "loss": 0.4084, "step": 6412 }, { "epoch": 0.7879346357046321, "grad_norm": 0.36535597425741956, "learning_rate": 3.925580197541996e-06, "loss": 0.3497, "step": 6413 }, { "epoch": 0.7880575009214891, "grad_norm": 0.3324890273155565, "learning_rate": 3.921242097841214e-06, "loss": 0.4672, "step": 6414 }, { "epoch": 0.7881803661383462, "grad_norm": 0.3592454665002353, "learning_rate": 3.916906036006101e-06, "loss": 0.3824, "step": 6415 }, { "epoch": 0.7883032313552033, "grad_norm": 0.48054788772538887, "learning_rate": 3.912572012834248e-06, "loss": 0.449, "step": 6416 }, { "epoch": 0.7884260965720604, "grad_norm": 0.3402769314546131, "learning_rate": 3.908240029122865e-06, "loss": 0.4406, "step": 6417 }, { "epoch": 0.7885489617889175, "grad_norm": 0.3664527849518094, "learning_rate": 3.903910085668798e-06, "loss": 0.4671, "step": 6418 }, { "epoch": 0.7886718270057747, "grad_norm": 0.3625146194715895, "learning_rate": 3.899582183268512e-06, "loss": 0.5076, "step": 6419 }, { "epoch": 0.7887946922226318, "grad_norm": 0.3502169488131576, "learning_rate": 3.895256322718091e-06, "loss": 0.3959, "step": 6420 }, { "epoch": 0.7889175574394889, "grad_norm": 0.3592665675406946, "learning_rate": 3.890932504813258e-06, "loss": 0.3577, "step": 6421 }, { "epoch": 0.789040422656346, "grad_norm": 0.31131390396372655, "learning_rate": 3.886610730349337e-06, "loss": 0.3208, "step": 6422 }, { "epoch": 0.7891632878732031, "grad_norm": 0.35036092388385653, "learning_rate": 3.882291000121308e-06, "loss": 0.3619, "step": 6423 }, { "epoch": 0.7892861530900602, "grad_norm": 0.4075075660809605, "learning_rate": 3.877973314923744e-06, "loss": 0.3589, "step": 6424 }, { "epoch": 0.7894090183069173, "grad_norm": 0.3713246531859024, "learning_rate": 3.873657675550864e-06, "loss": 0.3855, "step": 6425 }, { "epoch": 0.7895318835237745, "grad_norm": 0.38699198647658345, "learning_rate": 3.869344082796489e-06, "loss": 0.4224, "step": 6426 }, { "epoch": 0.7896547487406316, "grad_norm": 0.36487085857674345, "learning_rate": 3.8650325374540935e-06, "loss": 0.4019, "step": 6427 }, { "epoch": 0.7897776139574887, "grad_norm": 0.36876947377253105, "learning_rate": 3.860723040316747e-06, "loss": 0.3341, "step": 6428 }, { "epoch": 0.7899004791743457, "grad_norm": 0.39625010432765956, "learning_rate": 3.8564155921771585e-06, "loss": 0.4584, "step": 6429 }, { "epoch": 0.7900233443912028, "grad_norm": 0.4057370660356021, "learning_rate": 3.852110193827651e-06, "loss": 0.3464, "step": 6430 }, { "epoch": 0.7901462096080599, "grad_norm": 0.33192524984252697, "learning_rate": 3.847806846060175e-06, "loss": 0.4275, "step": 6431 }, { "epoch": 0.790269074824917, "grad_norm": 0.31333171465824605, "learning_rate": 3.843505549666311e-06, "loss": 0.3666, "step": 6432 }, { "epoch": 0.7903919400417742, "grad_norm": 0.3299627867116511, "learning_rate": 3.839206305437239e-06, "loss": 0.3312, "step": 6433 }, { "epoch": 0.7905148052586313, "grad_norm": 0.34612428397058287, "learning_rate": 3.834909114163797e-06, "loss": 0.4247, "step": 6434 }, { "epoch": 0.7906376704754884, "grad_norm": 0.3841502296213415, "learning_rate": 3.830613976636408e-06, "loss": 0.4239, "step": 6435 }, { "epoch": 0.7907605356923455, "grad_norm": 0.34866681502603486, "learning_rate": 3.826320893645149e-06, "loss": 0.3592, "step": 6436 }, { "epoch": 0.7908834009092026, "grad_norm": 0.9348662554261863, "learning_rate": 3.822029865979693e-06, "loss": 0.5271, "step": 6437 }, { "epoch": 0.7910062661260597, "grad_norm": 0.3394395615627219, "learning_rate": 3.817740894429352e-06, "loss": 0.4007, "step": 6438 }, { "epoch": 0.7911291313429168, "grad_norm": 0.35549645801992175, "learning_rate": 3.8134539797830557e-06, "loss": 0.4706, "step": 6439 }, { "epoch": 0.791251996559774, "grad_norm": 0.3742423721058549, "learning_rate": 3.8091691228293515e-06, "loss": 0.4365, "step": 6440 }, { "epoch": 0.7913748617766311, "grad_norm": 0.2687667461319295, "learning_rate": 3.804886324356409e-06, "loss": 0.324, "step": 6441 }, { "epoch": 0.7914977269934882, "grad_norm": 0.42684366587574335, "learning_rate": 3.8006055851520262e-06, "loss": 0.3423, "step": 6442 }, { "epoch": 0.7916205922103452, "grad_norm": 0.3558300383793565, "learning_rate": 3.796326906003619e-06, "loss": 0.3452, "step": 6443 }, { "epoch": 0.7917434574272023, "grad_norm": 0.3520578011889783, "learning_rate": 3.792050287698216e-06, "loss": 0.3967, "step": 6444 }, { "epoch": 0.7918663226440594, "grad_norm": 0.40878764243758275, "learning_rate": 3.7877757310224753e-06, "loss": 0.381, "step": 6445 }, { "epoch": 0.7919891878609165, "grad_norm": 0.46073087844097727, "learning_rate": 3.783503236762674e-06, "loss": 0.4177, "step": 6446 }, { "epoch": 0.7921120530777737, "grad_norm": 0.5158420848046298, "learning_rate": 3.7792328057047175e-06, "loss": 0.4503, "step": 6447 }, { "epoch": 0.7922349182946308, "grad_norm": 0.3181531528346509, "learning_rate": 3.774964438634112e-06, "loss": 0.3085, "step": 6448 }, { "epoch": 0.7923577835114879, "grad_norm": 0.3388900012711451, "learning_rate": 3.7706981363359995e-06, "loss": 0.4349, "step": 6449 }, { "epoch": 0.792480648728345, "grad_norm": 0.3475342408136357, "learning_rate": 3.766433899595147e-06, "loss": 0.3684, "step": 6450 }, { "epoch": 0.7926035139452021, "grad_norm": 0.4169131325780268, "learning_rate": 3.762171729195921e-06, "loss": 0.3234, "step": 6451 }, { "epoch": 0.7927263791620592, "grad_norm": 0.376690914762606, "learning_rate": 3.757911625922325e-06, "loss": 0.3625, "step": 6452 }, { "epoch": 0.7928492443789164, "grad_norm": 0.3078295320655709, "learning_rate": 3.7536535905579785e-06, "loss": 0.5138, "step": 6453 }, { "epoch": 0.7929721095957735, "grad_norm": 0.300830039352162, "learning_rate": 3.7493976238861223e-06, "loss": 0.3977, "step": 6454 }, { "epoch": 0.7930949748126306, "grad_norm": 0.37270286319371976, "learning_rate": 3.745143726689607e-06, "loss": 0.4124, "step": 6455 }, { "epoch": 0.7932178400294877, "grad_norm": 0.3372343769528095, "learning_rate": 3.7408918997509125e-06, "loss": 0.3966, "step": 6456 }, { "epoch": 0.7933407052463448, "grad_norm": 0.37473946821801024, "learning_rate": 3.73664214385214e-06, "loss": 0.4035, "step": 6457 }, { "epoch": 0.7934635704632018, "grad_norm": 0.319410683081299, "learning_rate": 3.732394459774996e-06, "loss": 0.3573, "step": 6458 }, { "epoch": 0.7935864356800589, "grad_norm": 0.3136049278566794, "learning_rate": 3.728148848300821e-06, "loss": 0.3824, "step": 6459 }, { "epoch": 0.793709300896916, "grad_norm": 0.360300901799138, "learning_rate": 3.7239053102105568e-06, "loss": 0.3934, "step": 6460 }, { "epoch": 0.7938321661137732, "grad_norm": 0.37412298827531837, "learning_rate": 3.7196638462847916e-06, "loss": 0.3789, "step": 6461 }, { "epoch": 0.7939550313306303, "grad_norm": 0.3161339652672709, "learning_rate": 3.715424457303702e-06, "loss": 0.4548, "step": 6462 }, { "epoch": 0.7940778965474874, "grad_norm": 0.36249146207508576, "learning_rate": 3.7111871440471036e-06, "loss": 0.3721, "step": 6463 }, { "epoch": 0.7942007617643445, "grad_norm": 0.32196617727003735, "learning_rate": 3.7069519072944168e-06, "loss": 0.3985, "step": 6464 }, { "epoch": 0.7943236269812016, "grad_norm": 0.36958277521886224, "learning_rate": 3.702718747824688e-06, "loss": 0.4329, "step": 6465 }, { "epoch": 0.7944464921980587, "grad_norm": 0.328014696385692, "learning_rate": 3.6984876664165845e-06, "loss": 0.4167, "step": 6466 }, { "epoch": 0.7945693574149159, "grad_norm": 0.3303851876445511, "learning_rate": 3.6942586638483768e-06, "loss": 0.4389, "step": 6467 }, { "epoch": 0.794692222631773, "grad_norm": 0.32938445801096644, "learning_rate": 3.690031740897968e-06, "loss": 0.4127, "step": 6468 }, { "epoch": 0.7948150878486301, "grad_norm": 0.36598192484650366, "learning_rate": 3.6858068983428745e-06, "loss": 0.3781, "step": 6469 }, { "epoch": 0.7949379530654872, "grad_norm": 0.2915033228188649, "learning_rate": 3.6815841369602297e-06, "loss": 0.3552, "step": 6470 }, { "epoch": 0.7950608182823443, "grad_norm": 0.35607733366426425, "learning_rate": 3.677363457526775e-06, "loss": 0.4104, "step": 6471 }, { "epoch": 0.7951836834992013, "grad_norm": 0.31299285902860435, "learning_rate": 3.673144860818884e-06, "loss": 0.369, "step": 6472 }, { "epoch": 0.7953065487160584, "grad_norm": 0.3772464786305777, "learning_rate": 3.6689283476125392e-06, "loss": 0.3654, "step": 6473 }, { "epoch": 0.7954294139329156, "grad_norm": 0.35163431799881056, "learning_rate": 3.6647139186833435e-06, "loss": 0.3654, "step": 6474 }, { "epoch": 0.7955522791497727, "grad_norm": 0.3420566887336221, "learning_rate": 3.6605015748065053e-06, "loss": 0.4748, "step": 6475 }, { "epoch": 0.7956751443666298, "grad_norm": 0.32927377198329166, "learning_rate": 3.6562913167568645e-06, "loss": 0.3354, "step": 6476 }, { "epoch": 0.7957980095834869, "grad_norm": 0.3364180217369815, "learning_rate": 3.652083145308874e-06, "loss": 0.3481, "step": 6477 }, { "epoch": 0.795920874800344, "grad_norm": 0.2951257944300975, "learning_rate": 3.6478770612365902e-06, "loss": 0.3449, "step": 6478 }, { "epoch": 0.7960437400172011, "grad_norm": 0.3727424502924253, "learning_rate": 3.6436730653136986e-06, "loss": 0.4188, "step": 6479 }, { "epoch": 0.7961666052340582, "grad_norm": 0.44203831192280263, "learning_rate": 3.6394711583135e-06, "loss": 0.3588, "step": 6480 }, { "epoch": 0.7962894704509154, "grad_norm": 0.34084472511719627, "learning_rate": 3.635271341008911e-06, "loss": 0.4523, "step": 6481 }, { "epoch": 0.7964123356677725, "grad_norm": 0.4557868685448125, "learning_rate": 3.631073614172449e-06, "loss": 0.4223, "step": 6482 }, { "epoch": 0.7965352008846296, "grad_norm": 0.31457131283030915, "learning_rate": 3.6268779785762686e-06, "loss": 0.394, "step": 6483 }, { "epoch": 0.7966580661014867, "grad_norm": 0.3685047252907927, "learning_rate": 3.6226844349921294e-06, "loss": 0.3579, "step": 6484 }, { "epoch": 0.7967809313183438, "grad_norm": 0.3660046652540331, "learning_rate": 3.6184929841914004e-06, "loss": 0.3506, "step": 6485 }, { "epoch": 0.7969037965352009, "grad_norm": 0.3659930382082842, "learning_rate": 3.6143036269450796e-06, "loss": 0.3999, "step": 6486 }, { "epoch": 0.7970266617520579, "grad_norm": 0.31758049901098273, "learning_rate": 3.610116364023759e-06, "loss": 0.4109, "step": 6487 }, { "epoch": 0.7971495269689151, "grad_norm": 0.3886383926844835, "learning_rate": 3.6059311961976756e-06, "loss": 0.3944, "step": 6488 }, { "epoch": 0.7972723921857722, "grad_norm": 0.3076099322730118, "learning_rate": 3.6017481242366503e-06, "loss": 0.4198, "step": 6489 }, { "epoch": 0.7973952574026293, "grad_norm": 0.5286733390192523, "learning_rate": 3.5975671489101423e-06, "loss": 0.4375, "step": 6490 }, { "epoch": 0.7975181226194864, "grad_norm": 0.3840507615821125, "learning_rate": 3.5933882709872023e-06, "loss": 0.4254, "step": 6491 }, { "epoch": 0.7976409878363435, "grad_norm": 0.341655870991454, "learning_rate": 3.589211491236523e-06, "loss": 0.333, "step": 6492 }, { "epoch": 0.7977638530532006, "grad_norm": 0.2866003397965526, "learning_rate": 3.5850368104263836e-06, "loss": 0.3536, "step": 6493 }, { "epoch": 0.7978867182700577, "grad_norm": 0.3557708729921844, "learning_rate": 3.5808642293246995e-06, "loss": 0.3607, "step": 6494 }, { "epoch": 0.7980095834869149, "grad_norm": 0.35138091911284314, "learning_rate": 3.5766937486989802e-06, "loss": 0.423, "step": 6495 }, { "epoch": 0.798132448703772, "grad_norm": 0.33485269465410217, "learning_rate": 3.572525369316364e-06, "loss": 0.3657, "step": 6496 }, { "epoch": 0.7982553139206291, "grad_norm": 0.30097352852119436, "learning_rate": 3.568359091943599e-06, "loss": 0.3519, "step": 6497 }, { "epoch": 0.7983781791374862, "grad_norm": 0.32968233807107766, "learning_rate": 3.564194917347035e-06, "loss": 0.3851, "step": 6498 }, { "epoch": 0.7985010443543433, "grad_norm": 0.3717727112478961, "learning_rate": 3.56003284629266e-06, "loss": 0.3416, "step": 6499 }, { "epoch": 0.7986239095712004, "grad_norm": 0.3387470775556057, "learning_rate": 3.5558728795460467e-06, "loss": 0.3891, "step": 6500 }, { "epoch": 0.7987467747880576, "grad_norm": 0.41284247578921823, "learning_rate": 3.5517150178724058e-06, "loss": 0.3867, "step": 6501 }, { "epoch": 0.7988696400049146, "grad_norm": 0.33539400479366227, "learning_rate": 3.547559262036537e-06, "loss": 0.3827, "step": 6502 }, { "epoch": 0.7989925052217717, "grad_norm": 0.33208389511087166, "learning_rate": 3.5434056128028715e-06, "loss": 0.4211, "step": 6503 }, { "epoch": 0.7991153704386288, "grad_norm": 0.4226755986671922, "learning_rate": 3.5392540709354486e-06, "loss": 0.4287, "step": 6504 }, { "epoch": 0.7992382356554859, "grad_norm": 0.38579215512571685, "learning_rate": 3.5351046371979084e-06, "loss": 0.4592, "step": 6505 }, { "epoch": 0.799361100872343, "grad_norm": 0.3504574292246602, "learning_rate": 3.5309573123535184e-06, "loss": 0.2777, "step": 6506 }, { "epoch": 0.7994839660892001, "grad_norm": 0.34274416276688563, "learning_rate": 3.5268120971651528e-06, "loss": 0.3711, "step": 6507 }, { "epoch": 0.7996068313060573, "grad_norm": 0.3488385832767374, "learning_rate": 3.5226689923952975e-06, "loss": 0.3932, "step": 6508 }, { "epoch": 0.7997296965229144, "grad_norm": 0.36189710523122903, "learning_rate": 3.518527998806046e-06, "loss": 0.3231, "step": 6509 }, { "epoch": 0.7998525617397715, "grad_norm": 0.34374813902394014, "learning_rate": 3.5143891171591088e-06, "loss": 0.4502, "step": 6510 }, { "epoch": 0.7999754269566286, "grad_norm": 0.2952963354126614, "learning_rate": 3.510252348215805e-06, "loss": 0.3592, "step": 6511 }, { "epoch": 0.8000982921734857, "grad_norm": 0.40491561956820565, "learning_rate": 3.5061176927370745e-06, "loss": 0.4237, "step": 6512 }, { "epoch": 0.8002211573903428, "grad_norm": 0.3469105234525288, "learning_rate": 3.5019851514834476e-06, "loss": 0.425, "step": 6513 }, { "epoch": 0.8003440226072, "grad_norm": 0.3222855410611746, "learning_rate": 3.4978547252150862e-06, "loss": 0.3524, "step": 6514 }, { "epoch": 0.8004668878240571, "grad_norm": 0.4227862398158576, "learning_rate": 3.4937264146917587e-06, "loss": 0.5333, "step": 6515 }, { "epoch": 0.8005897530409141, "grad_norm": 0.41431581743226936, "learning_rate": 3.4896002206728313e-06, "loss": 0.451, "step": 6516 }, { "epoch": 0.8007126182577712, "grad_norm": 0.39369030891300627, "learning_rate": 3.485476143917295e-06, "loss": 0.3891, "step": 6517 }, { "epoch": 0.8008354834746283, "grad_norm": 0.3306269022895825, "learning_rate": 3.4813541851837498e-06, "loss": 0.3417, "step": 6518 }, { "epoch": 0.8009583486914854, "grad_norm": 0.3090831455402509, "learning_rate": 3.4772343452304047e-06, "loss": 0.3218, "step": 6519 }, { "epoch": 0.8010812139083425, "grad_norm": 0.38449960249353893, "learning_rate": 3.4731166248150693e-06, "loss": 0.3265, "step": 6520 }, { "epoch": 0.8012040791251996, "grad_norm": 0.3482027487734137, "learning_rate": 3.4690010246951765e-06, "loss": 0.3757, "step": 6521 }, { "epoch": 0.8013269443420568, "grad_norm": 0.3380948751796832, "learning_rate": 3.464887545627767e-06, "loss": 0.3632, "step": 6522 }, { "epoch": 0.8014498095589139, "grad_norm": 0.34577328437754457, "learning_rate": 3.4607761883694834e-06, "loss": 0.3601, "step": 6523 }, { "epoch": 0.801572674775771, "grad_norm": 0.3188013953693979, "learning_rate": 3.4566669536765893e-06, "loss": 0.3222, "step": 6524 }, { "epoch": 0.8016955399926281, "grad_norm": 0.3365530317650079, "learning_rate": 3.452559842304938e-06, "loss": 0.3936, "step": 6525 }, { "epoch": 0.8018184052094852, "grad_norm": 0.41396631537773804, "learning_rate": 3.4484548550100254e-06, "loss": 0.4425, "step": 6526 }, { "epoch": 0.8019412704263423, "grad_norm": 0.318955035774152, "learning_rate": 3.4443519925469236e-06, "loss": 0.377, "step": 6527 }, { "epoch": 0.8020641356431994, "grad_norm": 0.46322243880128144, "learning_rate": 3.440251255670337e-06, "loss": 0.3608, "step": 6528 }, { "epoch": 0.8021870008600566, "grad_norm": 0.3319406311432949, "learning_rate": 3.4361526451345536e-06, "loss": 0.3438, "step": 6529 }, { "epoch": 0.8023098660769137, "grad_norm": 0.37411101897222393, "learning_rate": 3.4320561616935076e-06, "loss": 0.4135, "step": 6530 }, { "epoch": 0.8024327312937707, "grad_norm": 0.3081769326265536, "learning_rate": 3.427961806100704e-06, "loss": 0.3911, "step": 6531 }, { "epoch": 0.8025555965106278, "grad_norm": 0.44781927150644524, "learning_rate": 3.423869579109284e-06, "loss": 0.4798, "step": 6532 }, { "epoch": 0.8026784617274849, "grad_norm": 0.38689725113279994, "learning_rate": 3.4197794814719768e-06, "loss": 0.3885, "step": 6533 }, { "epoch": 0.802801326944342, "grad_norm": 0.37340575495844347, "learning_rate": 3.4156915139411343e-06, "loss": 0.3356, "step": 6534 }, { "epoch": 0.8029241921611991, "grad_norm": 0.36985204808884037, "learning_rate": 3.4116056772687147e-06, "loss": 0.3151, "step": 6535 }, { "epoch": 0.8030470573780563, "grad_norm": 0.3459523978568424, "learning_rate": 3.407521972206272e-06, "loss": 0.3495, "step": 6536 }, { "epoch": 0.8031699225949134, "grad_norm": 0.3402667631450522, "learning_rate": 3.403440399504984e-06, "loss": 0.2913, "step": 6537 }, { "epoch": 0.8032927878117705, "grad_norm": 0.340098563165322, "learning_rate": 3.3993609599156277e-06, "loss": 0.3658, "step": 6538 }, { "epoch": 0.8034156530286276, "grad_norm": 0.3427528668270123, "learning_rate": 3.3952836541885933e-06, "loss": 0.4079, "step": 6539 }, { "epoch": 0.8035385182454847, "grad_norm": 0.36281446209189394, "learning_rate": 3.3912084830738695e-06, "loss": 0.3908, "step": 6540 }, { "epoch": 0.8036613834623418, "grad_norm": 0.4169575445746121, "learning_rate": 3.3871354473210573e-06, "loss": 0.3851, "step": 6541 }, { "epoch": 0.803784248679199, "grad_norm": 0.36019935091873373, "learning_rate": 3.383064547679374e-06, "loss": 0.4286, "step": 6542 }, { "epoch": 0.8039071138960561, "grad_norm": 0.2974460109974709, "learning_rate": 3.378995784897622e-06, "loss": 0.3819, "step": 6543 }, { "epoch": 0.8040299791129132, "grad_norm": 0.3468413127808109, "learning_rate": 3.3749291597242327e-06, "loss": 0.3974, "step": 6544 }, { "epoch": 0.8041528443297702, "grad_norm": 0.3245744924699244, "learning_rate": 3.370864672907232e-06, "loss": 0.4352, "step": 6545 }, { "epoch": 0.8042757095466273, "grad_norm": 0.35676251010596344, "learning_rate": 3.3668023251942615e-06, "loss": 0.3787, "step": 6546 }, { "epoch": 0.8043985747634844, "grad_norm": 0.37630580347101966, "learning_rate": 3.362742117332554e-06, "loss": 0.3769, "step": 6547 }, { "epoch": 0.8045214399803415, "grad_norm": 0.3334791613344774, "learning_rate": 3.358684050068965e-06, "loss": 0.4138, "step": 6548 }, { "epoch": 0.8046443051971987, "grad_norm": 0.33600649816668626, "learning_rate": 3.35462812414995e-06, "loss": 0.3617, "step": 6549 }, { "epoch": 0.8047671704140558, "grad_norm": 0.3764082822467489, "learning_rate": 3.3505743403215712e-06, "loss": 0.3715, "step": 6550 }, { "epoch": 0.8048900356309129, "grad_norm": 0.3662155155837514, "learning_rate": 3.346522699329489e-06, "loss": 0.3799, "step": 6551 }, { "epoch": 0.80501290084777, "grad_norm": 0.46097040301668474, "learning_rate": 3.3424732019189806e-06, "loss": 0.4379, "step": 6552 }, { "epoch": 0.8051357660646271, "grad_norm": 0.35187576482421884, "learning_rate": 3.338425848834929e-06, "loss": 0.4637, "step": 6553 }, { "epoch": 0.8052586312814842, "grad_norm": 0.3025735626031966, "learning_rate": 3.3343806408218116e-06, "loss": 0.369, "step": 6554 }, { "epoch": 0.8053814964983413, "grad_norm": 0.30680890622310164, "learning_rate": 3.3303375786237244e-06, "loss": 0.3882, "step": 6555 }, { "epoch": 0.8055043617151985, "grad_norm": 0.38159466238655243, "learning_rate": 3.32629666298435e-06, "loss": 0.3798, "step": 6556 }, { "epoch": 0.8056272269320556, "grad_norm": 0.3538815834729877, "learning_rate": 3.3222578946470085e-06, "loss": 0.4206, "step": 6557 }, { "epoch": 0.8057500921489127, "grad_norm": 0.34534827178873534, "learning_rate": 3.3182212743545885e-06, "loss": 0.425, "step": 6558 }, { "epoch": 0.8058729573657698, "grad_norm": 0.36607043271473255, "learning_rate": 3.314186802849607e-06, "loss": 0.4209, "step": 6559 }, { "epoch": 0.8059958225826268, "grad_norm": 0.3536766373246765, "learning_rate": 3.3101544808741813e-06, "loss": 0.3961, "step": 6560 }, { "epoch": 0.8061186877994839, "grad_norm": 0.38075300118799527, "learning_rate": 3.306124309170023e-06, "loss": 0.4059, "step": 6561 }, { "epoch": 0.806241553016341, "grad_norm": 0.30182738431827455, "learning_rate": 3.3020962884784667e-06, "loss": 0.3062, "step": 6562 }, { "epoch": 0.8063644182331982, "grad_norm": 0.33097793274552956, "learning_rate": 3.2980704195404237e-06, "loss": 0.34, "step": 6563 }, { "epoch": 0.8064872834500553, "grad_norm": 0.42110177127568665, "learning_rate": 3.2940467030964472e-06, "loss": 0.3721, "step": 6564 }, { "epoch": 0.8066101486669124, "grad_norm": 0.32705012637852016, "learning_rate": 3.2900251398866598e-06, "loss": 0.3962, "step": 6565 }, { "epoch": 0.8067330138837695, "grad_norm": 0.41004963509475517, "learning_rate": 3.28600573065081e-06, "loss": 0.4511, "step": 6566 }, { "epoch": 0.8068558791006266, "grad_norm": 0.3637681212303734, "learning_rate": 3.28198847612823e-06, "loss": 0.4921, "step": 6567 }, { "epoch": 0.8069787443174837, "grad_norm": 0.3190935005686401, "learning_rate": 3.2779733770578846e-06, "loss": 0.3203, "step": 6568 }, { "epoch": 0.8071016095343408, "grad_norm": 0.35514638245580893, "learning_rate": 3.2739604341783103e-06, "loss": 0.3526, "step": 6569 }, { "epoch": 0.807224474751198, "grad_norm": 0.42042067590547355, "learning_rate": 3.2699496482276747e-06, "loss": 0.3654, "step": 6570 }, { "epoch": 0.8073473399680551, "grad_norm": 0.36706724741689356, "learning_rate": 3.265941019943723e-06, "loss": 0.3261, "step": 6571 }, { "epoch": 0.8074702051849122, "grad_norm": 0.3703310213386506, "learning_rate": 3.2619345500638246e-06, "loss": 0.4735, "step": 6572 }, { "epoch": 0.8075930704017693, "grad_norm": 0.2986456926520257, "learning_rate": 3.2579302393249446e-06, "loss": 0.3709, "step": 6573 }, { "epoch": 0.8077159356186263, "grad_norm": 0.33454230674493995, "learning_rate": 3.2539280884636422e-06, "loss": 0.3696, "step": 6574 }, { "epoch": 0.8078388008354834, "grad_norm": 0.35056423095125105, "learning_rate": 3.2499280982160934e-06, "loss": 0.3443, "step": 6575 }, { "epoch": 0.8079616660523405, "grad_norm": 0.3019406539941546, "learning_rate": 3.2459302693180686e-06, "loss": 0.3568, "step": 6576 }, { "epoch": 0.8080845312691977, "grad_norm": 0.3525893279910445, "learning_rate": 3.2419346025049483e-06, "loss": 0.3881, "step": 6577 }, { "epoch": 0.8082073964860548, "grad_norm": 0.3441369974674471, "learning_rate": 3.237941098511698e-06, "loss": 0.4011, "step": 6578 }, { "epoch": 0.8083302617029119, "grad_norm": 0.3161902266744119, "learning_rate": 3.233949758072905e-06, "loss": 0.3308, "step": 6579 }, { "epoch": 0.808453126919769, "grad_norm": 0.3760388677068815, "learning_rate": 3.22996058192275e-06, "loss": 0.3699, "step": 6580 }, { "epoch": 0.8085759921366261, "grad_norm": 0.4325042338412977, "learning_rate": 3.2259735707950117e-06, "loss": 0.4575, "step": 6581 }, { "epoch": 0.8086988573534832, "grad_norm": 0.3607426554590419, "learning_rate": 3.2219887254230797e-06, "loss": 0.3603, "step": 6582 }, { "epoch": 0.8088217225703404, "grad_norm": 0.38320470713531, "learning_rate": 3.2180060465399357e-06, "loss": 0.3917, "step": 6583 }, { "epoch": 0.8089445877871975, "grad_norm": 0.30784300822200467, "learning_rate": 3.214025534878176e-06, "loss": 0.3325, "step": 6584 }, { "epoch": 0.8090674530040546, "grad_norm": 0.42451839736883773, "learning_rate": 3.2100471911699796e-06, "loss": 0.4495, "step": 6585 }, { "epoch": 0.8091903182209117, "grad_norm": 0.39988308453616367, "learning_rate": 3.2060710161471427e-06, "loss": 0.3819, "step": 6586 }, { "epoch": 0.8093131834377688, "grad_norm": 0.34291446697105604, "learning_rate": 3.2020970105410607e-06, "loss": 0.3944, "step": 6587 }, { "epoch": 0.8094360486546259, "grad_norm": 0.29421873267457926, "learning_rate": 3.198125175082717e-06, "loss": 0.3241, "step": 6588 }, { "epoch": 0.8095589138714829, "grad_norm": 0.3061773246274742, "learning_rate": 3.1941555105027115e-06, "loss": 0.3348, "step": 6589 }, { "epoch": 0.80968177908834, "grad_norm": 0.45050516217499254, "learning_rate": 3.1901880175312307e-06, "loss": 0.4795, "step": 6590 }, { "epoch": 0.8098046443051972, "grad_norm": 0.29807242430735464, "learning_rate": 3.1862226968980813e-06, "loss": 0.3721, "step": 6591 }, { "epoch": 0.8099275095220543, "grad_norm": 0.3652104400226022, "learning_rate": 3.182259549332649e-06, "loss": 0.3969, "step": 6592 }, { "epoch": 0.8100503747389114, "grad_norm": 0.40611228060238064, "learning_rate": 3.1782985755639344e-06, "loss": 0.327, "step": 6593 }, { "epoch": 0.8101732399557685, "grad_norm": 0.29107035662852954, "learning_rate": 3.174339776320523e-06, "loss": 0.4483, "step": 6594 }, { "epoch": 0.8102961051726256, "grad_norm": 0.45439094136981023, "learning_rate": 3.170383152330627e-06, "loss": 0.3918, "step": 6595 }, { "epoch": 0.8104189703894827, "grad_norm": 0.3207724032575305, "learning_rate": 3.1664287043220265e-06, "loss": 0.3659, "step": 6596 }, { "epoch": 0.8105418356063399, "grad_norm": 0.367055884513969, "learning_rate": 3.162476433022127e-06, "loss": 0.4237, "step": 6597 }, { "epoch": 0.810664700823197, "grad_norm": 0.3601755688081564, "learning_rate": 3.158526339157915e-06, "loss": 0.4427, "step": 6598 }, { "epoch": 0.8107875660400541, "grad_norm": 0.474985189664575, "learning_rate": 3.1545784234559883e-06, "loss": 0.361, "step": 6599 }, { "epoch": 0.8109104312569112, "grad_norm": 0.3728300973929095, "learning_rate": 3.1506326866425445e-06, "loss": 0.3462, "step": 6600 }, { "epoch": 0.8110332964737683, "grad_norm": 0.34346192580261803, "learning_rate": 3.146689129443368e-06, "loss": 0.3446, "step": 6601 }, { "epoch": 0.8111561616906254, "grad_norm": 0.28135012133545845, "learning_rate": 3.142747752583854e-06, "loss": 0.4683, "step": 6602 }, { "epoch": 0.8112790269074824, "grad_norm": 0.33626761584201464, "learning_rate": 3.1388085567889934e-06, "loss": 0.3935, "step": 6603 }, { "epoch": 0.8114018921243396, "grad_norm": 0.4016633193598618, "learning_rate": 3.1348715427833824e-06, "loss": 0.3393, "step": 6604 }, { "epoch": 0.8115247573411967, "grad_norm": 0.32477645293545365, "learning_rate": 3.130936711291198e-06, "loss": 0.425, "step": 6605 }, { "epoch": 0.8116476225580538, "grad_norm": 0.36069470336917303, "learning_rate": 3.1270040630362313e-06, "loss": 0.4044, "step": 6606 }, { "epoch": 0.8117704877749109, "grad_norm": 0.30317712026057725, "learning_rate": 3.1230735987418733e-06, "loss": 0.3508, "step": 6607 }, { "epoch": 0.811893352991768, "grad_norm": 0.29319953988957764, "learning_rate": 3.1191453191310967e-06, "loss": 0.3739, "step": 6608 }, { "epoch": 0.8120162182086251, "grad_norm": 0.34740387975065323, "learning_rate": 3.1152192249264907e-06, "loss": 0.3577, "step": 6609 }, { "epoch": 0.8121390834254822, "grad_norm": 0.4498653954472621, "learning_rate": 3.111295316850231e-06, "loss": 0.4493, "step": 6610 }, { "epoch": 0.8122619486423394, "grad_norm": 0.3077457885535805, "learning_rate": 3.107373595624101e-06, "loss": 0.3683, "step": 6611 }, { "epoch": 0.8123848138591965, "grad_norm": 0.3054205488120532, "learning_rate": 3.1034540619694683e-06, "loss": 0.4168, "step": 6612 }, { "epoch": 0.8125076790760536, "grad_norm": 0.3554900500527145, "learning_rate": 3.09953671660731e-06, "loss": 0.4059, "step": 6613 }, { "epoch": 0.8126305442929107, "grad_norm": 0.3701645930442735, "learning_rate": 3.0956215602581933e-06, "loss": 0.3724, "step": 6614 }, { "epoch": 0.8127534095097678, "grad_norm": 0.3327118429571123, "learning_rate": 3.0917085936422934e-06, "loss": 0.4022, "step": 6615 }, { "epoch": 0.8128762747266249, "grad_norm": 0.4157407793667799, "learning_rate": 3.0877978174793642e-06, "loss": 0.4095, "step": 6616 }, { "epoch": 0.812999139943482, "grad_norm": 0.3429996549323361, "learning_rate": 3.083889232488775e-06, "loss": 0.4008, "step": 6617 }, { "epoch": 0.8131220051603391, "grad_norm": 0.43195171506188057, "learning_rate": 3.0799828393894863e-06, "loss": 0.4429, "step": 6618 }, { "epoch": 0.8132448703771962, "grad_norm": 0.35397346607636415, "learning_rate": 3.076078638900046e-06, "loss": 0.3673, "step": 6619 }, { "epoch": 0.8133677355940533, "grad_norm": 0.3959798269405211, "learning_rate": 3.0721766317386153e-06, "loss": 0.4464, "step": 6620 }, { "epoch": 0.8134906008109104, "grad_norm": 0.362677952356224, "learning_rate": 3.068276818622929e-06, "loss": 0.3173, "step": 6621 }, { "epoch": 0.8136134660277675, "grad_norm": 0.3564433280193112, "learning_rate": 3.0643792002703515e-06, "loss": 0.3411, "step": 6622 }, { "epoch": 0.8137363312446246, "grad_norm": 0.3092563632244881, "learning_rate": 3.0604837773978095e-06, "loss": 0.3979, "step": 6623 }, { "epoch": 0.8138591964614817, "grad_norm": 0.32943301560712374, "learning_rate": 3.0565905507218473e-06, "loss": 0.3368, "step": 6624 }, { "epoch": 0.8139820616783389, "grad_norm": 0.34580103544002244, "learning_rate": 3.0526995209586016e-06, "loss": 0.3745, "step": 6625 }, { "epoch": 0.814104926895196, "grad_norm": 0.3584892137558329, "learning_rate": 3.048810688823794e-06, "loss": 0.4591, "step": 6626 }, { "epoch": 0.8142277921120531, "grad_norm": 0.3604103537016055, "learning_rate": 3.0449240550327577e-06, "loss": 0.4605, "step": 6627 }, { "epoch": 0.8143506573289102, "grad_norm": 0.3775133764744031, "learning_rate": 3.041039620300402e-06, "loss": 0.4371, "step": 6628 }, { "epoch": 0.8144735225457673, "grad_norm": 0.30589891199161, "learning_rate": 3.03715738534126e-06, "loss": 0.4203, "step": 6629 }, { "epoch": 0.8145963877626244, "grad_norm": 0.3718092026279435, "learning_rate": 3.0332773508694302e-06, "loss": 0.3675, "step": 6630 }, { "epoch": 0.8147192529794816, "grad_norm": 0.3644110657809488, "learning_rate": 3.02939951759863e-06, "loss": 0.4046, "step": 6631 }, { "epoch": 0.8148421181963387, "grad_norm": 0.44793965710509437, "learning_rate": 3.0255238862421474e-06, "loss": 0.4137, "step": 6632 }, { "epoch": 0.8149649834131957, "grad_norm": 0.3734470789612565, "learning_rate": 3.021650457512897e-06, "loss": 0.3904, "step": 6633 }, { "epoch": 0.8150878486300528, "grad_norm": 0.2987382759843763, "learning_rate": 3.0177792321233595e-06, "loss": 0.3887, "step": 6634 }, { "epoch": 0.8152107138469099, "grad_norm": 0.4302130168481581, "learning_rate": 3.013910210785629e-06, "loss": 0.4037, "step": 6635 }, { "epoch": 0.815333579063767, "grad_norm": 0.32526249787967704, "learning_rate": 3.0100433942113776e-06, "loss": 0.3634, "step": 6636 }, { "epoch": 0.8154564442806241, "grad_norm": 0.4010035732252657, "learning_rate": 3.006178783111887e-06, "loss": 0.3447, "step": 6637 }, { "epoch": 0.8155793094974813, "grad_norm": 0.33150203995383004, "learning_rate": 3.002316378198029e-06, "loss": 0.3581, "step": 6638 }, { "epoch": 0.8157021747143384, "grad_norm": 0.2973685029173853, "learning_rate": 2.9984561801802635e-06, "loss": 0.4305, "step": 6639 }, { "epoch": 0.8158250399311955, "grad_norm": 0.3542690798320522, "learning_rate": 2.994598189768649e-06, "loss": 0.435, "step": 6640 }, { "epoch": 0.8159479051480526, "grad_norm": 0.4275248849152271, "learning_rate": 2.9907424076728417e-06, "loss": 0.3815, "step": 6641 }, { "epoch": 0.8160707703649097, "grad_norm": 0.3227852625533363, "learning_rate": 2.986888834602089e-06, "loss": 0.3382, "step": 6642 }, { "epoch": 0.8161936355817668, "grad_norm": 0.30234680551940246, "learning_rate": 2.9830374712652235e-06, "loss": 0.3267, "step": 6643 }, { "epoch": 0.816316500798624, "grad_norm": 0.37632525517872223, "learning_rate": 2.9791883183706823e-06, "loss": 0.4208, "step": 6644 }, { "epoch": 0.8164393660154811, "grad_norm": 0.30896624126664385, "learning_rate": 2.975341376626496e-06, "loss": 0.4056, "step": 6645 }, { "epoch": 0.8165622312323382, "grad_norm": 0.3020142167736342, "learning_rate": 2.971496646740276e-06, "loss": 0.3592, "step": 6646 }, { "epoch": 0.8166850964491952, "grad_norm": 0.500031929512402, "learning_rate": 2.9676541294192423e-06, "loss": 0.3901, "step": 6647 }, { "epoch": 0.8168079616660523, "grad_norm": 0.5008815704323423, "learning_rate": 2.9638138253701974e-06, "loss": 0.4053, "step": 6648 }, { "epoch": 0.8169308268829094, "grad_norm": 0.3114482612088576, "learning_rate": 2.9599757352995466e-06, "loss": 0.355, "step": 6649 }, { "epoch": 0.8170536920997665, "grad_norm": 0.341632625462454, "learning_rate": 2.9561398599132733e-06, "loss": 0.3706, "step": 6650 }, { "epoch": 0.8171765573166236, "grad_norm": 0.36463795483788586, "learning_rate": 2.9523061999169646e-06, "loss": 0.4389, "step": 6651 }, { "epoch": 0.8172994225334808, "grad_norm": 0.34862985348622405, "learning_rate": 2.9484747560157986e-06, "loss": 0.4549, "step": 6652 }, { "epoch": 0.8174222877503379, "grad_norm": 0.2912026845088731, "learning_rate": 2.944645528914548e-06, "loss": 0.412, "step": 6653 }, { "epoch": 0.817545152967195, "grad_norm": 0.3612441109848585, "learning_rate": 2.9408185193175673e-06, "loss": 0.4149, "step": 6654 }, { "epoch": 0.8176680181840521, "grad_norm": 0.32053662471002875, "learning_rate": 2.9369937279288138e-06, "loss": 0.3444, "step": 6655 }, { "epoch": 0.8177908834009092, "grad_norm": 0.3047868373174892, "learning_rate": 2.9331711554518364e-06, "loss": 0.4247, "step": 6656 }, { "epoch": 0.8179137486177663, "grad_norm": 0.4125249984142774, "learning_rate": 2.9293508025897644e-06, "loss": 0.4592, "step": 6657 }, { "epoch": 0.8180366138346234, "grad_norm": 0.3866179369753799, "learning_rate": 2.9255326700453365e-06, "loss": 0.4843, "step": 6658 }, { "epoch": 0.8181594790514806, "grad_norm": 0.4254977231269255, "learning_rate": 2.9217167585208587e-06, "loss": 0.437, "step": 6659 }, { "epoch": 0.8182823442683377, "grad_norm": 0.3836409319455941, "learning_rate": 2.917903068718262e-06, "loss": 0.4452, "step": 6660 }, { "epoch": 0.8184052094851948, "grad_norm": 0.31145400223064396, "learning_rate": 2.914091601339036e-06, "loss": 0.4014, "step": 6661 }, { "epoch": 0.8185280747020518, "grad_norm": 0.44377202635267216, "learning_rate": 2.9102823570842846e-06, "loss": 0.4332, "step": 6662 }, { "epoch": 0.8186509399189089, "grad_norm": 0.3307946754817127, "learning_rate": 2.9064753366546836e-06, "loss": 0.3836, "step": 6663 }, { "epoch": 0.818773805135766, "grad_norm": 0.3571708721367521, "learning_rate": 2.9026705407505165e-06, "loss": 0.3797, "step": 6664 }, { "epoch": 0.8188966703526231, "grad_norm": 0.306311047069295, "learning_rate": 2.8988679700716534e-06, "loss": 0.3924, "step": 6665 }, { "epoch": 0.8190195355694803, "grad_norm": 0.4066982643611563, "learning_rate": 2.89506762531754e-06, "loss": 0.3615, "step": 6666 }, { "epoch": 0.8191424007863374, "grad_norm": 0.3919208142160058, "learning_rate": 2.891269507187242e-06, "loss": 0.4047, "step": 6667 }, { "epoch": 0.8192652660031945, "grad_norm": 0.3847267440992714, "learning_rate": 2.887473616379387e-06, "loss": 0.404, "step": 6668 }, { "epoch": 0.8193881312200516, "grad_norm": 0.37036847256679717, "learning_rate": 2.8836799535922116e-06, "loss": 0.4126, "step": 6669 }, { "epoch": 0.8195109964369087, "grad_norm": 0.3388041230747206, "learning_rate": 2.8798885195235224e-06, "loss": 0.3452, "step": 6670 }, { "epoch": 0.8196338616537658, "grad_norm": 0.3981347978492014, "learning_rate": 2.876099314870747e-06, "loss": 0.3762, "step": 6671 }, { "epoch": 0.819756726870623, "grad_norm": 0.2903717627674361, "learning_rate": 2.8723123403308726e-06, "loss": 0.3931, "step": 6672 }, { "epoch": 0.8198795920874801, "grad_norm": 0.3222940052375983, "learning_rate": 2.868527596600497e-06, "loss": 0.4205, "step": 6673 }, { "epoch": 0.8200024573043372, "grad_norm": 0.4852570438348561, "learning_rate": 2.86474508437579e-06, "loss": 0.352, "step": 6674 }, { "epoch": 0.8201253225211943, "grad_norm": 0.3443614640067924, "learning_rate": 2.860964804352525e-06, "loss": 0.361, "step": 6675 }, { "epoch": 0.8202481877380513, "grad_norm": 0.32846358915217017, "learning_rate": 2.8571867572260626e-06, "loss": 0.3807, "step": 6676 }, { "epoch": 0.8203710529549084, "grad_norm": 0.31362900815793965, "learning_rate": 2.8534109436913445e-06, "loss": 0.3784, "step": 6677 }, { "epoch": 0.8204939181717655, "grad_norm": 0.33090613277947406, "learning_rate": 2.8496373644429095e-06, "loss": 0.4272, "step": 6678 }, { "epoch": 0.8206167833886227, "grad_norm": 0.29619300092026, "learning_rate": 2.8458660201748836e-06, "loss": 0.3986, "step": 6679 }, { "epoch": 0.8207396486054798, "grad_norm": 0.42960900501619065, "learning_rate": 2.842096911580985e-06, "loss": 0.3434, "step": 6680 }, { "epoch": 0.8208625138223369, "grad_norm": 0.3533942037732465, "learning_rate": 2.8383300393545098e-06, "loss": 0.4142, "step": 6681 }, { "epoch": 0.820985379039194, "grad_norm": 0.47294117521399115, "learning_rate": 2.834565404188351e-06, "loss": 0.3628, "step": 6682 }, { "epoch": 0.8211082442560511, "grad_norm": 0.39044871654908586, "learning_rate": 2.8308030067749955e-06, "loss": 0.5127, "step": 6683 }, { "epoch": 0.8212311094729082, "grad_norm": 0.32241956188716187, "learning_rate": 2.8270428478065015e-06, "loss": 0.2864, "step": 6684 }, { "epoch": 0.8213539746897653, "grad_norm": 0.37556319181736303, "learning_rate": 2.8232849279745366e-06, "loss": 0.376, "step": 6685 }, { "epoch": 0.8214768399066225, "grad_norm": 0.3481128592743029, "learning_rate": 2.8195292479703315e-06, "loss": 0.3797, "step": 6686 }, { "epoch": 0.8215997051234796, "grad_norm": 0.3678459375502868, "learning_rate": 2.815775808484737e-06, "loss": 0.311, "step": 6687 }, { "epoch": 0.8217225703403367, "grad_norm": 0.38546867574840865, "learning_rate": 2.8120246102081614e-06, "loss": 0.4333, "step": 6688 }, { "epoch": 0.8218454355571938, "grad_norm": 0.370920649283926, "learning_rate": 2.808275653830617e-06, "loss": 0.3945, "step": 6689 }, { "epoch": 0.8219683007740509, "grad_norm": 0.3133543158202898, "learning_rate": 2.804528940041699e-06, "loss": 0.3156, "step": 6690 }, { "epoch": 0.8220911659909079, "grad_norm": 0.444053774720527, "learning_rate": 2.800784469530596e-06, "loss": 0.3955, "step": 6691 }, { "epoch": 0.822214031207765, "grad_norm": 0.32624777620811135, "learning_rate": 2.797042242986071e-06, "loss": 0.3881, "step": 6692 }, { "epoch": 0.8223368964246222, "grad_norm": 0.36940004805605964, "learning_rate": 2.7933022610964877e-06, "loss": 0.4351, "step": 6693 }, { "epoch": 0.8224597616414793, "grad_norm": 0.4735056142017058, "learning_rate": 2.7895645245497926e-06, "loss": 0.3752, "step": 6694 }, { "epoch": 0.8225826268583364, "grad_norm": 0.4035920446859672, "learning_rate": 2.7858290340335126e-06, "loss": 0.3189, "step": 6695 }, { "epoch": 0.8227054920751935, "grad_norm": 0.3575533096598398, "learning_rate": 2.7820957902347744e-06, "loss": 0.4711, "step": 6696 }, { "epoch": 0.8228283572920506, "grad_norm": 0.26263605190388384, "learning_rate": 2.7783647938402724e-06, "loss": 0.3643, "step": 6697 }, { "epoch": 0.8229512225089077, "grad_norm": 0.3479155360411468, "learning_rate": 2.7746360455363123e-06, "loss": 0.3567, "step": 6698 }, { "epoch": 0.8230740877257648, "grad_norm": 0.36151536566102754, "learning_rate": 2.7709095460087656e-06, "loss": 0.3521, "step": 6699 }, { "epoch": 0.823196952942622, "grad_norm": 0.36353095414078557, "learning_rate": 2.767185295943101e-06, "loss": 0.3943, "step": 6700 }, { "epoch": 0.8233198181594791, "grad_norm": 0.3369247618052543, "learning_rate": 2.7634632960243667e-06, "loss": 0.393, "step": 6701 }, { "epoch": 0.8234426833763362, "grad_norm": 0.2985087942731995, "learning_rate": 2.759743546937202e-06, "loss": 0.2894, "step": 6702 }, { "epoch": 0.8235655485931933, "grad_norm": 0.3630136701123858, "learning_rate": 2.756026049365834e-06, "loss": 0.378, "step": 6703 }, { "epoch": 0.8236884138100504, "grad_norm": 0.33194301493035283, "learning_rate": 2.7523108039940662e-06, "loss": 0.3616, "step": 6704 }, { "epoch": 0.8238112790269074, "grad_norm": 0.35037692878704774, "learning_rate": 2.748597811505297e-06, "loss": 0.3663, "step": 6705 }, { "epoch": 0.8239341442437645, "grad_norm": 0.303676275001468, "learning_rate": 2.744887072582507e-06, "loss": 0.2911, "step": 6706 }, { "epoch": 0.8240570094606217, "grad_norm": 0.2812573767158773, "learning_rate": 2.7411785879082663e-06, "loss": 0.3629, "step": 6707 }, { "epoch": 0.8241798746774788, "grad_norm": 0.3463423289168863, "learning_rate": 2.737472358164721e-06, "loss": 0.2718, "step": 6708 }, { "epoch": 0.8243027398943359, "grad_norm": 0.2881404457110364, "learning_rate": 2.7337683840336074e-06, "loss": 0.4514, "step": 6709 }, { "epoch": 0.824425605111193, "grad_norm": 0.34443984766303415, "learning_rate": 2.7300666661962558e-06, "loss": 0.4136, "step": 6710 }, { "epoch": 0.8245484703280501, "grad_norm": 0.3747576465714835, "learning_rate": 2.726367205333563e-06, "loss": 0.4644, "step": 6711 }, { "epoch": 0.8246713355449072, "grad_norm": 0.3170685058665574, "learning_rate": 2.7226700021260267e-06, "loss": 0.3605, "step": 6712 }, { "epoch": 0.8247942007617644, "grad_norm": 0.33470665426826135, "learning_rate": 2.718975057253722e-06, "loss": 0.4361, "step": 6713 }, { "epoch": 0.8249170659786215, "grad_norm": 0.3205284947351307, "learning_rate": 2.7152823713963125e-06, "loss": 0.386, "step": 6714 }, { "epoch": 0.8250399311954786, "grad_norm": 0.3621020647317901, "learning_rate": 2.7115919452330403e-06, "loss": 0.5106, "step": 6715 }, { "epoch": 0.8251627964123357, "grad_norm": 0.31778426501260826, "learning_rate": 2.7079037794427346e-06, "loss": 0.3373, "step": 6716 }, { "epoch": 0.8252856616291928, "grad_norm": 0.3489858069180359, "learning_rate": 2.704217874703812e-06, "loss": 0.3665, "step": 6717 }, { "epoch": 0.8254085268460499, "grad_norm": 0.4052200449383784, "learning_rate": 2.7005342316942748e-06, "loss": 0.4637, "step": 6718 }, { "epoch": 0.825531392062907, "grad_norm": 0.36588929390097696, "learning_rate": 2.696852851091696e-06, "loss": 0.3702, "step": 6719 }, { "epoch": 0.825654257279764, "grad_norm": 0.3163067296706481, "learning_rate": 2.6931737335732476e-06, "loss": 0.4077, "step": 6720 }, { "epoch": 0.8257771224966212, "grad_norm": 0.3680314349171667, "learning_rate": 2.689496879815681e-06, "loss": 0.4404, "step": 6721 }, { "epoch": 0.8258999877134783, "grad_norm": 0.3474233209038969, "learning_rate": 2.685822290495324e-06, "loss": 0.3661, "step": 6722 }, { "epoch": 0.8260228529303354, "grad_norm": 0.3492805602257473, "learning_rate": 2.6821499662881004e-06, "loss": 0.3919, "step": 6723 }, { "epoch": 0.8261457181471925, "grad_norm": 0.36673597621332843, "learning_rate": 2.6784799078694987e-06, "loss": 0.4137, "step": 6724 }, { "epoch": 0.8262685833640496, "grad_norm": 0.3309109050352, "learning_rate": 2.674812115914617e-06, "loss": 0.4332, "step": 6725 }, { "epoch": 0.8263914485809067, "grad_norm": 0.41550262709518265, "learning_rate": 2.6711465910981125e-06, "loss": 0.3565, "step": 6726 }, { "epoch": 0.8265143137977639, "grad_norm": 0.314453214846344, "learning_rate": 2.667483334094239e-06, "loss": 0.3566, "step": 6727 }, { "epoch": 0.826637179014621, "grad_norm": 0.32697607146209395, "learning_rate": 2.6638223455768242e-06, "loss": 0.3931, "step": 6728 }, { "epoch": 0.8267600442314781, "grad_norm": 0.35448914489983574, "learning_rate": 2.6601636262192874e-06, "loss": 0.3697, "step": 6729 }, { "epoch": 0.8268829094483352, "grad_norm": 0.3722050155589981, "learning_rate": 2.6565071766946277e-06, "loss": 0.3848, "step": 6730 }, { "epoch": 0.8270057746651923, "grad_norm": 0.34309368910877575, "learning_rate": 2.6528529976754128e-06, "loss": 0.3616, "step": 6731 }, { "epoch": 0.8271286398820494, "grad_norm": 0.3149952608621273, "learning_rate": 2.649201089833826e-06, "loss": 0.4287, "step": 6732 }, { "epoch": 0.8272515050989065, "grad_norm": 0.3098431943121298, "learning_rate": 2.6455514538415943e-06, "loss": 0.3746, "step": 6733 }, { "epoch": 0.8273743703157636, "grad_norm": 0.33243558139284246, "learning_rate": 2.641904090370056e-06, "loss": 0.3497, "step": 6734 }, { "epoch": 0.8274972355326207, "grad_norm": 0.32270640461005606, "learning_rate": 2.638259000090109e-06, "loss": 0.3403, "step": 6735 }, { "epoch": 0.8276201007494778, "grad_norm": 0.3917854765169264, "learning_rate": 2.634616183672256e-06, "loss": 0.4005, "step": 6736 }, { "epoch": 0.8277429659663349, "grad_norm": 0.32955717545464436, "learning_rate": 2.6309756417865607e-06, "loss": 0.3949, "step": 6737 }, { "epoch": 0.827865831183192, "grad_norm": 0.3723600392380285, "learning_rate": 2.6273373751026837e-06, "loss": 0.4022, "step": 6738 }, { "epoch": 0.8279886964000491, "grad_norm": 0.3239759657597695, "learning_rate": 2.6237013842898533e-06, "loss": 0.3406, "step": 6739 }, { "epoch": 0.8281115616169062, "grad_norm": 0.310709028972024, "learning_rate": 2.6200676700168898e-06, "loss": 0.4359, "step": 6740 }, { "epoch": 0.8282344268337634, "grad_norm": 0.3542090666942656, "learning_rate": 2.616436232952196e-06, "loss": 0.5094, "step": 6741 }, { "epoch": 0.8283572920506205, "grad_norm": 0.3509523748591816, "learning_rate": 2.6128070737637437e-06, "loss": 0.3544, "step": 6742 }, { "epoch": 0.8284801572674776, "grad_norm": 0.38550875922284444, "learning_rate": 2.609180193119095e-06, "loss": 0.3241, "step": 6743 }, { "epoch": 0.8286030224843347, "grad_norm": 0.3418634958221766, "learning_rate": 2.6055555916853945e-06, "loss": 0.3317, "step": 6744 }, { "epoch": 0.8287258877011918, "grad_norm": 0.4079366906131057, "learning_rate": 2.601933270129364e-06, "loss": 0.3729, "step": 6745 }, { "epoch": 0.8288487529180489, "grad_norm": 0.43800353804346925, "learning_rate": 2.5983132291173007e-06, "loss": 0.4108, "step": 6746 }, { "epoch": 0.828971618134906, "grad_norm": 0.38082501776917765, "learning_rate": 2.5946954693150915e-06, "loss": 0.3644, "step": 6747 }, { "epoch": 0.8290944833517632, "grad_norm": 0.40757939796614023, "learning_rate": 2.591079991388203e-06, "loss": 0.3856, "step": 6748 }, { "epoch": 0.8292173485686202, "grad_norm": 0.382320239651037, "learning_rate": 2.5874667960016725e-06, "loss": 0.4111, "step": 6749 }, { "epoch": 0.8293402137854773, "grad_norm": 0.3957640127641479, "learning_rate": 2.5838558838201304e-06, "loss": 0.3587, "step": 6750 }, { "epoch": 0.8294630790023344, "grad_norm": 0.39481969502398534, "learning_rate": 2.580247255507769e-06, "loss": 0.4318, "step": 6751 }, { "epoch": 0.8295859442191915, "grad_norm": 0.3528480070524135, "learning_rate": 2.576640911728387e-06, "loss": 0.4146, "step": 6752 }, { "epoch": 0.8297088094360486, "grad_norm": 0.3735579548451217, "learning_rate": 2.573036853145337e-06, "loss": 0.3259, "step": 6753 }, { "epoch": 0.8298316746529057, "grad_norm": 0.3550391264892645, "learning_rate": 2.569435080421567e-06, "loss": 0.3822, "step": 6754 }, { "epoch": 0.8299545398697629, "grad_norm": 0.29445105988401254, "learning_rate": 2.5658355942195994e-06, "loss": 0.4704, "step": 6755 }, { "epoch": 0.83007740508662, "grad_norm": 0.3131375048110218, "learning_rate": 2.5622383952015386e-06, "loss": 0.3901, "step": 6756 }, { "epoch": 0.8302002703034771, "grad_norm": 0.36488560538276127, "learning_rate": 2.5586434840290597e-06, "loss": 0.3985, "step": 6757 }, { "epoch": 0.8303231355203342, "grad_norm": 0.36719888700449776, "learning_rate": 2.555050861363428e-06, "loss": 0.4087, "step": 6758 }, { "epoch": 0.8304460007371913, "grad_norm": 0.31234180547934165, "learning_rate": 2.5514605278654844e-06, "loss": 0.3842, "step": 6759 }, { "epoch": 0.8305688659540484, "grad_norm": 0.29679065382509945, "learning_rate": 2.547872484195642e-06, "loss": 0.3768, "step": 6760 }, { "epoch": 0.8306917311709056, "grad_norm": 0.30387685907304346, "learning_rate": 2.544286731013905e-06, "loss": 0.3813, "step": 6761 }, { "epoch": 0.8308145963877627, "grad_norm": 0.3613375136802771, "learning_rate": 2.540703268979838e-06, "loss": 0.3669, "step": 6762 }, { "epoch": 0.8309374616046198, "grad_norm": 0.4114448461495624, "learning_rate": 2.5371220987526105e-06, "loss": 0.3951, "step": 6763 }, { "epoch": 0.8310603268214768, "grad_norm": 0.4162464898122869, "learning_rate": 2.533543220990944e-06, "loss": 0.457, "step": 6764 }, { "epoch": 0.8311831920383339, "grad_norm": 0.29945699580463525, "learning_rate": 2.5299666363531594e-06, "loss": 0.4239, "step": 6765 }, { "epoch": 0.831306057255191, "grad_norm": 0.3588120557543782, "learning_rate": 2.526392345497136e-06, "loss": 0.4474, "step": 6766 }, { "epoch": 0.8314289224720481, "grad_norm": 0.385447951319415, "learning_rate": 2.522820349080348e-06, "loss": 0.3831, "step": 6767 }, { "epoch": 0.8315517876889053, "grad_norm": 0.3243411486861325, "learning_rate": 2.5192506477598415e-06, "loss": 0.3347, "step": 6768 }, { "epoch": 0.8316746529057624, "grad_norm": 0.46363667033791217, "learning_rate": 2.515683242192236e-06, "loss": 0.4372, "step": 6769 }, { "epoch": 0.8317975181226195, "grad_norm": 0.2422172793224104, "learning_rate": 2.5121181330337336e-06, "loss": 0.3853, "step": 6770 }, { "epoch": 0.8319203833394766, "grad_norm": 0.34077568993269014, "learning_rate": 2.5085553209401123e-06, "loss": 0.3288, "step": 6771 }, { "epoch": 0.8320432485563337, "grad_norm": 0.3999704945004977, "learning_rate": 2.5049948065667355e-06, "loss": 0.3777, "step": 6772 }, { "epoch": 0.8321661137731908, "grad_norm": 0.4202863731853645, "learning_rate": 2.5014365905685237e-06, "loss": 0.5043, "step": 6773 }, { "epoch": 0.832288978990048, "grad_norm": 0.36162653114831456, "learning_rate": 2.497880673600002e-06, "loss": 0.3961, "step": 6774 }, { "epoch": 0.8324118442069051, "grad_norm": 0.29944338405394794, "learning_rate": 2.494327056315247e-06, "loss": 0.3921, "step": 6775 }, { "epoch": 0.8325347094237622, "grad_norm": 0.3465264945644496, "learning_rate": 2.4907757393679326e-06, "loss": 0.4356, "step": 6776 }, { "epoch": 0.8326575746406193, "grad_norm": 0.4006994061877023, "learning_rate": 2.487226723411291e-06, "loss": 0.3904, "step": 6777 }, { "epoch": 0.8327804398574763, "grad_norm": 0.44332882554606357, "learning_rate": 2.4836800090981455e-06, "loss": 0.5001, "step": 6778 }, { "epoch": 0.8329033050743334, "grad_norm": 0.3645383766020643, "learning_rate": 2.4801355970808955e-06, "loss": 0.4677, "step": 6779 }, { "epoch": 0.8330261702911905, "grad_norm": 0.34942933699320294, "learning_rate": 2.4765934880115042e-06, "loss": 0.3501, "step": 6780 }, { "epoch": 0.8331490355080476, "grad_norm": 0.347183825937191, "learning_rate": 2.4730536825415247e-06, "loss": 0.3857, "step": 6781 }, { "epoch": 0.8332719007249048, "grad_norm": 0.4041098579745886, "learning_rate": 2.4695161813220783e-06, "loss": 0.3698, "step": 6782 }, { "epoch": 0.8333947659417619, "grad_norm": 0.2864506300111, "learning_rate": 2.4659809850038724e-06, "loss": 0.3293, "step": 6783 }, { "epoch": 0.833517631158619, "grad_norm": 0.3716967066839798, "learning_rate": 2.462448094237174e-06, "loss": 0.4354, "step": 6784 }, { "epoch": 0.8336404963754761, "grad_norm": 0.278223578964738, "learning_rate": 2.458917509671839e-06, "loss": 0.3369, "step": 6785 }, { "epoch": 0.8337633615923332, "grad_norm": 0.3706894499147529, "learning_rate": 2.4553892319573012e-06, "loss": 0.3421, "step": 6786 }, { "epoch": 0.8338862268091903, "grad_norm": 0.2922465362457355, "learning_rate": 2.4518632617425563e-06, "loss": 0.4235, "step": 6787 }, { "epoch": 0.8340090920260474, "grad_norm": 0.35985607212868687, "learning_rate": 2.4483395996761903e-06, "loss": 0.4224, "step": 6788 }, { "epoch": 0.8341319572429046, "grad_norm": 0.3034133934064815, "learning_rate": 2.444818246406347e-06, "loss": 0.4558, "step": 6789 }, { "epoch": 0.8342548224597617, "grad_norm": 0.355519704479729, "learning_rate": 2.4412992025807708e-06, "loss": 0.3967, "step": 6790 }, { "epoch": 0.8343776876766188, "grad_norm": 0.4428813527121619, "learning_rate": 2.437782468846756e-06, "loss": 0.3891, "step": 6791 }, { "epoch": 0.8345005528934759, "grad_norm": 0.4125780501983096, "learning_rate": 2.4342680458511916e-06, "loss": 0.4195, "step": 6792 }, { "epoch": 0.8346234181103329, "grad_norm": 0.38229979948614745, "learning_rate": 2.4307559342405227e-06, "loss": 0.3332, "step": 6793 }, { "epoch": 0.83474628332719, "grad_norm": 0.36669711632035507, "learning_rate": 2.4272461346607904e-06, "loss": 0.3709, "step": 6794 }, { "epoch": 0.8348691485440471, "grad_norm": 0.40273294438632634, "learning_rate": 2.4237386477575917e-06, "loss": 0.3759, "step": 6795 }, { "epoch": 0.8349920137609043, "grad_norm": 0.345784802992101, "learning_rate": 2.420233474176109e-06, "loss": 0.3885, "step": 6796 }, { "epoch": 0.8351148789777614, "grad_norm": 0.34335620749509976, "learning_rate": 2.4167306145610996e-06, "loss": 0.4339, "step": 6797 }, { "epoch": 0.8352377441946185, "grad_norm": 0.3596990144130861, "learning_rate": 2.413230069556885e-06, "loss": 0.4588, "step": 6798 }, { "epoch": 0.8353606094114756, "grad_norm": 0.38539981427311676, "learning_rate": 2.409731839807375e-06, "loss": 0.4878, "step": 6799 }, { "epoch": 0.8354834746283327, "grad_norm": 0.3105241827343518, "learning_rate": 2.4062359259560348e-06, "loss": 0.3418, "step": 6800 }, { "epoch": 0.8356063398451898, "grad_norm": 0.39881927242304066, "learning_rate": 2.4027423286459284e-06, "loss": 0.3866, "step": 6801 }, { "epoch": 0.835729205062047, "grad_norm": 0.4352787578683159, "learning_rate": 2.3992510485196716e-06, "loss": 0.4591, "step": 6802 }, { "epoch": 0.8358520702789041, "grad_norm": 0.3511991683803719, "learning_rate": 2.3957620862194695e-06, "loss": 0.3452, "step": 6803 }, { "epoch": 0.8359749354957612, "grad_norm": 0.3246581246021114, "learning_rate": 2.392275442387087e-06, "loss": 0.3568, "step": 6804 }, { "epoch": 0.8360978007126183, "grad_norm": 0.3013948524242616, "learning_rate": 2.3887911176638737e-06, "loss": 0.3872, "step": 6805 }, { "epoch": 0.8362206659294754, "grad_norm": 0.30931804789469886, "learning_rate": 2.3853091126907493e-06, "loss": 0.3105, "step": 6806 }, { "epoch": 0.8363435311463324, "grad_norm": 0.3886774409825427, "learning_rate": 2.381829428108203e-06, "loss": 0.3039, "step": 6807 }, { "epoch": 0.8364663963631895, "grad_norm": 0.3672356753467134, "learning_rate": 2.3783520645562996e-06, "loss": 0.384, "step": 6808 }, { "epoch": 0.8365892615800467, "grad_norm": 0.32228493535245273, "learning_rate": 2.374877022674682e-06, "loss": 0.3616, "step": 6809 }, { "epoch": 0.8367121267969038, "grad_norm": 0.354869947628417, "learning_rate": 2.3714043031025608e-06, "loss": 0.4404, "step": 6810 }, { "epoch": 0.8368349920137609, "grad_norm": 0.34872010845236673, "learning_rate": 2.3679339064787165e-06, "loss": 0.3669, "step": 6811 }, { "epoch": 0.836957857230618, "grad_norm": 0.2906659279824399, "learning_rate": 2.364465833441507e-06, "loss": 0.3945, "step": 6812 }, { "epoch": 0.8370807224474751, "grad_norm": 0.4051046157145019, "learning_rate": 2.3610000846288637e-06, "loss": 0.3596, "step": 6813 }, { "epoch": 0.8372035876643322, "grad_norm": 0.31592655727291197, "learning_rate": 2.3575366606782916e-06, "loss": 0.395, "step": 6814 }, { "epoch": 0.8373264528811893, "grad_norm": 0.3430738320112372, "learning_rate": 2.3540755622268597e-06, "loss": 0.3877, "step": 6815 }, { "epoch": 0.8374493180980465, "grad_norm": 0.3322679534353692, "learning_rate": 2.3506167899112146e-06, "loss": 0.3282, "step": 6816 }, { "epoch": 0.8375721833149036, "grad_norm": 0.40714332387986074, "learning_rate": 2.34716034436758e-06, "loss": 0.3645, "step": 6817 }, { "epoch": 0.8376950485317607, "grad_norm": 0.42517254462418086, "learning_rate": 2.3437062262317398e-06, "loss": 0.4084, "step": 6818 }, { "epoch": 0.8378179137486178, "grad_norm": 0.4010637941756678, "learning_rate": 2.3402544361390614e-06, "loss": 0.4325, "step": 6819 }, { "epoch": 0.8379407789654749, "grad_norm": 0.3784147828042778, "learning_rate": 2.3368049747244786e-06, "loss": 0.4459, "step": 6820 }, { "epoch": 0.838063644182332, "grad_norm": 0.29667064728424747, "learning_rate": 2.3333578426225e-06, "loss": 0.4691, "step": 6821 }, { "epoch": 0.838186509399189, "grad_norm": 0.34407052359514867, "learning_rate": 2.329913040467195e-06, "loss": 0.338, "step": 6822 }, { "epoch": 0.8383093746160462, "grad_norm": 0.30794968887476903, "learning_rate": 2.326470568892221e-06, "loss": 0.4352, "step": 6823 }, { "epoch": 0.8384322398329033, "grad_norm": 0.37043743358718667, "learning_rate": 2.3230304285307956e-06, "loss": 0.4051, "step": 6824 }, { "epoch": 0.8385551050497604, "grad_norm": 0.4070668240331556, "learning_rate": 2.319592620015708e-06, "loss": 0.3886, "step": 6825 }, { "epoch": 0.8386779702666175, "grad_norm": 0.3789612421995289, "learning_rate": 2.3161571439793255e-06, "loss": 0.331, "step": 6826 }, { "epoch": 0.8388008354834746, "grad_norm": 0.3665375648415973, "learning_rate": 2.3127240010535728e-06, "loss": 0.4889, "step": 6827 }, { "epoch": 0.8389237007003317, "grad_norm": 0.3581070750312794, "learning_rate": 2.309293191869966e-06, "loss": 0.3682, "step": 6828 }, { "epoch": 0.8390465659171888, "grad_norm": 0.3453179111211136, "learning_rate": 2.305864717059571e-06, "loss": 0.4304, "step": 6829 }, { "epoch": 0.839169431134046, "grad_norm": 0.3774513689998645, "learning_rate": 2.3024385772530408e-06, "loss": 0.3472, "step": 6830 }, { "epoch": 0.8392922963509031, "grad_norm": 0.5191248314054099, "learning_rate": 2.2990147730805855e-06, "loss": 0.3564, "step": 6831 }, { "epoch": 0.8394151615677602, "grad_norm": 0.43928643590897054, "learning_rate": 2.2955933051719924e-06, "loss": 0.4137, "step": 6832 }, { "epoch": 0.8395380267846173, "grad_norm": 0.3801760943895438, "learning_rate": 2.292174174156623e-06, "loss": 0.4376, "step": 6833 }, { "epoch": 0.8396608920014744, "grad_norm": 0.3237140322790966, "learning_rate": 2.2887573806633983e-06, "loss": 0.3294, "step": 6834 }, { "epoch": 0.8397837572183315, "grad_norm": 0.41921920690539827, "learning_rate": 2.285342925320818e-06, "loss": 0.3907, "step": 6835 }, { "epoch": 0.8399066224351885, "grad_norm": 0.3060566185660119, "learning_rate": 2.2819308087569502e-06, "loss": 0.4225, "step": 6836 }, { "epoch": 0.8400294876520457, "grad_norm": 0.4469524341954293, "learning_rate": 2.2785210315994325e-06, "loss": 0.4418, "step": 6837 }, { "epoch": 0.8401523528689028, "grad_norm": 0.37377125730632565, "learning_rate": 2.2751135944754637e-06, "loss": 0.4091, "step": 6838 }, { "epoch": 0.8402752180857599, "grad_norm": 0.2975318399969454, "learning_rate": 2.2717084980118304e-06, "loss": 0.3755, "step": 6839 }, { "epoch": 0.840398083302617, "grad_norm": 0.30021826754231, "learning_rate": 2.2683057428348715e-06, "loss": 0.419, "step": 6840 }, { "epoch": 0.8405209485194741, "grad_norm": 0.3560281796516607, "learning_rate": 2.264905329570506e-06, "loss": 0.414, "step": 6841 }, { "epoch": 0.8406438137363312, "grad_norm": 0.3564862279596161, "learning_rate": 2.2615072588442116e-06, "loss": 0.4293, "step": 6842 }, { "epoch": 0.8407666789531884, "grad_norm": 0.3044229289143514, "learning_rate": 2.258111531281045e-06, "loss": 0.3084, "step": 6843 }, { "epoch": 0.8408895441700455, "grad_norm": 0.3323505436178027, "learning_rate": 2.2547181475056313e-06, "loss": 0.371, "step": 6844 }, { "epoch": 0.8410124093869026, "grad_norm": 0.36814873203845916, "learning_rate": 2.251327108142155e-06, "loss": 0.3754, "step": 6845 }, { "epoch": 0.8411352746037597, "grad_norm": 0.33733839683897354, "learning_rate": 2.2479384138143794e-06, "loss": 0.3743, "step": 6846 }, { "epoch": 0.8412581398206168, "grad_norm": 0.3146140156676846, "learning_rate": 2.2445520651456326e-06, "loss": 0.3331, "step": 6847 }, { "epoch": 0.8413810050374739, "grad_norm": 0.42657917925939143, "learning_rate": 2.2411680627588143e-06, "loss": 0.4911, "step": 6848 }, { "epoch": 0.841503870254331, "grad_norm": 0.3409815010532817, "learning_rate": 2.237786407276384e-06, "loss": 0.3603, "step": 6849 }, { "epoch": 0.8416267354711882, "grad_norm": 0.3252009894357112, "learning_rate": 2.234407099320378e-06, "loss": 0.3432, "step": 6850 }, { "epoch": 0.8417496006880452, "grad_norm": 0.32160073102235964, "learning_rate": 2.2310301395124016e-06, "loss": 0.4315, "step": 6851 }, { "epoch": 0.8418724659049023, "grad_norm": 0.2950879427249362, "learning_rate": 2.227655528473618e-06, "loss": 0.4171, "step": 6852 }, { "epoch": 0.8419953311217594, "grad_norm": 0.3469449493639603, "learning_rate": 2.224283266824773e-06, "loss": 0.4107, "step": 6853 }, { "epoch": 0.8421181963386165, "grad_norm": 0.32326504606069534, "learning_rate": 2.22091335518616e-06, "loss": 0.3799, "step": 6854 }, { "epoch": 0.8422410615554736, "grad_norm": 0.345091135079923, "learning_rate": 2.2175457941776654e-06, "loss": 0.3801, "step": 6855 }, { "epoch": 0.8423639267723307, "grad_norm": 0.34834183154869697, "learning_rate": 2.214180584418723e-06, "loss": 0.4007, "step": 6856 }, { "epoch": 0.8424867919891879, "grad_norm": 0.314755836478681, "learning_rate": 2.2108177265283468e-06, "loss": 0.3979, "step": 6857 }, { "epoch": 0.842609657206045, "grad_norm": 0.3900561504821315, "learning_rate": 2.207457221125101e-06, "loss": 0.3915, "step": 6858 }, { "epoch": 0.8427325224229021, "grad_norm": 0.3322552852904332, "learning_rate": 2.204099068827144e-06, "loss": 0.3748, "step": 6859 }, { "epoch": 0.8428553876397592, "grad_norm": 0.3494569585676421, "learning_rate": 2.200743270252177e-06, "loss": 0.4481, "step": 6860 }, { "epoch": 0.8429782528566163, "grad_norm": 0.3385346902968001, "learning_rate": 2.1973898260174773e-06, "loss": 0.3911, "step": 6861 }, { "epoch": 0.8431011180734734, "grad_norm": 0.39562238385295023, "learning_rate": 2.1940387367398956e-06, "loss": 0.4526, "step": 6862 }, { "epoch": 0.8432239832903305, "grad_norm": 0.3107806233939655, "learning_rate": 2.1906900030358353e-06, "loss": 0.2843, "step": 6863 }, { "epoch": 0.8433468485071877, "grad_norm": 0.3472193459731936, "learning_rate": 2.1873436255212814e-06, "loss": 0.405, "step": 6864 }, { "epoch": 0.8434697137240448, "grad_norm": 0.3265168536835169, "learning_rate": 2.183999604811767e-06, "loss": 0.4506, "step": 6865 }, { "epoch": 0.8435925789409018, "grad_norm": 0.3245811130551594, "learning_rate": 2.1806579415224172e-06, "loss": 0.3892, "step": 6866 }, { "epoch": 0.8437154441577589, "grad_norm": 0.32651446696958697, "learning_rate": 2.1773186362678993e-06, "loss": 0.4012, "step": 6867 }, { "epoch": 0.843838309374616, "grad_norm": 0.36099232798368763, "learning_rate": 2.1739816896624643e-06, "loss": 0.4664, "step": 6868 }, { "epoch": 0.8439611745914731, "grad_norm": 0.35674987171491357, "learning_rate": 2.170647102319914e-06, "loss": 0.3694, "step": 6869 }, { "epoch": 0.8440840398083302, "grad_norm": 0.30586619574348334, "learning_rate": 2.1673148748536287e-06, "loss": 0.3883, "step": 6870 }, { "epoch": 0.8442069050251874, "grad_norm": 0.3451811285146625, "learning_rate": 2.1639850078765523e-06, "loss": 0.4049, "step": 6871 }, { "epoch": 0.8443297702420445, "grad_norm": 0.4067572328597112, "learning_rate": 2.1606575020011864e-06, "loss": 0.3827, "step": 6872 }, { "epoch": 0.8444526354589016, "grad_norm": 0.5935761607812229, "learning_rate": 2.157332357839607e-06, "loss": 0.3529, "step": 6873 }, { "epoch": 0.8445755006757587, "grad_norm": 0.4051479297413583, "learning_rate": 2.1540095760034513e-06, "loss": 0.3825, "step": 6874 }, { "epoch": 0.8446983658926158, "grad_norm": 0.4304070981125932, "learning_rate": 2.15068915710393e-06, "loss": 0.3671, "step": 6875 }, { "epoch": 0.8448212311094729, "grad_norm": 0.31523859186046466, "learning_rate": 2.1473711017518032e-06, "loss": 0.3626, "step": 6876 }, { "epoch": 0.84494409632633, "grad_norm": 0.30459067400487433, "learning_rate": 2.1440554105574097e-06, "loss": 0.3946, "step": 6877 }, { "epoch": 0.8450669615431872, "grad_norm": 0.3207076017322854, "learning_rate": 2.140742084130649e-06, "loss": 0.379, "step": 6878 }, { "epoch": 0.8451898267600443, "grad_norm": 0.3510446083767093, "learning_rate": 2.137431123080991e-06, "loss": 0.3496, "step": 6879 }, { "epoch": 0.8453126919769013, "grad_norm": 0.3323708190582962, "learning_rate": 2.1341225280174586e-06, "loss": 0.3419, "step": 6880 }, { "epoch": 0.8454355571937584, "grad_norm": 0.341814719980254, "learning_rate": 2.13081629954865e-06, "loss": 0.3267, "step": 6881 }, { "epoch": 0.8455584224106155, "grad_norm": 0.509993809235114, "learning_rate": 2.1275124382827243e-06, "loss": 0.4262, "step": 6882 }, { "epoch": 0.8456812876274726, "grad_norm": 0.3574422310945019, "learning_rate": 2.1242109448274015e-06, "loss": 0.3919, "step": 6883 }, { "epoch": 0.8458041528443297, "grad_norm": 0.3555192873762335, "learning_rate": 2.120911819789974e-06, "loss": 0.3511, "step": 6884 }, { "epoch": 0.8459270180611869, "grad_norm": 0.3986518996795717, "learning_rate": 2.117615063777293e-06, "loss": 0.4109, "step": 6885 }, { "epoch": 0.846049883278044, "grad_norm": 0.37617290386129315, "learning_rate": 2.1143206773957797e-06, "loss": 0.3815, "step": 6886 }, { "epoch": 0.8461727484949011, "grad_norm": 0.29597948079866854, "learning_rate": 2.1110286612514077e-06, "loss": 0.3232, "step": 6887 }, { "epoch": 0.8462956137117582, "grad_norm": 0.3515328871184669, "learning_rate": 2.107739015949725e-06, "loss": 0.469, "step": 6888 }, { "epoch": 0.8464184789286153, "grad_norm": 0.40570074671446316, "learning_rate": 2.104451742095845e-06, "loss": 0.3826, "step": 6889 }, { "epoch": 0.8465413441454724, "grad_norm": 0.3310023499247555, "learning_rate": 2.101166840294433e-06, "loss": 0.3749, "step": 6890 }, { "epoch": 0.8466642093623296, "grad_norm": 0.341983769407262, "learning_rate": 2.0978843111497324e-06, "loss": 0.423, "step": 6891 }, { "epoch": 0.8467870745791867, "grad_norm": 0.3268619068961125, "learning_rate": 2.0946041552655314e-06, "loss": 0.434, "step": 6892 }, { "epoch": 0.8469099397960438, "grad_norm": 0.4021405691988958, "learning_rate": 2.0913263732452093e-06, "loss": 0.3969, "step": 6893 }, { "epoch": 0.8470328050129009, "grad_norm": 0.4026072183549643, "learning_rate": 2.0880509656916836e-06, "loss": 0.4383, "step": 6894 }, { "epoch": 0.8471556702297579, "grad_norm": 0.421421998659253, "learning_rate": 2.0847779332074475e-06, "loss": 0.4204, "step": 6895 }, { "epoch": 0.847278535446615, "grad_norm": 0.34629119450722085, "learning_rate": 2.081507276394544e-06, "loss": 0.3766, "step": 6896 }, { "epoch": 0.8474014006634721, "grad_norm": 0.401462123481819, "learning_rate": 2.078238995854608e-06, "loss": 0.4271, "step": 6897 }, { "epoch": 0.8475242658803293, "grad_norm": 0.33386858547689274, "learning_rate": 2.0749730921888022e-06, "loss": 0.3629, "step": 6898 }, { "epoch": 0.8476471310971864, "grad_norm": 0.33896368912698915, "learning_rate": 2.0717095659978784e-06, "loss": 0.3473, "step": 6899 }, { "epoch": 0.8477699963140435, "grad_norm": 0.32437468658839796, "learning_rate": 2.0684484178821333e-06, "loss": 0.4514, "step": 6900 }, { "epoch": 0.8478928615309006, "grad_norm": 0.3620016360969305, "learning_rate": 2.0651896484414383e-06, "loss": 0.4053, "step": 6901 }, { "epoch": 0.8480157267477577, "grad_norm": 0.4114377273679642, "learning_rate": 2.061933258275226e-06, "loss": 0.3882, "step": 6902 }, { "epoch": 0.8481385919646148, "grad_norm": 0.3054975438199365, "learning_rate": 2.0586792479824766e-06, "loss": 0.3883, "step": 6903 }, { "epoch": 0.848261457181472, "grad_norm": 0.3107215169227951, "learning_rate": 2.0554276181617603e-06, "loss": 0.3335, "step": 6904 }, { "epoch": 0.8483843223983291, "grad_norm": 0.3567756826022422, "learning_rate": 2.05217836941118e-06, "loss": 0.3669, "step": 6905 }, { "epoch": 0.8485071876151862, "grad_norm": 0.3689529830352406, "learning_rate": 2.0489315023284244e-06, "loss": 0.3722, "step": 6906 }, { "epoch": 0.8486300528320433, "grad_norm": 0.32534235697928543, "learning_rate": 2.045687017510724e-06, "loss": 0.4227, "step": 6907 }, { "epoch": 0.8487529180489004, "grad_norm": 0.47208835940346616, "learning_rate": 2.0424449155548846e-06, "loss": 0.3941, "step": 6908 }, { "epoch": 0.8488757832657574, "grad_norm": 0.29265731202261525, "learning_rate": 2.039205197057273e-06, "loss": 0.4062, "step": 6909 }, { "epoch": 0.8489986484826145, "grad_norm": 0.37168669381317565, "learning_rate": 2.0359678626138102e-06, "loss": 0.4285, "step": 6910 }, { "epoch": 0.8491215136994716, "grad_norm": 0.3447087312951963, "learning_rate": 2.0327329128199834e-06, "loss": 0.379, "step": 6911 }, { "epoch": 0.8492443789163288, "grad_norm": 0.33805481750558236, "learning_rate": 2.029500348270842e-06, "loss": 0.387, "step": 6912 }, { "epoch": 0.8493672441331859, "grad_norm": 0.30720483480160227, "learning_rate": 2.026270169560998e-06, "loss": 0.3968, "step": 6913 }, { "epoch": 0.849490109350043, "grad_norm": 0.3812461727036577, "learning_rate": 2.023042377284615e-06, "loss": 0.3473, "step": 6914 }, { "epoch": 0.8496129745669001, "grad_norm": 0.6707252867880555, "learning_rate": 2.0198169720354283e-06, "loss": 0.576, "step": 6915 }, { "epoch": 0.8497358397837572, "grad_norm": 0.36716880884125136, "learning_rate": 2.0165939544067306e-06, "loss": 0.4918, "step": 6916 }, { "epoch": 0.8498587050006143, "grad_norm": 0.3247634294769103, "learning_rate": 2.013373324991377e-06, "loss": 0.3558, "step": 6917 }, { "epoch": 0.8499815702174714, "grad_norm": 0.3478485805100272, "learning_rate": 2.0101550843817768e-06, "loss": 0.384, "step": 6918 }, { "epoch": 0.8501044354343286, "grad_norm": 0.3021863349179689, "learning_rate": 2.0069392331699077e-06, "loss": 0.3974, "step": 6919 }, { "epoch": 0.8502273006511857, "grad_norm": 0.33270773372638135, "learning_rate": 2.003725771947305e-06, "loss": 0.397, "step": 6920 }, { "epoch": 0.8503501658680428, "grad_norm": 0.29648514282400745, "learning_rate": 2.0005147013050594e-06, "loss": 0.4135, "step": 6921 }, { "epoch": 0.8504730310848999, "grad_norm": 0.3720295166917452, "learning_rate": 1.997306021833832e-06, "loss": 0.3331, "step": 6922 }, { "epoch": 0.850595896301757, "grad_norm": 0.32364635791643875, "learning_rate": 1.9940997341238347e-06, "loss": 0.4482, "step": 6923 }, { "epoch": 0.850718761518614, "grad_norm": 0.3651534189913219, "learning_rate": 1.9908958387648485e-06, "loss": 0.4774, "step": 6924 }, { "epoch": 0.8508416267354711, "grad_norm": 0.3915129865285574, "learning_rate": 1.987694336346203e-06, "loss": 0.3613, "step": 6925 }, { "epoch": 0.8509644919523283, "grad_norm": 0.375355910537672, "learning_rate": 1.9844952274567955e-06, "loss": 0.2844, "step": 6926 }, { "epoch": 0.8510873571691854, "grad_norm": 0.5759947758655447, "learning_rate": 1.9812985126850875e-06, "loss": 0.4679, "step": 6927 }, { "epoch": 0.8512102223860425, "grad_norm": 0.39812032085474147, "learning_rate": 1.9781041926190847e-06, "loss": 0.3926, "step": 6928 }, { "epoch": 0.8513330876028996, "grad_norm": 0.3346433938737865, "learning_rate": 1.974912267846369e-06, "loss": 0.3319, "step": 6929 }, { "epoch": 0.8514559528197567, "grad_norm": 0.2933194777972718, "learning_rate": 1.971722738954064e-06, "loss": 0.3767, "step": 6930 }, { "epoch": 0.8515788180366138, "grad_norm": 0.34305584914245274, "learning_rate": 1.968535606528877e-06, "loss": 0.3558, "step": 6931 }, { "epoch": 0.851701683253471, "grad_norm": 0.5206391273484554, "learning_rate": 1.965350871157049e-06, "loss": 0.4817, "step": 6932 }, { "epoch": 0.8518245484703281, "grad_norm": 0.3664180690886453, "learning_rate": 1.9621685334243984e-06, "loss": 0.3317, "step": 6933 }, { "epoch": 0.8519474136871852, "grad_norm": 0.3508028352483101, "learning_rate": 1.9589885939162917e-06, "loss": 0.3406, "step": 6934 }, { "epoch": 0.8520702789040423, "grad_norm": 0.3214742632842741, "learning_rate": 1.9558110532176576e-06, "loss": 0.4587, "step": 6935 }, { "epoch": 0.8521931441208994, "grad_norm": 0.4011167073839311, "learning_rate": 1.9526359119129856e-06, "loss": 0.4293, "step": 6936 }, { "epoch": 0.8523160093377565, "grad_norm": 0.38795018853631136, "learning_rate": 1.9494631705863265e-06, "loss": 0.3783, "step": 6937 }, { "epoch": 0.8524388745546135, "grad_norm": 0.3053433869865936, "learning_rate": 1.9462928298212785e-06, "loss": 0.3596, "step": 6938 }, { "epoch": 0.8525617397714707, "grad_norm": 0.27924386187714845, "learning_rate": 1.943124890201007e-06, "loss": 0.3997, "step": 6939 }, { "epoch": 0.8526846049883278, "grad_norm": 0.32641878965930116, "learning_rate": 1.9399593523082387e-06, "loss": 0.3645, "step": 6940 }, { "epoch": 0.8528074702051849, "grad_norm": 0.3827516152323891, "learning_rate": 1.9367962167252483e-06, "loss": 0.3479, "step": 6941 }, { "epoch": 0.852930335422042, "grad_norm": 0.35781151170448006, "learning_rate": 1.9336354840338737e-06, "loss": 0.2911, "step": 6942 }, { "epoch": 0.8530532006388991, "grad_norm": 0.36062371977599883, "learning_rate": 1.9304771548155148e-06, "loss": 0.3459, "step": 6943 }, { "epoch": 0.8531760658557562, "grad_norm": 0.39317511517268583, "learning_rate": 1.927321229651128e-06, "loss": 0.4697, "step": 6944 }, { "epoch": 0.8532989310726133, "grad_norm": 0.2966178657269292, "learning_rate": 1.9241677091212183e-06, "loss": 0.3615, "step": 6945 }, { "epoch": 0.8534217962894705, "grad_norm": 0.2822327463498447, "learning_rate": 1.9210165938058594e-06, "loss": 0.397, "step": 6946 }, { "epoch": 0.8535446615063276, "grad_norm": 0.4034156234900619, "learning_rate": 1.917867884284679e-06, "loss": 0.3633, "step": 6947 }, { "epoch": 0.8536675267231847, "grad_norm": 0.3804778102875855, "learning_rate": 1.9147215811368597e-06, "loss": 0.3492, "step": 6948 }, { "epoch": 0.8537903919400418, "grad_norm": 0.30327288463504404, "learning_rate": 1.9115776849411425e-06, "loss": 0.3564, "step": 6949 }, { "epoch": 0.8539132571568989, "grad_norm": 0.36728886877346184, "learning_rate": 1.9084361962758306e-06, "loss": 0.4019, "step": 6950 }, { "epoch": 0.854036122373756, "grad_norm": 0.34558181698125945, "learning_rate": 1.9052971157187816e-06, "loss": 0.3966, "step": 6951 }, { "epoch": 0.8541589875906132, "grad_norm": 0.3480134414795881, "learning_rate": 1.9021604438474016e-06, "loss": 0.4127, "step": 6952 }, { "epoch": 0.8542818528074702, "grad_norm": 0.36997349430741716, "learning_rate": 1.899026181238666e-06, "loss": 0.3688, "step": 6953 }, { "epoch": 0.8544047180243273, "grad_norm": 0.33141214626794374, "learning_rate": 1.8958943284691056e-06, "loss": 0.3587, "step": 6954 }, { "epoch": 0.8545275832411844, "grad_norm": 0.3710287250128465, "learning_rate": 1.8927648861147956e-06, "loss": 0.389, "step": 6955 }, { "epoch": 0.8546504484580415, "grad_norm": 0.41714316737241375, "learning_rate": 1.889637854751386e-06, "loss": 0.3642, "step": 6956 }, { "epoch": 0.8547733136748986, "grad_norm": 0.34190629604030415, "learning_rate": 1.8865132349540615e-06, "loss": 0.3753, "step": 6957 }, { "epoch": 0.8548961788917557, "grad_norm": 0.32931121783838596, "learning_rate": 1.8833910272975906e-06, "loss": 0.3325, "step": 6958 }, { "epoch": 0.8550190441086128, "grad_norm": 0.3373616106049502, "learning_rate": 1.8802712323562742e-06, "loss": 0.3931, "step": 6959 }, { "epoch": 0.85514190932547, "grad_norm": 0.3422211488743556, "learning_rate": 1.8771538507039815e-06, "loss": 0.3731, "step": 6960 }, { "epoch": 0.8552647745423271, "grad_norm": 0.41500063028749234, "learning_rate": 1.8740388829141285e-06, "loss": 0.315, "step": 6961 }, { "epoch": 0.8553876397591842, "grad_norm": 0.3177758172659432, "learning_rate": 1.8709263295597023e-06, "loss": 0.3329, "step": 6962 }, { "epoch": 0.8555105049760413, "grad_norm": 0.3666345546637831, "learning_rate": 1.8678161912132313e-06, "loss": 0.4451, "step": 6963 }, { "epoch": 0.8556333701928984, "grad_norm": 0.3823563889325841, "learning_rate": 1.8647084684468096e-06, "loss": 0.36, "step": 6964 }, { "epoch": 0.8557562354097555, "grad_norm": 0.36764364602582694, "learning_rate": 1.8616031618320767e-06, "loss": 0.3476, "step": 6965 }, { "epoch": 0.8558791006266127, "grad_norm": 0.383803181179143, "learning_rate": 1.8585002719402372e-06, "loss": 0.3347, "step": 6966 }, { "epoch": 0.8560019658434697, "grad_norm": 0.4516007601989717, "learning_rate": 1.8553997993420495e-06, "loss": 0.3967, "step": 6967 }, { "epoch": 0.8561248310603268, "grad_norm": 0.4012233234932706, "learning_rate": 1.852301744607816e-06, "loss": 0.4403, "step": 6968 }, { "epoch": 0.8562476962771839, "grad_norm": 0.3386868058252444, "learning_rate": 1.8492061083074174e-06, "loss": 0.3873, "step": 6969 }, { "epoch": 0.856370561494041, "grad_norm": 0.32272174931164277, "learning_rate": 1.8461128910102665e-06, "loss": 0.3325, "step": 6970 }, { "epoch": 0.8564934267108981, "grad_norm": 0.34068323710176424, "learning_rate": 1.8430220932853465e-06, "loss": 0.4326, "step": 6971 }, { "epoch": 0.8566162919277552, "grad_norm": 0.303482145338898, "learning_rate": 1.8399337157011842e-06, "loss": 0.3132, "step": 6972 }, { "epoch": 0.8567391571446124, "grad_norm": 0.339131231538177, "learning_rate": 1.836847758825867e-06, "loss": 0.4063, "step": 6973 }, { "epoch": 0.8568620223614695, "grad_norm": 0.357976530488145, "learning_rate": 1.8337642232270424e-06, "loss": 0.4362, "step": 6974 }, { "epoch": 0.8569848875783266, "grad_norm": 0.38883934378078877, "learning_rate": 1.8306831094719002e-06, "loss": 0.3317, "step": 6975 }, { "epoch": 0.8571077527951837, "grad_norm": 0.3684756921004521, "learning_rate": 1.8276044181271935e-06, "loss": 0.3896, "step": 6976 }, { "epoch": 0.8572306180120408, "grad_norm": 0.36346710693826695, "learning_rate": 1.8245281497592293e-06, "loss": 0.491, "step": 6977 }, { "epoch": 0.8573534832288979, "grad_norm": 0.3966571012489606, "learning_rate": 1.8214543049338683e-06, "loss": 0.349, "step": 6978 }, { "epoch": 0.857476348445755, "grad_norm": 0.4239122092979377, "learning_rate": 1.8183828842165183e-06, "loss": 0.4374, "step": 6979 }, { "epoch": 0.8575992136626122, "grad_norm": 0.4024590193866183, "learning_rate": 1.815313888172151e-06, "loss": 0.3194, "step": 6980 }, { "epoch": 0.8577220788794693, "grad_norm": 0.2875812076809387, "learning_rate": 1.8122473173652893e-06, "loss": 0.3959, "step": 6981 }, { "epoch": 0.8578449440963263, "grad_norm": 0.3542138701442976, "learning_rate": 1.8091831723600105e-06, "loss": 0.3277, "step": 6982 }, { "epoch": 0.8579678093131834, "grad_norm": 0.30151282126018875, "learning_rate": 1.8061214537199388e-06, "loss": 0.405, "step": 6983 }, { "epoch": 0.8580906745300405, "grad_norm": 0.3229302755616819, "learning_rate": 1.8030621620082604e-06, "loss": 0.3517, "step": 6984 }, { "epoch": 0.8582135397468976, "grad_norm": 0.33225665828205675, "learning_rate": 1.8000052977877152e-06, "loss": 0.4012, "step": 6985 }, { "epoch": 0.8583364049637547, "grad_norm": 0.3795851333217075, "learning_rate": 1.7969508616205866e-06, "loss": 0.3486, "step": 6986 }, { "epoch": 0.8584592701806119, "grad_norm": 0.34130711555305127, "learning_rate": 1.7938988540687233e-06, "loss": 0.3881, "step": 6987 }, { "epoch": 0.858582135397469, "grad_norm": 0.3597421501682232, "learning_rate": 1.7908492756935203e-06, "loss": 0.3609, "step": 6988 }, { "epoch": 0.8587050006143261, "grad_norm": 0.34531949233535425, "learning_rate": 1.787802127055933e-06, "loss": 0.4084, "step": 6989 }, { "epoch": 0.8588278658311832, "grad_norm": 0.36455379567992613, "learning_rate": 1.784757408716457e-06, "loss": 0.3735, "step": 6990 }, { "epoch": 0.8589507310480403, "grad_norm": 0.3479527598699353, "learning_rate": 1.7817151212351507e-06, "loss": 0.3899, "step": 6991 }, { "epoch": 0.8590735962648974, "grad_norm": 0.2981322713171015, "learning_rate": 1.7786752651716281e-06, "loss": 0.3683, "step": 6992 }, { "epoch": 0.8591964614817545, "grad_norm": 0.2601216429996111, "learning_rate": 1.7756378410850437e-06, "loss": 0.3968, "step": 6993 }, { "epoch": 0.8593193266986117, "grad_norm": 0.35092433560943276, "learning_rate": 1.772602849534119e-06, "loss": 0.3767, "step": 6994 }, { "epoch": 0.8594421919154688, "grad_norm": 0.3463654373581264, "learning_rate": 1.7695702910771106e-06, "loss": 0.3641, "step": 6995 }, { "epoch": 0.8595650571323259, "grad_norm": 0.31563623083154785, "learning_rate": 1.7665401662718522e-06, "loss": 0.3256, "step": 6996 }, { "epoch": 0.8596879223491829, "grad_norm": 0.36920491566100766, "learning_rate": 1.7635124756757031e-06, "loss": 0.4096, "step": 6997 }, { "epoch": 0.85981078756604, "grad_norm": 0.4232795582259984, "learning_rate": 1.760487219845598e-06, "loss": 0.4649, "step": 6998 }, { "epoch": 0.8599336527828971, "grad_norm": 0.41271658186178367, "learning_rate": 1.7574643993379996e-06, "loss": 0.3504, "step": 6999 }, { "epoch": 0.8600565179997542, "grad_norm": 0.31995030873788793, "learning_rate": 1.7544440147089518e-06, "loss": 0.3755, "step": 7000 }, { "epoch": 0.8601793832166114, "grad_norm": 0.46053858087758937, "learning_rate": 1.751426066514022e-06, "loss": 0.4788, "step": 7001 }, { "epoch": 0.8603022484334685, "grad_norm": 0.43217175399513685, "learning_rate": 1.7484105553083523e-06, "loss": 0.3765, "step": 7002 }, { "epoch": 0.8604251136503256, "grad_norm": 0.31263564785904957, "learning_rate": 1.7453974816466162e-06, "loss": 0.4036, "step": 7003 }, { "epoch": 0.8605479788671827, "grad_norm": 0.31123455297143837, "learning_rate": 1.7423868460830566e-06, "loss": 0.3368, "step": 7004 }, { "epoch": 0.8606708440840398, "grad_norm": 0.4215367353908664, "learning_rate": 1.7393786491714591e-06, "loss": 0.3492, "step": 7005 }, { "epoch": 0.8607937093008969, "grad_norm": 0.3526422667215128, "learning_rate": 1.7363728914651594e-06, "loss": 0.4018, "step": 7006 }, { "epoch": 0.860916574517754, "grad_norm": 0.36517223353564904, "learning_rate": 1.7333695735170468e-06, "loss": 0.48, "step": 7007 }, { "epoch": 0.8610394397346112, "grad_norm": 0.4727889153759781, "learning_rate": 1.7303686958795662e-06, "loss": 0.3803, "step": 7008 }, { "epoch": 0.8611623049514683, "grad_norm": 0.31103302690746515, "learning_rate": 1.7273702591047091e-06, "loss": 0.3016, "step": 7009 }, { "epoch": 0.8612851701683254, "grad_norm": 0.3556209092590524, "learning_rate": 1.7243742637440129e-06, "loss": 0.36, "step": 7010 }, { "epoch": 0.8614080353851824, "grad_norm": 0.3558339649139132, "learning_rate": 1.7213807103485768e-06, "loss": 0.444, "step": 7011 }, { "epoch": 0.8615309006020395, "grad_norm": 0.3824026106663141, "learning_rate": 1.7183895994690468e-06, "loss": 0.4071, "step": 7012 }, { "epoch": 0.8616537658188966, "grad_norm": 0.3989170993123379, "learning_rate": 1.715400931655613e-06, "loss": 0.3852, "step": 7013 }, { "epoch": 0.8617766310357537, "grad_norm": 0.5344198363875882, "learning_rate": 1.7124147074580254e-06, "loss": 0.4645, "step": 7014 }, { "epoch": 0.8618994962526109, "grad_norm": 0.35835725084307996, "learning_rate": 1.7094309274255764e-06, "loss": 0.4205, "step": 7015 }, { "epoch": 0.862022361469468, "grad_norm": 0.3247863166268439, "learning_rate": 1.7064495921071221e-06, "loss": 0.4422, "step": 7016 }, { "epoch": 0.8621452266863251, "grad_norm": 0.3347087101541969, "learning_rate": 1.7034707020510487e-06, "loss": 0.3348, "step": 7017 }, { "epoch": 0.8622680919031822, "grad_norm": 0.302843630057397, "learning_rate": 1.7004942578053078e-06, "loss": 0.4003, "step": 7018 }, { "epoch": 0.8623909571200393, "grad_norm": 0.3211320419794864, "learning_rate": 1.6975202599174e-06, "loss": 0.3395, "step": 7019 }, { "epoch": 0.8625138223368964, "grad_norm": 0.36791987113969965, "learning_rate": 1.6945487089343725e-06, "loss": 0.4341, "step": 7020 }, { "epoch": 0.8626366875537536, "grad_norm": 0.3391078026214734, "learning_rate": 1.6915796054028182e-06, "loss": 0.3352, "step": 7021 }, { "epoch": 0.8627595527706107, "grad_norm": 0.3011232509378677, "learning_rate": 1.6886129498688884e-06, "loss": 0.3885, "step": 7022 }, { "epoch": 0.8628824179874678, "grad_norm": 0.35658146176101585, "learning_rate": 1.6856487428782802e-06, "loss": 0.4714, "step": 7023 }, { "epoch": 0.8630052832043249, "grad_norm": 0.35077395739847245, "learning_rate": 1.6826869849762372e-06, "loss": 0.35, "step": 7024 }, { "epoch": 0.863128148421182, "grad_norm": 0.32337785235706307, "learning_rate": 1.679727676707562e-06, "loss": 0.2962, "step": 7025 }, { "epoch": 0.863251013638039, "grad_norm": 0.37121719806092657, "learning_rate": 1.6767708186165875e-06, "loss": 0.3898, "step": 7026 }, { "epoch": 0.8633738788548961, "grad_norm": 0.42549524464599286, "learning_rate": 1.6738164112472238e-06, "loss": 0.375, "step": 7027 }, { "epoch": 0.8634967440717533, "grad_norm": 0.3885935249355843, "learning_rate": 1.6708644551429043e-06, "loss": 0.3565, "step": 7028 }, { "epoch": 0.8636196092886104, "grad_norm": 0.4117553542813419, "learning_rate": 1.6679149508466263e-06, "loss": 0.464, "step": 7029 }, { "epoch": 0.8637424745054675, "grad_norm": 0.34149759264028606, "learning_rate": 1.6649678989009343e-06, "loss": 0.3319, "step": 7030 }, { "epoch": 0.8638653397223246, "grad_norm": 0.3270270887510196, "learning_rate": 1.6620232998479129e-06, "loss": 0.3675, "step": 7031 }, { "epoch": 0.8639882049391817, "grad_norm": 0.5102776113393513, "learning_rate": 1.659081154229209e-06, "loss": 0.4145, "step": 7032 }, { "epoch": 0.8641110701560388, "grad_norm": 0.3958138661596934, "learning_rate": 1.6561414625860028e-06, "loss": 0.396, "step": 7033 }, { "epoch": 0.864233935372896, "grad_norm": 0.4382611645403817, "learning_rate": 1.6532042254590418e-06, "loss": 0.427, "step": 7034 }, { "epoch": 0.8643568005897531, "grad_norm": 0.3900757248927135, "learning_rate": 1.650269443388604e-06, "loss": 0.3886, "step": 7035 }, { "epoch": 0.8644796658066102, "grad_norm": 0.4441792033393328, "learning_rate": 1.647337116914529e-06, "loss": 0.4582, "step": 7036 }, { "epoch": 0.8646025310234673, "grad_norm": 0.35651599670320344, "learning_rate": 1.644407246576189e-06, "loss": 0.3282, "step": 7037 }, { "epoch": 0.8647253962403244, "grad_norm": 0.2785961986647175, "learning_rate": 1.6414798329125291e-06, "loss": 0.3289, "step": 7038 }, { "epoch": 0.8648482614571815, "grad_norm": 0.42403081223880756, "learning_rate": 1.6385548764620174e-06, "loss": 0.3549, "step": 7039 }, { "epoch": 0.8649711266740385, "grad_norm": 0.42756947812831986, "learning_rate": 1.635632377762688e-06, "loss": 0.4401, "step": 7040 }, { "epoch": 0.8650939918908956, "grad_norm": 0.6270524620296232, "learning_rate": 1.632712337352108e-06, "loss": 0.4011, "step": 7041 }, { "epoch": 0.8652168571077528, "grad_norm": 0.418752449839309, "learning_rate": 1.6297947557674042e-06, "loss": 0.4158, "step": 7042 }, { "epoch": 0.8653397223246099, "grad_norm": 0.3536157996223972, "learning_rate": 1.626879633545249e-06, "loss": 0.3976, "step": 7043 }, { "epoch": 0.865462587541467, "grad_norm": 0.32246209242771284, "learning_rate": 1.6239669712218553e-06, "loss": 0.3702, "step": 7044 }, { "epoch": 0.8655854527583241, "grad_norm": 0.2951905497536859, "learning_rate": 1.6210567693329892e-06, "loss": 0.4142, "step": 7045 }, { "epoch": 0.8657083179751812, "grad_norm": 0.32770919561161055, "learning_rate": 1.6181490284139645e-06, "loss": 0.3391, "step": 7046 }, { "epoch": 0.8658311831920383, "grad_norm": 0.39659776577574446, "learning_rate": 1.6152437489996464e-06, "loss": 0.3741, "step": 7047 }, { "epoch": 0.8659540484088954, "grad_norm": 0.35387890120959303, "learning_rate": 1.612340931624434e-06, "loss": 0.3542, "step": 7048 }, { "epoch": 0.8660769136257526, "grad_norm": 0.33208738393587545, "learning_rate": 1.6094405768222841e-06, "loss": 0.3628, "step": 7049 }, { "epoch": 0.8661997788426097, "grad_norm": 0.3224417493485141, "learning_rate": 1.606542685126703e-06, "loss": 0.3823, "step": 7050 }, { "epoch": 0.8663226440594668, "grad_norm": 0.3523298772522316, "learning_rate": 1.6036472570707323e-06, "loss": 0.3198, "step": 7051 }, { "epoch": 0.8664455092763239, "grad_norm": 0.30990457435963886, "learning_rate": 1.6007542931869712e-06, "loss": 0.396, "step": 7052 }, { "epoch": 0.866568374493181, "grad_norm": 0.30944810735566136, "learning_rate": 1.597863794007559e-06, "loss": 0.3582, "step": 7053 }, { "epoch": 0.8666912397100381, "grad_norm": 0.4483104420392763, "learning_rate": 1.5949757600641906e-06, "loss": 0.4233, "step": 7054 }, { "epoch": 0.8668141049268951, "grad_norm": 0.37591746307790624, "learning_rate": 1.5920901918880925e-06, "loss": 0.3683, "step": 7055 }, { "epoch": 0.8669369701437523, "grad_norm": 0.375323358777901, "learning_rate": 1.5892070900100503e-06, "loss": 0.3985, "step": 7056 }, { "epoch": 0.8670598353606094, "grad_norm": 0.3247815354525966, "learning_rate": 1.5863264549603945e-06, "loss": 0.368, "step": 7057 }, { "epoch": 0.8671827005774665, "grad_norm": 0.3020091495409616, "learning_rate": 1.5834482872689949e-06, "loss": 0.3956, "step": 7058 }, { "epoch": 0.8673055657943236, "grad_norm": 0.41205504122221154, "learning_rate": 1.5805725874652726e-06, "loss": 0.392, "step": 7059 }, { "epoch": 0.8674284310111807, "grad_norm": 0.36157942401309395, "learning_rate": 1.5776993560781948e-06, "loss": 0.3869, "step": 7060 }, { "epoch": 0.8675512962280378, "grad_norm": 0.3631725197237481, "learning_rate": 1.5748285936362772e-06, "loss": 0.3887, "step": 7061 }, { "epoch": 0.867674161444895, "grad_norm": 0.3181293583762937, "learning_rate": 1.5719603006675703e-06, "loss": 0.3718, "step": 7062 }, { "epoch": 0.8677970266617521, "grad_norm": 0.3245031103487036, "learning_rate": 1.5690944776996875e-06, "loss": 0.3462, "step": 7063 }, { "epoch": 0.8679198918786092, "grad_norm": 0.3595419648383691, "learning_rate": 1.566231125259765e-06, "loss": 0.3252, "step": 7064 }, { "epoch": 0.8680427570954663, "grad_norm": 0.37502380679838176, "learning_rate": 1.5633702438745118e-06, "loss": 0.3904, "step": 7065 }, { "epoch": 0.8681656223123234, "grad_norm": 0.31589054864897503, "learning_rate": 1.5605118340701602e-06, "loss": 0.3468, "step": 7066 }, { "epoch": 0.8682884875291805, "grad_norm": 0.2869250511850379, "learning_rate": 1.5576558963725029e-06, "loss": 0.4094, "step": 7067 }, { "epoch": 0.8684113527460376, "grad_norm": 0.3288564541273446, "learning_rate": 1.5548024313068633e-06, "loss": 0.4259, "step": 7068 }, { "epoch": 0.8685342179628947, "grad_norm": 0.3725872381358546, "learning_rate": 1.5519514393981193e-06, "loss": 0.4785, "step": 7069 }, { "epoch": 0.8686570831797518, "grad_norm": 0.3715845470448696, "learning_rate": 1.5491029211706986e-06, "loss": 0.3321, "step": 7070 }, { "epoch": 0.8687799483966089, "grad_norm": 0.31507273653406975, "learning_rate": 1.5462568771485618e-06, "loss": 0.4471, "step": 7071 }, { "epoch": 0.868902813613466, "grad_norm": 0.2800664845045184, "learning_rate": 1.5434133078552204e-06, "loss": 0.3867, "step": 7072 }, { "epoch": 0.8690256788303231, "grad_norm": 0.3934689015861109, "learning_rate": 1.5405722138137323e-06, "loss": 0.4502, "step": 7073 }, { "epoch": 0.8691485440471802, "grad_norm": 0.3178970949027488, "learning_rate": 1.5377335955466997e-06, "loss": 0.4111, "step": 7074 }, { "epoch": 0.8692714092640373, "grad_norm": 0.3747304323144337, "learning_rate": 1.534897453576265e-06, "loss": 0.3667, "step": 7075 }, { "epoch": 0.8693942744808945, "grad_norm": 0.46220428049961204, "learning_rate": 1.532063788424119e-06, "loss": 0.4718, "step": 7076 }, { "epoch": 0.8695171396977516, "grad_norm": 0.4093887974431983, "learning_rate": 1.5292326006114998e-06, "loss": 0.4184, "step": 7077 }, { "epoch": 0.8696400049146087, "grad_norm": 0.37232333000648205, "learning_rate": 1.5264038906591793e-06, "loss": 0.3376, "step": 7078 }, { "epoch": 0.8697628701314658, "grad_norm": 0.3260124302758318, "learning_rate": 1.5235776590874844e-06, "loss": 0.3917, "step": 7079 }, { "epoch": 0.8698857353483229, "grad_norm": 0.28404421992404977, "learning_rate": 1.5207539064162811e-06, "loss": 0.3798, "step": 7080 }, { "epoch": 0.87000860056518, "grad_norm": 0.3570427300932692, "learning_rate": 1.5179326331649823e-06, "loss": 0.4193, "step": 7081 }, { "epoch": 0.8701314657820372, "grad_norm": 0.3722944229238225, "learning_rate": 1.515113839852541e-06, "loss": 0.3933, "step": 7082 }, { "epoch": 0.8702543309988943, "grad_norm": 0.30557825438554054, "learning_rate": 1.5122975269974542e-06, "loss": 0.409, "step": 7083 }, { "epoch": 0.8703771962157513, "grad_norm": 0.3289221292704271, "learning_rate": 1.5094836951177676e-06, "loss": 0.3851, "step": 7084 }, { "epoch": 0.8705000614326084, "grad_norm": 0.35855810028978996, "learning_rate": 1.5066723447310688e-06, "loss": 0.4656, "step": 7085 }, { "epoch": 0.8706229266494655, "grad_norm": 0.3805588599004439, "learning_rate": 1.5038634763544822e-06, "loss": 0.459, "step": 7086 }, { "epoch": 0.8707457918663226, "grad_norm": 0.3453897906172944, "learning_rate": 1.5010570905046833e-06, "loss": 0.3667, "step": 7087 }, { "epoch": 0.8708686570831797, "grad_norm": 0.3994958338357811, "learning_rate": 1.4982531876978923e-06, "loss": 0.3643, "step": 7088 }, { "epoch": 0.8709915223000368, "grad_norm": 0.40331725960590176, "learning_rate": 1.4954517684498614e-06, "loss": 0.3896, "step": 7089 }, { "epoch": 0.871114387516894, "grad_norm": 0.31422400848765825, "learning_rate": 1.4926528332759005e-06, "loss": 0.452, "step": 7090 }, { "epoch": 0.8712372527337511, "grad_norm": 0.33735799973818786, "learning_rate": 1.489856382690849e-06, "loss": 0.3905, "step": 7091 }, { "epoch": 0.8713601179506082, "grad_norm": 0.3968746951890043, "learning_rate": 1.4870624172091041e-06, "loss": 0.3795, "step": 7092 }, { "epoch": 0.8714829831674653, "grad_norm": 0.4020461878162978, "learning_rate": 1.4842709373445896e-06, "loss": 0.4086, "step": 7093 }, { "epoch": 0.8716058483843224, "grad_norm": 0.3034629619182101, "learning_rate": 1.4814819436107846e-06, "loss": 0.3815, "step": 7094 }, { "epoch": 0.8717287136011795, "grad_norm": 0.2817301031126324, "learning_rate": 1.4786954365207072e-06, "loss": 0.3669, "step": 7095 }, { "epoch": 0.8718515788180367, "grad_norm": 0.3979585169032132, "learning_rate": 1.4759114165869126e-06, "loss": 0.42, "step": 7096 }, { "epoch": 0.8719744440348938, "grad_norm": 0.31946867163589515, "learning_rate": 1.4731298843215107e-06, "loss": 0.3381, "step": 7097 }, { "epoch": 0.8720973092517508, "grad_norm": 0.34365698815986234, "learning_rate": 1.4703508402361343e-06, "loss": 0.4062, "step": 7098 }, { "epoch": 0.8722201744686079, "grad_norm": 0.3179637811653083, "learning_rate": 1.4675742848419842e-06, "loss": 0.3786, "step": 7099 }, { "epoch": 0.872343039685465, "grad_norm": 0.3652602263508572, "learning_rate": 1.4648002186497805e-06, "loss": 0.3777, "step": 7100 }, { "epoch": 0.8724659049023221, "grad_norm": 0.2890878536338674, "learning_rate": 1.4620286421698014e-06, "loss": 0.3912, "step": 7101 }, { "epoch": 0.8725887701191792, "grad_norm": 0.3336261153172338, "learning_rate": 1.459259555911851e-06, "loss": 0.4482, "step": 7102 }, { "epoch": 0.8727116353360364, "grad_norm": 0.40652330307590645, "learning_rate": 1.456492960385295e-06, "loss": 0.3922, "step": 7103 }, { "epoch": 0.8728345005528935, "grad_norm": 0.32287618902521437, "learning_rate": 1.4537288560990247e-06, "loss": 0.3429, "step": 7104 }, { "epoch": 0.8729573657697506, "grad_norm": 0.3521641349866513, "learning_rate": 1.4509672435614819e-06, "loss": 0.3983, "step": 7105 }, { "epoch": 0.8730802309866077, "grad_norm": 0.4780408540500329, "learning_rate": 1.448208123280645e-06, "loss": 0.3986, "step": 7106 }, { "epoch": 0.8732030962034648, "grad_norm": 0.3306631686519652, "learning_rate": 1.4454514957640363e-06, "loss": 0.4304, "step": 7107 }, { "epoch": 0.8733259614203219, "grad_norm": 0.403095311051181, "learning_rate": 1.4426973615187239e-06, "loss": 0.3841, "step": 7108 }, { "epoch": 0.873448826637179, "grad_norm": 0.34435458650004264, "learning_rate": 1.4399457210513072e-06, "loss": 0.4685, "step": 7109 }, { "epoch": 0.8735716918540362, "grad_norm": 0.4970930199666604, "learning_rate": 1.4371965748679333e-06, "loss": 0.4711, "step": 7110 }, { "epoch": 0.8736945570708933, "grad_norm": 0.3650136382086891, "learning_rate": 1.4344499234742941e-06, "loss": 0.3161, "step": 7111 }, { "epoch": 0.8738174222877504, "grad_norm": 0.341705125416511, "learning_rate": 1.4317057673756172e-06, "loss": 0.3895, "step": 7112 }, { "epoch": 0.8739402875046074, "grad_norm": 0.34751209857614673, "learning_rate": 1.4289641070766674e-06, "loss": 0.4147, "step": 7113 }, { "epoch": 0.8740631527214645, "grad_norm": 0.34404257914896225, "learning_rate": 1.4262249430817609e-06, "loss": 0.4117, "step": 7114 }, { "epoch": 0.8741860179383216, "grad_norm": 0.3862356021544961, "learning_rate": 1.4234882758947482e-06, "loss": 0.3189, "step": 7115 }, { "epoch": 0.8743088831551787, "grad_norm": 0.32398291931211165, "learning_rate": 1.4207541060190182e-06, "loss": 0.4203, "step": 7116 }, { "epoch": 0.8744317483720359, "grad_norm": 0.3657134646134658, "learning_rate": 1.4180224339575055e-06, "loss": 0.3859, "step": 7117 }, { "epoch": 0.874554613588893, "grad_norm": 0.4528099960502782, "learning_rate": 1.4152932602126844e-06, "loss": 0.5214, "step": 7118 }, { "epoch": 0.8746774788057501, "grad_norm": 0.43014341750832774, "learning_rate": 1.4125665852865704e-06, "loss": 0.3796, "step": 7119 }, { "epoch": 0.8748003440226072, "grad_norm": 0.28226696103268406, "learning_rate": 1.4098424096807138e-06, "loss": 0.3969, "step": 7120 }, { "epoch": 0.8749232092394643, "grad_norm": 0.333499843476459, "learning_rate": 1.407120733896209e-06, "loss": 0.4049, "step": 7121 }, { "epoch": 0.8750460744563214, "grad_norm": 0.38928991097908117, "learning_rate": 1.4044015584336934e-06, "loss": 0.3488, "step": 7122 }, { "epoch": 0.8751689396731785, "grad_norm": 0.31583746482295333, "learning_rate": 1.401684883793342e-06, "loss": 0.3785, "step": 7123 }, { "epoch": 0.8752918048900357, "grad_norm": 0.31476891715235794, "learning_rate": 1.3989707104748673e-06, "loss": 0.4023, "step": 7124 }, { "epoch": 0.8754146701068928, "grad_norm": 0.31120506259800107, "learning_rate": 1.3962590389775242e-06, "loss": 0.3453, "step": 7125 }, { "epoch": 0.8755375353237499, "grad_norm": 0.3304331642936027, "learning_rate": 1.3935498698001093e-06, "loss": 0.3254, "step": 7126 }, { "epoch": 0.875660400540607, "grad_norm": 0.4101470355641454, "learning_rate": 1.3908432034409518e-06, "loss": 0.3839, "step": 7127 }, { "epoch": 0.875783265757464, "grad_norm": 0.35160643609791153, "learning_rate": 1.3881390403979321e-06, "loss": 0.3488, "step": 7128 }, { "epoch": 0.8759061309743211, "grad_norm": 0.40381163091160605, "learning_rate": 1.3854373811684557e-06, "loss": 0.3396, "step": 7129 }, { "epoch": 0.8760289961911782, "grad_norm": 0.3141961705910673, "learning_rate": 1.382738226249483e-06, "loss": 0.3633, "step": 7130 }, { "epoch": 0.8761518614080354, "grad_norm": 0.3603212393826446, "learning_rate": 1.3800415761375007e-06, "loss": 0.4155, "step": 7131 }, { "epoch": 0.8762747266248925, "grad_norm": 0.32355796604539205, "learning_rate": 1.377347431328545e-06, "loss": 0.3205, "step": 7132 }, { "epoch": 0.8763975918417496, "grad_norm": 0.35651026021709914, "learning_rate": 1.3746557923181795e-06, "loss": 0.3986, "step": 7133 }, { "epoch": 0.8765204570586067, "grad_norm": 0.31617155350730924, "learning_rate": 1.3719666596015184e-06, "loss": 0.3774, "step": 7134 }, { "epoch": 0.8766433222754638, "grad_norm": 0.3302398819688601, "learning_rate": 1.3692800336732108e-06, "loss": 0.4211, "step": 7135 }, { "epoch": 0.8767661874923209, "grad_norm": 0.2765144383030933, "learning_rate": 1.3665959150274382e-06, "loss": 0.3422, "step": 7136 }, { "epoch": 0.876889052709178, "grad_norm": 0.2833058691752238, "learning_rate": 1.3639143041579371e-06, "loss": 0.3467, "step": 7137 }, { "epoch": 0.8770119179260352, "grad_norm": 0.3872656665375745, "learning_rate": 1.3612352015579631e-06, "loss": 0.3597, "step": 7138 }, { "epoch": 0.8771347831428923, "grad_norm": 0.3528641703669791, "learning_rate": 1.358558607720327e-06, "loss": 0.3658, "step": 7139 }, { "epoch": 0.8772576483597494, "grad_norm": 0.3304196236682052, "learning_rate": 1.3558845231373617e-06, "loss": 0.3452, "step": 7140 }, { "epoch": 0.8773805135766065, "grad_norm": 0.3766863941440677, "learning_rate": 1.353212948300957e-06, "loss": 0.4137, "step": 7141 }, { "epoch": 0.8775033787934635, "grad_norm": 0.4411034486059259, "learning_rate": 1.3505438837025265e-06, "loss": 0.3519, "step": 7142 }, { "epoch": 0.8776262440103206, "grad_norm": 0.5293826305519594, "learning_rate": 1.3478773298330322e-06, "loss": 0.347, "step": 7143 }, { "epoch": 0.8777491092271777, "grad_norm": 0.3285319414817675, "learning_rate": 1.345213287182962e-06, "loss": 0.3445, "step": 7144 }, { "epoch": 0.8778719744440349, "grad_norm": 0.395796518455139, "learning_rate": 1.3425517562423539e-06, "loss": 0.3197, "step": 7145 }, { "epoch": 0.877994839660892, "grad_norm": 0.3730186378921999, "learning_rate": 1.3398927375007814e-06, "loss": 0.3898, "step": 7146 }, { "epoch": 0.8781177048777491, "grad_norm": 0.28190134104190573, "learning_rate": 1.3372362314473464e-06, "loss": 0.3711, "step": 7147 }, { "epoch": 0.8782405700946062, "grad_norm": 0.34645565024862984, "learning_rate": 1.334582238570703e-06, "loss": 0.3644, "step": 7148 }, { "epoch": 0.8783634353114633, "grad_norm": 0.37543587327959727, "learning_rate": 1.3319307593590325e-06, "loss": 0.4065, "step": 7149 }, { "epoch": 0.8784863005283204, "grad_norm": 0.43416723067492846, "learning_rate": 1.3292817943000597e-06, "loss": 0.3883, "step": 7150 }, { "epoch": 0.8786091657451776, "grad_norm": 0.34179112756282326, "learning_rate": 1.3266353438810414e-06, "loss": 0.389, "step": 7151 }, { "epoch": 0.8787320309620347, "grad_norm": 0.3355579771720534, "learning_rate": 1.3239914085887767e-06, "loss": 0.3189, "step": 7152 }, { "epoch": 0.8788548961788918, "grad_norm": 0.3669122228662043, "learning_rate": 1.321349988909603e-06, "loss": 0.3161, "step": 7153 }, { "epoch": 0.8789777613957489, "grad_norm": 0.37026679286863334, "learning_rate": 1.318711085329387e-06, "loss": 0.4238, "step": 7154 }, { "epoch": 0.879100626612606, "grad_norm": 0.3161489259849155, "learning_rate": 1.3160746983335437e-06, "loss": 0.4253, "step": 7155 }, { "epoch": 0.8792234918294631, "grad_norm": 0.31477007504682286, "learning_rate": 1.3134408284070115e-06, "loss": 0.4409, "step": 7156 }, { "epoch": 0.8793463570463201, "grad_norm": 0.4388658537026959, "learning_rate": 1.310809476034283e-06, "loss": 0.4268, "step": 7157 }, { "epoch": 0.8794692222631773, "grad_norm": 0.38001528019638603, "learning_rate": 1.3081806416993714e-06, "loss": 0.4149, "step": 7158 }, { "epoch": 0.8795920874800344, "grad_norm": 0.3202803652239961, "learning_rate": 1.305554325885836e-06, "loss": 0.3546, "step": 7159 }, { "epoch": 0.8797149526968915, "grad_norm": 0.29130704778182986, "learning_rate": 1.3029305290767708e-06, "loss": 0.3954, "step": 7160 }, { "epoch": 0.8798378179137486, "grad_norm": 0.3328583464948505, "learning_rate": 1.3003092517548076e-06, "loss": 0.3523, "step": 7161 }, { "epoch": 0.8799606831306057, "grad_norm": 0.3043717459648794, "learning_rate": 1.2976904944021112e-06, "loss": 0.3464, "step": 7162 }, { "epoch": 0.8800835483474628, "grad_norm": 0.3491984851848255, "learning_rate": 1.2950742575003843e-06, "loss": 0.3796, "step": 7163 }, { "epoch": 0.88020641356432, "grad_norm": 0.3177651420108808, "learning_rate": 1.2924605415308722e-06, "loss": 0.3417, "step": 7164 }, { "epoch": 0.8803292787811771, "grad_norm": 0.28515058740215754, "learning_rate": 1.2898493469743433e-06, "loss": 0.4404, "step": 7165 }, { "epoch": 0.8804521439980342, "grad_norm": 0.3488343551396734, "learning_rate": 1.287240674311117e-06, "loss": 0.4183, "step": 7166 }, { "epoch": 0.8805750092148913, "grad_norm": 0.44184947862756857, "learning_rate": 1.284634524021031e-06, "loss": 0.3854, "step": 7167 }, { "epoch": 0.8806978744317484, "grad_norm": 0.43782162882216036, "learning_rate": 1.2820308965834854e-06, "loss": 0.3683, "step": 7168 }, { "epoch": 0.8808207396486055, "grad_norm": 0.2802880940027501, "learning_rate": 1.2794297924773868e-06, "loss": 0.3977, "step": 7169 }, { "epoch": 0.8809436048654626, "grad_norm": 0.3496910935305501, "learning_rate": 1.2768312121812008e-06, "loss": 0.3444, "step": 7170 }, { "epoch": 0.8810664700823196, "grad_norm": 0.3432536797125899, "learning_rate": 1.2742351561729138e-06, "loss": 0.4043, "step": 7171 }, { "epoch": 0.8811893352991768, "grad_norm": 0.36765790453487873, "learning_rate": 1.2716416249300532e-06, "loss": 0.3542, "step": 7172 }, { "epoch": 0.8813122005160339, "grad_norm": 0.3447742517190296, "learning_rate": 1.269050618929688e-06, "loss": 0.3704, "step": 7173 }, { "epoch": 0.881435065732891, "grad_norm": 0.3465769042623065, "learning_rate": 1.2664621386484098e-06, "loss": 0.3925, "step": 7174 }, { "epoch": 0.8815579309497481, "grad_norm": 0.33136004746650655, "learning_rate": 1.2638761845623565e-06, "loss": 0.4275, "step": 7175 }, { "epoch": 0.8816807961666052, "grad_norm": 0.38730277268757934, "learning_rate": 1.2612927571471972e-06, "loss": 0.4221, "step": 7176 }, { "epoch": 0.8818036613834623, "grad_norm": 0.30426976369704045, "learning_rate": 1.2587118568781387e-06, "loss": 0.3345, "step": 7177 }, { "epoch": 0.8819265266003194, "grad_norm": 0.32817010393976204, "learning_rate": 1.2561334842299161e-06, "loss": 0.3223, "step": 7178 }, { "epoch": 0.8820493918171766, "grad_norm": 0.33331787649520334, "learning_rate": 1.2535576396768085e-06, "loss": 0.358, "step": 7179 }, { "epoch": 0.8821722570340337, "grad_norm": 0.35630991556167946, "learning_rate": 1.250984323692625e-06, "loss": 0.4143, "step": 7180 }, { "epoch": 0.8822951222508908, "grad_norm": 0.4424469977399332, "learning_rate": 1.248413536750707e-06, "loss": 0.4231, "step": 7181 }, { "epoch": 0.8824179874677479, "grad_norm": 0.32778302253624386, "learning_rate": 1.2458452793239383e-06, "loss": 0.3335, "step": 7182 }, { "epoch": 0.882540852684605, "grad_norm": 0.33382685174924864, "learning_rate": 1.2432795518847306e-06, "loss": 0.3355, "step": 7183 }, { "epoch": 0.8826637179014621, "grad_norm": 0.360857529770608, "learning_rate": 1.2407163549050366e-06, "loss": 0.3402, "step": 7184 }, { "epoch": 0.8827865831183193, "grad_norm": 0.3711406123898158, "learning_rate": 1.2381556888563338e-06, "loss": 0.4296, "step": 7185 }, { "epoch": 0.8829094483351763, "grad_norm": 0.3852827313458948, "learning_rate": 1.2355975542096444e-06, "loss": 0.399, "step": 7186 }, { "epoch": 0.8830323135520334, "grad_norm": 0.31970078761880427, "learning_rate": 1.2330419514355195e-06, "loss": 0.31, "step": 7187 }, { "epoch": 0.8831551787688905, "grad_norm": 0.37879450166056144, "learning_rate": 1.2304888810040487e-06, "loss": 0.4005, "step": 7188 }, { "epoch": 0.8832780439857476, "grad_norm": 0.3433244249138126, "learning_rate": 1.2279383433848462e-06, "loss": 0.4217, "step": 7189 }, { "epoch": 0.8834009092026047, "grad_norm": 0.35530120690760175, "learning_rate": 1.2253903390470717e-06, "loss": 0.2881, "step": 7190 }, { "epoch": 0.8835237744194618, "grad_norm": 0.3839551231201674, "learning_rate": 1.222844868459415e-06, "loss": 0.3897, "step": 7191 }, { "epoch": 0.883646639636319, "grad_norm": 0.34522740133939805, "learning_rate": 1.2203019320900938e-06, "loss": 0.3422, "step": 7192 }, { "epoch": 0.8837695048531761, "grad_norm": 0.36261865892335915, "learning_rate": 1.2177615304068702e-06, "loss": 0.4048, "step": 7193 }, { "epoch": 0.8838923700700332, "grad_norm": 0.3559119563641099, "learning_rate": 1.215223663877027e-06, "loss": 0.3659, "step": 7194 }, { "epoch": 0.8840152352868903, "grad_norm": 0.35793132265894145, "learning_rate": 1.2126883329673977e-06, "loss": 0.3693, "step": 7195 }, { "epoch": 0.8841381005037474, "grad_norm": 0.2730284751109138, "learning_rate": 1.2101555381443341e-06, "loss": 0.3492, "step": 7196 }, { "epoch": 0.8842609657206045, "grad_norm": 0.37325936223176603, "learning_rate": 1.2076252798737318e-06, "loss": 0.3586, "step": 7197 }, { "epoch": 0.8843838309374616, "grad_norm": 0.40523548209546073, "learning_rate": 1.2050975586210106e-06, "loss": 0.3735, "step": 7198 }, { "epoch": 0.8845066961543188, "grad_norm": 0.3824318363945271, "learning_rate": 1.2025723748511297e-06, "loss": 0.4084, "step": 7199 }, { "epoch": 0.8846295613711758, "grad_norm": 0.3464483575498175, "learning_rate": 1.2000497290285827e-06, "loss": 0.3692, "step": 7200 }, { "epoch": 0.8847524265880329, "grad_norm": 0.3152227391286033, "learning_rate": 1.1975296216173887e-06, "loss": 0.3475, "step": 7201 }, { "epoch": 0.88487529180489, "grad_norm": 0.2760250874690311, "learning_rate": 1.1950120530811131e-06, "loss": 0.333, "step": 7202 }, { "epoch": 0.8849981570217471, "grad_norm": 0.3487629923744775, "learning_rate": 1.1924970238828393e-06, "loss": 0.351, "step": 7203 }, { "epoch": 0.8851210222386042, "grad_norm": 0.3575134427702205, "learning_rate": 1.1899845344851951e-06, "loss": 0.3787, "step": 7204 }, { "epoch": 0.8852438874554613, "grad_norm": 0.34665037850002745, "learning_rate": 1.1874745853503293e-06, "loss": 0.3416, "step": 7205 }, { "epoch": 0.8853667526723185, "grad_norm": 0.3555969150763818, "learning_rate": 1.1849671769399427e-06, "loss": 0.42, "step": 7206 }, { "epoch": 0.8854896178891756, "grad_norm": 0.3090292591207698, "learning_rate": 1.1824623097152466e-06, "loss": 0.3856, "step": 7207 }, { "epoch": 0.8856124831060327, "grad_norm": 0.3278519815070329, "learning_rate": 1.179959984137002e-06, "loss": 0.3412, "step": 7208 }, { "epoch": 0.8857353483228898, "grad_norm": 0.5892236982326822, "learning_rate": 1.1774602006654888e-06, "loss": 0.4625, "step": 7209 }, { "epoch": 0.8858582135397469, "grad_norm": 0.2958497479949117, "learning_rate": 1.1749629597605299e-06, "loss": 0.4099, "step": 7210 }, { "epoch": 0.885981078756604, "grad_norm": 0.32570649088406745, "learning_rate": 1.1724682618814792e-06, "loss": 0.4828, "step": 7211 }, { "epoch": 0.8861039439734612, "grad_norm": 0.4062850307753576, "learning_rate": 1.1699761074872128e-06, "loss": 0.4765, "step": 7212 }, { "epoch": 0.8862268091903183, "grad_norm": 0.3702607627710723, "learning_rate": 1.1674864970361527e-06, "loss": 0.4715, "step": 7213 }, { "epoch": 0.8863496744071754, "grad_norm": 0.4615304125436298, "learning_rate": 1.164999430986242e-06, "loss": 0.3884, "step": 7214 }, { "epoch": 0.8864725396240324, "grad_norm": 0.3412598481100906, "learning_rate": 1.1625149097949672e-06, "loss": 0.3841, "step": 7215 }, { "epoch": 0.8865954048408895, "grad_norm": 0.3618251314032629, "learning_rate": 1.1600329339193321e-06, "loss": 0.4375, "step": 7216 }, { "epoch": 0.8867182700577466, "grad_norm": 0.35805249249340765, "learning_rate": 1.1575535038158852e-06, "loss": 0.3895, "step": 7217 }, { "epoch": 0.8868411352746037, "grad_norm": 0.41047402528776655, "learning_rate": 1.1550766199407014e-06, "loss": 0.3459, "step": 7218 }, { "epoch": 0.8869640004914608, "grad_norm": 0.3034061044447125, "learning_rate": 1.1526022827493832e-06, "loss": 0.3415, "step": 7219 }, { "epoch": 0.887086865708318, "grad_norm": 0.3292606224853106, "learning_rate": 1.1501304926970728e-06, "loss": 0.4193, "step": 7220 }, { "epoch": 0.8872097309251751, "grad_norm": 0.3236934669194148, "learning_rate": 1.1476612502384354e-06, "loss": 0.357, "step": 7221 }, { "epoch": 0.8873325961420322, "grad_norm": 0.32727822496346937, "learning_rate": 1.1451945558276788e-06, "loss": 0.3541, "step": 7222 }, { "epoch": 0.8874554613588893, "grad_norm": 0.32500786253368535, "learning_rate": 1.142730409918532e-06, "loss": 0.3168, "step": 7223 }, { "epoch": 0.8875783265757464, "grad_norm": 0.399469403815017, "learning_rate": 1.1402688129642575e-06, "loss": 0.4191, "step": 7224 }, { "epoch": 0.8877011917926035, "grad_norm": 0.2742154242353986, "learning_rate": 1.137809765417651e-06, "loss": 0.3561, "step": 7225 }, { "epoch": 0.8878240570094607, "grad_norm": 0.36264136496207583, "learning_rate": 1.1353532677310413e-06, "loss": 0.4011, "step": 7226 }, { "epoch": 0.8879469222263178, "grad_norm": 0.3250011730904601, "learning_rate": 1.13289932035628e-06, "loss": 0.4187, "step": 7227 }, { "epoch": 0.8880697874431749, "grad_norm": 0.276518330196773, "learning_rate": 1.1304479237447574e-06, "loss": 0.4695, "step": 7228 }, { "epoch": 0.888192652660032, "grad_norm": 0.3296348201767402, "learning_rate": 1.1279990783473948e-06, "loss": 0.3838, "step": 7229 }, { "epoch": 0.888315517876889, "grad_norm": 0.5456755235139555, "learning_rate": 1.1255527846146369e-06, "loss": 0.4836, "step": 7230 }, { "epoch": 0.8884383830937461, "grad_norm": 0.3937050866762519, "learning_rate": 1.1231090429964668e-06, "loss": 0.3896, "step": 7231 }, { "epoch": 0.8885612483106032, "grad_norm": 0.42150015984583894, "learning_rate": 1.1206678539423886e-06, "loss": 0.3232, "step": 7232 }, { "epoch": 0.8886841135274604, "grad_norm": 0.30152850134813286, "learning_rate": 1.118229217901453e-06, "loss": 0.376, "step": 7233 }, { "epoch": 0.8888069787443175, "grad_norm": 0.321074073522865, "learning_rate": 1.1157931353222244e-06, "loss": 0.3724, "step": 7234 }, { "epoch": 0.8889298439611746, "grad_norm": 0.3321218567277627, "learning_rate": 1.1133596066528079e-06, "loss": 0.4525, "step": 7235 }, { "epoch": 0.8890527091780317, "grad_norm": 0.36164300617607675, "learning_rate": 1.1109286323408318e-06, "loss": 0.3651, "step": 7236 }, { "epoch": 0.8891755743948888, "grad_norm": 0.3354983878906924, "learning_rate": 1.1085002128334603e-06, "loss": 0.4096, "step": 7237 }, { "epoch": 0.8892984396117459, "grad_norm": 0.30441149588992794, "learning_rate": 1.1060743485773861e-06, "loss": 0.3667, "step": 7238 }, { "epoch": 0.889421304828603, "grad_norm": 0.3219954775264217, "learning_rate": 1.1036510400188287e-06, "loss": 0.3406, "step": 7239 }, { "epoch": 0.8895441700454602, "grad_norm": 0.33799644820050423, "learning_rate": 1.101230287603542e-06, "loss": 0.3331, "step": 7240 }, { "epoch": 0.8896670352623173, "grad_norm": 0.35669891780346863, "learning_rate": 1.0988120917768074e-06, "loss": 0.3936, "step": 7241 }, { "epoch": 0.8897899004791744, "grad_norm": 0.4369235776015597, "learning_rate": 1.0963964529834381e-06, "loss": 0.3935, "step": 7242 }, { "epoch": 0.8899127656960315, "grad_norm": 0.3240472235196433, "learning_rate": 1.0939833716677683e-06, "loss": 0.4405, "step": 7243 }, { "epoch": 0.8900356309128885, "grad_norm": 0.3503046428066084, "learning_rate": 1.091572848273678e-06, "loss": 0.3795, "step": 7244 }, { "epoch": 0.8901584961297456, "grad_norm": 0.3804480623736981, "learning_rate": 1.0891648832445611e-06, "loss": 0.414, "step": 7245 }, { "epoch": 0.8902813613466027, "grad_norm": 0.300636721537835, "learning_rate": 1.0867594770233514e-06, "loss": 0.457, "step": 7246 }, { "epoch": 0.8904042265634599, "grad_norm": 0.36561776931359663, "learning_rate": 1.084356630052503e-06, "loss": 0.318, "step": 7247 }, { "epoch": 0.890527091780317, "grad_norm": 0.3519854621378235, "learning_rate": 1.0819563427740064e-06, "loss": 0.4711, "step": 7248 }, { "epoch": 0.8906499569971741, "grad_norm": 0.3691268894762229, "learning_rate": 1.0795586156293814e-06, "loss": 0.3766, "step": 7249 }, { "epoch": 0.8907728222140312, "grad_norm": 0.29207718096953955, "learning_rate": 1.0771634490596683e-06, "loss": 0.384, "step": 7250 }, { "epoch": 0.8908956874308883, "grad_norm": 0.2757203839199928, "learning_rate": 1.0747708435054464e-06, "loss": 0.3694, "step": 7251 }, { "epoch": 0.8910185526477454, "grad_norm": 0.32244096799811, "learning_rate": 1.0723807994068208e-06, "loss": 0.416, "step": 7252 }, { "epoch": 0.8911414178646025, "grad_norm": 0.37887077227924587, "learning_rate": 1.0699933172034242e-06, "loss": 0.4243, "step": 7253 }, { "epoch": 0.8912642830814597, "grad_norm": 0.37890841929886043, "learning_rate": 1.0676083973344158e-06, "loss": 0.4063, "step": 7254 }, { "epoch": 0.8913871482983168, "grad_norm": 0.31754899185680235, "learning_rate": 1.0652260402384895e-06, "loss": 0.375, "step": 7255 }, { "epoch": 0.8915100135151739, "grad_norm": 0.41015454920876565, "learning_rate": 1.062846246353863e-06, "loss": 0.4301, "step": 7256 }, { "epoch": 0.891632878732031, "grad_norm": 0.3254086753427219, "learning_rate": 1.0604690161182827e-06, "loss": 0.3376, "step": 7257 }, { "epoch": 0.8917557439488881, "grad_norm": 0.3865011809377186, "learning_rate": 1.0580943499690277e-06, "loss": 0.3753, "step": 7258 }, { "epoch": 0.8918786091657451, "grad_norm": 0.3557807341252625, "learning_rate": 1.0557222483428962e-06, "loss": 0.4049, "step": 7259 }, { "epoch": 0.8920014743826022, "grad_norm": 0.36591407411315396, "learning_rate": 1.0533527116762298e-06, "loss": 0.3327, "step": 7260 }, { "epoch": 0.8921243395994594, "grad_norm": 0.33457655895526034, "learning_rate": 1.0509857404048827e-06, "loss": 0.3216, "step": 7261 }, { "epoch": 0.8922472048163165, "grad_norm": 0.36788395383505407, "learning_rate": 1.0486213349642486e-06, "loss": 0.4514, "step": 7262 }, { "epoch": 0.8923700700331736, "grad_norm": 0.3287874492446262, "learning_rate": 1.046259495789238e-06, "loss": 0.3941, "step": 7263 }, { "epoch": 0.8924929352500307, "grad_norm": 0.3261171336502908, "learning_rate": 1.043900223314303e-06, "loss": 0.3689, "step": 7264 }, { "epoch": 0.8926158004668878, "grad_norm": 0.33601078603017354, "learning_rate": 1.0415435179734118e-06, "loss": 0.4037, "step": 7265 }, { "epoch": 0.8927386656837449, "grad_norm": 0.4333177834097188, "learning_rate": 1.0391893802000674e-06, "loss": 0.382, "step": 7266 }, { "epoch": 0.892861530900602, "grad_norm": 0.4458218461943022, "learning_rate": 1.0368378104272986e-06, "loss": 0.4144, "step": 7267 }, { "epoch": 0.8929843961174592, "grad_norm": 0.4008477143306701, "learning_rate": 1.0344888090876592e-06, "loss": 0.3909, "step": 7268 }, { "epoch": 0.8931072613343163, "grad_norm": 0.4299084053208505, "learning_rate": 1.0321423766132354e-06, "loss": 0.3668, "step": 7269 }, { "epoch": 0.8932301265511734, "grad_norm": 0.39058832174561275, "learning_rate": 1.0297985134356319e-06, "loss": 0.4122, "step": 7270 }, { "epoch": 0.8933529917680305, "grad_norm": 0.33993862569647626, "learning_rate": 1.0274572199859972e-06, "loss": 0.373, "step": 7271 }, { "epoch": 0.8934758569848876, "grad_norm": 0.37569735257594283, "learning_rate": 1.0251184966949883e-06, "loss": 0.335, "step": 7272 }, { "epoch": 0.8935987222017446, "grad_norm": 0.31965275074774746, "learning_rate": 1.0227823439928065e-06, "loss": 0.3767, "step": 7273 }, { "epoch": 0.8937215874186017, "grad_norm": 0.32055665264683086, "learning_rate": 1.0204487623091624e-06, "loss": 0.3488, "step": 7274 }, { "epoch": 0.8938444526354589, "grad_norm": 0.37115165124548377, "learning_rate": 1.0181177520733082e-06, "loss": 0.3886, "step": 7275 }, { "epoch": 0.893967317852316, "grad_norm": 0.37348140608205715, "learning_rate": 1.0157893137140206e-06, "loss": 0.3221, "step": 7276 }, { "epoch": 0.8940901830691731, "grad_norm": 0.30925420394827563, "learning_rate": 1.0134634476595955e-06, "loss": 0.4405, "step": 7277 }, { "epoch": 0.8942130482860302, "grad_norm": 0.36257309966191775, "learning_rate": 1.011140154337864e-06, "loss": 0.3549, "step": 7278 }, { "epoch": 0.8943359135028873, "grad_norm": 0.3558675127536125, "learning_rate": 1.0088194341761792e-06, "loss": 0.387, "step": 7279 }, { "epoch": 0.8944587787197444, "grad_norm": 0.38703959683848005, "learning_rate": 1.0065012876014261e-06, "loss": 0.3802, "step": 7280 }, { "epoch": 0.8945816439366016, "grad_norm": 0.32558539013598436, "learning_rate": 1.0041857150400075e-06, "loss": 0.353, "step": 7281 }, { "epoch": 0.8947045091534587, "grad_norm": 0.2951784689393105, "learning_rate": 1.0018727169178604e-06, "loss": 0.3539, "step": 7282 }, { "epoch": 0.8948273743703158, "grad_norm": 0.41193029356086563, "learning_rate": 9.995622936604465e-07, "loss": 0.4452, "step": 7283 }, { "epoch": 0.8949502395871729, "grad_norm": 0.4003157014305072, "learning_rate": 9.972544456927556e-07, "loss": 0.413, "step": 7284 }, { "epoch": 0.89507310480403, "grad_norm": 0.3149802982357303, "learning_rate": 9.949491734392952e-07, "loss": 0.4066, "step": 7285 }, { "epoch": 0.8951959700208871, "grad_norm": 0.41465059189480635, "learning_rate": 9.926464773241089e-07, "loss": 0.3892, "step": 7286 }, { "epoch": 0.8953188352377442, "grad_norm": 0.3541714083752457, "learning_rate": 9.90346357770765e-07, "loss": 0.3714, "step": 7287 }, { "epoch": 0.8954417004546013, "grad_norm": 0.3735807274367485, "learning_rate": 9.880488152023499e-07, "loss": 0.3467, "step": 7288 }, { "epoch": 0.8955645656714584, "grad_norm": 0.4050116049190095, "learning_rate": 9.857538500414837e-07, "loss": 0.3633, "step": 7289 }, { "epoch": 0.8956874308883155, "grad_norm": 0.3476509059167945, "learning_rate": 9.834614627103123e-07, "loss": 0.3565, "step": 7290 }, { "epoch": 0.8958102961051726, "grad_norm": 0.3137682630879071, "learning_rate": 9.811716536305066e-07, "loss": 0.4075, "step": 7291 }, { "epoch": 0.8959331613220297, "grad_norm": 0.4475732936119173, "learning_rate": 9.788844232232563e-07, "loss": 0.3718, "step": 7292 }, { "epoch": 0.8960560265388868, "grad_norm": 0.40025879963454797, "learning_rate": 9.765997719092867e-07, "loss": 0.3727, "step": 7293 }, { "epoch": 0.896178891755744, "grad_norm": 0.3864059409052248, "learning_rate": 9.743177001088482e-07, "loss": 0.4204, "step": 7294 }, { "epoch": 0.8963017569726011, "grad_norm": 0.4310670758595747, "learning_rate": 9.720382082417052e-07, "loss": 0.4213, "step": 7295 }, { "epoch": 0.8964246221894582, "grad_norm": 0.36104651165682994, "learning_rate": 9.69761296727162e-07, "loss": 0.457, "step": 7296 }, { "epoch": 0.8965474874063153, "grad_norm": 0.3124207325591953, "learning_rate": 9.674869659840334e-07, "loss": 0.2945, "step": 7297 }, { "epoch": 0.8966703526231724, "grad_norm": 0.3110298523203572, "learning_rate": 9.652152164306788e-07, "loss": 0.3659, "step": 7298 }, { "epoch": 0.8967932178400295, "grad_norm": 0.2856543313769583, "learning_rate": 9.62946048484965e-07, "loss": 0.3126, "step": 7299 }, { "epoch": 0.8969160830568866, "grad_norm": 0.37117328472018046, "learning_rate": 9.606794625642934e-07, "loss": 0.4537, "step": 7300 }, { "epoch": 0.8970389482737438, "grad_norm": 0.3244410527455363, "learning_rate": 9.584154590855836e-07, "loss": 0.4159, "step": 7301 }, { "epoch": 0.8971618134906008, "grad_norm": 0.35750920547907217, "learning_rate": 9.561540384652879e-07, "loss": 0.3902, "step": 7302 }, { "epoch": 0.8972846787074579, "grad_norm": 0.3910898691495907, "learning_rate": 9.538952011193814e-07, "loss": 0.4879, "step": 7303 }, { "epoch": 0.897407543924315, "grad_norm": 0.3306964799533868, "learning_rate": 9.516389474633585e-07, "loss": 0.3575, "step": 7304 }, { "epoch": 0.8975304091411721, "grad_norm": 0.33841987194139345, "learning_rate": 9.493852779122441e-07, "loss": 0.4132, "step": 7305 }, { "epoch": 0.8976532743580292, "grad_norm": 0.3825094888818939, "learning_rate": 9.471341928805865e-07, "loss": 0.3992, "step": 7306 }, { "epoch": 0.8977761395748863, "grad_norm": 0.3134793082927644, "learning_rate": 9.448856927824612e-07, "loss": 0.4015, "step": 7307 }, { "epoch": 0.8978990047917434, "grad_norm": 0.373299851176836, "learning_rate": 9.426397780314555e-07, "loss": 0.3825, "step": 7308 }, { "epoch": 0.8980218700086006, "grad_norm": 0.322329745758491, "learning_rate": 9.403964490407041e-07, "loss": 0.3871, "step": 7309 }, { "epoch": 0.8981447352254577, "grad_norm": 0.3644223163856595, "learning_rate": 9.381557062228435e-07, "loss": 0.4142, "step": 7310 }, { "epoch": 0.8982676004423148, "grad_norm": 0.39544175726231756, "learning_rate": 9.359175499900474e-07, "loss": 0.4038, "step": 7311 }, { "epoch": 0.8983904656591719, "grad_norm": 0.2842400110717971, "learning_rate": 9.336819807540081e-07, "loss": 0.2953, "step": 7312 }, { "epoch": 0.898513330876029, "grad_norm": 0.4004449896549713, "learning_rate": 9.31448998925945e-07, "loss": 0.4433, "step": 7313 }, { "epoch": 0.8986361960928861, "grad_norm": 0.3668176650490201, "learning_rate": 9.292186049166029e-07, "loss": 0.388, "step": 7314 }, { "epoch": 0.8987590613097433, "grad_norm": 0.3194025951648376, "learning_rate": 9.269907991362436e-07, "loss": 0.3116, "step": 7315 }, { "epoch": 0.8988819265266004, "grad_norm": 0.3012274809090897, "learning_rate": 9.247655819946609e-07, "loss": 0.3447, "step": 7316 }, { "epoch": 0.8990047917434574, "grad_norm": 0.33017777071720256, "learning_rate": 9.225429539011676e-07, "loss": 0.3756, "step": 7317 }, { "epoch": 0.8991276569603145, "grad_norm": 0.3447394829794491, "learning_rate": 9.203229152646047e-07, "loss": 0.2946, "step": 7318 }, { "epoch": 0.8992505221771716, "grad_norm": 0.4043505406348553, "learning_rate": 9.181054664933291e-07, "loss": 0.432, "step": 7319 }, { "epoch": 0.8993733873940287, "grad_norm": 0.36472554851812905, "learning_rate": 9.158906079952295e-07, "loss": 0.3925, "step": 7320 }, { "epoch": 0.8994962526108858, "grad_norm": 0.36059226560894114, "learning_rate": 9.136783401777165e-07, "loss": 0.4053, "step": 7321 }, { "epoch": 0.899619117827743, "grad_norm": 0.371519486000236, "learning_rate": 9.114686634477165e-07, "loss": 0.4255, "step": 7322 }, { "epoch": 0.8997419830446001, "grad_norm": 0.445610523020455, "learning_rate": 9.092615782116909e-07, "loss": 0.3676, "step": 7323 }, { "epoch": 0.8998648482614572, "grad_norm": 0.4362212867931161, "learning_rate": 9.070570848756116e-07, "loss": 0.4452, "step": 7324 }, { "epoch": 0.8999877134783143, "grad_norm": 0.3565920934876253, "learning_rate": 9.048551838449909e-07, "loss": 0.4001, "step": 7325 }, { "epoch": 0.9001105786951714, "grad_norm": 0.4306646369197742, "learning_rate": 9.026558755248465e-07, "loss": 0.378, "step": 7326 }, { "epoch": 0.9002334439120285, "grad_norm": 0.3226480627284526, "learning_rate": 9.004591603197315e-07, "loss": 0.3417, "step": 7327 }, { "epoch": 0.9003563091288856, "grad_norm": 0.32992214701492983, "learning_rate": 8.98265038633711e-07, "loss": 0.4188, "step": 7328 }, { "epoch": 0.9004791743457428, "grad_norm": 0.431397128998417, "learning_rate": 8.960735108703872e-07, "loss": 0.3939, "step": 7329 }, { "epoch": 0.9006020395625999, "grad_norm": 0.3271122770599181, "learning_rate": 8.938845774328725e-07, "loss": 0.413, "step": 7330 }, { "epoch": 0.9007249047794569, "grad_norm": 0.3805970607018563, "learning_rate": 8.916982387238082e-07, "loss": 0.4292, "step": 7331 }, { "epoch": 0.900847769996314, "grad_norm": 0.3433277216520862, "learning_rate": 8.895144951453593e-07, "loss": 0.3726, "step": 7332 }, { "epoch": 0.9009706352131711, "grad_norm": 0.38515355943093166, "learning_rate": 8.873333470992079e-07, "loss": 0.3438, "step": 7333 }, { "epoch": 0.9010935004300282, "grad_norm": 0.34702750855392905, "learning_rate": 8.851547949865646e-07, "loss": 0.4042, "step": 7334 }, { "epoch": 0.9012163656468853, "grad_norm": 0.7293426439149353, "learning_rate": 8.82978839208154e-07, "loss": 0.5272, "step": 7335 }, { "epoch": 0.9013392308637425, "grad_norm": 0.27405757911922657, "learning_rate": 8.808054801642407e-07, "loss": 0.3165, "step": 7336 }, { "epoch": 0.9014620960805996, "grad_norm": 0.3979012515296896, "learning_rate": 8.786347182545884e-07, "loss": 0.3201, "step": 7337 }, { "epoch": 0.9015849612974567, "grad_norm": 0.38078144206584785, "learning_rate": 8.764665538785028e-07, "loss": 0.3841, "step": 7338 }, { "epoch": 0.9017078265143138, "grad_norm": 0.46329122248743276, "learning_rate": 8.743009874347979e-07, "loss": 0.4113, "step": 7339 }, { "epoch": 0.9018306917311709, "grad_norm": 0.2930325853158022, "learning_rate": 8.72138019321817e-07, "loss": 0.3555, "step": 7340 }, { "epoch": 0.901953556948028, "grad_norm": 0.3917939946894995, "learning_rate": 8.699776499374285e-07, "loss": 0.4141, "step": 7341 }, { "epoch": 0.9020764221648851, "grad_norm": 0.3096144686382187, "learning_rate": 8.678198796790126e-07, "loss": 0.3377, "step": 7342 }, { "epoch": 0.9021992873817423, "grad_norm": 0.49730524653475905, "learning_rate": 8.656647089434788e-07, "loss": 0.4584, "step": 7343 }, { "epoch": 0.9023221525985994, "grad_norm": 0.32369487634268446, "learning_rate": 8.635121381272582e-07, "loss": 0.32, "step": 7344 }, { "epoch": 0.9024450178154565, "grad_norm": 0.29584790450356974, "learning_rate": 8.613621676263023e-07, "loss": 0.3855, "step": 7345 }, { "epoch": 0.9025678830323135, "grad_norm": 0.43379529569179337, "learning_rate": 8.592147978360831e-07, "loss": 0.3981, "step": 7346 }, { "epoch": 0.9026907482491706, "grad_norm": 0.3698321688216795, "learning_rate": 8.570700291515948e-07, "loss": 0.4079, "step": 7347 }, { "epoch": 0.9028136134660277, "grad_norm": 0.34802525474403717, "learning_rate": 8.549278619673534e-07, "loss": 0.3405, "step": 7348 }, { "epoch": 0.9029364786828848, "grad_norm": 0.3654419955527274, "learning_rate": 8.527882966774003e-07, "loss": 0.4084, "step": 7349 }, { "epoch": 0.903059343899742, "grad_norm": 0.3747643546406551, "learning_rate": 8.506513336752908e-07, "loss": 0.3551, "step": 7350 }, { "epoch": 0.9031822091165991, "grad_norm": 0.3680929926115827, "learning_rate": 8.485169733541071e-07, "loss": 0.342, "step": 7351 }, { "epoch": 0.9033050743334562, "grad_norm": 0.4261372824024074, "learning_rate": 8.463852161064517e-07, "loss": 0.4492, "step": 7352 }, { "epoch": 0.9034279395503133, "grad_norm": 0.34276762960061163, "learning_rate": 8.442560623244444e-07, "loss": 0.3647, "step": 7353 }, { "epoch": 0.9035508047671704, "grad_norm": 0.34313475129987786, "learning_rate": 8.421295123997319e-07, "loss": 0.3731, "step": 7354 }, { "epoch": 0.9036736699840275, "grad_norm": 0.2806570799401964, "learning_rate": 8.400055667234779e-07, "loss": 0.3817, "step": 7355 }, { "epoch": 0.9037965352008847, "grad_norm": 0.32453130927752677, "learning_rate": 8.378842256863717e-07, "loss": 0.3173, "step": 7356 }, { "epoch": 0.9039194004177418, "grad_norm": 0.2880448737351318, "learning_rate": 8.357654896786143e-07, "loss": 0.4228, "step": 7357 }, { "epoch": 0.9040422656345989, "grad_norm": 0.4075521606070892, "learning_rate": 8.336493590899391e-07, "loss": 0.3995, "step": 7358 }, { "epoch": 0.904165130851456, "grad_norm": 0.39870406821662935, "learning_rate": 8.31535834309593e-07, "loss": 0.54, "step": 7359 }, { "epoch": 0.9042879960683131, "grad_norm": 0.3535407683575497, "learning_rate": 8.294249157263417e-07, "loss": 0.3962, "step": 7360 }, { "epoch": 0.9044108612851701, "grad_norm": 0.38801844398416285, "learning_rate": 8.273166037284812e-07, "loss": 0.3195, "step": 7361 }, { "epoch": 0.9045337265020272, "grad_norm": 0.3147250056504953, "learning_rate": 8.252108987038131e-07, "loss": 0.4322, "step": 7362 }, { "epoch": 0.9046565917188844, "grad_norm": 0.3254933306513764, "learning_rate": 8.231078010396775e-07, "loss": 0.3195, "step": 7363 }, { "epoch": 0.9047794569357415, "grad_norm": 0.3393081526214455, "learning_rate": 8.210073111229199e-07, "loss": 0.3908, "step": 7364 }, { "epoch": 0.9049023221525986, "grad_norm": 0.34019154704273463, "learning_rate": 8.189094293399163e-07, "loss": 0.4319, "step": 7365 }, { "epoch": 0.9050251873694557, "grad_norm": 0.32131635489093474, "learning_rate": 8.168141560765496e-07, "loss": 0.38, "step": 7366 }, { "epoch": 0.9051480525863128, "grad_norm": 0.43304735307159586, "learning_rate": 8.147214917182433e-07, "loss": 0.3453, "step": 7367 }, { "epoch": 0.9052709178031699, "grad_norm": 0.37407179934146734, "learning_rate": 8.12631436649921e-07, "loss": 0.3797, "step": 7368 }, { "epoch": 0.905393783020027, "grad_norm": 0.3347827069809331, "learning_rate": 8.105439912560403e-07, "loss": 0.3658, "step": 7369 }, { "epoch": 0.9055166482368842, "grad_norm": 0.3413289915149855, "learning_rate": 8.08459155920569e-07, "loss": 0.3839, "step": 7370 }, { "epoch": 0.9056395134537413, "grad_norm": 0.3005378888503539, "learning_rate": 8.063769310270003e-07, "loss": 0.3743, "step": 7371 }, { "epoch": 0.9057623786705984, "grad_norm": 0.39262707551409093, "learning_rate": 8.042973169583479e-07, "loss": 0.4179, "step": 7372 }, { "epoch": 0.9058852438874555, "grad_norm": 0.35497409559805815, "learning_rate": 8.022203140971373e-07, "loss": 0.4093, "step": 7373 }, { "epoch": 0.9060081091043126, "grad_norm": 0.3668689039077085, "learning_rate": 8.001459228254282e-07, "loss": 0.4747, "step": 7374 }, { "epoch": 0.9061309743211696, "grad_norm": 0.3216303386072254, "learning_rate": 7.980741435247851e-07, "loss": 0.3702, "step": 7375 }, { "epoch": 0.9062538395380267, "grad_norm": 0.31672870063449854, "learning_rate": 7.960049765763034e-07, "loss": 0.3087, "step": 7376 }, { "epoch": 0.9063767047548839, "grad_norm": 0.36588915521640997, "learning_rate": 7.939384223605867e-07, "loss": 0.4154, "step": 7377 }, { "epoch": 0.906499569971741, "grad_norm": 0.39407542380309907, "learning_rate": 7.918744812577694e-07, "loss": 0.3866, "step": 7378 }, { "epoch": 0.9066224351885981, "grad_norm": 0.38872738992783834, "learning_rate": 7.898131536474995e-07, "loss": 0.3985, "step": 7379 }, { "epoch": 0.9067453004054552, "grad_norm": 0.3368734175805012, "learning_rate": 7.877544399089421e-07, "loss": 0.3465, "step": 7380 }, { "epoch": 0.9068681656223123, "grad_norm": 0.2934987219399404, "learning_rate": 7.856983404207857e-07, "loss": 0.4267, "step": 7381 }, { "epoch": 0.9069910308391694, "grad_norm": 0.3799050283886535, "learning_rate": 7.836448555612363e-07, "loss": 0.3678, "step": 7382 }, { "epoch": 0.9071138960560265, "grad_norm": 0.33882422317566785, "learning_rate": 7.815939857080218e-07, "loss": 0.3942, "step": 7383 }, { "epoch": 0.9072367612728837, "grad_norm": 0.30524711673054516, "learning_rate": 7.79545731238382e-07, "loss": 0.3517, "step": 7384 }, { "epoch": 0.9073596264897408, "grad_norm": 0.3528475913772838, "learning_rate": 7.775000925290804e-07, "loss": 0.3765, "step": 7385 }, { "epoch": 0.9074824917065979, "grad_norm": 0.3812017892385202, "learning_rate": 7.754570699564028e-07, "loss": 0.379, "step": 7386 }, { "epoch": 0.907605356923455, "grad_norm": 0.33358368348701156, "learning_rate": 7.734166638961488e-07, "loss": 0.3989, "step": 7387 }, { "epoch": 0.9077282221403121, "grad_norm": 0.3416875713998356, "learning_rate": 7.713788747236361e-07, "loss": 0.4844, "step": 7388 }, { "epoch": 0.9078510873571692, "grad_norm": 0.42618879193552967, "learning_rate": 7.693437028137018e-07, "loss": 0.4145, "step": 7389 }, { "epoch": 0.9079739525740262, "grad_norm": 0.36054415080075, "learning_rate": 7.673111485407064e-07, "loss": 0.3148, "step": 7390 }, { "epoch": 0.9080968177908834, "grad_norm": 0.3658812302128237, "learning_rate": 7.652812122785225e-07, "loss": 0.459, "step": 7391 }, { "epoch": 0.9082196830077405, "grad_norm": 0.3152707176806523, "learning_rate": 7.632538944005429e-07, "loss": 0.3341, "step": 7392 }, { "epoch": 0.9083425482245976, "grad_norm": 0.42444493390416904, "learning_rate": 7.612291952796813e-07, "loss": 0.4934, "step": 7393 }, { "epoch": 0.9084654134414547, "grad_norm": 0.39289426144848466, "learning_rate": 7.592071152883695e-07, "loss": 0.3781, "step": 7394 }, { "epoch": 0.9085882786583118, "grad_norm": 0.3415074869155467, "learning_rate": 7.571876547985518e-07, "loss": 0.4321, "step": 7395 }, { "epoch": 0.9087111438751689, "grad_norm": 0.3834372341725786, "learning_rate": 7.551708141816977e-07, "loss": 0.418, "step": 7396 }, { "epoch": 0.908834009092026, "grad_norm": 0.4627745879445355, "learning_rate": 7.531565938087937e-07, "loss": 0.413, "step": 7397 }, { "epoch": 0.9089568743088832, "grad_norm": 0.3450385306075327, "learning_rate": 7.511449940503368e-07, "loss": 0.4581, "step": 7398 }, { "epoch": 0.9090797395257403, "grad_norm": 0.39379800927725245, "learning_rate": 7.491360152763543e-07, "loss": 0.3952, "step": 7399 }, { "epoch": 0.9092026047425974, "grad_norm": 0.3311049333682139, "learning_rate": 7.471296578563774e-07, "loss": 0.3693, "step": 7400 }, { "epoch": 0.9093254699594545, "grad_norm": 0.4208978759319617, "learning_rate": 7.451259221594709e-07, "loss": 0.4078, "step": 7401 }, { "epoch": 0.9094483351763116, "grad_norm": 0.35793602439320105, "learning_rate": 7.431248085542031e-07, "loss": 0.3357, "step": 7402 }, { "epoch": 0.9095712003931687, "grad_norm": 0.31785859542537137, "learning_rate": 7.411263174086696e-07, "loss": 0.3743, "step": 7403 }, { "epoch": 0.9096940656100257, "grad_norm": 0.32027585451688456, "learning_rate": 7.391304490904732e-07, "loss": 0.4154, "step": 7404 }, { "epoch": 0.9098169308268829, "grad_norm": 0.3208596297380902, "learning_rate": 7.371372039667518e-07, "loss": 0.3744, "step": 7405 }, { "epoch": 0.90993979604374, "grad_norm": 0.3753051173071001, "learning_rate": 7.351465824041403e-07, "loss": 0.3972, "step": 7406 }, { "epoch": 0.9100626612605971, "grad_norm": 0.40066048539176996, "learning_rate": 7.33158584768806e-07, "loss": 0.3072, "step": 7407 }, { "epoch": 0.9101855264774542, "grad_norm": 0.3576680197424805, "learning_rate": 7.311732114264247e-07, "loss": 0.391, "step": 7408 }, { "epoch": 0.9103083916943113, "grad_norm": 0.2710224941738866, "learning_rate": 7.291904627421942e-07, "loss": 0.4234, "step": 7409 }, { "epoch": 0.9104312569111684, "grad_norm": 0.332831341460214, "learning_rate": 7.27210339080831e-07, "loss": 0.3761, "step": 7410 }, { "epoch": 0.9105541221280256, "grad_norm": 0.3564162572897687, "learning_rate": 7.252328408065606e-07, "loss": 0.3413, "step": 7411 }, { "epoch": 0.9106769873448827, "grad_norm": 0.2651566296417407, "learning_rate": 7.232579682831353e-07, "loss": 0.4033, "step": 7412 }, { "epoch": 0.9107998525617398, "grad_norm": 0.35462519693560657, "learning_rate": 7.212857218738178e-07, "loss": 0.4449, "step": 7413 }, { "epoch": 0.9109227177785969, "grad_norm": 0.3048665896838259, "learning_rate": 7.193161019413946e-07, "loss": 0.3405, "step": 7414 }, { "epoch": 0.911045582995454, "grad_norm": 0.3000027681206265, "learning_rate": 7.173491088481576e-07, "loss": 0.4172, "step": 7415 }, { "epoch": 0.9111684482123111, "grad_norm": 0.3409185507157316, "learning_rate": 7.153847429559257e-07, "loss": 0.3313, "step": 7416 }, { "epoch": 0.9112913134291682, "grad_norm": 0.28495555683852103, "learning_rate": 7.134230046260348e-07, "loss": 0.4024, "step": 7417 }, { "epoch": 0.9114141786460254, "grad_norm": 0.27001796213254586, "learning_rate": 7.114638942193264e-07, "loss": 0.4062, "step": 7418 }, { "epoch": 0.9115370438628824, "grad_norm": 0.37417282938634944, "learning_rate": 7.09507412096172e-07, "loss": 0.3574, "step": 7419 }, { "epoch": 0.9116599090797395, "grad_norm": 0.3274896393293924, "learning_rate": 7.075535586164506e-07, "loss": 0.3886, "step": 7420 }, { "epoch": 0.9117827742965966, "grad_norm": 0.39191130128578733, "learning_rate": 7.056023341395662e-07, "loss": 0.3669, "step": 7421 }, { "epoch": 0.9119056395134537, "grad_norm": 0.40852885818177925, "learning_rate": 7.036537390244269e-07, "loss": 0.3163, "step": 7422 }, { "epoch": 0.9120285047303108, "grad_norm": 0.2760491104655002, "learning_rate": 7.017077736294675e-07, "loss": 0.3876, "step": 7423 }, { "epoch": 0.912151369947168, "grad_norm": 0.3469558610915363, "learning_rate": 6.997644383126367e-07, "loss": 0.3747, "step": 7424 }, { "epoch": 0.9122742351640251, "grad_norm": 0.3042196944210611, "learning_rate": 6.978237334313953e-07, "loss": 0.3354, "step": 7425 }, { "epoch": 0.9123971003808822, "grad_norm": 0.47371042981305944, "learning_rate": 6.958856593427277e-07, "loss": 0.3959, "step": 7426 }, { "epoch": 0.9125199655977393, "grad_norm": 0.43878469814229587, "learning_rate": 6.939502164031236e-07, "loss": 0.3469, "step": 7427 }, { "epoch": 0.9126428308145964, "grad_norm": 0.39616445480958606, "learning_rate": 6.920174049686035e-07, "loss": 0.4716, "step": 7428 }, { "epoch": 0.9127656960314535, "grad_norm": 0.3393486462587929, "learning_rate": 6.900872253946894e-07, "loss": 0.3762, "step": 7429 }, { "epoch": 0.9128885612483106, "grad_norm": 0.30206201027317336, "learning_rate": 6.881596780364291e-07, "loss": 0.4067, "step": 7430 }, { "epoch": 0.9130114264651678, "grad_norm": 0.320652414400586, "learning_rate": 6.862347632483757e-07, "loss": 0.3919, "step": 7431 }, { "epoch": 0.9131342916820249, "grad_norm": 0.37053500855737403, "learning_rate": 6.843124813846141e-07, "loss": 0.4276, "step": 7432 }, { "epoch": 0.9132571568988819, "grad_norm": 0.29338900534907336, "learning_rate": 6.823928327987283e-07, "loss": 0.3922, "step": 7433 }, { "epoch": 0.913380022115739, "grad_norm": 0.41181707539067136, "learning_rate": 6.804758178438309e-07, "loss": 0.405, "step": 7434 }, { "epoch": 0.9135028873325961, "grad_norm": 0.4279174430652024, "learning_rate": 6.785614368725396e-07, "loss": 0.3664, "step": 7435 }, { "epoch": 0.9136257525494532, "grad_norm": 0.32946032386265034, "learning_rate": 6.766496902369929e-07, "loss": 0.4345, "step": 7436 }, { "epoch": 0.9137486177663103, "grad_norm": 0.3962927413604802, "learning_rate": 6.747405782888478e-07, "loss": 0.4568, "step": 7437 }, { "epoch": 0.9138714829831674, "grad_norm": 0.46656913596384525, "learning_rate": 6.728341013792683e-07, "loss": 0.4188, "step": 7438 }, { "epoch": 0.9139943482000246, "grad_norm": 0.352907092773423, "learning_rate": 6.70930259858944e-07, "loss": 0.3742, "step": 7439 }, { "epoch": 0.9141172134168817, "grad_norm": 0.33762909651504186, "learning_rate": 6.690290540780681e-07, "loss": 0.3416, "step": 7440 }, { "epoch": 0.9142400786337388, "grad_norm": 0.4568166688335859, "learning_rate": 6.671304843863607e-07, "loss": 0.3529, "step": 7441 }, { "epoch": 0.9143629438505959, "grad_norm": 0.3405519583663218, "learning_rate": 6.652345511330477e-07, "loss": 0.3841, "step": 7442 }, { "epoch": 0.914485809067453, "grad_norm": 0.3665578077124081, "learning_rate": 6.633412546668733e-07, "loss": 0.3902, "step": 7443 }, { "epoch": 0.9146086742843101, "grad_norm": 0.34707708453460867, "learning_rate": 6.614505953361022e-07, "loss": 0.4259, "step": 7444 }, { "epoch": 0.9147315395011673, "grad_norm": 0.43974515209382414, "learning_rate": 6.59562573488503e-07, "loss": 0.3635, "step": 7445 }, { "epoch": 0.9148544047180244, "grad_norm": 0.3554978717354057, "learning_rate": 6.576771894713662e-07, "loss": 0.3084, "step": 7446 }, { "epoch": 0.9149772699348815, "grad_norm": 0.3702100864989216, "learning_rate": 6.557944436314978e-07, "loss": 0.4188, "step": 7447 }, { "epoch": 0.9151001351517385, "grad_norm": 0.382881857532258, "learning_rate": 6.539143363152189e-07, "loss": 0.3409, "step": 7448 }, { "epoch": 0.9152230003685956, "grad_norm": 0.4046539595796864, "learning_rate": 6.52036867868358e-07, "loss": 0.4453, "step": 7449 }, { "epoch": 0.9153458655854527, "grad_norm": 0.3348104812305888, "learning_rate": 6.501620386362639e-07, "loss": 0.4092, "step": 7450 }, { "epoch": 0.9154687308023098, "grad_norm": 0.3578379470011246, "learning_rate": 6.482898489638023e-07, "loss": 0.3579, "step": 7451 }, { "epoch": 0.915591596019167, "grad_norm": 0.37856594255490766, "learning_rate": 6.46420299195351e-07, "loss": 0.3843, "step": 7452 }, { "epoch": 0.9157144612360241, "grad_norm": 0.41646001127566235, "learning_rate": 6.445533896747968e-07, "loss": 0.405, "step": 7453 }, { "epoch": 0.9158373264528812, "grad_norm": 0.34016799010567367, "learning_rate": 6.426891207455482e-07, "loss": 0.3749, "step": 7454 }, { "epoch": 0.9159601916697383, "grad_norm": 0.3313346826772399, "learning_rate": 6.408274927505276e-07, "loss": 0.3507, "step": 7455 }, { "epoch": 0.9160830568865954, "grad_norm": 0.3663296475437425, "learning_rate": 6.389685060321643e-07, "loss": 0.4077, "step": 7456 }, { "epoch": 0.9162059221034525, "grad_norm": 0.36142347539235364, "learning_rate": 6.371121609324115e-07, "loss": 0.3939, "step": 7457 }, { "epoch": 0.9163287873203096, "grad_norm": 0.3145657888136112, "learning_rate": 6.352584577927278e-07, "loss": 0.3608, "step": 7458 }, { "epoch": 0.9164516525371668, "grad_norm": 0.4648943779872298, "learning_rate": 6.334073969540955e-07, "loss": 0.445, "step": 7459 }, { "epoch": 0.9165745177540239, "grad_norm": 0.3032736946324236, "learning_rate": 6.315589787570003e-07, "loss": 0.3604, "step": 7460 }, { "epoch": 0.916697382970881, "grad_norm": 0.27230417144993896, "learning_rate": 6.297132035414488e-07, "loss": 0.2923, "step": 7461 }, { "epoch": 0.916820248187738, "grad_norm": 0.4196666678884968, "learning_rate": 6.278700716469593e-07, "loss": 0.4048, "step": 7462 }, { "epoch": 0.9169431134045951, "grad_norm": 0.3328517763766689, "learning_rate": 6.260295834125623e-07, "loss": 0.3569, "step": 7463 }, { "epoch": 0.9170659786214522, "grad_norm": 0.3859037784905835, "learning_rate": 6.241917391768071e-07, "loss": 0.3963, "step": 7464 }, { "epoch": 0.9171888438383093, "grad_norm": 0.39622786024565243, "learning_rate": 6.223565392777481e-07, "loss": 0.3961, "step": 7465 }, { "epoch": 0.9173117090551665, "grad_norm": 0.35516601111451773, "learning_rate": 6.205239840529636e-07, "loss": 0.3726, "step": 7466 }, { "epoch": 0.9174345742720236, "grad_norm": 0.3120620264494894, "learning_rate": 6.186940738395374e-07, "loss": 0.302, "step": 7467 }, { "epoch": 0.9175574394888807, "grad_norm": 0.3397324803726025, "learning_rate": 6.16866808974072e-07, "loss": 0.4457, "step": 7468 }, { "epoch": 0.9176803047057378, "grad_norm": 0.3396141293453403, "learning_rate": 6.15042189792675e-07, "loss": 0.3215, "step": 7469 }, { "epoch": 0.9178031699225949, "grad_norm": 0.3634357464576, "learning_rate": 6.132202166309814e-07, "loss": 0.3952, "step": 7470 }, { "epoch": 0.917926035139452, "grad_norm": 0.35402400008631885, "learning_rate": 6.114008898241247e-07, "loss": 0.3003, "step": 7471 }, { "epoch": 0.9180489003563091, "grad_norm": 0.3354170031043555, "learning_rate": 6.095842097067639e-07, "loss": 0.406, "step": 7472 }, { "epoch": 0.9181717655731663, "grad_norm": 0.3757880901931311, "learning_rate": 6.0777017661306e-07, "loss": 0.3591, "step": 7473 }, { "epoch": 0.9182946307900234, "grad_norm": 0.29131794718862336, "learning_rate": 6.059587908766962e-07, "loss": 0.3551, "step": 7474 }, { "epoch": 0.9184174960068805, "grad_norm": 0.4173575062442887, "learning_rate": 6.041500528308641e-07, "loss": 0.4351, "step": 7475 }, { "epoch": 0.9185403612237376, "grad_norm": 0.43198101970973535, "learning_rate": 6.023439628082694e-07, "loss": 0.3846, "step": 7476 }, { "epoch": 0.9186632264405946, "grad_norm": 0.3554700455051469, "learning_rate": 6.005405211411297e-07, "loss": 0.3852, "step": 7477 }, { "epoch": 0.9187860916574517, "grad_norm": 0.4082382131300281, "learning_rate": 5.987397281611779e-07, "loss": 0.3878, "step": 7478 }, { "epoch": 0.9189089568743088, "grad_norm": 0.34733643445092477, "learning_rate": 5.969415841996606e-07, "loss": 0.4759, "step": 7479 }, { "epoch": 0.919031822091166, "grad_norm": 0.3711502805632518, "learning_rate": 5.951460895873284e-07, "loss": 0.4731, "step": 7480 }, { "epoch": 0.9191546873080231, "grad_norm": 0.3672057420378559, "learning_rate": 5.933532446544538e-07, "loss": 0.3896, "step": 7481 }, { "epoch": 0.9192775525248802, "grad_norm": 0.4111664945003129, "learning_rate": 5.915630497308228e-07, "loss": 0.3979, "step": 7482 }, { "epoch": 0.9194004177417373, "grad_norm": 0.35613424026431295, "learning_rate": 5.897755051457238e-07, "loss": 0.406, "step": 7483 }, { "epoch": 0.9195232829585944, "grad_norm": 0.3379225010652371, "learning_rate": 5.87990611227967e-07, "loss": 0.3492, "step": 7484 }, { "epoch": 0.9196461481754515, "grad_norm": 0.3222999110382658, "learning_rate": 5.862083683058733e-07, "loss": 0.4066, "step": 7485 }, { "epoch": 0.9197690133923087, "grad_norm": 0.41737896386464396, "learning_rate": 5.844287767072753e-07, "loss": 0.4364, "step": 7486 }, { "epoch": 0.9198918786091658, "grad_norm": 0.4014971911227219, "learning_rate": 5.82651836759513e-07, "loss": 0.419, "step": 7487 }, { "epoch": 0.9200147438260229, "grad_norm": 0.3438071714523257, "learning_rate": 5.808775487894447e-07, "loss": 0.3717, "step": 7488 }, { "epoch": 0.92013760904288, "grad_norm": 0.3365655618890277, "learning_rate": 5.791059131234411e-07, "loss": 0.3439, "step": 7489 }, { "epoch": 0.9202604742597371, "grad_norm": 0.3080204474432808, "learning_rate": 5.773369300873849e-07, "loss": 0.4289, "step": 7490 }, { "epoch": 0.9203833394765942, "grad_norm": 0.3100673948198445, "learning_rate": 5.755706000066624e-07, "loss": 0.305, "step": 7491 }, { "epoch": 0.9205062046934512, "grad_norm": 0.34830832355073693, "learning_rate": 5.738069232061837e-07, "loss": 0.3044, "step": 7492 }, { "epoch": 0.9206290699103084, "grad_norm": 0.42379847915027424, "learning_rate": 5.720459000103644e-07, "loss": 0.4029, "step": 7493 }, { "epoch": 0.9207519351271655, "grad_norm": 0.31735841859583347, "learning_rate": 5.702875307431321e-07, "loss": 0.4112, "step": 7494 }, { "epoch": 0.9208748003440226, "grad_norm": 0.3608584864840073, "learning_rate": 5.685318157279313e-07, "loss": 0.3971, "step": 7495 }, { "epoch": 0.9209976655608797, "grad_norm": 0.3771598427941731, "learning_rate": 5.667787552877085e-07, "loss": 0.3582, "step": 7496 }, { "epoch": 0.9211205307777368, "grad_norm": 0.4236531636248736, "learning_rate": 5.650283497449327e-07, "loss": 0.4029, "step": 7497 }, { "epoch": 0.9212433959945939, "grad_norm": 0.3812078528497457, "learning_rate": 5.632805994215761e-07, "loss": 0.3824, "step": 7498 }, { "epoch": 0.921366261211451, "grad_norm": 0.3700396860846505, "learning_rate": 5.615355046391302e-07, "loss": 0.443, "step": 7499 }, { "epoch": 0.9214891264283082, "grad_norm": 0.2966601375933047, "learning_rate": 5.597930657185913e-07, "loss": 0.3906, "step": 7500 }, { "epoch": 0.9216119916451653, "grad_norm": 0.3137289711889713, "learning_rate": 5.58053282980468e-07, "loss": 0.3602, "step": 7501 }, { "epoch": 0.9217348568620224, "grad_norm": 0.30209755683262035, "learning_rate": 5.56316156744786e-07, "loss": 0.3167, "step": 7502 }, { "epoch": 0.9218577220788795, "grad_norm": 0.3941589396186521, "learning_rate": 5.545816873310733e-07, "loss": 0.365, "step": 7503 }, { "epoch": 0.9219805872957366, "grad_norm": 0.41136978264181995, "learning_rate": 5.52849875058381e-07, "loss": 0.4064, "step": 7504 }, { "epoch": 0.9221034525125937, "grad_norm": 0.3711493729927164, "learning_rate": 5.511207202452595e-07, "loss": 0.4304, "step": 7505 }, { "epoch": 0.9222263177294507, "grad_norm": 0.3138023341231059, "learning_rate": 5.493942232097792e-07, "loss": 0.4013, "step": 7506 }, { "epoch": 0.9223491829463079, "grad_norm": 0.38391354977393405, "learning_rate": 5.476703842695114e-07, "loss": 0.3859, "step": 7507 }, { "epoch": 0.922472048163165, "grad_norm": 0.33566819030634787, "learning_rate": 5.459492037415536e-07, "loss": 0.3842, "step": 7508 }, { "epoch": 0.9225949133800221, "grad_norm": 0.3547058570445526, "learning_rate": 5.442306819425013e-07, "loss": 0.3219, "step": 7509 }, { "epoch": 0.9227177785968792, "grad_norm": 0.3564233893229935, "learning_rate": 5.425148191884666e-07, "loss": 0.3848, "step": 7510 }, { "epoch": 0.9228406438137363, "grad_norm": 0.4972708196157778, "learning_rate": 5.408016157950701e-07, "loss": 0.4081, "step": 7511 }, { "epoch": 0.9229635090305934, "grad_norm": 0.31127158907460306, "learning_rate": 5.390910720774433e-07, "loss": 0.4078, "step": 7512 }, { "epoch": 0.9230863742474505, "grad_norm": 0.3069032408557467, "learning_rate": 5.373831883502345e-07, "loss": 0.2904, "step": 7513 }, { "epoch": 0.9232092394643077, "grad_norm": 0.3187896454700851, "learning_rate": 5.35677964927594e-07, "loss": 0.3709, "step": 7514 }, { "epoch": 0.9233321046811648, "grad_norm": 0.37071449313603283, "learning_rate": 5.339754021231857e-07, "loss": 0.3201, "step": 7515 }, { "epoch": 0.9234549698980219, "grad_norm": 0.45566904882489345, "learning_rate": 5.322755002501878e-07, "loss": 0.4303, "step": 7516 }, { "epoch": 0.923577835114879, "grad_norm": 0.3204728680170821, "learning_rate": 5.305782596212866e-07, "loss": 0.3912, "step": 7517 }, { "epoch": 0.9237007003317361, "grad_norm": 0.4348579874495453, "learning_rate": 5.288836805486758e-07, "loss": 0.4594, "step": 7518 }, { "epoch": 0.9238235655485932, "grad_norm": 0.36776767594473037, "learning_rate": 5.271917633440627e-07, "loss": 0.3256, "step": 7519 }, { "epoch": 0.9239464307654504, "grad_norm": 0.34315369677396296, "learning_rate": 5.255025083186682e-07, "loss": 0.3464, "step": 7520 }, { "epoch": 0.9240692959823074, "grad_norm": 0.3883263165753183, "learning_rate": 5.23815915783214e-07, "loss": 0.3967, "step": 7521 }, { "epoch": 0.9241921611991645, "grad_norm": 0.3334721847475254, "learning_rate": 5.221319860479401e-07, "loss": 0.3951, "step": 7522 }, { "epoch": 0.9243150264160216, "grad_norm": 0.3221119557597026, "learning_rate": 5.204507194225971e-07, "loss": 0.3985, "step": 7523 }, { "epoch": 0.9244378916328787, "grad_norm": 0.35267696763861395, "learning_rate": 5.18772116216441e-07, "loss": 0.389, "step": 7524 }, { "epoch": 0.9245607568497358, "grad_norm": 0.40780371568434415, "learning_rate": 5.170961767382398e-07, "loss": 0.3819, "step": 7525 }, { "epoch": 0.9246836220665929, "grad_norm": 0.3315729228256489, "learning_rate": 5.154229012962702e-07, "loss": 0.3719, "step": 7526 }, { "epoch": 0.92480648728345, "grad_norm": 0.3467122335930865, "learning_rate": 5.137522901983244e-07, "loss": 0.3758, "step": 7527 }, { "epoch": 0.9249293525003072, "grad_norm": 0.3402452455950554, "learning_rate": 5.120843437516981e-07, "loss": 0.3444, "step": 7528 }, { "epoch": 0.9250522177171643, "grad_norm": 0.3297637633453587, "learning_rate": 5.104190622631977e-07, "loss": 0.4159, "step": 7529 }, { "epoch": 0.9251750829340214, "grad_norm": 0.42875023769289716, "learning_rate": 5.087564460391431e-07, "loss": 0.3549, "step": 7530 }, { "epoch": 0.9252979481508785, "grad_norm": 0.3621768961216368, "learning_rate": 5.070964953853629e-07, "loss": 0.3113, "step": 7531 }, { "epoch": 0.9254208133677356, "grad_norm": 0.3479564203589565, "learning_rate": 5.054392106071914e-07, "loss": 0.3918, "step": 7532 }, { "epoch": 0.9255436785845927, "grad_norm": 0.3771676450002592, "learning_rate": 5.03784592009478e-07, "loss": 0.3716, "step": 7533 }, { "epoch": 0.9256665438014499, "grad_norm": 0.3529106823022096, "learning_rate": 5.021326398965742e-07, "loss": 0.4403, "step": 7534 }, { "epoch": 0.9257894090183069, "grad_norm": 0.37734805881030264, "learning_rate": 5.004833545723519e-07, "loss": 0.44, "step": 7535 }, { "epoch": 0.925912274235164, "grad_norm": 0.31761004198232934, "learning_rate": 4.988367363401835e-07, "loss": 0.4293, "step": 7536 }, { "epoch": 0.9260351394520211, "grad_norm": 0.3477028954554739, "learning_rate": 4.971927855029551e-07, "loss": 0.4041, "step": 7537 }, { "epoch": 0.9261580046688782, "grad_norm": 0.37223277368573926, "learning_rate": 4.95551502363058e-07, "loss": 0.4206, "step": 7538 }, { "epoch": 0.9262808698857353, "grad_norm": 0.36327102637586783, "learning_rate": 4.939128872223975e-07, "loss": 0.4921, "step": 7539 }, { "epoch": 0.9264037351025924, "grad_norm": 0.34648668583054143, "learning_rate": 4.922769403823873e-07, "loss": 0.3356, "step": 7540 }, { "epoch": 0.9265266003194496, "grad_norm": 0.35307229935430195, "learning_rate": 4.90643662143947e-07, "loss": 0.3502, "step": 7541 }, { "epoch": 0.9266494655363067, "grad_norm": 0.2914797470943421, "learning_rate": 4.890130528075093e-07, "loss": 0.3224, "step": 7542 }, { "epoch": 0.9267723307531638, "grad_norm": 0.45335483524039016, "learning_rate": 4.873851126730128e-07, "loss": 0.4477, "step": 7543 }, { "epoch": 0.9268951959700209, "grad_norm": 0.330320839440217, "learning_rate": 4.857598420399078e-07, "loss": 0.4388, "step": 7544 }, { "epoch": 0.927018061186878, "grad_norm": 0.35036654994574573, "learning_rate": 4.841372412071504e-07, "loss": 0.3713, "step": 7545 }, { "epoch": 0.9271409264037351, "grad_norm": 0.40531841555156034, "learning_rate": 4.8251731047321e-07, "loss": 0.4776, "step": 7546 }, { "epoch": 0.9272637916205922, "grad_norm": 0.34165285238274407, "learning_rate": 4.809000501360616e-07, "loss": 0.374, "step": 7547 }, { "epoch": 0.9273866568374494, "grad_norm": 0.4036618148553662, "learning_rate": 4.79285460493189e-07, "loss": 0.3703, "step": 7548 }, { "epoch": 0.9275095220543065, "grad_norm": 0.3323662387632746, "learning_rate": 4.776735418415846e-07, "loss": 0.4128, "step": 7549 }, { "epoch": 0.9276323872711635, "grad_norm": 0.34039189695216066, "learning_rate": 4.760642944777527e-07, "loss": 0.4503, "step": 7550 }, { "epoch": 0.9277552524880206, "grad_norm": 0.33284278248316224, "learning_rate": 4.744577186977034e-07, "loss": 0.3433, "step": 7551 }, { "epoch": 0.9278781177048777, "grad_norm": 0.3726573519910246, "learning_rate": 4.728538147969536e-07, "loss": 0.3133, "step": 7552 }, { "epoch": 0.9280009829217348, "grad_norm": 0.27540722644235244, "learning_rate": 4.7125258307053385e-07, "loss": 0.3637, "step": 7553 }, { "epoch": 0.928123848138592, "grad_norm": 0.2878291911068937, "learning_rate": 4.6965402381297874e-07, "loss": 0.3388, "step": 7554 }, { "epoch": 0.9282467133554491, "grad_norm": 0.30964848163508335, "learning_rate": 4.6805813731833456e-07, "loss": 0.3592, "step": 7555 }, { "epoch": 0.9283695785723062, "grad_norm": 0.4692787513154242, "learning_rate": 4.664649238801516e-07, "loss": 0.4617, "step": 7556 }, { "epoch": 0.9284924437891633, "grad_norm": 0.3444255589480403, "learning_rate": 4.6487438379149207e-07, "loss": 0.3735, "step": 7557 }, { "epoch": 0.9286153090060204, "grad_norm": 0.37438476305523877, "learning_rate": 4.632865173449285e-07, "loss": 0.4484, "step": 7558 }, { "epoch": 0.9287381742228775, "grad_norm": 0.43281849878768525, "learning_rate": 4.617013248325341e-07, "loss": 0.3616, "step": 7559 }, { "epoch": 0.9288610394397346, "grad_norm": 0.40908159042069236, "learning_rate": 4.601188065458989e-07, "loss": 0.3885, "step": 7560 }, { "epoch": 0.9289839046565918, "grad_norm": 0.3404733466233858, "learning_rate": 4.5853896277610995e-07, "loss": 0.349, "step": 7561 }, { "epoch": 0.9291067698734489, "grad_norm": 0.34086434654363795, "learning_rate": 4.569617938137799e-07, "loss": 0.4131, "step": 7562 }, { "epoch": 0.929229635090306, "grad_norm": 0.3375182293844212, "learning_rate": 4.5538729994900994e-07, "loss": 0.4127, "step": 7563 }, { "epoch": 0.929352500307163, "grad_norm": 0.4274689704661208, "learning_rate": 4.5381548147142015e-07, "loss": 0.442, "step": 7564 }, { "epoch": 0.9294753655240201, "grad_norm": 0.2958680580127411, "learning_rate": 4.5224633867014086e-07, "loss": 0.3763, "step": 7565 }, { "epoch": 0.9295982307408772, "grad_norm": 0.38544462452607114, "learning_rate": 4.5067987183379956e-07, "loss": 0.3841, "step": 7566 }, { "epoch": 0.9297210959577343, "grad_norm": 0.31196400099632005, "learning_rate": 4.491160812505407e-07, "loss": 0.3788, "step": 7567 }, { "epoch": 0.9298439611745914, "grad_norm": 0.3749834357271043, "learning_rate": 4.4755496720801094e-07, "loss": 0.434, "step": 7568 }, { "epoch": 0.9299668263914486, "grad_norm": 0.3746080889722457, "learning_rate": 4.4599652999337213e-07, "loss": 0.4114, "step": 7569 }, { "epoch": 0.9300896916083057, "grad_norm": 0.3742384595118698, "learning_rate": 4.444407698932834e-07, "loss": 0.3936, "step": 7570 }, { "epoch": 0.9302125568251628, "grad_norm": 0.3951058991321585, "learning_rate": 4.428876871939208e-07, "loss": 0.3402, "step": 7571 }, { "epoch": 0.9303354220420199, "grad_norm": 0.41034379430391893, "learning_rate": 4.4133728218095916e-07, "loss": 0.4242, "step": 7572 }, { "epoch": 0.930458287258877, "grad_norm": 0.342136568039731, "learning_rate": 4.3978955513959195e-07, "loss": 0.3596, "step": 7573 }, { "epoch": 0.9305811524757341, "grad_norm": 0.3822736378525564, "learning_rate": 4.382445063545065e-07, "loss": 0.3907, "step": 7574 }, { "epoch": 0.9307040176925913, "grad_norm": 0.29923757975098514, "learning_rate": 4.367021361099105e-07, "loss": 0.3004, "step": 7575 }, { "epoch": 0.9308268829094484, "grad_norm": 0.40715485174416244, "learning_rate": 4.351624446895086e-07, "loss": 0.4624, "step": 7576 }, { "epoch": 0.9309497481263055, "grad_norm": 0.3621539792215637, "learning_rate": 4.336254323765193e-07, "loss": 0.3197, "step": 7577 }, { "epoch": 0.9310726133431626, "grad_norm": 0.2894609175674602, "learning_rate": 4.320910994536664e-07, "loss": 0.3799, "step": 7578 }, { "epoch": 0.9311954785600196, "grad_norm": 0.3536478061018946, "learning_rate": 4.3055944620317754e-07, "loss": 0.3387, "step": 7579 }, { "epoch": 0.9313183437768767, "grad_norm": 0.35813092155829496, "learning_rate": 4.2903047290679233e-07, "loss": 0.3369, "step": 7580 }, { "epoch": 0.9314412089937338, "grad_norm": 0.2963607041470913, "learning_rate": 4.2750417984575573e-07, "loss": 0.4132, "step": 7581 }, { "epoch": 0.931564074210591, "grad_norm": 0.3655853617342909, "learning_rate": 4.259805673008216e-07, "loss": 0.3621, "step": 7582 }, { "epoch": 0.9316869394274481, "grad_norm": 0.42982449220363317, "learning_rate": 4.2445963555224396e-07, "loss": 0.41, "step": 7583 }, { "epoch": 0.9318098046443052, "grad_norm": 0.4236279748910196, "learning_rate": 4.2294138487979083e-07, "loss": 0.4302, "step": 7584 }, { "epoch": 0.9319326698611623, "grad_norm": 0.4032282506662662, "learning_rate": 4.214258155627371e-07, "loss": 0.376, "step": 7585 }, { "epoch": 0.9320555350780194, "grad_norm": 0.39142119634627465, "learning_rate": 4.1991292787985636e-07, "loss": 0.4224, "step": 7586 }, { "epoch": 0.9321784002948765, "grad_norm": 0.302729702544417, "learning_rate": 4.1840272210943773e-07, "loss": 0.3418, "step": 7587 }, { "epoch": 0.9323012655117336, "grad_norm": 0.3061927837033933, "learning_rate": 4.168951985292724e-07, "loss": 0.42, "step": 7588 }, { "epoch": 0.9324241307285908, "grad_norm": 0.3186114480708992, "learning_rate": 4.1539035741666344e-07, "loss": 0.4259, "step": 7589 }, { "epoch": 0.9325469959454479, "grad_norm": 0.3440046655291542, "learning_rate": 4.1388819904841115e-07, "loss": 0.403, "step": 7590 }, { "epoch": 0.932669861162305, "grad_norm": 0.3050157440466286, "learning_rate": 4.123887237008311e-07, "loss": 0.443, "step": 7591 }, { "epoch": 0.9327927263791621, "grad_norm": 0.3492985250845149, "learning_rate": 4.1089193164974115e-07, "loss": 0.4541, "step": 7592 }, { "epoch": 0.9329155915960192, "grad_norm": 0.3194567679798791, "learning_rate": 4.0939782317046924e-07, "loss": 0.3112, "step": 7593 }, { "epoch": 0.9330384568128762, "grad_norm": 0.3726441102663276, "learning_rate": 4.0790639853784227e-07, "loss": 0.4051, "step": 7594 }, { "epoch": 0.9331613220297333, "grad_norm": 0.40736267913997926, "learning_rate": 4.0641765802619914e-07, "loss": 0.3619, "step": 7595 }, { "epoch": 0.9332841872465905, "grad_norm": 0.34796068775096217, "learning_rate": 4.049316019093874e-07, "loss": 0.3104, "step": 7596 }, { "epoch": 0.9334070524634476, "grad_norm": 0.3496979714138192, "learning_rate": 4.0344823046075343e-07, "loss": 0.3438, "step": 7597 }, { "epoch": 0.9335299176803047, "grad_norm": 0.3231993684810767, "learning_rate": 4.0196754395315726e-07, "loss": 0.346, "step": 7598 }, { "epoch": 0.9336527828971618, "grad_norm": 0.35088126750631815, "learning_rate": 4.0048954265895774e-07, "loss": 0.3801, "step": 7599 }, { "epoch": 0.9337756481140189, "grad_norm": 0.3651913667805557, "learning_rate": 3.990142268500274e-07, "loss": 0.3111, "step": 7600 }, { "epoch": 0.933898513330876, "grad_norm": 0.3214377811128702, "learning_rate": 3.975415967977375e-07, "loss": 0.3396, "step": 7601 }, { "epoch": 0.9340213785477331, "grad_norm": 0.3361311847379927, "learning_rate": 3.96071652772973e-07, "loss": 0.4416, "step": 7602 }, { "epoch": 0.9341442437645903, "grad_norm": 0.5795758692550957, "learning_rate": 3.9460439504611587e-07, "loss": 0.4735, "step": 7603 }, { "epoch": 0.9342671089814474, "grad_norm": 0.470173930047587, "learning_rate": 3.9313982388706206e-07, "loss": 0.5418, "step": 7604 }, { "epoch": 0.9343899741983045, "grad_norm": 0.3784562479037715, "learning_rate": 3.9167793956520927e-07, "loss": 0.4561, "step": 7605 }, { "epoch": 0.9345128394151616, "grad_norm": 0.3436137764260755, "learning_rate": 3.902187423494591e-07, "loss": 0.3427, "step": 7606 }, { "epoch": 0.9346357046320187, "grad_norm": 0.34686616600458875, "learning_rate": 3.8876223250822516e-07, "loss": 0.3775, "step": 7607 }, { "epoch": 0.9347585698488757, "grad_norm": 0.29425551952178847, "learning_rate": 3.8730841030942155e-07, "loss": 0.3281, "step": 7608 }, { "epoch": 0.9348814350657328, "grad_norm": 0.33843825900064534, "learning_rate": 3.858572760204693e-07, "loss": 0.421, "step": 7609 }, { "epoch": 0.93500430028259, "grad_norm": 0.3242583042358868, "learning_rate": 3.844088299082932e-07, "loss": 0.3901, "step": 7610 }, { "epoch": 0.9351271654994471, "grad_norm": 0.3552713257535368, "learning_rate": 3.829630722393301e-07, "loss": 0.4187, "step": 7611 }, { "epoch": 0.9352500307163042, "grad_norm": 0.40996029164130865, "learning_rate": 3.815200032795141e-07, "loss": 0.4465, "step": 7612 }, { "epoch": 0.9353728959331613, "grad_norm": 0.4029287471175372, "learning_rate": 3.800796232942894e-07, "loss": 0.4493, "step": 7613 }, { "epoch": 0.9354957611500184, "grad_norm": 0.35145773494204147, "learning_rate": 3.78641932548604e-07, "loss": 0.3398, "step": 7614 }, { "epoch": 0.9356186263668755, "grad_norm": 0.35274780312278625, "learning_rate": 3.7720693130691155e-07, "loss": 0.4178, "step": 7615 }, { "epoch": 0.9357414915837327, "grad_norm": 0.3078863498697365, "learning_rate": 3.7577461983317407e-07, "loss": 0.3617, "step": 7616 }, { "epoch": 0.9358643568005898, "grad_norm": 0.2813354746753107, "learning_rate": 3.743449983908526e-07, "loss": 0.3716, "step": 7617 }, { "epoch": 0.9359872220174469, "grad_norm": 0.2968425889557146, "learning_rate": 3.7291806724291667e-07, "loss": 0.3837, "step": 7618 }, { "epoch": 0.936110087234304, "grad_norm": 0.34266250017406813, "learning_rate": 3.7149382665184305e-07, "loss": 0.3915, "step": 7619 }, { "epoch": 0.9362329524511611, "grad_norm": 0.31730448529036925, "learning_rate": 3.700722768796122e-07, "loss": 0.3564, "step": 7620 }, { "epoch": 0.9363558176680182, "grad_norm": 0.303328596607787, "learning_rate": 3.686534181877066e-07, "loss": 0.3736, "step": 7621 }, { "epoch": 0.9364786828848753, "grad_norm": 0.4111051002056761, "learning_rate": 3.6723725083711745e-07, "loss": 0.4416, "step": 7622 }, { "epoch": 0.9366015481017324, "grad_norm": 0.3319069340675933, "learning_rate": 3.658237750883398e-07, "loss": 0.3301, "step": 7623 }, { "epoch": 0.9367244133185895, "grad_norm": 0.33564905994154975, "learning_rate": 3.644129912013705e-07, "loss": 0.297, "step": 7624 }, { "epoch": 0.9368472785354466, "grad_norm": 0.3876659255748757, "learning_rate": 3.630048994357188e-07, "loss": 0.3344, "step": 7625 }, { "epoch": 0.9369701437523037, "grad_norm": 0.4191175052733619, "learning_rate": 3.615995000503891e-07, "loss": 0.3459, "step": 7626 }, { "epoch": 0.9370930089691608, "grad_norm": 0.3071089472651746, "learning_rate": 3.601967933039013e-07, "loss": 0.3782, "step": 7627 }, { "epoch": 0.9372158741860179, "grad_norm": 0.381431111624899, "learning_rate": 3.5879677945426904e-07, "loss": 0.4292, "step": 7628 }, { "epoch": 0.937338739402875, "grad_norm": 0.35059462977352235, "learning_rate": 3.573994587590163e-07, "loss": 0.3886, "step": 7629 }, { "epoch": 0.9374616046197322, "grad_norm": 0.3170789308444072, "learning_rate": 3.5600483147517406e-07, "loss": 0.3718, "step": 7630 }, { "epoch": 0.9375844698365893, "grad_norm": 0.41264862577607947, "learning_rate": 3.5461289785927384e-07, "loss": 0.52, "step": 7631 }, { "epoch": 0.9377073350534464, "grad_norm": 0.33481933439308775, "learning_rate": 3.532236581673526e-07, "loss": 0.4325, "step": 7632 }, { "epoch": 0.9378302002703035, "grad_norm": 0.25866556890883524, "learning_rate": 3.5183711265495077e-07, "loss": 0.3548, "step": 7633 }, { "epoch": 0.9379530654871606, "grad_norm": 0.2958623126705061, "learning_rate": 3.504532615771161e-07, "loss": 0.3132, "step": 7634 }, { "epoch": 0.9380759307040177, "grad_norm": 0.42508048463133813, "learning_rate": 3.490721051883966e-07, "loss": 0.4326, "step": 7635 }, { "epoch": 0.9381987959208749, "grad_norm": 0.40214728483847634, "learning_rate": 3.476936437428524e-07, "loss": 0.394, "step": 7636 }, { "epoch": 0.9383216611377319, "grad_norm": 0.35296494007535517, "learning_rate": 3.46317877494034e-07, "loss": 0.3827, "step": 7637 }, { "epoch": 0.938444526354589, "grad_norm": 0.31819942994057615, "learning_rate": 3.449448066950139e-07, "loss": 0.4161, "step": 7638 }, { "epoch": 0.9385673915714461, "grad_norm": 0.3488157655421079, "learning_rate": 3.435744315983519e-07, "loss": 0.5092, "step": 7639 }, { "epoch": 0.9386902567883032, "grad_norm": 0.34000326262438046, "learning_rate": 3.422067524561262e-07, "loss": 0.4012, "step": 7640 }, { "epoch": 0.9388131220051603, "grad_norm": 0.3919557068078476, "learning_rate": 3.408417695199073e-07, "loss": 0.3813, "step": 7641 }, { "epoch": 0.9389359872220174, "grad_norm": 0.4919909044171473, "learning_rate": 3.39479483040776e-07, "loss": 0.4253, "step": 7642 }, { "epoch": 0.9390588524388745, "grad_norm": 0.3290521666607954, "learning_rate": 3.3811989326932026e-07, "loss": 0.3385, "step": 7643 }, { "epoch": 0.9391817176557317, "grad_norm": 0.3652912758205887, "learning_rate": 3.367630004556216e-07, "loss": 0.391, "step": 7644 }, { "epoch": 0.9393045828725888, "grad_norm": 0.3731707140653063, "learning_rate": 3.354088048492754e-07, "loss": 0.3892, "step": 7645 }, { "epoch": 0.9394274480894459, "grad_norm": 0.4746346124972766, "learning_rate": 3.340573066993757e-07, "loss": 0.3371, "step": 7646 }, { "epoch": 0.939550313306303, "grad_norm": 0.33450414434473713, "learning_rate": 3.3270850625452377e-07, "loss": 0.3759, "step": 7647 }, { "epoch": 0.9396731785231601, "grad_norm": 0.33321118503646513, "learning_rate": 3.3136240376281935e-07, "loss": 0.3991, "step": 7648 }, { "epoch": 0.9397960437400172, "grad_norm": 0.43352997969596313, "learning_rate": 3.3001899947187275e-07, "loss": 0.4013, "step": 7649 }, { "epoch": 0.9399189089568744, "grad_norm": 0.4108585307368024, "learning_rate": 3.28678293628793e-07, "loss": 0.4116, "step": 7650 }, { "epoch": 0.9400417741737315, "grad_norm": 0.32267765556021927, "learning_rate": 3.273402864801944e-07, "loss": 0.4242, "step": 7651 }, { "epoch": 0.9401646393905885, "grad_norm": 0.3103682532939773, "learning_rate": 3.2600497827219524e-07, "loss": 0.376, "step": 7652 }, { "epoch": 0.9402875046074456, "grad_norm": 0.4255682139939715, "learning_rate": 3.246723692504139e-07, "loss": 0.3495, "step": 7653 }, { "epoch": 0.9404103698243027, "grad_norm": 0.45393420947792823, "learning_rate": 3.2334245965997933e-07, "loss": 0.374, "step": 7654 }, { "epoch": 0.9405332350411598, "grad_norm": 0.3196087999947688, "learning_rate": 3.220152497455175e-07, "loss": 0.3898, "step": 7655 }, { "epoch": 0.9406561002580169, "grad_norm": 0.36691654730767365, "learning_rate": 3.206907397511599e-07, "loss": 0.3882, "step": 7656 }, { "epoch": 0.940778965474874, "grad_norm": 0.39377218683726645, "learning_rate": 3.1936892992054155e-07, "loss": 0.3896, "step": 7657 }, { "epoch": 0.9409018306917312, "grad_norm": 0.3235713564411378, "learning_rate": 3.18049820496803e-07, "loss": 0.3876, "step": 7658 }, { "epoch": 0.9410246959085883, "grad_norm": 0.35131810631443416, "learning_rate": 3.167334117225834e-07, "loss": 0.3964, "step": 7659 }, { "epoch": 0.9411475611254454, "grad_norm": 0.3371374838834, "learning_rate": 3.154197038400275e-07, "loss": 0.3091, "step": 7660 }, { "epoch": 0.9412704263423025, "grad_norm": 0.37777425044605717, "learning_rate": 3.141086970907853e-07, "loss": 0.4044, "step": 7661 }, { "epoch": 0.9413932915591596, "grad_norm": 0.4209475753939854, "learning_rate": 3.1280039171600715e-07, "loss": 0.3405, "step": 7662 }, { "epoch": 0.9415161567760167, "grad_norm": 0.40973351441409933, "learning_rate": 3.1149478795634736e-07, "loss": 0.4907, "step": 7663 }, { "epoch": 0.9416390219928739, "grad_norm": 0.3833050550940264, "learning_rate": 3.1019188605196035e-07, "loss": 0.4167, "step": 7664 }, { "epoch": 0.941761887209731, "grad_norm": 0.37513506937268165, "learning_rate": 3.088916862425112e-07, "loss": 0.4489, "step": 7665 }, { "epoch": 0.941884752426588, "grad_norm": 0.3517869947123134, "learning_rate": 3.0759418876716183e-07, "loss": 0.3575, "step": 7666 }, { "epoch": 0.9420076176434451, "grad_norm": 0.34088689865601507, "learning_rate": 3.062993938645781e-07, "loss": 0.3665, "step": 7667 }, { "epoch": 0.9421304828603022, "grad_norm": 0.2848353126923212, "learning_rate": 3.0500730177292604e-07, "loss": 0.3155, "step": 7668 }, { "epoch": 0.9422533480771593, "grad_norm": 0.32153293453657406, "learning_rate": 3.037179127298823e-07, "loss": 0.3949, "step": 7669 }, { "epoch": 0.9423762132940164, "grad_norm": 0.2671289481454387, "learning_rate": 3.024312269726204e-07, "loss": 0.3787, "step": 7670 }, { "epoch": 0.9424990785108736, "grad_norm": 0.30353513657006814, "learning_rate": 3.0114724473781443e-07, "loss": 0.3012, "step": 7671 }, { "epoch": 0.9426219437277307, "grad_norm": 0.43511848189770563, "learning_rate": 2.998659662616504e-07, "loss": 0.3535, "step": 7672 }, { "epoch": 0.9427448089445878, "grad_norm": 0.3354074648671101, "learning_rate": 2.985873917798082e-07, "loss": 0.3338, "step": 7673 }, { "epoch": 0.9428676741614449, "grad_norm": 0.41699024796916107, "learning_rate": 2.97311521527473e-07, "loss": 0.4412, "step": 7674 }, { "epoch": 0.942990539378302, "grad_norm": 0.32271618294583604, "learning_rate": 2.9603835573933034e-07, "loss": 0.3277, "step": 7675 }, { "epoch": 0.9431134045951591, "grad_norm": 0.3475345366065042, "learning_rate": 2.947678946495763e-07, "loss": 0.3484, "step": 7676 }, { "epoch": 0.9432362698120162, "grad_norm": 0.33970291693440036, "learning_rate": 2.935001384919006e-07, "loss": 0.3987, "step": 7677 }, { "epoch": 0.9433591350288734, "grad_norm": 0.36201120274309523, "learning_rate": 2.9223508749950003e-07, "loss": 0.3703, "step": 7678 }, { "epoch": 0.9434820002457305, "grad_norm": 0.39099914406577013, "learning_rate": 2.909727419050717e-07, "loss": 0.4043, "step": 7679 }, { "epoch": 0.9436048654625876, "grad_norm": 0.3651700012175808, "learning_rate": 2.89713101940815e-07, "loss": 0.4066, "step": 7680 }, { "epoch": 0.9437277306794446, "grad_norm": 0.32323129004166334, "learning_rate": 2.8845616783843455e-07, "loss": 0.3422, "step": 7681 }, { "epoch": 0.9438505958963017, "grad_norm": 0.3762042183095099, "learning_rate": 2.872019398291337e-07, "loss": 0.4633, "step": 7682 }, { "epoch": 0.9439734611131588, "grad_norm": 0.37229183813224825, "learning_rate": 2.8595041814362124e-07, "loss": 0.4671, "step": 7683 }, { "epoch": 0.944096326330016, "grad_norm": 0.3920533217240954, "learning_rate": 2.8470160301210304e-07, "loss": 0.3592, "step": 7684 }, { "epoch": 0.9442191915468731, "grad_norm": 0.32496760787034845, "learning_rate": 2.83455494664297e-07, "loss": 0.4148, "step": 7685 }, { "epoch": 0.9443420567637302, "grad_norm": 0.3010041882979097, "learning_rate": 2.822120933294098e-07, "loss": 0.3598, "step": 7686 }, { "epoch": 0.9444649219805873, "grad_norm": 0.4653641322330199, "learning_rate": 2.8097139923615845e-07, "loss": 0.4554, "step": 7687 }, { "epoch": 0.9445877871974444, "grad_norm": 0.3295947078825061, "learning_rate": 2.797334126127654e-07, "loss": 0.369, "step": 7688 }, { "epoch": 0.9447106524143015, "grad_norm": 0.29167969171318964, "learning_rate": 2.784981336869452e-07, "loss": 0.3972, "step": 7689 }, { "epoch": 0.9448335176311586, "grad_norm": 0.296045454847138, "learning_rate": 2.772655626859211e-07, "loss": 0.3714, "step": 7690 }, { "epoch": 0.9449563828480158, "grad_norm": 0.31891138689573034, "learning_rate": 2.7603569983641496e-07, "loss": 0.3376, "step": 7691 }, { "epoch": 0.9450792480648729, "grad_norm": 0.3722602451337574, "learning_rate": 2.748085453646559e-07, "loss": 0.4069, "step": 7692 }, { "epoch": 0.94520211328173, "grad_norm": 0.35747057710766916, "learning_rate": 2.7358409949636674e-07, "loss": 0.3789, "step": 7693 }, { "epoch": 0.9453249784985871, "grad_norm": 0.39018989046104224, "learning_rate": 2.723623624567789e-07, "loss": 0.4229, "step": 7694 }, { "epoch": 0.9454478437154441, "grad_norm": 0.3434286186166346, "learning_rate": 2.711433344706227e-07, "loss": 0.4055, "step": 7695 }, { "epoch": 0.9455707089323012, "grad_norm": 0.2955972992814723, "learning_rate": 2.69927015762132e-07, "loss": 0.3561, "step": 7696 }, { "epoch": 0.9456935741491583, "grad_norm": 0.3434793634889508, "learning_rate": 2.687134065550362e-07, "loss": 0.3711, "step": 7697 }, { "epoch": 0.9458164393660154, "grad_norm": 0.39774657676265457, "learning_rate": 2.675025070725734e-07, "loss": 0.4254, "step": 7698 }, { "epoch": 0.9459393045828726, "grad_norm": 0.32452588607360633, "learning_rate": 2.662943175374838e-07, "loss": 0.4412, "step": 7699 }, { "epoch": 0.9460621697997297, "grad_norm": 0.31916425712848284, "learning_rate": 2.650888381719996e-07, "loss": 0.4145, "step": 7700 }, { "epoch": 0.9461850350165868, "grad_norm": 0.36736160739401436, "learning_rate": 2.6388606919786673e-07, "loss": 0.3986, "step": 7701 }, { "epoch": 0.9463079002334439, "grad_norm": 0.36295356265668977, "learning_rate": 2.626860108363233e-07, "loss": 0.3554, "step": 7702 }, { "epoch": 0.946430765450301, "grad_norm": 0.36126218320528725, "learning_rate": 2.614886633081143e-07, "loss": 0.5036, "step": 7703 }, { "epoch": 0.9465536306671581, "grad_norm": 0.36047555217450483, "learning_rate": 2.602940268334819e-07, "loss": 0.4512, "step": 7704 }, { "epoch": 0.9466764958840153, "grad_norm": 0.3241321238534845, "learning_rate": 2.5910210163217376e-07, "loss": 0.3378, "step": 7705 }, { "epoch": 0.9467993611008724, "grad_norm": 0.3313134732952991, "learning_rate": 2.5791288792343437e-07, "loss": 0.4704, "step": 7706 }, { "epoch": 0.9469222263177295, "grad_norm": 0.37212707049630567, "learning_rate": 2.567263859260155e-07, "loss": 0.3453, "step": 7707 }, { "epoch": 0.9470450915345866, "grad_norm": 0.3352228909526243, "learning_rate": 2.555425958581642e-07, "loss": 0.3839, "step": 7708 }, { "epoch": 0.9471679567514437, "grad_norm": 0.37137810540282057, "learning_rate": 2.5436151793762964e-07, "loss": 0.3247, "step": 7709 }, { "epoch": 0.9472908219683007, "grad_norm": 0.353303615821647, "learning_rate": 2.531831523816663e-07, "loss": 0.4181, "step": 7710 }, { "epoch": 0.9474136871851578, "grad_norm": 0.32318788047204694, "learning_rate": 2.520074994070243e-07, "loss": 0.3457, "step": 7711 }, { "epoch": 0.947536552402015, "grad_norm": 0.35851137956189016, "learning_rate": 2.5083455922996044e-07, "loss": 0.3771, "step": 7712 }, { "epoch": 0.9476594176188721, "grad_norm": 0.3167919156386104, "learning_rate": 2.496643320662256e-07, "loss": 0.4053, "step": 7713 }, { "epoch": 0.9477822828357292, "grad_norm": 0.33885046582883616, "learning_rate": 2.484968181310793e-07, "loss": 0.4292, "step": 7714 }, { "epoch": 0.9479051480525863, "grad_norm": 0.29256325608389816, "learning_rate": 2.4733201763927624e-07, "loss": 0.3778, "step": 7715 }, { "epoch": 0.9480280132694434, "grad_norm": 0.3018278290175745, "learning_rate": 2.461699308050752e-07, "loss": 0.4584, "step": 7716 }, { "epoch": 0.9481508784863005, "grad_norm": 0.3292885584882907, "learning_rate": 2.450105578422318e-07, "loss": 0.3415, "step": 7717 }, { "epoch": 0.9482737437031576, "grad_norm": 0.35344178354002187, "learning_rate": 2.438538989640071e-07, "loss": 0.4506, "step": 7718 }, { "epoch": 0.9483966089200148, "grad_norm": 0.33190922588948907, "learning_rate": 2.4269995438316093e-07, "loss": 0.344, "step": 7719 }, { "epoch": 0.9485194741368719, "grad_norm": 0.44280630937672855, "learning_rate": 2.415487243119535e-07, "loss": 0.3547, "step": 7720 }, { "epoch": 0.948642339353729, "grad_norm": 0.2921445622468166, "learning_rate": 2.404002089621471e-07, "loss": 0.4252, "step": 7721 }, { "epoch": 0.9487652045705861, "grad_norm": 0.4005397332810649, "learning_rate": 2.3925440854500104e-07, "loss": 0.3866, "step": 7722 }, { "epoch": 0.9488880697874432, "grad_norm": 0.31305114224186753, "learning_rate": 2.3811132327128172e-07, "loss": 0.3439, "step": 7723 }, { "epoch": 0.9490109350043003, "grad_norm": 0.4114051854594187, "learning_rate": 2.369709533512493e-07, "loss": 0.3938, "step": 7724 }, { "epoch": 0.9491338002211573, "grad_norm": 0.3373395021766906, "learning_rate": 2.3583329899466765e-07, "loss": 0.4498, "step": 7725 }, { "epoch": 0.9492566654380145, "grad_norm": 0.3691876198925732, "learning_rate": 2.346983604108044e-07, "loss": 0.3883, "step": 7726 }, { "epoch": 0.9493795306548716, "grad_norm": 0.36243414737455565, "learning_rate": 2.3356613780841919e-07, "loss": 0.3573, "step": 7727 }, { "epoch": 0.9495023958717287, "grad_norm": 0.40722030522636826, "learning_rate": 2.3243663139578042e-07, "loss": 0.3851, "step": 7728 }, { "epoch": 0.9496252610885858, "grad_norm": 0.33780488472922865, "learning_rate": 2.3130984138065026e-07, "loss": 0.3342, "step": 7729 }, { "epoch": 0.9497481263054429, "grad_norm": 0.3632798392887431, "learning_rate": 2.301857679702979e-07, "loss": 0.3732, "step": 7730 }, { "epoch": 0.9498709915223, "grad_norm": 0.29672004990815215, "learning_rate": 2.2906441137148793e-07, "loss": 0.3649, "step": 7731 }, { "epoch": 0.9499938567391571, "grad_norm": 0.3971508561825186, "learning_rate": 2.2794577179048702e-07, "loss": 0.3748, "step": 7732 }, { "epoch": 0.9501167219560143, "grad_norm": 0.3463006996195387, "learning_rate": 2.2682984943305894e-07, "loss": 0.3996, "step": 7733 }, { "epoch": 0.9502395871728714, "grad_norm": 0.39445553993351756, "learning_rate": 2.2571664450447616e-07, "loss": 0.3881, "step": 7734 }, { "epoch": 0.9503624523897285, "grad_norm": 0.3429518237190701, "learning_rate": 2.2460615720949984e-07, "loss": 0.4055, "step": 7735 }, { "epoch": 0.9504853176065856, "grad_norm": 0.3060379758860914, "learning_rate": 2.2349838775239828e-07, "loss": 0.4333, "step": 7736 }, { "epoch": 0.9506081828234427, "grad_norm": 0.32832274959439794, "learning_rate": 2.2239333633694182e-07, "loss": 0.4398, "step": 7737 }, { "epoch": 0.9507310480402998, "grad_norm": 0.39866234558111063, "learning_rate": 2.2129100316639282e-07, "loss": 0.4594, "step": 7738 }, { "epoch": 0.9508539132571568, "grad_norm": 0.32179609531095715, "learning_rate": 2.2019138844352249e-07, "loss": 0.3953, "step": 7739 }, { "epoch": 0.950976778474014, "grad_norm": 0.37654882384212657, "learning_rate": 2.19094492370594e-07, "loss": 0.423, "step": 7740 }, { "epoch": 0.9510996436908711, "grad_norm": 0.2814468805411834, "learning_rate": 2.1800031514937757e-07, "loss": 0.3724, "step": 7741 }, { "epoch": 0.9512225089077282, "grad_norm": 0.371090129548882, "learning_rate": 2.1690885698113728e-07, "loss": 0.3752, "step": 7742 }, { "epoch": 0.9513453741245853, "grad_norm": 0.38537631442852704, "learning_rate": 2.1582011806664248e-07, "loss": 0.3937, "step": 7743 }, { "epoch": 0.9514682393414424, "grad_norm": 0.31724572928092587, "learning_rate": 2.1473409860615635e-07, "loss": 0.3111, "step": 7744 }, { "epoch": 0.9515911045582995, "grad_norm": 0.35762977993734235, "learning_rate": 2.1365079879944904e-07, "loss": 0.4169, "step": 7745 }, { "epoch": 0.9517139697751567, "grad_norm": 0.4239466430840743, "learning_rate": 2.1257021884578286e-07, "loss": 0.3872, "step": 7746 }, { "epoch": 0.9518368349920138, "grad_norm": 0.3228359548126578, "learning_rate": 2.114923589439255e-07, "loss": 0.3617, "step": 7747 }, { "epoch": 0.9519597002088709, "grad_norm": 0.47410979435045686, "learning_rate": 2.1041721929214163e-07, "loss": 0.3873, "step": 7748 }, { "epoch": 0.952082565425728, "grad_norm": 0.34418315594766613, "learning_rate": 2.0934480008819645e-07, "loss": 0.3904, "step": 7749 }, { "epoch": 0.9522054306425851, "grad_norm": 0.40152289929457224, "learning_rate": 2.0827510152935546e-07, "loss": 0.4452, "step": 7750 }, { "epoch": 0.9523282958594422, "grad_norm": 0.30658027563588286, "learning_rate": 2.0720812381238131e-07, "loss": 0.3694, "step": 7751 }, { "epoch": 0.9524511610762993, "grad_norm": 0.3643000605470974, "learning_rate": 2.0614386713353696e-07, "loss": 0.4027, "step": 7752 }, { "epoch": 0.9525740262931565, "grad_norm": 0.3024653893866832, "learning_rate": 2.0508233168858749e-07, "loss": 0.4253, "step": 7753 }, { "epoch": 0.9526968915100135, "grad_norm": 0.39547860689489234, "learning_rate": 2.040235176727967e-07, "loss": 0.3705, "step": 7754 }, { "epoch": 0.9528197567268706, "grad_norm": 0.3684596468255994, "learning_rate": 2.0296742528092216e-07, "loss": 0.3793, "step": 7755 }, { "epoch": 0.9529426219437277, "grad_norm": 0.3800773084160288, "learning_rate": 2.0191405470722847e-07, "loss": 0.3751, "step": 7756 }, { "epoch": 0.9530654871605848, "grad_norm": 0.43040877503671326, "learning_rate": 2.008634061454756e-07, "loss": 0.3956, "step": 7757 }, { "epoch": 0.9531883523774419, "grad_norm": 0.36025573916198733, "learning_rate": 1.9981547978892234e-07, "loss": 0.473, "step": 7758 }, { "epoch": 0.953311217594299, "grad_norm": 0.3305094178553393, "learning_rate": 1.9877027583032947e-07, "loss": 0.3505, "step": 7759 }, { "epoch": 0.9534340828111562, "grad_norm": 0.33070451153141933, "learning_rate": 1.9772779446195488e-07, "loss": 0.3542, "step": 7760 }, { "epoch": 0.9535569480280133, "grad_norm": 0.3264882639114595, "learning_rate": 1.966880358755585e-07, "loss": 0.4319, "step": 7761 }, { "epoch": 0.9536798132448704, "grad_norm": 0.3000052568933473, "learning_rate": 1.9565100026239237e-07, "loss": 0.4031, "step": 7762 }, { "epoch": 0.9538026784617275, "grad_norm": 0.38565709901776246, "learning_rate": 1.9461668781321717e-07, "loss": 0.3523, "step": 7763 }, { "epoch": 0.9539255436785846, "grad_norm": 0.3456252277316431, "learning_rate": 1.9358509871828577e-07, "loss": 0.3839, "step": 7764 }, { "epoch": 0.9540484088954417, "grad_norm": 0.3512414188465955, "learning_rate": 1.925562331673514e-07, "loss": 0.3152, "step": 7765 }, { "epoch": 0.9541712741122989, "grad_norm": 0.354711239859151, "learning_rate": 1.9153009134966926e-07, "loss": 0.4003, "step": 7766 }, { "epoch": 0.954294139329156, "grad_norm": 0.36513032253523997, "learning_rate": 1.905066734539884e-07, "loss": 0.3208, "step": 7767 }, { "epoch": 0.954417004546013, "grad_norm": 0.3109079052986187, "learning_rate": 1.894859796685633e-07, "loss": 0.4166, "step": 7768 }, { "epoch": 0.9545398697628701, "grad_norm": 0.445888764140128, "learning_rate": 1.884680101811437e-07, "loss": 0.3637, "step": 7769 }, { "epoch": 0.9546627349797272, "grad_norm": 0.3433206755192348, "learning_rate": 1.8745276517897647e-07, "loss": 0.429, "step": 7770 }, { "epoch": 0.9547856001965843, "grad_norm": 0.3779761648980408, "learning_rate": 1.8644024484880894e-07, "loss": 0.3625, "step": 7771 }, { "epoch": 0.9549084654134414, "grad_norm": 0.3517012918791368, "learning_rate": 1.8543044937689213e-07, "loss": 0.4499, "step": 7772 }, { "epoch": 0.9550313306302985, "grad_norm": 0.3201737152344823, "learning_rate": 1.8442337894896577e-07, "loss": 0.3758, "step": 7773 }, { "epoch": 0.9551541958471557, "grad_norm": 0.3266657971806425, "learning_rate": 1.8341903375027836e-07, "loss": 0.3542, "step": 7774 }, { "epoch": 0.9552770610640128, "grad_norm": 0.39760033918689985, "learning_rate": 1.8241741396557044e-07, "loss": 0.3765, "step": 7775 }, { "epoch": 0.9553999262808699, "grad_norm": 0.338424513325826, "learning_rate": 1.8141851977908298e-07, "loss": 0.4265, "step": 7776 }, { "epoch": 0.955522791497727, "grad_norm": 0.33251699700641296, "learning_rate": 1.804223513745573e-07, "loss": 0.4101, "step": 7777 }, { "epoch": 0.9556456567145841, "grad_norm": 0.3608611628047877, "learning_rate": 1.7942890893523022e-07, "loss": 0.3913, "step": 7778 }, { "epoch": 0.9557685219314412, "grad_norm": 0.34472297737674445, "learning_rate": 1.7843819264384386e-07, "loss": 0.3837, "step": 7779 }, { "epoch": 0.9558913871482984, "grad_norm": 0.44342407911937576, "learning_rate": 1.7745020268262746e-07, "loss": 0.4091, "step": 7780 }, { "epoch": 0.9560142523651555, "grad_norm": 0.33139240151265364, "learning_rate": 1.7646493923332063e-07, "loss": 0.4414, "step": 7781 }, { "epoch": 0.9561371175820126, "grad_norm": 0.35579582577888186, "learning_rate": 1.7548240247715342e-07, "loss": 0.4102, "step": 7782 }, { "epoch": 0.9562599827988696, "grad_norm": 0.34685121565559623, "learning_rate": 1.745025925948579e-07, "loss": 0.44, "step": 7783 }, { "epoch": 0.9563828480157267, "grad_norm": 0.40983974974030185, "learning_rate": 1.7352550976666493e-07, "loss": 0.3629, "step": 7784 }, { "epoch": 0.9565057132325838, "grad_norm": 0.38370895851705744, "learning_rate": 1.725511541723007e-07, "loss": 0.4638, "step": 7785 }, { "epoch": 0.9566285784494409, "grad_norm": 0.36472974584018536, "learning_rate": 1.7157952599099192e-07, "loss": 0.2991, "step": 7786 }, { "epoch": 0.956751443666298, "grad_norm": 0.37725785854423144, "learning_rate": 1.7061062540146387e-07, "loss": 0.42, "step": 7787 }, { "epoch": 0.9568743088831552, "grad_norm": 0.3187142671818406, "learning_rate": 1.6964445258193906e-07, "loss": 0.3744, "step": 7788 }, { "epoch": 0.9569971741000123, "grad_norm": 0.29183965938587314, "learning_rate": 1.6868100771014027e-07, "loss": 0.3868, "step": 7789 }, { "epoch": 0.9571200393168694, "grad_norm": 0.43090339870391164, "learning_rate": 1.677202909632841e-07, "loss": 0.3994, "step": 7790 }, { "epoch": 0.9572429045337265, "grad_norm": 0.31527744460366064, "learning_rate": 1.6676230251809088e-07, "loss": 0.3575, "step": 7791 }, { "epoch": 0.9573657697505836, "grad_norm": 0.2813352618457808, "learning_rate": 1.6580704255077295e-07, "loss": 0.4901, "step": 7792 }, { "epoch": 0.9574886349674407, "grad_norm": 0.3598732768200624, "learning_rate": 1.6485451123704974e-07, "loss": 0.3733, "step": 7793 }, { "epoch": 0.9576115001842979, "grad_norm": 0.2852809513533757, "learning_rate": 1.6390470875212615e-07, "loss": 0.3583, "step": 7794 }, { "epoch": 0.957734365401155, "grad_norm": 0.3573501053728117, "learning_rate": 1.6295763527071906e-07, "loss": 0.4078, "step": 7795 }, { "epoch": 0.9578572306180121, "grad_norm": 0.34058467194451225, "learning_rate": 1.6201329096703076e-07, "loss": 0.4863, "step": 7796 }, { "epoch": 0.9579800958348691, "grad_norm": 0.26927335711784445, "learning_rate": 1.6107167601477235e-07, "loss": 0.3816, "step": 7797 }, { "epoch": 0.9581029610517262, "grad_norm": 0.3910513519562421, "learning_rate": 1.6013279058714357e-07, "loss": 0.3691, "step": 7798 }, { "epoch": 0.9582258262685833, "grad_norm": 0.3445856709818358, "learning_rate": 1.5919663485684965e-07, "loss": 0.3932, "step": 7799 }, { "epoch": 0.9583486914854404, "grad_norm": 0.38068013885730584, "learning_rate": 1.5826320899608616e-07, "loss": 0.3573, "step": 7800 }, { "epoch": 0.9584715567022976, "grad_norm": 0.31806298603061595, "learning_rate": 1.5733251317655574e-07, "loss": 0.3673, "step": 7801 }, { "epoch": 0.9585944219191547, "grad_norm": 0.3664264801482915, "learning_rate": 1.5640454756945144e-07, "loss": 0.3591, "step": 7802 }, { "epoch": 0.9587172871360118, "grad_norm": 0.3997034791440579, "learning_rate": 1.554793123454651e-07, "loss": 0.3947, "step": 7803 }, { "epoch": 0.9588401523528689, "grad_norm": 0.36001821935127454, "learning_rate": 1.5455680767479053e-07, "loss": 0.3161, "step": 7804 }, { "epoch": 0.958963017569726, "grad_norm": 0.3614246752972688, "learning_rate": 1.5363703372711368e-07, "loss": 0.3267, "step": 7805 }, { "epoch": 0.9590858827865831, "grad_norm": 0.4541090182104698, "learning_rate": 1.5271999067162256e-07, "loss": 0.3802, "step": 7806 }, { "epoch": 0.9592087480034402, "grad_norm": 0.36745579127331623, "learning_rate": 1.5180567867700223e-07, "loss": 0.3649, "step": 7807 }, { "epoch": 0.9593316132202974, "grad_norm": 0.36220220612960397, "learning_rate": 1.5089409791143316e-07, "loss": 0.4225, "step": 7808 }, { "epoch": 0.9594544784371545, "grad_norm": 0.3728533806401237, "learning_rate": 1.4998524854259454e-07, "loss": 0.3535, "step": 7809 }, { "epoch": 0.9595773436540116, "grad_norm": 0.34556783004092434, "learning_rate": 1.4907913073766432e-07, "loss": 0.3271, "step": 7810 }, { "epoch": 0.9597002088708687, "grad_norm": 0.3553078093222426, "learning_rate": 1.4817574466331586e-07, "loss": 0.3488, "step": 7811 }, { "epoch": 0.9598230740877257, "grad_norm": 0.3377263912547785, "learning_rate": 1.4727509048572118e-07, "loss": 0.4301, "step": 7812 }, { "epoch": 0.9599459393045828, "grad_norm": 0.40369091703507987, "learning_rate": 1.4637716837055115e-07, "loss": 0.3831, "step": 7813 }, { "epoch": 0.96006880452144, "grad_norm": 0.3699681722254154, "learning_rate": 1.4548197848297194e-07, "loss": 0.4345, "step": 7814 }, { "epoch": 0.9601916697382971, "grad_norm": 0.30153347502237215, "learning_rate": 1.4458952098764688e-07, "loss": 0.3604, "step": 7815 }, { "epoch": 0.9603145349551542, "grad_norm": 0.30417875145129764, "learning_rate": 1.4369979604873962e-07, "loss": 0.3746, "step": 7816 }, { "epoch": 0.9604374001720113, "grad_norm": 0.3860923797321108, "learning_rate": 1.4281280382990758e-07, "loss": 0.4004, "step": 7817 }, { "epoch": 0.9605602653888684, "grad_norm": 0.4340413694950177, "learning_rate": 1.419285444943086e-07, "loss": 0.398, "step": 7818 }, { "epoch": 0.9606831306057255, "grad_norm": 0.3027533747659267, "learning_rate": 1.4104701820459588e-07, "loss": 0.3906, "step": 7819 }, { "epoch": 0.9608059958225826, "grad_norm": 0.34515226089199313, "learning_rate": 1.4016822512292138e-07, "loss": 0.3839, "step": 7820 }, { "epoch": 0.9609288610394398, "grad_norm": 0.3548825329918181, "learning_rate": 1.3929216541093083e-07, "loss": 0.306, "step": 7821 }, { "epoch": 0.9610517262562969, "grad_norm": 0.30097954599622684, "learning_rate": 1.3841883922977194e-07, "loss": 0.4039, "step": 7822 }, { "epoch": 0.961174591473154, "grad_norm": 0.47775456554182255, "learning_rate": 1.3754824674008792e-07, "loss": 0.4643, "step": 7823 }, { "epoch": 0.9612974566900111, "grad_norm": 0.34486391051943355, "learning_rate": 1.3668038810201565e-07, "loss": 0.3836, "step": 7824 }, { "epoch": 0.9614203219068682, "grad_norm": 0.3509854007525182, "learning_rate": 1.3581526347519414e-07, "loss": 0.353, "step": 7825 }, { "epoch": 0.9615431871237252, "grad_norm": 0.312385103959265, "learning_rate": 1.3495287301875936e-07, "loss": 0.3637, "step": 7826 }, { "epoch": 0.9616660523405823, "grad_norm": 0.30749265534473014, "learning_rate": 1.3409321689133947e-07, "loss": 0.3329, "step": 7827 }, { "epoch": 0.9617889175574394, "grad_norm": 0.3253780070778151, "learning_rate": 1.3323629525106295e-07, "loss": 0.4211, "step": 7828 }, { "epoch": 0.9619117827742966, "grad_norm": 0.30556290216010074, "learning_rate": 1.3238210825555542e-07, "loss": 0.408, "step": 7829 }, { "epoch": 0.9620346479911537, "grad_norm": 0.29170855057299705, "learning_rate": 1.3153065606193948e-07, "loss": 0.3982, "step": 7830 }, { "epoch": 0.9621575132080108, "grad_norm": 0.3531611872423174, "learning_rate": 1.3068193882683488e-07, "loss": 0.3834, "step": 7831 }, { "epoch": 0.9622803784248679, "grad_norm": 0.3720641317282434, "learning_rate": 1.2983595670635507e-07, "loss": 0.3548, "step": 7832 }, { "epoch": 0.962403243641725, "grad_norm": 0.37532055647256124, "learning_rate": 1.2899270985611555e-07, "loss": 0.3599, "step": 7833 }, { "epoch": 0.9625261088585821, "grad_norm": 0.3174874572447704, "learning_rate": 1.281521984312256e-07, "loss": 0.281, "step": 7834 }, { "epoch": 0.9626489740754393, "grad_norm": 0.35902924856256285, "learning_rate": 1.2731442258629156e-07, "loss": 0.3825, "step": 7835 }, { "epoch": 0.9627718392922964, "grad_norm": 0.3086858644574483, "learning_rate": 1.2647938247541345e-07, "loss": 0.35, "step": 7836 }, { "epoch": 0.9628947045091535, "grad_norm": 0.32490368823329974, "learning_rate": 1.2564707825219845e-07, "loss": 0.4254, "step": 7837 }, { "epoch": 0.9630175697260106, "grad_norm": 0.3490109188618582, "learning_rate": 1.2481751006973908e-07, "loss": 0.4166, "step": 7838 }, { "epoch": 0.9631404349428677, "grad_norm": 0.38098345261980165, "learning_rate": 1.2399067808062992e-07, "loss": 0.3781, "step": 7839 }, { "epoch": 0.9632633001597248, "grad_norm": 0.3294599717997828, "learning_rate": 1.23166582436961e-07, "loss": 0.3262, "step": 7840 }, { "epoch": 0.9633861653765818, "grad_norm": 0.32844389695030185, "learning_rate": 1.2234522329031773e-07, "loss": 0.3514, "step": 7841 }, { "epoch": 0.963509030593439, "grad_norm": 0.48823632490791563, "learning_rate": 1.2152660079178923e-07, "loss": 0.3786, "step": 7842 }, { "epoch": 0.9636318958102961, "grad_norm": 0.30027320657552703, "learning_rate": 1.2071071509194842e-07, "loss": 0.3437, "step": 7843 }, { "epoch": 0.9637547610271532, "grad_norm": 0.34909766209911686, "learning_rate": 1.1989756634087856e-07, "loss": 0.3621, "step": 7844 }, { "epoch": 0.9638776262440103, "grad_norm": 0.4324303594772804, "learning_rate": 1.1908715468815002e-07, "loss": 0.3965, "step": 7845 }, { "epoch": 0.9640004914608674, "grad_norm": 0.39224632357722583, "learning_rate": 1.1827948028283353e-07, "loss": 0.457, "step": 7846 }, { "epoch": 0.9641233566777245, "grad_norm": 0.27190536391312603, "learning_rate": 1.174745432734936e-07, "loss": 0.4339, "step": 7847 }, { "epoch": 0.9642462218945816, "grad_norm": 0.43663901648715947, "learning_rate": 1.166723438081968e-07, "loss": 0.3924, "step": 7848 }, { "epoch": 0.9643690871114388, "grad_norm": 0.38981319109625057, "learning_rate": 1.1587288203450008e-07, "loss": 0.3273, "step": 7849 }, { "epoch": 0.9644919523282959, "grad_norm": 0.4184964046121767, "learning_rate": 1.1507615809945915e-07, "loss": 0.4528, "step": 7850 }, { "epoch": 0.964614817545153, "grad_norm": 0.5883930985702613, "learning_rate": 1.1428217214962677e-07, "loss": 0.4835, "step": 7851 }, { "epoch": 0.9647376827620101, "grad_norm": 0.3116319446061421, "learning_rate": 1.1349092433105279e-07, "loss": 0.3704, "step": 7852 }, { "epoch": 0.9648605479788672, "grad_norm": 0.3350801454167809, "learning_rate": 1.1270241478927912e-07, "loss": 0.4861, "step": 7853 }, { "epoch": 0.9649834131957243, "grad_norm": 0.35736824516148824, "learning_rate": 1.1191664366934973e-07, "loss": 0.3859, "step": 7854 }, { "epoch": 0.9651062784125815, "grad_norm": 0.45102912945674095, "learning_rate": 1.1113361111580067e-07, "loss": 0.4012, "step": 7855 }, { "epoch": 0.9652291436294385, "grad_norm": 0.4360367989249746, "learning_rate": 1.1035331727266673e-07, "loss": 0.4061, "step": 7856 }, { "epoch": 0.9653520088462956, "grad_norm": 0.4048593174962806, "learning_rate": 1.095757622834781e-07, "loss": 0.3577, "step": 7857 }, { "epoch": 0.9654748740631527, "grad_norm": 0.33146385998032424, "learning_rate": 1.088009462912587e-07, "loss": 0.3964, "step": 7858 }, { "epoch": 0.9655977392800098, "grad_norm": 0.29692904698546896, "learning_rate": 1.0802886943853285e-07, "loss": 0.3809, "step": 7859 }, { "epoch": 0.9657206044968669, "grad_norm": 0.3827326926913098, "learning_rate": 1.0725953186731863e-07, "loss": 0.3319, "step": 7860 }, { "epoch": 0.965843469713724, "grad_norm": 0.35259236064860067, "learning_rate": 1.0649293371913115e-07, "loss": 0.3508, "step": 7861 }, { "epoch": 0.9659663349305811, "grad_norm": 0.3691253965866995, "learning_rate": 1.0572907513498097e-07, "loss": 0.3211, "step": 7862 }, { "epoch": 0.9660892001474383, "grad_norm": 0.3688410123077948, "learning_rate": 1.0496795625537403e-07, "loss": 0.3595, "step": 7863 }, { "epoch": 0.9662120653642954, "grad_norm": 0.4682046011087368, "learning_rate": 1.0420957722031333e-07, "loss": 0.3815, "step": 7864 }, { "epoch": 0.9663349305811525, "grad_norm": 0.2974436745090697, "learning_rate": 1.0345393816929893e-07, "loss": 0.3707, "step": 7865 }, { "epoch": 0.9664577957980096, "grad_norm": 0.3038305122307508, "learning_rate": 1.0270103924132467e-07, "loss": 0.3656, "step": 7866 }, { "epoch": 0.9665806610148667, "grad_norm": 0.35496869723407876, "learning_rate": 1.0195088057488311e-07, "loss": 0.2744, "step": 7867 }, { "epoch": 0.9667035262317238, "grad_norm": 0.30563434800848077, "learning_rate": 1.0120346230795884e-07, "loss": 0.377, "step": 7868 }, { "epoch": 0.966826391448581, "grad_norm": 0.30301991657272365, "learning_rate": 1.0045878457803692e-07, "loss": 0.3481, "step": 7869 }, { "epoch": 0.966949256665438, "grad_norm": 0.32538740639770614, "learning_rate": 9.971684752209276e-08, "loss": 0.3826, "step": 7870 }, { "epoch": 0.9670721218822951, "grad_norm": 0.27874277475584036, "learning_rate": 9.897765127660386e-08, "loss": 0.4115, "step": 7871 }, { "epoch": 0.9671949870991522, "grad_norm": 0.340011187809386, "learning_rate": 9.824119597753811e-08, "loss": 0.3728, "step": 7872 }, { "epoch": 0.9673178523160093, "grad_norm": 0.36080823801510903, "learning_rate": 9.75074817603655e-08, "loss": 0.4147, "step": 7873 }, { "epoch": 0.9674407175328664, "grad_norm": 0.3310152047028987, "learning_rate": 9.677650876004307e-08, "loss": 0.405, "step": 7874 }, { "epoch": 0.9675635827497235, "grad_norm": 0.32846896081898574, "learning_rate": 9.604827711103326e-08, "loss": 0.3279, "step": 7875 }, { "epoch": 0.9676864479665807, "grad_norm": 0.3342339740870822, "learning_rate": 9.532278694728557e-08, "loss": 0.3614, "step": 7876 }, { "epoch": 0.9678093131834378, "grad_norm": 0.3175847991195792, "learning_rate": 9.460003840225162e-08, "loss": 0.4012, "step": 7877 }, { "epoch": 0.9679321784002949, "grad_norm": 0.3296183192699268, "learning_rate": 9.388003160887503e-08, "loss": 0.4528, "step": 7878 }, { "epoch": 0.968055043617152, "grad_norm": 0.3439335329589002, "learning_rate": 9.316276669959822e-08, "loss": 0.3721, "step": 7879 }, { "epoch": 0.9681779088340091, "grad_norm": 0.3499866912399568, "learning_rate": 9.244824380635564e-08, "loss": 0.3436, "step": 7880 }, { "epoch": 0.9683007740508662, "grad_norm": 0.33081020148574575, "learning_rate": 9.173646306058048e-08, "loss": 0.371, "step": 7881 }, { "epoch": 0.9684236392677233, "grad_norm": 0.4509245651068654, "learning_rate": 9.102742459319802e-08, "loss": 0.4016, "step": 7882 }, { "epoch": 0.9685465044845805, "grad_norm": 0.3598392476534631, "learning_rate": 9.032112853463393e-08, "loss": 0.4054, "step": 7883 }, { "epoch": 0.9686693697014376, "grad_norm": 0.43608841855256164, "learning_rate": 8.961757501480595e-08, "loss": 0.3303, "step": 7884 }, { "epoch": 0.9687922349182946, "grad_norm": 0.37418052130844437, "learning_rate": 8.891676416312722e-08, "loss": 0.447, "step": 7885 }, { "epoch": 0.9689151001351517, "grad_norm": 0.39497604557766214, "learning_rate": 8.82186961085063e-08, "loss": 0.3732, "step": 7886 }, { "epoch": 0.9690379653520088, "grad_norm": 0.3130365762105493, "learning_rate": 8.752337097935215e-08, "loss": 0.4281, "step": 7887 }, { "epoch": 0.9691608305688659, "grad_norm": 0.3630973273399573, "learning_rate": 8.683078890356245e-08, "loss": 0.4601, "step": 7888 }, { "epoch": 0.969283695785723, "grad_norm": 0.392028682553879, "learning_rate": 8.614095000853361e-08, "loss": 0.3321, "step": 7889 }, { "epoch": 0.9694065610025802, "grad_norm": 0.38220582515468077, "learning_rate": 8.545385442115749e-08, "loss": 0.3543, "step": 7890 }, { "epoch": 0.9695294262194373, "grad_norm": 0.30620237434627323, "learning_rate": 8.476950226782131e-08, "loss": 0.3465, "step": 7891 }, { "epoch": 0.9696522914362944, "grad_norm": 0.4659367764553998, "learning_rate": 8.408789367440606e-08, "loss": 0.3885, "step": 7892 }, { "epoch": 0.9697751566531515, "grad_norm": 0.3070671583635878, "learning_rate": 8.340902876628809e-08, "loss": 0.3567, "step": 7893 }, { "epoch": 0.9698980218700086, "grad_norm": 0.32619749309807566, "learning_rate": 8.273290766834252e-08, "loss": 0.4235, "step": 7894 }, { "epoch": 0.9700208870868657, "grad_norm": 0.36328891081971526, "learning_rate": 8.20595305049382e-08, "loss": 0.406, "step": 7895 }, { "epoch": 0.9701437523037229, "grad_norm": 0.3054183955830209, "learning_rate": 8.138889739993604e-08, "loss": 0.3482, "step": 7896 }, { "epoch": 0.97026661752058, "grad_norm": 0.3487390603472239, "learning_rate": 8.072100847669572e-08, "loss": 0.3417, "step": 7897 }, { "epoch": 0.9703894827374371, "grad_norm": 0.3163025876355097, "learning_rate": 8.005586385807063e-08, "loss": 0.3594, "step": 7898 }, { "epoch": 0.9705123479542941, "grad_norm": 0.3631291476198248, "learning_rate": 7.93934636664112e-08, "loss": 0.404, "step": 7899 }, { "epoch": 0.9706352131711512, "grad_norm": 0.3638172063916359, "learning_rate": 7.873380802356001e-08, "loss": 0.4029, "step": 7900 }, { "epoch": 0.9707580783880083, "grad_norm": 0.4034943366779541, "learning_rate": 7.807689705085663e-08, "loss": 0.4528, "step": 7901 }, { "epoch": 0.9708809436048654, "grad_norm": 0.5879634585933023, "learning_rate": 7.742273086913609e-08, "loss": 0.4815, "step": 7902 }, { "epoch": 0.9710038088217225, "grad_norm": 0.42347067341022915, "learning_rate": 7.677130959872713e-08, "loss": 0.4227, "step": 7903 }, { "epoch": 0.9711266740385797, "grad_norm": 0.31149915939696565, "learning_rate": 7.612263335945724e-08, "loss": 0.3642, "step": 7904 }, { "epoch": 0.9712495392554368, "grad_norm": 0.3721897684188028, "learning_rate": 7.547670227064263e-08, "loss": 0.3629, "step": 7905 }, { "epoch": 0.9713724044722939, "grad_norm": 0.32215080698016435, "learning_rate": 7.483351645109993e-08, "loss": 0.3288, "step": 7906 }, { "epoch": 0.971495269689151, "grad_norm": 0.3580815112746732, "learning_rate": 7.41930760191395e-08, "loss": 0.3403, "step": 7907 }, { "epoch": 0.9716181349060081, "grad_norm": 0.37527372215386673, "learning_rate": 7.355538109256377e-08, "loss": 0.4054, "step": 7908 }, { "epoch": 0.9717410001228652, "grad_norm": 0.35792141903288227, "learning_rate": 7.292043178867558e-08, "loss": 0.3721, "step": 7909 }, { "epoch": 0.9718638653397224, "grad_norm": 0.37111128789854725, "learning_rate": 7.228822822426817e-08, "loss": 0.4818, "step": 7910 }, { "epoch": 0.9719867305565795, "grad_norm": 0.2989709327649704, "learning_rate": 7.165877051563186e-08, "loss": 0.3729, "step": 7911 }, { "epoch": 0.9721095957734366, "grad_norm": 0.3387152552309802, "learning_rate": 7.103205877855067e-08, "loss": 0.3096, "step": 7912 }, { "epoch": 0.9722324609902937, "grad_norm": 0.381566220371336, "learning_rate": 7.040809312830576e-08, "loss": 0.3578, "step": 7913 }, { "epoch": 0.9723553262071507, "grad_norm": 0.30646235024247165, "learning_rate": 6.978687367966862e-08, "loss": 0.4166, "step": 7914 }, { "epoch": 0.9724781914240078, "grad_norm": 0.3848009862397392, "learning_rate": 6.91684005469112e-08, "loss": 0.5262, "step": 7915 }, { "epoch": 0.9726010566408649, "grad_norm": 0.35559979188223884, "learning_rate": 6.855267384379582e-08, "loss": 0.422, "step": 7916 }, { "epoch": 0.972723921857722, "grad_norm": 0.34444931244391797, "learning_rate": 6.793969368358355e-08, "loss": 0.3646, "step": 7917 }, { "epoch": 0.9728467870745792, "grad_norm": 0.34792355747510484, "learning_rate": 6.732946017902586e-08, "loss": 0.3792, "step": 7918 }, { "epoch": 0.9729696522914363, "grad_norm": 0.3670870100023742, "learning_rate": 6.672197344237296e-08, "loss": 0.4305, "step": 7919 }, { "epoch": 0.9730925175082934, "grad_norm": 0.3881761359259171, "learning_rate": 6.611723358536547e-08, "loss": 0.3883, "step": 7920 }, { "epoch": 0.9732153827251505, "grad_norm": 0.3037137342604485, "learning_rate": 6.551524071924442e-08, "loss": 0.429, "step": 7921 }, { "epoch": 0.9733382479420076, "grad_norm": 0.3399100448300429, "learning_rate": 6.491599495474288e-08, "loss": 0.3482, "step": 7922 }, { "epoch": 0.9734611131588647, "grad_norm": 0.3793140313485767, "learning_rate": 6.431949640208434e-08, "loss": 0.3855, "step": 7923 }, { "epoch": 0.9735839783757219, "grad_norm": 0.2888522914645577, "learning_rate": 6.372574517099439e-08, "loss": 0.4034, "step": 7924 }, { "epoch": 0.973706843592579, "grad_norm": 0.33094793325275207, "learning_rate": 6.313474137068731e-08, "loss": 0.3957, "step": 7925 }, { "epoch": 0.9738297088094361, "grad_norm": 0.356776286705828, "learning_rate": 6.254648510987616e-08, "loss": 0.4395, "step": 7926 }, { "epoch": 0.9739525740262932, "grad_norm": 0.4070868558544545, "learning_rate": 6.196097649676768e-08, "loss": 0.3432, "step": 7927 }, { "epoch": 0.9740754392431502, "grad_norm": 0.3149936354701606, "learning_rate": 6.13782156390591e-08, "loss": 0.301, "step": 7928 }, { "epoch": 0.9741983044600073, "grad_norm": 0.3567118666420545, "learning_rate": 6.079820264394797e-08, "loss": 0.4279, "step": 7929 }, { "epoch": 0.9743211696768644, "grad_norm": 0.3009885964846208, "learning_rate": 6.022093761812398e-08, "loss": 0.3363, "step": 7930 }, { "epoch": 0.9744440348937216, "grad_norm": 0.33082398803882573, "learning_rate": 5.964642066776882e-08, "loss": 0.3933, "step": 7931 }, { "epoch": 0.9745669001105787, "grad_norm": 0.3685098315565267, "learning_rate": 5.907465189856465e-08, "loss": 0.3559, "step": 7932 }, { "epoch": 0.9746897653274358, "grad_norm": 0.35017333961499164, "learning_rate": 5.8505631415682325e-08, "loss": 0.35, "step": 7933 }, { "epoch": 0.9748126305442929, "grad_norm": 0.3699373536447334, "learning_rate": 5.7939359323791465e-08, "loss": 0.4255, "step": 7934 }, { "epoch": 0.97493549576115, "grad_norm": 0.34550852369625046, "learning_rate": 5.737583572705041e-08, "loss": 0.4459, "step": 7935 }, { "epoch": 0.9750583609780071, "grad_norm": 0.3413622647624132, "learning_rate": 5.681506072911957e-08, "loss": 0.4144, "step": 7936 }, { "epoch": 0.9751812261948642, "grad_norm": 0.3400467634699136, "learning_rate": 5.6257034433148115e-08, "loss": 0.3095, "step": 7937 }, { "epoch": 0.9753040914117214, "grad_norm": 0.3408000681755815, "learning_rate": 5.570175694178226e-08, "loss": 0.4122, "step": 7938 }, { "epoch": 0.9754269566285785, "grad_norm": 0.32686425878269676, "learning_rate": 5.5149228357160296e-08, "loss": 0.4488, "step": 7939 }, { "epoch": 0.9755498218454356, "grad_norm": 0.3586351014767006, "learning_rate": 5.459944878091761e-08, "loss": 0.3977, "step": 7940 }, { "epoch": 0.9756726870622927, "grad_norm": 0.3360703324312274, "learning_rate": 5.405241831418162e-08, "loss": 0.3091, "step": 7941 }, { "epoch": 0.9757955522791498, "grad_norm": 0.29992473015508886, "learning_rate": 5.350813705757518e-08, "loss": 0.4094, "step": 7942 }, { "epoch": 0.9759184174960068, "grad_norm": 0.33314757820994084, "learning_rate": 5.2966605111214874e-08, "loss": 0.314, "step": 7943 }, { "epoch": 0.976041282712864, "grad_norm": 0.3796035262826097, "learning_rate": 5.242782257471268e-08, "loss": 0.3318, "step": 7944 }, { "epoch": 0.9761641479297211, "grad_norm": 0.312321454396402, "learning_rate": 5.189178954717599e-08, "loss": 0.4067, "step": 7945 }, { "epoch": 0.9762870131465782, "grad_norm": 0.3803669534822824, "learning_rate": 5.135850612720094e-08, "loss": 0.3751, "step": 7946 }, { "epoch": 0.9764098783634353, "grad_norm": 0.41664762478865813, "learning_rate": 5.082797241288406e-08, "loss": 0.463, "step": 7947 }, { "epoch": 0.9765327435802924, "grad_norm": 0.3855725819350649, "learning_rate": 5.030018850181228e-08, "loss": 0.4019, "step": 7948 }, { "epoch": 0.9766556087971495, "grad_norm": 0.3887162586655717, "learning_rate": 4.977515449106962e-08, "loss": 0.3668, "step": 7949 }, { "epoch": 0.9767784740140066, "grad_norm": 0.2956854527929649, "learning_rate": 4.925287047723048e-08, "loss": 0.3802, "step": 7950 }, { "epoch": 0.9769013392308638, "grad_norm": 0.36562313915118627, "learning_rate": 4.8733336556368024e-08, "loss": 0.3421, "step": 7951 }, { "epoch": 0.9770242044477209, "grad_norm": 0.3506701637084288, "learning_rate": 4.82165528240458e-08, "loss": 0.3409, "step": 7952 }, { "epoch": 0.977147069664578, "grad_norm": 0.32603294181961856, "learning_rate": 4.770251937532277e-08, "loss": 0.3475, "step": 7953 }, { "epoch": 0.9772699348814351, "grad_norm": 0.30755881781932315, "learning_rate": 4.719123630475164e-08, "loss": 0.3722, "step": 7954 }, { "epoch": 0.9773928000982922, "grad_norm": 0.4614251089121583, "learning_rate": 4.66827037063805e-08, "loss": 0.4072, "step": 7955 }, { "epoch": 0.9775156653151493, "grad_norm": 0.3056664176601174, "learning_rate": 4.6176921673751204e-08, "loss": 0.4256, "step": 7956 }, { "epoch": 0.9776385305320064, "grad_norm": 0.33125343709663657, "learning_rate": 4.567389029989599e-08, "loss": 0.4174, "step": 7957 }, { "epoch": 0.9777613957488634, "grad_norm": 0.31433135246772187, "learning_rate": 4.517360967734918e-08, "loss": 0.3575, "step": 7958 }, { "epoch": 0.9778842609657206, "grad_norm": 0.4060540236630573, "learning_rate": 4.467607989812883e-08, "loss": 0.4356, "step": 7959 }, { "epoch": 0.9780071261825777, "grad_norm": 0.3111760901177748, "learning_rate": 4.418130105375673e-08, "loss": 0.4436, "step": 7960 }, { "epoch": 0.9781299913994348, "grad_norm": 0.3044227765626895, "learning_rate": 4.368927323524174e-08, "loss": 0.4084, "step": 7961 }, { "epoch": 0.9782528566162919, "grad_norm": 0.37336316756785787, "learning_rate": 4.3199996533089815e-08, "loss": 0.356, "step": 7962 }, { "epoch": 0.978375721833149, "grad_norm": 0.254508765400932, "learning_rate": 4.271347103730061e-08, "loss": 0.418, "step": 7963 }, { "epoch": 0.9784985870500061, "grad_norm": 0.34874999512475824, "learning_rate": 4.222969683736755e-08, "loss": 0.2549, "step": 7964 }, { "epoch": 0.9786214522668633, "grad_norm": 0.34403259579067313, "learning_rate": 4.1748674022276114e-08, "loss": 0.343, "step": 7965 }, { "epoch": 0.9787443174837204, "grad_norm": 0.3705347373142729, "learning_rate": 4.127040268050886e-08, "loss": 0.3902, "step": 7966 }, { "epoch": 0.9788671827005775, "grad_norm": 0.34374245439473955, "learning_rate": 4.0794882900040406e-08, "loss": 0.353, "step": 7967 }, { "epoch": 0.9789900479174346, "grad_norm": 0.3612084546709863, "learning_rate": 4.032211476833914e-08, "loss": 0.3768, "step": 7968 }, { "epoch": 0.9791129131342917, "grad_norm": 0.3808800958868697, "learning_rate": 3.985209837236881e-08, "loss": 0.3612, "step": 7969 }, { "epoch": 0.9792357783511488, "grad_norm": 0.3721115384743983, "learning_rate": 3.93848337985836e-08, "loss": 0.3724, "step": 7970 }, { "epoch": 0.979358643568006, "grad_norm": 0.41796374059696195, "learning_rate": 3.892032113293642e-08, "loss": 0.4872, "step": 7971 }, { "epoch": 0.979481508784863, "grad_norm": 0.39036989465489885, "learning_rate": 3.845856046086893e-08, "loss": 0.3449, "step": 7972 }, { "epoch": 0.9796043740017201, "grad_norm": 0.31111004465334047, "learning_rate": 3.799955186732151e-08, "loss": 0.4043, "step": 7973 }, { "epoch": 0.9797272392185772, "grad_norm": 0.4059945142354921, "learning_rate": 3.7543295436723304e-08, "loss": 0.4315, "step": 7974 }, { "epoch": 0.9798501044354343, "grad_norm": 0.35376624962428843, "learning_rate": 3.7089791253002156e-08, "loss": 0.3546, "step": 7975 }, { "epoch": 0.9799729696522914, "grad_norm": 0.5427598174291471, "learning_rate": 3.6639039399574694e-08, "loss": 0.3814, "step": 7976 }, { "epoch": 0.9800958348691485, "grad_norm": 0.3358302722528453, "learning_rate": 3.6191039959356245e-08, "loss": 0.3911, "step": 7977 }, { "epoch": 0.9802187000860056, "grad_norm": 0.4647879671862071, "learning_rate": 3.574579301475256e-08, "loss": 0.418, "step": 7978 }, { "epoch": 0.9803415653028628, "grad_norm": 0.34935412754294426, "learning_rate": 3.530329864766313e-08, "loss": 0.4103, "step": 7979 }, { "epoch": 0.9804644305197199, "grad_norm": 0.33143262139974017, "learning_rate": 3.4863556939482846e-08, "loss": 0.3671, "step": 7980 }, { "epoch": 0.980587295736577, "grad_norm": 0.30121516887539035, "learning_rate": 3.4426567971097e-08, "loss": 0.4666, "step": 7981 }, { "epoch": 0.9807101609534341, "grad_norm": 0.3438348506185247, "learning_rate": 3.39923318228913e-08, "loss": 0.381, "step": 7982 }, { "epoch": 0.9808330261702912, "grad_norm": 0.40882758086719856, "learning_rate": 3.3560848574736845e-08, "loss": 0.4018, "step": 7983 }, { "epoch": 0.9809558913871483, "grad_norm": 0.35980551093472984, "learning_rate": 3.313211830600349e-08, "loss": 0.4059, "step": 7984 }, { "epoch": 0.9810787566040055, "grad_norm": 0.309190323351825, "learning_rate": 3.270614109555314e-08, "loss": 0.3901, "step": 7985 }, { "epoch": 0.9812016218208626, "grad_norm": 0.3237193599196218, "learning_rate": 3.228291702174313e-08, "loss": 0.3917, "step": 7986 }, { "epoch": 0.9813244870377196, "grad_norm": 0.44104583492250743, "learning_rate": 3.1862446162421176e-08, "loss": 0.4098, "step": 7987 }, { "epoch": 0.9814473522545767, "grad_norm": 0.27722651079885813, "learning_rate": 3.144472859493042e-08, "loss": 0.4285, "step": 7988 }, { "epoch": 0.9815702174714338, "grad_norm": 0.4148488564105364, "learning_rate": 3.1029764396106055e-08, "loss": 0.4644, "step": 7989 }, { "epoch": 0.9816930826882909, "grad_norm": 0.4708020746095509, "learning_rate": 3.061755364228036e-08, "loss": 0.4132, "step": 7990 }, { "epoch": 0.981815947905148, "grad_norm": 0.32515029220132663, "learning_rate": 3.020809640927602e-08, "loss": 0.4067, "step": 7991 }, { "epoch": 0.9819388131220051, "grad_norm": 0.3364718166472625, "learning_rate": 2.9801392772409453e-08, "loss": 0.3695, "step": 7992 }, { "epoch": 0.9820616783388623, "grad_norm": 0.3857668521503032, "learning_rate": 2.9397442806492482e-08, "loss": 0.4036, "step": 7993 }, { "epoch": 0.9821845435557194, "grad_norm": 0.3917288903816071, "learning_rate": 2.8996246585827335e-08, "loss": 0.3078, "step": 7994 }, { "epoch": 0.9823074087725765, "grad_norm": 0.3050157014822867, "learning_rate": 2.859780418421165e-08, "loss": 0.3461, "step": 7995 }, { "epoch": 0.9824302739894336, "grad_norm": 0.3565938881678756, "learning_rate": 2.8202115674938468e-08, "loss": 0.3721, "step": 7996 }, { "epoch": 0.9825531392062907, "grad_norm": 0.34281864385691996, "learning_rate": 2.7809181130789562e-08, "loss": 0.4356, "step": 7997 }, { "epoch": 0.9826760044231478, "grad_norm": 0.378810250123737, "learning_rate": 2.7419000624043787e-08, "loss": 0.4513, "step": 7998 }, { "epoch": 0.982798869640005, "grad_norm": 0.3014145849926525, "learning_rate": 2.7031574226472066e-08, "loss": 0.4327, "step": 7999 }, { "epoch": 0.9829217348568621, "grad_norm": 0.37019560261562245, "learning_rate": 2.6646902009339057e-08, "loss": 0.41, "step": 8000 }, { "epoch": 0.9830446000737191, "grad_norm": 0.3585723209737883, "learning_rate": 2.626498404340316e-08, "loss": 0.3818, "step": 8001 }, { "epoch": 0.9831674652905762, "grad_norm": 0.45254109815829713, "learning_rate": 2.5885820398916516e-08, "loss": 0.3652, "step": 8002 }, { "epoch": 0.9832903305074333, "grad_norm": 0.36217155511251486, "learning_rate": 2.5509411145621665e-08, "loss": 0.3604, "step": 8003 }, { "epoch": 0.9834131957242904, "grad_norm": 0.3744527363054329, "learning_rate": 2.5135756352756555e-08, "loss": 0.3021, "step": 8004 }, { "epoch": 0.9835360609411475, "grad_norm": 0.4045082934020736, "learning_rate": 2.4764856089054544e-08, "loss": 0.3377, "step": 8005 }, { "epoch": 0.9836589261580047, "grad_norm": 0.3096129924698462, "learning_rate": 2.4396710422739387e-08, "loss": 0.3935, "step": 8006 }, { "epoch": 0.9837817913748618, "grad_norm": 0.3741683624162985, "learning_rate": 2.4031319421530252e-08, "loss": 0.401, "step": 8007 }, { "epoch": 0.9839046565917189, "grad_norm": 0.3396597177943626, "learning_rate": 2.366868315263504e-08, "loss": 0.3291, "step": 8008 }, { "epoch": 0.984027521808576, "grad_norm": 0.3622014012612469, "learning_rate": 2.330880168276206e-08, "loss": 0.3948, "step": 8009 }, { "epoch": 0.9841503870254331, "grad_norm": 0.37885062159773736, "learning_rate": 2.2951675078108357e-08, "loss": 0.392, "step": 8010 }, { "epoch": 0.9842732522422902, "grad_norm": 0.43744634796498927, "learning_rate": 2.2597303404363058e-08, "loss": 0.3972, "step": 8011 }, { "epoch": 0.9843961174591473, "grad_norm": 0.3336623921551501, "learning_rate": 2.2245686726712346e-08, "loss": 0.4468, "step": 8012 }, { "epoch": 0.9845189826760045, "grad_norm": 0.3657423704256839, "learning_rate": 2.1896825109834486e-08, "loss": 0.4077, "step": 8013 }, { "epoch": 0.9846418478928616, "grad_norm": 0.3131548868490155, "learning_rate": 2.1550718617898145e-08, "loss": 0.3389, "step": 8014 }, { "epoch": 0.9847647131097187, "grad_norm": 0.37593122854432115, "learning_rate": 2.120736731456907e-08, "loss": 0.3287, "step": 8015 }, { "epoch": 0.9848875783265757, "grad_norm": 0.4149859751141943, "learning_rate": 2.0866771263003403e-08, "loss": 0.3861, "step": 8016 }, { "epoch": 0.9850104435434328, "grad_norm": 0.2907329132370782, "learning_rate": 2.0528930525852697e-08, "loss": 0.4483, "step": 8017 }, { "epoch": 0.9851333087602899, "grad_norm": 0.37859393460023166, "learning_rate": 2.0193845165258906e-08, "loss": 0.4319, "step": 8018 }, { "epoch": 0.985256173977147, "grad_norm": 0.3207745851067326, "learning_rate": 1.9861515242861062e-08, "loss": 0.3752, "step": 8019 }, { "epoch": 0.9853790391940042, "grad_norm": 0.4863318038239508, "learning_rate": 1.953194081978693e-08, "loss": 0.4291, "step": 8020 }, { "epoch": 0.9855019044108613, "grad_norm": 0.3106321449857267, "learning_rate": 1.9205121956661352e-08, "loss": 0.4013, "step": 8021 }, { "epoch": 0.9856247696277184, "grad_norm": 0.3416127401604636, "learning_rate": 1.8881058713599577e-08, "loss": 0.3148, "step": 8022 }, { "epoch": 0.9857476348445755, "grad_norm": 0.39183608601491876, "learning_rate": 1.855975115021058e-08, "loss": 0.3622, "step": 8023 }, { "epoch": 0.9858705000614326, "grad_norm": 0.3304066261959977, "learning_rate": 1.824119932559709e-08, "loss": 0.3509, "step": 8024 }, { "epoch": 0.9859933652782897, "grad_norm": 0.31472579358068925, "learning_rate": 1.792540329835557e-08, "loss": 0.3973, "step": 8025 }, { "epoch": 0.9861162304951469, "grad_norm": 0.29567535606922746, "learning_rate": 1.7612363126572883e-08, "loss": 0.3149, "step": 8026 }, { "epoch": 0.986239095712004, "grad_norm": 0.3962192859254068, "learning_rate": 1.730207886783297e-08, "loss": 0.4411, "step": 8027 }, { "epoch": 0.9863619609288611, "grad_norm": 0.4446478175518599, "learning_rate": 1.699455057920851e-08, "loss": 0.4364, "step": 8028 }, { "epoch": 0.9864848261457182, "grad_norm": 0.3882067257248991, "learning_rate": 1.6689778317269254e-08, "loss": 0.3002, "step": 8029 }, { "epoch": 0.9866076913625752, "grad_norm": 0.3927263639641671, "learning_rate": 1.6387762138075358e-08, "loss": 0.3876, "step": 8030 }, { "epoch": 0.9867305565794323, "grad_norm": 0.41817494434620756, "learning_rate": 1.6088502097179047e-08, "loss": 0.4284, "step": 8031 }, { "epoch": 0.9868534217962894, "grad_norm": 0.4314655315206307, "learning_rate": 1.5791998249629625e-08, "loss": 0.3775, "step": 8032 }, { "epoch": 0.9869762870131465, "grad_norm": 0.4224112322614347, "learning_rate": 1.5498250649965128e-08, "loss": 0.3565, "step": 8033 }, { "epoch": 0.9870991522300037, "grad_norm": 0.34685510291034993, "learning_rate": 1.520725935222067e-08, "loss": 0.3538, "step": 8034 }, { "epoch": 0.9872220174468608, "grad_norm": 0.3449195779968826, "learning_rate": 1.49190244099201e-08, "loss": 0.3367, "step": 8035 }, { "epoch": 0.9873448826637179, "grad_norm": 0.3054011938588327, "learning_rate": 1.4633545876084342e-08, "loss": 0.3973, "step": 8036 }, { "epoch": 0.987467747880575, "grad_norm": 0.38143877873647575, "learning_rate": 1.4350823803224721e-08, "loss": 0.431, "step": 8037 }, { "epoch": 0.9875906130974321, "grad_norm": 0.3179679064872477, "learning_rate": 1.4070858243344641e-08, "loss": 0.3762, "step": 8038 }, { "epoch": 0.9877134783142892, "grad_norm": 0.4399631047494593, "learning_rate": 1.3793649247942909e-08, "loss": 0.3864, "step": 8039 }, { "epoch": 0.9878363435311464, "grad_norm": 0.3321402032144746, "learning_rate": 1.3519196868010398e-08, "loss": 0.3929, "step": 8040 }, { "epoch": 0.9879592087480035, "grad_norm": 0.32336493536518474, "learning_rate": 1.3247501154031727e-08, "loss": 0.3499, "step": 8041 }, { "epoch": 0.9880820739648606, "grad_norm": 0.38275284823511463, "learning_rate": 1.297856215598192e-08, "loss": 0.3675, "step": 8042 }, { "epoch": 0.9882049391817177, "grad_norm": 0.284014668409752, "learning_rate": 1.2712379923331407e-08, "loss": 0.3536, "step": 8043 }, { "epoch": 0.9883278043985748, "grad_norm": 0.3265935752268779, "learning_rate": 1.2448954505042686e-08, "loss": 0.3636, "step": 8044 }, { "epoch": 0.9884506696154318, "grad_norm": 0.39330462777975594, "learning_rate": 1.2188285949571998e-08, "loss": 0.4822, "step": 8045 }, { "epoch": 0.9885735348322889, "grad_norm": 0.3786110500275078, "learning_rate": 1.1930374304865988e-08, "loss": 0.3368, "step": 8046 }, { "epoch": 0.988696400049146, "grad_norm": 0.36801215231624945, "learning_rate": 1.1675219618366706e-08, "loss": 0.4094, "step": 8047 }, { "epoch": 0.9888192652660032, "grad_norm": 0.33048653376537673, "learning_rate": 1.1422821937008276e-08, "loss": 0.4677, "step": 8048 }, { "epoch": 0.9889421304828603, "grad_norm": 0.3964946522045355, "learning_rate": 1.1173181307216896e-08, "loss": 0.4395, "step": 8049 }, { "epoch": 0.9890649956997174, "grad_norm": 0.3051923080352833, "learning_rate": 1.0926297774912497e-08, "loss": 0.3128, "step": 8050 }, { "epoch": 0.9891878609165745, "grad_norm": 0.31401358405573143, "learning_rate": 1.0682171385508755e-08, "loss": 0.3698, "step": 8051 }, { "epoch": 0.9893107261334316, "grad_norm": 0.3810320808985879, "learning_rate": 1.0440802183911414e-08, "loss": 0.39, "step": 8052 }, { "epoch": 0.9894335913502887, "grad_norm": 0.3824862138382765, "learning_rate": 1.0202190214516626e-08, "loss": 0.3973, "step": 8053 }, { "epoch": 0.9895564565671459, "grad_norm": 0.3289352677829501, "learning_rate": 9.966335521215953e-09, "loss": 0.3263, "step": 8054 }, { "epoch": 0.989679321784003, "grad_norm": 0.537752050400215, "learning_rate": 9.733238147394685e-09, "loss": 0.539, "step": 8055 }, { "epoch": 0.9898021870008601, "grad_norm": 0.3228324195161279, "learning_rate": 9.502898135930194e-09, "loss": 0.4474, "step": 8056 }, { "epoch": 0.9899250522177172, "grad_norm": 0.3824803688434605, "learning_rate": 9.275315529188588e-09, "loss": 0.4096, "step": 8057 }, { "epoch": 0.9900479174345743, "grad_norm": 0.2947933795595066, "learning_rate": 9.050490369036379e-09, "loss": 0.4377, "step": 8058 }, { "epoch": 0.9901707826514313, "grad_norm": 0.3520148849156306, "learning_rate": 8.828422696825488e-09, "loss": 0.3595, "step": 8059 }, { "epoch": 0.9902936478682884, "grad_norm": 0.33533506531520646, "learning_rate": 8.609112553406573e-09, "loss": 0.3758, "step": 8060 }, { "epoch": 0.9904165130851456, "grad_norm": 0.37003938675181863, "learning_rate": 8.392559979117365e-09, "loss": 0.4249, "step": 8061 }, { "epoch": 0.9905393783020027, "grad_norm": 0.3156430665685667, "learning_rate": 8.178765013792665e-09, "loss": 0.3498, "step": 8062 }, { "epoch": 0.9906622435188598, "grad_norm": 0.30833569984030784, "learning_rate": 7.967727696761019e-09, "loss": 0.3282, "step": 8063 }, { "epoch": 0.9907851087357169, "grad_norm": 0.35717967563102115, "learning_rate": 7.759448066836373e-09, "loss": 0.5112, "step": 8064 }, { "epoch": 0.990907973952574, "grad_norm": 0.34749996282578977, "learning_rate": 7.553926162334745e-09, "loss": 0.3802, "step": 8065 }, { "epoch": 0.9910308391694311, "grad_norm": 0.3217861017624763, "learning_rate": 7.351162021059232e-09, "loss": 0.3953, "step": 8066 }, { "epoch": 0.9911537043862882, "grad_norm": 0.3475620745880793, "learning_rate": 7.151155680304999e-09, "loss": 0.383, "step": 8067 }, { "epoch": 0.9912765696031454, "grad_norm": 0.32359829527501943, "learning_rate": 6.953907176864283e-09, "loss": 0.3617, "step": 8068 }, { "epoch": 0.9913994348200025, "grad_norm": 0.41350824951857157, "learning_rate": 6.759416547019725e-09, "loss": 0.3882, "step": 8069 }, { "epoch": 0.9915223000368596, "grad_norm": 0.3496611403264738, "learning_rate": 6.567683826546045e-09, "loss": 0.3955, "step": 8070 }, { "epoch": 0.9916451652537167, "grad_norm": 0.28626396986231406, "learning_rate": 6.37870905071003e-09, "loss": 0.3836, "step": 8071 }, { "epoch": 0.9917680304705738, "grad_norm": 0.3589010154525106, "learning_rate": 6.192492254273874e-09, "loss": 0.3682, "step": 8072 }, { "epoch": 0.9918908956874309, "grad_norm": 0.4040208255129239, "learning_rate": 6.009033471491842e-09, "loss": 0.3705, "step": 8073 }, { "epoch": 0.992013760904288, "grad_norm": 0.37296593072842366, "learning_rate": 5.828332736106945e-09, "loss": 0.3971, "step": 8074 }, { "epoch": 0.9921366261211451, "grad_norm": 0.36662034421921114, "learning_rate": 5.650390081359258e-09, "loss": 0.3985, "step": 8075 }, { "epoch": 0.9922594913380022, "grad_norm": 0.428591210725047, "learning_rate": 5.4752055399825975e-09, "loss": 0.4438, "step": 8076 }, { "epoch": 0.9923823565548593, "grad_norm": 0.3123228831715209, "learning_rate": 5.302779144197856e-09, "loss": 0.3632, "step": 8077 }, { "epoch": 0.9925052217717164, "grad_norm": 0.336168828954814, "learning_rate": 5.1331109257229945e-09, "loss": 0.399, "step": 8078 }, { "epoch": 0.9926280869885735, "grad_norm": 0.2617958619699684, "learning_rate": 4.966200915766383e-09, "loss": 0.4192, "step": 8079 }, { "epoch": 0.9927509522054306, "grad_norm": 0.41904292365944296, "learning_rate": 4.802049145031795e-09, "loss": 0.4376, "step": 8080 }, { "epoch": 0.9928738174222878, "grad_norm": 0.38005515309291643, "learning_rate": 4.640655643713409e-09, "loss": 0.3992, "step": 8081 }, { "epoch": 0.9929966826391449, "grad_norm": 0.38706530801927563, "learning_rate": 4.482020441497481e-09, "loss": 0.4192, "step": 8082 }, { "epoch": 0.993119547856002, "grad_norm": 0.374121798214391, "learning_rate": 4.326143567564e-09, "loss": 0.3954, "step": 8083 }, { "epoch": 0.9932424130728591, "grad_norm": 0.3522148835752869, "learning_rate": 4.173025050586699e-09, "loss": 0.3536, "step": 8084 }, { "epoch": 0.9933652782897162, "grad_norm": 0.36285403524649074, "learning_rate": 4.022664918729713e-09, "loss": 0.333, "step": 8085 }, { "epoch": 0.9934881435065733, "grad_norm": 0.3192101107118821, "learning_rate": 3.875063199650919e-09, "loss": 0.3968, "step": 8086 }, { "epoch": 0.9936110087234304, "grad_norm": 0.29603457814648393, "learning_rate": 3.730219920501932e-09, "loss": 0.4043, "step": 8087 }, { "epoch": 0.9937338739402876, "grad_norm": 0.37826895919406006, "learning_rate": 3.5881351079247725e-09, "loss": 0.4058, "step": 8088 }, { "epoch": 0.9938567391571446, "grad_norm": 0.3205228460734893, "learning_rate": 3.448808788053537e-09, "loss": 0.3362, "step": 8089 }, { "epoch": 0.9939796043740017, "grad_norm": 0.4563865562141776, "learning_rate": 3.312240986519388e-09, "loss": 0.4264, "step": 8090 }, { "epoch": 0.9941024695908588, "grad_norm": 0.35824612214150836, "learning_rate": 3.1784317284405675e-09, "loss": 0.3832, "step": 8091 }, { "epoch": 0.9942253348077159, "grad_norm": 0.3235706061763816, "learning_rate": 3.0473810384323843e-09, "loss": 0.3477, "step": 8092 }, { "epoch": 0.994348200024573, "grad_norm": 0.4372130170339096, "learning_rate": 2.9190889406005562e-09, "loss": 0.4929, "step": 8093 }, { "epoch": 0.9944710652414301, "grad_norm": 0.35180989238932586, "learning_rate": 2.7935554585412083e-09, "loss": 0.3786, "step": 8094 }, { "epoch": 0.9945939304582873, "grad_norm": 0.3566783786658112, "learning_rate": 2.6707806153475347e-09, "loss": 0.3961, "step": 8095 }, { "epoch": 0.9947167956751444, "grad_norm": 0.4000726167499439, "learning_rate": 2.5507644336014713e-09, "loss": 0.3339, "step": 8096 }, { "epoch": 0.9948396608920015, "grad_norm": 0.4510142909535837, "learning_rate": 2.4335069353820238e-09, "loss": 0.353, "step": 8097 }, { "epoch": 0.9949625261088586, "grad_norm": 0.36942152837508596, "learning_rate": 2.3190081422569398e-09, "loss": 0.4115, "step": 8098 }, { "epoch": 0.9950853913257157, "grad_norm": 0.38538880948147713, "learning_rate": 2.2072680752843745e-09, "loss": 0.3874, "step": 8099 }, { "epoch": 0.9952082565425728, "grad_norm": 0.38688287025181994, "learning_rate": 2.0982867550228822e-09, "loss": 0.4367, "step": 8100 }, { "epoch": 0.99533112175943, "grad_norm": 0.3943449748056881, "learning_rate": 1.9920642015164305e-09, "loss": 0.4252, "step": 8101 }, { "epoch": 0.9954539869762871, "grad_norm": 0.3290373265111348, "learning_rate": 1.8886004343043885e-09, "loss": 0.4016, "step": 8102 }, { "epoch": 0.9955768521931441, "grad_norm": 0.36005715980528585, "learning_rate": 1.7878954724165342e-09, "loss": 0.3912, "step": 8103 }, { "epoch": 0.9956997174100012, "grad_norm": 0.3316973797509188, "learning_rate": 1.6899493343797146e-09, "loss": 0.33, "step": 8104 }, { "epoch": 0.9958225826268583, "grad_norm": 0.48175743054526493, "learning_rate": 1.5947620382095185e-09, "loss": 0.4711, "step": 8105 }, { "epoch": 0.9959454478437154, "grad_norm": 0.35122663257365483, "learning_rate": 1.5023336014152734e-09, "loss": 0.4783, "step": 8106 }, { "epoch": 0.9960683130605725, "grad_norm": 0.28821503768771217, "learning_rate": 1.412664040996714e-09, "loss": 0.3973, "step": 8107 }, { "epoch": 0.9961911782774296, "grad_norm": 0.3510549328916566, "learning_rate": 1.325753373448979e-09, "loss": 0.3469, "step": 8108 }, { "epoch": 0.9963140434942868, "grad_norm": 0.4320733318349777, "learning_rate": 1.2416016147609454e-09, "loss": 0.3847, "step": 8109 }, { "epoch": 0.9964369087111439, "grad_norm": 0.32817440665625364, "learning_rate": 1.160208780408567e-09, "loss": 0.3712, "step": 8110 }, { "epoch": 0.996559773928001, "grad_norm": 0.40482644267003265, "learning_rate": 1.0815748853648666e-09, "loss": 0.4788, "step": 8111 }, { "epoch": 0.9966826391448581, "grad_norm": 0.3683840816795728, "learning_rate": 1.00569994409494e-09, "loss": 0.3052, "step": 8112 }, { "epoch": 0.9968055043617152, "grad_norm": 0.2968368639977592, "learning_rate": 9.325839705542904e-10, "loss": 0.3628, "step": 8113 }, { "epoch": 0.9969283695785723, "grad_norm": 0.7185239683207872, "learning_rate": 8.622269781921599e-10, "loss": 0.4734, "step": 8114 }, { "epoch": 0.9970512347954295, "grad_norm": 0.358663690870794, "learning_rate": 7.946289799515282e-10, "loss": 0.4586, "step": 8115 }, { "epoch": 0.9971741000122866, "grad_norm": 0.3610813783433356, "learning_rate": 7.297899882641179e-10, "loss": 0.3727, "step": 8116 }, { "epoch": 0.9972969652291437, "grad_norm": 0.3801796067777575, "learning_rate": 6.677100150587201e-10, "loss": 0.3293, "step": 8117 }, { "epoch": 0.9974198304460007, "grad_norm": 0.3794473345066764, "learning_rate": 6.083890717545337e-10, "loss": 0.341, "step": 8118 }, { "epoch": 0.9975426956628578, "grad_norm": 0.3540233234663647, "learning_rate": 5.518271692628308e-10, "loss": 0.3832, "step": 8119 }, { "epoch": 0.9976655608797149, "grad_norm": 0.4694404838480922, "learning_rate": 4.980243179869559e-10, "loss": 0.4606, "step": 8120 }, { "epoch": 0.997788426096572, "grad_norm": 0.44118932004370387, "learning_rate": 4.4698052782399244e-10, "loss": 0.3586, "step": 8121 }, { "epoch": 0.9979112913134291, "grad_norm": 0.4824051200048354, "learning_rate": 3.986958081647618e-10, "loss": 0.4365, "step": 8122 }, { "epoch": 0.9980341565302863, "grad_norm": 0.3055080078555921, "learning_rate": 3.5317016788882773e-10, "loss": 0.3787, "step": 8123 }, { "epoch": 0.9981570217471434, "grad_norm": 0.3920696425630348, "learning_rate": 3.1040361536949223e-10, "loss": 0.3642, "step": 8124 }, { "epoch": 0.9982798869640005, "grad_norm": 0.34876492077485804, "learning_rate": 2.703961584771264e-10, "loss": 0.3613, "step": 8125 }, { "epoch": 0.9984027521808576, "grad_norm": 0.5053680449995387, "learning_rate": 2.331478045691782e-10, "loss": 0.4937, "step": 8126 }, { "epoch": 0.9985256173977147, "grad_norm": 0.34631019943251906, "learning_rate": 1.986585604951685e-10, "loss": 0.4024, "step": 8127 }, { "epoch": 0.9986484826145718, "grad_norm": 0.39123609133008513, "learning_rate": 1.6692843260168734e-10, "loss": 0.4258, "step": 8128 }, { "epoch": 0.998771347831429, "grad_norm": 0.31726736638054714, "learning_rate": 1.3795742672406687e-10, "loss": 0.3818, "step": 8129 }, { "epoch": 0.9988942130482861, "grad_norm": 0.38097406553576346, "learning_rate": 1.1174554819137761e-10, "loss": 0.3489, "step": 8130 }, { "epoch": 0.9990170782651432, "grad_norm": 0.32486757953797774, "learning_rate": 8.82928018264284e-11, "loss": 0.3683, "step": 8131 }, { "epoch": 0.9991399434820002, "grad_norm": 0.3777505700364591, "learning_rate": 6.759919194077036e-11, "loss": 0.3752, "step": 8132 }, { "epoch": 0.9992628086988573, "grad_norm": 0.3471148243684579, "learning_rate": 4.966472234302355e-11, "loss": 0.42, "step": 8133 }, { "epoch": 0.9993856739157144, "grad_norm": 0.33857274136223625, "learning_rate": 3.4489396332215705e-11, "loss": 0.3953, "step": 8134 }, { "epoch": 0.9995085391325715, "grad_norm": 0.5616143743523302, "learning_rate": 2.2073216697782174e-11, "loss": 0.4364, "step": 8135 }, { "epoch": 0.9996314043494287, "grad_norm": 0.298812297582966, "learning_rate": 1.2416185724561935e-11, "loss": 0.3263, "step": 8136 }, { "epoch": 0.9997542695662858, "grad_norm": 0.3048193114273076, "learning_rate": 5.518305189466944e-12, "loss": 0.3718, "step": 8137 }, { "epoch": 0.9998771347831429, "grad_norm": 0.34733858703079024, "learning_rate": 1.3795763614821155e-12, "loss": 0.3759, "step": 8138 }, { "epoch": 1.0, "grad_norm": 0.3714773308130186, "learning_rate": 0.0, "loss": 0.3561, "step": 8139 } ], "logging_steps": 1, "max_steps": 8139, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 340316098934784.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }