{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0286513694778567, "eval_steps": 500, "global_step": 18400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00016591314446887055, "grad_norm": 85.19542448446617, "learning_rate": 3.0000000000000004e-08, "loss": 8.5863, "step": 1 }, { "epoch": 0.0003318262889377411, "grad_norm": 82.32409405573297, "learning_rate": 6.000000000000001e-08, "loss": 8.473, "step": 2 }, { "epoch": 0.0004977394334066117, "grad_norm": 87.13591882926195, "learning_rate": 9e-08, "loss": 8.6763, "step": 3 }, { "epoch": 0.0006636525778754822, "grad_norm": 84.18601565903892, "learning_rate": 1.2000000000000002e-07, "loss": 8.4935, "step": 4 }, { "epoch": 0.0008295657223443527, "grad_norm": 84.14667729364332, "learning_rate": 1.5e-07, "loss": 8.5695, "step": 5 }, { "epoch": 0.0009954788668132234, "grad_norm": 84.10731463288944, "learning_rate": 1.8e-07, "loss": 8.5366, "step": 6 }, { "epoch": 0.0011613920112820938, "grad_norm": 83.28625999969219, "learning_rate": 2.1000000000000003e-07, "loss": 8.5134, "step": 7 }, { "epoch": 0.0013273051557509644, "grad_norm": 84.44997731146951, "learning_rate": 2.4000000000000003e-07, "loss": 8.5326, "step": 8 }, { "epoch": 0.0014932183002198348, "grad_norm": 89.14194809970635, "learning_rate": 2.7e-07, "loss": 8.722, "step": 9 }, { "epoch": 0.0016591314446887055, "grad_norm": 84.83466343564557, "learning_rate": 3e-07, "loss": 8.555, "step": 10 }, { "epoch": 0.001825044589157576, "grad_norm": 81.56216222466577, "learning_rate": 3.3e-07, "loss": 8.4491, "step": 11 }, { "epoch": 0.0019909577336264467, "grad_norm": 86.81209990901495, "learning_rate": 3.6e-07, "loss": 8.6371, "step": 12 }, { "epoch": 0.002156870878095317, "grad_norm": 85.71908305594017, "learning_rate": 3.9e-07, "loss": 8.5535, "step": 13 }, { "epoch": 0.0023227840225641875, "grad_norm": 86.9623893470603, "learning_rate": 4.2000000000000006e-07, "loss": 8.6119, "step": 14 }, { "epoch": 0.002488697167033058, "grad_norm": 82.49495302426142, "learning_rate": 4.5000000000000003e-07, "loss": 8.441, "step": 15 }, { "epoch": 0.002654610311501929, "grad_norm": 81.33695392158589, "learning_rate": 4.800000000000001e-07, "loss": 8.4148, "step": 16 }, { "epoch": 0.0028205234559707994, "grad_norm": 84.88167396340097, "learning_rate": 5.1e-07, "loss": 8.5835, "step": 17 }, { "epoch": 0.0029864366004396696, "grad_norm": 79.05682174134483, "learning_rate": 5.4e-07, "loss": 8.3251, "step": 18 }, { "epoch": 0.0031523497449085403, "grad_norm": 78.38741271121032, "learning_rate": 5.7e-07, "loss": 8.3005, "step": 19 }, { "epoch": 0.003318262889377411, "grad_norm": 82.4891623112013, "learning_rate": 6e-07, "loss": 8.4621, "step": 20 }, { "epoch": 0.0034841760338462815, "grad_norm": 79.1659385995681, "learning_rate": 6.300000000000001e-07, "loss": 8.3031, "step": 21 }, { "epoch": 0.003650089178315152, "grad_norm": 77.06598892419889, "learning_rate": 6.6e-07, "loss": 8.2303, "step": 22 }, { "epoch": 0.0038160023227840224, "grad_norm": 82.36739641908615, "learning_rate": 6.9e-07, "loss": 8.2115, "step": 23 }, { "epoch": 0.003981915467252893, "grad_norm": 76.36757247944843, "learning_rate": 7.2e-07, "loss": 8.1456, "step": 24 }, { "epoch": 0.004147828611721764, "grad_norm": 68.98126779127293, "learning_rate": 7.5e-07, "loss": 7.8079, "step": 25 }, { "epoch": 0.004313741756190634, "grad_norm": 69.00876196915596, "learning_rate": 7.8e-07, "loss": 7.8411, "step": 26 }, { "epoch": 0.004479654900659505, "grad_norm": 67.5391863634817, "learning_rate": 8.1e-07, "loss": 7.7531, "step": 27 }, { "epoch": 0.004645568045128375, "grad_norm": 68.05473702316927, "learning_rate": 8.400000000000001e-07, "loss": 7.7816, "step": 28 }, { "epoch": 0.004811481189597246, "grad_norm": 67.38920732223586, "learning_rate": 8.7e-07, "loss": 7.7137, "step": 29 }, { "epoch": 0.004977394334066116, "grad_norm": 66.22135170564259, "learning_rate": 9.000000000000001e-07, "loss": 7.6523, "step": 30 }, { "epoch": 0.0051433074785349866, "grad_norm": 65.24262292310318, "learning_rate": 9.3e-07, "loss": 7.6215, "step": 31 }, { "epoch": 0.005309220623003858, "grad_norm": 66.75573857246084, "learning_rate": 9.600000000000001e-07, "loss": 7.6052, "step": 32 }, { "epoch": 0.005475133767472728, "grad_norm": 66.14125778432644, "learning_rate": 9.9e-07, "loss": 7.4641, "step": 33 }, { "epoch": 0.005641046911941599, "grad_norm": 62.44214008796182, "learning_rate": 1.02e-06, "loss": 7.0586, "step": 34 }, { "epoch": 0.005806960056410469, "grad_norm": 59.29700311739906, "learning_rate": 1.0500000000000001e-06, "loss": 6.8323, "step": 35 }, { "epoch": 0.005972873200879339, "grad_norm": 59.53983946181781, "learning_rate": 1.08e-06, "loss": 6.8458, "step": 36 }, { "epoch": 0.00613878634534821, "grad_norm": 58.40955787137384, "learning_rate": 1.11e-06, "loss": 6.749, "step": 37 }, { "epoch": 0.0063046994898170805, "grad_norm": 58.46669985481344, "learning_rate": 1.14e-06, "loss": 6.7341, "step": 38 }, { "epoch": 0.006470612634285952, "grad_norm": 57.85754721296723, "learning_rate": 1.17e-06, "loss": 6.679, "step": 39 }, { "epoch": 0.006636525778754822, "grad_norm": 61.03757307715009, "learning_rate": 1.2e-06, "loss": 6.6835, "step": 40 }, { "epoch": 0.006802438923223692, "grad_norm": 58.679250167783295, "learning_rate": 1.2299999999999999e-06, "loss": 6.6456, "step": 41 }, { "epoch": 0.006968352067692563, "grad_norm": 58.420472121350315, "learning_rate": 1.2600000000000002e-06, "loss": 6.5997, "step": 42 }, { "epoch": 0.007134265212161433, "grad_norm": 57.989410831374606, "learning_rate": 1.2900000000000001e-06, "loss": 6.477, "step": 43 }, { "epoch": 0.007300178356630304, "grad_norm": 57.29419183703238, "learning_rate": 1.32e-06, "loss": 6.3482, "step": 44 }, { "epoch": 0.0074660915010991745, "grad_norm": 57.64245355217973, "learning_rate": 1.35e-06, "loss": 6.2579, "step": 45 }, { "epoch": 0.007632004645568045, "grad_norm": 56.91570071531264, "learning_rate": 1.38e-06, "loss": 6.1127, "step": 46 }, { "epoch": 0.007797917790036916, "grad_norm": 57.511790120005976, "learning_rate": 1.41e-06, "loss": 6.0203, "step": 47 }, { "epoch": 0.007963830934505787, "grad_norm": 58.13021570410771, "learning_rate": 1.44e-06, "loss": 5.9262, "step": 48 }, { "epoch": 0.008129744078974656, "grad_norm": 57.938125335380285, "learning_rate": 1.4700000000000001e-06, "loss": 5.8124, "step": 49 }, { "epoch": 0.008295657223443527, "grad_norm": 57.39264712573751, "learning_rate": 1.5e-06, "loss": 5.6758, "step": 50 }, { "epoch": 0.008461570367912398, "grad_norm": 57.79052254778045, "learning_rate": 1.5300000000000002e-06, "loss": 5.6168, "step": 51 }, { "epoch": 0.008627483512381268, "grad_norm": 58.18250706411147, "learning_rate": 1.56e-06, "loss": 5.527, "step": 52 }, { "epoch": 0.008793396656850139, "grad_norm": 56.91519882334982, "learning_rate": 1.59e-06, "loss": 5.4117, "step": 53 }, { "epoch": 0.00895930980131901, "grad_norm": 58.80496017496223, "learning_rate": 1.62e-06, "loss": 5.3978, "step": 54 }, { "epoch": 0.00912522294578788, "grad_norm": 57.514939390872016, "learning_rate": 1.6499999999999999e-06, "loss": 5.2806, "step": 55 }, { "epoch": 0.00929113609025675, "grad_norm": 58.261230393043675, "learning_rate": 1.6800000000000002e-06, "loss": 5.2305, "step": 56 }, { "epoch": 0.009457049234725621, "grad_norm": 58.853719732787475, "learning_rate": 1.7100000000000001e-06, "loss": 5.1747, "step": 57 }, { "epoch": 0.009622962379194492, "grad_norm": 58.44206227327712, "learning_rate": 1.74e-06, "loss": 5.0768, "step": 58 }, { "epoch": 0.009788875523663362, "grad_norm": 58.39912900993697, "learning_rate": 1.77e-06, "loss": 4.9874, "step": 59 }, { "epoch": 0.009954788668132233, "grad_norm": 58.19859876404236, "learning_rate": 1.8000000000000001e-06, "loss": 4.8936, "step": 60 }, { "epoch": 0.010120701812601104, "grad_norm": 58.931458451778994, "learning_rate": 1.83e-06, "loss": 4.8333, "step": 61 }, { "epoch": 0.010286614957069973, "grad_norm": 58.566598779309345, "learning_rate": 1.86e-06, "loss": 4.7316, "step": 62 }, { "epoch": 0.010452528101538844, "grad_norm": 59.43204190036222, "learning_rate": 1.89e-06, "loss": 4.6782, "step": 63 }, { "epoch": 0.010618441246007715, "grad_norm": 59.32237678351381, "learning_rate": 1.9200000000000003e-06, "loss": 4.5875, "step": 64 }, { "epoch": 0.010784354390476586, "grad_norm": 58.22281010507284, "learning_rate": 1.95e-06, "loss": 4.4805, "step": 65 }, { "epoch": 0.010950267534945456, "grad_norm": 58.89565065173638, "learning_rate": 1.98e-06, "loss": 4.4417, "step": 66 }, { "epoch": 0.011116180679414327, "grad_norm": 59.219564213854724, "learning_rate": 2.01e-06, "loss": 4.3646, "step": 67 }, { "epoch": 0.011282093823883198, "grad_norm": 60.084020905213414, "learning_rate": 2.04e-06, "loss": 4.3387, "step": 68 }, { "epoch": 0.011448006968352067, "grad_norm": 59.930063371072634, "learning_rate": 2.07e-06, "loss": 4.2256, "step": 69 }, { "epoch": 0.011613920112820938, "grad_norm": 59.443610975279206, "learning_rate": 2.1000000000000002e-06, "loss": 4.1584, "step": 70 }, { "epoch": 0.01177983325728981, "grad_norm": 60.46768457622464, "learning_rate": 2.13e-06, "loss": 4.1046, "step": 71 }, { "epoch": 0.011945746401758679, "grad_norm": 60.923838757674325, "learning_rate": 2.16e-06, "loss": 4.0402, "step": 72 }, { "epoch": 0.01211165954622755, "grad_norm": 60.56706226907599, "learning_rate": 2.19e-06, "loss": 3.9675, "step": 73 }, { "epoch": 0.01227757269069642, "grad_norm": 60.52251526885924, "learning_rate": 2.22e-06, "loss": 3.934, "step": 74 }, { "epoch": 0.012443485835165292, "grad_norm": 60.64687995134201, "learning_rate": 2.25e-06, "loss": 3.8682, "step": 75 }, { "epoch": 0.012609398979634161, "grad_norm": 59.74345013299483, "learning_rate": 2.28e-06, "loss": 3.7878, "step": 76 }, { "epoch": 0.012775312124103032, "grad_norm": 62.7371580672841, "learning_rate": 2.31e-06, "loss": 3.7883, "step": 77 }, { "epoch": 0.012941225268571903, "grad_norm": 61.120581572512556, "learning_rate": 2.34e-06, "loss": 3.6993, "step": 78 }, { "epoch": 0.013107138413040773, "grad_norm": 61.57800519844952, "learning_rate": 2.3699999999999998e-06, "loss": 3.6369, "step": 79 }, { "epoch": 0.013273051557509644, "grad_norm": 61.54876895013541, "learning_rate": 2.4e-06, "loss": 3.5785, "step": 80 }, { "epoch": 0.013438964701978515, "grad_norm": 60.1358841900405, "learning_rate": 2.43e-06, "loss": 3.514, "step": 81 }, { "epoch": 0.013604877846447384, "grad_norm": 61.297281772956765, "learning_rate": 2.4599999999999997e-06, "loss": 3.4742, "step": 82 }, { "epoch": 0.013770790990916255, "grad_norm": 61.30923571729013, "learning_rate": 2.4900000000000003e-06, "loss": 3.4433, "step": 83 }, { "epoch": 0.013936704135385126, "grad_norm": 61.23947604523023, "learning_rate": 2.5200000000000004e-06, "loss": 3.3705, "step": 84 }, { "epoch": 0.014102617279853997, "grad_norm": 62.39595603092295, "learning_rate": 2.55e-06, "loss": 3.3502, "step": 85 }, { "epoch": 0.014268530424322867, "grad_norm": 61.53318479023394, "learning_rate": 2.5800000000000003e-06, "loss": 3.2943, "step": 86 }, { "epoch": 0.014434443568791738, "grad_norm": 61.212619109385486, "learning_rate": 2.61e-06, "loss": 3.2354, "step": 87 }, { "epoch": 0.014600356713260609, "grad_norm": 60.68820969945481, "learning_rate": 2.64e-06, "loss": 3.1867, "step": 88 }, { "epoch": 0.014766269857729478, "grad_norm": 61.161255238888366, "learning_rate": 2.6700000000000003e-06, "loss": 3.1363, "step": 89 }, { "epoch": 0.014932183002198349, "grad_norm": 61.39181889008794, "learning_rate": 2.7e-06, "loss": 3.106, "step": 90 }, { "epoch": 0.01509809614666722, "grad_norm": 61.510960534577144, "learning_rate": 2.73e-06, "loss": 3.0709, "step": 91 }, { "epoch": 0.01526400929113609, "grad_norm": 61.403101355512064, "learning_rate": 2.76e-06, "loss": 3.0295, "step": 92 }, { "epoch": 0.01542992243560496, "grad_norm": 60.92480004727436, "learning_rate": 2.79e-06, "loss": 2.9853, "step": 93 }, { "epoch": 0.015595835580073832, "grad_norm": 60.38494778358348, "learning_rate": 2.82e-06, "loss": 2.9374, "step": 94 }, { "epoch": 0.015761748724542703, "grad_norm": 61.24204912886601, "learning_rate": 2.85e-06, "loss": 2.9142, "step": 95 }, { "epoch": 0.015927661869011574, "grad_norm": 61.79448720849728, "learning_rate": 2.88e-06, "loss": 2.8738, "step": 96 }, { "epoch": 0.01609357501348044, "grad_norm": 60.44596322113541, "learning_rate": 2.9099999999999997e-06, "loss": 2.8393, "step": 97 }, { "epoch": 0.016259488157949312, "grad_norm": 61.69496692909939, "learning_rate": 2.9400000000000002e-06, "loss": 2.8091, "step": 98 }, { "epoch": 0.016425401302418183, "grad_norm": 61.042506818121446, "learning_rate": 2.9700000000000004e-06, "loss": 2.7632, "step": 99 }, { "epoch": 0.016591314446887055, "grad_norm": 59.83708084484388, "learning_rate": 3e-06, "loss": 2.7211, "step": 100 }, { "epoch": 0.016757227591355926, "grad_norm": 60.76096104413392, "learning_rate": 3.0300000000000002e-06, "loss": 2.6877, "step": 101 }, { "epoch": 0.016923140735824797, "grad_norm": 60.503095248644414, "learning_rate": 3.0600000000000003e-06, "loss": 2.6464, "step": 102 }, { "epoch": 0.017089053880293668, "grad_norm": 60.022580078973974, "learning_rate": 3.09e-06, "loss": 2.6246, "step": 103 }, { "epoch": 0.017254967024762535, "grad_norm": 60.7786406432538, "learning_rate": 3.12e-06, "loss": 2.5835, "step": 104 }, { "epoch": 0.017420880169231406, "grad_norm": 60.0134194819714, "learning_rate": 3.15e-06, "loss": 2.548, "step": 105 }, { "epoch": 0.017586793313700277, "grad_norm": 59.59173672650453, "learning_rate": 3.18e-06, "loss": 2.5352, "step": 106 }, { "epoch": 0.01775270645816915, "grad_norm": 59.365953677840324, "learning_rate": 3.21e-06, "loss": 2.5026, "step": 107 }, { "epoch": 0.01791861960263802, "grad_norm": 59.03666897099401, "learning_rate": 3.24e-06, "loss": 2.4604, "step": 108 }, { "epoch": 0.01808453274710689, "grad_norm": 58.7798482726752, "learning_rate": 3.27e-06, "loss": 2.4486, "step": 109 }, { "epoch": 0.01825044589157576, "grad_norm": 58.114483374139134, "learning_rate": 3.2999999999999997e-06, "loss": 2.4179, "step": 110 }, { "epoch": 0.01841635903604463, "grad_norm": 59.80520961291876, "learning_rate": 3.33e-06, "loss": 2.392, "step": 111 }, { "epoch": 0.0185822721805135, "grad_norm": 58.14478216556001, "learning_rate": 3.3600000000000004e-06, "loss": 2.3406, "step": 112 }, { "epoch": 0.01874818532498237, "grad_norm": 59.18483586241932, "learning_rate": 3.3899999999999997e-06, "loss": 2.3231, "step": 113 }, { "epoch": 0.018914098469451242, "grad_norm": 57.28442984470624, "learning_rate": 3.4200000000000003e-06, "loss": 2.2941, "step": 114 }, { "epoch": 0.019080011613920114, "grad_norm": 57.357039771400295, "learning_rate": 3.4500000000000004e-06, "loss": 2.2816, "step": 115 }, { "epoch": 0.019245924758388985, "grad_norm": 57.111232218047554, "learning_rate": 3.48e-06, "loss": 2.2342, "step": 116 }, { "epoch": 0.019411837902857852, "grad_norm": 57.18649743743891, "learning_rate": 3.5100000000000003e-06, "loss": 2.2274, "step": 117 }, { "epoch": 0.019577751047326723, "grad_norm": 56.32726893984759, "learning_rate": 3.54e-06, "loss": 2.188, "step": 118 }, { "epoch": 0.019743664191795594, "grad_norm": 56.11126674497503, "learning_rate": 3.57e-06, "loss": 2.1688, "step": 119 }, { "epoch": 0.019909577336264465, "grad_norm": 56.452361895827636, "learning_rate": 3.6000000000000003e-06, "loss": 2.1257, "step": 120 }, { "epoch": 0.020075490480733336, "grad_norm": 54.87188045608243, "learning_rate": 3.63e-06, "loss": 2.1082, "step": 121 }, { "epoch": 0.020241403625202208, "grad_norm": 55.85330145328899, "learning_rate": 3.66e-06, "loss": 2.1085, "step": 122 }, { "epoch": 0.02040731676967108, "grad_norm": 54.697357042805265, "learning_rate": 3.69e-06, "loss": 2.0609, "step": 123 }, { "epoch": 0.020573229914139946, "grad_norm": 54.39711063453628, "learning_rate": 3.72e-06, "loss": 2.0577, "step": 124 }, { "epoch": 0.020739143058608817, "grad_norm": 54.63336965202357, "learning_rate": 3.75e-06, "loss": 2.0359, "step": 125 }, { "epoch": 0.02090505620307769, "grad_norm": 54.2613857002437, "learning_rate": 3.78e-06, "loss": 2.0223, "step": 126 }, { "epoch": 0.02107096934754656, "grad_norm": 54.55239593692058, "learning_rate": 3.81e-06, "loss": 1.9907, "step": 127 }, { "epoch": 0.02123688249201543, "grad_norm": 54.163024439012574, "learning_rate": 3.8400000000000005e-06, "loss": 1.9572, "step": 128 }, { "epoch": 0.0214027956364843, "grad_norm": 52.39556608669111, "learning_rate": 3.87e-06, "loss": 1.9547, "step": 129 }, { "epoch": 0.021568708780953173, "grad_norm": 52.88053635834076, "learning_rate": 3.9e-06, "loss": 1.9174, "step": 130 }, { "epoch": 0.02173462192542204, "grad_norm": 52.27647350963412, "learning_rate": 3.93e-06, "loss": 1.9085, "step": 131 }, { "epoch": 0.02190053506989091, "grad_norm": 51.8188123723821, "learning_rate": 3.96e-06, "loss": 1.8921, "step": 132 }, { "epoch": 0.022066448214359782, "grad_norm": 51.13956466153071, "learning_rate": 3.99e-06, "loss": 1.8695, "step": 133 }, { "epoch": 0.022232361358828653, "grad_norm": 50.880997478246, "learning_rate": 4.02e-06, "loss": 1.8723, "step": 134 }, { "epoch": 0.022398274503297524, "grad_norm": 50.17800750552662, "learning_rate": 4.05e-06, "loss": 1.8362, "step": 135 }, { "epoch": 0.022564187647766396, "grad_norm": 50.25731092529157, "learning_rate": 4.08e-06, "loss": 1.8273, "step": 136 }, { "epoch": 0.022730100792235263, "grad_norm": 49.557602243937815, "learning_rate": 4.1100000000000005e-06, "loss": 1.7923, "step": 137 }, { "epoch": 0.022896013936704134, "grad_norm": 49.06910514435632, "learning_rate": 4.14e-06, "loss": 1.7823, "step": 138 }, { "epoch": 0.023061927081173005, "grad_norm": 48.124932183722606, "learning_rate": 4.17e-06, "loss": 1.7748, "step": 139 }, { "epoch": 0.023227840225641876, "grad_norm": 48.134667233726226, "learning_rate": 4.2000000000000004e-06, "loss": 1.7639, "step": 140 }, { "epoch": 0.023393753370110747, "grad_norm": 47.557312841261506, "learning_rate": 4.23e-06, "loss": 1.7324, "step": 141 }, { "epoch": 0.02355966651457962, "grad_norm": 47.70768250047664, "learning_rate": 4.26e-06, "loss": 1.7218, "step": 142 }, { "epoch": 0.02372557965904849, "grad_norm": 46.50696652735735, "learning_rate": 4.2900000000000004e-06, "loss": 1.6973, "step": 143 }, { "epoch": 0.023891492803517357, "grad_norm": 46.08588423958666, "learning_rate": 4.32e-06, "loss": 1.6985, "step": 144 }, { "epoch": 0.024057405947986228, "grad_norm": 45.56462043474931, "learning_rate": 4.35e-06, "loss": 1.6685, "step": 145 }, { "epoch": 0.0242233190924551, "grad_norm": 45.97216272149993, "learning_rate": 4.38e-06, "loss": 1.6579, "step": 146 }, { "epoch": 0.02438923223692397, "grad_norm": 45.46498190474296, "learning_rate": 4.41e-06, "loss": 1.64, "step": 147 }, { "epoch": 0.02455514538139284, "grad_norm": 44.854534417908845, "learning_rate": 4.44e-06, "loss": 1.624, "step": 148 }, { "epoch": 0.024721058525861712, "grad_norm": 43.83569267105489, "learning_rate": 4.4699999999999996e-06, "loss": 1.6165, "step": 149 }, { "epoch": 0.024886971670330583, "grad_norm": 43.48566112246643, "learning_rate": 4.5e-06, "loss": 1.5971, "step": 150 }, { "epoch": 0.02505288481479945, "grad_norm": 42.91276059636016, "learning_rate": 4.53e-06, "loss": 1.5792, "step": 151 }, { "epoch": 0.025218797959268322, "grad_norm": 42.606137245138356, "learning_rate": 4.56e-06, "loss": 1.5553, "step": 152 }, { "epoch": 0.025384711103737193, "grad_norm": 41.4393639959328, "learning_rate": 4.59e-06, "loss": 1.5775, "step": 153 }, { "epoch": 0.025550624248206064, "grad_norm": 41.54145609070485, "learning_rate": 4.62e-06, "loss": 1.5479, "step": 154 }, { "epoch": 0.025716537392674935, "grad_norm": 41.887845194447074, "learning_rate": 4.65e-06, "loss": 1.5296, "step": 155 }, { "epoch": 0.025882450537143806, "grad_norm": 40.933178238844356, "learning_rate": 4.68e-06, "loss": 1.5297, "step": 156 }, { "epoch": 0.026048363681612677, "grad_norm": 39.94971992763078, "learning_rate": 4.71e-06, "loss": 1.5151, "step": 157 }, { "epoch": 0.026214276826081545, "grad_norm": 39.35709938410145, "learning_rate": 4.7399999999999995e-06, "loss": 1.5054, "step": 158 }, { "epoch": 0.026380189970550416, "grad_norm": 38.69670975999757, "learning_rate": 4.77e-06, "loss": 1.4814, "step": 159 }, { "epoch": 0.026546103115019287, "grad_norm": 38.62419347835826, "learning_rate": 4.8e-06, "loss": 1.4754, "step": 160 }, { "epoch": 0.02671201625948816, "grad_norm": 37.77998094940736, "learning_rate": 4.8299999999999995e-06, "loss": 1.462, "step": 161 }, { "epoch": 0.02687792940395703, "grad_norm": 38.187533465091185, "learning_rate": 4.86e-06, "loss": 1.4476, "step": 162 }, { "epoch": 0.0270438425484259, "grad_norm": 36.94958450784839, "learning_rate": 4.89e-06, "loss": 1.4472, "step": 163 }, { "epoch": 0.027209755692894768, "grad_norm": 36.82539602249903, "learning_rate": 4.9199999999999995e-06, "loss": 1.4243, "step": 164 }, { "epoch": 0.02737566883736364, "grad_norm": 35.84655716782342, "learning_rate": 4.950000000000001e-06, "loss": 1.4311, "step": 165 }, { "epoch": 0.02754158198183251, "grad_norm": 35.976742015954045, "learning_rate": 4.980000000000001e-06, "loss": 1.4, "step": 166 }, { "epoch": 0.02770749512630138, "grad_norm": 35.117919170195876, "learning_rate": 5.01e-06, "loss": 1.3998, "step": 167 }, { "epoch": 0.027873408270770252, "grad_norm": 35.170733706617995, "learning_rate": 5.040000000000001e-06, "loss": 1.3726, "step": 168 }, { "epoch": 0.028039321415239123, "grad_norm": 34.47083347313095, "learning_rate": 5.070000000000001e-06, "loss": 1.381, "step": 169 }, { "epoch": 0.028205234559707994, "grad_norm": 33.79057750635065, "learning_rate": 5.1e-06, "loss": 1.3674, "step": 170 }, { "epoch": 0.028371147704176862, "grad_norm": 32.91789621932166, "learning_rate": 5.13e-06, "loss": 1.3759, "step": 171 }, { "epoch": 0.028537060848645733, "grad_norm": 33.56814465795624, "learning_rate": 5.1600000000000006e-06, "loss": 1.3391, "step": 172 }, { "epoch": 0.028702973993114604, "grad_norm": 32.6150380329613, "learning_rate": 5.19e-06, "loss": 1.3314, "step": 173 }, { "epoch": 0.028868887137583475, "grad_norm": 31.955882800699367, "learning_rate": 5.22e-06, "loss": 1.3339, "step": 174 }, { "epoch": 0.029034800282052346, "grad_norm": 31.40356236669499, "learning_rate": 5.2500000000000006e-06, "loss": 1.3184, "step": 175 }, { "epoch": 0.029200713426521217, "grad_norm": 30.681143239334805, "learning_rate": 5.28e-06, "loss": 1.3139, "step": 176 }, { "epoch": 0.02936662657099009, "grad_norm": 30.21502637398018, "learning_rate": 5.31e-06, "loss": 1.3107, "step": 177 }, { "epoch": 0.029532539715458956, "grad_norm": 29.92516372692031, "learning_rate": 5.3400000000000005e-06, "loss": 1.3068, "step": 178 }, { "epoch": 0.029698452859927827, "grad_norm": 30.018033153252357, "learning_rate": 5.37e-06, "loss": 1.2877, "step": 179 }, { "epoch": 0.029864366004396698, "grad_norm": 29.63626860215389, "learning_rate": 5.4e-06, "loss": 1.2757, "step": 180 }, { "epoch": 0.03003027914886557, "grad_norm": 29.205952566984713, "learning_rate": 5.4300000000000005e-06, "loss": 1.2624, "step": 181 }, { "epoch": 0.03019619229333444, "grad_norm": 28.323326976872018, "learning_rate": 5.46e-06, "loss": 1.2636, "step": 182 }, { "epoch": 0.03036210543780331, "grad_norm": 28.327391694947394, "learning_rate": 5.49e-06, "loss": 1.2445, "step": 183 }, { "epoch": 0.03052801858227218, "grad_norm": 27.538465732946584, "learning_rate": 5.52e-06, "loss": 1.2553, "step": 184 }, { "epoch": 0.03069393172674105, "grad_norm": 27.86388136213645, "learning_rate": 5.55e-06, "loss": 1.2187, "step": 185 }, { "epoch": 0.03085984487120992, "grad_norm": 27.136153982898104, "learning_rate": 5.58e-06, "loss": 1.2264, "step": 186 }, { "epoch": 0.031025758015678792, "grad_norm": 26.542801786628125, "learning_rate": 5.61e-06, "loss": 1.2271, "step": 187 }, { "epoch": 0.031191671160147663, "grad_norm": 26.229276061707733, "learning_rate": 5.64e-06, "loss": 1.2213, "step": 188 }, { "epoch": 0.031357584304616534, "grad_norm": 25.655150823462403, "learning_rate": 5.67e-06, "loss": 1.2131, "step": 189 }, { "epoch": 0.031523497449085405, "grad_norm": 25.43762363935692, "learning_rate": 5.7e-06, "loss": 1.2009, "step": 190 }, { "epoch": 0.031689410593554276, "grad_norm": 24.606418616190336, "learning_rate": 5.73e-06, "loss": 1.2105, "step": 191 }, { "epoch": 0.03185532373802315, "grad_norm": 24.297360245690335, "learning_rate": 5.76e-06, "loss": 1.1902, "step": 192 }, { "epoch": 0.03202123688249202, "grad_norm": 24.37416138144368, "learning_rate": 5.79e-06, "loss": 1.183, "step": 193 }, { "epoch": 0.03218715002696088, "grad_norm": 23.58889140972946, "learning_rate": 5.819999999999999e-06, "loss": 1.1951, "step": 194 }, { "epoch": 0.032353063171429754, "grad_norm": 23.192738038885675, "learning_rate": 5.850000000000001e-06, "loss": 1.1846, "step": 195 }, { "epoch": 0.032518976315898625, "grad_norm": 23.067808075903493, "learning_rate": 5.8800000000000005e-06, "loss": 1.189, "step": 196 }, { "epoch": 0.032684889460367496, "grad_norm": 22.129640030521287, "learning_rate": 5.909999999999999e-06, "loss": 1.1902, "step": 197 }, { "epoch": 0.03285080260483637, "grad_norm": 22.580085955752832, "learning_rate": 5.940000000000001e-06, "loss": 1.142, "step": 198 }, { "epoch": 0.03301671574930524, "grad_norm": 22.001422436219375, "learning_rate": 5.9700000000000004e-06, "loss": 1.1451, "step": 199 }, { "epoch": 0.03318262889377411, "grad_norm": 21.511559365669424, "learning_rate": 6e-06, "loss": 1.157, "step": 200 }, { "epoch": 0.03334854203824298, "grad_norm": 21.350738678737287, "learning_rate": 6.030000000000001e-06, "loss": 1.1548, "step": 201 }, { "epoch": 0.03351445518271185, "grad_norm": 21.213322714991193, "learning_rate": 6.0600000000000004e-06, "loss": 1.1302, "step": 202 }, { "epoch": 0.03368036832718072, "grad_norm": 20.96284799616787, "learning_rate": 6.09e-06, "loss": 1.13, "step": 203 }, { "epoch": 0.03384628147164959, "grad_norm": 20.239673660306355, "learning_rate": 6.120000000000001e-06, "loss": 1.1364, "step": 204 }, { "epoch": 0.034012194616118464, "grad_norm": 20.140657358498693, "learning_rate": 6.15e-06, "loss": 1.1313, "step": 205 }, { "epoch": 0.034178107760587335, "grad_norm": 19.752608255172916, "learning_rate": 6.18e-06, "loss": 1.1187, "step": 206 }, { "epoch": 0.034344020905056206, "grad_norm": 19.351629911713136, "learning_rate": 6.21e-06, "loss": 1.1194, "step": 207 }, { "epoch": 0.03450993404952507, "grad_norm": 19.1529582255177, "learning_rate": 6.24e-06, "loss": 1.1175, "step": 208 }, { "epoch": 0.03467584719399394, "grad_norm": 18.921466270786127, "learning_rate": 6.27e-06, "loss": 1.1145, "step": 209 }, { "epoch": 0.03484176033846281, "grad_norm": 18.439640178944842, "learning_rate": 6.3e-06, "loss": 1.106, "step": 210 }, { "epoch": 0.035007673482931684, "grad_norm": 18.592213045939108, "learning_rate": 6.33e-06, "loss": 1.0832, "step": 211 }, { "epoch": 0.035173586627400555, "grad_norm": 18.02782578467052, "learning_rate": 6.36e-06, "loss": 1.1019, "step": 212 }, { "epoch": 0.035339499771869426, "grad_norm": 18.0166852155525, "learning_rate": 6.39e-06, "loss": 1.0737, "step": 213 }, { "epoch": 0.0355054129163383, "grad_norm": 17.781239753798864, "learning_rate": 6.42e-06, "loss": 1.0775, "step": 214 }, { "epoch": 0.03567132606080717, "grad_norm": 17.221262259530782, "learning_rate": 6.45e-06, "loss": 1.1112, "step": 215 }, { "epoch": 0.03583723920527604, "grad_norm": 16.865632842715378, "learning_rate": 6.48e-06, "loss": 1.0933, "step": 216 }, { "epoch": 0.03600315234974491, "grad_norm": 16.90995396668004, "learning_rate": 6.51e-06, "loss": 1.0786, "step": 217 }, { "epoch": 0.03616906549421378, "grad_norm": 16.502587446764466, "learning_rate": 6.54e-06, "loss": 1.0824, "step": 218 }, { "epoch": 0.03633497863868265, "grad_norm": 16.396720584098915, "learning_rate": 6.57e-06, "loss": 1.0666, "step": 219 }, { "epoch": 0.03650089178315152, "grad_norm": 16.0958491226786, "learning_rate": 6.5999999999999995e-06, "loss": 1.0587, "step": 220 }, { "epoch": 0.03666680492762039, "grad_norm": 15.781394542209787, "learning_rate": 6.63e-06, "loss": 1.0913, "step": 221 }, { "epoch": 0.03683271807208926, "grad_norm": 15.526150669252083, "learning_rate": 6.66e-06, "loss": 1.0685, "step": 222 }, { "epoch": 0.03699863121655813, "grad_norm": 15.295714129778606, "learning_rate": 6.6899999999999995e-06, "loss": 1.0688, "step": 223 }, { "epoch": 0.037164544361027, "grad_norm": 14.962595843836096, "learning_rate": 6.720000000000001e-06, "loss": 1.0663, "step": 224 }, { "epoch": 0.03733045750549587, "grad_norm": 14.845441469454471, "learning_rate": 6.75e-06, "loss": 1.0487, "step": 225 }, { "epoch": 0.03749637064996474, "grad_norm": 14.669509278037967, "learning_rate": 6.7799999999999995e-06, "loss": 1.058, "step": 226 }, { "epoch": 0.037662283794433614, "grad_norm": 14.550072253222602, "learning_rate": 6.810000000000001e-06, "loss": 1.0425, "step": 227 }, { "epoch": 0.037828196938902485, "grad_norm": 14.382438880655664, "learning_rate": 6.840000000000001e-06, "loss": 1.0367, "step": 228 }, { "epoch": 0.037994110083371356, "grad_norm": 14.274435292405178, "learning_rate": 6.87e-06, "loss": 1.0414, "step": 229 }, { "epoch": 0.03816002322784023, "grad_norm": 14.07020130770808, "learning_rate": 6.900000000000001e-06, "loss": 1.0264, "step": 230 }, { "epoch": 0.0383259363723091, "grad_norm": 13.49479267804676, "learning_rate": 6.9300000000000006e-06, "loss": 1.0438, "step": 231 }, { "epoch": 0.03849184951677797, "grad_norm": 13.554935453999114, "learning_rate": 6.96e-06, "loss": 1.0426, "step": 232 }, { "epoch": 0.03865776266124684, "grad_norm": 13.55923875969836, "learning_rate": 6.99e-06, "loss": 0.9961, "step": 233 }, { "epoch": 0.038823675805715704, "grad_norm": 12.9140401944659, "learning_rate": 7.0200000000000006e-06, "loss": 1.0416, "step": 234 }, { "epoch": 0.038989588950184575, "grad_norm": 13.324650059397225, "learning_rate": 7.05e-06, "loss": 0.9825, "step": 235 }, { "epoch": 0.03915550209465345, "grad_norm": 12.937116536824265, "learning_rate": 7.08e-06, "loss": 1.0103, "step": 236 }, { "epoch": 0.03932141523912232, "grad_norm": 12.744639401075105, "learning_rate": 7.1100000000000005e-06, "loss": 1.0198, "step": 237 }, { "epoch": 0.03948732838359119, "grad_norm": 12.472143264741263, "learning_rate": 7.14e-06, "loss": 1.0117, "step": 238 }, { "epoch": 0.03965324152806006, "grad_norm": 12.428804263701728, "learning_rate": 7.17e-06, "loss": 0.9982, "step": 239 }, { "epoch": 0.03981915467252893, "grad_norm": 12.136592772982283, "learning_rate": 7.2000000000000005e-06, "loss": 1.0175, "step": 240 }, { "epoch": 0.0399850678169978, "grad_norm": 12.053549043031309, "learning_rate": 7.23e-06, "loss": 1.0027, "step": 241 }, { "epoch": 0.04015098096146667, "grad_norm": 11.843388086833002, "learning_rate": 7.26e-06, "loss": 1.0132, "step": 242 }, { "epoch": 0.040316894105935544, "grad_norm": 11.628322273315547, "learning_rate": 7.2900000000000005e-06, "loss": 1.0017, "step": 243 }, { "epoch": 0.040482807250404415, "grad_norm": 11.553787753691592, "learning_rate": 7.32e-06, "loss": 0.9988, "step": 244 }, { "epoch": 0.040648720394873286, "grad_norm": 11.308136922354493, "learning_rate": 7.35e-06, "loss": 1.0267, "step": 245 }, { "epoch": 0.04081463353934216, "grad_norm": 11.038599061997045, "learning_rate": 7.38e-06, "loss": 1.0145, "step": 246 }, { "epoch": 0.04098054668381103, "grad_norm": 11.181030653605738, "learning_rate": 7.41e-06, "loss": 0.9889, "step": 247 }, { "epoch": 0.04114645982827989, "grad_norm": 10.86832281657928, "learning_rate": 7.44e-06, "loss": 0.9852, "step": 248 }, { "epoch": 0.04131237297274876, "grad_norm": 10.554939136339671, "learning_rate": 7.47e-06, "loss": 0.982, "step": 249 }, { "epoch": 0.041478286117217635, "grad_norm": 10.568145863035252, "learning_rate": 7.5e-06, "loss": 0.9821, "step": 250 }, { "epoch": 0.041644199261686506, "grad_norm": 10.308386741012788, "learning_rate": 7.53e-06, "loss": 0.9996, "step": 251 }, { "epoch": 0.04181011240615538, "grad_norm": 10.372751036409273, "learning_rate": 7.56e-06, "loss": 0.9551, "step": 252 }, { "epoch": 0.04197602555062425, "grad_norm": 10.072016333214728, "learning_rate": 7.590000000000001e-06, "loss": 0.9793, "step": 253 }, { "epoch": 0.04214193869509312, "grad_norm": 9.971211572091233, "learning_rate": 7.62e-06, "loss": 0.9783, "step": 254 }, { "epoch": 0.04230785183956199, "grad_norm": 9.805606829989866, "learning_rate": 7.65e-06, "loss": 0.9777, "step": 255 }, { "epoch": 0.04247376498403086, "grad_norm": 9.511714368090708, "learning_rate": 7.680000000000001e-06, "loss": 0.9896, "step": 256 }, { "epoch": 0.04263967812849973, "grad_norm": 9.793257406261617, "learning_rate": 7.71e-06, "loss": 0.9592, "step": 257 }, { "epoch": 0.0428055912729686, "grad_norm": 9.489765564713549, "learning_rate": 7.74e-06, "loss": 0.9677, "step": 258 }, { "epoch": 0.042971504417437474, "grad_norm": 9.522682824223127, "learning_rate": 7.77e-06, "loss": 0.9522, "step": 259 }, { "epoch": 0.043137417561906345, "grad_norm": 9.31316057534312, "learning_rate": 7.8e-06, "loss": 0.9523, "step": 260 }, { "epoch": 0.04330333070637521, "grad_norm": 9.191006657018935, "learning_rate": 7.83e-06, "loss": 0.9491, "step": 261 }, { "epoch": 0.04346924385084408, "grad_norm": 9.005774575637536, "learning_rate": 7.86e-06, "loss": 0.9776, "step": 262 }, { "epoch": 0.04363515699531295, "grad_norm": 8.944376972994409, "learning_rate": 7.89e-06, "loss": 0.9724, "step": 263 }, { "epoch": 0.04380107013978182, "grad_norm": 8.880616489633164, "learning_rate": 7.92e-06, "loss": 0.9509, "step": 264 }, { "epoch": 0.043966983284250694, "grad_norm": 10.295937531650397, "learning_rate": 7.95e-06, "loss": 0.9555, "step": 265 }, { "epoch": 0.044132896428719565, "grad_norm": 8.742470979369012, "learning_rate": 7.98e-06, "loss": 0.9365, "step": 266 }, { "epoch": 0.044298809573188436, "grad_norm": 8.59286130501365, "learning_rate": 8.01e-06, "loss": 0.9479, "step": 267 }, { "epoch": 0.04446472271765731, "grad_norm": 8.522351957971027, "learning_rate": 8.04e-06, "loss": 0.9424, "step": 268 }, { "epoch": 0.04463063586212618, "grad_norm": 8.330738085855412, "learning_rate": 8.069999999999999e-06, "loss": 0.9485, "step": 269 }, { "epoch": 0.04479654900659505, "grad_norm": 8.18707701163648, "learning_rate": 8.1e-06, "loss": 0.9649, "step": 270 }, { "epoch": 0.04496246215106392, "grad_norm": 8.222407203836411, "learning_rate": 8.13e-06, "loss": 0.9468, "step": 271 }, { "epoch": 0.04512837529553279, "grad_norm": 8.094229801443008, "learning_rate": 8.16e-06, "loss": 0.9418, "step": 272 }, { "epoch": 0.04529428844000166, "grad_norm": 7.940256424590958, "learning_rate": 8.190000000000001e-06, "loss": 0.9553, "step": 273 }, { "epoch": 0.045460201584470526, "grad_norm": 7.813320194657039, "learning_rate": 8.220000000000001e-06, "loss": 0.9505, "step": 274 }, { "epoch": 0.0456261147289394, "grad_norm": 7.854878419977774, "learning_rate": 8.25e-06, "loss": 0.9382, "step": 275 }, { "epoch": 0.04579202787340827, "grad_norm": 7.774670072265875, "learning_rate": 8.28e-06, "loss": 0.9364, "step": 276 }, { "epoch": 0.04595794101787714, "grad_norm": 7.567068884700512, "learning_rate": 8.31e-06, "loss": 0.9388, "step": 277 }, { "epoch": 0.04612385416234601, "grad_norm": 7.605183779660865, "learning_rate": 8.34e-06, "loss": 0.9403, "step": 278 }, { "epoch": 0.04628976730681488, "grad_norm": 7.566454167028441, "learning_rate": 8.370000000000001e-06, "loss": 0.9218, "step": 279 }, { "epoch": 0.04645568045128375, "grad_norm": 7.3469120826750265, "learning_rate": 8.400000000000001e-06, "loss": 0.932, "step": 280 }, { "epoch": 0.046621593595752624, "grad_norm": 7.26408565369186, "learning_rate": 8.43e-06, "loss": 0.9332, "step": 281 }, { "epoch": 0.046787506740221495, "grad_norm": 7.126714812653672, "learning_rate": 8.46e-06, "loss": 0.9408, "step": 282 }, { "epoch": 0.046953419884690366, "grad_norm": 7.160875540112923, "learning_rate": 8.49e-06, "loss": 0.9326, "step": 283 }, { "epoch": 0.04711933302915924, "grad_norm": 6.989719202314339, "learning_rate": 8.52e-06, "loss": 0.9258, "step": 284 }, { "epoch": 0.04728524617362811, "grad_norm": 6.897188941855704, "learning_rate": 8.55e-06, "loss": 0.9464, "step": 285 }, { "epoch": 0.04745115931809698, "grad_norm": 6.841146949131663, "learning_rate": 8.580000000000001e-06, "loss": 0.9452, "step": 286 }, { "epoch": 0.04761707246256585, "grad_norm": 6.841546310249179, "learning_rate": 8.61e-06, "loss": 0.9178, "step": 287 }, { "epoch": 0.047782985607034714, "grad_norm": 6.544761686641858, "learning_rate": 8.64e-06, "loss": 0.9603, "step": 288 }, { "epoch": 0.047948898751503585, "grad_norm": 6.614087506733643, "learning_rate": 8.67e-06, "loss": 0.916, "step": 289 }, { "epoch": 0.048114811895972456, "grad_norm": 6.6124048640237945, "learning_rate": 8.7e-06, "loss": 0.9129, "step": 290 }, { "epoch": 0.04828072504044133, "grad_norm": 6.408146101594846, "learning_rate": 8.73e-06, "loss": 0.9225, "step": 291 }, { "epoch": 0.0484466381849102, "grad_norm": 6.502364355003014, "learning_rate": 8.76e-06, "loss": 0.9031, "step": 292 }, { "epoch": 0.04861255132937907, "grad_norm": 6.331848917593811, "learning_rate": 8.79e-06, "loss": 0.9251, "step": 293 }, { "epoch": 0.04877846447384794, "grad_norm": 6.312668466628936, "learning_rate": 8.82e-06, "loss": 0.9046, "step": 294 }, { "epoch": 0.04894437761831681, "grad_norm": 6.120718833347466, "learning_rate": 8.85e-06, "loss": 0.9387, "step": 295 }, { "epoch": 0.04911029076278568, "grad_norm": 6.146017531230814, "learning_rate": 8.88e-06, "loss": 0.9065, "step": 296 }, { "epoch": 0.049276203907254554, "grad_norm": 6.2298783704404475, "learning_rate": 8.91e-06, "loss": 0.8897, "step": 297 }, { "epoch": 0.049442117051723425, "grad_norm": 6.01881306966524, "learning_rate": 8.939999999999999e-06, "loss": 0.9179, "step": 298 }, { "epoch": 0.049608030196192296, "grad_norm": 6.018783760260834, "learning_rate": 8.97e-06, "loss": 0.898, "step": 299 }, { "epoch": 0.04977394334066117, "grad_norm": 5.909905610679987, "learning_rate": 9e-06, "loss": 0.9119, "step": 300 }, { "epoch": 0.04993985648513003, "grad_norm": 5.734121028769744, "learning_rate": 8.999999975052319e-06, "loss": 0.9342, "step": 301 }, { "epoch": 0.0501057696295989, "grad_norm": 5.78200438759076, "learning_rate": 8.999999900209277e-06, "loss": 0.9036, "step": 302 }, { "epoch": 0.05027168277406777, "grad_norm": 5.667277372988322, "learning_rate": 8.999999775470875e-06, "loss": 0.9075, "step": 303 }, { "epoch": 0.050437595918536644, "grad_norm": 5.686905178208374, "learning_rate": 8.999999600837113e-06, "loss": 0.9196, "step": 304 }, { "epoch": 0.050603509063005515, "grad_norm": 5.589145090418338, "learning_rate": 8.999999376307994e-06, "loss": 0.9139, "step": 305 }, { "epoch": 0.050769422207474386, "grad_norm": 5.53466927124937, "learning_rate": 8.99999910188352e-06, "loss": 0.9135, "step": 306 }, { "epoch": 0.05093533535194326, "grad_norm": 9.215054382271381, "learning_rate": 8.999998777563696e-06, "loss": 0.9075, "step": 307 }, { "epoch": 0.05110124849641213, "grad_norm": 5.50940374670424, "learning_rate": 8.99999840334852e-06, "loss": 0.906, "step": 308 }, { "epoch": 0.051267161640881, "grad_norm": 5.315125426110582, "learning_rate": 8.999997979238004e-06, "loss": 0.9362, "step": 309 }, { "epoch": 0.05143307478534987, "grad_norm": 5.456218191120494, "learning_rate": 8.999997505232148e-06, "loss": 0.8974, "step": 310 }, { "epoch": 0.05159898792981874, "grad_norm": 5.3458253139310905, "learning_rate": 8.999996981330957e-06, "loss": 0.9052, "step": 311 }, { "epoch": 0.05176490107428761, "grad_norm": 5.428852344895628, "learning_rate": 8.999996407534438e-06, "loss": 0.8927, "step": 312 }, { "epoch": 0.051930814218756484, "grad_norm": 5.362891578705491, "learning_rate": 8.999995783842599e-06, "loss": 0.8913, "step": 313 }, { "epoch": 0.052096727363225355, "grad_norm": 5.2337978422263385, "learning_rate": 8.999995110255444e-06, "loss": 0.9175, "step": 314 }, { "epoch": 0.05226264050769422, "grad_norm": 5.176887043314268, "learning_rate": 8.999994386772981e-06, "loss": 0.9003, "step": 315 }, { "epoch": 0.05242855365216309, "grad_norm": 5.184446115994752, "learning_rate": 8.999993613395219e-06, "loss": 0.8943, "step": 316 }, { "epoch": 0.05259446679663196, "grad_norm": 5.159248082806763, "learning_rate": 8.999992790122167e-06, "loss": 0.8893, "step": 317 }, { "epoch": 0.05276037994110083, "grad_norm": 5.083647157877446, "learning_rate": 8.999991916953832e-06, "loss": 0.8921, "step": 318 }, { "epoch": 0.0529262930855697, "grad_norm": 5.0793184864339045, "learning_rate": 8.999990993890227e-06, "loss": 0.8891, "step": 319 }, { "epoch": 0.053092206230038574, "grad_norm": 4.969318514135246, "learning_rate": 8.99999002093136e-06, "loss": 0.9008, "step": 320 }, { "epoch": 0.053258119374507445, "grad_norm": 4.866162400058146, "learning_rate": 8.999988998077239e-06, "loss": 0.9074, "step": 321 }, { "epoch": 0.05342403251897632, "grad_norm": 4.871718580190094, "learning_rate": 8.999987925327882e-06, "loss": 0.9105, "step": 322 }, { "epoch": 0.05358994566344519, "grad_norm": 4.950770420419117, "learning_rate": 8.999986802683295e-06, "loss": 0.8792, "step": 323 }, { "epoch": 0.05375585880791406, "grad_norm": 4.8879460632967735, "learning_rate": 8.999985630143494e-06, "loss": 0.8691, "step": 324 }, { "epoch": 0.05392177195238293, "grad_norm": 4.776820682760442, "learning_rate": 8.999984407708489e-06, "loss": 0.9004, "step": 325 }, { "epoch": 0.0540876850968518, "grad_norm": 4.734078577215334, "learning_rate": 8.999983135378296e-06, "loss": 0.8811, "step": 326 }, { "epoch": 0.05425359824132067, "grad_norm": 4.702414692785676, "learning_rate": 8.99998181315293e-06, "loss": 0.9019, "step": 327 }, { "epoch": 0.054419511385789536, "grad_norm": 4.552715310423402, "learning_rate": 8.999980441032402e-06, "loss": 0.9042, "step": 328 }, { "epoch": 0.05458542453025841, "grad_norm": 4.507915660100879, "learning_rate": 8.999979019016731e-06, "loss": 0.9119, "step": 329 }, { "epoch": 0.05475133767472728, "grad_norm": 4.523492938568341, "learning_rate": 8.99997754710593e-06, "loss": 0.8932, "step": 330 }, { "epoch": 0.05491725081919615, "grad_norm": 4.6297463894007365, "learning_rate": 8.999976025300017e-06, "loss": 0.8477, "step": 331 }, { "epoch": 0.05508316396366502, "grad_norm": 4.51195869056777, "learning_rate": 8.999974453599007e-06, "loss": 0.8758, "step": 332 }, { "epoch": 0.05524907710813389, "grad_norm": 4.477645446414166, "learning_rate": 8.99997283200292e-06, "loss": 0.8538, "step": 333 }, { "epoch": 0.05541499025260276, "grad_norm": 4.393730342387632, "learning_rate": 8.999971160511772e-06, "loss": 0.8964, "step": 334 }, { "epoch": 0.05558090339707163, "grad_norm": 4.332577691208558, "learning_rate": 8.999969439125582e-06, "loss": 0.9002, "step": 335 }, { "epoch": 0.055746816541540505, "grad_norm": 4.272904462844758, "learning_rate": 8.999967667844369e-06, "loss": 0.8936, "step": 336 }, { "epoch": 0.055912729686009376, "grad_norm": 4.284138641618394, "learning_rate": 8.999965846668153e-06, "loss": 0.8806, "step": 337 }, { "epoch": 0.05607864283047825, "grad_norm": 4.245997951478823, "learning_rate": 8.999963975596955e-06, "loss": 0.8933, "step": 338 }, { "epoch": 0.05624455597494712, "grad_norm": 4.262350308738906, "learning_rate": 8.999962054630794e-06, "loss": 0.8672, "step": 339 }, { "epoch": 0.05641046911941599, "grad_norm": 4.218320423707499, "learning_rate": 8.999960083769692e-06, "loss": 0.8697, "step": 340 }, { "epoch": 0.05657638226388485, "grad_norm": 4.161434572377091, "learning_rate": 8.999958063013673e-06, "loss": 0.8878, "step": 341 }, { "epoch": 0.056742295408353724, "grad_norm": 4.0848883777831855, "learning_rate": 8.999955992362754e-06, "loss": 0.9028, "step": 342 }, { "epoch": 0.056908208552822595, "grad_norm": 4.087674741684919, "learning_rate": 8.999953871816964e-06, "loss": 0.884, "step": 343 }, { "epoch": 0.057074121697291466, "grad_norm": 4.077627298421648, "learning_rate": 8.999951701376323e-06, "loss": 0.8733, "step": 344 }, { "epoch": 0.05724003484176034, "grad_norm": 4.014185881607656, "learning_rate": 8.999949481040857e-06, "loss": 0.873, "step": 345 }, { "epoch": 0.05740594798622921, "grad_norm": 4.048242476037428, "learning_rate": 8.999947210810587e-06, "loss": 0.8612, "step": 346 }, { "epoch": 0.05757186113069808, "grad_norm": 3.9790776791909286, "learning_rate": 8.999944890685543e-06, "loss": 0.8745, "step": 347 }, { "epoch": 0.05773777427516695, "grad_norm": 3.9412637498937935, "learning_rate": 8.999942520665747e-06, "loss": 0.8751, "step": 348 }, { "epoch": 0.05790368741963582, "grad_norm": 3.9243342606372216, "learning_rate": 8.999940100751228e-06, "loss": 0.8705, "step": 349 }, { "epoch": 0.05806960056410469, "grad_norm": 3.8615778390716713, "learning_rate": 8.99993763094201e-06, "loss": 0.8912, "step": 350 }, { "epoch": 0.058235513708573564, "grad_norm": 3.859140301595901, "learning_rate": 8.999935111238122e-06, "loss": 0.8694, "step": 351 }, { "epoch": 0.058401426853042435, "grad_norm": 3.822341835829405, "learning_rate": 8.999932541639593e-06, "loss": 0.8742, "step": 352 }, { "epoch": 0.058567339997511306, "grad_norm": 3.8184180233248943, "learning_rate": 8.99992992214645e-06, "loss": 0.8676, "step": 353 }, { "epoch": 0.05873325314198018, "grad_norm": 3.801210717065968, "learning_rate": 8.999927252758723e-06, "loss": 0.8662, "step": 354 }, { "epoch": 0.05889916628644904, "grad_norm": 3.780756273238635, "learning_rate": 8.99992453347644e-06, "loss": 0.8649, "step": 355 }, { "epoch": 0.05906507943091791, "grad_norm": 3.7474392141127906, "learning_rate": 8.999921764299633e-06, "loss": 0.8583, "step": 356 }, { "epoch": 0.05923099257538678, "grad_norm": 3.730060223515627, "learning_rate": 8.999918945228331e-06, "loss": 0.8589, "step": 357 }, { "epoch": 0.059396905719855654, "grad_norm": 3.6811758989495567, "learning_rate": 8.999916076262566e-06, "loss": 0.8568, "step": 358 }, { "epoch": 0.059562818864324525, "grad_norm": 3.6726514292781505, "learning_rate": 8.999913157402371e-06, "loss": 0.8573, "step": 359 }, { "epoch": 0.059728732008793396, "grad_norm": 3.595206525156515, "learning_rate": 8.999910188647777e-06, "loss": 0.8742, "step": 360 }, { "epoch": 0.05989464515326227, "grad_norm": 3.471677475337036, "learning_rate": 8.999907169998819e-06, "loss": 0.8869, "step": 361 }, { "epoch": 0.06006055829773114, "grad_norm": 3.5659572607511163, "learning_rate": 8.999904101455525e-06, "loss": 0.8576, "step": 362 }, { "epoch": 0.06022647144220001, "grad_norm": 3.494068972585631, "learning_rate": 8.999900983017934e-06, "loss": 0.8626, "step": 363 }, { "epoch": 0.06039238458666888, "grad_norm": 3.5366577459151984, "learning_rate": 8.99989781468608e-06, "loss": 0.8641, "step": 364 }, { "epoch": 0.06055829773113775, "grad_norm": 3.4640552235900026, "learning_rate": 8.999894596459998e-06, "loss": 0.8498, "step": 365 }, { "epoch": 0.06072421087560662, "grad_norm": 3.4627533254161778, "learning_rate": 8.999891328339722e-06, "loss": 0.8701, "step": 366 }, { "epoch": 0.060890124020075494, "grad_norm": 3.4543538438907455, "learning_rate": 8.99988801032529e-06, "loss": 0.8525, "step": 367 }, { "epoch": 0.06105603716454436, "grad_norm": 3.4310968757128895, "learning_rate": 8.999884642416736e-06, "loss": 0.8653, "step": 368 }, { "epoch": 0.06122195030901323, "grad_norm": 3.3538182281557583, "learning_rate": 8.999881224614101e-06, "loss": 0.8908, "step": 369 }, { "epoch": 0.0613878634534821, "grad_norm": 3.3905994838285163, "learning_rate": 8.999877756917422e-06, "loss": 0.8521, "step": 370 }, { "epoch": 0.06155377659795097, "grad_norm": 3.3231583523257604, "learning_rate": 8.999874239326737e-06, "loss": 0.865, "step": 371 }, { "epoch": 0.06171968974241984, "grad_norm": 3.3486181559656334, "learning_rate": 8.999870671842085e-06, "loss": 0.8565, "step": 372 }, { "epoch": 0.06188560288688871, "grad_norm": 3.391890189649907, "learning_rate": 8.999867054463503e-06, "loss": 0.8344, "step": 373 }, { "epoch": 0.062051516031357584, "grad_norm": 3.2932669339414287, "learning_rate": 8.999863387191034e-06, "loss": 0.8668, "step": 374 }, { "epoch": 0.062217429175826455, "grad_norm": 3.350955000113915, "learning_rate": 8.99985967002472e-06, "loss": 0.8253, "step": 375 }, { "epoch": 0.062383342320295326, "grad_norm": 3.2330710537829304, "learning_rate": 8.9998559029646e-06, "loss": 0.8684, "step": 376 }, { "epoch": 0.0625492554647642, "grad_norm": 3.2098638665770243, "learning_rate": 8.999852086010716e-06, "loss": 0.8628, "step": 377 }, { "epoch": 0.06271516860923307, "grad_norm": 3.1892699537908156, "learning_rate": 8.99984821916311e-06, "loss": 0.8696, "step": 378 }, { "epoch": 0.06288108175370194, "grad_norm": 3.2334974851490363, "learning_rate": 8.999844302421825e-06, "loss": 0.8387, "step": 379 }, { "epoch": 0.06304699489817081, "grad_norm": 3.238152271931643, "learning_rate": 8.999840335786906e-06, "loss": 0.8276, "step": 380 }, { "epoch": 0.06321290804263968, "grad_norm": 3.1331529105356974, "learning_rate": 8.999836319258393e-06, "loss": 0.8512, "step": 381 }, { "epoch": 0.06337882118710855, "grad_norm": 3.152663308943621, "learning_rate": 8.999832252836337e-06, "loss": 0.8384, "step": 382 }, { "epoch": 0.06354473433157742, "grad_norm": 3.084432904970283, "learning_rate": 8.999828136520776e-06, "loss": 0.8584, "step": 383 }, { "epoch": 0.0637106474760463, "grad_norm": 3.1120289343036838, "learning_rate": 8.999823970311762e-06, "loss": 0.8443, "step": 384 }, { "epoch": 0.06387656062051517, "grad_norm": 3.1157944117751373, "learning_rate": 8.999819754209336e-06, "loss": 0.8355, "step": 385 }, { "epoch": 0.06404247376498404, "grad_norm": 3.016422516063702, "learning_rate": 8.999815488213547e-06, "loss": 0.8718, "step": 386 }, { "epoch": 0.06420838690945291, "grad_norm": 3.0280804164647277, "learning_rate": 8.999811172324443e-06, "loss": 0.8525, "step": 387 }, { "epoch": 0.06437430005392177, "grad_norm": 3.036473880069474, "learning_rate": 8.999806806542072e-06, "loss": 0.8644, "step": 388 }, { "epoch": 0.06454021319839064, "grad_norm": 2.991716780189957, "learning_rate": 8.99980239086648e-06, "loss": 0.8474, "step": 389 }, { "epoch": 0.06470612634285951, "grad_norm": 3.0744448937466635, "learning_rate": 8.999797925297717e-06, "loss": 0.8251, "step": 390 }, { "epoch": 0.06487203948732838, "grad_norm": 2.970959395794443, "learning_rate": 8.999793409835835e-06, "loss": 0.8497, "step": 391 }, { "epoch": 0.06503795263179725, "grad_norm": 2.942697893932714, "learning_rate": 8.99978884448088e-06, "loss": 0.8497, "step": 392 }, { "epoch": 0.06520386577626612, "grad_norm": 2.9217535841307374, "learning_rate": 8.999784229232905e-06, "loss": 0.8638, "step": 393 }, { "epoch": 0.06536977892073499, "grad_norm": 2.9558083426986483, "learning_rate": 8.999779564091961e-06, "loss": 0.8274, "step": 394 }, { "epoch": 0.06553569206520386, "grad_norm": 2.885361223118148, "learning_rate": 8.999774849058102e-06, "loss": 0.8583, "step": 395 }, { "epoch": 0.06570160520967273, "grad_norm": 2.8800300061377473, "learning_rate": 8.999770084131374e-06, "loss": 0.8499, "step": 396 }, { "epoch": 0.0658675183541416, "grad_norm": 2.8665971529078695, "learning_rate": 8.999765269311836e-06, "loss": 0.8489, "step": 397 }, { "epoch": 0.06603343149861048, "grad_norm": 2.8344547998127103, "learning_rate": 8.999760404599538e-06, "loss": 0.8432, "step": 398 }, { "epoch": 0.06619934464307935, "grad_norm": 2.839791425822409, "learning_rate": 8.999755489994537e-06, "loss": 0.8407, "step": 399 }, { "epoch": 0.06636525778754822, "grad_norm": 2.828150592505063, "learning_rate": 8.999750525496884e-06, "loss": 0.8217, "step": 400 }, { "epoch": 0.06585511054543962, "grad_norm": 2.791449185667969, "learning_rate": 8.99975071828438e-06, "loss": 0.8651, "step": 401 }, { "epoch": 0.06601933775378235, "grad_norm": 2.799198588104831, "learning_rate": 8.999745757622507e-06, "loss": 0.8615, "step": 402 }, { "epoch": 0.0661835649621251, "grad_norm": 2.731196238394043, "learning_rate": 8.99974074808897e-06, "loss": 0.87, "step": 403 }, { "epoch": 0.06634779217046784, "grad_norm": 2.7669579734667407, "learning_rate": 8.999735689683818e-06, "loss": 0.8304, "step": 404 }, { "epoch": 0.06651201937881059, "grad_norm": 2.6933917829612226, "learning_rate": 8.999730582407112e-06, "loss": 0.8645, "step": 405 }, { "epoch": 0.06667624658715332, "grad_norm": 2.7037937388444493, "learning_rate": 8.999725426258905e-06, "loss": 0.8522, "step": 406 }, { "epoch": 0.06684047379549607, "grad_norm": 2.6529365860710947, "learning_rate": 8.999720221239252e-06, "loss": 0.8665, "step": 407 }, { "epoch": 0.0670047010038388, "grad_norm": 2.6958361564015623, "learning_rate": 8.999714967348209e-06, "loss": 0.8421, "step": 408 }, { "epoch": 0.06716892821218155, "grad_norm": 2.679623752008986, "learning_rate": 8.999709664585836e-06, "loss": 0.8444, "step": 409 }, { "epoch": 0.0673331554205243, "grad_norm": 2.6846073911898247, "learning_rate": 8.999704312952188e-06, "loss": 0.8225, "step": 410 }, { "epoch": 0.06749738262886704, "grad_norm": 2.6357646834424733, "learning_rate": 8.999698912447324e-06, "loss": 0.8428, "step": 411 }, { "epoch": 0.06766160983720979, "grad_norm": 2.5864163075261515, "learning_rate": 8.999693463071303e-06, "loss": 0.8647, "step": 412 }, { "epoch": 0.06782583704555252, "grad_norm": 2.649716139853906, "learning_rate": 8.999687964824184e-06, "loss": 0.8336, "step": 413 }, { "epoch": 0.06799006425389527, "grad_norm": 2.6697160350910982, "learning_rate": 8.999682417706028e-06, "loss": 0.8315, "step": 414 }, { "epoch": 0.068154291462238, "grad_norm": 2.614544893051611, "learning_rate": 8.999676821716893e-06, "loss": 0.833, "step": 415 }, { "epoch": 0.06831851867058075, "grad_norm": 2.605311057811457, "learning_rate": 8.99967117685684e-06, "loss": 0.8354, "step": 416 }, { "epoch": 0.06848274587892349, "grad_norm": 2.512994946017452, "learning_rate": 8.999665483125932e-06, "loss": 0.8527, "step": 417 }, { "epoch": 0.06864697308726624, "grad_norm": 2.532102472565359, "learning_rate": 8.999659740524227e-06, "loss": 0.8526, "step": 418 }, { "epoch": 0.06881120029560897, "grad_norm": 2.537552292844787, "learning_rate": 8.999653949051792e-06, "loss": 0.8593, "step": 419 }, { "epoch": 0.06897542750395172, "grad_norm": 2.520981683215179, "learning_rate": 8.999648108708689e-06, "loss": 0.8409, "step": 420 }, { "epoch": 0.06913965471229445, "grad_norm": 2.5307279062339947, "learning_rate": 8.999642219494979e-06, "loss": 0.8544, "step": 421 }, { "epoch": 0.0693038819206372, "grad_norm": 2.513981724612271, "learning_rate": 8.999636281410728e-06, "loss": 0.838, "step": 422 }, { "epoch": 0.06946810912897994, "grad_norm": 2.5196092111609443, "learning_rate": 8.999630294456001e-06, "loss": 0.822, "step": 423 }, { "epoch": 0.06963233633732269, "grad_norm": 2.501508735280606, "learning_rate": 8.99962425863086e-06, "loss": 0.814, "step": 424 }, { "epoch": 0.06979656354566542, "grad_norm": 2.463190118948678, "learning_rate": 8.999618173935375e-06, "loss": 0.821, "step": 425 }, { "epoch": 0.06996079075400817, "grad_norm": 2.485170388269591, "learning_rate": 8.999612040369608e-06, "loss": 0.8129, "step": 426 }, { "epoch": 0.07012501796235092, "grad_norm": 2.4535480317174696, "learning_rate": 8.999605857933626e-06, "loss": 0.8204, "step": 427 }, { "epoch": 0.07028924517069365, "grad_norm": 2.4297331854286934, "learning_rate": 8.999599626627498e-06, "loss": 0.8228, "step": 428 }, { "epoch": 0.0704534723790364, "grad_norm": 2.412420279962346, "learning_rate": 8.999593346451292e-06, "loss": 0.8317, "step": 429 }, { "epoch": 0.07061769958737914, "grad_norm": 2.3821068613707364, "learning_rate": 8.999587017405074e-06, "loss": 0.8496, "step": 430 }, { "epoch": 0.07078192679572189, "grad_norm": 2.4590557883994086, "learning_rate": 8.999580639488916e-06, "loss": 0.8142, "step": 431 }, { "epoch": 0.07094615400406462, "grad_norm": 2.4087171883288763, "learning_rate": 8.999574212702883e-06, "loss": 0.8401, "step": 432 }, { "epoch": 0.07111038121240737, "grad_norm": 2.375466734189583, "learning_rate": 8.99956773704705e-06, "loss": 0.8161, "step": 433 }, { "epoch": 0.0712746084207501, "grad_norm": 2.377908447736706, "learning_rate": 8.999561212521482e-06, "loss": 0.8236, "step": 434 }, { "epoch": 0.07143883562909285, "grad_norm": 2.3169519503998264, "learning_rate": 8.999554639126252e-06, "loss": 0.8426, "step": 435 }, { "epoch": 0.07160306283743559, "grad_norm": 2.3362968144154816, "learning_rate": 8.999548016861435e-06, "loss": 0.8359, "step": 436 }, { "epoch": 0.07176729004577834, "grad_norm": 2.386407267700876, "learning_rate": 8.999541345727095e-06, "loss": 0.8029, "step": 437 }, { "epoch": 0.07193151725412107, "grad_norm": 2.3200737690092303, "learning_rate": 8.999534625723312e-06, "loss": 0.8193, "step": 438 }, { "epoch": 0.07209574446246382, "grad_norm": 2.340620712080502, "learning_rate": 8.999527856850155e-06, "loss": 0.8179, "step": 439 }, { "epoch": 0.07225997167080656, "grad_norm": 2.3005341648037216, "learning_rate": 8.999521039107699e-06, "loss": 0.8352, "step": 440 }, { "epoch": 0.0724241988791493, "grad_norm": 2.268310807131279, "learning_rate": 8.999514172496018e-06, "loss": 0.8584, "step": 441 }, { "epoch": 0.07258842608749204, "grad_norm": 2.280231628453319, "learning_rate": 8.999507257015185e-06, "loss": 0.8265, "step": 442 }, { "epoch": 0.07275265329583479, "grad_norm": 2.2563491434376366, "learning_rate": 8.999500292665276e-06, "loss": 0.8438, "step": 443 }, { "epoch": 0.07291688050417754, "grad_norm": 2.2687198666635093, "learning_rate": 8.999493279446368e-06, "loss": 0.8342, "step": 444 }, { "epoch": 0.07308110771252027, "grad_norm": 2.230140831740877, "learning_rate": 8.999486217358537e-06, "loss": 0.8238, "step": 445 }, { "epoch": 0.07324533492086302, "grad_norm": 2.210105023067696, "learning_rate": 8.999479106401858e-06, "loss": 0.8281, "step": 446 }, { "epoch": 0.07340956212920575, "grad_norm": 2.22721070539442, "learning_rate": 8.999471946576406e-06, "loss": 0.8255, "step": 447 }, { "epoch": 0.0735737893375485, "grad_norm": 2.1865500694065423, "learning_rate": 8.999464737882265e-06, "loss": 0.8383, "step": 448 }, { "epoch": 0.07373801654589124, "grad_norm": 2.2143774580363145, "learning_rate": 8.999457480319511e-06, "loss": 0.8283, "step": 449 }, { "epoch": 0.07390224375423399, "grad_norm": 2.1718648151396462, "learning_rate": 8.99945017388822e-06, "loss": 0.8467, "step": 450 }, { "epoch": 0.07406647096257672, "grad_norm": 2.206728470233497, "learning_rate": 8.999442818588473e-06, "loss": 0.8038, "step": 451 }, { "epoch": 0.07423069817091947, "grad_norm": 2.1780216266278853, "learning_rate": 8.99943541442035e-06, "loss": 0.8327, "step": 452 }, { "epoch": 0.0743949253792622, "grad_norm": 2.1845803500643184, "learning_rate": 8.999427961383933e-06, "loss": 0.7931, "step": 453 }, { "epoch": 0.07455915258760495, "grad_norm": 2.13549126985634, "learning_rate": 8.9994204594793e-06, "loss": 0.8301, "step": 454 }, { "epoch": 0.07472337979594769, "grad_norm": 2.1297114580768404, "learning_rate": 8.999412908706536e-06, "loss": 0.8271, "step": 455 }, { "epoch": 0.07488760700429044, "grad_norm": 2.1538771659712497, "learning_rate": 8.99940530906572e-06, "loss": 0.8335, "step": 456 }, { "epoch": 0.07505183421263317, "grad_norm": 2.15071828742556, "learning_rate": 8.999397660556935e-06, "loss": 0.8057, "step": 457 }, { "epoch": 0.07521606142097592, "grad_norm": 2.093120448722487, "learning_rate": 8.999389963180265e-06, "loss": 0.8367, "step": 458 }, { "epoch": 0.07538028862931866, "grad_norm": 2.085757684625511, "learning_rate": 8.999382216935793e-06, "loss": 0.8278, "step": 459 }, { "epoch": 0.0755445158376614, "grad_norm": 2.102360562017555, "learning_rate": 8.999374421823603e-06, "loss": 0.8206, "step": 460 }, { "epoch": 0.07570874304600414, "grad_norm": 2.061123490022161, "learning_rate": 8.99936657784378e-06, "loss": 0.8178, "step": 461 }, { "epoch": 0.07587297025434689, "grad_norm": 2.040202576510708, "learning_rate": 8.99935868499641e-06, "loss": 0.8234, "step": 462 }, { "epoch": 0.07603719746268964, "grad_norm": 2.0925459619084807, "learning_rate": 8.999350743281578e-06, "loss": 0.8011, "step": 463 }, { "epoch": 0.07620142467103237, "grad_norm": 2.0618958956130506, "learning_rate": 8.999342752699368e-06, "loss": 0.8064, "step": 464 }, { "epoch": 0.07636565187937512, "grad_norm": 2.0446614226644275, "learning_rate": 8.999334713249872e-06, "loss": 0.8143, "step": 465 }, { "epoch": 0.07652987908771786, "grad_norm": 2.009335333061511, "learning_rate": 8.999326624933172e-06, "loss": 0.8326, "step": 466 }, { "epoch": 0.0766941062960606, "grad_norm": 2.0489481976521797, "learning_rate": 8.999318487749358e-06, "loss": 0.7827, "step": 467 }, { "epoch": 0.07685833350440334, "grad_norm": 2.0221916630599304, "learning_rate": 8.99931030169852e-06, "loss": 0.804, "step": 468 }, { "epoch": 0.07702256071274609, "grad_norm": 2.029438896224542, "learning_rate": 8.999302066780746e-06, "loss": 0.8087, "step": 469 }, { "epoch": 0.07718678792108882, "grad_norm": 1.978319845152218, "learning_rate": 8.999293782996124e-06, "loss": 0.821, "step": 470 }, { "epoch": 0.07735101512943157, "grad_norm": 1.9918589527926724, "learning_rate": 8.999285450344744e-06, "loss": 0.8201, "step": 471 }, { "epoch": 0.0775152423377743, "grad_norm": 2.0023287957387836, "learning_rate": 8.999277068826698e-06, "loss": 0.8114, "step": 472 }, { "epoch": 0.07767946954611706, "grad_norm": 1.9735825072492768, "learning_rate": 8.999268638442074e-06, "loss": 0.81, "step": 473 }, { "epoch": 0.07784369675445979, "grad_norm": 1.9457430616789873, "learning_rate": 8.99926015919097e-06, "loss": 0.8161, "step": 474 }, { "epoch": 0.07800792396280254, "grad_norm": 1.9501504831433791, "learning_rate": 8.999251631073472e-06, "loss": 0.8233, "step": 475 }, { "epoch": 0.07817215117114527, "grad_norm": 1.9587112324999745, "learning_rate": 8.999243054089675e-06, "loss": 0.8289, "step": 476 }, { "epoch": 0.07833637837948802, "grad_norm": 1.9321723467173395, "learning_rate": 8.999234428239671e-06, "loss": 0.81, "step": 477 }, { "epoch": 0.07850060558783076, "grad_norm": 1.8961717334570505, "learning_rate": 8.999225753523554e-06, "loss": 0.8286, "step": 478 }, { "epoch": 0.0786648327961735, "grad_norm": 1.8988900894264487, "learning_rate": 8.99921702994142e-06, "loss": 0.8257, "step": 479 }, { "epoch": 0.07882906000451625, "grad_norm": 1.917437492909705, "learning_rate": 8.999208257493363e-06, "loss": 0.796, "step": 480 }, { "epoch": 0.07899328721285899, "grad_norm": 1.9162103223989222, "learning_rate": 8.999199436179476e-06, "loss": 0.8117, "step": 481 }, { "epoch": 0.07915751442120174, "grad_norm": 1.8900698273277583, "learning_rate": 8.999190565999858e-06, "loss": 0.8188, "step": 482 }, { "epoch": 0.07932174162954447, "grad_norm": 1.8766604711654709, "learning_rate": 8.999181646954602e-06, "loss": 0.8152, "step": 483 }, { "epoch": 0.07948596883788722, "grad_norm": 1.8868541925904996, "learning_rate": 8.999172679043808e-06, "loss": 0.812, "step": 484 }, { "epoch": 0.07965019604622996, "grad_norm": 1.8559987483245017, "learning_rate": 8.999163662267572e-06, "loss": 0.8223, "step": 485 }, { "epoch": 0.0798144232545727, "grad_norm": 1.8423384119854995, "learning_rate": 8.999154596625992e-06, "loss": 0.8195, "step": 486 }, { "epoch": 0.07997865046291544, "grad_norm": 1.8755155601469158, "learning_rate": 8.999145482119168e-06, "loss": 0.789, "step": 487 }, { "epoch": 0.08014287767125819, "grad_norm": 1.8578743443690677, "learning_rate": 8.999136318747195e-06, "loss": 0.817, "step": 488 }, { "epoch": 0.08030710487960092, "grad_norm": 1.8149426139291804, "learning_rate": 8.999127106510175e-06, "loss": 0.8298, "step": 489 }, { "epoch": 0.08047133208794367, "grad_norm": 1.8071557080745544, "learning_rate": 8.999117845408209e-06, "loss": 0.8156, "step": 490 }, { "epoch": 0.08063555929628641, "grad_norm": 1.7903215084168274, "learning_rate": 8.999108535441398e-06, "loss": 0.8213, "step": 491 }, { "epoch": 0.08079978650462916, "grad_norm": 1.8039616892674522, "learning_rate": 8.99909917660984e-06, "loss": 0.8086, "step": 492 }, { "epoch": 0.08096401371297189, "grad_norm": 1.8228172637206808, "learning_rate": 8.99908976891364e-06, "loss": 0.806, "step": 493 }, { "epoch": 0.08112824092131464, "grad_norm": 1.808137095151838, "learning_rate": 8.999080312352896e-06, "loss": 0.7975, "step": 494 }, { "epoch": 0.08129246812965737, "grad_norm": 1.7910413085027803, "learning_rate": 8.999070806927716e-06, "loss": 0.7961, "step": 495 }, { "epoch": 0.08145669533800012, "grad_norm": 1.8391374609956732, "learning_rate": 8.9990612526382e-06, "loss": 0.7667, "step": 496 }, { "epoch": 0.08162092254634287, "grad_norm": 1.7825821027741344, "learning_rate": 8.999051649484451e-06, "loss": 0.8082, "step": 497 }, { "epoch": 0.08178514975468561, "grad_norm": 1.7535944210904948, "learning_rate": 8.999041997466575e-06, "loss": 0.806, "step": 498 }, { "epoch": 0.08194937696302836, "grad_norm": 1.7940946696819353, "learning_rate": 8.999032296584675e-06, "loss": 0.7794, "step": 499 }, { "epoch": 0.08211360417137109, "grad_norm": 1.7675061166108443, "learning_rate": 8.999022546838862e-06, "loss": 0.8007, "step": 500 }, { "epoch": 0.08227783137971384, "grad_norm": 1.7466093753207794, "learning_rate": 8.999012748229235e-06, "loss": 0.8085, "step": 501 }, { "epoch": 0.08244205858805657, "grad_norm": 1.7384682546038235, "learning_rate": 8.999002900755904e-06, "loss": 0.8029, "step": 502 }, { "epoch": 0.08260628579639932, "grad_norm": 1.749204038070269, "learning_rate": 8.998993004418973e-06, "loss": 0.8065, "step": 503 }, { "epoch": 0.08277051300474206, "grad_norm": 1.731827509189124, "learning_rate": 8.998983059218553e-06, "loss": 0.7904, "step": 504 }, { "epoch": 0.0829347402130848, "grad_norm": 1.713406755440384, "learning_rate": 8.998973065154751e-06, "loss": 0.7929, "step": 505 }, { "epoch": 0.08309896742142754, "grad_norm": 1.7370353467699013, "learning_rate": 8.998963022227676e-06, "loss": 0.7979, "step": 506 }, { "epoch": 0.08326319462977029, "grad_norm": 1.722144711965804, "learning_rate": 8.998952930437434e-06, "loss": 0.7864, "step": 507 }, { "epoch": 0.08342742183811303, "grad_norm": 1.6850288461926786, "learning_rate": 8.998942789784138e-06, "loss": 0.7966, "step": 508 }, { "epoch": 0.08359164904645577, "grad_norm": 1.7125487489335667, "learning_rate": 8.998932600267896e-06, "loss": 0.789, "step": 509 }, { "epoch": 0.08375587625479851, "grad_norm": 1.666553952443347, "learning_rate": 8.998922361888821e-06, "loss": 0.8144, "step": 510 }, { "epoch": 0.08392010346314126, "grad_norm": 1.6766351515133946, "learning_rate": 8.998912074647022e-06, "loss": 0.8144, "step": 511 }, { "epoch": 0.08408433067148399, "grad_norm": 1.6676136142415157, "learning_rate": 8.998901738542612e-06, "loss": 0.8086, "step": 512 }, { "epoch": 0.08424855787982674, "grad_norm": 1.6939153245378111, "learning_rate": 8.998891353575703e-06, "loss": 0.7887, "step": 513 }, { "epoch": 0.08441278508816948, "grad_norm": 1.6367960342037091, "learning_rate": 8.998880919746406e-06, "loss": 0.8217, "step": 514 }, { "epoch": 0.08457701229651222, "grad_norm": 1.656541292788681, "learning_rate": 8.998870437054837e-06, "loss": 0.7918, "step": 515 }, { "epoch": 0.08474123950485497, "grad_norm": 1.649968215675082, "learning_rate": 8.99885990550111e-06, "loss": 0.7948, "step": 516 }, { "epoch": 0.08490546671319771, "grad_norm": 1.637068861746453, "learning_rate": 8.998849325085336e-06, "loss": 0.8022, "step": 517 }, { "epoch": 0.08506969392154046, "grad_norm": 1.6598278261447834, "learning_rate": 8.998838695807632e-06, "loss": 0.7767, "step": 518 }, { "epoch": 0.08523392112988319, "grad_norm": 1.6497075059531359, "learning_rate": 8.998828017668115e-06, "loss": 0.7843, "step": 519 }, { "epoch": 0.08539814833822594, "grad_norm": 1.6048685678000418, "learning_rate": 8.9988172906669e-06, "loss": 0.8041, "step": 520 }, { "epoch": 0.08556237554656868, "grad_norm": 1.6169953267344268, "learning_rate": 8.998806514804102e-06, "loss": 0.7884, "step": 521 }, { "epoch": 0.08572660275491142, "grad_norm": 1.6177878656916025, "learning_rate": 8.998795690079838e-06, "loss": 0.7828, "step": 522 }, { "epoch": 0.08589082996325416, "grad_norm": 1.605566301566824, "learning_rate": 8.998784816494227e-06, "loss": 0.7877, "step": 523 }, { "epoch": 0.08605505717159691, "grad_norm": 1.5887379483469, "learning_rate": 8.99877389404739e-06, "loss": 0.8035, "step": 524 }, { "epoch": 0.08621928437993964, "grad_norm": 1.605034282881469, "learning_rate": 8.998762922739438e-06, "loss": 0.793, "step": 525 }, { "epoch": 0.08638351158828239, "grad_norm": 1.5765227198544196, "learning_rate": 8.998751902570496e-06, "loss": 0.796, "step": 526 }, { "epoch": 0.08654773879662513, "grad_norm": 1.5863705667950556, "learning_rate": 8.998740833540683e-06, "loss": 0.7796, "step": 527 }, { "epoch": 0.08671196600496787, "grad_norm": 1.5650967154526163, "learning_rate": 8.998729715650116e-06, "loss": 0.7964, "step": 528 }, { "epoch": 0.08687619321331061, "grad_norm": 1.5973499387442838, "learning_rate": 8.99871854889892e-06, "loss": 0.7778, "step": 529 }, { "epoch": 0.08704042042165336, "grad_norm": 1.5399366650437916, "learning_rate": 8.998707333287214e-06, "loss": 0.8062, "step": 530 }, { "epoch": 0.0872046476299961, "grad_norm": 1.5877605974589395, "learning_rate": 8.99869606881512e-06, "loss": 0.7861, "step": 531 }, { "epoch": 0.08736887483833884, "grad_norm": 1.5404243830810433, "learning_rate": 8.998684755482762e-06, "loss": 0.7904, "step": 532 }, { "epoch": 0.08753310204668159, "grad_norm": 1.5293995565013145, "learning_rate": 8.99867339329026e-06, "loss": 0.8052, "step": 533 }, { "epoch": 0.08769732925502433, "grad_norm": 1.5285645822160316, "learning_rate": 8.99866198223774e-06, "loss": 0.8057, "step": 534 }, { "epoch": 0.08786155646336707, "grad_norm": 1.5317827095316812, "learning_rate": 8.998650522325322e-06, "loss": 0.7905, "step": 535 }, { "epoch": 0.08802578367170981, "grad_norm": 1.5487961117886297, "learning_rate": 8.998639013553136e-06, "loss": 0.7814, "step": 536 }, { "epoch": 0.08819001088005256, "grad_norm": 1.512615812975723, "learning_rate": 8.998627455921304e-06, "loss": 0.794, "step": 537 }, { "epoch": 0.08835423808839529, "grad_norm": 1.524531472522993, "learning_rate": 8.998615849429952e-06, "loss": 0.7853, "step": 538 }, { "epoch": 0.08851846529673804, "grad_norm": 1.5113713334392895, "learning_rate": 8.998604194079204e-06, "loss": 0.7985, "step": 539 }, { "epoch": 0.08868269250508078, "grad_norm": 1.513587345447187, "learning_rate": 8.99859248986919e-06, "loss": 0.7818, "step": 540 }, { "epoch": 0.08884691971342353, "grad_norm": 1.5025263128659543, "learning_rate": 8.998580736800035e-06, "loss": 0.7828, "step": 541 }, { "epoch": 0.08901114692176626, "grad_norm": 1.50559719285815, "learning_rate": 8.998568934871868e-06, "loss": 0.7903, "step": 542 }, { "epoch": 0.08917537413010901, "grad_norm": 1.4894498383121608, "learning_rate": 8.998557084084815e-06, "loss": 0.7731, "step": 543 }, { "epoch": 0.08933960133845174, "grad_norm": 1.4952299462037122, "learning_rate": 8.998545184439007e-06, "loss": 0.7702, "step": 544 }, { "epoch": 0.08950382854679449, "grad_norm": 1.4653177315472807, "learning_rate": 8.99853323593457e-06, "loss": 0.7988, "step": 545 }, { "epoch": 0.08966805575513723, "grad_norm": 1.459127859543925, "learning_rate": 8.99852123857164e-06, "loss": 0.8138, "step": 546 }, { "epoch": 0.08983228296347998, "grad_norm": 1.4651643994973669, "learning_rate": 8.998509192350342e-06, "loss": 0.7876, "step": 547 }, { "epoch": 0.08999651017182271, "grad_norm": 1.4676671777660628, "learning_rate": 8.998497097270808e-06, "loss": 0.7933, "step": 548 }, { "epoch": 0.09016073738016546, "grad_norm": 1.4376367008426452, "learning_rate": 8.998484953333168e-06, "loss": 0.8033, "step": 549 }, { "epoch": 0.0903249645885082, "grad_norm": 1.4627566923252446, "learning_rate": 8.998472760537557e-06, "loss": 0.7791, "step": 550 }, { "epoch": 0.09048919179685094, "grad_norm": 1.4303444060969528, "learning_rate": 8.998460518884106e-06, "loss": 0.7941, "step": 551 }, { "epoch": 0.09065341900519369, "grad_norm": 1.520429285468019, "learning_rate": 8.998448228372947e-06, "loss": 0.7875, "step": 552 }, { "epoch": 0.09081764621353643, "grad_norm": 1.4467982423594512, "learning_rate": 8.998435889004217e-06, "loss": 0.7599, "step": 553 }, { "epoch": 0.09098187342187918, "grad_norm": 1.4542459927929825, "learning_rate": 8.998423500778045e-06, "loss": 0.76, "step": 554 }, { "epoch": 0.09114610063022191, "grad_norm": 1.4334949641597166, "learning_rate": 8.998411063694569e-06, "loss": 0.7816, "step": 555 }, { "epoch": 0.09131032783856466, "grad_norm": 1.4376273667298758, "learning_rate": 8.99839857775392e-06, "loss": 0.78, "step": 556 }, { "epoch": 0.0914745550469074, "grad_norm": 1.4244926116912235, "learning_rate": 8.99838604295624e-06, "loss": 0.7887, "step": 557 }, { "epoch": 0.09163878225525014, "grad_norm": 1.4180874831462564, "learning_rate": 8.99837345930166e-06, "loss": 0.7668, "step": 558 }, { "epoch": 0.09180300946359288, "grad_norm": 1.402879410935121, "learning_rate": 8.998360826790319e-06, "loss": 0.7842, "step": 559 }, { "epoch": 0.09196723667193563, "grad_norm": 1.4040836140296928, "learning_rate": 8.998348145422355e-06, "loss": 0.7859, "step": 560 }, { "epoch": 0.09213146388027836, "grad_norm": 1.4219797527782458, "learning_rate": 8.998335415197903e-06, "loss": 0.7706, "step": 561 }, { "epoch": 0.09229569108862111, "grad_norm": 1.4503951930362755, "learning_rate": 8.998322636117103e-06, "loss": 0.7518, "step": 562 }, { "epoch": 0.09245991829696384, "grad_norm": 1.4170028168044975, "learning_rate": 8.998309808180093e-06, "loss": 0.7806, "step": 563 }, { "epoch": 0.0926241455053066, "grad_norm": 1.3835900975245248, "learning_rate": 8.998296931387013e-06, "loss": 0.7776, "step": 564 }, { "epoch": 0.09278837271364933, "grad_norm": 1.3880650749493229, "learning_rate": 8.998284005738002e-06, "loss": 0.7716, "step": 565 }, { "epoch": 0.09295259992199208, "grad_norm": 1.3904515187341628, "learning_rate": 8.998271031233202e-06, "loss": 0.7868, "step": 566 }, { "epoch": 0.09311682713033481, "grad_norm": 1.3873774301491548, "learning_rate": 8.998258007872753e-06, "loss": 0.7768, "step": 567 }, { "epoch": 0.09328105433867756, "grad_norm": 1.370753815387451, "learning_rate": 8.998244935656797e-06, "loss": 0.7658, "step": 568 }, { "epoch": 0.09344528154702031, "grad_norm": 1.4026344155479453, "learning_rate": 8.998231814585473e-06, "loss": 0.7526, "step": 569 }, { "epoch": 0.09360950875536304, "grad_norm": 1.344243720958496, "learning_rate": 8.998218644658926e-06, "loss": 0.7794, "step": 570 }, { "epoch": 0.09377373596370579, "grad_norm": 1.3829097281740133, "learning_rate": 8.9982054258773e-06, "loss": 0.7875, "step": 571 }, { "epoch": 0.09393796317204853, "grad_norm": 1.3604310447903412, "learning_rate": 8.998192158240738e-06, "loss": 0.7869, "step": 572 }, { "epoch": 0.09410219038039128, "grad_norm": 1.3581920491158632, "learning_rate": 8.998178841749382e-06, "loss": 0.7821, "step": 573 }, { "epoch": 0.09426641758873401, "grad_norm": 1.3407414437715726, "learning_rate": 8.998165476403378e-06, "loss": 0.78, "step": 574 }, { "epoch": 0.09443064479707676, "grad_norm": 1.323211309690318, "learning_rate": 8.998152062202874e-06, "loss": 0.7974, "step": 575 }, { "epoch": 0.0945948720054195, "grad_norm": 1.3516519692522833, "learning_rate": 8.998138599148012e-06, "loss": 0.7649, "step": 576 }, { "epoch": 0.09475909921376224, "grad_norm": 1.3229131845618651, "learning_rate": 8.998125087238938e-06, "loss": 0.7962, "step": 577 }, { "epoch": 0.09492332642210498, "grad_norm": 1.307882279240173, "learning_rate": 8.9981115264758e-06, "loss": 0.806, "step": 578 }, { "epoch": 0.09508755363044773, "grad_norm": 1.3352279874733834, "learning_rate": 8.998097916858747e-06, "loss": 0.774, "step": 579 }, { "epoch": 0.09525178083879046, "grad_norm": 1.311690507687563, "learning_rate": 8.998084258387923e-06, "loss": 0.7928, "step": 580 }, { "epoch": 0.09541600804713321, "grad_norm": 1.316169384327891, "learning_rate": 8.99807055106348e-06, "loss": 0.8016, "step": 581 }, { "epoch": 0.09558023525547595, "grad_norm": 1.3115286012581258, "learning_rate": 8.998056794885563e-06, "loss": 0.7687, "step": 582 }, { "epoch": 0.0957444624638187, "grad_norm": 1.2973876639193667, "learning_rate": 8.998042989854326e-06, "loss": 0.7957, "step": 583 }, { "epoch": 0.09590868967216143, "grad_norm": 1.3034852678205227, "learning_rate": 8.998029135969917e-06, "loss": 0.7694, "step": 584 }, { "epoch": 0.09607291688050418, "grad_norm": 1.2921967384130368, "learning_rate": 8.998015233232484e-06, "loss": 0.7673, "step": 585 }, { "epoch": 0.09623714408884693, "grad_norm": 1.2906042190034313, "learning_rate": 8.998001281642182e-06, "loss": 0.7768, "step": 586 }, { "epoch": 0.09640137129718966, "grad_norm": 1.2940931141309069, "learning_rate": 8.99798728119916e-06, "loss": 0.7844, "step": 587 }, { "epoch": 0.09656559850553241, "grad_norm": 1.284649039797445, "learning_rate": 8.99797323190357e-06, "loss": 0.7789, "step": 588 }, { "epoch": 0.09672982571387514, "grad_norm": 1.283737139541954, "learning_rate": 8.997959133755566e-06, "loss": 0.7677, "step": 589 }, { "epoch": 0.0968940529222179, "grad_norm": 1.2736857154419794, "learning_rate": 8.997944986755302e-06, "loss": 0.778, "step": 590 }, { "epoch": 0.09705828013056063, "grad_norm": 1.2769757024108, "learning_rate": 8.997930790902928e-06, "loss": 0.7819, "step": 591 }, { "epoch": 0.09722250733890338, "grad_norm": 1.262465276945493, "learning_rate": 8.997916546198599e-06, "loss": 0.7783, "step": 592 }, { "epoch": 0.09738673454724611, "grad_norm": 1.2663433484817244, "learning_rate": 8.997902252642474e-06, "loss": 0.7661, "step": 593 }, { "epoch": 0.09755096175558886, "grad_norm": 1.2748735844470442, "learning_rate": 8.997887910234704e-06, "loss": 0.7871, "step": 594 }, { "epoch": 0.0977151889639316, "grad_norm": 1.2627194359703184, "learning_rate": 8.997873518975445e-06, "loss": 0.7697, "step": 595 }, { "epoch": 0.09787941617227434, "grad_norm": 1.2458601207727036, "learning_rate": 8.997859078864856e-06, "loss": 0.7761, "step": 596 }, { "epoch": 0.09804364338061708, "grad_norm": 1.2685292083003645, "learning_rate": 8.99784458990309e-06, "loss": 0.7595, "step": 597 }, { "epoch": 0.09820787058895983, "grad_norm": 1.2423763258616558, "learning_rate": 8.997830052090308e-06, "loss": 0.7569, "step": 598 }, { "epoch": 0.09837209779730256, "grad_norm": 1.2284843126416907, "learning_rate": 8.997815465426666e-06, "loss": 0.7691, "step": 599 }, { "epoch": 0.09853632500564531, "grad_norm": 1.2380214027521046, "learning_rate": 8.997800829912323e-06, "loss": 0.7736, "step": 600 }, { "epoch": 0.09870055221398805, "grad_norm": 1.2319079389161247, "learning_rate": 8.997786145547437e-06, "loss": 0.7714, "step": 601 }, { "epoch": 0.0988647794223308, "grad_norm": 1.2327228774043395, "learning_rate": 8.997771412332169e-06, "loss": 0.7796, "step": 602 }, { "epoch": 0.09902900663067353, "grad_norm": 1.2188386522371906, "learning_rate": 8.997756630266679e-06, "loss": 0.7831, "step": 603 }, { "epoch": 0.09919323383901628, "grad_norm": 1.198537872248901, "learning_rate": 8.997741799351126e-06, "loss": 0.7919, "step": 604 }, { "epoch": 0.09935746104735903, "grad_norm": 1.2216480771422678, "learning_rate": 8.997726919585671e-06, "loss": 0.7551, "step": 605 }, { "epoch": 0.09952168825570176, "grad_norm": 1.225034221391949, "learning_rate": 8.997711990970478e-06, "loss": 0.7663, "step": 606 }, { "epoch": 0.09968591546404451, "grad_norm": 1.2000663875670825, "learning_rate": 8.997697013505707e-06, "loss": 0.7677, "step": 607 }, { "epoch": 0.09985014267238725, "grad_norm": 1.2053600381758947, "learning_rate": 8.997681987191521e-06, "loss": 0.7685, "step": 608 }, { "epoch": 0.10001436988073, "grad_norm": 1.2058066147722575, "learning_rate": 8.997666912028083e-06, "loss": 0.7591, "step": 609 }, { "epoch": 0.10017859708907273, "grad_norm": 1.201513478456253, "learning_rate": 8.997651788015559e-06, "loss": 0.7716, "step": 610 }, { "epoch": 0.10034282429741548, "grad_norm": 1.2182501923516547, "learning_rate": 8.997636615154109e-06, "loss": 0.741, "step": 611 }, { "epoch": 0.10050705150575821, "grad_norm": 1.1980299500822917, "learning_rate": 8.997621393443901e-06, "loss": 0.759, "step": 612 }, { "epoch": 0.10067127871410096, "grad_norm": 1.2021512642384815, "learning_rate": 8.997606122885102e-06, "loss": 0.7686, "step": 613 }, { "epoch": 0.1008355059224437, "grad_norm": 1.1891403994893042, "learning_rate": 8.997590803477872e-06, "loss": 0.7735, "step": 614 }, { "epoch": 0.10099973313078645, "grad_norm": 1.1847699209247626, "learning_rate": 8.997575435222384e-06, "loss": 0.7709, "step": 615 }, { "epoch": 0.10116396033912918, "grad_norm": 1.1887640824690933, "learning_rate": 8.9975600181188e-06, "loss": 0.7463, "step": 616 }, { "epoch": 0.10132818754747193, "grad_norm": 1.1738353175527652, "learning_rate": 8.997544552167289e-06, "loss": 0.7775, "step": 617 }, { "epoch": 0.10149241475581466, "grad_norm": 1.181133780761317, "learning_rate": 8.99752903736802e-06, "loss": 0.7607, "step": 618 }, { "epoch": 0.10165664196415741, "grad_norm": 1.1674676030003306, "learning_rate": 8.99751347372116e-06, "loss": 0.7616, "step": 619 }, { "epoch": 0.10182086917250015, "grad_norm": 1.168583211529951, "learning_rate": 8.99749786122688e-06, "loss": 0.7683, "step": 620 }, { "epoch": 0.1019850963808429, "grad_norm": 1.1622822735095926, "learning_rate": 8.997482199885346e-06, "loss": 0.768, "step": 621 }, { "epoch": 0.10214932358918564, "grad_norm": 1.1471729076856725, "learning_rate": 8.997466489696732e-06, "loss": 0.7604, "step": 622 }, { "epoch": 0.10231355079752838, "grad_norm": 1.145057169808049, "learning_rate": 8.997450730661206e-06, "loss": 0.7578, "step": 623 }, { "epoch": 0.10247777800587113, "grad_norm": 1.1557515410421166, "learning_rate": 8.997434922778942e-06, "loss": 0.7672, "step": 624 }, { "epoch": 0.10264200521421386, "grad_norm": 1.1355739695048033, "learning_rate": 8.99741906605011e-06, "loss": 0.7653, "step": 625 }, { "epoch": 0.10280623242255661, "grad_norm": 1.1538565942915397, "learning_rate": 8.99740316047488e-06, "loss": 0.7394, "step": 626 }, { "epoch": 0.10297045963089935, "grad_norm": 1.1446329691976465, "learning_rate": 8.997387206053427e-06, "loss": 0.7499, "step": 627 }, { "epoch": 0.1031346868392421, "grad_norm": 1.1537159517502786, "learning_rate": 8.997371202785925e-06, "loss": 0.7422, "step": 628 }, { "epoch": 0.10329891404758483, "grad_norm": 1.1205865491670322, "learning_rate": 8.997355150672548e-06, "loss": 0.7799, "step": 629 }, { "epoch": 0.10346314125592758, "grad_norm": 1.138552041683983, "learning_rate": 8.997339049713468e-06, "loss": 0.7533, "step": 630 }, { "epoch": 0.10362736846427031, "grad_norm": 1.1208591898037377, "learning_rate": 8.99732289990886e-06, "loss": 0.7685, "step": 631 }, { "epoch": 0.10379159567261306, "grad_norm": 1.122828390596003, "learning_rate": 8.997306701258903e-06, "loss": 0.7533, "step": 632 }, { "epoch": 0.1039558228809558, "grad_norm": 1.12283473302057, "learning_rate": 8.997290453763768e-06, "loss": 0.748, "step": 633 }, { "epoch": 0.10412005008929855, "grad_norm": 1.1034866068092488, "learning_rate": 8.997274157423636e-06, "loss": 0.7512, "step": 634 }, { "epoch": 0.10428427729764128, "grad_norm": 1.1165031140298034, "learning_rate": 8.99725781223868e-06, "loss": 0.7536, "step": 635 }, { "epoch": 0.10444850450598403, "grad_norm": 1.12908873229894, "learning_rate": 8.99724141820908e-06, "loss": 0.7437, "step": 636 }, { "epoch": 0.10461273171432676, "grad_norm": 1.122551438784602, "learning_rate": 8.997224975335015e-06, "loss": 0.7525, "step": 637 }, { "epoch": 0.10477695892266951, "grad_norm": 1.093230994266884, "learning_rate": 8.99720848361666e-06, "loss": 0.7871, "step": 638 }, { "epoch": 0.10494118613101226, "grad_norm": 1.1022741460479262, "learning_rate": 8.997191943054198e-06, "loss": 0.768, "step": 639 }, { "epoch": 0.105105413339355, "grad_norm": 1.0910181528443548, "learning_rate": 8.997175353647806e-06, "loss": 0.7549, "step": 640 }, { "epoch": 0.10526964054769775, "grad_norm": 1.1062114372442224, "learning_rate": 8.997158715397665e-06, "loss": 0.7539, "step": 641 }, { "epoch": 0.10543386775604048, "grad_norm": 1.103586593071817, "learning_rate": 8.997142028303955e-06, "loss": 0.7544, "step": 642 }, { "epoch": 0.10559809496438323, "grad_norm": 1.1071233361706303, "learning_rate": 8.99712529236686e-06, "loss": 0.764, "step": 643 }, { "epoch": 0.10576232217272596, "grad_norm": 1.094482530155215, "learning_rate": 8.99710850758656e-06, "loss": 0.7468, "step": 644 }, { "epoch": 0.10592654938106871, "grad_norm": 1.078601443848298, "learning_rate": 8.997091673963234e-06, "loss": 0.7576, "step": 645 }, { "epoch": 0.10609077658941145, "grad_norm": 1.124326466230655, "learning_rate": 8.99707479149707e-06, "loss": 0.7488, "step": 646 }, { "epoch": 0.1062550037977542, "grad_norm": 1.1065751826100323, "learning_rate": 8.99705786018825e-06, "loss": 0.7552, "step": 647 }, { "epoch": 0.10641923100609693, "grad_norm": 1.067142366456579, "learning_rate": 8.997040880036956e-06, "loss": 0.771, "step": 648 }, { "epoch": 0.10658345821443968, "grad_norm": 1.0713690882822862, "learning_rate": 8.997023851043372e-06, "loss": 0.7527, "step": 649 }, { "epoch": 0.10674768542278242, "grad_norm": 1.0664071164503175, "learning_rate": 8.997006773207687e-06, "loss": 0.7405, "step": 650 }, { "epoch": 0.10691191263112516, "grad_norm": 1.0590095194012845, "learning_rate": 8.996989646530083e-06, "loss": 0.7711, "step": 651 }, { "epoch": 0.1070761398394679, "grad_norm": 1.0157260991175592, "learning_rate": 8.996972471010747e-06, "loss": 0.8044, "step": 652 }, { "epoch": 0.10724036704781065, "grad_norm": 1.0702230692833752, "learning_rate": 8.996955246649866e-06, "loss": 0.7708, "step": 653 }, { "epoch": 0.10740459425615338, "grad_norm": 1.0415420000470876, "learning_rate": 8.996937973447627e-06, "loss": 0.7648, "step": 654 }, { "epoch": 0.10756882146449613, "grad_norm": 1.0502831812393232, "learning_rate": 8.996920651404215e-06, "loss": 0.7514, "step": 655 }, { "epoch": 0.10773304867283887, "grad_norm": 1.0643751230664344, "learning_rate": 8.996903280519821e-06, "loss": 0.7432, "step": 656 }, { "epoch": 0.10789727588118161, "grad_norm": 1.0611243032791475, "learning_rate": 8.996885860794635e-06, "loss": 0.75, "step": 657 }, { "epoch": 0.10806150308952436, "grad_norm": 1.0614565864005756, "learning_rate": 8.996868392228843e-06, "loss": 0.7704, "step": 658 }, { "epoch": 0.1082257302978671, "grad_norm": 1.0522356645726656, "learning_rate": 8.996850874822636e-06, "loss": 0.7514, "step": 659 }, { "epoch": 0.10838995750620985, "grad_norm": 1.0396641601794014, "learning_rate": 8.996833308576205e-06, "loss": 0.7443, "step": 660 }, { "epoch": 0.10855418471455258, "grad_norm": 1.0352100207650416, "learning_rate": 8.996815693489738e-06, "loss": 0.7444, "step": 661 }, { "epoch": 0.10871841192289533, "grad_norm": 1.0440907953492387, "learning_rate": 8.99679802956343e-06, "loss": 0.7588, "step": 662 }, { "epoch": 0.10888263913123807, "grad_norm": 1.0237415973036992, "learning_rate": 8.99678031679747e-06, "loss": 0.743, "step": 663 }, { "epoch": 0.10904686633958081, "grad_norm": 1.038219778138841, "learning_rate": 8.996762555192053e-06, "loss": 0.7696, "step": 664 }, { "epoch": 0.10921109354792355, "grad_norm": 1.0371317655068988, "learning_rate": 8.99674474474737e-06, "loss": 0.741, "step": 665 }, { "epoch": 0.1093753207562663, "grad_norm": 1.015958023377164, "learning_rate": 8.996726885463612e-06, "loss": 0.7632, "step": 666 }, { "epoch": 0.10953954796460903, "grad_norm": 1.025225602279085, "learning_rate": 8.996708977340979e-06, "loss": 0.7559, "step": 667 }, { "epoch": 0.10970377517295178, "grad_norm": 1.0102874868738583, "learning_rate": 8.996691020379662e-06, "loss": 0.7528, "step": 668 }, { "epoch": 0.10986800238129452, "grad_norm": 1.0171496292001285, "learning_rate": 8.996673014579855e-06, "loss": 0.7476, "step": 669 }, { "epoch": 0.11003222958963726, "grad_norm": 1.0133586146958062, "learning_rate": 8.996654959941757e-06, "loss": 0.7406, "step": 670 }, { "epoch": 0.11019645679798, "grad_norm": 1.012184525401541, "learning_rate": 8.99663685646556e-06, "loss": 0.7323, "step": 671 }, { "epoch": 0.11036068400632275, "grad_norm": 1.0204812192417894, "learning_rate": 8.996618704151464e-06, "loss": 0.7231, "step": 672 }, { "epoch": 0.11052491121466548, "grad_norm": 1.0394114945935236, "learning_rate": 8.996600502999663e-06, "loss": 0.7318, "step": 673 }, { "epoch": 0.11068913842300823, "grad_norm": 1.0071324286292476, "learning_rate": 8.99658225301036e-06, "loss": 0.7371, "step": 674 }, { "epoch": 0.11085336563135098, "grad_norm": 1.0071241514792235, "learning_rate": 8.996563954183747e-06, "loss": 0.7263, "step": 675 }, { "epoch": 0.11101759283969372, "grad_norm": 0.9849565269875252, "learning_rate": 8.996545606520026e-06, "loss": 0.7488, "step": 676 }, { "epoch": 0.11118182004803646, "grad_norm": 0.9857903891350793, "learning_rate": 8.996527210019395e-06, "loss": 0.7781, "step": 677 }, { "epoch": 0.1113460472563792, "grad_norm": 1.0008451419132542, "learning_rate": 8.996508764682056e-06, "loss": 0.7335, "step": 678 }, { "epoch": 0.11151027446472195, "grad_norm": 0.9904057469198645, "learning_rate": 8.996490270508207e-06, "loss": 0.7509, "step": 679 }, { "epoch": 0.11167450167306468, "grad_norm": 1.0154428424155522, "learning_rate": 8.99647172749805e-06, "loss": 0.7438, "step": 680 }, { "epoch": 0.11183872888140743, "grad_norm": 0.9859511271051146, "learning_rate": 8.996453135651785e-06, "loss": 0.7311, "step": 681 }, { "epoch": 0.11200295608975017, "grad_norm": 1.0172995678678334, "learning_rate": 8.996434494969617e-06, "loss": 0.7522, "step": 682 }, { "epoch": 0.11216718329809292, "grad_norm": 0.9934227384074349, "learning_rate": 8.996415805451744e-06, "loss": 0.748, "step": 683 }, { "epoch": 0.11233141050643565, "grad_norm": 0.9809402738647511, "learning_rate": 8.996397067098373e-06, "loss": 0.7384, "step": 684 }, { "epoch": 0.1124956377147784, "grad_norm": 0.9627480326356829, "learning_rate": 8.996378279909707e-06, "loss": 0.7475, "step": 685 }, { "epoch": 0.11265986492312113, "grad_norm": 0.9823816568610797, "learning_rate": 8.996359443885948e-06, "loss": 0.7546, "step": 686 }, { "epoch": 0.11282409213146388, "grad_norm": 0.982802515323171, "learning_rate": 8.9963405590273e-06, "loss": 0.7489, "step": 687 }, { "epoch": 0.11298831933980662, "grad_norm": 0.9965019206725246, "learning_rate": 8.996321625333972e-06, "loss": 0.7285, "step": 688 }, { "epoch": 0.11315254654814937, "grad_norm": 1.0211395532769991, "learning_rate": 8.996302642806166e-06, "loss": 0.7529, "step": 689 }, { "epoch": 0.1133167737564921, "grad_norm": 0.9443212451181419, "learning_rate": 8.996283611444091e-06, "loss": 0.7526, "step": 690 }, { "epoch": 0.11348100096483485, "grad_norm": 0.9626893407176333, "learning_rate": 8.99626453124795e-06, "loss": 0.7228, "step": 691 }, { "epoch": 0.11364522817317758, "grad_norm": 0.9730358742605486, "learning_rate": 8.996245402217954e-06, "loss": 0.744, "step": 692 }, { "epoch": 0.11380945538152033, "grad_norm": 0.9563252686051095, "learning_rate": 8.99622622435431e-06, "loss": 0.7431, "step": 693 }, { "epoch": 0.11397368258986308, "grad_norm": 0.9554055649452712, "learning_rate": 8.996206997657225e-06, "loss": 0.7432, "step": 694 }, { "epoch": 0.11413790979820582, "grad_norm": 0.9467956137652699, "learning_rate": 8.996187722126907e-06, "loss": 0.743, "step": 695 }, { "epoch": 0.11430213700654857, "grad_norm": 0.9489039079960571, "learning_rate": 8.996168397763568e-06, "loss": 0.7494, "step": 696 }, { "epoch": 0.1144663642148913, "grad_norm": 0.9387244200911926, "learning_rate": 8.996149024567416e-06, "loss": 0.7428, "step": 697 }, { "epoch": 0.11463059142323405, "grad_norm": 0.9501341353371187, "learning_rate": 8.996129602538664e-06, "loss": 0.7108, "step": 698 }, { "epoch": 0.11479481863157678, "grad_norm": 0.962206616789228, "learning_rate": 8.996110131677517e-06, "loss": 0.7195, "step": 699 }, { "epoch": 0.11495904583991953, "grad_norm": 0.9243568911177664, "learning_rate": 8.996090611984192e-06, "loss": 0.7557, "step": 700 }, { "epoch": 0.11512327304826227, "grad_norm": 0.953170841709303, "learning_rate": 8.996071043458902e-06, "loss": 0.719, "step": 701 }, { "epoch": 0.11528750025660502, "grad_norm": 0.9386842014299721, "learning_rate": 8.996051426101855e-06, "loss": 0.7436, "step": 702 }, { "epoch": 0.11545172746494775, "grad_norm": 0.9274541909232491, "learning_rate": 8.996031759913265e-06, "loss": 0.7385, "step": 703 }, { "epoch": 0.1156159546732905, "grad_norm": 0.9207183074144455, "learning_rate": 8.996012044893347e-06, "loss": 0.7592, "step": 704 }, { "epoch": 0.11578018188163323, "grad_norm": 0.9062916445772407, "learning_rate": 8.995992281042315e-06, "loss": 0.7615, "step": 705 }, { "epoch": 0.11594440908997598, "grad_norm": 0.9736278432415078, "learning_rate": 8.995972468360383e-06, "loss": 0.74, "step": 706 }, { "epoch": 0.11610863629831872, "grad_norm": 0.9065148273263071, "learning_rate": 8.995952606847767e-06, "loss": 0.7568, "step": 707 }, { "epoch": 0.11627286350666147, "grad_norm": 0.9193357664661844, "learning_rate": 8.995932696504683e-06, "loss": 0.7486, "step": 708 }, { "epoch": 0.1164370907150042, "grad_norm": 0.9195501174668326, "learning_rate": 8.995912737331345e-06, "loss": 0.7532, "step": 709 }, { "epoch": 0.11660131792334695, "grad_norm": 0.9507039973612097, "learning_rate": 8.995892729327973e-06, "loss": 0.7613, "step": 710 }, { "epoch": 0.1167655451316897, "grad_norm": 0.9261363835047515, "learning_rate": 8.995872672494781e-06, "loss": 0.74, "step": 711 }, { "epoch": 0.11692977234003243, "grad_norm": 0.9060734346127396, "learning_rate": 8.99585256683199e-06, "loss": 0.7368, "step": 712 }, { "epoch": 0.11709399954837518, "grad_norm": 0.8977667506848039, "learning_rate": 8.995832412339815e-06, "loss": 0.7516, "step": 713 }, { "epoch": 0.11725822675671792, "grad_norm": 0.8955651223846078, "learning_rate": 8.995812209018479e-06, "loss": 0.7456, "step": 714 }, { "epoch": 0.11742245396506067, "grad_norm": 0.9030569385935932, "learning_rate": 8.995791956868197e-06, "loss": 0.7176, "step": 715 }, { "epoch": 0.1175866811734034, "grad_norm": 0.8942305604018785, "learning_rate": 8.995771655889192e-06, "loss": 0.7384, "step": 716 }, { "epoch": 0.11775090838174615, "grad_norm": 0.9027311362567876, "learning_rate": 8.995751306081683e-06, "loss": 0.7405, "step": 717 }, { "epoch": 0.11791513559008888, "grad_norm": 0.885311457940408, "learning_rate": 8.995730907445893e-06, "loss": 0.7568, "step": 718 }, { "epoch": 0.11807936279843163, "grad_norm": 0.9012435000861214, "learning_rate": 8.99571045998204e-06, "loss": 0.7371, "step": 719 }, { "epoch": 0.11824359000677437, "grad_norm": 0.907146848660689, "learning_rate": 8.99568996369035e-06, "loss": 0.7292, "step": 720 }, { "epoch": 0.11840781721511712, "grad_norm": 1.0063300221437657, "learning_rate": 8.995669418571042e-06, "loss": 0.7459, "step": 721 }, { "epoch": 0.11857204442345985, "grad_norm": 0.9072193733429393, "learning_rate": 8.995648824624344e-06, "loss": 0.7195, "step": 722 }, { "epoch": 0.1187362716318026, "grad_norm": 0.9224186169335864, "learning_rate": 8.995628181850475e-06, "loss": 0.7263, "step": 723 }, { "epoch": 0.11890049884014534, "grad_norm": 0.8730762718625776, "learning_rate": 8.99560749024966e-06, "loss": 0.7508, "step": 724 }, { "epoch": 0.11906472604848808, "grad_norm": 0.8982887153940247, "learning_rate": 8.995586749822124e-06, "loss": 0.7269, "step": 725 }, { "epoch": 0.11922895325683082, "grad_norm": 0.9068289642355488, "learning_rate": 8.995565960568095e-06, "loss": 0.7465, "step": 726 }, { "epoch": 0.11939318046517357, "grad_norm": 0.8875535394981413, "learning_rate": 8.995545122487797e-06, "loss": 0.72, "step": 727 }, { "epoch": 0.11955740767351632, "grad_norm": 0.8679336550118347, "learning_rate": 8.995524235581455e-06, "loss": 0.7391, "step": 728 }, { "epoch": 0.11972163488185905, "grad_norm": 0.8826045646205907, "learning_rate": 8.995503299849294e-06, "loss": 0.7378, "step": 729 }, { "epoch": 0.1198858620902018, "grad_norm": 0.8701194777603063, "learning_rate": 8.995482315291548e-06, "loss": 0.7364, "step": 730 }, { "epoch": 0.12005008929854454, "grad_norm": 0.8893542492932401, "learning_rate": 8.99546128190844e-06, "loss": 0.7361, "step": 731 }, { "epoch": 0.12021431650688728, "grad_norm": 0.8765486658546501, "learning_rate": 8.995440199700199e-06, "loss": 0.7239, "step": 732 }, { "epoch": 0.12037854371523002, "grad_norm": 0.8761211470503778, "learning_rate": 8.995419068667056e-06, "loss": 0.7118, "step": 733 }, { "epoch": 0.12054277092357277, "grad_norm": 0.8784561432542338, "learning_rate": 8.995397888809236e-06, "loss": 0.7252, "step": 734 }, { "epoch": 0.1207069981319155, "grad_norm": 0.885958895044403, "learning_rate": 8.995376660126975e-06, "loss": 0.7289, "step": 735 }, { "epoch": 0.12087122534025825, "grad_norm": 1.3628303359719938, "learning_rate": 8.995355382620499e-06, "loss": 0.7363, "step": 736 }, { "epoch": 0.12103545254860099, "grad_norm": 0.8620243858978899, "learning_rate": 8.995334056290042e-06, "loss": 0.7345, "step": 737 }, { "epoch": 0.12119967975694373, "grad_norm": 0.8616681510393484, "learning_rate": 8.995312681135833e-06, "loss": 0.7369, "step": 738 }, { "epoch": 0.12136390696528647, "grad_norm": 0.8536335524173874, "learning_rate": 8.995291257158106e-06, "loss": 0.7301, "step": 739 }, { "epoch": 0.12152813417362922, "grad_norm": 0.8890164010727863, "learning_rate": 8.995269784357094e-06, "loss": 0.7259, "step": 740 }, { "epoch": 0.12169236138197195, "grad_norm": 0.8584462694033533, "learning_rate": 8.995248262733028e-06, "loss": 0.7211, "step": 741 }, { "epoch": 0.1218565885903147, "grad_norm": 0.8473041107598641, "learning_rate": 8.995226692286144e-06, "loss": 0.7446, "step": 742 }, { "epoch": 0.12202081579865744, "grad_norm": 0.857983951564626, "learning_rate": 8.995205073016676e-06, "loss": 0.7486, "step": 743 }, { "epoch": 0.12218504300700019, "grad_norm": 0.8499021660743089, "learning_rate": 8.995183404924856e-06, "loss": 0.7252, "step": 744 }, { "epoch": 0.12234927021534292, "grad_norm": 0.8432649022601657, "learning_rate": 8.995161688010924e-06, "loss": 0.7361, "step": 745 }, { "epoch": 0.12251349742368567, "grad_norm": 0.8349936223743508, "learning_rate": 8.995139922275112e-06, "loss": 0.728, "step": 746 }, { "epoch": 0.12267772463202842, "grad_norm": 0.8359575337444805, "learning_rate": 8.995118107717658e-06, "loss": 0.7339, "step": 747 }, { "epoch": 0.12284195184037115, "grad_norm": 0.8491789703211692, "learning_rate": 8.995096244338799e-06, "loss": 0.7273, "step": 748 }, { "epoch": 0.1230061790487139, "grad_norm": 0.8343756782871875, "learning_rate": 8.995074332138771e-06, "loss": 0.7376, "step": 749 }, { "epoch": 0.12317040625705664, "grad_norm": 0.8177523292597683, "learning_rate": 8.995052371117816e-06, "loss": 0.7498, "step": 750 }, { "epoch": 0.12333463346539938, "grad_norm": 0.8275634000310962, "learning_rate": 8.995030361276167e-06, "loss": 0.7181, "step": 751 }, { "epoch": 0.12349886067374212, "grad_norm": 0.8339533594185612, "learning_rate": 8.995008302614069e-06, "loss": 0.7265, "step": 752 }, { "epoch": 0.12366308788208487, "grad_norm": 0.8279085022505449, "learning_rate": 8.994986195131755e-06, "loss": 0.7269, "step": 753 }, { "epoch": 0.1238273150904276, "grad_norm": 0.8346885779308945, "learning_rate": 8.99496403882947e-06, "loss": 0.7239, "step": 754 }, { "epoch": 0.12399154229877035, "grad_norm": 0.8535778930786089, "learning_rate": 8.994941833707452e-06, "loss": 0.7335, "step": 755 }, { "epoch": 0.12415576950711309, "grad_norm": 0.8433517095759425, "learning_rate": 8.994919579765944e-06, "loss": 0.7262, "step": 756 }, { "epoch": 0.12431999671545584, "grad_norm": 0.8115026773168339, "learning_rate": 8.994897277005187e-06, "loss": 0.7543, "step": 757 }, { "epoch": 0.12448422392379857, "grad_norm": 0.8237859019603188, "learning_rate": 8.994874925425423e-06, "loss": 0.696, "step": 758 }, { "epoch": 0.12464845113214132, "grad_norm": 0.9508056714088163, "learning_rate": 8.994852525026896e-06, "loss": 0.7313, "step": 759 }, { "epoch": 0.12481267834048405, "grad_norm": 0.8253401090339175, "learning_rate": 8.994830075809846e-06, "loss": 0.7333, "step": 760 }, { "epoch": 0.1249769055488268, "grad_norm": 0.8141935870360409, "learning_rate": 8.99480757777452e-06, "loss": 0.7333, "step": 761 }, { "epoch": 0.12514113275716954, "grad_norm": 0.8136553415480036, "learning_rate": 8.994785030921164e-06, "loss": 0.7293, "step": 762 }, { "epoch": 0.1253053599655123, "grad_norm": 0.8063988080685636, "learning_rate": 8.994762435250018e-06, "loss": 0.7398, "step": 763 }, { "epoch": 0.12546958717385504, "grad_norm": 0.8024743885585705, "learning_rate": 8.994739790761329e-06, "loss": 0.7318, "step": 764 }, { "epoch": 0.12563381438219778, "grad_norm": 0.8210883509073227, "learning_rate": 8.994717097455346e-06, "loss": 0.7128, "step": 765 }, { "epoch": 0.1257980415905405, "grad_norm": 0.8081208044654137, "learning_rate": 8.994694355332311e-06, "loss": 0.7216, "step": 766 }, { "epoch": 0.12596226879888325, "grad_norm": 0.8043457567543434, "learning_rate": 8.994671564392472e-06, "loss": 0.729, "step": 767 }, { "epoch": 0.126126496007226, "grad_norm": 0.8332594138537104, "learning_rate": 8.994648724636081e-06, "loss": 0.7152, "step": 768 }, { "epoch": 0.12629072321556875, "grad_norm": 0.796400256746438, "learning_rate": 8.994625836063381e-06, "loss": 0.7337, "step": 769 }, { "epoch": 0.12645495042391147, "grad_norm": 0.7970766048853704, "learning_rate": 8.99460289867462e-06, "loss": 0.7385, "step": 770 }, { "epoch": 0.12661917763225422, "grad_norm": 0.8168189102976473, "learning_rate": 8.994579912470054e-06, "loss": 0.7207, "step": 771 }, { "epoch": 0.12678340484059697, "grad_norm": 0.8210337972298519, "learning_rate": 8.994556877449926e-06, "loss": 0.7173, "step": 772 }, { "epoch": 0.12694763204893972, "grad_norm": 0.7928567401168414, "learning_rate": 8.994533793614486e-06, "loss": 0.7299, "step": 773 }, { "epoch": 0.12711185925728244, "grad_norm": 0.7867827344107035, "learning_rate": 8.99451066096399e-06, "loss": 0.7327, "step": 774 }, { "epoch": 0.1272760864656252, "grad_norm": 0.8101031512180691, "learning_rate": 8.994487479498686e-06, "loss": 0.7201, "step": 775 }, { "epoch": 0.12744031367396794, "grad_norm": 0.8149592408788043, "learning_rate": 8.994464249218824e-06, "loss": 0.7059, "step": 776 }, { "epoch": 0.12760454088231069, "grad_norm": 0.7976670057578504, "learning_rate": 8.99444097012466e-06, "loss": 0.7234, "step": 777 }, { "epoch": 0.1277687680906534, "grad_norm": 0.7848442475451943, "learning_rate": 8.994417642216445e-06, "loss": 0.7224, "step": 778 }, { "epoch": 0.12793299529899615, "grad_norm": 0.7831415532113032, "learning_rate": 8.994394265494434e-06, "loss": 0.7116, "step": 779 }, { "epoch": 0.1280972225073389, "grad_norm": 0.8042009917600411, "learning_rate": 8.994370839958876e-06, "loss": 0.7249, "step": 780 }, { "epoch": 0.12826144971568165, "grad_norm": 0.7823737809741357, "learning_rate": 8.994347365610032e-06, "loss": 0.7148, "step": 781 }, { "epoch": 0.1284256769240244, "grad_norm": 0.7915106748965738, "learning_rate": 8.994323842448152e-06, "loss": 0.7328, "step": 782 }, { "epoch": 0.12858990413236712, "grad_norm": 0.7721916884200549, "learning_rate": 8.994300270473493e-06, "loss": 0.7112, "step": 783 }, { "epoch": 0.12875413134070987, "grad_norm": 0.7720807666798172, "learning_rate": 8.994276649686313e-06, "loss": 0.7276, "step": 784 }, { "epoch": 0.12891835854905262, "grad_norm": 0.7812178056014114, "learning_rate": 8.994252980086865e-06, "loss": 0.7125, "step": 785 }, { "epoch": 0.12908258575739537, "grad_norm": 0.7552866535606362, "learning_rate": 8.994229261675408e-06, "loss": 0.7384, "step": 786 }, { "epoch": 0.1292468129657381, "grad_norm": 0.7736822903510917, "learning_rate": 8.9942054944522e-06, "loss": 0.7231, "step": 787 }, { "epoch": 0.12941104017408084, "grad_norm": 0.7863166996807561, "learning_rate": 8.9941816784175e-06, "loss": 0.7125, "step": 788 }, { "epoch": 0.1295752673824236, "grad_norm": 0.7584645168372531, "learning_rate": 8.994157813571563e-06, "loss": 0.7218, "step": 789 }, { "epoch": 0.12973949459076634, "grad_norm": 0.7591392100122629, "learning_rate": 8.99413389991465e-06, "loss": 0.7196, "step": 790 }, { "epoch": 0.12990372179910906, "grad_norm": 0.7638964528596006, "learning_rate": 8.994109937447023e-06, "loss": 0.742, "step": 791 }, { "epoch": 0.1300679490074518, "grad_norm": 0.7638417423029302, "learning_rate": 8.994085926168942e-06, "loss": 0.7144, "step": 792 }, { "epoch": 0.13023217621579455, "grad_norm": 0.7531125912433023, "learning_rate": 8.994061866080662e-06, "loss": 0.7181, "step": 793 }, { "epoch": 0.1303964034241373, "grad_norm": 0.7638871871583925, "learning_rate": 8.994037757182452e-06, "loss": 0.7263, "step": 794 }, { "epoch": 0.13056063063248002, "grad_norm": 0.7712535656244461, "learning_rate": 8.99401359947457e-06, "loss": 0.7203, "step": 795 }, { "epoch": 0.13072485784082277, "grad_norm": 0.7534551679287208, "learning_rate": 8.993989392957277e-06, "loss": 0.7233, "step": 796 }, { "epoch": 0.13088908504916552, "grad_norm": 0.7541686256824629, "learning_rate": 8.99396513763084e-06, "loss": 0.7191, "step": 797 }, { "epoch": 0.13105331225750827, "grad_norm": 0.7558776359791881, "learning_rate": 8.993940833495517e-06, "loss": 0.7189, "step": 798 }, { "epoch": 0.13121753946585102, "grad_norm": 0.7618419103671177, "learning_rate": 8.993916480551577e-06, "loss": 0.7279, "step": 799 }, { "epoch": 0.13138176667419374, "grad_norm": 0.768280234616204, "learning_rate": 8.993892078799284e-06, "loss": 0.7126, "step": 800 }, { "epoch": 0.1315459938825365, "grad_norm": 0.7394145525562601, "learning_rate": 8.993867628238901e-06, "loss": 0.7446, "step": 801 }, { "epoch": 0.13171022109087924, "grad_norm": 0.7615299280436207, "learning_rate": 8.993843128870692e-06, "loss": 0.6987, "step": 802 }, { "epoch": 0.13187444829922199, "grad_norm": 0.7465231091292661, "learning_rate": 8.993818580694929e-06, "loss": 0.7378, "step": 803 }, { "epoch": 0.1320386755075647, "grad_norm": 0.7491298101718952, "learning_rate": 8.993793983711872e-06, "loss": 0.6964, "step": 804 }, { "epoch": 0.13220290271590746, "grad_norm": 0.7326662321957933, "learning_rate": 8.99376933792179e-06, "loss": 0.7346, "step": 805 }, { "epoch": 0.1323671299242502, "grad_norm": 0.7405957735369632, "learning_rate": 8.993744643324954e-06, "loss": 0.7056, "step": 806 }, { "epoch": 0.13253135713259295, "grad_norm": 0.7309108095519508, "learning_rate": 8.993719899921629e-06, "loss": 0.7229, "step": 807 }, { "epoch": 0.13269558434093567, "grad_norm": 0.7965004013613755, "learning_rate": 8.993695107712085e-06, "loss": 0.7272, "step": 808 }, { "epoch": 0.13285981154927842, "grad_norm": 0.7405587765052916, "learning_rate": 8.993670266696592e-06, "loss": 0.7033, "step": 809 }, { "epoch": 0.13302403875762117, "grad_norm": 0.738947917878739, "learning_rate": 8.993645376875417e-06, "loss": 0.7037, "step": 810 }, { "epoch": 0.13318826596596392, "grad_norm": 0.7266519422397545, "learning_rate": 8.993620438248834e-06, "loss": 0.7105, "step": 811 }, { "epoch": 0.13335249317430664, "grad_norm": 0.7321881237545829, "learning_rate": 8.99359545081711e-06, "loss": 0.719, "step": 812 }, { "epoch": 0.1335167203826494, "grad_norm": 0.7343760502140673, "learning_rate": 8.99357041458052e-06, "loss": 0.6994, "step": 813 }, { "epoch": 0.13368094759099214, "grad_norm": 0.7360965253226606, "learning_rate": 8.993545329539331e-06, "loss": 0.7087, "step": 814 }, { "epoch": 0.1338451747993349, "grad_norm": 0.7312711387682936, "learning_rate": 8.993520195693823e-06, "loss": 0.703, "step": 815 }, { "epoch": 0.1340094020076776, "grad_norm": 0.7354108968862768, "learning_rate": 8.99349501304426e-06, "loss": 0.7163, "step": 816 }, { "epoch": 0.13417362921602036, "grad_norm": 0.7263178056025598, "learning_rate": 8.993469781590924e-06, "loss": 0.7257, "step": 817 }, { "epoch": 0.1343378564243631, "grad_norm": 0.7416571311831478, "learning_rate": 8.993444501334085e-06, "loss": 0.7083, "step": 818 }, { "epoch": 0.13450208363270585, "grad_norm": 0.7849234501566686, "learning_rate": 8.993419172274016e-06, "loss": 0.7128, "step": 819 }, { "epoch": 0.1346663108410486, "grad_norm": 0.7178383001551933, "learning_rate": 8.993393794410995e-06, "loss": 0.7131, "step": 820 }, { "epoch": 0.13483053804939132, "grad_norm": 0.7241642838444384, "learning_rate": 8.993368367745295e-06, "loss": 0.7138, "step": 821 }, { "epoch": 0.13499476525773407, "grad_norm": 0.7168737582699622, "learning_rate": 8.993342892277194e-06, "loss": 0.7322, "step": 822 }, { "epoch": 0.13515899246607682, "grad_norm": 0.7120308237386352, "learning_rate": 8.99331736800697e-06, "loss": 0.6974, "step": 823 }, { "epoch": 0.13532321967441957, "grad_norm": 0.7321566228567261, "learning_rate": 8.993291794934896e-06, "loss": 0.6999, "step": 824 }, { "epoch": 0.1354874468827623, "grad_norm": 0.7438779269308436, "learning_rate": 8.993266173061255e-06, "loss": 0.6944, "step": 825 }, { "epoch": 0.13565167409110504, "grad_norm": 0.7118594659041331, "learning_rate": 8.993240502386322e-06, "loss": 0.7189, "step": 826 }, { "epoch": 0.1358159012994478, "grad_norm": 0.7361564635340935, "learning_rate": 8.993214782910375e-06, "loss": 0.6926, "step": 827 }, { "epoch": 0.13598012850779054, "grad_norm": 0.711244373700878, "learning_rate": 8.993189014633696e-06, "loss": 0.7185, "step": 828 }, { "epoch": 0.13614435571613326, "grad_norm": 0.7385931746846988, "learning_rate": 8.993163197556563e-06, "loss": 0.6904, "step": 829 }, { "epoch": 0.136308582924476, "grad_norm": 0.7135997031691055, "learning_rate": 8.993137331679257e-06, "loss": 0.7108, "step": 830 }, { "epoch": 0.13647281013281876, "grad_norm": 0.7246704678281216, "learning_rate": 8.993111417002059e-06, "loss": 0.7113, "step": 831 }, { "epoch": 0.1366370373411615, "grad_norm": 0.7091228636214885, "learning_rate": 8.993085453525251e-06, "loss": 0.7166, "step": 832 }, { "epoch": 0.13680126454950423, "grad_norm": 0.698307172462183, "learning_rate": 8.993059441249113e-06, "loss": 0.7131, "step": 833 }, { "epoch": 0.13696549175784697, "grad_norm": 0.7302212267089161, "learning_rate": 8.99303338017393e-06, "loss": 0.7153, "step": 834 }, { "epoch": 0.13712971896618972, "grad_norm": 0.7133967428699537, "learning_rate": 8.993007270299985e-06, "loss": 0.7199, "step": 835 }, { "epoch": 0.13729394617453247, "grad_norm": 0.6999377770634104, "learning_rate": 8.992981111627558e-06, "loss": 0.7037, "step": 836 }, { "epoch": 0.13745817338287522, "grad_norm": 0.7036896301199365, "learning_rate": 8.992954904156937e-06, "loss": 0.702, "step": 837 }, { "epoch": 0.13762240059121794, "grad_norm": 0.6896374085707714, "learning_rate": 8.992928647888405e-06, "loss": 0.7106, "step": 838 }, { "epoch": 0.1377866277995607, "grad_norm": 0.7269208950900521, "learning_rate": 8.992902342822247e-06, "loss": 0.7068, "step": 839 }, { "epoch": 0.13795085500790344, "grad_norm": 0.7046937086091937, "learning_rate": 8.99287598895875e-06, "loss": 0.703, "step": 840 }, { "epoch": 0.1381150822162462, "grad_norm": 0.7121950225185115, "learning_rate": 8.9928495862982e-06, "loss": 0.7152, "step": 841 }, { "epoch": 0.1382793094245889, "grad_norm": 0.7026225935912559, "learning_rate": 8.992823134840882e-06, "loss": 0.6939, "step": 842 }, { "epoch": 0.13844353663293166, "grad_norm": 0.6970719648209621, "learning_rate": 8.992796634587084e-06, "loss": 0.7043, "step": 843 }, { "epoch": 0.1386077638412744, "grad_norm": 0.6804608164044504, "learning_rate": 8.992770085537095e-06, "loss": 0.7077, "step": 844 }, { "epoch": 0.13877199104961715, "grad_norm": 0.6956774935332672, "learning_rate": 8.992743487691202e-06, "loss": 0.706, "step": 845 }, { "epoch": 0.13893621825795988, "grad_norm": 0.6965453226724493, "learning_rate": 8.992716841049694e-06, "loss": 0.6805, "step": 846 }, { "epoch": 0.13910044546630262, "grad_norm": 0.6834149944374464, "learning_rate": 8.992690145612864e-06, "loss": 0.7184, "step": 847 }, { "epoch": 0.13926467267464537, "grad_norm": 0.6878178766526616, "learning_rate": 8.992663401380995e-06, "loss": 0.6993, "step": 848 }, { "epoch": 0.13942889988298812, "grad_norm": 0.6872283526043412, "learning_rate": 8.992636608354383e-06, "loss": 0.6862, "step": 849 }, { "epoch": 0.13959312709133084, "grad_norm": 0.6963162299008429, "learning_rate": 8.992609766533318e-06, "loss": 0.6932, "step": 850 }, { "epoch": 0.1397573542996736, "grad_norm": 0.6895674941257026, "learning_rate": 8.992582875918089e-06, "loss": 0.7016, "step": 851 }, { "epoch": 0.13992158150801634, "grad_norm": 0.6984241181890183, "learning_rate": 8.992555936508991e-06, "loss": 0.7067, "step": 852 }, { "epoch": 0.1400858087163591, "grad_norm": 0.6905598521116458, "learning_rate": 8.992528948306314e-06, "loss": 0.6937, "step": 853 }, { "epoch": 0.14025003592470184, "grad_norm": 0.685496447106375, "learning_rate": 8.992501911310354e-06, "loss": 0.6928, "step": 854 }, { "epoch": 0.14041426313304456, "grad_norm": 0.7186605931410012, "learning_rate": 8.992474825521403e-06, "loss": 0.6888, "step": 855 }, { "epoch": 0.1405784903413873, "grad_norm": 0.6926681031006332, "learning_rate": 8.992447690939756e-06, "loss": 0.6945, "step": 856 }, { "epoch": 0.14074271754973006, "grad_norm": 0.6687413046389367, "learning_rate": 8.992420507565707e-06, "loss": 0.6915, "step": 857 }, { "epoch": 0.1409069447580728, "grad_norm": 0.6750901909722651, "learning_rate": 8.99239327539955e-06, "loss": 0.6972, "step": 858 }, { "epoch": 0.14107117196641553, "grad_norm": 0.7056162625158083, "learning_rate": 8.992365994441583e-06, "loss": 0.6981, "step": 859 }, { "epoch": 0.14123539917475827, "grad_norm": 0.6767374335973468, "learning_rate": 8.992338664692101e-06, "loss": 0.704, "step": 860 }, { "epoch": 0.14139962638310102, "grad_norm": 0.6824925357290131, "learning_rate": 8.992311286151403e-06, "loss": 0.7126, "step": 861 }, { "epoch": 0.14156385359144377, "grad_norm": 0.6611734657558349, "learning_rate": 8.992283858819784e-06, "loss": 0.7142, "step": 862 }, { "epoch": 0.1417280807997865, "grad_norm": 0.6632695371380769, "learning_rate": 8.992256382697539e-06, "loss": 0.7232, "step": 863 }, { "epoch": 0.14189230800812924, "grad_norm": 0.6626421044508288, "learning_rate": 8.992228857784975e-06, "loss": 0.7306, "step": 864 }, { "epoch": 0.142056535216472, "grad_norm": 0.6765717769175646, "learning_rate": 8.992201284082383e-06, "loss": 0.6982, "step": 865 }, { "epoch": 0.14222076242481474, "grad_norm": 0.6708039962729273, "learning_rate": 8.992173661590065e-06, "loss": 0.7088, "step": 866 }, { "epoch": 0.14238498963315746, "grad_norm": 0.6793311754143073, "learning_rate": 8.992145990308323e-06, "loss": 0.7004, "step": 867 }, { "epoch": 0.1425492168415002, "grad_norm": 0.6657408958013594, "learning_rate": 8.992118270237455e-06, "loss": 0.6799, "step": 868 }, { "epoch": 0.14271344404984296, "grad_norm": 0.6540758971441215, "learning_rate": 8.992090501377762e-06, "loss": 0.7228, "step": 869 }, { "epoch": 0.1428776712581857, "grad_norm": 0.6643906221820899, "learning_rate": 8.992062683729548e-06, "loss": 0.7104, "step": 870 }, { "epoch": 0.14304189846652846, "grad_norm": 0.6989964341191656, "learning_rate": 8.992034817293112e-06, "loss": 0.6858, "step": 871 }, { "epoch": 0.14320612567487118, "grad_norm": 0.6812360050752753, "learning_rate": 8.99200690206876e-06, "loss": 0.7117, "step": 872 }, { "epoch": 0.14337035288321393, "grad_norm": 0.6949187113631994, "learning_rate": 8.991978938056793e-06, "loss": 0.7152, "step": 873 }, { "epoch": 0.14353458009155667, "grad_norm": 0.6577059262682897, "learning_rate": 8.991950925257517e-06, "loss": 0.711, "step": 874 }, { "epoch": 0.14369880729989942, "grad_norm": 0.662738282110258, "learning_rate": 8.991922863671232e-06, "loss": 0.6881, "step": 875 }, { "epoch": 0.14386303450824214, "grad_norm": 0.6782128483184631, "learning_rate": 8.991894753298245e-06, "loss": 0.6662, "step": 876 }, { "epoch": 0.1440272617165849, "grad_norm": 0.6571616469132535, "learning_rate": 8.991866594138862e-06, "loss": 0.7077, "step": 877 }, { "epoch": 0.14419148892492764, "grad_norm": 0.6516355933069562, "learning_rate": 8.991838386193388e-06, "loss": 0.7009, "step": 878 }, { "epoch": 0.1443557161332704, "grad_norm": 0.6556308700827564, "learning_rate": 8.991810129462132e-06, "loss": 0.7031, "step": 879 }, { "epoch": 0.1445199433416131, "grad_norm": 0.6627545749856372, "learning_rate": 8.991781823945398e-06, "loss": 0.6717, "step": 880 }, { "epoch": 0.14468417054995586, "grad_norm": 0.6482910836902056, "learning_rate": 8.991753469643493e-06, "loss": 0.7217, "step": 881 }, { "epoch": 0.1448483977582986, "grad_norm": 0.7686915515107932, "learning_rate": 8.991725066556726e-06, "loss": 0.7087, "step": 882 }, { "epoch": 0.14501262496664136, "grad_norm": 0.633391471242811, "learning_rate": 8.991696614685406e-06, "loss": 0.7121, "step": 883 }, { "epoch": 0.14517685217498408, "grad_norm": 0.6592509022850245, "learning_rate": 8.99166811402984e-06, "loss": 0.6901, "step": 884 }, { "epoch": 0.14534107938332683, "grad_norm": 0.6462500462113238, "learning_rate": 8.991639564590342e-06, "loss": 0.6892, "step": 885 }, { "epoch": 0.14550530659166958, "grad_norm": 0.6449101927159558, "learning_rate": 8.991610966367217e-06, "loss": 0.6986, "step": 886 }, { "epoch": 0.14566953380001232, "grad_norm": 0.6513519292471238, "learning_rate": 8.991582319360779e-06, "loss": 0.6952, "step": 887 }, { "epoch": 0.14583376100835507, "grad_norm": 0.6351156522282531, "learning_rate": 8.991553623571336e-06, "loss": 0.6947, "step": 888 }, { "epoch": 0.1459979882166978, "grad_norm": 0.6463441730942351, "learning_rate": 8.991524878999202e-06, "loss": 0.6855, "step": 889 }, { "epoch": 0.14616221542504054, "grad_norm": 0.6296524118090603, "learning_rate": 8.99149608564469e-06, "loss": 0.6988, "step": 890 }, { "epoch": 0.1463264426333833, "grad_norm": 0.6527468073618038, "learning_rate": 8.991467243508111e-06, "loss": 0.6997, "step": 891 }, { "epoch": 0.14649066984172604, "grad_norm": 0.6337043319103454, "learning_rate": 8.991438352589779e-06, "loss": 0.6963, "step": 892 }, { "epoch": 0.14665489705006876, "grad_norm": 0.6566657563406406, "learning_rate": 8.991409412890008e-06, "loss": 0.7111, "step": 893 }, { "epoch": 0.1468191242584115, "grad_norm": 0.6858464246143825, "learning_rate": 8.991380424409111e-06, "loss": 0.7123, "step": 894 }, { "epoch": 0.14698335146675426, "grad_norm": 0.6529635045203046, "learning_rate": 8.991351387147404e-06, "loss": 0.7247, "step": 895 }, { "epoch": 0.147147578675097, "grad_norm": 0.6511476919346777, "learning_rate": 8.991322301105202e-06, "loss": 0.7022, "step": 896 }, { "epoch": 0.14731180588343973, "grad_norm": 0.6335117395711469, "learning_rate": 8.99129316628282e-06, "loss": 0.6852, "step": 897 }, { "epoch": 0.14747603309178248, "grad_norm": 0.6231917174435052, "learning_rate": 8.991263982680576e-06, "loss": 0.7049, "step": 898 }, { "epoch": 0.14764026030012523, "grad_norm": 0.6470589103422413, "learning_rate": 8.991234750298787e-06, "loss": 0.7089, "step": 899 }, { "epoch": 0.14780448750846797, "grad_norm": 0.6358772641933567, "learning_rate": 8.99120546913777e-06, "loss": 0.6738, "step": 900 }, { "epoch": 0.1479687147168107, "grad_norm": 0.6221324653432067, "learning_rate": 8.991176139197841e-06, "loss": 0.7093, "step": 901 }, { "epoch": 0.14813294192515344, "grad_norm": 0.6215558642133383, "learning_rate": 8.991146760479323e-06, "loss": 0.6978, "step": 902 }, { "epoch": 0.1482971691334962, "grad_norm": 0.6263701610045573, "learning_rate": 8.99111733298253e-06, "loss": 0.7108, "step": 903 }, { "epoch": 0.14846139634183894, "grad_norm": 0.6296632177864455, "learning_rate": 8.991087856707785e-06, "loss": 0.7151, "step": 904 }, { "epoch": 0.14862562355018166, "grad_norm": 0.6178781456844099, "learning_rate": 8.991058331655408e-06, "loss": 0.6921, "step": 905 }, { "epoch": 0.1487898507585244, "grad_norm": 0.6426241622321025, "learning_rate": 8.991028757825718e-06, "loss": 0.6661, "step": 906 }, { "epoch": 0.14895407796686716, "grad_norm": 0.6315217002269438, "learning_rate": 8.990999135219037e-06, "loss": 0.7029, "step": 907 }, { "epoch": 0.1491183051752099, "grad_norm": 0.6221289953237676, "learning_rate": 8.990969463835688e-06, "loss": 0.6854, "step": 908 }, { "epoch": 0.14928253238355266, "grad_norm": 0.6351780842503867, "learning_rate": 8.99093974367599e-06, "loss": 0.6746, "step": 909 }, { "epoch": 0.14944675959189538, "grad_norm": 0.6162843131557986, "learning_rate": 8.990909974740271e-06, "loss": 0.7063, "step": 910 }, { "epoch": 0.14961098680023813, "grad_norm": 0.6438716140978041, "learning_rate": 8.990880157028849e-06, "loss": 0.668, "step": 911 }, { "epoch": 0.14977521400858088, "grad_norm": 0.6738550733623182, "learning_rate": 8.99085029054205e-06, "loss": 0.6723, "step": 912 }, { "epoch": 0.14993944121692362, "grad_norm": 0.6236064060080636, "learning_rate": 8.990820375280199e-06, "loss": 0.6933, "step": 913 }, { "epoch": 0.15010366842526635, "grad_norm": 0.8878180053952123, "learning_rate": 8.990790411243622e-06, "loss": 0.7072, "step": 914 }, { "epoch": 0.1502678956336091, "grad_norm": 0.6040213602808409, "learning_rate": 8.99076039843264e-06, "loss": 0.7265, "step": 915 }, { "epoch": 0.15043212284195184, "grad_norm": 0.5993272996617645, "learning_rate": 8.990730336847584e-06, "loss": 0.6735, "step": 916 }, { "epoch": 0.1505963500502946, "grad_norm": 0.6196667818949094, "learning_rate": 8.990700226488775e-06, "loss": 0.683, "step": 917 }, { "epoch": 0.1507605772586373, "grad_norm": 0.611151140998212, "learning_rate": 8.990670067356546e-06, "loss": 0.6855, "step": 918 }, { "epoch": 0.15092480446698006, "grad_norm": 0.6124554833223387, "learning_rate": 8.990639859451223e-06, "loss": 0.7316, "step": 919 }, { "epoch": 0.1510890316753228, "grad_norm": 0.8671036907456195, "learning_rate": 8.99060960277313e-06, "loss": 0.6754, "step": 920 }, { "epoch": 0.15125325888366556, "grad_norm": 0.604961084181521, "learning_rate": 8.9905792973226e-06, "loss": 0.6957, "step": 921 }, { "epoch": 0.15141748609200828, "grad_norm": 0.6367685614450236, "learning_rate": 8.99054894309996e-06, "loss": 0.6878, "step": 922 }, { "epoch": 0.15158171330035103, "grad_norm": 0.6291160157774949, "learning_rate": 8.99051854010554e-06, "loss": 0.6758, "step": 923 }, { "epoch": 0.15174594050869378, "grad_norm": 0.6186538967618891, "learning_rate": 8.990488088339673e-06, "loss": 0.6851, "step": 924 }, { "epoch": 0.15191016771703653, "grad_norm": 0.6043423314180405, "learning_rate": 8.990457587802685e-06, "loss": 0.7029, "step": 925 }, { "epoch": 0.15207439492537927, "grad_norm": 0.6057826193851099, "learning_rate": 8.99042703849491e-06, "loss": 0.6894, "step": 926 }, { "epoch": 0.152238622133722, "grad_norm": 0.6415804974916999, "learning_rate": 8.990396440416682e-06, "loss": 0.6895, "step": 927 }, { "epoch": 0.15240284934206474, "grad_norm": 0.590171420782872, "learning_rate": 8.990365793568326e-06, "loss": 0.7064, "step": 928 }, { "epoch": 0.1525670765504075, "grad_norm": 0.5991023181384277, "learning_rate": 8.990335097950184e-06, "loss": 0.7034, "step": 929 }, { "epoch": 0.15273130375875024, "grad_norm": 0.5925124030029137, "learning_rate": 8.990304353562582e-06, "loss": 0.6722, "step": 930 }, { "epoch": 0.15289553096709296, "grad_norm": 0.5879098836185569, "learning_rate": 8.990273560405858e-06, "loss": 0.6778, "step": 931 }, { "epoch": 0.1530597581754357, "grad_norm": 0.6081890261292409, "learning_rate": 8.990242718480345e-06, "loss": 0.6977, "step": 932 }, { "epoch": 0.15322398538377846, "grad_norm": 0.5943707439558413, "learning_rate": 8.990211827786377e-06, "loss": 0.6833, "step": 933 }, { "epoch": 0.1533882125921212, "grad_norm": 0.5957073787044868, "learning_rate": 8.990180888324293e-06, "loss": 0.6769, "step": 934 }, { "epoch": 0.15355243980046393, "grad_norm": 0.5990803867181086, "learning_rate": 8.990149900094426e-06, "loss": 0.6947, "step": 935 }, { "epoch": 0.15371666700880668, "grad_norm": 0.5762901620148804, "learning_rate": 8.990118863097113e-06, "loss": 0.6989, "step": 936 }, { "epoch": 0.15388089421714943, "grad_norm": 0.583179708719448, "learning_rate": 8.990087777332693e-06, "loss": 0.7021, "step": 937 }, { "epoch": 0.15404512142549218, "grad_norm": 0.5957217968883289, "learning_rate": 8.9900566428015e-06, "loss": 0.6581, "step": 938 }, { "epoch": 0.1542093486338349, "grad_norm": 0.5768688535477488, "learning_rate": 8.990025459503875e-06, "loss": 0.6909, "step": 939 }, { "epoch": 0.15437357584217765, "grad_norm": 0.5863263337089208, "learning_rate": 8.989994227440156e-06, "loss": 0.6903, "step": 940 }, { "epoch": 0.1545378030505204, "grad_norm": 1.449174264009128, "learning_rate": 8.989962946610682e-06, "loss": 0.6777, "step": 941 }, { "epoch": 0.15470203025886314, "grad_norm": 0.5857095155638259, "learning_rate": 8.989931617015794e-06, "loss": 0.6847, "step": 942 }, { "epoch": 0.1548662574672059, "grad_norm": 0.6045629484192497, "learning_rate": 8.98990023865583e-06, "loss": 0.6973, "step": 943 }, { "epoch": 0.1550304846755486, "grad_norm": 0.6271020723122404, "learning_rate": 8.989868811531133e-06, "loss": 0.6822, "step": 944 }, { "epoch": 0.15519471188389136, "grad_norm": 0.5974097511017354, "learning_rate": 8.989837335642041e-06, "loss": 0.6932, "step": 945 }, { "epoch": 0.1553589390922341, "grad_norm": 0.6165658939960453, "learning_rate": 8.989805810988899e-06, "loss": 0.7028, "step": 946 }, { "epoch": 0.15552316630057686, "grad_norm": 0.5750032115880928, "learning_rate": 8.98977423757205e-06, "loss": 0.699, "step": 947 }, { "epoch": 0.15568739350891958, "grad_norm": 0.5930114131715402, "learning_rate": 8.989742615391835e-06, "loss": 0.6946, "step": 948 }, { "epoch": 0.15585162071726233, "grad_norm": 0.5821912545770899, "learning_rate": 8.989710944448598e-06, "loss": 0.6944, "step": 949 }, { "epoch": 0.15601584792560508, "grad_norm": 0.5998707680711169, "learning_rate": 8.989679224742682e-06, "loss": 0.6817, "step": 950 }, { "epoch": 0.15618007513394783, "grad_norm": 0.784960727758064, "learning_rate": 8.989647456274432e-06, "loss": 0.6667, "step": 951 }, { "epoch": 0.15634430234229055, "grad_norm": 0.5905550333586939, "learning_rate": 8.989615639044194e-06, "loss": 0.6909, "step": 952 }, { "epoch": 0.1565085295506333, "grad_norm": 0.5813775463869184, "learning_rate": 8.989583773052312e-06, "loss": 0.6741, "step": 953 }, { "epoch": 0.15667275675897605, "grad_norm": 0.5859001004681035, "learning_rate": 8.989551858299135e-06, "loss": 0.675, "step": 954 }, { "epoch": 0.1568369839673188, "grad_norm": 0.5789497437415951, "learning_rate": 8.989519894785007e-06, "loss": 0.6924, "step": 955 }, { "epoch": 0.15700121117566151, "grad_norm": 0.576924083966074, "learning_rate": 8.989487882510275e-06, "loss": 0.6951, "step": 956 }, { "epoch": 0.15716543838400426, "grad_norm": 0.5860897339311051, "learning_rate": 8.98945582147529e-06, "loss": 0.6837, "step": 957 }, { "epoch": 0.157329665592347, "grad_norm": 0.567962554350338, "learning_rate": 8.989423711680394e-06, "loss": 0.6718, "step": 958 }, { "epoch": 0.15749389280068976, "grad_norm": 0.580251872407634, "learning_rate": 8.989391553125943e-06, "loss": 0.6766, "step": 959 }, { "epoch": 0.1576581200090325, "grad_norm": 0.5835185656150728, "learning_rate": 8.98935934581228e-06, "loss": 0.6656, "step": 960 }, { "epoch": 0.15782234721737523, "grad_norm": 0.5680557698537251, "learning_rate": 8.989327089739759e-06, "loss": 0.6886, "step": 961 }, { "epoch": 0.15798657442571798, "grad_norm": 0.5822249108751449, "learning_rate": 8.98929478490873e-06, "loss": 0.6793, "step": 962 }, { "epoch": 0.15815080163406073, "grad_norm": 0.6920777043397142, "learning_rate": 8.989262431319541e-06, "loss": 0.68, "step": 963 }, { "epoch": 0.15831502884240348, "grad_norm": 0.5697926374238244, "learning_rate": 8.989230028972546e-06, "loss": 0.6902, "step": 964 }, { "epoch": 0.1584792560507462, "grad_norm": 0.5648702262985533, "learning_rate": 8.989197577868095e-06, "loss": 0.6785, "step": 965 }, { "epoch": 0.15864348325908895, "grad_norm": 0.574611072826276, "learning_rate": 8.989165078006542e-06, "loss": 0.702, "step": 966 }, { "epoch": 0.1588077104674317, "grad_norm": 0.5653515769606229, "learning_rate": 8.989132529388239e-06, "loss": 0.6805, "step": 967 }, { "epoch": 0.15897193767577444, "grad_norm": 0.5677479447859182, "learning_rate": 8.989099932013541e-06, "loss": 0.6899, "step": 968 }, { "epoch": 0.15913616488411716, "grad_norm": 0.5600255179636507, "learning_rate": 8.989067285882801e-06, "loss": 0.6952, "step": 969 }, { "epoch": 0.1593003920924599, "grad_norm": 0.5645642174349392, "learning_rate": 8.989034590996373e-06, "loss": 0.6797, "step": 970 }, { "epoch": 0.15946461930080266, "grad_norm": 0.5661098243057704, "learning_rate": 8.989001847354614e-06, "loss": 0.6769, "step": 971 }, { "epoch": 0.1596288465091454, "grad_norm": 0.5608033372773416, "learning_rate": 8.988969054957878e-06, "loss": 0.6786, "step": 972 }, { "epoch": 0.15979307371748813, "grad_norm": 0.5780246426412402, "learning_rate": 8.98893621380652e-06, "loss": 0.6693, "step": 973 }, { "epoch": 0.15995730092583088, "grad_norm": 0.5786252288148208, "learning_rate": 8.9889033239009e-06, "loss": 0.6862, "step": 974 }, { "epoch": 0.16012152813417363, "grad_norm": 0.7542693728723142, "learning_rate": 8.988870385241371e-06, "loss": 0.6678, "step": 975 }, { "epoch": 0.16028575534251638, "grad_norm": 0.6306239933124853, "learning_rate": 8.988837397828296e-06, "loss": 0.6757, "step": 976 }, { "epoch": 0.16044998255085913, "grad_norm": 0.5939312755402465, "learning_rate": 8.988804361662029e-06, "loss": 0.6713, "step": 977 }, { "epoch": 0.16061420975920185, "grad_norm": 0.5633583256765683, "learning_rate": 8.98877127674293e-06, "loss": 0.6673, "step": 978 }, { "epoch": 0.1607784369675446, "grad_norm": 0.5800987462350472, "learning_rate": 8.988738143071359e-06, "loss": 0.6605, "step": 979 }, { "epoch": 0.16094266417588735, "grad_norm": 0.5643582604641704, "learning_rate": 8.988704960647677e-06, "loss": 0.7013, "step": 980 }, { "epoch": 0.1611068913842301, "grad_norm": 0.5835933494518621, "learning_rate": 8.988671729472241e-06, "loss": 0.6782, "step": 981 }, { "epoch": 0.16127111859257282, "grad_norm": 0.6788940707481684, "learning_rate": 8.988638449545415e-06, "loss": 0.6844, "step": 982 }, { "epoch": 0.16143534580091556, "grad_norm": 0.5471636693606157, "learning_rate": 8.988605120867557e-06, "loss": 0.6786, "step": 983 }, { "epoch": 0.1615995730092583, "grad_norm": 0.5734962283482052, "learning_rate": 8.988571743439032e-06, "loss": 0.6819, "step": 984 }, { "epoch": 0.16176380021760106, "grad_norm": 0.6161812954255436, "learning_rate": 8.988538317260203e-06, "loss": 0.7011, "step": 985 }, { "epoch": 0.16192802742594378, "grad_norm": 0.5586729255122475, "learning_rate": 8.988504842331431e-06, "loss": 0.6924, "step": 986 }, { "epoch": 0.16209225463428653, "grad_norm": 0.5589443674472094, "learning_rate": 8.988471318653081e-06, "loss": 0.6688, "step": 987 }, { "epoch": 0.16225648184262928, "grad_norm": 0.5725349388200487, "learning_rate": 8.988437746225515e-06, "loss": 0.6859, "step": 988 }, { "epoch": 0.16242070905097203, "grad_norm": 0.5386632021604499, "learning_rate": 8.9884041250491e-06, "loss": 0.7098, "step": 989 }, { "epoch": 0.16258493625931475, "grad_norm": 0.5588338765541392, "learning_rate": 8.9883704551242e-06, "loss": 0.6869, "step": 990 }, { "epoch": 0.1627491634676575, "grad_norm": 0.5620172479210048, "learning_rate": 8.98833673645118e-06, "loss": 0.6801, "step": 991 }, { "epoch": 0.16291339067600025, "grad_norm": 0.578537129368564, "learning_rate": 8.988302969030409e-06, "loss": 0.6802, "step": 992 }, { "epoch": 0.163077617884343, "grad_norm": 0.5788833235484687, "learning_rate": 8.98826915286225e-06, "loss": 0.6838, "step": 993 }, { "epoch": 0.16324184509268574, "grad_norm": 0.5455036420454441, "learning_rate": 8.988235287947074e-06, "loss": 0.684, "step": 994 }, { "epoch": 0.16340607230102847, "grad_norm": 0.5469684799799748, "learning_rate": 8.988201374285246e-06, "loss": 0.6732, "step": 995 }, { "epoch": 0.16357029950937121, "grad_norm": 0.5659737004114193, "learning_rate": 8.988167411877134e-06, "loss": 0.6774, "step": 996 }, { "epoch": 0.16373452671771396, "grad_norm": 0.5472964921114717, "learning_rate": 8.98813340072311e-06, "loss": 0.6561, "step": 997 }, { "epoch": 0.1638987539260567, "grad_norm": 0.5313159381480278, "learning_rate": 8.98809934082354e-06, "loss": 0.6924, "step": 998 }, { "epoch": 0.16406298113439943, "grad_norm": 0.5792443966133527, "learning_rate": 8.988065232178799e-06, "loss": 0.6711, "step": 999 }, { "epoch": 0.16422720834274218, "grad_norm": 0.5545515494905078, "learning_rate": 8.98803107478925e-06, "loss": 0.6831, "step": 1000 }, { "epoch": 0.16439143555108493, "grad_norm": 0.5573621935545905, "learning_rate": 8.98799686865527e-06, "loss": 0.6991, "step": 1001 }, { "epoch": 0.16455566275942768, "grad_norm": 0.5491556801369158, "learning_rate": 8.987962613777226e-06, "loss": 0.6638, "step": 1002 }, { "epoch": 0.1647198899677704, "grad_norm": 0.5392445583149836, "learning_rate": 8.987928310155495e-06, "loss": 0.6652, "step": 1003 }, { "epoch": 0.16488411717611315, "grad_norm": 0.5348090900057155, "learning_rate": 8.987893957790447e-06, "loss": 0.6828, "step": 1004 }, { "epoch": 0.1650483443844559, "grad_norm": 0.5577106888054703, "learning_rate": 8.987859556682454e-06, "loss": 0.6719, "step": 1005 }, { "epoch": 0.16521257159279865, "grad_norm": 0.5363067993234555, "learning_rate": 8.98782510683189e-06, "loss": 0.6803, "step": 1006 }, { "epoch": 0.16537679880114137, "grad_norm": 0.5603737925264757, "learning_rate": 8.987790608239131e-06, "loss": 0.6825, "step": 1007 }, { "epoch": 0.16554102600948412, "grad_norm": 0.5411605815062192, "learning_rate": 8.98775606090455e-06, "loss": 0.6625, "step": 1008 }, { "epoch": 0.16570525321782686, "grad_norm": 0.5489945681101899, "learning_rate": 8.987721464828525e-06, "loss": 0.6695, "step": 1009 }, { "epoch": 0.1658694804261696, "grad_norm": 0.54397334030031, "learning_rate": 8.987686820011428e-06, "loss": 0.6667, "step": 1010 }, { "epoch": 0.16603370763451233, "grad_norm": 0.5321484388094383, "learning_rate": 8.987652126453636e-06, "loss": 0.691, "step": 1011 }, { "epoch": 0.16619793484285508, "grad_norm": 0.5317184209836602, "learning_rate": 8.987617384155527e-06, "loss": 0.6477, "step": 1012 }, { "epoch": 0.16636216205119783, "grad_norm": 0.5341564431442047, "learning_rate": 8.987582593117478e-06, "loss": 0.6473, "step": 1013 }, { "epoch": 0.16652638925954058, "grad_norm": 0.5787953947276587, "learning_rate": 8.987547753339868e-06, "loss": 0.6788, "step": 1014 }, { "epoch": 0.16669061646788333, "grad_norm": 0.5338083228144461, "learning_rate": 8.987512864823073e-06, "loss": 0.6783, "step": 1015 }, { "epoch": 0.16685484367622605, "grad_norm": 0.5431111870614626, "learning_rate": 8.987477927567475e-06, "loss": 0.6665, "step": 1016 }, { "epoch": 0.1670190708845688, "grad_norm": 0.5460094365072707, "learning_rate": 8.987442941573448e-06, "loss": 0.665, "step": 1017 }, { "epoch": 0.16718329809291155, "grad_norm": 0.5427687593109538, "learning_rate": 8.98740790684138e-06, "loss": 0.6562, "step": 1018 }, { "epoch": 0.1673475253012543, "grad_norm": 0.533396167621092, "learning_rate": 8.987372823371644e-06, "loss": 0.6684, "step": 1019 }, { "epoch": 0.16751175250959702, "grad_norm": 0.5320714216988802, "learning_rate": 8.987337691164625e-06, "loss": 0.6519, "step": 1020 }, { "epoch": 0.16767597971793977, "grad_norm": 0.5456380984656337, "learning_rate": 8.987302510220704e-06, "loss": 0.686, "step": 1021 }, { "epoch": 0.16784020692628251, "grad_norm": 0.5224293963323513, "learning_rate": 8.987267280540264e-06, "loss": 0.6697, "step": 1022 }, { "epoch": 0.16800443413462526, "grad_norm": 0.5243433616305909, "learning_rate": 8.987232002123685e-06, "loss": 0.6921, "step": 1023 }, { "epoch": 0.16816866134296798, "grad_norm": 0.5328025582851148, "learning_rate": 8.987196674971352e-06, "loss": 0.6555, "step": 1024 }, { "epoch": 0.16833288855131073, "grad_norm": 0.5198078640352478, "learning_rate": 8.987161299083647e-06, "loss": 0.6379, "step": 1025 }, { "epoch": 0.16849711575965348, "grad_norm": 0.5882832847154114, "learning_rate": 8.987125874460957e-06, "loss": 0.6701, "step": 1026 }, { "epoch": 0.16866134296799623, "grad_norm": 0.5295752447874579, "learning_rate": 8.987090401103665e-06, "loss": 0.6944, "step": 1027 }, { "epoch": 0.16882557017633895, "grad_norm": 0.5326690138378312, "learning_rate": 8.987054879012156e-06, "loss": 0.6812, "step": 1028 }, { "epoch": 0.1689897973846817, "grad_norm": 0.5279524083193049, "learning_rate": 8.987019308186818e-06, "loss": 0.664, "step": 1029 }, { "epoch": 0.16915402459302445, "grad_norm": 0.5696406663324558, "learning_rate": 8.986983688628034e-06, "loss": 0.6615, "step": 1030 }, { "epoch": 0.1693182518013672, "grad_norm": 0.5237891547003628, "learning_rate": 8.986948020336192e-06, "loss": 0.6643, "step": 1031 }, { "epoch": 0.16948247900970995, "grad_norm": 0.5398581475126638, "learning_rate": 8.986912303311682e-06, "loss": 0.6935, "step": 1032 }, { "epoch": 0.16964670621805267, "grad_norm": 0.5257870387792046, "learning_rate": 8.986876537554889e-06, "loss": 0.6688, "step": 1033 }, { "epoch": 0.16981093342639542, "grad_norm": 0.5358931293606296, "learning_rate": 8.986840723066202e-06, "loss": 0.6756, "step": 1034 }, { "epoch": 0.16997516063473816, "grad_norm": 0.5429766773598853, "learning_rate": 8.98680485984601e-06, "loss": 0.6706, "step": 1035 }, { "epoch": 0.1701393878430809, "grad_norm": 0.5115483229740615, "learning_rate": 8.986768947894704e-06, "loss": 0.665, "step": 1036 }, { "epoch": 0.17030361505142363, "grad_norm": 0.5223361541452932, "learning_rate": 8.98673298721267e-06, "loss": 0.6848, "step": 1037 }, { "epoch": 0.17046784225976638, "grad_norm": 0.8292942856556398, "learning_rate": 8.986696977800305e-06, "loss": 0.6611, "step": 1038 }, { "epoch": 0.17063206946810913, "grad_norm": 0.5594058489983992, "learning_rate": 8.986660919657995e-06, "loss": 0.6699, "step": 1039 }, { "epoch": 0.17079629667645188, "grad_norm": 0.5191782674532504, "learning_rate": 8.986624812786133e-06, "loss": 0.6463, "step": 1040 }, { "epoch": 0.1709605238847946, "grad_norm": 0.5065778655698908, "learning_rate": 8.986588657185112e-06, "loss": 0.6804, "step": 1041 }, { "epoch": 0.17112475109313735, "grad_norm": 0.5140861111946393, "learning_rate": 8.986552452855323e-06, "loss": 0.6425, "step": 1042 }, { "epoch": 0.1712889783014801, "grad_norm": 0.5472505082789985, "learning_rate": 8.98651619979716e-06, "loss": 0.654, "step": 1043 }, { "epoch": 0.17145320550982285, "grad_norm": 0.5101925390931323, "learning_rate": 8.986479898011019e-06, "loss": 0.685, "step": 1044 }, { "epoch": 0.17161743271816557, "grad_norm": 0.5206702278219933, "learning_rate": 8.98644354749729e-06, "loss": 0.6693, "step": 1045 }, { "epoch": 0.17178165992650832, "grad_norm": 0.5126143393833118, "learning_rate": 8.986407148256372e-06, "loss": 0.6799, "step": 1046 }, { "epoch": 0.17194588713485107, "grad_norm": 0.5331986314895121, "learning_rate": 8.986370700288655e-06, "loss": 0.6638, "step": 1047 }, { "epoch": 0.17211011434319382, "grad_norm": 0.5073509933218882, "learning_rate": 8.98633420359454e-06, "loss": 0.6802, "step": 1048 }, { "epoch": 0.17227434155153656, "grad_norm": 0.508473575804819, "learning_rate": 8.986297658174423e-06, "loss": 0.6571, "step": 1049 }, { "epoch": 0.17243856875987928, "grad_norm": 0.49990662654253826, "learning_rate": 8.986261064028698e-06, "loss": 0.668, "step": 1050 }, { "epoch": 0.17260279596822203, "grad_norm": 0.5180145085098473, "learning_rate": 8.986224421157764e-06, "loss": 0.6652, "step": 1051 }, { "epoch": 0.17276702317656478, "grad_norm": 0.5008280735732593, "learning_rate": 8.98618772956202e-06, "loss": 0.6635, "step": 1052 }, { "epoch": 0.17293125038490753, "grad_norm": 0.525862217902218, "learning_rate": 8.986150989241863e-06, "loss": 0.6604, "step": 1053 }, { "epoch": 0.17309547759325025, "grad_norm": 0.5041642087839215, "learning_rate": 8.986114200197692e-06, "loss": 0.6637, "step": 1054 }, { "epoch": 0.173259704801593, "grad_norm": 0.5755431185436396, "learning_rate": 8.986077362429908e-06, "loss": 0.6724, "step": 1055 }, { "epoch": 0.17342393200993575, "grad_norm": 0.5355683129486976, "learning_rate": 8.986040475938908e-06, "loss": 0.6549, "step": 1056 }, { "epoch": 0.1735881592182785, "grad_norm": 0.5041556885697243, "learning_rate": 8.986003540725098e-06, "loss": 0.6591, "step": 1057 }, { "epoch": 0.17375238642662122, "grad_norm": 0.511207002198651, "learning_rate": 8.985966556788873e-06, "loss": 0.6426, "step": 1058 }, { "epoch": 0.17391661363496397, "grad_norm": 0.49542003624935754, "learning_rate": 8.985929524130638e-06, "loss": 0.6586, "step": 1059 }, { "epoch": 0.17408084084330672, "grad_norm": 0.5639426152691353, "learning_rate": 8.985892442750796e-06, "loss": 0.6463, "step": 1060 }, { "epoch": 0.17424506805164947, "grad_norm": 0.5156251092751705, "learning_rate": 8.985855312649749e-06, "loss": 0.701, "step": 1061 }, { "epoch": 0.1744092952599922, "grad_norm": 0.5004678577815013, "learning_rate": 8.985818133827898e-06, "loss": 0.6876, "step": 1062 }, { "epoch": 0.17457352246833494, "grad_norm": 0.5368872269813518, "learning_rate": 8.98578090628565e-06, "loss": 0.6573, "step": 1063 }, { "epoch": 0.17473774967667768, "grad_norm": 0.5012298661884161, "learning_rate": 8.985743630023406e-06, "loss": 0.675, "step": 1064 }, { "epoch": 0.17490197688502043, "grad_norm": 0.586654914508436, "learning_rate": 8.985706305041575e-06, "loss": 0.6746, "step": 1065 }, { "epoch": 0.17506620409336318, "grad_norm": 0.5265854279573517, "learning_rate": 8.98566893134056e-06, "loss": 0.6883, "step": 1066 }, { "epoch": 0.1752304313017059, "grad_norm": 0.49981491800267486, "learning_rate": 8.985631508920767e-06, "loss": 0.6485, "step": 1067 }, { "epoch": 0.17539465851004865, "grad_norm": 0.4938509609458083, "learning_rate": 8.985594037782602e-06, "loss": 0.6846, "step": 1068 }, { "epoch": 0.1755588857183914, "grad_norm": 0.5031409162616062, "learning_rate": 8.985556517926472e-06, "loss": 0.6357, "step": 1069 }, { "epoch": 0.17572311292673415, "grad_norm": 0.5700620669276998, "learning_rate": 8.985518949352786e-06, "loss": 0.6849, "step": 1070 }, { "epoch": 0.17588734013507687, "grad_norm": 0.5145274475647432, "learning_rate": 8.98548133206195e-06, "loss": 0.6694, "step": 1071 }, { "epoch": 0.17605156734341962, "grad_norm": 0.5063946464405173, "learning_rate": 8.985443666054375e-06, "loss": 0.678, "step": 1072 }, { "epoch": 0.17621579455176237, "grad_norm": 0.4916761388400217, "learning_rate": 8.985405951330468e-06, "loss": 0.6829, "step": 1073 }, { "epoch": 0.17638002176010512, "grad_norm": 0.5289628393025868, "learning_rate": 8.98536818789064e-06, "loss": 0.6685, "step": 1074 }, { "epoch": 0.17654424896844784, "grad_norm": 0.5643139132504469, "learning_rate": 8.985330375735298e-06, "loss": 0.6627, "step": 1075 }, { "epoch": 0.17670847617679059, "grad_norm": 0.4814303227261524, "learning_rate": 8.985292514864859e-06, "loss": 0.6559, "step": 1076 }, { "epoch": 0.17687270338513333, "grad_norm": 0.4974900220408461, "learning_rate": 8.985254605279726e-06, "loss": 0.6716, "step": 1077 }, { "epoch": 0.17703693059347608, "grad_norm": 0.48748470280103556, "learning_rate": 8.985216646980318e-06, "loss": 0.6554, "step": 1078 }, { "epoch": 0.1772011578018188, "grad_norm": 0.476700155245185, "learning_rate": 8.985178639967044e-06, "loss": 0.6633, "step": 1079 }, { "epoch": 0.17736538501016155, "grad_norm": 0.48611980044750863, "learning_rate": 8.985140584240317e-06, "loss": 0.6477, "step": 1080 }, { "epoch": 0.1775296122185043, "grad_norm": 0.4744923322444135, "learning_rate": 8.985102479800551e-06, "loss": 0.6889, "step": 1081 }, { "epoch": 0.17769383942684705, "grad_norm": 0.492363316728015, "learning_rate": 8.985064326648157e-06, "loss": 0.6646, "step": 1082 }, { "epoch": 0.1778580666351898, "grad_norm": 0.5034362431677137, "learning_rate": 8.985026124783554e-06, "loss": 0.6495, "step": 1083 }, { "epoch": 0.17802229384353252, "grad_norm": 0.5305230978210335, "learning_rate": 8.984987874207156e-06, "loss": 0.6578, "step": 1084 }, { "epoch": 0.17818652105187527, "grad_norm": 0.49163982241906173, "learning_rate": 8.984949574919374e-06, "loss": 0.67, "step": 1085 }, { "epoch": 0.17835074826021802, "grad_norm": 0.48484339846155095, "learning_rate": 8.984911226920629e-06, "loss": 0.6795, "step": 1086 }, { "epoch": 0.17851497546856077, "grad_norm": 0.4746707284972163, "learning_rate": 8.984872830211335e-06, "loss": 0.6693, "step": 1087 }, { "epoch": 0.1786792026769035, "grad_norm": 0.4815131650072882, "learning_rate": 8.984834384791908e-06, "loss": 0.6627, "step": 1088 }, { "epoch": 0.17884342988524624, "grad_norm": 0.49934183398618404, "learning_rate": 8.984795890662768e-06, "loss": 0.6705, "step": 1089 }, { "epoch": 0.17900765709358898, "grad_norm": 0.46901602497900924, "learning_rate": 8.984757347824334e-06, "loss": 0.6822, "step": 1090 }, { "epoch": 0.17917188430193173, "grad_norm": 0.4841196036749678, "learning_rate": 8.984718756277019e-06, "loss": 0.6612, "step": 1091 }, { "epoch": 0.17933611151027445, "grad_norm": 0.495840243932423, "learning_rate": 8.984680116021248e-06, "loss": 0.6459, "step": 1092 }, { "epoch": 0.1795003387186172, "grad_norm": 0.4745846787795327, "learning_rate": 8.98464142705744e-06, "loss": 0.663, "step": 1093 }, { "epoch": 0.17966456592695995, "grad_norm": 0.4880415845630486, "learning_rate": 8.984602689386013e-06, "loss": 0.6527, "step": 1094 }, { "epoch": 0.1798287931353027, "grad_norm": 0.48962473221363556, "learning_rate": 8.984563903007389e-06, "loss": 0.6714, "step": 1095 }, { "epoch": 0.17999302034364542, "grad_norm": 0.483069837873451, "learning_rate": 8.984525067921987e-06, "loss": 0.6776, "step": 1096 }, { "epoch": 0.18015724755198817, "grad_norm": 0.4767229218630196, "learning_rate": 8.984486184130231e-06, "loss": 0.6615, "step": 1097 }, { "epoch": 0.18032147476033092, "grad_norm": 0.5064791581734579, "learning_rate": 8.984447251632543e-06, "loss": 0.6765, "step": 1098 }, { "epoch": 0.18048570196867367, "grad_norm": 0.5345397900086212, "learning_rate": 8.984408270429348e-06, "loss": 0.6513, "step": 1099 }, { "epoch": 0.1806499291770164, "grad_norm": 0.5296402491323332, "learning_rate": 8.984369240521063e-06, "loss": 0.6468, "step": 1100 }, { "epoch": 0.18081415638535914, "grad_norm": 0.46787695321689876, "learning_rate": 8.984330161908119e-06, "loss": 0.6772, "step": 1101 }, { "epoch": 0.18097838359370189, "grad_norm": 0.48925568695329985, "learning_rate": 8.984291034590937e-06, "loss": 0.6857, "step": 1102 }, { "epoch": 0.18114261080204463, "grad_norm": 0.4790066192561788, "learning_rate": 8.984251858569943e-06, "loss": 0.6709, "step": 1103 }, { "epoch": 0.18130683801038738, "grad_norm": 0.469440234272998, "learning_rate": 8.98421263384556e-06, "loss": 0.6627, "step": 1104 }, { "epoch": 0.1814710652187301, "grad_norm": 0.49923220336592083, "learning_rate": 8.984173360418219e-06, "loss": 0.6747, "step": 1105 }, { "epoch": 0.18163529242707285, "grad_norm": 0.49630959834059857, "learning_rate": 8.98413403828834e-06, "loss": 0.6774, "step": 1106 }, { "epoch": 0.1817995196354156, "grad_norm": 0.4766594175572563, "learning_rate": 8.984094667456355e-06, "loss": 0.6526, "step": 1107 }, { "epoch": 0.18196374684375835, "grad_norm": 0.47797237085992367, "learning_rate": 8.98405524792269e-06, "loss": 0.6377, "step": 1108 }, { "epoch": 0.18212797405210107, "grad_norm": 0.6046432859375188, "learning_rate": 8.984015779687773e-06, "loss": 0.6396, "step": 1109 }, { "epoch": 0.18229220126044382, "grad_norm": 0.479492420455775, "learning_rate": 8.983976262752034e-06, "loss": 0.6621, "step": 1110 }, { "epoch": 0.18245642846878657, "grad_norm": 0.4843117988504339, "learning_rate": 8.9839366971159e-06, "loss": 0.6606, "step": 1111 }, { "epoch": 0.18262065567712932, "grad_norm": 0.487141594800104, "learning_rate": 8.983897082779804e-06, "loss": 0.6644, "step": 1112 }, { "epoch": 0.18278488288547204, "grad_norm": 0.4785509249745807, "learning_rate": 8.983857419744173e-06, "loss": 0.6592, "step": 1113 }, { "epoch": 0.1829491100938148, "grad_norm": 0.4779896385199476, "learning_rate": 8.983817708009438e-06, "loss": 0.6618, "step": 1114 }, { "epoch": 0.18311333730215754, "grad_norm": 0.49850559987657705, "learning_rate": 8.983777947576032e-06, "loss": 0.6579, "step": 1115 }, { "epoch": 0.18327756451050028, "grad_norm": 0.4635978888264068, "learning_rate": 8.983738138444387e-06, "loss": 0.6794, "step": 1116 }, { "epoch": 0.183441791718843, "grad_norm": 0.46979893505545445, "learning_rate": 8.98369828061493e-06, "loss": 0.6773, "step": 1117 }, { "epoch": 0.18360601892718575, "grad_norm": 0.5249699786325431, "learning_rate": 8.983658374088103e-06, "loss": 0.66, "step": 1118 }, { "epoch": 0.1837702461355285, "grad_norm": 0.6266089286333181, "learning_rate": 8.983618418864334e-06, "loss": 0.6625, "step": 1119 }, { "epoch": 0.18393447334387125, "grad_norm": 0.47135356277133833, "learning_rate": 8.983578414944056e-06, "loss": 0.6742, "step": 1120 }, { "epoch": 0.184098700552214, "grad_norm": 0.4939184853353077, "learning_rate": 8.983538362327707e-06, "loss": 0.661, "step": 1121 }, { "epoch": 0.18426292776055672, "grad_norm": 0.5556973801833033, "learning_rate": 8.98349826101572e-06, "loss": 0.6765, "step": 1122 }, { "epoch": 0.18442715496889947, "grad_norm": 0.59081209705095, "learning_rate": 8.983458111008528e-06, "loss": 0.6667, "step": 1123 }, { "epoch": 0.18459138217724222, "grad_norm": 0.6127369186330817, "learning_rate": 8.983417912306573e-06, "loss": 0.6463, "step": 1124 }, { "epoch": 0.18475560938558497, "grad_norm": 0.4725990782989587, "learning_rate": 8.983377664910287e-06, "loss": 0.6547, "step": 1125 }, { "epoch": 0.1849198365939277, "grad_norm": 0.459266379649527, "learning_rate": 8.98333736882011e-06, "loss": 0.6441, "step": 1126 }, { "epoch": 0.18508406380227044, "grad_norm": 0.5021589556181871, "learning_rate": 8.983297024036475e-06, "loss": 0.6618, "step": 1127 }, { "epoch": 0.1852482910106132, "grad_norm": 0.4731458077349534, "learning_rate": 8.983256630559826e-06, "loss": 0.6343, "step": 1128 }, { "epoch": 0.18541251821895594, "grad_norm": 0.46076652722107214, "learning_rate": 8.983216188390598e-06, "loss": 0.6571, "step": 1129 }, { "epoch": 0.18557674542729866, "grad_norm": 0.4729510638590709, "learning_rate": 8.98317569752923e-06, "loss": 0.6551, "step": 1130 }, { "epoch": 0.1857409726356414, "grad_norm": 0.4672436396771921, "learning_rate": 8.983135157976166e-06, "loss": 0.6526, "step": 1131 }, { "epoch": 0.18590519984398415, "grad_norm": 0.5362518055595471, "learning_rate": 8.983094569731842e-06, "loss": 0.6495, "step": 1132 }, { "epoch": 0.1860694270523269, "grad_norm": 0.4974174360111951, "learning_rate": 8.9830539327967e-06, "loss": 0.6796, "step": 1133 }, { "epoch": 0.18623365426066962, "grad_norm": 0.5038650088289387, "learning_rate": 8.983013247171182e-06, "loss": 0.6395, "step": 1134 }, { "epoch": 0.18639788146901237, "grad_norm": 0.46062899373893684, "learning_rate": 8.98297251285573e-06, "loss": 0.692, "step": 1135 }, { "epoch": 0.18656210867735512, "grad_norm": 0.5124254676856769, "learning_rate": 8.982931729850786e-06, "loss": 0.6724, "step": 1136 }, { "epoch": 0.18672633588569787, "grad_norm": 0.4507316596529217, "learning_rate": 8.98289089815679e-06, "loss": 0.6534, "step": 1137 }, { "epoch": 0.18689056309404062, "grad_norm": 0.5255609623079383, "learning_rate": 8.98285001777419e-06, "loss": 0.6297, "step": 1138 }, { "epoch": 0.18705479030238334, "grad_norm": 0.4753157263680939, "learning_rate": 8.98280908870343e-06, "loss": 0.6602, "step": 1139 }, { "epoch": 0.1872190175107261, "grad_norm": 0.4699605583978582, "learning_rate": 8.98276811094495e-06, "loss": 0.6736, "step": 1140 }, { "epoch": 0.18738324471906884, "grad_norm": 0.46174823365599904, "learning_rate": 8.9827270844992e-06, "loss": 0.6678, "step": 1141 }, { "epoch": 0.18754747192741159, "grad_norm": 0.46428061282059035, "learning_rate": 8.982686009366622e-06, "loss": 0.6789, "step": 1142 }, { "epoch": 0.1877116991357543, "grad_norm": 0.4699014887684446, "learning_rate": 8.982644885547666e-06, "loss": 0.6732, "step": 1143 }, { "epoch": 0.18787592634409706, "grad_norm": 0.4811773724914709, "learning_rate": 8.982603713042773e-06, "loss": 0.66, "step": 1144 }, { "epoch": 0.1880401535524398, "grad_norm": 0.45321971267257205, "learning_rate": 8.982562491852394e-06, "loss": 0.6625, "step": 1145 }, { "epoch": 0.18820438076078255, "grad_norm": 0.6330573920154635, "learning_rate": 8.982521221976978e-06, "loss": 0.6692, "step": 1146 }, { "epoch": 0.18836860796912527, "grad_norm": 0.45518725569387647, "learning_rate": 8.98247990341697e-06, "loss": 0.6435, "step": 1147 }, { "epoch": 0.18853283517746802, "grad_norm": 0.47309733075163013, "learning_rate": 8.982438536172819e-06, "loss": 0.6296, "step": 1148 }, { "epoch": 0.18869706238581077, "grad_norm": 0.475995730507989, "learning_rate": 8.982397120244977e-06, "loss": 0.6272, "step": 1149 }, { "epoch": 0.18886128959415352, "grad_norm": 0.4547023805476552, "learning_rate": 8.982355655633892e-06, "loss": 0.6477, "step": 1150 }, { "epoch": 0.18902551680249624, "grad_norm": 0.4725682510670995, "learning_rate": 8.982314142340014e-06, "loss": 0.6653, "step": 1151 }, { "epoch": 0.189189744010839, "grad_norm": 0.5627057072719744, "learning_rate": 8.982272580363796e-06, "loss": 0.6545, "step": 1152 }, { "epoch": 0.18935397121918174, "grad_norm": 0.5006530922999425, "learning_rate": 8.982230969705685e-06, "loss": 0.6545, "step": 1153 }, { "epoch": 0.1895181984275245, "grad_norm": 0.4547374521034954, "learning_rate": 8.982189310366138e-06, "loss": 0.6717, "step": 1154 }, { "epoch": 0.18968242563586724, "grad_norm": 0.445107036433399, "learning_rate": 8.982147602345605e-06, "loss": 0.661, "step": 1155 }, { "epoch": 0.18984665284420996, "grad_norm": 0.4561286392537167, "learning_rate": 8.982105845644539e-06, "loss": 0.6447, "step": 1156 }, { "epoch": 0.1900108800525527, "grad_norm": 0.5216727923195004, "learning_rate": 8.982064040263394e-06, "loss": 0.6586, "step": 1157 }, { "epoch": 0.19017510726089545, "grad_norm": 0.45996861833590036, "learning_rate": 8.982022186202623e-06, "loss": 0.6618, "step": 1158 }, { "epoch": 0.1903393344692382, "grad_norm": 0.44939371658844723, "learning_rate": 8.981980283462681e-06, "loss": 0.6705, "step": 1159 }, { "epoch": 0.19050356167758092, "grad_norm": 0.48425712732679077, "learning_rate": 8.981938332044024e-06, "loss": 0.6684, "step": 1160 }, { "epoch": 0.19066778888592367, "grad_norm": 0.4645137102326744, "learning_rate": 8.981896331947108e-06, "loss": 0.6696, "step": 1161 }, { "epoch": 0.19083201609426642, "grad_norm": 0.4695085651143102, "learning_rate": 8.981854283172386e-06, "loss": 0.6632, "step": 1162 }, { "epoch": 0.19099624330260917, "grad_norm": 0.45627726789169143, "learning_rate": 8.981812185720319e-06, "loss": 0.6774, "step": 1163 }, { "epoch": 0.1911604705109519, "grad_norm": 0.4508546900179419, "learning_rate": 8.98177003959136e-06, "loss": 0.6495, "step": 1164 }, { "epoch": 0.19132469771929464, "grad_norm": 0.4676794625199336, "learning_rate": 8.981727844785972e-06, "loss": 0.6505, "step": 1165 }, { "epoch": 0.1914889249276374, "grad_norm": 0.46561161225429254, "learning_rate": 8.981685601304608e-06, "loss": 0.639, "step": 1166 }, { "epoch": 0.19165315213598014, "grad_norm": 0.46003161609163923, "learning_rate": 8.98164330914773e-06, "loss": 0.659, "step": 1167 }, { "epoch": 0.19181737934432286, "grad_norm": 0.4750628926671919, "learning_rate": 8.981600968315796e-06, "loss": 0.6494, "step": 1168 }, { "epoch": 0.1919816065526656, "grad_norm": 0.4766948623132289, "learning_rate": 8.981558578809265e-06, "loss": 0.6408, "step": 1169 }, { "epoch": 0.19214583376100836, "grad_norm": 0.5949849501521395, "learning_rate": 8.9815161406286e-06, "loss": 0.6377, "step": 1170 }, { "epoch": 0.1923100609693511, "grad_norm": 0.4315864051018545, "learning_rate": 8.98147365377426e-06, "loss": 0.657, "step": 1171 }, { "epoch": 0.19247428817769385, "grad_norm": 0.48620239608886057, "learning_rate": 8.981431118246707e-06, "loss": 0.6484, "step": 1172 }, { "epoch": 0.19263851538603657, "grad_norm": 0.48009521513705844, "learning_rate": 8.981388534046403e-06, "loss": 0.661, "step": 1173 }, { "epoch": 0.19280274259437932, "grad_norm": 0.46661786912772535, "learning_rate": 8.981345901173812e-06, "loss": 0.6553, "step": 1174 }, { "epoch": 0.19296696980272207, "grad_norm": 0.4670500707072574, "learning_rate": 8.981303219629392e-06, "loss": 0.6443, "step": 1175 }, { "epoch": 0.19313119701106482, "grad_norm": 0.4409917364955242, "learning_rate": 8.981260489413613e-06, "loss": 0.6619, "step": 1176 }, { "epoch": 0.19329542421940754, "grad_norm": 0.4711657126739569, "learning_rate": 8.981217710526935e-06, "loss": 0.635, "step": 1177 }, { "epoch": 0.1934596514277503, "grad_norm": 0.5696321006657865, "learning_rate": 8.981174882969823e-06, "loss": 0.6625, "step": 1178 }, { "epoch": 0.19362387863609304, "grad_norm": 0.46026048794687807, "learning_rate": 8.981132006742745e-06, "loss": 0.6392, "step": 1179 }, { "epoch": 0.1937881058444358, "grad_norm": 0.44618034160175857, "learning_rate": 8.981089081846164e-06, "loss": 0.6793, "step": 1180 }, { "epoch": 0.1939523330527785, "grad_norm": 0.466485326789076, "learning_rate": 8.981046108280545e-06, "loss": 0.6483, "step": 1181 }, { "epoch": 0.19411656026112126, "grad_norm": 0.48215706406724307, "learning_rate": 8.981003086046358e-06, "loss": 0.6493, "step": 1182 }, { "epoch": 0.194280787469464, "grad_norm": 0.44683277849330777, "learning_rate": 8.980960015144068e-06, "loss": 0.6471, "step": 1183 }, { "epoch": 0.19444501467780675, "grad_norm": 0.4492007111991035, "learning_rate": 8.980916895574143e-06, "loss": 0.6608, "step": 1184 }, { "epoch": 0.19460924188614948, "grad_norm": 0.43166191142444144, "learning_rate": 8.980873727337053e-06, "loss": 0.6674, "step": 1185 }, { "epoch": 0.19477346909449222, "grad_norm": 0.43998146007104594, "learning_rate": 8.980830510433266e-06, "loss": 0.6579, "step": 1186 }, { "epoch": 0.19493769630283497, "grad_norm": 0.48354840890327583, "learning_rate": 8.98078724486325e-06, "loss": 0.6475, "step": 1187 }, { "epoch": 0.19510192351117772, "grad_norm": 0.48479443389772425, "learning_rate": 8.980743930627477e-06, "loss": 0.6576, "step": 1188 }, { "epoch": 0.19526615071952047, "grad_norm": 0.46889288135317325, "learning_rate": 8.980700567726415e-06, "loss": 0.6674, "step": 1189 }, { "epoch": 0.1954303779278632, "grad_norm": 0.4363016948383347, "learning_rate": 8.980657156160538e-06, "loss": 0.6426, "step": 1190 }, { "epoch": 0.19559460513620594, "grad_norm": 0.4856980442738892, "learning_rate": 8.980613695930315e-06, "loss": 0.6763, "step": 1191 }, { "epoch": 0.1957588323445487, "grad_norm": 0.43424616977986746, "learning_rate": 8.98057018703622e-06, "loss": 0.6549, "step": 1192 }, { "epoch": 0.19592305955289144, "grad_norm": 0.4315225641477105, "learning_rate": 8.980526629478724e-06, "loss": 0.633, "step": 1193 }, { "epoch": 0.19608728676123416, "grad_norm": 0.44353238665548256, "learning_rate": 8.9804830232583e-06, "loss": 0.6314, "step": 1194 }, { "epoch": 0.1962515139695769, "grad_norm": 0.42772453875967764, "learning_rate": 8.980439368375423e-06, "loss": 0.6608, "step": 1195 }, { "epoch": 0.19641574117791966, "grad_norm": 0.47642585813722443, "learning_rate": 8.980395664830566e-06, "loss": 0.6469, "step": 1196 }, { "epoch": 0.1965799683862624, "grad_norm": 0.4585709399028893, "learning_rate": 8.980351912624204e-06, "loss": 0.6409, "step": 1197 }, { "epoch": 0.19674419559460513, "grad_norm": 0.4683407067017671, "learning_rate": 8.980308111756812e-06, "loss": 0.6343, "step": 1198 }, { "epoch": 0.19690842280294787, "grad_norm": 0.45598990783869575, "learning_rate": 8.980264262228865e-06, "loss": 0.6723, "step": 1199 }, { "epoch": 0.19707265001129062, "grad_norm": 0.4441138325488558, "learning_rate": 8.980220364040843e-06, "loss": 0.642, "step": 1200 }, { "epoch": 0.19723687721963337, "grad_norm": 0.4372372641941166, "learning_rate": 8.980176417193217e-06, "loss": 0.6613, "step": 1201 }, { "epoch": 0.1974011044279761, "grad_norm": 0.43527778983518434, "learning_rate": 8.980132421686467e-06, "loss": 0.6413, "step": 1202 }, { "epoch": 0.19756533163631884, "grad_norm": 0.4318187764560961, "learning_rate": 8.980088377521073e-06, "loss": 0.6488, "step": 1203 }, { "epoch": 0.1977295588446616, "grad_norm": 0.45481144116691763, "learning_rate": 8.98004428469751e-06, "loss": 0.6527, "step": 1204 }, { "epoch": 0.19789378605300434, "grad_norm": 0.5522025822262004, "learning_rate": 8.98000014321626e-06, "loss": 0.6489, "step": 1205 }, { "epoch": 0.19805801326134706, "grad_norm": 0.41859957738652587, "learning_rate": 8.9799559530778e-06, "loss": 0.6368, "step": 1206 }, { "epoch": 0.1982222404696898, "grad_norm": 0.45326825381870833, "learning_rate": 8.97991171428261e-06, "loss": 0.6596, "step": 1207 }, { "epoch": 0.19838646767803256, "grad_norm": 0.44391403918832606, "learning_rate": 8.979867426831171e-06, "loss": 0.6476, "step": 1208 }, { "epoch": 0.1985506948863753, "grad_norm": 0.43255445491586364, "learning_rate": 8.979823090723966e-06, "loss": 0.6301, "step": 1209 }, { "epoch": 0.19871492209471806, "grad_norm": 0.428228400413717, "learning_rate": 8.979778705961471e-06, "loss": 0.6779, "step": 1210 }, { "epoch": 0.19887914930306078, "grad_norm": 0.461878401668105, "learning_rate": 8.979734272544175e-06, "loss": 0.648, "step": 1211 }, { "epoch": 0.19904337651140352, "grad_norm": 0.4311866229558285, "learning_rate": 8.979689790472556e-06, "loss": 0.6456, "step": 1212 }, { "epoch": 0.19920760371974627, "grad_norm": 0.41531054144954355, "learning_rate": 8.979645259747098e-06, "loss": 0.6407, "step": 1213 }, { "epoch": 0.19937183092808902, "grad_norm": 0.4262317469216339, "learning_rate": 8.979600680368286e-06, "loss": 0.6243, "step": 1214 }, { "epoch": 0.19953605813643174, "grad_norm": 0.4314545771479194, "learning_rate": 8.979556052336605e-06, "loss": 0.6493, "step": 1215 }, { "epoch": 0.1997002853447745, "grad_norm": 0.44815070308467875, "learning_rate": 8.979511375652535e-06, "loss": 0.6438, "step": 1216 }, { "epoch": 0.19986451255311724, "grad_norm": 0.4422403594628592, "learning_rate": 8.979466650316565e-06, "loss": 0.669, "step": 1217 }, { "epoch": 0.20002873976146, "grad_norm": 0.42824265290355684, "learning_rate": 8.97942187632918e-06, "loss": 0.6579, "step": 1218 }, { "epoch": 0.2001929669698027, "grad_norm": 0.543273528497163, "learning_rate": 8.979377053690867e-06, "loss": 0.6373, "step": 1219 }, { "epoch": 0.20035719417814546, "grad_norm": 0.4230607481140885, "learning_rate": 8.979332182402111e-06, "loss": 0.6431, "step": 1220 }, { "epoch": 0.2005214213864882, "grad_norm": 0.4362433490235076, "learning_rate": 8.979287262463403e-06, "loss": 0.6501, "step": 1221 }, { "epoch": 0.20068564859483096, "grad_norm": 0.4451428401814509, "learning_rate": 8.979242293875225e-06, "loss": 0.6557, "step": 1222 }, { "epoch": 0.20084987580317368, "grad_norm": 0.4352292485824094, "learning_rate": 8.979197276638071e-06, "loss": 0.6598, "step": 1223 }, { "epoch": 0.20101410301151643, "grad_norm": 0.4261401748689992, "learning_rate": 8.979152210752427e-06, "loss": 0.6318, "step": 1224 }, { "epoch": 0.20117833021985917, "grad_norm": 0.41345651752110774, "learning_rate": 8.979107096218781e-06, "loss": 0.6701, "step": 1225 }, { "epoch": 0.20134255742820192, "grad_norm": 0.4257173704920539, "learning_rate": 8.979061933037629e-06, "loss": 0.6462, "step": 1226 }, { "epoch": 0.20150678463654467, "grad_norm": 0.4402291803372949, "learning_rate": 8.979016721209456e-06, "loss": 0.6405, "step": 1227 }, { "epoch": 0.2016710118448874, "grad_norm": 0.4149097427684321, "learning_rate": 8.978971460734753e-06, "loss": 0.6203, "step": 1228 }, { "epoch": 0.20183523905323014, "grad_norm": 0.48308972389337906, "learning_rate": 8.978926151614014e-06, "loss": 0.6642, "step": 1229 }, { "epoch": 0.2019994662615729, "grad_norm": 0.4331467017777629, "learning_rate": 8.978880793847732e-06, "loss": 0.6395, "step": 1230 }, { "epoch": 0.20216369346991564, "grad_norm": 0.43266826431991046, "learning_rate": 8.978835387436396e-06, "loss": 0.6434, "step": 1231 }, { "epoch": 0.20232792067825836, "grad_norm": 0.4460728101045431, "learning_rate": 8.978789932380501e-06, "loss": 0.6286, "step": 1232 }, { "epoch": 0.2024921478866011, "grad_norm": 0.43293087715759737, "learning_rate": 8.978744428680543e-06, "loss": 0.6546, "step": 1233 }, { "epoch": 0.20265637509494386, "grad_norm": 0.42924541630533913, "learning_rate": 8.978698876337011e-06, "loss": 0.6446, "step": 1234 }, { "epoch": 0.2028206023032866, "grad_norm": 0.4151254920163802, "learning_rate": 8.978653275350405e-06, "loss": 0.6631, "step": 1235 }, { "epoch": 0.20298482951162933, "grad_norm": 0.44360392632935175, "learning_rate": 8.978607625721219e-06, "loss": 0.6303, "step": 1236 }, { "epoch": 0.20314905671997208, "grad_norm": 0.4277824314710478, "learning_rate": 8.978561927449946e-06, "loss": 0.6467, "step": 1237 }, { "epoch": 0.20331328392831483, "grad_norm": 0.4366290288251897, "learning_rate": 8.978516180537083e-06, "loss": 0.6476, "step": 1238 }, { "epoch": 0.20347751113665757, "grad_norm": 0.42651600201106177, "learning_rate": 8.97847038498313e-06, "loss": 0.6514, "step": 1239 }, { "epoch": 0.2036417383450003, "grad_norm": 0.4251031339705655, "learning_rate": 8.978424540788583e-06, "loss": 0.6734, "step": 1240 }, { "epoch": 0.20380596555334304, "grad_norm": 0.549156680303186, "learning_rate": 8.978378647953937e-06, "loss": 0.6291, "step": 1241 }, { "epoch": 0.2039701927616858, "grad_norm": 0.4224197425754509, "learning_rate": 8.978332706479694e-06, "loss": 0.6516, "step": 1242 }, { "epoch": 0.20413441997002854, "grad_norm": 0.42435648487983524, "learning_rate": 8.978286716366352e-06, "loss": 0.6401, "step": 1243 }, { "epoch": 0.2042986471783713, "grad_norm": 0.4218890161029301, "learning_rate": 8.97824067761441e-06, "loss": 0.6332, "step": 1244 }, { "epoch": 0.204462874386714, "grad_norm": 0.4598458015905371, "learning_rate": 8.978194590224367e-06, "loss": 0.636, "step": 1245 }, { "epoch": 0.20462710159505676, "grad_norm": 0.4494394320075245, "learning_rate": 8.978148454196728e-06, "loss": 0.6628, "step": 1246 }, { "epoch": 0.2047913288033995, "grad_norm": 0.45629173674361884, "learning_rate": 8.978102269531988e-06, "loss": 0.6434, "step": 1247 }, { "epoch": 0.20495555601174226, "grad_norm": 0.42740668248808533, "learning_rate": 8.978056036230651e-06, "loss": 0.6408, "step": 1248 }, { "epoch": 0.20511978322008498, "grad_norm": 0.45719887031605844, "learning_rate": 8.978009754293221e-06, "loss": 0.6407, "step": 1249 }, { "epoch": 0.20528401042842773, "grad_norm": 0.4352265634714779, "learning_rate": 8.9779634237202e-06, "loss": 0.6371, "step": 1250 }, { "epoch": 0.20544823763677048, "grad_norm": 0.42210549828374605, "learning_rate": 8.97791704451209e-06, "loss": 0.6408, "step": 1251 }, { "epoch": 0.20561246484511322, "grad_norm": 0.40181174335862413, "learning_rate": 8.977870616669395e-06, "loss": 0.6494, "step": 1252 }, { "epoch": 0.20577669205345595, "grad_norm": 0.40188184471409283, "learning_rate": 8.97782414019262e-06, "loss": 0.6593, "step": 1253 }, { "epoch": 0.2059409192617987, "grad_norm": 0.410659768636049, "learning_rate": 8.977777615082268e-06, "loss": 0.6452, "step": 1254 }, { "epoch": 0.20610514647014144, "grad_norm": 0.4235122270844502, "learning_rate": 8.977731041338847e-06, "loss": 0.6438, "step": 1255 }, { "epoch": 0.2062693736784842, "grad_norm": 0.44846490809871864, "learning_rate": 8.97768441896286e-06, "loss": 0.6454, "step": 1256 }, { "epoch": 0.2064336008868269, "grad_norm": 0.42416716591523856, "learning_rate": 8.977637747954815e-06, "loss": 0.6342, "step": 1257 }, { "epoch": 0.20659782809516966, "grad_norm": 0.4198893083929536, "learning_rate": 8.977591028315221e-06, "loss": 0.6533, "step": 1258 }, { "epoch": 0.2067620553035124, "grad_norm": 0.4455138571329966, "learning_rate": 8.97754426004458e-06, "loss": 0.6586, "step": 1259 }, { "epoch": 0.20692628251185516, "grad_norm": 0.4142993504597112, "learning_rate": 8.977497443143405e-06, "loss": 0.6525, "step": 1260 }, { "epoch": 0.2070905097201979, "grad_norm": 0.42824370833772935, "learning_rate": 8.9774505776122e-06, "loss": 0.6228, "step": 1261 }, { "epoch": 0.20725473692854063, "grad_norm": 0.424782198551827, "learning_rate": 8.977403663451478e-06, "loss": 0.6432, "step": 1262 }, { "epoch": 0.20741896413688338, "grad_norm": 0.41832663825833505, "learning_rate": 8.977356700661749e-06, "loss": 0.6567, "step": 1263 }, { "epoch": 0.20758319134522613, "grad_norm": 0.45774058372764936, "learning_rate": 8.977309689243519e-06, "loss": 0.6594, "step": 1264 }, { "epoch": 0.20774741855356887, "grad_norm": 0.4315936364515443, "learning_rate": 8.9772626291973e-06, "loss": 0.6348, "step": 1265 }, { "epoch": 0.2079116457619116, "grad_norm": 0.4070764707847252, "learning_rate": 8.977215520523605e-06, "loss": 0.6427, "step": 1266 }, { "epoch": 0.20807587297025434, "grad_norm": 0.4162334688331903, "learning_rate": 8.977168363222944e-06, "loss": 0.6605, "step": 1267 }, { "epoch": 0.2082401001785971, "grad_norm": 0.4053266968546352, "learning_rate": 8.977121157295831e-06, "loss": 0.6579, "step": 1268 }, { "epoch": 0.20840432738693984, "grad_norm": 0.43978146816965014, "learning_rate": 8.977073902742775e-06, "loss": 0.6334, "step": 1269 }, { "epoch": 0.20856855459528256, "grad_norm": 0.40390746582339854, "learning_rate": 8.977026599564294e-06, "loss": 0.6526, "step": 1270 }, { "epoch": 0.2087327818036253, "grad_norm": 0.3978119140838034, "learning_rate": 8.976979247760898e-06, "loss": 0.6379, "step": 1271 }, { "epoch": 0.20889700901196806, "grad_norm": 0.400050972879593, "learning_rate": 8.976931847333104e-06, "loss": 0.634, "step": 1272 }, { "epoch": 0.2090612362203108, "grad_norm": 0.43290124573046884, "learning_rate": 8.976884398281424e-06, "loss": 0.6635, "step": 1273 }, { "epoch": 0.20922546342865353, "grad_norm": 0.4087546614663093, "learning_rate": 8.976836900606375e-06, "loss": 0.6506, "step": 1274 }, { "epoch": 0.20938969063699628, "grad_norm": 0.3974013674929746, "learning_rate": 8.976789354308471e-06, "loss": 0.6425, "step": 1275 }, { "epoch": 0.20955391784533903, "grad_norm": 0.519176836639908, "learning_rate": 8.976741759388233e-06, "loss": 0.6533, "step": 1276 }, { "epoch": 0.20971814505368178, "grad_norm": 0.4212920607770516, "learning_rate": 8.976694115846174e-06, "loss": 0.6414, "step": 1277 }, { "epoch": 0.20988237226202452, "grad_norm": 0.42102429097091515, "learning_rate": 8.976646423682811e-06, "loss": 0.6277, "step": 1278 }, { "epoch": 0.21004659947036725, "grad_norm": 0.4265483499282997, "learning_rate": 8.976598682898665e-06, "loss": 0.6527, "step": 1279 }, { "epoch": 0.21021082667871, "grad_norm": 0.44527848666988024, "learning_rate": 8.976550893494252e-06, "loss": 0.6275, "step": 1280 }, { "epoch": 0.21037505388705274, "grad_norm": 0.4074471380026174, "learning_rate": 8.976503055470093e-06, "loss": 0.6325, "step": 1281 }, { "epoch": 0.2105392810953955, "grad_norm": 0.42635054736052974, "learning_rate": 8.976455168826705e-06, "loss": 0.6404, "step": 1282 }, { "epoch": 0.2107035083037382, "grad_norm": 0.5478927850514103, "learning_rate": 8.97640723356461e-06, "loss": 0.6493, "step": 1283 }, { "epoch": 0.21086773551208096, "grad_norm": 0.4516746938788672, "learning_rate": 8.976359249684329e-06, "loss": 0.6431, "step": 1284 }, { "epoch": 0.2110319627204237, "grad_norm": 0.4591186560356215, "learning_rate": 8.976311217186384e-06, "loss": 0.6636, "step": 1285 }, { "epoch": 0.21119618992876646, "grad_norm": 0.42161671968749, "learning_rate": 8.976263136071294e-06, "loss": 0.6388, "step": 1286 }, { "epoch": 0.21136041713710918, "grad_norm": 0.4294902286359102, "learning_rate": 8.97621500633958e-06, "loss": 0.6307, "step": 1287 }, { "epoch": 0.21152464434545193, "grad_norm": 0.47729600920882603, "learning_rate": 8.97616682799177e-06, "loss": 0.664, "step": 1288 }, { "epoch": 0.21168887155379468, "grad_norm": 0.40274636860562796, "learning_rate": 8.976118601028382e-06, "loss": 0.6676, "step": 1289 }, { "epoch": 0.21185309876213743, "grad_norm": 0.41214219381854394, "learning_rate": 8.976070325449942e-06, "loss": 0.6405, "step": 1290 }, { "epoch": 0.21201732597048015, "grad_norm": 0.41848751377730636, "learning_rate": 8.976022001256977e-06, "loss": 0.6483, "step": 1291 }, { "epoch": 0.2121815531788229, "grad_norm": 0.42500778911619475, "learning_rate": 8.975973628450006e-06, "loss": 0.6406, "step": 1292 }, { "epoch": 0.21234578038716564, "grad_norm": 0.4119091349723487, "learning_rate": 8.97592520702956e-06, "loss": 0.6458, "step": 1293 }, { "epoch": 0.2125100075955084, "grad_norm": 0.44255627064099584, "learning_rate": 8.975876736996163e-06, "loss": 0.64, "step": 1294 }, { "epoch": 0.21267423480385111, "grad_norm": 0.43848087034148364, "learning_rate": 8.97582821835034e-06, "loss": 0.6463, "step": 1295 }, { "epoch": 0.21283846201219386, "grad_norm": 0.42192516605366814, "learning_rate": 8.975779651092618e-06, "loss": 0.6413, "step": 1296 }, { "epoch": 0.2130026892205366, "grad_norm": 0.4323607525943481, "learning_rate": 8.975731035223526e-06, "loss": 0.6407, "step": 1297 }, { "epoch": 0.21316691642887936, "grad_norm": 0.3968768309143591, "learning_rate": 8.975682370743592e-06, "loss": 0.626, "step": 1298 }, { "epoch": 0.2133311436372221, "grad_norm": 0.40895837748550445, "learning_rate": 8.975633657653344e-06, "loss": 0.6436, "step": 1299 }, { "epoch": 0.21349537084556483, "grad_norm": 0.4110881048866647, "learning_rate": 8.97558489595331e-06, "loss": 0.6084, "step": 1300 }, { "epoch": 0.21365959805390758, "grad_norm": 0.7065067394013022, "learning_rate": 8.975536085644022e-06, "loss": 0.6485, "step": 1301 }, { "epoch": 0.21382382526225033, "grad_norm": 0.5630818303342476, "learning_rate": 8.975487226726007e-06, "loss": 0.6415, "step": 1302 }, { "epoch": 0.21398805247059308, "grad_norm": 0.42991690147388806, "learning_rate": 8.975438319199798e-06, "loss": 0.6244, "step": 1303 }, { "epoch": 0.2141522796789358, "grad_norm": 0.3916322356920595, "learning_rate": 8.975389363065928e-06, "loss": 0.6474, "step": 1304 }, { "epoch": 0.21431650688727855, "grad_norm": 0.4040633626617589, "learning_rate": 8.975340358324925e-06, "loss": 0.6482, "step": 1305 }, { "epoch": 0.2144807340956213, "grad_norm": 0.39442006030905546, "learning_rate": 8.97529130497732e-06, "loss": 0.6329, "step": 1306 }, { "epoch": 0.21464496130396404, "grad_norm": 0.628544271221126, "learning_rate": 8.975242203023652e-06, "loss": 0.6529, "step": 1307 }, { "epoch": 0.21480918851230676, "grad_norm": 0.38401898520257327, "learning_rate": 8.97519305246445e-06, "loss": 0.6534, "step": 1308 }, { "epoch": 0.2149734157206495, "grad_norm": 0.39648552275477433, "learning_rate": 8.975143853300246e-06, "loss": 0.6424, "step": 1309 }, { "epoch": 0.21513764292899226, "grad_norm": 0.3998211133640743, "learning_rate": 8.975094605531577e-06, "loss": 0.6135, "step": 1310 }, { "epoch": 0.215301870137335, "grad_norm": 0.41497948541509916, "learning_rate": 8.975045309158978e-06, "loss": 0.6564, "step": 1311 }, { "epoch": 0.21546609734567773, "grad_norm": 0.4072266315384676, "learning_rate": 8.974995964182987e-06, "loss": 0.6239, "step": 1312 }, { "epoch": 0.21563032455402048, "grad_norm": 0.4158843054553293, "learning_rate": 8.974946570604135e-06, "loss": 0.6197, "step": 1313 }, { "epoch": 0.21579455176236323, "grad_norm": 0.42555213454131813, "learning_rate": 8.97489712842296e-06, "loss": 0.6357, "step": 1314 }, { "epoch": 0.21595877897070598, "grad_norm": 0.4344847335024281, "learning_rate": 8.97484763764e-06, "loss": 0.6538, "step": 1315 }, { "epoch": 0.21612300617904873, "grad_norm": 0.40231338795547006, "learning_rate": 8.974798098255793e-06, "loss": 0.6438, "step": 1316 }, { "epoch": 0.21628723338739145, "grad_norm": 0.401018844856692, "learning_rate": 8.974748510270874e-06, "loss": 0.6219, "step": 1317 }, { "epoch": 0.2164514605957342, "grad_norm": 0.3879562862518085, "learning_rate": 8.974698873685786e-06, "loss": 0.6432, "step": 1318 }, { "epoch": 0.21661568780407695, "grad_norm": 0.3916001285671798, "learning_rate": 8.974649188501065e-06, "loss": 0.6266, "step": 1319 }, { "epoch": 0.2167799150124197, "grad_norm": 0.43067968795936457, "learning_rate": 8.974599454717248e-06, "loss": 0.6507, "step": 1320 }, { "epoch": 0.21694414222076241, "grad_norm": 0.3984580712524322, "learning_rate": 8.974549672334883e-06, "loss": 0.6419, "step": 1321 }, { "epoch": 0.21710836942910516, "grad_norm": 0.46140477849198946, "learning_rate": 8.974499841354504e-06, "loss": 0.6416, "step": 1322 }, { "epoch": 0.2172725966374479, "grad_norm": 0.459126770981474, "learning_rate": 8.974449961776656e-06, "loss": 0.6478, "step": 1323 }, { "epoch": 0.21743682384579066, "grad_norm": 0.46092127260237753, "learning_rate": 8.974400033601878e-06, "loss": 0.6424, "step": 1324 }, { "epoch": 0.21760105105413338, "grad_norm": 0.39361748300881266, "learning_rate": 8.974350056830712e-06, "loss": 0.6444, "step": 1325 }, { "epoch": 0.21776527826247613, "grad_norm": 0.4046619648292167, "learning_rate": 8.974300031463704e-06, "loss": 0.6576, "step": 1326 }, { "epoch": 0.21792950547081888, "grad_norm": 0.37804314723939025, "learning_rate": 8.974249957501395e-06, "loss": 0.6309, "step": 1327 }, { "epoch": 0.21809373267916163, "grad_norm": 0.5167184663185815, "learning_rate": 8.97419983494433e-06, "loss": 0.643, "step": 1328 }, { "epoch": 0.21825795988750435, "grad_norm": 0.37870371535857766, "learning_rate": 8.974149663793053e-06, "loss": 0.6322, "step": 1329 }, { "epoch": 0.2184221870958471, "grad_norm": 0.39915021911898146, "learning_rate": 8.974099444048108e-06, "loss": 0.6457, "step": 1330 }, { "epoch": 0.21858641430418985, "grad_norm": 0.4006828964084616, "learning_rate": 8.97404917571004e-06, "loss": 0.6247, "step": 1331 }, { "epoch": 0.2187506415125326, "grad_norm": 0.40900509712567223, "learning_rate": 8.973998858779397e-06, "loss": 0.6388, "step": 1332 }, { "epoch": 0.21891486872087534, "grad_norm": 0.454939516696798, "learning_rate": 8.973948493256727e-06, "loss": 0.629, "step": 1333 }, { "epoch": 0.21907909592921807, "grad_norm": 0.37761379656488037, "learning_rate": 8.973898079142573e-06, "loss": 0.6359, "step": 1334 }, { "epoch": 0.2192433231375608, "grad_norm": 0.4672563224075992, "learning_rate": 8.973847616437483e-06, "loss": 0.6387, "step": 1335 }, { "epoch": 0.21940755034590356, "grad_norm": 0.3981227122118326, "learning_rate": 8.973797105142005e-06, "loss": 0.6317, "step": 1336 }, { "epoch": 0.2195717775542463, "grad_norm": 0.45785924142754797, "learning_rate": 8.973746545256692e-06, "loss": 0.6275, "step": 1337 }, { "epoch": 0.21973600476258903, "grad_norm": 0.40208418946268193, "learning_rate": 8.973695936782088e-06, "loss": 0.6649, "step": 1338 }, { "epoch": 0.21990023197093178, "grad_norm": 0.4196465854058847, "learning_rate": 8.973645279718746e-06, "loss": 0.65, "step": 1339 }, { "epoch": 0.22006445917927453, "grad_norm": 0.39709401064649014, "learning_rate": 8.973594574067214e-06, "loss": 0.6219, "step": 1340 }, { "epoch": 0.22022868638761728, "grad_norm": 0.4317979361442743, "learning_rate": 8.973543819828042e-06, "loss": 0.6352, "step": 1341 }, { "epoch": 0.22039291359596, "grad_norm": 0.3996012626147995, "learning_rate": 8.973493017001785e-06, "loss": 0.6504, "step": 1342 }, { "epoch": 0.22055714080430275, "grad_norm": 0.401605897362349, "learning_rate": 8.973442165588993e-06, "loss": 0.6564, "step": 1343 }, { "epoch": 0.2207213680126455, "grad_norm": 0.4211815055879324, "learning_rate": 8.973391265590215e-06, "loss": 0.6377, "step": 1344 }, { "epoch": 0.22088559522098825, "grad_norm": 0.4147590208542743, "learning_rate": 8.97334031700601e-06, "loss": 0.6358, "step": 1345 }, { "epoch": 0.22104982242933097, "grad_norm": 0.4041410755841317, "learning_rate": 8.973289319836924e-06, "loss": 0.6345, "step": 1346 }, { "epoch": 0.22121404963767372, "grad_norm": 0.4419733735874853, "learning_rate": 8.973238274083517e-06, "loss": 0.6309, "step": 1347 }, { "epoch": 0.22137827684601646, "grad_norm": 0.4438396420085765, "learning_rate": 8.973187179746341e-06, "loss": 0.6456, "step": 1348 }, { "epoch": 0.2215425040543592, "grad_norm": 0.40548180198977596, "learning_rate": 8.973136036825952e-06, "loss": 0.6448, "step": 1349 }, { "epoch": 0.22170673126270196, "grad_norm": 0.3776693195760448, "learning_rate": 8.973084845322905e-06, "loss": 0.6267, "step": 1350 }, { "epoch": 0.22187095847104468, "grad_norm": 0.3914134341854077, "learning_rate": 8.973033605237754e-06, "loss": 0.6064, "step": 1351 }, { "epoch": 0.22203518567938743, "grad_norm": 0.4008878109833888, "learning_rate": 8.972982316571059e-06, "loss": 0.6412, "step": 1352 }, { "epoch": 0.22219941288773018, "grad_norm": 0.3947443304069219, "learning_rate": 8.972930979323373e-06, "loss": 0.6296, "step": 1353 }, { "epoch": 0.22236364009607293, "grad_norm": 0.40428731395639134, "learning_rate": 8.972879593495257e-06, "loss": 0.6468, "step": 1354 }, { "epoch": 0.22252786730441565, "grad_norm": 0.42559973840758325, "learning_rate": 8.972828159087268e-06, "loss": 0.636, "step": 1355 }, { "epoch": 0.2226920945127584, "grad_norm": 0.38558193013481934, "learning_rate": 8.972776676099965e-06, "loss": 0.6267, "step": 1356 }, { "epoch": 0.22285632172110115, "grad_norm": 0.3942742258512122, "learning_rate": 8.972725144533905e-06, "loss": 0.6095, "step": 1357 }, { "epoch": 0.2230205489294439, "grad_norm": 0.39813829783312527, "learning_rate": 8.972673564389651e-06, "loss": 0.6206, "step": 1358 }, { "epoch": 0.22318477613778662, "grad_norm": 0.424281039810817, "learning_rate": 8.972621935667763e-06, "loss": 0.6455, "step": 1359 }, { "epoch": 0.22334900334612937, "grad_norm": 0.3908176395636074, "learning_rate": 8.972570258368797e-06, "loss": 0.6242, "step": 1360 }, { "epoch": 0.22351323055447211, "grad_norm": 0.4052328537533125, "learning_rate": 8.972518532493319e-06, "loss": 0.6209, "step": 1361 }, { "epoch": 0.22367745776281486, "grad_norm": 0.39202858379796474, "learning_rate": 8.97246675804189e-06, "loss": 0.6256, "step": 1362 }, { "epoch": 0.22384168497115758, "grad_norm": 0.3800992157787325, "learning_rate": 8.97241493501507e-06, "loss": 0.6232, "step": 1363 }, { "epoch": 0.22400591217950033, "grad_norm": 0.3889099579798512, "learning_rate": 8.972363063413424e-06, "loss": 0.642, "step": 1364 }, { "epoch": 0.22417013938784308, "grad_norm": 0.43843337554692713, "learning_rate": 8.972311143237516e-06, "loss": 0.6309, "step": 1365 }, { "epoch": 0.22433436659618583, "grad_norm": 0.38555050369799626, "learning_rate": 8.972259174487908e-06, "loss": 0.6175, "step": 1366 }, { "epoch": 0.22449859380452858, "grad_norm": 0.3954594559012571, "learning_rate": 8.972207157165167e-06, "loss": 0.6332, "step": 1367 }, { "epoch": 0.2246628210128713, "grad_norm": 0.397471975775458, "learning_rate": 8.972155091269854e-06, "loss": 0.6291, "step": 1368 }, { "epoch": 0.22482704822121405, "grad_norm": 0.3954234981146683, "learning_rate": 8.972102976802537e-06, "loss": 0.6497, "step": 1369 }, { "epoch": 0.2249912754295568, "grad_norm": 0.4398435558984372, "learning_rate": 8.972050813763783e-06, "loss": 0.6282, "step": 1370 }, { "epoch": 0.22515550263789955, "grad_norm": 1.1371437397465023, "learning_rate": 8.971998602154156e-06, "loss": 0.6283, "step": 1371 }, { "epoch": 0.22531972984624227, "grad_norm": 0.42734572284380834, "learning_rate": 8.971946341974225e-06, "loss": 0.6498, "step": 1372 }, { "epoch": 0.22548395705458502, "grad_norm": 0.372006683716092, "learning_rate": 8.971894033224556e-06, "loss": 0.6317, "step": 1373 }, { "epoch": 0.22564818426292776, "grad_norm": 0.5365999450195464, "learning_rate": 8.97184167590572e-06, "loss": 0.6398, "step": 1374 }, { "epoch": 0.2258124114712705, "grad_norm": 0.4513109754424501, "learning_rate": 8.971789270018282e-06, "loss": 0.6516, "step": 1375 }, { "epoch": 0.22597663867961323, "grad_norm": 0.3876777628138284, "learning_rate": 8.971736815562813e-06, "loss": 0.6239, "step": 1376 }, { "epoch": 0.22614086588795598, "grad_norm": 0.39030172173320044, "learning_rate": 8.971684312539884e-06, "loss": 0.6362, "step": 1377 }, { "epoch": 0.22630509309629873, "grad_norm": 0.5022741874530742, "learning_rate": 8.971631760950062e-06, "loss": 0.6391, "step": 1378 }, { "epoch": 0.22646932030464148, "grad_norm": 0.3966665973762089, "learning_rate": 8.971579160793921e-06, "loss": 0.6279, "step": 1379 }, { "epoch": 0.2266335475129842, "grad_norm": 0.46552566540421336, "learning_rate": 8.971526512072028e-06, "loss": 0.633, "step": 1380 }, { "epoch": 0.22679777472132695, "grad_norm": 0.3887481021652706, "learning_rate": 8.971473814784961e-06, "loss": 0.6316, "step": 1381 }, { "epoch": 0.2269620019296697, "grad_norm": 0.3981403897977426, "learning_rate": 8.971421068933289e-06, "loss": 0.6297, "step": 1382 }, { "epoch": 0.22712622913801245, "grad_norm": 0.3760925511936663, "learning_rate": 8.971368274517584e-06, "loss": 0.6518, "step": 1383 }, { "epoch": 0.22729045634635517, "grad_norm": 0.41634568057723575, "learning_rate": 8.971315431538419e-06, "loss": 0.6507, "step": 1384 }, { "epoch": 0.22745468355469792, "grad_norm": 0.3767648106691863, "learning_rate": 8.971262539996371e-06, "loss": 0.6219, "step": 1385 }, { "epoch": 0.22761891076304067, "grad_norm": 0.37280796908592456, "learning_rate": 8.971209599892012e-06, "loss": 0.63, "step": 1386 }, { "epoch": 0.22778313797138341, "grad_norm": 0.39063293180757414, "learning_rate": 8.971156611225918e-06, "loss": 0.6354, "step": 1387 }, { "epoch": 0.22794736517972616, "grad_norm": 0.4130871584775029, "learning_rate": 8.971103573998664e-06, "loss": 0.6215, "step": 1388 }, { "epoch": 0.22811159238806888, "grad_norm": 0.3697250690243639, "learning_rate": 8.971050488210827e-06, "loss": 0.6509, "step": 1389 }, { "epoch": 0.22827581959641163, "grad_norm": 0.40985315624852925, "learning_rate": 8.97099735386298e-06, "loss": 0.6223, "step": 1390 }, { "epoch": 0.22844004680475438, "grad_norm": 0.41753388559467336, "learning_rate": 8.970944170955705e-06, "loss": 0.6308, "step": 1391 }, { "epoch": 0.22860427401309713, "grad_norm": 0.37172418439885546, "learning_rate": 8.970890939489577e-06, "loss": 0.6378, "step": 1392 }, { "epoch": 0.22876850122143985, "grad_norm": 0.3799761140290008, "learning_rate": 8.970837659465175e-06, "loss": 0.62, "step": 1393 }, { "epoch": 0.2289327284297826, "grad_norm": 0.3637974123542842, "learning_rate": 8.970784330883077e-06, "loss": 0.6414, "step": 1394 }, { "epoch": 0.22909695563812535, "grad_norm": 0.39145712168539487, "learning_rate": 8.970730953743865e-06, "loss": 0.6259, "step": 1395 }, { "epoch": 0.2292611828464681, "grad_norm": 0.3747433458407329, "learning_rate": 8.970677528048112e-06, "loss": 0.629, "step": 1396 }, { "epoch": 0.22942541005481082, "grad_norm": 0.40593368367010124, "learning_rate": 8.970624053796405e-06, "loss": 0.6545, "step": 1397 }, { "epoch": 0.22958963726315357, "grad_norm": 0.3814870268904145, "learning_rate": 8.970570530989322e-06, "loss": 0.6356, "step": 1398 }, { "epoch": 0.22975386447149632, "grad_norm": 0.37393431918398545, "learning_rate": 8.970516959627445e-06, "loss": 0.6149, "step": 1399 }, { "epoch": 0.22991809167983907, "grad_norm": 0.4234768187758134, "learning_rate": 8.970463339711354e-06, "loss": 0.6213, "step": 1400 }, { "epoch": 0.23008231888818179, "grad_norm": 0.39010381977649883, "learning_rate": 8.970409671241635e-06, "loss": 0.6239, "step": 1401 }, { "epoch": 0.23024654609652453, "grad_norm": 0.4059667118236035, "learning_rate": 8.970355954218866e-06, "loss": 0.6457, "step": 1402 }, { "epoch": 0.23041077330486728, "grad_norm": 0.36347685311176176, "learning_rate": 8.970302188643634e-06, "loss": 0.6366, "step": 1403 }, { "epoch": 0.23057500051321003, "grad_norm": 0.4016516068154081, "learning_rate": 8.970248374516523e-06, "loss": 0.6155, "step": 1404 }, { "epoch": 0.23073922772155278, "grad_norm": 0.3738165759888122, "learning_rate": 8.970194511838116e-06, "loss": 0.6297, "step": 1405 }, { "epoch": 0.2309034549298955, "grad_norm": 0.38775548720178016, "learning_rate": 8.970140600608998e-06, "loss": 0.631, "step": 1406 }, { "epoch": 0.23106768213823825, "grad_norm": 0.36651163646368323, "learning_rate": 8.970086640829755e-06, "loss": 0.6235, "step": 1407 }, { "epoch": 0.231231909346581, "grad_norm": 0.388177161793234, "learning_rate": 8.970032632500974e-06, "loss": 0.6447, "step": 1408 }, { "epoch": 0.23139613655492375, "grad_norm": 0.38342963166847494, "learning_rate": 8.96997857562324e-06, "loss": 0.6446, "step": 1409 }, { "epoch": 0.23156036376326647, "grad_norm": 0.35965242348385945, "learning_rate": 8.969924470197141e-06, "loss": 0.6211, "step": 1410 }, { "epoch": 0.23172459097160922, "grad_norm": 0.40581904246250583, "learning_rate": 8.969870316223264e-06, "loss": 0.6089, "step": 1411 }, { "epoch": 0.23188881817995197, "grad_norm": 0.36406413748044436, "learning_rate": 8.969816113702198e-06, "loss": 0.6302, "step": 1412 }, { "epoch": 0.23205304538829472, "grad_norm": 0.36016891310093596, "learning_rate": 8.969761862634532e-06, "loss": 0.6301, "step": 1413 }, { "epoch": 0.23221727259663744, "grad_norm": 0.38497253061331427, "learning_rate": 8.969707563020854e-06, "loss": 0.6381, "step": 1414 }, { "epoch": 0.23238149980498018, "grad_norm": 0.3728601291907777, "learning_rate": 8.969653214861753e-06, "loss": 0.6181, "step": 1415 }, { "epoch": 0.23254572701332293, "grad_norm": 0.3667182768200828, "learning_rate": 8.969598818157824e-06, "loss": 0.6175, "step": 1416 }, { "epoch": 0.23270995422166568, "grad_norm": 0.40991591837455477, "learning_rate": 8.969544372909651e-06, "loss": 0.6357, "step": 1417 }, { "epoch": 0.2328741814300084, "grad_norm": 0.3830261589112887, "learning_rate": 8.96948987911783e-06, "loss": 0.6263, "step": 1418 }, { "epoch": 0.23303840863835115, "grad_norm": 0.37342955461632715, "learning_rate": 8.969435336782951e-06, "loss": 0.651, "step": 1419 }, { "epoch": 0.2332026358466939, "grad_norm": 0.38302602639530275, "learning_rate": 8.969380745905607e-06, "loss": 0.6147, "step": 1420 }, { "epoch": 0.23336686305503665, "grad_norm": 0.3801283833275303, "learning_rate": 8.969326106486392e-06, "loss": 0.5939, "step": 1421 }, { "epoch": 0.2335310902633794, "grad_norm": 0.3829133639241409, "learning_rate": 8.969271418525897e-06, "loss": 0.6029, "step": 1422 }, { "epoch": 0.23369531747172212, "grad_norm": 0.37812529413156654, "learning_rate": 8.969216682024718e-06, "loss": 0.6403, "step": 1423 }, { "epoch": 0.23385954468006487, "grad_norm": 0.37094348293191975, "learning_rate": 8.969161896983448e-06, "loss": 0.6171, "step": 1424 }, { "epoch": 0.23402377188840762, "grad_norm": 0.35854811021498156, "learning_rate": 8.969107063402682e-06, "loss": 0.6213, "step": 1425 }, { "epoch": 0.23418799909675037, "grad_norm": 0.3600943551683208, "learning_rate": 8.969052181283017e-06, "loss": 0.6146, "step": 1426 }, { "epoch": 0.2343522263050931, "grad_norm": 0.37828891860958147, "learning_rate": 8.968997250625048e-06, "loss": 0.6205, "step": 1427 }, { "epoch": 0.23451645351343584, "grad_norm": 0.3753629445499872, "learning_rate": 8.968942271429375e-06, "loss": 0.6267, "step": 1428 }, { "epoch": 0.23468068072177858, "grad_norm": 0.35650819622201213, "learning_rate": 8.968887243696589e-06, "loss": 0.6282, "step": 1429 }, { "epoch": 0.23484490793012133, "grad_norm": 0.38875352987788536, "learning_rate": 8.96883216742729e-06, "loss": 0.6156, "step": 1430 }, { "epoch": 0.23500913513846405, "grad_norm": 0.3817434000607439, "learning_rate": 8.96877704262208e-06, "loss": 0.6388, "step": 1431 }, { "epoch": 0.2351733623468068, "grad_norm": 0.3873016030270038, "learning_rate": 8.968721869281552e-06, "loss": 0.6163, "step": 1432 }, { "epoch": 0.23533758955514955, "grad_norm": 0.4079351706434482, "learning_rate": 8.96866664740631e-06, "loss": 0.6485, "step": 1433 }, { "epoch": 0.2355018167634923, "grad_norm": 0.38317014691327517, "learning_rate": 8.968611376996949e-06, "loss": 0.5968, "step": 1434 }, { "epoch": 0.23566604397183502, "grad_norm": 0.3862892534452918, "learning_rate": 8.968556058054075e-06, "loss": 0.6228, "step": 1435 }, { "epoch": 0.23583027118017777, "grad_norm": 0.3513593263777787, "learning_rate": 8.968500690578285e-06, "loss": 0.6188, "step": 1436 }, { "epoch": 0.23599449838852052, "grad_norm": 0.36867544120784373, "learning_rate": 8.968445274570179e-06, "loss": 0.6263, "step": 1437 }, { "epoch": 0.23615872559686327, "grad_norm": 0.36938216239379995, "learning_rate": 8.968389810030362e-06, "loss": 0.6164, "step": 1438 }, { "epoch": 0.23632295280520602, "grad_norm": 0.3727449831108178, "learning_rate": 8.968334296959436e-06, "loss": 0.6085, "step": 1439 }, { "epoch": 0.23648718001354874, "grad_norm": 0.3662499381596673, "learning_rate": 8.968278735358003e-06, "loss": 0.6023, "step": 1440 }, { "epoch": 0.23665140722189149, "grad_norm": 0.40628718930364516, "learning_rate": 8.968223125226667e-06, "loss": 0.6171, "step": 1441 }, { "epoch": 0.23681563443023423, "grad_norm": 0.37037082262637283, "learning_rate": 8.96816746656603e-06, "loss": 0.6368, "step": 1442 }, { "epoch": 0.23697986163857698, "grad_norm": 0.3549645143515296, "learning_rate": 8.968111759376699e-06, "loss": 0.6334, "step": 1443 }, { "epoch": 0.2371440888469197, "grad_norm": 0.3798618217383637, "learning_rate": 8.96805600365928e-06, "loss": 0.6191, "step": 1444 }, { "epoch": 0.23730831605526245, "grad_norm": 0.36686801549322573, "learning_rate": 8.968000199414376e-06, "loss": 0.6356, "step": 1445 }, { "epoch": 0.2374725432636052, "grad_norm": 0.35942437209207495, "learning_rate": 8.967944346642592e-06, "loss": 0.6144, "step": 1446 }, { "epoch": 0.23763677047194795, "grad_norm": 0.36600431669189964, "learning_rate": 8.96788844534454e-06, "loss": 0.6198, "step": 1447 }, { "epoch": 0.23780099768029067, "grad_norm": 0.367127385380099, "learning_rate": 8.967832495520822e-06, "loss": 0.6204, "step": 1448 }, { "epoch": 0.23796522488863342, "grad_norm": 0.37739288988592845, "learning_rate": 8.967776497172046e-06, "loss": 0.6286, "step": 1449 }, { "epoch": 0.23812945209697617, "grad_norm": 0.3694809371169283, "learning_rate": 8.967720450298822e-06, "loss": 0.6329, "step": 1450 }, { "epoch": 0.23829367930531892, "grad_norm": 0.39873053139399506, "learning_rate": 8.967664354901759e-06, "loss": 0.607, "step": 1451 }, { "epoch": 0.23845790651366164, "grad_norm": 0.37174093067137187, "learning_rate": 8.967608210981466e-06, "loss": 0.6297, "step": 1452 }, { "epoch": 0.2386221337220044, "grad_norm": 0.3655257783754846, "learning_rate": 8.967552018538552e-06, "loss": 0.614, "step": 1453 }, { "epoch": 0.23878636093034714, "grad_norm": 0.3486334444422297, "learning_rate": 8.967495777573626e-06, "loss": 0.612, "step": 1454 }, { "epoch": 0.23895058813868988, "grad_norm": 0.38653414135810354, "learning_rate": 8.967439488087303e-06, "loss": 0.6373, "step": 1455 }, { "epoch": 0.23911481534703263, "grad_norm": 0.4166657864417276, "learning_rate": 8.967383150080191e-06, "loss": 0.6172, "step": 1456 }, { "epoch": 0.23927904255537535, "grad_norm": 0.36625495836800076, "learning_rate": 8.967326763552901e-06, "loss": 0.6105, "step": 1457 }, { "epoch": 0.2394432697637181, "grad_norm": 0.35693092319100034, "learning_rate": 8.96727032850605e-06, "loss": 0.6361, "step": 1458 }, { "epoch": 0.23960749697206085, "grad_norm": 0.4668329077344547, "learning_rate": 8.967213844940246e-06, "loss": 0.6507, "step": 1459 }, { "epoch": 0.2397717241804036, "grad_norm": 0.36293669801834133, "learning_rate": 8.967157312856105e-06, "loss": 0.629, "step": 1460 }, { "epoch": 0.23993595138874632, "grad_norm": 0.39226552096477374, "learning_rate": 8.96710073225424e-06, "loss": 0.6246, "step": 1461 }, { "epoch": 0.24010017859708907, "grad_norm": 0.3767099803854531, "learning_rate": 8.967044103135266e-06, "loss": 0.6157, "step": 1462 }, { "epoch": 0.24026440580543182, "grad_norm": 0.3738464526239834, "learning_rate": 8.966987425499798e-06, "loss": 0.6338, "step": 1463 }, { "epoch": 0.24042863301377457, "grad_norm": 0.3924322055790276, "learning_rate": 8.966930699348453e-06, "loss": 0.6097, "step": 1464 }, { "epoch": 0.2405928602221173, "grad_norm": 0.40128496672548014, "learning_rate": 8.966873924681845e-06, "loss": 0.6409, "step": 1465 }, { "epoch": 0.24075708743046004, "grad_norm": 0.35113853348291707, "learning_rate": 8.96681710150059e-06, "loss": 0.6347, "step": 1466 }, { "epoch": 0.24092131463880279, "grad_norm": 0.3820894843457969, "learning_rate": 8.96676022980531e-06, "loss": 0.6058, "step": 1467 }, { "epoch": 0.24108554184714553, "grad_norm": 0.36199118035878114, "learning_rate": 8.966703309596615e-06, "loss": 0.6356, "step": 1468 }, { "epoch": 0.24124976905548826, "grad_norm": 0.3540001836120606, "learning_rate": 8.966646340875129e-06, "loss": 0.619, "step": 1469 }, { "epoch": 0.241413996263831, "grad_norm": 0.38920950337400373, "learning_rate": 8.96658932364147e-06, "loss": 0.6122, "step": 1470 }, { "epoch": 0.24157822347217375, "grad_norm": 0.35862870218740506, "learning_rate": 8.966532257896256e-06, "loss": 0.5999, "step": 1471 }, { "epoch": 0.2417424506805165, "grad_norm": 0.40346544693249303, "learning_rate": 8.966475143640108e-06, "loss": 0.6389, "step": 1472 }, { "epoch": 0.24190667788885925, "grad_norm": 0.35802924018053983, "learning_rate": 8.966417980873644e-06, "loss": 0.656, "step": 1473 }, { "epoch": 0.24207090509720197, "grad_norm": 0.35416797749163403, "learning_rate": 8.966360769597487e-06, "loss": 0.5871, "step": 1474 }, { "epoch": 0.24223513230554472, "grad_norm": 0.38394310761980127, "learning_rate": 8.966303509812259e-06, "loss": 0.6278, "step": 1475 }, { "epoch": 0.24239935951388747, "grad_norm": 0.37542608512409903, "learning_rate": 8.966246201518577e-06, "loss": 0.6451, "step": 1476 }, { "epoch": 0.24256358672223022, "grad_norm": 0.50798389472191, "learning_rate": 8.96618884471707e-06, "loss": 0.6169, "step": 1477 }, { "epoch": 0.24272781393057294, "grad_norm": 0.37174062893917387, "learning_rate": 8.966131439408357e-06, "loss": 0.6373, "step": 1478 }, { "epoch": 0.2428920411389157, "grad_norm": 0.4209079011176253, "learning_rate": 8.966073985593063e-06, "loss": 0.6235, "step": 1479 }, { "epoch": 0.24305626834725844, "grad_norm": 0.41783270170305736, "learning_rate": 8.966016483271813e-06, "loss": 0.6291, "step": 1480 }, { "epoch": 0.24322049555560118, "grad_norm": 0.3578979030166597, "learning_rate": 8.965958932445228e-06, "loss": 0.6075, "step": 1481 }, { "epoch": 0.2433847227639439, "grad_norm": 0.37588742363710526, "learning_rate": 8.965901333113936e-06, "loss": 0.6172, "step": 1482 }, { "epoch": 0.24354894997228665, "grad_norm": 0.43019135611502246, "learning_rate": 8.965843685278561e-06, "loss": 0.6152, "step": 1483 }, { "epoch": 0.2437131771806294, "grad_norm": 0.3713307328258515, "learning_rate": 8.965785988939728e-06, "loss": 0.633, "step": 1484 }, { "epoch": 0.24387740438897215, "grad_norm": 0.38538363095385886, "learning_rate": 8.96572824409807e-06, "loss": 0.6103, "step": 1485 }, { "epoch": 0.24404163159731487, "grad_norm": 0.42959307234492405, "learning_rate": 8.965670450754205e-06, "loss": 0.6159, "step": 1486 }, { "epoch": 0.24420585880565762, "grad_norm": 0.3677298238888804, "learning_rate": 8.965612608908767e-06, "loss": 0.6242, "step": 1487 }, { "epoch": 0.24437008601400037, "grad_norm": 0.3700124245084724, "learning_rate": 8.965554718562383e-06, "loss": 0.632, "step": 1488 }, { "epoch": 0.24453431322234312, "grad_norm": 0.37162697278412393, "learning_rate": 8.965496779715681e-06, "loss": 0.6291, "step": 1489 }, { "epoch": 0.24469854043068584, "grad_norm": 0.5932246129868997, "learning_rate": 8.965438792369291e-06, "loss": 0.6218, "step": 1490 }, { "epoch": 0.2448627676390286, "grad_norm": 0.35796971024046653, "learning_rate": 8.965380756523842e-06, "loss": 0.6301, "step": 1491 }, { "epoch": 0.24502699484737134, "grad_norm": 0.3888508026013573, "learning_rate": 8.965322672179964e-06, "loss": 0.607, "step": 1492 }, { "epoch": 0.2451912220557141, "grad_norm": 0.3782937892080753, "learning_rate": 8.96526453933829e-06, "loss": 0.6456, "step": 1493 }, { "epoch": 0.24535544926405684, "grad_norm": 0.3799106905153972, "learning_rate": 8.965206357999449e-06, "loss": 0.6294, "step": 1494 }, { "epoch": 0.24551967647239956, "grad_norm": 0.39762851482762696, "learning_rate": 8.965148128164074e-06, "loss": 0.6399, "step": 1495 }, { "epoch": 0.2456839036807423, "grad_norm": 0.4416226219612057, "learning_rate": 8.965089849832796e-06, "loss": 0.6251, "step": 1496 }, { "epoch": 0.24584813088908505, "grad_norm": 0.3889843392209306, "learning_rate": 8.96503152300625e-06, "loss": 0.6497, "step": 1497 }, { "epoch": 0.2460123580974278, "grad_norm": 0.35215446037387954, "learning_rate": 8.964973147685069e-06, "loss": 0.633, "step": 1498 }, { "epoch": 0.24617658530577052, "grad_norm": 0.45650337654446665, "learning_rate": 8.964914723869886e-06, "loss": 0.6042, "step": 1499 }, { "epoch": 0.24634081251411327, "grad_norm": 0.36286389615728926, "learning_rate": 8.964856251561336e-06, "loss": 0.6341, "step": 1500 }, { "epoch": 0.24650503972245602, "grad_norm": 0.3593756064119613, "learning_rate": 8.964797730760055e-06, "loss": 0.6197, "step": 1501 }, { "epoch": 0.24666926693079877, "grad_norm": 0.3771052035230812, "learning_rate": 8.964739161466678e-06, "loss": 0.6242, "step": 1502 }, { "epoch": 0.2468334941391415, "grad_norm": 0.3605637264989002, "learning_rate": 8.96468054368184e-06, "loss": 0.6094, "step": 1503 }, { "epoch": 0.24699772134748424, "grad_norm": 0.3626333911931281, "learning_rate": 8.964621877406181e-06, "loss": 0.6348, "step": 1504 }, { "epoch": 0.247161948555827, "grad_norm": 0.38088499252044117, "learning_rate": 8.964563162640334e-06, "loss": 0.6268, "step": 1505 }, { "epoch": 0.24732617576416974, "grad_norm": 0.36603008668404546, "learning_rate": 8.964504399384938e-06, "loss": 0.6265, "step": 1506 }, { "epoch": 0.24749040297251246, "grad_norm": 0.3627778415298624, "learning_rate": 8.964445587640633e-06, "loss": 0.6557, "step": 1507 }, { "epoch": 0.2476546301808552, "grad_norm": 0.3976038856166393, "learning_rate": 8.964386727408055e-06, "loss": 0.6295, "step": 1508 }, { "epoch": 0.24781885738919796, "grad_norm": 0.35128896141586324, "learning_rate": 8.964327818687847e-06, "loss": 0.629, "step": 1509 }, { "epoch": 0.2479830845975407, "grad_norm": 0.3408244422741637, "learning_rate": 8.964268861480645e-06, "loss": 0.614, "step": 1510 }, { "epoch": 0.24814731180588345, "grad_norm": 0.35929482815511576, "learning_rate": 8.964209855787091e-06, "loss": 0.5934, "step": 1511 }, { "epoch": 0.24831153901422617, "grad_norm": 0.46183852703188355, "learning_rate": 8.964150801607825e-06, "loss": 0.6054, "step": 1512 }, { "epoch": 0.24847576622256892, "grad_norm": 0.37059564147077584, "learning_rate": 8.96409169894349e-06, "loss": 0.6184, "step": 1513 }, { "epoch": 0.24863999343091167, "grad_norm": 0.3629530929389863, "learning_rate": 8.964032547794728e-06, "loss": 0.6242, "step": 1514 }, { "epoch": 0.24880422063925442, "grad_norm": 0.36798673186434855, "learning_rate": 8.96397334816218e-06, "loss": 0.6034, "step": 1515 }, { "epoch": 0.24896844784759714, "grad_norm": 0.3943059155021281, "learning_rate": 8.963914100046489e-06, "loss": 0.6471, "step": 1516 }, { "epoch": 0.2491326750559399, "grad_norm": 0.4348402454752103, "learning_rate": 8.963854803448301e-06, "loss": 0.5869, "step": 1517 }, { "epoch": 0.24929690226428264, "grad_norm": 0.400012642981447, "learning_rate": 8.963795458368254e-06, "loss": 0.6282, "step": 1518 }, { "epoch": 0.2494611294726254, "grad_norm": 0.39046182025528064, "learning_rate": 8.963736064807e-06, "loss": 0.6466, "step": 1519 }, { "epoch": 0.2496253566809681, "grad_norm": 0.35844392595007163, "learning_rate": 8.963676622765179e-06, "loss": 0.6256, "step": 1520 }, { "epoch": 0.24978958388931086, "grad_norm": 0.35242439852874236, "learning_rate": 8.963617132243439e-06, "loss": 0.5967, "step": 1521 }, { "epoch": 0.2499538110976536, "grad_norm": 0.35656035849610795, "learning_rate": 8.963557593242424e-06, "loss": 0.6198, "step": 1522 }, { "epoch": 0.2501180383059963, "grad_norm": 0.38132355048447236, "learning_rate": 8.963498005762783e-06, "loss": 0.6431, "step": 1523 }, { "epoch": 0.2502822655143391, "grad_norm": 0.3537836522540743, "learning_rate": 8.963438369805163e-06, "loss": 0.6209, "step": 1524 }, { "epoch": 0.2504464927226818, "grad_norm": 0.3807854751881578, "learning_rate": 8.963378685370209e-06, "loss": 0.614, "step": 1525 }, { "epoch": 0.2506107199310246, "grad_norm": 0.3506963314976677, "learning_rate": 8.963318952458571e-06, "loss": 0.607, "step": 1526 }, { "epoch": 0.2507749471393673, "grad_norm": 0.3478461020759691, "learning_rate": 8.9632591710709e-06, "loss": 0.641, "step": 1527 }, { "epoch": 0.25093917434771007, "grad_norm": 0.3530580403738584, "learning_rate": 8.963199341207842e-06, "loss": 0.6202, "step": 1528 }, { "epoch": 0.2511034015560528, "grad_norm": 0.39033985727780796, "learning_rate": 8.963139462870049e-06, "loss": 0.6342, "step": 1529 }, { "epoch": 0.25126762876439557, "grad_norm": 0.34915595980919556, "learning_rate": 8.963079536058168e-06, "loss": 0.6036, "step": 1530 }, { "epoch": 0.25143185597273826, "grad_norm": 0.41289312093272673, "learning_rate": 8.963019560772856e-06, "loss": 0.6, "step": 1531 }, { "epoch": 0.251596083181081, "grad_norm": 0.3579650538537075, "learning_rate": 8.962959537014757e-06, "loss": 0.6271, "step": 1532 }, { "epoch": 0.25176031038942376, "grad_norm": 0.358747837059559, "learning_rate": 8.962899464784528e-06, "loss": 0.6079, "step": 1533 }, { "epoch": 0.2519245375977665, "grad_norm": 0.3630698663213305, "learning_rate": 8.962839344082818e-06, "loss": 0.6221, "step": 1534 }, { "epoch": 0.25208876480610926, "grad_norm": 0.3587173079907215, "learning_rate": 8.962779174910283e-06, "loss": 0.6049, "step": 1535 }, { "epoch": 0.252252992014452, "grad_norm": 0.3627910673327825, "learning_rate": 8.962718957267576e-06, "loss": 0.627, "step": 1536 }, { "epoch": 0.25241721922279475, "grad_norm": 0.3543705245438461, "learning_rate": 8.962658691155351e-06, "loss": 0.6004, "step": 1537 }, { "epoch": 0.2525814464311375, "grad_norm": 0.36309273648911605, "learning_rate": 8.96259837657426e-06, "loss": 0.6173, "step": 1538 }, { "epoch": 0.2527456736394802, "grad_norm": 0.3777501728160246, "learning_rate": 8.962538013524963e-06, "loss": 0.5753, "step": 1539 }, { "epoch": 0.25290990084782294, "grad_norm": 0.3592769791035358, "learning_rate": 8.96247760200811e-06, "loss": 0.6148, "step": 1540 }, { "epoch": 0.2530741280561657, "grad_norm": 0.36539569711437503, "learning_rate": 8.96241714202436e-06, "loss": 0.636, "step": 1541 }, { "epoch": 0.25323835526450844, "grad_norm": 0.3581273145789663, "learning_rate": 8.962356633574368e-06, "loss": 0.5825, "step": 1542 }, { "epoch": 0.2534025824728512, "grad_norm": 0.4135080963610115, "learning_rate": 8.962296076658795e-06, "loss": 0.6252, "step": 1543 }, { "epoch": 0.25356680968119394, "grad_norm": 0.33935214150807924, "learning_rate": 8.962235471278298e-06, "loss": 0.6465, "step": 1544 }, { "epoch": 0.2537310368895367, "grad_norm": 0.4749931745427001, "learning_rate": 8.962174817433531e-06, "loss": 0.6142, "step": 1545 }, { "epoch": 0.25389526409787944, "grad_norm": 0.42031769394767504, "learning_rate": 8.962114115125154e-06, "loss": 0.63, "step": 1546 }, { "epoch": 0.2540594913062222, "grad_norm": 0.3804360525221743, "learning_rate": 8.962053364353831e-06, "loss": 0.6312, "step": 1547 }, { "epoch": 0.2542237185145649, "grad_norm": 0.3524668319377424, "learning_rate": 8.961992565120216e-06, "loss": 0.6088, "step": 1548 }, { "epoch": 0.2543879457229076, "grad_norm": 0.3458219025452087, "learning_rate": 8.961931717424973e-06, "loss": 0.6192, "step": 1549 }, { "epoch": 0.2545521729312504, "grad_norm": 0.35274115975192566, "learning_rate": 8.96187082126876e-06, "loss": 0.6129, "step": 1550 }, { "epoch": 0.2547164001395931, "grad_norm": 0.3626793254051729, "learning_rate": 8.96180987665224e-06, "loss": 0.6305, "step": 1551 }, { "epoch": 0.2548806273479359, "grad_norm": 0.3848176233620632, "learning_rate": 8.961748883576077e-06, "loss": 0.589, "step": 1552 }, { "epoch": 0.2550448545562786, "grad_norm": 0.366125994685682, "learning_rate": 8.96168784204093e-06, "loss": 0.6528, "step": 1553 }, { "epoch": 0.25520908176462137, "grad_norm": 0.3322249818553472, "learning_rate": 8.961626752047464e-06, "loss": 0.641, "step": 1554 }, { "epoch": 0.2553733089729641, "grad_norm": 0.39701211769550127, "learning_rate": 8.96156561359634e-06, "loss": 0.6347, "step": 1555 }, { "epoch": 0.2555375361813068, "grad_norm": 0.331778398473829, "learning_rate": 8.961504426688226e-06, "loss": 0.5913, "step": 1556 }, { "epoch": 0.25570176338964956, "grad_norm": 0.35888908022149046, "learning_rate": 8.961443191323783e-06, "loss": 0.6081, "step": 1557 }, { "epoch": 0.2558659905979923, "grad_norm": 0.3535893722927766, "learning_rate": 8.961381907503678e-06, "loss": 0.6385, "step": 1558 }, { "epoch": 0.25603021780633506, "grad_norm": 0.33617576269857896, "learning_rate": 8.961320575228577e-06, "loss": 0.6137, "step": 1559 }, { "epoch": 0.2561944450146778, "grad_norm": 0.4050222822206187, "learning_rate": 8.961259194499144e-06, "loss": 0.6156, "step": 1560 }, { "epoch": 0.25635867222302056, "grad_norm": 0.35052205808117587, "learning_rate": 8.961197765316048e-06, "loss": 0.599, "step": 1561 }, { "epoch": 0.2565228994313633, "grad_norm": 0.39858862462836475, "learning_rate": 8.961136287679955e-06, "loss": 0.611, "step": 1562 }, { "epoch": 0.25668712663970605, "grad_norm": 0.3749625887686892, "learning_rate": 8.96107476159153e-06, "loss": 0.6183, "step": 1563 }, { "epoch": 0.2568513538480488, "grad_norm": 0.36936476849359495, "learning_rate": 8.961013187051448e-06, "loss": 0.6082, "step": 1564 }, { "epoch": 0.2570155810563915, "grad_norm": 0.34295965567698833, "learning_rate": 8.96095156406037e-06, "loss": 0.6076, "step": 1565 }, { "epoch": 0.25717980826473424, "grad_norm": 0.35256410051497583, "learning_rate": 8.960889892618972e-06, "loss": 0.6208, "step": 1566 }, { "epoch": 0.257344035473077, "grad_norm": 0.5784333725990156, "learning_rate": 8.960828172727918e-06, "loss": 0.6236, "step": 1567 }, { "epoch": 0.25750826268141974, "grad_norm": 0.3489605554526544, "learning_rate": 8.960766404387882e-06, "loss": 0.64, "step": 1568 }, { "epoch": 0.2576724898897625, "grad_norm": 0.3415995007524165, "learning_rate": 8.960704587599537e-06, "loss": 0.638, "step": 1569 }, { "epoch": 0.25783671709810524, "grad_norm": 0.3820929372774659, "learning_rate": 8.960642722363548e-06, "loss": 0.6488, "step": 1570 }, { "epoch": 0.258000944306448, "grad_norm": 0.38911326483657277, "learning_rate": 8.960580808680592e-06, "loss": 0.5972, "step": 1571 }, { "epoch": 0.25816517151479074, "grad_norm": 0.47819526662868234, "learning_rate": 8.96051884655134e-06, "loss": 0.6315, "step": 1572 }, { "epoch": 0.25832939872313343, "grad_norm": 0.38675435762044147, "learning_rate": 8.960456835976463e-06, "loss": 0.6121, "step": 1573 }, { "epoch": 0.2584936259314762, "grad_norm": 0.36691627542081623, "learning_rate": 8.96039477695664e-06, "loss": 0.6224, "step": 1574 }, { "epoch": 0.2586578531398189, "grad_norm": 0.3273576946150552, "learning_rate": 8.960332669492536e-06, "loss": 0.6082, "step": 1575 }, { "epoch": 0.2588220803481617, "grad_norm": 0.3771741764322628, "learning_rate": 8.960270513584835e-06, "loss": 0.6154, "step": 1576 }, { "epoch": 0.2589863075565044, "grad_norm": 0.3504930691326926, "learning_rate": 8.960208309234205e-06, "loss": 0.6115, "step": 1577 }, { "epoch": 0.2591505347648472, "grad_norm": 0.3221176517577065, "learning_rate": 8.960146056441327e-06, "loss": 0.6233, "step": 1578 }, { "epoch": 0.2593147619731899, "grad_norm": 0.34553595989460767, "learning_rate": 8.960083755206874e-06, "loss": 0.6314, "step": 1579 }, { "epoch": 0.25947898918153267, "grad_norm": 0.35748357758110466, "learning_rate": 8.960021405531523e-06, "loss": 0.6069, "step": 1580 }, { "epoch": 0.2596432163898754, "grad_norm": 0.4473671425734154, "learning_rate": 8.959959007415951e-06, "loss": 0.6417, "step": 1581 }, { "epoch": 0.2598074435982181, "grad_norm": 0.3510174264321415, "learning_rate": 8.959896560860838e-06, "loss": 0.629, "step": 1582 }, { "epoch": 0.25997167080656086, "grad_norm": 0.36410783759248944, "learning_rate": 8.959834065866857e-06, "loss": 0.5946, "step": 1583 }, { "epoch": 0.2601358980149036, "grad_norm": 0.3512116883938849, "learning_rate": 8.959771522434693e-06, "loss": 0.609, "step": 1584 }, { "epoch": 0.26030012522324636, "grad_norm": 0.36875418431071966, "learning_rate": 8.959708930565021e-06, "loss": 0.6243, "step": 1585 }, { "epoch": 0.2604643524315891, "grad_norm": 0.40873019656468806, "learning_rate": 8.959646290258523e-06, "loss": 0.6009, "step": 1586 }, { "epoch": 0.26062857963993186, "grad_norm": 0.33934926723741654, "learning_rate": 8.959583601515878e-06, "loss": 0.6143, "step": 1587 }, { "epoch": 0.2607928068482746, "grad_norm": 0.36594161286713034, "learning_rate": 8.959520864337769e-06, "loss": 0.5963, "step": 1588 }, { "epoch": 0.26095703405661735, "grad_norm": 0.35446975771035094, "learning_rate": 8.959458078724875e-06, "loss": 0.6133, "step": 1589 }, { "epoch": 0.26112126126496005, "grad_norm": 0.33119122466208173, "learning_rate": 8.959395244677878e-06, "loss": 0.6029, "step": 1590 }, { "epoch": 0.2612854884733028, "grad_norm": 0.3558849986573231, "learning_rate": 8.959332362197461e-06, "loss": 0.5961, "step": 1591 }, { "epoch": 0.26144971568164554, "grad_norm": 0.4300437747977983, "learning_rate": 8.959269431284309e-06, "loss": 0.6265, "step": 1592 }, { "epoch": 0.2616139428899883, "grad_norm": 0.33313374885784347, "learning_rate": 8.959206451939102e-06, "loss": 0.6335, "step": 1593 }, { "epoch": 0.26177817009833104, "grad_norm": 0.36400831389369626, "learning_rate": 8.959143424162526e-06, "loss": 0.6284, "step": 1594 }, { "epoch": 0.2619423973066738, "grad_norm": 0.3464235716370396, "learning_rate": 8.959080347955264e-06, "loss": 0.6023, "step": 1595 }, { "epoch": 0.26210662451501654, "grad_norm": 0.39376900324065406, "learning_rate": 8.959017223318005e-06, "loss": 0.6145, "step": 1596 }, { "epoch": 0.2622708517233593, "grad_norm": 0.34821762378354826, "learning_rate": 8.95895405025143e-06, "loss": 0.6114, "step": 1597 }, { "epoch": 0.26243507893170204, "grad_norm": 0.3463710949430958, "learning_rate": 8.958890828756229e-06, "loss": 0.6386, "step": 1598 }, { "epoch": 0.26259930614004473, "grad_norm": 0.3247606140038734, "learning_rate": 8.958827558833084e-06, "loss": 0.6303, "step": 1599 }, { "epoch": 0.2627635333483875, "grad_norm": 0.3406923782871825, "learning_rate": 8.958764240482686e-06, "loss": 0.5948, "step": 1600 }, { "epoch": 0.26292776055673023, "grad_norm": 0.3627730588083312, "learning_rate": 8.958700873705721e-06, "loss": 0.6232, "step": 1601 }, { "epoch": 0.263091987765073, "grad_norm": 0.35207234573189816, "learning_rate": 8.958637458502879e-06, "loss": 0.6102, "step": 1602 }, { "epoch": 0.2632562149734157, "grad_norm": 0.3569459596649669, "learning_rate": 8.958573994874846e-06, "loss": 0.6313, "step": 1603 }, { "epoch": 0.2634204421817585, "grad_norm": 0.4495054405643325, "learning_rate": 8.958510482822314e-06, "loss": 0.6, "step": 1604 }, { "epoch": 0.2635846693901012, "grad_norm": 0.38731324914519233, "learning_rate": 8.95844692234597e-06, "loss": 0.6405, "step": 1605 }, { "epoch": 0.26374889659844397, "grad_norm": 0.3338592407113862, "learning_rate": 8.958383313446508e-06, "loss": 0.6234, "step": 1606 }, { "epoch": 0.26391312380678666, "grad_norm": 0.33659403725843934, "learning_rate": 8.958319656124615e-06, "loss": 0.6163, "step": 1607 }, { "epoch": 0.2640773510151294, "grad_norm": 0.34801294714515657, "learning_rate": 8.958255950380986e-06, "loss": 0.6201, "step": 1608 }, { "epoch": 0.26424157822347216, "grad_norm": 0.33754657169064073, "learning_rate": 8.958192196216309e-06, "loss": 0.6342, "step": 1609 }, { "epoch": 0.2644058054318149, "grad_norm": 0.3224965637155876, "learning_rate": 8.958128393631279e-06, "loss": 0.6122, "step": 1610 }, { "epoch": 0.26457003264015766, "grad_norm": 0.33135459212029433, "learning_rate": 8.958064542626589e-06, "loss": 0.5957, "step": 1611 }, { "epoch": 0.2647342598485004, "grad_norm": 0.34651185946628477, "learning_rate": 8.958000643202932e-06, "loss": 0.5911, "step": 1612 }, { "epoch": 0.26489848705684316, "grad_norm": 0.3637450672410526, "learning_rate": 8.957936695361001e-06, "loss": 0.6292, "step": 1613 }, { "epoch": 0.2650627142651859, "grad_norm": 0.34550569205067416, "learning_rate": 8.957872699101492e-06, "loss": 0.6216, "step": 1614 }, { "epoch": 0.26522694147352865, "grad_norm": 0.3360151957193412, "learning_rate": 8.9578086544251e-06, "loss": 0.6141, "step": 1615 }, { "epoch": 0.26539116868187135, "grad_norm": 0.3322104258097254, "learning_rate": 8.957744561332521e-06, "loss": 0.6043, "step": 1616 }, { "epoch": 0.2655553958902141, "grad_norm": 0.3507509351350694, "learning_rate": 8.957680419824448e-06, "loss": 0.6475, "step": 1617 }, { "epoch": 0.26571962309855685, "grad_norm": 0.3477772878053762, "learning_rate": 8.95761622990158e-06, "loss": 0.601, "step": 1618 }, { "epoch": 0.2658838503068996, "grad_norm": 0.3976960245557338, "learning_rate": 8.957551991564617e-06, "loss": 0.6343, "step": 1619 }, { "epoch": 0.26604807751524234, "grad_norm": 0.34982742819917306, "learning_rate": 8.957487704814252e-06, "loss": 0.6665, "step": 1620 }, { "epoch": 0.2662123047235851, "grad_norm": 0.3406041837951626, "learning_rate": 8.957423369651183e-06, "loss": 0.6283, "step": 1621 }, { "epoch": 0.26637653193192784, "grad_norm": 0.3279996924316886, "learning_rate": 8.957358986076113e-06, "loss": 0.6057, "step": 1622 }, { "epoch": 0.2665407591402706, "grad_norm": 0.3672845856678108, "learning_rate": 8.957294554089738e-06, "loss": 0.6201, "step": 1623 }, { "epoch": 0.2667049863486133, "grad_norm": 0.39245027772202284, "learning_rate": 8.957230073692759e-06, "loss": 0.6082, "step": 1624 }, { "epoch": 0.26686921355695603, "grad_norm": 0.40103050047766803, "learning_rate": 8.957165544885875e-06, "loss": 0.5781, "step": 1625 }, { "epoch": 0.2670334407652988, "grad_norm": 0.33885876819639205, "learning_rate": 8.957100967669791e-06, "loss": 0.6334, "step": 1626 }, { "epoch": 0.26719766797364153, "grad_norm": 0.3583568872434457, "learning_rate": 8.957036342045203e-06, "loss": 0.6053, "step": 1627 }, { "epoch": 0.2673618951819843, "grad_norm": 0.3518879810969012, "learning_rate": 8.956971668012817e-06, "loss": 0.6216, "step": 1628 }, { "epoch": 0.267526122390327, "grad_norm": 0.3381543177932053, "learning_rate": 8.95690694557333e-06, "loss": 0.6232, "step": 1629 }, { "epoch": 0.2676903495986698, "grad_norm": 0.441450413777482, "learning_rate": 8.95684217472745e-06, "loss": 0.6225, "step": 1630 }, { "epoch": 0.2678545768070125, "grad_norm": 0.35541320952449645, "learning_rate": 8.956777355475881e-06, "loss": 0.6389, "step": 1631 }, { "epoch": 0.2680188040153552, "grad_norm": 0.4207779627283632, "learning_rate": 8.956712487819323e-06, "loss": 0.632, "step": 1632 }, { "epoch": 0.26818303122369797, "grad_norm": 0.3320596414586848, "learning_rate": 8.956647571758485e-06, "loss": 0.6146, "step": 1633 }, { "epoch": 0.2683472584320407, "grad_norm": 0.3565184586735187, "learning_rate": 8.956582607294067e-06, "loss": 0.6196, "step": 1634 }, { "epoch": 0.26851148564038346, "grad_norm": 0.32988657768521945, "learning_rate": 8.956517594426778e-06, "loss": 0.6184, "step": 1635 }, { "epoch": 0.2686757128487262, "grad_norm": 0.33061850241523516, "learning_rate": 8.956452533157325e-06, "loss": 0.5958, "step": 1636 }, { "epoch": 0.26883994005706896, "grad_norm": 0.3852551983617042, "learning_rate": 8.95638742348641e-06, "loss": 0.6365, "step": 1637 }, { "epoch": 0.2690041672654117, "grad_norm": 0.35206933111559585, "learning_rate": 8.956322265414746e-06, "loss": 0.6004, "step": 1638 }, { "epoch": 0.26916839447375446, "grad_norm": 0.3427069503065173, "learning_rate": 8.956257058943036e-06, "loss": 0.6185, "step": 1639 }, { "epoch": 0.2693326216820972, "grad_norm": 0.3406012616435544, "learning_rate": 8.95619180407199e-06, "loss": 0.606, "step": 1640 }, { "epoch": 0.2694968488904399, "grad_norm": 0.3597673860314228, "learning_rate": 8.956126500802318e-06, "loss": 0.6304, "step": 1641 }, { "epoch": 0.26966107609878265, "grad_norm": 0.3146073190261586, "learning_rate": 8.956061149134725e-06, "loss": 0.6177, "step": 1642 }, { "epoch": 0.2698253033071254, "grad_norm": 0.3424703951143541, "learning_rate": 8.955995749069926e-06, "loss": 0.6211, "step": 1643 }, { "epoch": 0.26998953051546815, "grad_norm": 0.35081835808818557, "learning_rate": 8.955930300608629e-06, "loss": 0.598, "step": 1644 }, { "epoch": 0.2701537577238109, "grad_norm": 0.4085725713510005, "learning_rate": 8.955864803751546e-06, "loss": 0.6, "step": 1645 }, { "epoch": 0.27031798493215364, "grad_norm": 0.3245057016062214, "learning_rate": 8.955799258499384e-06, "loss": 0.6016, "step": 1646 }, { "epoch": 0.2704822121404964, "grad_norm": 0.3429782149104829, "learning_rate": 8.95573366485286e-06, "loss": 0.6029, "step": 1647 }, { "epoch": 0.27064643934883914, "grad_norm": 0.3637562624540342, "learning_rate": 8.955668022812687e-06, "loss": 0.6142, "step": 1648 }, { "epoch": 0.27081066655718183, "grad_norm": 0.4237638762114716, "learning_rate": 8.955602332379572e-06, "loss": 0.5921, "step": 1649 }, { "epoch": 0.2709748937655246, "grad_norm": 0.39003068117908357, "learning_rate": 8.955536593554232e-06, "loss": 0.5899, "step": 1650 }, { "epoch": 0.27113912097386733, "grad_norm": 0.37048027690848173, "learning_rate": 8.955470806337382e-06, "loss": 0.6276, "step": 1651 }, { "epoch": 0.2713033481822101, "grad_norm": 0.3597344209193968, "learning_rate": 8.955404970729736e-06, "loss": 0.6175, "step": 1652 }, { "epoch": 0.27146757539055283, "grad_norm": 0.3367753913122933, "learning_rate": 8.955339086732009e-06, "loss": 0.6233, "step": 1653 }, { "epoch": 0.2716318025988956, "grad_norm": 0.3767695965340454, "learning_rate": 8.955273154344914e-06, "loss": 0.6275, "step": 1654 }, { "epoch": 0.2717960298072383, "grad_norm": 0.35115066088714475, "learning_rate": 8.95520717356917e-06, "loss": 0.6262, "step": 1655 }, { "epoch": 0.2719602570155811, "grad_norm": 0.35017560014066323, "learning_rate": 8.955141144405493e-06, "loss": 0.5956, "step": 1656 }, { "epoch": 0.2721244842239238, "grad_norm": 0.33377410466349333, "learning_rate": 8.9550750668546e-06, "loss": 0.59, "step": 1657 }, { "epoch": 0.2722887114322665, "grad_norm": 0.3590803976413437, "learning_rate": 8.955008940917208e-06, "loss": 0.6097, "step": 1658 }, { "epoch": 0.27245293864060927, "grad_norm": 0.36554099156071396, "learning_rate": 8.954942766594036e-06, "loss": 0.6201, "step": 1659 }, { "epoch": 0.272617165848952, "grad_norm": 0.3378974550030977, "learning_rate": 8.954876543885802e-06, "loss": 0.5875, "step": 1660 }, { "epoch": 0.27278139305729476, "grad_norm": 0.32781104553889123, "learning_rate": 8.954810272793227e-06, "loss": 0.6002, "step": 1661 }, { "epoch": 0.2729456202656375, "grad_norm": 0.398398331687394, "learning_rate": 8.954743953317029e-06, "loss": 0.62, "step": 1662 }, { "epoch": 0.27310984747398026, "grad_norm": 0.5063519634001101, "learning_rate": 8.95467758545793e-06, "loss": 0.604, "step": 1663 }, { "epoch": 0.273274074682323, "grad_norm": 0.3998293658682676, "learning_rate": 8.954611169216646e-06, "loss": 0.6273, "step": 1664 }, { "epoch": 0.27343830189066576, "grad_norm": 0.6269815667257993, "learning_rate": 8.954544704593904e-06, "loss": 0.6311, "step": 1665 }, { "epoch": 0.27360252909900845, "grad_norm": 0.32155955625909083, "learning_rate": 8.954478191590425e-06, "loss": 0.6042, "step": 1666 }, { "epoch": 0.2737667563073512, "grad_norm": 0.35671629044279696, "learning_rate": 8.95441163020693e-06, "loss": 0.6325, "step": 1667 }, { "epoch": 0.27393098351569395, "grad_norm": 0.3193642963859553, "learning_rate": 8.954345020444141e-06, "loss": 0.6272, "step": 1668 }, { "epoch": 0.2740952107240367, "grad_norm": 0.3555838716484768, "learning_rate": 8.954278362302783e-06, "loss": 0.5956, "step": 1669 }, { "epoch": 0.27425943793237945, "grad_norm": 0.49064088111583565, "learning_rate": 8.954211655783579e-06, "loss": 0.6205, "step": 1670 }, { "epoch": 0.2744236651407222, "grad_norm": 0.3711693322859996, "learning_rate": 8.954144900887255e-06, "loss": 0.6149, "step": 1671 }, { "epoch": 0.27458789234906494, "grad_norm": 0.33828977223496476, "learning_rate": 8.954078097614534e-06, "loss": 0.595, "step": 1672 }, { "epoch": 0.2747521195574077, "grad_norm": 0.3603675345755279, "learning_rate": 8.954011245966145e-06, "loss": 0.6123, "step": 1673 }, { "epoch": 0.27491634676575044, "grad_norm": 0.3342987882526401, "learning_rate": 8.953944345942809e-06, "loss": 0.6064, "step": 1674 }, { "epoch": 0.27508057397409313, "grad_norm": 0.33840786908343246, "learning_rate": 8.953877397545255e-06, "loss": 0.6147, "step": 1675 }, { "epoch": 0.2752448011824359, "grad_norm": 0.3361362202891087, "learning_rate": 8.953810400774213e-06, "loss": 0.612, "step": 1676 }, { "epoch": 0.27540902839077863, "grad_norm": 0.3615623201654782, "learning_rate": 8.953743355630406e-06, "loss": 0.6197, "step": 1677 }, { "epoch": 0.2755732555991214, "grad_norm": 0.34209041694780395, "learning_rate": 8.953676262114565e-06, "loss": 0.6493, "step": 1678 }, { "epoch": 0.27573748280746413, "grad_norm": 0.325938108900595, "learning_rate": 8.95360912022742e-06, "loss": 0.6057, "step": 1679 }, { "epoch": 0.2759017100158069, "grad_norm": 0.38745143087531064, "learning_rate": 8.953541929969696e-06, "loss": 0.6295, "step": 1680 }, { "epoch": 0.2760659372241496, "grad_norm": 0.3810022825723158, "learning_rate": 8.953474691342126e-06, "loss": 0.5934, "step": 1681 }, { "epoch": 0.2762301644324924, "grad_norm": 0.39295072393329594, "learning_rate": 8.953407404345437e-06, "loss": 0.6321, "step": 1682 }, { "epoch": 0.27639439164083507, "grad_norm": 0.35390570166315877, "learning_rate": 8.953340068980363e-06, "loss": 0.5874, "step": 1683 }, { "epoch": 0.2765586188491778, "grad_norm": 0.34976927367936533, "learning_rate": 8.953272685247636e-06, "loss": 0.6143, "step": 1684 }, { "epoch": 0.27672284605752057, "grad_norm": 0.3327787798882458, "learning_rate": 8.953205253147985e-06, "loss": 0.6057, "step": 1685 }, { "epoch": 0.2768870732658633, "grad_norm": 0.3415951518427282, "learning_rate": 8.953137772682144e-06, "loss": 0.6334, "step": 1686 }, { "epoch": 0.27705130047420606, "grad_norm": 0.3384035832213004, "learning_rate": 8.953070243850843e-06, "loss": 0.6103, "step": 1687 }, { "epoch": 0.2772155276825488, "grad_norm": 0.3495169277940791, "learning_rate": 8.953002666654822e-06, "loss": 0.6097, "step": 1688 }, { "epoch": 0.27737975489089156, "grad_norm": 0.3416538910156951, "learning_rate": 8.952935041094809e-06, "loss": 0.5985, "step": 1689 }, { "epoch": 0.2775439820992343, "grad_norm": 0.3451977176545778, "learning_rate": 8.95286736717154e-06, "loss": 0.5766, "step": 1690 }, { "epoch": 0.27770820930757706, "grad_norm": 0.32525419888437135, "learning_rate": 8.95279964488575e-06, "loss": 0.6011, "step": 1691 }, { "epoch": 0.27787243651591975, "grad_norm": 0.39000414414804097, "learning_rate": 8.952731874238176e-06, "loss": 0.6029, "step": 1692 }, { "epoch": 0.2780366637242625, "grad_norm": 0.3393591338584082, "learning_rate": 8.952664055229553e-06, "loss": 0.5948, "step": 1693 }, { "epoch": 0.27820089093260525, "grad_norm": 0.3986866139500602, "learning_rate": 8.952596187860617e-06, "loss": 0.6006, "step": 1694 }, { "epoch": 0.278365118140948, "grad_norm": 0.3098858325727656, "learning_rate": 8.952528272132107e-06, "loss": 0.6008, "step": 1695 }, { "epoch": 0.27852934534929075, "grad_norm": 0.3572611724356876, "learning_rate": 8.952460308044756e-06, "loss": 0.5998, "step": 1696 }, { "epoch": 0.2786935725576335, "grad_norm": 0.4291924390919209, "learning_rate": 8.952392295599309e-06, "loss": 0.6133, "step": 1697 }, { "epoch": 0.27885779976597624, "grad_norm": 0.34816840940610366, "learning_rate": 8.9523242347965e-06, "loss": 0.5985, "step": 1698 }, { "epoch": 0.279022026974319, "grad_norm": 0.34022234855467215, "learning_rate": 8.952256125637069e-06, "loss": 0.6103, "step": 1699 }, { "epoch": 0.2791862541826617, "grad_norm": 0.3639374654161306, "learning_rate": 8.952187968121755e-06, "loss": 0.6076, "step": 1700 }, { "epoch": 0.27935048139100443, "grad_norm": 0.3338247300715134, "learning_rate": 8.952119762251299e-06, "loss": 0.6114, "step": 1701 }, { "epoch": 0.2795147085993472, "grad_norm": 0.31947936911146907, "learning_rate": 8.952051508026443e-06, "loss": 0.616, "step": 1702 }, { "epoch": 0.27967893580768993, "grad_norm": 0.34279451006593414, "learning_rate": 8.951983205447928e-06, "loss": 0.6179, "step": 1703 }, { "epoch": 0.2798431630160327, "grad_norm": 0.45582975634168327, "learning_rate": 8.951914854516495e-06, "loss": 0.6173, "step": 1704 }, { "epoch": 0.28000739022437543, "grad_norm": 0.3419181751540804, "learning_rate": 8.951846455232888e-06, "loss": 0.6024, "step": 1705 }, { "epoch": 0.2801716174327182, "grad_norm": 0.3593734235508695, "learning_rate": 8.951778007597848e-06, "loss": 0.5989, "step": 1706 }, { "epoch": 0.2803358446410609, "grad_norm": 0.3511409384421343, "learning_rate": 8.951709511612116e-06, "loss": 0.592, "step": 1707 }, { "epoch": 0.2805000718494037, "grad_norm": 0.3501672881668432, "learning_rate": 8.95164096727644e-06, "loss": 0.5928, "step": 1708 }, { "epoch": 0.28066429905774637, "grad_norm": 0.3727919134182136, "learning_rate": 8.951572374591564e-06, "loss": 0.5893, "step": 1709 }, { "epoch": 0.2808285262660891, "grad_norm": 0.3280328400507155, "learning_rate": 8.951503733558232e-06, "loss": 0.6162, "step": 1710 }, { "epoch": 0.28099275347443187, "grad_norm": 0.3775465194021973, "learning_rate": 8.951435044177191e-06, "loss": 0.6215, "step": 1711 }, { "epoch": 0.2811569806827746, "grad_norm": 0.33441012730815717, "learning_rate": 8.951366306449184e-06, "loss": 0.5886, "step": 1712 }, { "epoch": 0.28132120789111736, "grad_norm": 0.34134012357709204, "learning_rate": 8.95129752037496e-06, "loss": 0.5977, "step": 1713 }, { "epoch": 0.2814854350994601, "grad_norm": 0.33093093198335205, "learning_rate": 8.951228685955265e-06, "loss": 0.6345, "step": 1714 }, { "epoch": 0.28164966230780286, "grad_norm": 0.40993061330630076, "learning_rate": 8.951159803190848e-06, "loss": 0.6178, "step": 1715 }, { "epoch": 0.2818138895161456, "grad_norm": 0.3963971197606208, "learning_rate": 8.951090872082457e-06, "loss": 0.6059, "step": 1716 }, { "epoch": 0.2819781167244883, "grad_norm": 0.34012540670581837, "learning_rate": 8.951021892630839e-06, "loss": 0.6247, "step": 1717 }, { "epoch": 0.28214234393283105, "grad_norm": 0.32786048540396723, "learning_rate": 8.950952864836743e-06, "loss": 0.6105, "step": 1718 }, { "epoch": 0.2823065711411738, "grad_norm": 0.5082922315473971, "learning_rate": 8.95088378870092e-06, "loss": 0.6049, "step": 1719 }, { "epoch": 0.28247079834951655, "grad_norm": 0.3094071280048266, "learning_rate": 8.95081466422412e-06, "loss": 0.5871, "step": 1720 }, { "epoch": 0.2826350255578593, "grad_norm": 0.32426351466574627, "learning_rate": 8.950745491407095e-06, "loss": 0.5966, "step": 1721 }, { "epoch": 0.28279925276620205, "grad_norm": 0.31977137099864866, "learning_rate": 8.950676270250593e-06, "loss": 0.6026, "step": 1722 }, { "epoch": 0.2829634799745448, "grad_norm": 0.341677966380917, "learning_rate": 8.95060700075537e-06, "loss": 0.6146, "step": 1723 }, { "epoch": 0.28312770718288754, "grad_norm": 0.34463367244022197, "learning_rate": 8.950537682922175e-06, "loss": 0.6037, "step": 1724 }, { "epoch": 0.2832919343912303, "grad_norm": 0.3238216529925343, "learning_rate": 8.950468316751763e-06, "loss": 0.605, "step": 1725 }, { "epoch": 0.283456161599573, "grad_norm": 0.3491065555826677, "learning_rate": 8.950398902244884e-06, "loss": 0.6054, "step": 1726 }, { "epoch": 0.28362038880791574, "grad_norm": 0.3248877879087777, "learning_rate": 8.950329439402296e-06, "loss": 0.5972, "step": 1727 }, { "epoch": 0.2837846160162585, "grad_norm": 0.3444958318564493, "learning_rate": 8.950259928224753e-06, "loss": 0.5953, "step": 1728 }, { "epoch": 0.28394884322460123, "grad_norm": 0.32530493542474337, "learning_rate": 8.950190368713007e-06, "loss": 0.6184, "step": 1729 }, { "epoch": 0.284113070432944, "grad_norm": 0.3321681672983501, "learning_rate": 8.950120760867817e-06, "loss": 0.598, "step": 1730 }, { "epoch": 0.28427729764128673, "grad_norm": 0.33431613907622554, "learning_rate": 8.950051104689933e-06, "loss": 0.6233, "step": 1731 }, { "epoch": 0.2844415248496295, "grad_norm": 0.33533763383067433, "learning_rate": 8.94998140018012e-06, "loss": 0.6041, "step": 1732 }, { "epoch": 0.2846057520579722, "grad_norm": 0.3504748214822058, "learning_rate": 8.949911647339128e-06, "loss": 0.6167, "step": 1733 }, { "epoch": 0.2847699792663149, "grad_norm": 0.4093185842862643, "learning_rate": 8.94984184616772e-06, "loss": 0.5901, "step": 1734 }, { "epoch": 0.28493420647465767, "grad_norm": 0.3544777274138171, "learning_rate": 8.94977199666665e-06, "loss": 0.6031, "step": 1735 }, { "epoch": 0.2850984336830004, "grad_norm": 0.3149036493882386, "learning_rate": 8.949702098836679e-06, "loss": 0.5978, "step": 1736 }, { "epoch": 0.28526266089134317, "grad_norm": 0.32806889942804984, "learning_rate": 8.949632152678564e-06, "loss": 0.6175, "step": 1737 }, { "epoch": 0.2854268880996859, "grad_norm": 0.33769469010145153, "learning_rate": 8.949562158193067e-06, "loss": 0.6051, "step": 1738 }, { "epoch": 0.28559111530802866, "grad_norm": 0.321457597624339, "learning_rate": 8.949492115380947e-06, "loss": 0.6147, "step": 1739 }, { "epoch": 0.2857553425163714, "grad_norm": 0.35122328128449154, "learning_rate": 8.949422024242963e-06, "loss": 0.5977, "step": 1740 }, { "epoch": 0.28591956972471416, "grad_norm": 0.3853149359679044, "learning_rate": 8.949351884779882e-06, "loss": 0.6193, "step": 1741 }, { "epoch": 0.2860837969330569, "grad_norm": 0.4647759735760696, "learning_rate": 8.949281696992459e-06, "loss": 0.6024, "step": 1742 }, { "epoch": 0.2862480241413996, "grad_norm": 0.31943303091838704, "learning_rate": 8.94921146088146e-06, "loss": 0.5835, "step": 1743 }, { "epoch": 0.28641225134974235, "grad_norm": 0.3386314610382678, "learning_rate": 8.949141176447648e-06, "loss": 0.6309, "step": 1744 }, { "epoch": 0.2865764785580851, "grad_norm": 0.3223619040890537, "learning_rate": 8.949070843691785e-06, "loss": 0.6206, "step": 1745 }, { "epoch": 0.28674070576642785, "grad_norm": 0.3460822464622039, "learning_rate": 8.949000462614634e-06, "loss": 0.5956, "step": 1746 }, { "epoch": 0.2869049329747706, "grad_norm": 0.3360222306588388, "learning_rate": 8.948930033216963e-06, "loss": 0.5946, "step": 1747 }, { "epoch": 0.28706916018311335, "grad_norm": 0.35259283103540384, "learning_rate": 8.948859555499533e-06, "loss": 0.596, "step": 1748 }, { "epoch": 0.2872333873914561, "grad_norm": 0.34166466778665, "learning_rate": 8.948789029463112e-06, "loss": 0.5831, "step": 1749 }, { "epoch": 0.28739761459979885, "grad_norm": 0.31738289979786777, "learning_rate": 8.948718455108464e-06, "loss": 0.6179, "step": 1750 }, { "epoch": 0.28756184180814154, "grad_norm": 0.32852270318439086, "learning_rate": 8.948647832436357e-06, "loss": 0.6015, "step": 1751 }, { "epoch": 0.2877260690164843, "grad_norm": 0.38106270430454836, "learning_rate": 8.948577161447558e-06, "loss": 0.6165, "step": 1752 }, { "epoch": 0.28789029622482704, "grad_norm": 0.34254010965813875, "learning_rate": 8.948506442142834e-06, "loss": 0.6043, "step": 1753 }, { "epoch": 0.2880545234331698, "grad_norm": 0.3124040810333103, "learning_rate": 8.948435674522954e-06, "loss": 0.5979, "step": 1754 }, { "epoch": 0.28821875064151253, "grad_norm": 0.3204292251504406, "learning_rate": 8.948364858588684e-06, "loss": 0.5847, "step": 1755 }, { "epoch": 0.2883829778498553, "grad_norm": 0.32046035935302963, "learning_rate": 8.948293994340797e-06, "loss": 0.6032, "step": 1756 }, { "epoch": 0.28854720505819803, "grad_norm": 0.3408352541517514, "learning_rate": 8.948223081780062e-06, "loss": 0.6159, "step": 1757 }, { "epoch": 0.2887114322665408, "grad_norm": 0.5290664262676616, "learning_rate": 8.948152120907245e-06, "loss": 0.6169, "step": 1758 }, { "epoch": 0.28887565947488353, "grad_norm": 0.3400090749950359, "learning_rate": 8.948081111723122e-06, "loss": 0.6176, "step": 1759 }, { "epoch": 0.2890398866832262, "grad_norm": 0.48512254987990944, "learning_rate": 8.94801005422846e-06, "loss": 0.6045, "step": 1760 }, { "epoch": 0.28920411389156897, "grad_norm": 0.32696720216752834, "learning_rate": 8.947938948424033e-06, "loss": 0.611, "step": 1761 }, { "epoch": 0.2893683410999117, "grad_norm": 0.3348572131814443, "learning_rate": 8.947867794310612e-06, "loss": 0.5976, "step": 1762 }, { "epoch": 0.28953256830825447, "grad_norm": 0.3463666631002641, "learning_rate": 8.947796591888971e-06, "loss": 0.6093, "step": 1763 }, { "epoch": 0.2896967955165972, "grad_norm": 0.37184836370878455, "learning_rate": 8.947725341159884e-06, "loss": 0.577, "step": 1764 }, { "epoch": 0.28986102272493997, "grad_norm": 0.3554557818232329, "learning_rate": 8.947654042124124e-06, "loss": 0.628, "step": 1765 }, { "epoch": 0.2900252499332827, "grad_norm": 0.34625018438648913, "learning_rate": 8.947582694782464e-06, "loss": 0.5874, "step": 1766 }, { "epoch": 0.29018947714162546, "grad_norm": 0.3405480851791619, "learning_rate": 8.947511299135681e-06, "loss": 0.6213, "step": 1767 }, { "epoch": 0.29035370434996816, "grad_norm": 0.33999734574851553, "learning_rate": 8.947439855184548e-06, "loss": 0.5855, "step": 1768 }, { "epoch": 0.2905179315583109, "grad_norm": 0.33180325232357516, "learning_rate": 8.947368362929844e-06, "loss": 0.6249, "step": 1769 }, { "epoch": 0.29068215876665365, "grad_norm": 0.33592557868625156, "learning_rate": 8.947296822372344e-06, "loss": 0.6044, "step": 1770 }, { "epoch": 0.2908463859749964, "grad_norm": 0.3836129375299099, "learning_rate": 8.947225233512824e-06, "loss": 0.6128, "step": 1771 }, { "epoch": 0.29101061318333915, "grad_norm": 0.35129704586375776, "learning_rate": 8.947153596352064e-06, "loss": 0.6037, "step": 1772 }, { "epoch": 0.2911748403916819, "grad_norm": 0.3359681523693811, "learning_rate": 8.947081910890838e-06, "loss": 0.5972, "step": 1773 }, { "epoch": 0.29133906760002465, "grad_norm": 0.4025206425833003, "learning_rate": 8.947010177129929e-06, "loss": 0.6035, "step": 1774 }, { "epoch": 0.2915032948083674, "grad_norm": 0.3219472625707683, "learning_rate": 8.946938395070115e-06, "loss": 0.6012, "step": 1775 }, { "epoch": 0.29166752201671015, "grad_norm": 0.34762377131835476, "learning_rate": 8.946866564712174e-06, "loss": 0.5764, "step": 1776 }, { "epoch": 0.29183174922505284, "grad_norm": 0.33217069383444225, "learning_rate": 8.946794686056886e-06, "loss": 0.6019, "step": 1777 }, { "epoch": 0.2919959764333956, "grad_norm": 0.31309510597308177, "learning_rate": 8.946722759105034e-06, "loss": 0.5969, "step": 1778 }, { "epoch": 0.29216020364173834, "grad_norm": 0.3426072418753475, "learning_rate": 8.946650783857395e-06, "loss": 0.5952, "step": 1779 }, { "epoch": 0.2923244308500811, "grad_norm": 0.3422905765735522, "learning_rate": 8.946578760314758e-06, "loss": 0.5999, "step": 1780 }, { "epoch": 0.29248865805842383, "grad_norm": 0.32223052931910556, "learning_rate": 8.946506688477896e-06, "loss": 0.5891, "step": 1781 }, { "epoch": 0.2926528852667666, "grad_norm": 0.34014514460328144, "learning_rate": 8.9464345683476e-06, "loss": 0.6022, "step": 1782 }, { "epoch": 0.29281711247510933, "grad_norm": 0.4222426807348207, "learning_rate": 8.94636239992465e-06, "loss": 0.5949, "step": 1783 }, { "epoch": 0.2929813396834521, "grad_norm": 1.4806624577700644, "learning_rate": 8.946290183209829e-06, "loss": 0.6048, "step": 1784 }, { "epoch": 0.2931455668917948, "grad_norm": 0.43374222283990266, "learning_rate": 8.946217918203922e-06, "loss": 0.6213, "step": 1785 }, { "epoch": 0.2933097941001375, "grad_norm": 0.33943359501404785, "learning_rate": 8.946145604907712e-06, "loss": 0.5849, "step": 1786 }, { "epoch": 0.29347402130848027, "grad_norm": 0.34687657406918043, "learning_rate": 8.94607324332199e-06, "loss": 0.5989, "step": 1787 }, { "epoch": 0.293638248516823, "grad_norm": 0.35147792904951136, "learning_rate": 8.946000833447535e-06, "loss": 0.6301, "step": 1788 }, { "epoch": 0.29380247572516577, "grad_norm": 0.3120533026712586, "learning_rate": 8.945928375285139e-06, "loss": 0.6007, "step": 1789 }, { "epoch": 0.2939667029335085, "grad_norm": 0.32762054284041, "learning_rate": 8.945855868835584e-06, "loss": 0.6263, "step": 1790 }, { "epoch": 0.29413093014185127, "grad_norm": 0.319177490918817, "learning_rate": 8.945783314099663e-06, "loss": 0.5937, "step": 1791 }, { "epoch": 0.294295157350194, "grad_norm": 0.3426538853948598, "learning_rate": 8.94571071107816e-06, "loss": 0.5915, "step": 1792 }, { "epoch": 0.29445938455853676, "grad_norm": 0.3534302275740654, "learning_rate": 8.945638059771864e-06, "loss": 0.6086, "step": 1793 }, { "epoch": 0.29462361176687946, "grad_norm": 0.4119987675520305, "learning_rate": 8.945565360181566e-06, "loss": 0.5817, "step": 1794 }, { "epoch": 0.2947878389752222, "grad_norm": 0.33739850349835476, "learning_rate": 8.945492612308053e-06, "loss": 0.6046, "step": 1795 }, { "epoch": 0.29495206618356495, "grad_norm": 0.31484597834060024, "learning_rate": 8.945419816152118e-06, "loss": 0.6109, "step": 1796 }, { "epoch": 0.2951162933919077, "grad_norm": 0.3359396360573391, "learning_rate": 8.945346971714548e-06, "loss": 0.6, "step": 1797 }, { "epoch": 0.29528052060025045, "grad_norm": 0.3273419658246956, "learning_rate": 8.945274078996139e-06, "loss": 0.5885, "step": 1798 }, { "epoch": 0.2954447478085932, "grad_norm": 0.361277697562596, "learning_rate": 8.945201137997677e-06, "loss": 0.5726, "step": 1799 }, { "epoch": 0.29560897501693595, "grad_norm": 0.3692868752877909, "learning_rate": 8.94512814871996e-06, "loss": 0.593, "step": 1800 }, { "epoch": 0.2957732022252787, "grad_norm": 0.4303230153835513, "learning_rate": 8.945055111163776e-06, "loss": 0.5879, "step": 1801 }, { "epoch": 0.2959374294336214, "grad_norm": 0.36640985236501644, "learning_rate": 8.94498202532992e-06, "loss": 0.6146, "step": 1802 }, { "epoch": 0.29610165664196414, "grad_norm": 0.38608955906589143, "learning_rate": 8.944908891219187e-06, "loss": 0.5973, "step": 1803 }, { "epoch": 0.2962658838503069, "grad_norm": 0.345799158653238, "learning_rate": 8.94483570883237e-06, "loss": 0.5899, "step": 1804 }, { "epoch": 0.29643011105864964, "grad_norm": 0.38703871745196605, "learning_rate": 8.944762478170264e-06, "loss": 0.6125, "step": 1805 }, { "epoch": 0.2965943382669924, "grad_norm": 0.4328536946290052, "learning_rate": 8.944689199233665e-06, "loss": 0.6108, "step": 1806 }, { "epoch": 0.29675856547533513, "grad_norm": 0.41970722512068465, "learning_rate": 8.944615872023367e-06, "loss": 0.616, "step": 1807 }, { "epoch": 0.2969227926836779, "grad_norm": 0.36376878975146204, "learning_rate": 8.944542496540167e-06, "loss": 0.5927, "step": 1808 }, { "epoch": 0.29708701989202063, "grad_norm": 0.3833035639036213, "learning_rate": 8.944469072784864e-06, "loss": 0.5912, "step": 1809 }, { "epoch": 0.2972512471003633, "grad_norm": 0.36238546383545633, "learning_rate": 8.944395600758255e-06, "loss": 0.6036, "step": 1810 }, { "epoch": 0.2974154743087061, "grad_norm": 0.3480258093184134, "learning_rate": 8.944322080461137e-06, "loss": 0.616, "step": 1811 }, { "epoch": 0.2975797015170488, "grad_norm": 0.3480417118481595, "learning_rate": 8.944248511894307e-06, "loss": 0.6033, "step": 1812 }, { "epoch": 0.29774392872539157, "grad_norm": 0.33712664821835, "learning_rate": 8.944174895058567e-06, "loss": 0.5945, "step": 1813 }, { "epoch": 0.2979081559337343, "grad_norm": 0.313027470384797, "learning_rate": 8.944101229954714e-06, "loss": 0.5916, "step": 1814 }, { "epoch": 0.29807238314207707, "grad_norm": 0.3439276047463889, "learning_rate": 8.94402751658355e-06, "loss": 0.6182, "step": 1815 }, { "epoch": 0.2982366103504198, "grad_norm": 0.36083957952146445, "learning_rate": 8.943953754945874e-06, "loss": 0.6012, "step": 1816 }, { "epoch": 0.29840083755876257, "grad_norm": 0.3693169795087396, "learning_rate": 8.943879945042488e-06, "loss": 0.6028, "step": 1817 }, { "epoch": 0.2985650647671053, "grad_norm": 0.35025691555948874, "learning_rate": 8.943806086874195e-06, "loss": 0.6178, "step": 1818 }, { "epoch": 0.298729291975448, "grad_norm": 0.30773657041387825, "learning_rate": 8.943732180441794e-06, "loss": 0.6097, "step": 1819 }, { "epoch": 0.29889351918379076, "grad_norm": 0.3707759522224546, "learning_rate": 8.94365822574609e-06, "loss": 0.5885, "step": 1820 }, { "epoch": 0.2990577463921335, "grad_norm": 0.35508095599295475, "learning_rate": 8.943584222787888e-06, "loss": 0.6205, "step": 1821 }, { "epoch": 0.29922197360047625, "grad_norm": 0.33789169443655837, "learning_rate": 8.943510171567986e-06, "loss": 0.6159, "step": 1822 }, { "epoch": 0.299386200808819, "grad_norm": 0.37893216238689686, "learning_rate": 8.943436072087195e-06, "loss": 0.6163, "step": 1823 }, { "epoch": 0.29955042801716175, "grad_norm": 0.3149645776504594, "learning_rate": 8.943361924346313e-06, "loss": 0.5986, "step": 1824 }, { "epoch": 0.2997146552255045, "grad_norm": 0.2981620792000357, "learning_rate": 8.943287728346151e-06, "loss": 0.5902, "step": 1825 }, { "epoch": 0.29987888243384725, "grad_norm": 0.3527275097586558, "learning_rate": 8.943213484087512e-06, "loss": 0.6147, "step": 1826 }, { "epoch": 0.30004310964218994, "grad_norm": 0.3273132809209595, "learning_rate": 8.943139191571203e-06, "loss": 0.6007, "step": 1827 }, { "epoch": 0.3002073368505327, "grad_norm": 0.37250810652301003, "learning_rate": 8.943064850798031e-06, "loss": 0.5959, "step": 1828 }, { "epoch": 0.30037156405887544, "grad_norm": 0.33906589441259377, "learning_rate": 8.942990461768805e-06, "loss": 0.5965, "step": 1829 }, { "epoch": 0.3005357912672182, "grad_norm": 0.313977528622849, "learning_rate": 8.94291602448433e-06, "loss": 0.5909, "step": 1830 }, { "epoch": 0.30070001847556094, "grad_norm": 0.35447541373215136, "learning_rate": 8.942841538945415e-06, "loss": 0.5942, "step": 1831 }, { "epoch": 0.3008642456839037, "grad_norm": 0.3184211563502309, "learning_rate": 8.94276700515287e-06, "loss": 0.6229, "step": 1832 }, { "epoch": 0.30102847289224643, "grad_norm": 0.3279512969542947, "learning_rate": 8.942692423107506e-06, "loss": 0.5918, "step": 1833 }, { "epoch": 0.3011927001005892, "grad_norm": 0.3862938159831846, "learning_rate": 8.94261779281013e-06, "loss": 0.631, "step": 1834 }, { "epoch": 0.30135692730893193, "grad_norm": 0.9431248577218457, "learning_rate": 8.942543114261552e-06, "loss": 0.5946, "step": 1835 }, { "epoch": 0.3015211545172746, "grad_norm": 0.37485001496427883, "learning_rate": 8.942468387462588e-06, "loss": 0.6025, "step": 1836 }, { "epoch": 0.3016853817256174, "grad_norm": 0.32292549370149765, "learning_rate": 8.942393612414045e-06, "loss": 0.6107, "step": 1837 }, { "epoch": 0.3018496089339601, "grad_norm": 0.3677915274414714, "learning_rate": 8.942318789116736e-06, "loss": 0.5722, "step": 1838 }, { "epoch": 0.30201383614230287, "grad_norm": 0.31430531461192124, "learning_rate": 8.942243917571474e-06, "loss": 0.5984, "step": 1839 }, { "epoch": 0.3021780633506456, "grad_norm": 0.320090326229002, "learning_rate": 8.942168997779075e-06, "loss": 0.5669, "step": 1840 }, { "epoch": 0.30234229055898837, "grad_norm": 0.4661611655990814, "learning_rate": 8.942094029740347e-06, "loss": 0.5949, "step": 1841 }, { "epoch": 0.3025065177673311, "grad_norm": 0.35595856183511837, "learning_rate": 8.94201901345611e-06, "loss": 0.6392, "step": 1842 }, { "epoch": 0.30267074497567387, "grad_norm": 0.34468323175899396, "learning_rate": 8.941943948927175e-06, "loss": 0.6319, "step": 1843 }, { "epoch": 0.30283497218401656, "grad_norm": 0.3283822883044266, "learning_rate": 8.94186883615436e-06, "loss": 0.633, "step": 1844 }, { "epoch": 0.3029991993923593, "grad_norm": 0.3211390353728499, "learning_rate": 8.941793675138477e-06, "loss": 0.5856, "step": 1845 }, { "epoch": 0.30316342660070206, "grad_norm": 0.38373627857810344, "learning_rate": 8.941718465880344e-06, "loss": 0.6072, "step": 1846 }, { "epoch": 0.3033276538090448, "grad_norm": 0.4349614186299253, "learning_rate": 8.941643208380781e-06, "loss": 0.5782, "step": 1847 }, { "epoch": 0.30349188101738755, "grad_norm": 0.33020652969405023, "learning_rate": 8.941567902640602e-06, "loss": 0.5849, "step": 1848 }, { "epoch": 0.3036561082257303, "grad_norm": 0.3768588711697932, "learning_rate": 8.941492548660625e-06, "loss": 0.5899, "step": 1849 }, { "epoch": 0.30382033543407305, "grad_norm": 0.3160245478293392, "learning_rate": 8.94141714644167e-06, "loss": 0.5968, "step": 1850 }, { "epoch": 0.3039845626424158, "grad_norm": 0.34318955817414776, "learning_rate": 8.941341695984554e-06, "loss": 0.6093, "step": 1851 }, { "epoch": 0.30414878985075855, "grad_norm": 0.44061105391992383, "learning_rate": 8.941266197290098e-06, "loss": 0.5914, "step": 1852 }, { "epoch": 0.30431301705910124, "grad_norm": 0.32870210797503463, "learning_rate": 8.941190650359121e-06, "loss": 0.624, "step": 1853 }, { "epoch": 0.304477244267444, "grad_norm": 0.30637297274135705, "learning_rate": 8.941115055192444e-06, "loss": 0.6058, "step": 1854 }, { "epoch": 0.30464147147578674, "grad_norm": 0.32762818726888737, "learning_rate": 8.941039411790888e-06, "loss": 0.6015, "step": 1855 }, { "epoch": 0.3048056986841295, "grad_norm": 0.3731775719566123, "learning_rate": 8.940963720155276e-06, "loss": 0.603, "step": 1856 }, { "epoch": 0.30496992589247224, "grad_norm": 0.346190637525773, "learning_rate": 8.940887980286428e-06, "loss": 0.5789, "step": 1857 }, { "epoch": 0.305134153100815, "grad_norm": 0.36066973926340856, "learning_rate": 8.940812192185166e-06, "loss": 0.6123, "step": 1858 }, { "epoch": 0.30529838030915774, "grad_norm": 0.34442243351119006, "learning_rate": 8.940736355852316e-06, "loss": 0.5858, "step": 1859 }, { "epoch": 0.3054626075175005, "grad_norm": 0.30796542041483227, "learning_rate": 8.9406604712887e-06, "loss": 0.6035, "step": 1860 }, { "epoch": 0.3056268347258432, "grad_norm": 0.3052034273976577, "learning_rate": 8.94058453849514e-06, "loss": 0.5995, "step": 1861 }, { "epoch": 0.3057910619341859, "grad_norm": 0.3136793732786086, "learning_rate": 8.940508557472466e-06, "loss": 0.6061, "step": 1862 }, { "epoch": 0.3059552891425287, "grad_norm": 0.3088594677892597, "learning_rate": 8.940432528221499e-06, "loss": 0.5707, "step": 1863 }, { "epoch": 0.3061195163508714, "grad_norm": 0.33833389134184366, "learning_rate": 8.940356450743065e-06, "loss": 0.5839, "step": 1864 }, { "epoch": 0.30628374355921417, "grad_norm": 0.3889948728445, "learning_rate": 8.940280325037992e-06, "loss": 0.606, "step": 1865 }, { "epoch": 0.3064479707675569, "grad_norm": 0.30634134506963684, "learning_rate": 8.940204151107106e-06, "loss": 0.5845, "step": 1866 }, { "epoch": 0.30661219797589967, "grad_norm": 0.33450251258079644, "learning_rate": 8.940127928951235e-06, "loss": 0.615, "step": 1867 }, { "epoch": 0.3067764251842424, "grad_norm": 0.33318859011481017, "learning_rate": 8.940051658571205e-06, "loss": 0.5912, "step": 1868 }, { "epoch": 0.30694065239258517, "grad_norm": 0.33002590261509496, "learning_rate": 8.939975339967846e-06, "loss": 0.6006, "step": 1869 }, { "epoch": 0.30710487960092786, "grad_norm": 0.381727301651218, "learning_rate": 8.939898973141987e-06, "loss": 0.5911, "step": 1870 }, { "epoch": 0.3072691068092706, "grad_norm": 0.3523516962800485, "learning_rate": 8.939822558094456e-06, "loss": 0.6086, "step": 1871 }, { "epoch": 0.30743333401761336, "grad_norm": 0.3417375521609287, "learning_rate": 8.939746094826085e-06, "loss": 0.5858, "step": 1872 }, { "epoch": 0.3075975612259561, "grad_norm": 0.3338860766579545, "learning_rate": 8.939669583337703e-06, "loss": 0.61, "step": 1873 }, { "epoch": 0.30776178843429886, "grad_norm": 0.32800363065689425, "learning_rate": 8.93959302363014e-06, "loss": 0.6232, "step": 1874 }, { "epoch": 0.3079260156426416, "grad_norm": 0.3791074578049409, "learning_rate": 8.939516415704231e-06, "loss": 0.6074, "step": 1875 }, { "epoch": 0.30809024285098435, "grad_norm": 0.32136934807801004, "learning_rate": 8.939439759560805e-06, "loss": 0.6395, "step": 1876 }, { "epoch": 0.3082544700593271, "grad_norm": 0.3308724841591282, "learning_rate": 8.939363055200693e-06, "loss": 0.5923, "step": 1877 }, { "epoch": 0.3084186972676698, "grad_norm": 0.3039170961510034, "learning_rate": 8.939286302624734e-06, "loss": 0.5927, "step": 1878 }, { "epoch": 0.30858292447601254, "grad_norm": 0.3139854071425278, "learning_rate": 8.939209501833755e-06, "loss": 0.6029, "step": 1879 }, { "epoch": 0.3087471516843553, "grad_norm": 0.32362328635050663, "learning_rate": 8.939132652828596e-06, "loss": 0.5967, "step": 1880 }, { "epoch": 0.30891137889269804, "grad_norm": 0.30376582477672504, "learning_rate": 8.939055755610087e-06, "loss": 0.6067, "step": 1881 }, { "epoch": 0.3090756061010408, "grad_norm": 0.3088804302395554, "learning_rate": 8.938978810179066e-06, "loss": 0.5968, "step": 1882 }, { "epoch": 0.30923983330938354, "grad_norm": 0.3182885237046062, "learning_rate": 8.938901816536367e-06, "loss": 0.6052, "step": 1883 }, { "epoch": 0.3094040605177263, "grad_norm": 0.334401122678781, "learning_rate": 8.938824774682829e-06, "loss": 0.5915, "step": 1884 }, { "epoch": 0.30956828772606904, "grad_norm": 0.33028895160076666, "learning_rate": 8.938747684619284e-06, "loss": 0.609, "step": 1885 }, { "epoch": 0.3097325149344118, "grad_norm": 0.3116708839296472, "learning_rate": 8.938670546346574e-06, "loss": 0.5787, "step": 1886 }, { "epoch": 0.3098967421427545, "grad_norm": 0.29246076671142657, "learning_rate": 8.938593359865533e-06, "loss": 0.5822, "step": 1887 }, { "epoch": 0.3100609693510972, "grad_norm": 0.3222058679707449, "learning_rate": 8.938516125177002e-06, "loss": 0.6057, "step": 1888 }, { "epoch": 0.31022519655944, "grad_norm": 0.3983985286357778, "learning_rate": 8.938438842281819e-06, "loss": 0.61, "step": 1889 }, { "epoch": 0.3103894237677827, "grad_norm": 0.360508351280119, "learning_rate": 8.938361511180823e-06, "loss": 0.5955, "step": 1890 }, { "epoch": 0.3105536509761255, "grad_norm": 0.333327409913908, "learning_rate": 8.938284131874856e-06, "loss": 0.5741, "step": 1891 }, { "epoch": 0.3107178781844682, "grad_norm": 0.32873570321422285, "learning_rate": 8.938206704364754e-06, "loss": 0.5719, "step": 1892 }, { "epoch": 0.31088210539281097, "grad_norm": 0.33211514766374295, "learning_rate": 8.938129228651361e-06, "loss": 0.5712, "step": 1893 }, { "epoch": 0.3110463326011537, "grad_norm": 0.37887895546631883, "learning_rate": 8.938051704735521e-06, "loss": 0.5768, "step": 1894 }, { "epoch": 0.3112105598094964, "grad_norm": 0.3034162951190907, "learning_rate": 8.93797413261807e-06, "loss": 0.6036, "step": 1895 }, { "epoch": 0.31137478701783916, "grad_norm": 0.2969354168845284, "learning_rate": 8.937896512299854e-06, "loss": 0.5692, "step": 1896 }, { "epoch": 0.3115390142261819, "grad_norm": 0.3212729936642514, "learning_rate": 8.937818843781717e-06, "loss": 0.5753, "step": 1897 }, { "epoch": 0.31170324143452466, "grad_norm": 0.31974316763883187, "learning_rate": 8.9377411270645e-06, "loss": 0.593, "step": 1898 }, { "epoch": 0.3118674686428674, "grad_norm": 0.427602217890422, "learning_rate": 8.937663362149048e-06, "loss": 0.6011, "step": 1899 }, { "epoch": 0.31203169585121016, "grad_norm": 0.32321832135134826, "learning_rate": 8.937585549036207e-06, "loss": 0.5819, "step": 1900 }, { "epoch": 0.3121959230595529, "grad_norm": 0.3977562088177384, "learning_rate": 8.93750768772682e-06, "loss": 0.5982, "step": 1901 }, { "epoch": 0.31236015026789565, "grad_norm": 0.34126358407384527, "learning_rate": 8.937429778221733e-06, "loss": 0.6179, "step": 1902 }, { "epoch": 0.3125243774762384, "grad_norm": 0.34038697596903955, "learning_rate": 8.937351820521793e-06, "loss": 0.5826, "step": 1903 }, { "epoch": 0.3126886046845811, "grad_norm": 0.382868319329483, "learning_rate": 8.937273814627848e-06, "loss": 0.5834, "step": 1904 }, { "epoch": 0.31285283189292384, "grad_norm": 0.32282662203740864, "learning_rate": 8.937195760540742e-06, "loss": 0.5991, "step": 1905 }, { "epoch": 0.3130170591012666, "grad_norm": 0.319661355229016, "learning_rate": 8.937117658261326e-06, "loss": 0.5726, "step": 1906 }, { "epoch": 0.31318128630960934, "grad_norm": 0.34430211300551833, "learning_rate": 8.937039507790446e-06, "loss": 0.5983, "step": 1907 }, { "epoch": 0.3133455135179521, "grad_norm": 0.38993287400775706, "learning_rate": 8.936961309128951e-06, "loss": 0.5928, "step": 1908 }, { "epoch": 0.31350974072629484, "grad_norm": 0.4499590084430486, "learning_rate": 8.93688306227769e-06, "loss": 0.6112, "step": 1909 }, { "epoch": 0.3136739679346376, "grad_norm": 0.35068628407856295, "learning_rate": 8.936804767237515e-06, "loss": 0.582, "step": 1910 }, { "epoch": 0.31383819514298034, "grad_norm": 0.32678889633656605, "learning_rate": 8.936726424009275e-06, "loss": 0.5945, "step": 1911 }, { "epoch": 0.31400242235132303, "grad_norm": 0.3427217699640625, "learning_rate": 8.93664803259382e-06, "loss": 0.6034, "step": 1912 }, { "epoch": 0.3141666495596658, "grad_norm": 0.3750821944895772, "learning_rate": 8.936569592992003e-06, "loss": 0.5906, "step": 1913 }, { "epoch": 0.3143308767680085, "grad_norm": 0.3733784173782264, "learning_rate": 8.936491105204675e-06, "loss": 0.6176, "step": 1914 }, { "epoch": 0.3144951039763513, "grad_norm": 0.3463030952400915, "learning_rate": 8.936412569232689e-06, "loss": 0.5843, "step": 1915 }, { "epoch": 0.314659331184694, "grad_norm": 0.3750041213901857, "learning_rate": 8.936333985076898e-06, "loss": 0.6075, "step": 1916 }, { "epoch": 0.3148235583930368, "grad_norm": 0.3245755348831032, "learning_rate": 8.936255352738155e-06, "loss": 0.5803, "step": 1917 }, { "epoch": 0.3149877856013795, "grad_norm": 0.3287569137043314, "learning_rate": 8.936176672217314e-06, "loss": 0.5633, "step": 1918 }, { "epoch": 0.31515201280972227, "grad_norm": 0.34395092289289697, "learning_rate": 8.936097943515229e-06, "loss": 0.6011, "step": 1919 }, { "epoch": 0.315316240018065, "grad_norm": 0.339337024599154, "learning_rate": 8.936019166632757e-06, "loss": 0.5962, "step": 1920 }, { "epoch": 0.3154804672264077, "grad_norm": 0.38368448835939173, "learning_rate": 8.935940341570752e-06, "loss": 0.5859, "step": 1921 }, { "epoch": 0.31564469443475046, "grad_norm": 0.36731857860519107, "learning_rate": 8.93586146833007e-06, "loss": 0.5962, "step": 1922 }, { "epoch": 0.3158089216430932, "grad_norm": 0.32553923326385664, "learning_rate": 8.935782546911568e-06, "loss": 0.5815, "step": 1923 }, { "epoch": 0.31597314885143596, "grad_norm": 0.32593913183393547, "learning_rate": 8.935703577316105e-06, "loss": 0.5998, "step": 1924 }, { "epoch": 0.3161373760597787, "grad_norm": 0.3369591114430366, "learning_rate": 8.935624559544534e-06, "loss": 0.5987, "step": 1925 }, { "epoch": 0.31630160326812146, "grad_norm": 0.34102164790399514, "learning_rate": 8.935545493597719e-06, "loss": 0.5898, "step": 1926 }, { "epoch": 0.3164658304764642, "grad_norm": 0.44631235074440223, "learning_rate": 8.935466379476515e-06, "loss": 0.6025, "step": 1927 }, { "epoch": 0.31663005768480695, "grad_norm": 0.3522044856839867, "learning_rate": 8.93538721718178e-06, "loss": 0.6083, "step": 1928 }, { "epoch": 0.31679428489314965, "grad_norm": 0.3472773544596263, "learning_rate": 8.93530800671438e-06, "loss": 0.5754, "step": 1929 }, { "epoch": 0.3169585121014924, "grad_norm": 0.4215209510654946, "learning_rate": 8.93522874807517e-06, "loss": 0.6199, "step": 1930 }, { "epoch": 0.31712273930983514, "grad_norm": 0.33209722753968446, "learning_rate": 8.935149441265012e-06, "loss": 0.6037, "step": 1931 }, { "epoch": 0.3172869665181779, "grad_norm": 0.5349339542080866, "learning_rate": 8.935070086284766e-06, "loss": 0.587, "step": 1932 }, { "epoch": 0.31745119372652064, "grad_norm": 0.3410062528635434, "learning_rate": 8.934990683135297e-06, "loss": 0.5932, "step": 1933 }, { "epoch": 0.3176154209348634, "grad_norm": 0.3408786107621934, "learning_rate": 8.934911231817464e-06, "loss": 0.609, "step": 1934 }, { "epoch": 0.31777964814320614, "grad_norm": 0.3219443504322176, "learning_rate": 8.934831732332133e-06, "loss": 0.5974, "step": 1935 }, { "epoch": 0.3179438753515489, "grad_norm": 0.40622542122388705, "learning_rate": 8.934752184680166e-06, "loss": 0.5862, "step": 1936 }, { "epoch": 0.31810810255989164, "grad_norm": 0.32480511259589073, "learning_rate": 8.934672588862426e-06, "loss": 0.564, "step": 1937 }, { "epoch": 0.31827232976823433, "grad_norm": 0.3076637039890817, "learning_rate": 8.93459294487978e-06, "loss": 0.5945, "step": 1938 }, { "epoch": 0.3184365569765771, "grad_norm": 0.33152318749385223, "learning_rate": 8.934513252733091e-06, "loss": 0.5637, "step": 1939 }, { "epoch": 0.3186007841849198, "grad_norm": 0.3318923807049237, "learning_rate": 8.934433512423224e-06, "loss": 0.6095, "step": 1940 }, { "epoch": 0.3187650113932626, "grad_norm": 0.37782491111307404, "learning_rate": 8.934353723951049e-06, "loss": 0.6057, "step": 1941 }, { "epoch": 0.3189292386016053, "grad_norm": 0.36785165188784136, "learning_rate": 8.934273887317427e-06, "loss": 0.6034, "step": 1942 }, { "epoch": 0.3190934658099481, "grad_norm": 0.3129470298856957, "learning_rate": 8.93419400252323e-06, "loss": 0.6043, "step": 1943 }, { "epoch": 0.3192576930182908, "grad_norm": 0.36195379839111386, "learning_rate": 8.934114069569321e-06, "loss": 0.5935, "step": 1944 }, { "epoch": 0.31942192022663357, "grad_norm": 0.34969714763824405, "learning_rate": 8.934034088456573e-06, "loss": 0.582, "step": 1945 }, { "epoch": 0.31958614743497626, "grad_norm": 0.3143958189825889, "learning_rate": 8.93395405918585e-06, "loss": 0.5904, "step": 1946 }, { "epoch": 0.319750374643319, "grad_norm": 0.3676003778377262, "learning_rate": 8.933873981758026e-06, "loss": 0.5981, "step": 1947 }, { "epoch": 0.31991460185166176, "grad_norm": 0.29483469959922937, "learning_rate": 8.933793856173966e-06, "loss": 0.6051, "step": 1948 }, { "epoch": 0.3200788290600045, "grad_norm": 0.32753721487164505, "learning_rate": 8.933713682434545e-06, "loss": 0.5924, "step": 1949 }, { "epoch": 0.32024305626834726, "grad_norm": 0.33750303418011973, "learning_rate": 8.93363346054063e-06, "loss": 0.5933, "step": 1950 }, { "epoch": 0.32040728347669, "grad_norm": 0.31414417859725996, "learning_rate": 8.933553190493092e-06, "loss": 0.5742, "step": 1951 }, { "epoch": 0.32057151068503276, "grad_norm": 0.3123828711273527, "learning_rate": 8.933472872292805e-06, "loss": 0.5843, "step": 1952 }, { "epoch": 0.3207357378933755, "grad_norm": 0.4409976229317403, "learning_rate": 8.933392505940643e-06, "loss": 0.5793, "step": 1953 }, { "epoch": 0.32089996510171825, "grad_norm": 0.30482982633884265, "learning_rate": 8.933312091437473e-06, "loss": 0.5888, "step": 1954 }, { "epoch": 0.32106419231006095, "grad_norm": 0.4277019006668714, "learning_rate": 8.933231628784174e-06, "loss": 0.5929, "step": 1955 }, { "epoch": 0.3212284195184037, "grad_norm": 0.35975629174938006, "learning_rate": 8.933151117981617e-06, "loss": 0.5996, "step": 1956 }, { "epoch": 0.32139264672674644, "grad_norm": 0.33774220319360165, "learning_rate": 8.933070559030678e-06, "loss": 0.6049, "step": 1957 }, { "epoch": 0.3215568739350892, "grad_norm": 0.34493943664636884, "learning_rate": 8.93298995193223e-06, "loss": 0.5892, "step": 1958 }, { "epoch": 0.32172110114343194, "grad_norm": 0.3969135510220126, "learning_rate": 8.93290929668715e-06, "loss": 0.598, "step": 1959 }, { "epoch": 0.3218853283517747, "grad_norm": 0.3308507690751659, "learning_rate": 8.932828593296315e-06, "loss": 0.5872, "step": 1960 }, { "epoch": 0.32204955556011744, "grad_norm": 0.33602909569345507, "learning_rate": 8.9327478417606e-06, "loss": 0.5769, "step": 1961 }, { "epoch": 0.3222137827684602, "grad_norm": 0.3928903161584532, "learning_rate": 8.932667042080881e-06, "loss": 0.5826, "step": 1962 }, { "epoch": 0.3223780099768029, "grad_norm": 0.3503559259230569, "learning_rate": 8.932586194258038e-06, "loss": 0.5986, "step": 1963 }, { "epoch": 0.32254223718514563, "grad_norm": 0.3413850568357385, "learning_rate": 8.932505298292945e-06, "loss": 0.572, "step": 1964 }, { "epoch": 0.3227064643934884, "grad_norm": 0.33703606476691533, "learning_rate": 8.932424354186486e-06, "loss": 0.5834, "step": 1965 }, { "epoch": 0.32287069160183113, "grad_norm": 0.3213463785765971, "learning_rate": 8.932343361939538e-06, "loss": 0.601, "step": 1966 }, { "epoch": 0.3230349188101739, "grad_norm": 0.30437587809999017, "learning_rate": 8.93226232155298e-06, "loss": 0.591, "step": 1967 }, { "epoch": 0.3231991460185166, "grad_norm": 0.44379718365544524, "learning_rate": 8.932181233027692e-06, "loss": 0.573, "step": 1968 }, { "epoch": 0.3233633732268594, "grad_norm": 0.44330142263154393, "learning_rate": 8.932100096364554e-06, "loss": 0.6035, "step": 1969 }, { "epoch": 0.3235276004352021, "grad_norm": 0.3358028448333551, "learning_rate": 8.93201891156445e-06, "loss": 0.5983, "step": 1970 }, { "epoch": 0.32369182764354487, "grad_norm": 0.366694044935094, "learning_rate": 8.931937678628258e-06, "loss": 0.6014, "step": 1971 }, { "epoch": 0.32385605485188756, "grad_norm": 0.31338557422441116, "learning_rate": 8.931856397556865e-06, "loss": 0.5766, "step": 1972 }, { "epoch": 0.3240202820602303, "grad_norm": 0.2903187762457536, "learning_rate": 8.931775068351149e-06, "loss": 0.5789, "step": 1973 }, { "epoch": 0.32418450926857306, "grad_norm": 0.3254260429214987, "learning_rate": 8.931693691011998e-06, "loss": 0.5892, "step": 1974 }, { "epoch": 0.3243487364769158, "grad_norm": 0.3428275678284653, "learning_rate": 8.931612265540291e-06, "loss": 0.5984, "step": 1975 }, { "epoch": 0.32451296368525856, "grad_norm": 0.30397612315020456, "learning_rate": 8.931530791936915e-06, "loss": 0.5831, "step": 1976 }, { "epoch": 0.3246771908936013, "grad_norm": 0.4572089137256494, "learning_rate": 8.931449270202756e-06, "loss": 0.5819, "step": 1977 }, { "epoch": 0.32484141810194406, "grad_norm": 0.36966275950806693, "learning_rate": 8.931367700338696e-06, "loss": 0.5889, "step": 1978 }, { "epoch": 0.3250056453102868, "grad_norm": 0.3415927987872072, "learning_rate": 8.931286082345625e-06, "loss": 0.5923, "step": 1979 }, { "epoch": 0.3251698725186295, "grad_norm": 0.3627702364467653, "learning_rate": 8.931204416224426e-06, "loss": 0.6089, "step": 1980 }, { "epoch": 0.32533409972697225, "grad_norm": 0.340328174457014, "learning_rate": 8.931122701975987e-06, "loss": 0.5923, "step": 1981 }, { "epoch": 0.325498326935315, "grad_norm": 0.32978423355398195, "learning_rate": 8.931040939601196e-06, "loss": 0.5914, "step": 1982 }, { "epoch": 0.32566255414365775, "grad_norm": 0.3543467620738116, "learning_rate": 8.930959129100941e-06, "loss": 0.5821, "step": 1983 }, { "epoch": 0.3258267813520005, "grad_norm": 0.5390093529285701, "learning_rate": 8.930877270476112e-06, "loss": 0.5872, "step": 1984 }, { "epoch": 0.32599100856034324, "grad_norm": 0.3290275508548655, "learning_rate": 8.930795363727595e-06, "loss": 0.588, "step": 1985 }, { "epoch": 0.326155235768686, "grad_norm": 0.31186173084525426, "learning_rate": 8.930713408856281e-06, "loss": 0.6012, "step": 1986 }, { "epoch": 0.32631946297702874, "grad_norm": 0.32941053882022925, "learning_rate": 8.930631405863059e-06, "loss": 0.5816, "step": 1987 }, { "epoch": 0.3264836901853715, "grad_norm": 0.3222654111456884, "learning_rate": 8.930549354748822e-06, "loss": 0.5898, "step": 1988 }, { "epoch": 0.3266479173937142, "grad_norm": 0.3292664467169063, "learning_rate": 8.930467255514461e-06, "loss": 0.5889, "step": 1989 }, { "epoch": 0.32681214460205693, "grad_norm": 0.3743999278918246, "learning_rate": 8.930385108160867e-06, "loss": 0.6068, "step": 1990 }, { "epoch": 0.3269763718103997, "grad_norm": 0.31297732477605195, "learning_rate": 8.93030291268893e-06, "loss": 0.603, "step": 1991 }, { "epoch": 0.32714059901874243, "grad_norm": 0.33774822533754145, "learning_rate": 8.930220669099544e-06, "loss": 0.5755, "step": 1992 }, { "epoch": 0.3273048262270852, "grad_norm": 0.3403085046460094, "learning_rate": 8.930138377393604e-06, "loss": 0.5925, "step": 1993 }, { "epoch": 0.3274690534354279, "grad_norm": 0.48517931840712203, "learning_rate": 8.930056037572002e-06, "loss": 0.5873, "step": 1994 }, { "epoch": 0.3276332806437707, "grad_norm": 0.30851335436470095, "learning_rate": 8.929973649635633e-06, "loss": 0.5701, "step": 1995 }, { "epoch": 0.3277975078521134, "grad_norm": 0.3451118082788753, "learning_rate": 8.929891213585391e-06, "loss": 0.5815, "step": 1996 }, { "epoch": 0.3279617350604561, "grad_norm": 0.3343156174433525, "learning_rate": 8.929808729422172e-06, "loss": 0.5835, "step": 1997 }, { "epoch": 0.32812596226879887, "grad_norm": 0.31129871158266786, "learning_rate": 8.929726197146872e-06, "loss": 0.5938, "step": 1998 }, { "epoch": 0.3282901894771416, "grad_norm": 0.30126632572026746, "learning_rate": 8.92964361676039e-06, "loss": 0.5936, "step": 1999 }, { "epoch": 0.32845441668548436, "grad_norm": 0.5638600322215224, "learning_rate": 8.929560988263617e-06, "loss": 0.5845, "step": 2000 }, { "epoch": 0.3286186438938271, "grad_norm": 0.3554185177476191, "learning_rate": 8.929478311657455e-06, "loss": 0.5588, "step": 2001 }, { "epoch": 0.32878287110216986, "grad_norm": 0.33610749379303184, "learning_rate": 8.9293955869428e-06, "loss": 0.5824, "step": 2002 }, { "epoch": 0.3289470983105126, "grad_norm": 0.8752215708398972, "learning_rate": 8.929312814120551e-06, "loss": 0.6125, "step": 2003 }, { "epoch": 0.32911132551885536, "grad_norm": 0.3846513114226988, "learning_rate": 8.929229993191608e-06, "loss": 0.5778, "step": 2004 }, { "epoch": 0.32927555272719805, "grad_norm": 0.3111081213681579, "learning_rate": 8.929147124156869e-06, "loss": 0.5772, "step": 2005 }, { "epoch": 0.3294397799355408, "grad_norm": 0.3280468513105951, "learning_rate": 8.929064207017233e-06, "loss": 0.5855, "step": 2006 }, { "epoch": 0.32960400714388355, "grad_norm": 0.32684896198790714, "learning_rate": 8.928981241773603e-06, "loss": 0.5722, "step": 2007 }, { "epoch": 0.3297682343522263, "grad_norm": 0.32752500433087867, "learning_rate": 8.92889822842688e-06, "loss": 0.5977, "step": 2008 }, { "epoch": 0.32993246156056905, "grad_norm": 0.3142469749217832, "learning_rate": 8.928815166977964e-06, "loss": 0.5972, "step": 2009 }, { "epoch": 0.3300966887689118, "grad_norm": 0.34216065189885336, "learning_rate": 8.928732057427757e-06, "loss": 0.6017, "step": 2010 }, { "epoch": 0.33026091597725454, "grad_norm": 0.33522533076340355, "learning_rate": 8.928648899777165e-06, "loss": 0.5883, "step": 2011 }, { "epoch": 0.3304251431855973, "grad_norm": 0.4104881167330931, "learning_rate": 8.928565694027086e-06, "loss": 0.5868, "step": 2012 }, { "epoch": 0.33058937039394004, "grad_norm": 0.323178569938277, "learning_rate": 8.928482440178428e-06, "loss": 0.6021, "step": 2013 }, { "epoch": 0.33075359760228273, "grad_norm": 0.3731243392644556, "learning_rate": 8.928399138232094e-06, "loss": 0.577, "step": 2014 }, { "epoch": 0.3309178248106255, "grad_norm": 0.30986603573501476, "learning_rate": 8.928315788188989e-06, "loss": 0.6136, "step": 2015 }, { "epoch": 0.33108205201896823, "grad_norm": 0.33958219563081765, "learning_rate": 8.928232390050015e-06, "loss": 0.5737, "step": 2016 }, { "epoch": 0.331246279227311, "grad_norm": 0.328627168041723, "learning_rate": 8.928148943816084e-06, "loss": 0.5868, "step": 2017 }, { "epoch": 0.33141050643565373, "grad_norm": 0.3176779588770702, "learning_rate": 8.928065449488096e-06, "loss": 0.5769, "step": 2018 }, { "epoch": 0.3315747336439965, "grad_norm": 0.41809285685467873, "learning_rate": 8.927981907066961e-06, "loss": 0.5899, "step": 2019 }, { "epoch": 0.3317389608523392, "grad_norm": 0.36709251595651227, "learning_rate": 8.927898316553586e-06, "loss": 0.604, "step": 2020 }, { "epoch": 0.331903188060682, "grad_norm": 0.3274955175203132, "learning_rate": 8.927814677948879e-06, "loss": 0.6012, "step": 2021 }, { "epoch": 0.33206741526902467, "grad_norm": 0.2832864673661248, "learning_rate": 8.92773099125375e-06, "loss": 0.5942, "step": 2022 }, { "epoch": 0.3322316424773674, "grad_norm": 0.30603924362675944, "learning_rate": 8.927647256469104e-06, "loss": 0.5925, "step": 2023 }, { "epoch": 0.33239586968571017, "grad_norm": 0.33180800499144236, "learning_rate": 8.927563473595853e-06, "loss": 0.5823, "step": 2024 }, { "epoch": 0.3325600968940529, "grad_norm": 0.31278550416452017, "learning_rate": 8.927479642634906e-06, "loss": 0.5746, "step": 2025 }, { "epoch": 0.33272432410239566, "grad_norm": 0.6370773833935756, "learning_rate": 8.927395763587175e-06, "loss": 0.5944, "step": 2026 }, { "epoch": 0.3328885513107384, "grad_norm": 0.3793789597731592, "learning_rate": 8.927311836453569e-06, "loss": 0.6057, "step": 2027 }, { "epoch": 0.33305277851908116, "grad_norm": 0.32422037386343944, "learning_rate": 8.927227861235002e-06, "loss": 0.5886, "step": 2028 }, { "epoch": 0.3332170057274239, "grad_norm": 0.41822762137690095, "learning_rate": 8.927143837932384e-06, "loss": 0.5775, "step": 2029 }, { "epoch": 0.33338123293576666, "grad_norm": 0.3339592815475642, "learning_rate": 8.927059766546627e-06, "loss": 0.5943, "step": 2030 }, { "epoch": 0.33354546014410935, "grad_norm": 0.4645135259033762, "learning_rate": 8.926975647078648e-06, "loss": 0.6036, "step": 2031 }, { "epoch": 0.3337096873524521, "grad_norm": 0.30158131377366604, "learning_rate": 8.926891479529356e-06, "loss": 0.5819, "step": 2032 }, { "epoch": 0.33387391456079485, "grad_norm": 0.46237162147584415, "learning_rate": 8.926807263899665e-06, "loss": 0.5877, "step": 2033 }, { "epoch": 0.3340381417691376, "grad_norm": 0.3169681472015116, "learning_rate": 8.926723000190496e-06, "loss": 0.59, "step": 2034 }, { "epoch": 0.33420236897748035, "grad_norm": 0.3515972769278677, "learning_rate": 8.926638688402759e-06, "loss": 0.5732, "step": 2035 }, { "epoch": 0.3343665961858231, "grad_norm": 0.32805529347869533, "learning_rate": 8.926554328537368e-06, "loss": 0.5825, "step": 2036 }, { "epoch": 0.33453082339416584, "grad_norm": 0.32908781830163236, "learning_rate": 8.926469920595243e-06, "loss": 0.5955, "step": 2037 }, { "epoch": 0.3346950506025086, "grad_norm": 0.8047963527089921, "learning_rate": 8.9263854645773e-06, "loss": 0.5999, "step": 2038 }, { "epoch": 0.3348592778108513, "grad_norm": 0.2904460170060317, "learning_rate": 8.926300960484457e-06, "loss": 0.585, "step": 2039 }, { "epoch": 0.33502350501919403, "grad_norm": 0.32512733927564513, "learning_rate": 8.92621640831763e-06, "loss": 0.6103, "step": 2040 }, { "epoch": 0.3351877322275368, "grad_norm": 0.620086893708396, "learning_rate": 8.926131808077737e-06, "loss": 0.613, "step": 2041 }, { "epoch": 0.33535195943587953, "grad_norm": 0.3439753681608022, "learning_rate": 8.926047159765699e-06, "loss": 0.5954, "step": 2042 }, { "epoch": 0.3355161866442223, "grad_norm": 0.30302759955620057, "learning_rate": 8.925962463382433e-06, "loss": 0.588, "step": 2043 }, { "epoch": 0.33568041385256503, "grad_norm": 0.34048898804704547, "learning_rate": 8.92587771892886e-06, "loss": 0.6006, "step": 2044 }, { "epoch": 0.3358446410609078, "grad_norm": 0.35733144461693095, "learning_rate": 8.925792926405903e-06, "loss": 0.5708, "step": 2045 }, { "epoch": 0.3360088682692505, "grad_norm": 0.3100918297713711, "learning_rate": 8.925708085814478e-06, "loss": 0.579, "step": 2046 }, { "epoch": 0.3361730954775933, "grad_norm": 0.31010557216776774, "learning_rate": 8.925623197155508e-06, "loss": 0.6043, "step": 2047 }, { "epoch": 0.33633732268593597, "grad_norm": 0.3207717425849682, "learning_rate": 8.925538260429919e-06, "loss": 0.5791, "step": 2048 }, { "epoch": 0.3365015498942787, "grad_norm": 0.46303927724673305, "learning_rate": 8.925453275638628e-06, "loss": 0.5981, "step": 2049 }, { "epoch": 0.33666577710262147, "grad_norm": 0.32519157260027265, "learning_rate": 8.925368242782562e-06, "loss": 0.5773, "step": 2050 }, { "epoch": 0.3368300043109642, "grad_norm": 0.30398409620669065, "learning_rate": 8.925283161862642e-06, "loss": 0.5894, "step": 2051 }, { "epoch": 0.33699423151930696, "grad_norm": 0.2974031839313766, "learning_rate": 8.925198032879793e-06, "loss": 0.5957, "step": 2052 }, { "epoch": 0.3371584587276497, "grad_norm": 0.3404861792754053, "learning_rate": 8.925112855834939e-06, "loss": 0.5979, "step": 2053 }, { "epoch": 0.33732268593599246, "grad_norm": 0.3542777008958305, "learning_rate": 8.925027630729007e-06, "loss": 0.5731, "step": 2054 }, { "epoch": 0.3374869131443352, "grad_norm": 0.29838686215896404, "learning_rate": 8.92494235756292e-06, "loss": 0.5914, "step": 2055 }, { "epoch": 0.3376511403526779, "grad_norm": 0.30489826458776687, "learning_rate": 8.924857036337606e-06, "loss": 0.5885, "step": 2056 }, { "epoch": 0.33781536756102065, "grad_norm": 0.34373941171524325, "learning_rate": 8.924771667053991e-06, "loss": 0.5691, "step": 2057 }, { "epoch": 0.3379795947693634, "grad_norm": 0.3235745424873555, "learning_rate": 8.924686249713002e-06, "loss": 0.5791, "step": 2058 }, { "epoch": 0.33814382197770615, "grad_norm": 0.45911687221923925, "learning_rate": 8.924600784315568e-06, "loss": 0.6007, "step": 2059 }, { "epoch": 0.3383080491860489, "grad_norm": 0.3303513848889647, "learning_rate": 8.924515270862615e-06, "loss": 0.5877, "step": 2060 }, { "epoch": 0.33847227639439165, "grad_norm": 0.3146634573861978, "learning_rate": 8.924429709355075e-06, "loss": 0.5985, "step": 2061 }, { "epoch": 0.3386365036027344, "grad_norm": 0.35605464455064945, "learning_rate": 8.924344099793873e-06, "loss": 0.5658, "step": 2062 }, { "epoch": 0.33880073081107714, "grad_norm": 0.3348179585377794, "learning_rate": 8.924258442179942e-06, "loss": 0.6065, "step": 2063 }, { "epoch": 0.3389649580194199, "grad_norm": 0.3194465381879545, "learning_rate": 8.924172736514213e-06, "loss": 0.5856, "step": 2064 }, { "epoch": 0.3391291852277626, "grad_norm": 0.36449000613032345, "learning_rate": 8.924086982797612e-06, "loss": 0.5774, "step": 2065 }, { "epoch": 0.33929341243610533, "grad_norm": 0.3880607217082634, "learning_rate": 8.924001181031077e-06, "loss": 0.566, "step": 2066 }, { "epoch": 0.3394576396444481, "grad_norm": 0.287273007995822, "learning_rate": 8.923915331215534e-06, "loss": 0.5615, "step": 2067 }, { "epoch": 0.33962186685279083, "grad_norm": 0.3312610820471845, "learning_rate": 8.923829433351919e-06, "loss": 0.5675, "step": 2068 }, { "epoch": 0.3397860940611336, "grad_norm": 0.35310045879408936, "learning_rate": 8.923743487441164e-06, "loss": 0.5948, "step": 2069 }, { "epoch": 0.33995032126947633, "grad_norm": 0.33574837233858923, "learning_rate": 8.923657493484203e-06, "loss": 0.5896, "step": 2070 }, { "epoch": 0.3401145484778191, "grad_norm": 0.29104720814747725, "learning_rate": 8.923571451481967e-06, "loss": 0.5728, "step": 2071 }, { "epoch": 0.3402787756861618, "grad_norm": 0.3733366968858962, "learning_rate": 8.923485361435397e-06, "loss": 0.5897, "step": 2072 }, { "epoch": 0.3404430028945045, "grad_norm": 0.31009524646895975, "learning_rate": 8.92339922334542e-06, "loss": 0.5529, "step": 2073 }, { "epoch": 0.34060723010284727, "grad_norm": 0.2956517683672337, "learning_rate": 8.923313037212977e-06, "loss": 0.6152, "step": 2074 }, { "epoch": 0.34077145731119, "grad_norm": 0.3332125958408152, "learning_rate": 8.923226803039e-06, "loss": 0.6156, "step": 2075 }, { "epoch": 0.34093568451953277, "grad_norm": 0.3180632021721819, "learning_rate": 8.923140520824432e-06, "loss": 0.583, "step": 2076 }, { "epoch": 0.3410999117278755, "grad_norm": 0.35586058964375455, "learning_rate": 8.923054190570204e-06, "loss": 0.5819, "step": 2077 }, { "epoch": 0.34126413893621826, "grad_norm": 0.3998068688551633, "learning_rate": 8.922967812277256e-06, "loss": 0.5787, "step": 2078 }, { "epoch": 0.341428366144561, "grad_norm": 0.30357534998154256, "learning_rate": 8.922881385946526e-06, "loss": 0.5593, "step": 2079 }, { "epoch": 0.34159259335290376, "grad_norm": 0.5561696146995321, "learning_rate": 8.922794911578954e-06, "loss": 0.5973, "step": 2080 }, { "epoch": 0.3417568205612465, "grad_norm": 0.40185317262477926, "learning_rate": 8.922708389175476e-06, "loss": 0.5947, "step": 2081 }, { "epoch": 0.3419210477695892, "grad_norm": 0.30390626963582257, "learning_rate": 8.922621818737033e-06, "loss": 0.5678, "step": 2082 }, { "epoch": 0.34208527497793195, "grad_norm": 0.3413233665139855, "learning_rate": 8.922535200264568e-06, "loss": 0.5982, "step": 2083 }, { "epoch": 0.3422495021862747, "grad_norm": 0.28579448814830777, "learning_rate": 8.922448533759017e-06, "loss": 0.5848, "step": 2084 }, { "epoch": 0.34241372939461745, "grad_norm": 0.3204345352536137, "learning_rate": 8.922361819221326e-06, "loss": 0.5757, "step": 2085 }, { "epoch": 0.3425779566029602, "grad_norm": 0.3142059988489119, "learning_rate": 8.922275056652434e-06, "loss": 0.5934, "step": 2086 }, { "epoch": 0.34274218381130295, "grad_norm": 0.3307567711086455, "learning_rate": 8.922188246053284e-06, "loss": 0.5934, "step": 2087 }, { "epoch": 0.3429064110196457, "grad_norm": 0.31046467756377955, "learning_rate": 8.922101387424818e-06, "loss": 0.5882, "step": 2088 }, { "epoch": 0.34307063822798844, "grad_norm": 0.3820278829809434, "learning_rate": 8.922014480767981e-06, "loss": 0.5954, "step": 2089 }, { "epoch": 0.34323486543633114, "grad_norm": 0.335264516354752, "learning_rate": 8.921927526083716e-06, "loss": 0.5987, "step": 2090 }, { "epoch": 0.3433990926446739, "grad_norm": 0.35109540453615623, "learning_rate": 8.921840523372967e-06, "loss": 0.5886, "step": 2091 }, { "epoch": 0.34356331985301664, "grad_norm": 0.3404371948804148, "learning_rate": 8.92175347263668e-06, "loss": 0.612, "step": 2092 }, { "epoch": 0.3437275470613594, "grad_norm": 0.3226414596776337, "learning_rate": 8.9216663738758e-06, "loss": 0.5992, "step": 2093 }, { "epoch": 0.34389177426970213, "grad_norm": 0.3638859412166717, "learning_rate": 8.921579227091272e-06, "loss": 0.5853, "step": 2094 }, { "epoch": 0.3440560014780449, "grad_norm": 0.29899585682019175, "learning_rate": 8.921492032284043e-06, "loss": 0.5919, "step": 2095 }, { "epoch": 0.34422022868638763, "grad_norm": 0.3039192178553265, "learning_rate": 8.921404789455061e-06, "loss": 0.5763, "step": 2096 }, { "epoch": 0.3443844558947304, "grad_norm": 0.3465925369057025, "learning_rate": 8.921317498605274e-06, "loss": 0.593, "step": 2097 }, { "epoch": 0.34454868310307313, "grad_norm": 0.3332403460427041, "learning_rate": 8.921230159735627e-06, "loss": 0.6263, "step": 2098 }, { "epoch": 0.3447129103114158, "grad_norm": 0.3097195369061601, "learning_rate": 8.921142772847073e-06, "loss": 0.5772, "step": 2099 }, { "epoch": 0.34487713751975857, "grad_norm": 0.2961247616477229, "learning_rate": 8.921055337940556e-06, "loss": 0.5517, "step": 2100 }, { "epoch": 0.3450413647281013, "grad_norm": 0.3645824498707989, "learning_rate": 8.92096785501703e-06, "loss": 0.568, "step": 2101 }, { "epoch": 0.34520559193644407, "grad_norm": 0.3490365788059674, "learning_rate": 8.920880324077443e-06, "loss": 0.5953, "step": 2102 }, { "epoch": 0.3453698191447868, "grad_norm": 0.3786183967790828, "learning_rate": 8.920792745122747e-06, "loss": 0.6042, "step": 2103 }, { "epoch": 0.34553404635312956, "grad_norm": 0.3290400716459685, "learning_rate": 8.92070511815389e-06, "loss": 0.5824, "step": 2104 }, { "epoch": 0.3456982735614723, "grad_norm": 0.34834661911635995, "learning_rate": 8.920617443171828e-06, "loss": 0.5908, "step": 2105 }, { "epoch": 0.34586250076981506, "grad_norm": 0.37268445468777833, "learning_rate": 8.920529720177512e-06, "loss": 0.5783, "step": 2106 }, { "epoch": 0.34602672797815776, "grad_norm": 0.31664534703004893, "learning_rate": 8.92044194917189e-06, "loss": 0.5798, "step": 2107 }, { "epoch": 0.3461909551865005, "grad_norm": 0.3582045293008781, "learning_rate": 8.920354130155924e-06, "loss": 0.5913, "step": 2108 }, { "epoch": 0.34635518239484325, "grad_norm": 0.5543690484261287, "learning_rate": 8.92026626313056e-06, "loss": 0.5881, "step": 2109 }, { "epoch": 0.346519409603186, "grad_norm": 0.3208990175255881, "learning_rate": 8.920178348096756e-06, "loss": 0.5996, "step": 2110 }, { "epoch": 0.34668363681152875, "grad_norm": 0.3016388152725111, "learning_rate": 8.920090385055468e-06, "loss": 0.6002, "step": 2111 }, { "epoch": 0.3468478640198715, "grad_norm": 0.3602146787451213, "learning_rate": 8.920002374007648e-06, "loss": 0.5964, "step": 2112 }, { "epoch": 0.34701209122821425, "grad_norm": 0.35331652465559116, "learning_rate": 8.919914314954255e-06, "loss": 0.5855, "step": 2113 }, { "epoch": 0.347176318436557, "grad_norm": 0.30569388964646355, "learning_rate": 8.919826207896243e-06, "loss": 0.6103, "step": 2114 }, { "epoch": 0.34734054564489975, "grad_norm": 0.3339711679529885, "learning_rate": 8.919738052834569e-06, "loss": 0.5792, "step": 2115 }, { "epoch": 0.34750477285324244, "grad_norm": 0.31919676404834385, "learning_rate": 8.919649849770193e-06, "loss": 0.5673, "step": 2116 }, { "epoch": 0.3476690000615852, "grad_norm": 0.3503739553239841, "learning_rate": 8.91956159870407e-06, "loss": 0.586, "step": 2117 }, { "epoch": 0.34783322726992794, "grad_norm": 0.3263957410349147, "learning_rate": 8.919473299637159e-06, "loss": 0.5933, "step": 2118 }, { "epoch": 0.3479974544782707, "grad_norm": 0.3206121735433827, "learning_rate": 8.919384952570423e-06, "loss": 0.5619, "step": 2119 }, { "epoch": 0.34816168168661343, "grad_norm": 0.3672111356514065, "learning_rate": 8.919296557504816e-06, "loss": 0.5986, "step": 2120 }, { "epoch": 0.3483259088949562, "grad_norm": 0.31992702542573537, "learning_rate": 8.9192081144413e-06, "loss": 0.5877, "step": 2121 }, { "epoch": 0.34849013610329893, "grad_norm": 0.5187842764216102, "learning_rate": 8.919119623380837e-06, "loss": 0.5838, "step": 2122 }, { "epoch": 0.3486543633116417, "grad_norm": 0.28609000136348256, "learning_rate": 8.919031084324387e-06, "loss": 0.5767, "step": 2123 }, { "epoch": 0.3488185905199844, "grad_norm": 0.3176274746766672, "learning_rate": 8.918942497272911e-06, "loss": 0.5679, "step": 2124 }, { "epoch": 0.3489828177283271, "grad_norm": 0.4791596535673938, "learning_rate": 8.918853862227372e-06, "loss": 0.5807, "step": 2125 }, { "epoch": 0.34914704493666987, "grad_norm": 0.3121921995990465, "learning_rate": 8.918765179188733e-06, "loss": 0.5492, "step": 2126 }, { "epoch": 0.3493112721450126, "grad_norm": 0.34081370981264214, "learning_rate": 8.918676448157957e-06, "loss": 0.5876, "step": 2127 }, { "epoch": 0.34947549935335537, "grad_norm": 0.36611718567205803, "learning_rate": 8.918587669136007e-06, "loss": 0.5729, "step": 2128 }, { "epoch": 0.3496397265616981, "grad_norm": 0.3122800636082036, "learning_rate": 8.918498842123846e-06, "loss": 0.568, "step": 2129 }, { "epoch": 0.34980395377004087, "grad_norm": 0.33777800341019615, "learning_rate": 8.918409967122443e-06, "loss": 0.5903, "step": 2130 }, { "epoch": 0.3499681809783836, "grad_norm": 0.3359295272643457, "learning_rate": 8.91832104413276e-06, "loss": 0.5659, "step": 2131 }, { "epoch": 0.35013240818672636, "grad_norm": 0.3429887840973727, "learning_rate": 8.918232073155762e-06, "loss": 0.581, "step": 2132 }, { "epoch": 0.35029663539506906, "grad_norm": 0.33042086580264834, "learning_rate": 8.918143054192417e-06, "loss": 0.592, "step": 2133 }, { "epoch": 0.3504608626034118, "grad_norm": 0.3318582273308789, "learning_rate": 8.918053987243692e-06, "loss": 0.5953, "step": 2134 }, { "epoch": 0.35062508981175455, "grad_norm": 0.35486222754880553, "learning_rate": 8.917964872310555e-06, "loss": 0.5634, "step": 2135 }, { "epoch": 0.3507893170200973, "grad_norm": 0.2850475030435862, "learning_rate": 8.91787570939397e-06, "loss": 0.5726, "step": 2136 }, { "epoch": 0.35095354422844005, "grad_norm": 0.47896412101886765, "learning_rate": 8.917786498494912e-06, "loss": 0.5956, "step": 2137 }, { "epoch": 0.3511177714367828, "grad_norm": 0.3116157303094224, "learning_rate": 8.917697239614343e-06, "loss": 0.5734, "step": 2138 }, { "epoch": 0.35128199864512555, "grad_norm": 0.30719351202702805, "learning_rate": 8.917607932753237e-06, "loss": 0.5558, "step": 2139 }, { "epoch": 0.3514462258534683, "grad_norm": 0.3747383902637673, "learning_rate": 8.917518577912562e-06, "loss": 0.5724, "step": 2140 }, { "epoch": 0.351610453061811, "grad_norm": 0.33882358226497106, "learning_rate": 8.91742917509329e-06, "loss": 0.5962, "step": 2141 }, { "epoch": 0.35177468027015374, "grad_norm": 0.36082268139826695, "learning_rate": 8.917339724296391e-06, "loss": 0.5844, "step": 2142 }, { "epoch": 0.3519389074784965, "grad_norm": 0.30132427096962017, "learning_rate": 8.917250225522834e-06, "loss": 0.5746, "step": 2143 }, { "epoch": 0.35210313468683924, "grad_norm": 0.3129525632905135, "learning_rate": 8.917160678773597e-06, "loss": 0.5752, "step": 2144 }, { "epoch": 0.352267361895182, "grad_norm": 0.3219843814868087, "learning_rate": 8.917071084049647e-06, "loss": 0.5968, "step": 2145 }, { "epoch": 0.35243158910352473, "grad_norm": 0.3258109865865092, "learning_rate": 8.91698144135196e-06, "loss": 0.6133, "step": 2146 }, { "epoch": 0.3525958163118675, "grad_norm": 0.3090823359585868, "learning_rate": 8.916891750681508e-06, "loss": 0.5866, "step": 2147 }, { "epoch": 0.35276004352021023, "grad_norm": 0.32487573794202323, "learning_rate": 8.916802012039267e-06, "loss": 0.5595, "step": 2148 }, { "epoch": 0.352924270728553, "grad_norm": 0.31033278772936684, "learning_rate": 8.916712225426208e-06, "loss": 0.5872, "step": 2149 }, { "epoch": 0.3530884979368957, "grad_norm": 0.2833943755067742, "learning_rate": 8.916622390843312e-06, "loss": 0.5687, "step": 2150 }, { "epoch": 0.3532527251452384, "grad_norm": 0.33627350008595963, "learning_rate": 8.916532508291549e-06, "loss": 0.5878, "step": 2151 }, { "epoch": 0.35341695235358117, "grad_norm": 0.3469802742069985, "learning_rate": 8.9164425777719e-06, "loss": 0.6043, "step": 2152 }, { "epoch": 0.3535811795619239, "grad_norm": 0.31575761311555894, "learning_rate": 8.916352599285338e-06, "loss": 0.5815, "step": 2153 }, { "epoch": 0.35374540677026667, "grad_norm": 0.3045497966721819, "learning_rate": 8.916262572832842e-06, "loss": 0.5663, "step": 2154 }, { "epoch": 0.3539096339786094, "grad_norm": 0.31121959786989556, "learning_rate": 8.916172498415389e-06, "loss": 0.5729, "step": 2155 }, { "epoch": 0.35407386118695217, "grad_norm": 0.3627412802197949, "learning_rate": 8.916082376033958e-06, "loss": 0.5644, "step": 2156 }, { "epoch": 0.3542380883952949, "grad_norm": 0.31693514999285666, "learning_rate": 8.915992205689529e-06, "loss": 0.5936, "step": 2157 }, { "epoch": 0.3544023156036376, "grad_norm": 0.3855064420166456, "learning_rate": 8.915901987383078e-06, "loss": 0.5771, "step": 2158 }, { "epoch": 0.35456654281198036, "grad_norm": 0.32831398862385386, "learning_rate": 8.915811721115588e-06, "loss": 0.6016, "step": 2159 }, { "epoch": 0.3547307700203231, "grad_norm": 0.32057668168581144, "learning_rate": 8.915721406888037e-06, "loss": 0.582, "step": 2160 }, { "epoch": 0.35489499722866585, "grad_norm": 0.3029317139589128, "learning_rate": 8.915631044701408e-06, "loss": 0.5853, "step": 2161 }, { "epoch": 0.3550592244370086, "grad_norm": 0.3683147589988735, "learning_rate": 8.915540634556681e-06, "loss": 0.5618, "step": 2162 }, { "epoch": 0.35522345164535135, "grad_norm": 0.32250564191378006, "learning_rate": 8.915450176454838e-06, "loss": 0.5949, "step": 2163 }, { "epoch": 0.3553876788536941, "grad_norm": 0.34769282222308573, "learning_rate": 8.915359670396863e-06, "loss": 0.6056, "step": 2164 }, { "epoch": 0.35555190606203685, "grad_norm": 0.32794507485016755, "learning_rate": 8.915269116383736e-06, "loss": 0.5667, "step": 2165 }, { "epoch": 0.3557161332703796, "grad_norm": 0.3592878992842856, "learning_rate": 8.915178514416443e-06, "loss": 0.5734, "step": 2166 }, { "epoch": 0.3558803604787223, "grad_norm": 0.31418949050424083, "learning_rate": 8.91508786449597e-06, "loss": 0.5647, "step": 2167 }, { "epoch": 0.35604458768706504, "grad_norm": 0.41684345813587714, "learning_rate": 8.914997166623295e-06, "loss": 0.5661, "step": 2168 }, { "epoch": 0.3562088148954078, "grad_norm": 0.33388499752938794, "learning_rate": 8.91490642079941e-06, "loss": 0.5724, "step": 2169 }, { "epoch": 0.35637304210375054, "grad_norm": 0.30404975902522224, "learning_rate": 8.914815627025295e-06, "loss": 0.5754, "step": 2170 }, { "epoch": 0.3565372693120933, "grad_norm": 0.3150577755264073, "learning_rate": 8.914724785301942e-06, "loss": 0.5769, "step": 2171 }, { "epoch": 0.35670149652043603, "grad_norm": 0.3201787714157396, "learning_rate": 8.91463389563033e-06, "loss": 0.6045, "step": 2172 }, { "epoch": 0.3568657237287788, "grad_norm": 0.32275307016076826, "learning_rate": 8.914542958011454e-06, "loss": 0.5873, "step": 2173 }, { "epoch": 0.35702995093712153, "grad_norm": 0.29979459104094425, "learning_rate": 8.914451972446297e-06, "loss": 0.5786, "step": 2174 }, { "epoch": 0.3571941781454642, "grad_norm": 0.625963371230108, "learning_rate": 8.914360938935849e-06, "loss": 0.5633, "step": 2175 }, { "epoch": 0.357358405353807, "grad_norm": 0.3087502313684936, "learning_rate": 8.914269857481098e-06, "loss": 0.5823, "step": 2176 }, { "epoch": 0.3575226325621497, "grad_norm": 0.32683326534937834, "learning_rate": 8.914178728083031e-06, "loss": 0.5778, "step": 2177 }, { "epoch": 0.35768685977049247, "grad_norm": 0.7880735018196897, "learning_rate": 8.914087550742643e-06, "loss": 0.5851, "step": 2178 }, { "epoch": 0.3578510869788352, "grad_norm": 0.3278097326230743, "learning_rate": 8.91399632546092e-06, "loss": 0.5914, "step": 2179 }, { "epoch": 0.35801531418717797, "grad_norm": 0.35430290321922725, "learning_rate": 8.913905052238854e-06, "loss": 0.6058, "step": 2180 }, { "epoch": 0.3581795413955207, "grad_norm": 1.0396310534287811, "learning_rate": 8.913813731077437e-06, "loss": 0.583, "step": 2181 }, { "epoch": 0.35834376860386347, "grad_norm": 0.3060473871714537, "learning_rate": 8.91372236197766e-06, "loss": 0.5871, "step": 2182 }, { "epoch": 0.3585079958122062, "grad_norm": 0.33881404244223196, "learning_rate": 8.913630944940516e-06, "loss": 0.5872, "step": 2183 }, { "epoch": 0.3586722230205489, "grad_norm": 0.2973344128005135, "learning_rate": 8.913539479966997e-06, "loss": 0.5696, "step": 2184 }, { "epoch": 0.35883645022889166, "grad_norm": 0.35184436201426394, "learning_rate": 8.913447967058097e-06, "loss": 0.5965, "step": 2185 }, { "epoch": 0.3590006774372344, "grad_norm": 0.3020841641575663, "learning_rate": 8.913356406214809e-06, "loss": 0.562, "step": 2186 }, { "epoch": 0.35916490464557715, "grad_norm": 0.4027338067146939, "learning_rate": 8.91326479743813e-06, "loss": 0.5637, "step": 2187 }, { "epoch": 0.3593291318539199, "grad_norm": 0.297962563186807, "learning_rate": 8.913173140729051e-06, "loss": 0.584, "step": 2188 }, { "epoch": 0.35949335906226265, "grad_norm": 0.49567057600017395, "learning_rate": 8.913081436088573e-06, "loss": 0.5698, "step": 2189 }, { "epoch": 0.3596575862706054, "grad_norm": 0.3139466119859544, "learning_rate": 8.912989683517686e-06, "loss": 0.581, "step": 2190 }, { "epoch": 0.35982181347894815, "grad_norm": 0.3222467409727742, "learning_rate": 8.91289788301739e-06, "loss": 0.5692, "step": 2191 }, { "epoch": 0.35998604068729084, "grad_norm": 0.3132899288065772, "learning_rate": 8.912806034588682e-06, "loss": 0.5716, "step": 2192 }, { "epoch": 0.3601502678956336, "grad_norm": 0.311666768458647, "learning_rate": 8.912714138232558e-06, "loss": 0.5707, "step": 2193 }, { "epoch": 0.36031449510397634, "grad_norm": 0.318265739153915, "learning_rate": 8.912622193950016e-06, "loss": 0.6014, "step": 2194 }, { "epoch": 0.3604787223123191, "grad_norm": 0.33405275857775724, "learning_rate": 8.912530201742057e-06, "loss": 0.5796, "step": 2195 }, { "epoch": 0.36064294952066184, "grad_norm": 0.32054434013299965, "learning_rate": 8.912438161609678e-06, "loss": 0.5737, "step": 2196 }, { "epoch": 0.3608071767290046, "grad_norm": 0.4906329903361965, "learning_rate": 8.912346073553882e-06, "loss": 0.5959, "step": 2197 }, { "epoch": 0.36097140393734733, "grad_norm": 0.6300646881591123, "learning_rate": 8.912253937575663e-06, "loss": 0.5996, "step": 2198 }, { "epoch": 0.3611356311456901, "grad_norm": 0.32082175087580206, "learning_rate": 8.912161753676026e-06, "loss": 0.5648, "step": 2199 }, { "epoch": 0.3612998583540328, "grad_norm": 0.3143112057835378, "learning_rate": 8.912069521855971e-06, "loss": 0.5699, "step": 2200 }, { "epoch": 0.3614640855623755, "grad_norm": 0.3295077023756852, "learning_rate": 8.911977242116502e-06, "loss": 0.5756, "step": 2201 }, { "epoch": 0.3616283127707183, "grad_norm": 0.33517369326913704, "learning_rate": 8.911884914458618e-06, "loss": 0.5851, "step": 2202 }, { "epoch": 0.361792539979061, "grad_norm": 0.33051215135799833, "learning_rate": 8.911792538883323e-06, "loss": 0.5722, "step": 2203 }, { "epoch": 0.36195676718740377, "grad_norm": 0.43832424126361785, "learning_rate": 8.91170011539162e-06, "loss": 0.5514, "step": 2204 }, { "epoch": 0.3621209943957465, "grad_norm": 0.37071372638553013, "learning_rate": 8.911607643984513e-06, "loss": 0.5964, "step": 2205 }, { "epoch": 0.36228522160408927, "grad_norm": 0.4086726046467989, "learning_rate": 8.91151512466301e-06, "loss": 0.5983, "step": 2206 }, { "epoch": 0.362449448812432, "grad_norm": 0.3148352832312435, "learning_rate": 8.91142255742811e-06, "loss": 0.5952, "step": 2207 }, { "epoch": 0.36261367602077477, "grad_norm": 0.30697433895023185, "learning_rate": 8.91132994228082e-06, "loss": 0.5793, "step": 2208 }, { "epoch": 0.36277790322911746, "grad_norm": 0.3825985510743253, "learning_rate": 8.911237279222148e-06, "loss": 0.5774, "step": 2209 }, { "epoch": 0.3629421304374602, "grad_norm": 0.34276141854757924, "learning_rate": 8.911144568253097e-06, "loss": 0.5758, "step": 2210 }, { "epoch": 0.36310635764580296, "grad_norm": 0.3142215471342378, "learning_rate": 8.911051809374677e-06, "loss": 0.5792, "step": 2211 }, { "epoch": 0.3632705848541457, "grad_norm": 0.30903065304902716, "learning_rate": 8.910959002587895e-06, "loss": 0.5866, "step": 2212 }, { "epoch": 0.36343481206248845, "grad_norm": 0.31135335714393814, "learning_rate": 8.910866147893758e-06, "loss": 0.5868, "step": 2213 }, { "epoch": 0.3635990392708312, "grad_norm": 0.3764236674444635, "learning_rate": 8.910773245293275e-06, "loss": 0.5889, "step": 2214 }, { "epoch": 0.36376326647917395, "grad_norm": 0.40469774842863443, "learning_rate": 8.910680294787455e-06, "loss": 0.566, "step": 2215 }, { "epoch": 0.3639274936875167, "grad_norm": 0.3145755387386865, "learning_rate": 8.910587296377308e-06, "loss": 0.5788, "step": 2216 }, { "epoch": 0.3640917208958594, "grad_norm": 0.35885193000909843, "learning_rate": 8.910494250063844e-06, "loss": 0.5677, "step": 2217 }, { "epoch": 0.36425594810420214, "grad_norm": 0.3651850799070969, "learning_rate": 8.910401155848072e-06, "loss": 0.5735, "step": 2218 }, { "epoch": 0.3644201753125449, "grad_norm": 0.29711733435878707, "learning_rate": 8.910308013731004e-06, "loss": 0.5525, "step": 2219 }, { "epoch": 0.36458440252088764, "grad_norm": 0.3236062586888815, "learning_rate": 8.910214823713652e-06, "loss": 0.592, "step": 2220 }, { "epoch": 0.3647486297292304, "grad_norm": 0.33402951253417174, "learning_rate": 8.910121585797028e-06, "loss": 0.5915, "step": 2221 }, { "epoch": 0.36491285693757314, "grad_norm": 0.29300268294595966, "learning_rate": 8.910028299982145e-06, "loss": 0.5813, "step": 2222 }, { "epoch": 0.3650770841459159, "grad_norm": 0.3091653000574013, "learning_rate": 8.909934966270016e-06, "loss": 0.5804, "step": 2223 }, { "epoch": 0.36524131135425864, "grad_norm": 0.3079107397662537, "learning_rate": 8.909841584661654e-06, "loss": 0.5748, "step": 2224 }, { "epoch": 0.3654055385626014, "grad_norm": 0.34259914622682275, "learning_rate": 8.909748155158074e-06, "loss": 0.5796, "step": 2225 }, { "epoch": 0.3655697657709441, "grad_norm": 0.4153545557740463, "learning_rate": 8.90965467776029e-06, "loss": 0.5734, "step": 2226 }, { "epoch": 0.3657339929792868, "grad_norm": 0.31071013419276294, "learning_rate": 8.909561152469317e-06, "loss": 0.5905, "step": 2227 }, { "epoch": 0.3658982201876296, "grad_norm": 0.3471544762094001, "learning_rate": 8.909467579286173e-06, "loss": 0.5755, "step": 2228 }, { "epoch": 0.3660624473959723, "grad_norm": 0.3216553400318287, "learning_rate": 8.909373958211872e-06, "loss": 0.5566, "step": 2229 }, { "epoch": 0.3662266746043151, "grad_norm": 0.3292521251192245, "learning_rate": 8.909280289247431e-06, "loss": 0.5803, "step": 2230 }, { "epoch": 0.3663909018126578, "grad_norm": 0.3344410080798811, "learning_rate": 8.90918657239387e-06, "loss": 0.5777, "step": 2231 }, { "epoch": 0.36655512902100057, "grad_norm": 0.2941625541245194, "learning_rate": 8.909092807652202e-06, "loss": 0.6021, "step": 2232 }, { "epoch": 0.3667193562293433, "grad_norm": 0.35239833707095725, "learning_rate": 8.908998995023449e-06, "loss": 0.5848, "step": 2233 }, { "epoch": 0.366883583437686, "grad_norm": 0.34132808655561586, "learning_rate": 8.908905134508631e-06, "loss": 0.5662, "step": 2234 }, { "epoch": 0.36704781064602876, "grad_norm": 0.5185030895359983, "learning_rate": 8.908811226108764e-06, "loss": 0.5913, "step": 2235 }, { "epoch": 0.3672120378543715, "grad_norm": 0.30832542073870384, "learning_rate": 8.90871726982487e-06, "loss": 0.6005, "step": 2236 }, { "epoch": 0.36737626506271426, "grad_norm": 0.32855997531791475, "learning_rate": 8.908623265657966e-06, "loss": 0.5756, "step": 2237 }, { "epoch": 0.367540492271057, "grad_norm": 0.30127305901284296, "learning_rate": 8.90852921360908e-06, "loss": 0.5881, "step": 2238 }, { "epoch": 0.36770471947939976, "grad_norm": 0.2993924136214141, "learning_rate": 8.908435113679226e-06, "loss": 0.5682, "step": 2239 }, { "epoch": 0.3678689466877425, "grad_norm": 0.3406570591462043, "learning_rate": 8.908340965869432e-06, "loss": 0.5739, "step": 2240 }, { "epoch": 0.36803317389608525, "grad_norm": 0.33425544277809804, "learning_rate": 8.908246770180716e-06, "loss": 0.5857, "step": 2241 }, { "epoch": 0.368197401104428, "grad_norm": 0.3471600230497452, "learning_rate": 8.908152526614104e-06, "loss": 0.5689, "step": 2242 }, { "epoch": 0.3683616283127707, "grad_norm": 0.31874708068845486, "learning_rate": 8.90805823517062e-06, "loss": 0.5846, "step": 2243 }, { "epoch": 0.36852585552111344, "grad_norm": 0.3397332860106541, "learning_rate": 8.907963895851282e-06, "loss": 0.5983, "step": 2244 }, { "epoch": 0.3686900827294562, "grad_norm": 0.3454421184183436, "learning_rate": 8.907869508657122e-06, "loss": 0.5703, "step": 2245 }, { "epoch": 0.36885430993779894, "grad_norm": 0.31162260048303864, "learning_rate": 8.907775073589163e-06, "loss": 0.5857, "step": 2246 }, { "epoch": 0.3690185371461417, "grad_norm": 0.326196691003036, "learning_rate": 8.907680590648429e-06, "loss": 0.5868, "step": 2247 }, { "epoch": 0.36918276435448444, "grad_norm": 0.3186926099650805, "learning_rate": 8.907586059835948e-06, "loss": 0.6031, "step": 2248 }, { "epoch": 0.3693469915628272, "grad_norm": 0.36454682926659043, "learning_rate": 8.907491481152747e-06, "loss": 0.601, "step": 2249 }, { "epoch": 0.36951121877116994, "grad_norm": 0.3407203715992103, "learning_rate": 8.90739685459985e-06, "loss": 0.5824, "step": 2250 }, { "epoch": 0.36967544597951263, "grad_norm": 0.27972540167378696, "learning_rate": 8.907302180178286e-06, "loss": 0.5686, "step": 2251 }, { "epoch": 0.3698396731878554, "grad_norm": 0.3208480967722726, "learning_rate": 8.907207457889087e-06, "loss": 0.5886, "step": 2252 }, { "epoch": 0.3700039003961981, "grad_norm": 0.3041652148393244, "learning_rate": 8.907112687733278e-06, "loss": 0.582, "step": 2253 }, { "epoch": 0.3701681276045409, "grad_norm": 0.3174281025630536, "learning_rate": 8.907017869711888e-06, "loss": 0.5669, "step": 2254 }, { "epoch": 0.3703323548128836, "grad_norm": 0.2875806634656067, "learning_rate": 8.906923003825949e-06, "loss": 0.5741, "step": 2255 }, { "epoch": 0.3704965820212264, "grad_norm": 0.3209729400410392, "learning_rate": 8.90682809007649e-06, "loss": 0.5761, "step": 2256 }, { "epoch": 0.3706608092295691, "grad_norm": 0.3300546032520837, "learning_rate": 8.906733128464541e-06, "loss": 0.5587, "step": 2257 }, { "epoch": 0.37082503643791187, "grad_norm": 0.291617247856832, "learning_rate": 8.906638118991137e-06, "loss": 0.5801, "step": 2258 }, { "epoch": 0.3709892636462546, "grad_norm": 0.35873974625749094, "learning_rate": 8.906543061657306e-06, "loss": 0.5652, "step": 2259 }, { "epoch": 0.3711534908545973, "grad_norm": 0.2995194684429335, "learning_rate": 8.906447956464082e-06, "loss": 0.5818, "step": 2260 }, { "epoch": 0.37131771806294006, "grad_norm": 0.28613397814477437, "learning_rate": 8.906352803412499e-06, "loss": 0.5497, "step": 2261 }, { "epoch": 0.3714819452712828, "grad_norm": 0.35714670764867573, "learning_rate": 8.906257602503589e-06, "loss": 0.5854, "step": 2262 }, { "epoch": 0.37164617247962556, "grad_norm": 0.314102556822799, "learning_rate": 8.906162353738385e-06, "loss": 0.5927, "step": 2263 }, { "epoch": 0.3718103996879683, "grad_norm": 0.31461753863692804, "learning_rate": 8.906067057117924e-06, "loss": 0.559, "step": 2264 }, { "epoch": 0.37197462689631106, "grad_norm": 0.3202526289895275, "learning_rate": 8.905971712643238e-06, "loss": 0.5908, "step": 2265 }, { "epoch": 0.3721388541046538, "grad_norm": 0.29543179263334973, "learning_rate": 8.905876320315367e-06, "loss": 0.5434, "step": 2266 }, { "epoch": 0.37230308131299655, "grad_norm": 0.3062964276019493, "learning_rate": 8.905780880135343e-06, "loss": 0.5745, "step": 2267 }, { "epoch": 0.37246730852133925, "grad_norm": 0.28951540649656626, "learning_rate": 8.905685392104203e-06, "loss": 0.587, "step": 2268 }, { "epoch": 0.372631535729682, "grad_norm": 0.3192492106256812, "learning_rate": 8.905589856222985e-06, "loss": 0.5776, "step": 2269 }, { "epoch": 0.37279576293802474, "grad_norm": 0.33482973901264274, "learning_rate": 8.905494272492728e-06, "loss": 0.5691, "step": 2270 }, { "epoch": 0.3729599901463675, "grad_norm": 0.2875150584599915, "learning_rate": 8.905398640914468e-06, "loss": 0.5927, "step": 2271 }, { "epoch": 0.37312421735471024, "grad_norm": 0.3076711414341829, "learning_rate": 8.905302961489245e-06, "loss": 0.5755, "step": 2272 }, { "epoch": 0.373288444563053, "grad_norm": 0.29479734170962885, "learning_rate": 8.905207234218098e-06, "loss": 0.5966, "step": 2273 }, { "epoch": 0.37345267177139574, "grad_norm": 0.28667253454249403, "learning_rate": 8.905111459102065e-06, "loss": 0.5847, "step": 2274 }, { "epoch": 0.3736168989797385, "grad_norm": 0.37883062566583625, "learning_rate": 8.905015636142189e-06, "loss": 0.5881, "step": 2275 }, { "epoch": 0.37378112618808124, "grad_norm": 0.29767160803116754, "learning_rate": 8.904919765339508e-06, "loss": 0.5627, "step": 2276 }, { "epoch": 0.37394535339642393, "grad_norm": 0.3155297745770699, "learning_rate": 8.904823846695065e-06, "loss": 0.5693, "step": 2277 }, { "epoch": 0.3741095806047667, "grad_norm": 0.30626645197578384, "learning_rate": 8.904727880209902e-06, "loss": 0.5739, "step": 2278 }, { "epoch": 0.3742738078131094, "grad_norm": 0.2870421361283258, "learning_rate": 8.904631865885059e-06, "loss": 0.564, "step": 2279 }, { "epoch": 0.3744380350214522, "grad_norm": 0.3041713714865128, "learning_rate": 8.904535803721581e-06, "loss": 0.581, "step": 2280 }, { "epoch": 0.3746022622297949, "grad_norm": 0.3342741300499172, "learning_rate": 8.904439693720511e-06, "loss": 0.5672, "step": 2281 }, { "epoch": 0.3747664894381377, "grad_norm": 0.29876866247680856, "learning_rate": 8.904343535882892e-06, "loss": 0.5563, "step": 2282 }, { "epoch": 0.3749307166464804, "grad_norm": 0.3272388991501618, "learning_rate": 8.90424733020977e-06, "loss": 0.5733, "step": 2283 }, { "epoch": 0.37509494385482317, "grad_norm": 0.29504478377135096, "learning_rate": 8.904151076702187e-06, "loss": 0.5582, "step": 2284 }, { "epoch": 0.37525917106316586, "grad_norm": 0.32151438651497766, "learning_rate": 8.904054775361191e-06, "loss": 0.5808, "step": 2285 }, { "epoch": 0.3754233982715086, "grad_norm": 0.29408465592424793, "learning_rate": 8.903958426187827e-06, "loss": 0.6031, "step": 2286 }, { "epoch": 0.37558762547985136, "grad_norm": 0.39603717369803254, "learning_rate": 8.903862029183144e-06, "loss": 0.5646, "step": 2287 }, { "epoch": 0.3757518526881941, "grad_norm": 0.27728565310231973, "learning_rate": 8.903765584348183e-06, "loss": 0.5764, "step": 2288 }, { "epoch": 0.37591607989653686, "grad_norm": 0.3228980017218485, "learning_rate": 8.903669091683996e-06, "loss": 0.5758, "step": 2289 }, { "epoch": 0.3760803071048796, "grad_norm": 0.27396890144473407, "learning_rate": 8.903572551191633e-06, "loss": 0.5904, "step": 2290 }, { "epoch": 0.37624453431322236, "grad_norm": 0.33219915905771785, "learning_rate": 8.903475962872135e-06, "loss": 0.588, "step": 2291 }, { "epoch": 0.3764087615215651, "grad_norm": 0.29442976439850005, "learning_rate": 8.903379326726559e-06, "loss": 0.5726, "step": 2292 }, { "epoch": 0.37657298872990785, "grad_norm": 0.27379664077338145, "learning_rate": 8.903282642755948e-06, "loss": 0.5569, "step": 2293 }, { "epoch": 0.37673721593825055, "grad_norm": 0.309011494993213, "learning_rate": 8.903185910961358e-06, "loss": 0.5866, "step": 2294 }, { "epoch": 0.3769014431465933, "grad_norm": 0.34027174363674284, "learning_rate": 8.903089131343835e-06, "loss": 0.5794, "step": 2295 }, { "epoch": 0.37706567035493604, "grad_norm": 0.2904502113115749, "learning_rate": 8.902992303904435e-06, "loss": 0.6026, "step": 2296 }, { "epoch": 0.3772298975632788, "grad_norm": 0.38170797705184983, "learning_rate": 8.902895428644203e-06, "loss": 0.5842, "step": 2297 }, { "epoch": 0.37739412477162154, "grad_norm": 0.31501550061600586, "learning_rate": 8.902798505564198e-06, "loss": 0.5805, "step": 2298 }, { "epoch": 0.3775583519799643, "grad_norm": 0.33472044182482413, "learning_rate": 8.902701534665467e-06, "loss": 0.596, "step": 2299 }, { "epoch": 0.37772257918830704, "grad_norm": 0.29453287123397076, "learning_rate": 8.902604515949067e-06, "loss": 0.5808, "step": 2300 }, { "epoch": 0.3778868063966498, "grad_norm": 0.3003032769783216, "learning_rate": 8.90250744941605e-06, "loss": 0.595, "step": 2301 }, { "epoch": 0.3780510336049925, "grad_norm": 0.33000419779183837, "learning_rate": 8.90241033506747e-06, "loss": 0.5461, "step": 2302 }, { "epoch": 0.37821526081333523, "grad_norm": 0.44537302571064064, "learning_rate": 8.902313172904383e-06, "loss": 0.5843, "step": 2303 }, { "epoch": 0.378379488021678, "grad_norm": 0.27528935880347627, "learning_rate": 8.902215962927844e-06, "loss": 0.5868, "step": 2304 }, { "epoch": 0.3785437152300207, "grad_norm": 0.30776693033578106, "learning_rate": 8.902118705138908e-06, "loss": 0.5892, "step": 2305 }, { "epoch": 0.3787079424383635, "grad_norm": 0.4128017693757589, "learning_rate": 8.90202139953863e-06, "loss": 0.5512, "step": 2306 }, { "epoch": 0.3788721696467062, "grad_norm": 0.29928028112716526, "learning_rate": 8.901924046128072e-06, "loss": 0.5752, "step": 2307 }, { "epoch": 0.379036396855049, "grad_norm": 0.2854681450585411, "learning_rate": 8.901826644908287e-06, "loss": 0.5643, "step": 2308 }, { "epoch": 0.3792006240633917, "grad_norm": 0.33566978961389754, "learning_rate": 8.901729195880332e-06, "loss": 0.5894, "step": 2309 }, { "epoch": 0.37936485127173447, "grad_norm": 0.3031308669451596, "learning_rate": 8.90163169904527e-06, "loss": 0.5769, "step": 2310 }, { "epoch": 0.37952907848007716, "grad_norm": 0.3232660855580926, "learning_rate": 8.901534154404154e-06, "loss": 0.5928, "step": 2311 }, { "epoch": 0.3796933056884199, "grad_norm": 0.3708012017505623, "learning_rate": 8.901436561958048e-06, "loss": 0.5831, "step": 2312 }, { "epoch": 0.37985753289676266, "grad_norm": 0.31052685319187673, "learning_rate": 8.90133892170801e-06, "loss": 0.5587, "step": 2313 }, { "epoch": 0.3800217601051054, "grad_norm": 0.32187830125576383, "learning_rate": 8.901241233655103e-06, "loss": 0.5717, "step": 2314 }, { "epoch": 0.38018598731344816, "grad_norm": 0.43759240032581914, "learning_rate": 8.901143497800383e-06, "loss": 0.5554, "step": 2315 }, { "epoch": 0.3803502145217909, "grad_norm": 0.44601495555487336, "learning_rate": 8.901045714144916e-06, "loss": 0.5956, "step": 2316 }, { "epoch": 0.38051444173013366, "grad_norm": 0.33538126500137966, "learning_rate": 8.900947882689763e-06, "loss": 0.567, "step": 2317 }, { "epoch": 0.3806786689384764, "grad_norm": 0.31306894560268406, "learning_rate": 8.900850003435985e-06, "loss": 0.5624, "step": 2318 }, { "epoch": 0.3808428961468191, "grad_norm": 0.3111850571560804, "learning_rate": 8.900752076384648e-06, "loss": 0.5789, "step": 2319 }, { "epoch": 0.38100712335516185, "grad_norm": 0.3030241102758526, "learning_rate": 8.900654101536811e-06, "loss": 0.5693, "step": 2320 }, { "epoch": 0.3811713505635046, "grad_norm": 0.33699935182531765, "learning_rate": 8.900556078893542e-06, "loss": 0.5959, "step": 2321 }, { "epoch": 0.38133557777184734, "grad_norm": 0.3544044560119914, "learning_rate": 8.900458008455905e-06, "loss": 0.5713, "step": 2322 }, { "epoch": 0.3814998049801901, "grad_norm": 0.3177507153530851, "learning_rate": 8.900359890224965e-06, "loss": 0.5984, "step": 2323 }, { "epoch": 0.38166403218853284, "grad_norm": 0.3175167714364075, "learning_rate": 8.900261724201786e-06, "loss": 0.5915, "step": 2324 }, { "epoch": 0.3818282593968756, "grad_norm": 0.4009151151987348, "learning_rate": 8.900163510387436e-06, "loss": 0.5518, "step": 2325 }, { "epoch": 0.38199248660521834, "grad_norm": 0.3754271825257931, "learning_rate": 8.900065248782981e-06, "loss": 0.6039, "step": 2326 }, { "epoch": 0.3821567138135611, "grad_norm": 0.32691754192502154, "learning_rate": 8.899966939389488e-06, "loss": 0.5507, "step": 2327 }, { "epoch": 0.3823209410219038, "grad_norm": 0.28998835197412, "learning_rate": 8.899868582208024e-06, "loss": 0.5652, "step": 2328 }, { "epoch": 0.38248516823024653, "grad_norm": 0.3133390970464335, "learning_rate": 8.89977017723966e-06, "loss": 0.5767, "step": 2329 }, { "epoch": 0.3826493954385893, "grad_norm": 0.27878076670015955, "learning_rate": 8.899671724485463e-06, "loss": 0.5876, "step": 2330 }, { "epoch": 0.38281362264693203, "grad_norm": 0.3268019065018182, "learning_rate": 8.899573223946502e-06, "loss": 0.5673, "step": 2331 }, { "epoch": 0.3829778498552748, "grad_norm": 0.373397306799112, "learning_rate": 8.899474675623847e-06, "loss": 0.5799, "step": 2332 }, { "epoch": 0.3831420770636175, "grad_norm": 0.2975325766072855, "learning_rate": 8.89937607951857e-06, "loss": 0.5762, "step": 2333 }, { "epoch": 0.3833063042719603, "grad_norm": 0.28508597358780635, "learning_rate": 8.899277435631738e-06, "loss": 0.5858, "step": 2334 }, { "epoch": 0.383470531480303, "grad_norm": 0.4014745772227359, "learning_rate": 8.899178743964426e-06, "loss": 0.5544, "step": 2335 }, { "epoch": 0.3836347586886457, "grad_norm": 0.32360223924724896, "learning_rate": 8.899080004517704e-06, "loss": 0.5718, "step": 2336 }, { "epoch": 0.38379898589698846, "grad_norm": 0.33739313880409866, "learning_rate": 8.898981217292645e-06, "loss": 0.5623, "step": 2337 }, { "epoch": 0.3839632131053312, "grad_norm": 0.3172906694171004, "learning_rate": 8.898882382290323e-06, "loss": 0.5796, "step": 2338 }, { "epoch": 0.38412744031367396, "grad_norm": 0.35624301408414016, "learning_rate": 8.89878349951181e-06, "loss": 0.5828, "step": 2339 }, { "epoch": 0.3842916675220167, "grad_norm": 0.3434168572205274, "learning_rate": 8.89868456895818e-06, "loss": 0.5946, "step": 2340 }, { "epoch": 0.38445589473035946, "grad_norm": 0.3153538586937991, "learning_rate": 8.898585590630508e-06, "loss": 0.593, "step": 2341 }, { "epoch": 0.3846201219387022, "grad_norm": 0.296511438451358, "learning_rate": 8.89848656452987e-06, "loss": 0.5789, "step": 2342 }, { "epoch": 0.38478434914704496, "grad_norm": 0.42450348545740313, "learning_rate": 8.898387490657339e-06, "loss": 0.5797, "step": 2343 }, { "epoch": 0.3849485763553877, "grad_norm": 0.33579453521532565, "learning_rate": 8.898288369013993e-06, "loss": 0.5962, "step": 2344 }, { "epoch": 0.3851128035637304, "grad_norm": 0.30616008807308986, "learning_rate": 8.898189199600907e-06, "loss": 0.5813, "step": 2345 }, { "epoch": 0.38527703077207315, "grad_norm": 0.36008697096086706, "learning_rate": 8.89808998241916e-06, "loss": 0.5833, "step": 2346 }, { "epoch": 0.3854412579804159, "grad_norm": 0.30608114375905615, "learning_rate": 8.897990717469828e-06, "loss": 0.5581, "step": 2347 }, { "epoch": 0.38560548518875865, "grad_norm": 0.29685505491002356, "learning_rate": 8.89789140475399e-06, "loss": 0.577, "step": 2348 }, { "epoch": 0.3857697123971014, "grad_norm": 0.3025244608028047, "learning_rate": 8.897792044272724e-06, "loss": 0.5567, "step": 2349 }, { "epoch": 0.38593393960544414, "grad_norm": 0.2933770229759006, "learning_rate": 8.897692636027112e-06, "loss": 0.5592, "step": 2350 }, { "epoch": 0.3860981668137869, "grad_norm": 0.3232920174640066, "learning_rate": 8.89759318001823e-06, "loss": 0.5668, "step": 2351 }, { "epoch": 0.38626239402212964, "grad_norm": 0.3288617268332227, "learning_rate": 8.897493676247158e-06, "loss": 0.5708, "step": 2352 }, { "epoch": 0.38642662123047233, "grad_norm": 0.36540336033645326, "learning_rate": 8.897394124714979e-06, "loss": 0.5605, "step": 2353 }, { "epoch": 0.3865908484388151, "grad_norm": 0.38478628819551197, "learning_rate": 8.897294525422773e-06, "loss": 0.5824, "step": 2354 }, { "epoch": 0.38675507564715783, "grad_norm": 0.3239640598548664, "learning_rate": 8.897194878371623e-06, "loss": 0.5794, "step": 2355 }, { "epoch": 0.3869193028555006, "grad_norm": 0.31349793588556835, "learning_rate": 8.897095183562609e-06, "loss": 0.5636, "step": 2356 }, { "epoch": 0.38708353006384333, "grad_norm": 0.3038538132161622, "learning_rate": 8.896995440996816e-06, "loss": 0.5791, "step": 2357 }, { "epoch": 0.3872477572721861, "grad_norm": 0.32051112577468094, "learning_rate": 8.896895650675327e-06, "loss": 0.5791, "step": 2358 }, { "epoch": 0.3874119844805288, "grad_norm": 0.44838209737281975, "learning_rate": 8.896795812599224e-06, "loss": 0.5915, "step": 2359 }, { "epoch": 0.3875762116888716, "grad_norm": 0.34659678851515363, "learning_rate": 8.896695926769594e-06, "loss": 0.5726, "step": 2360 }, { "epoch": 0.3877404388972143, "grad_norm": 0.2887359708809979, "learning_rate": 8.89659599318752e-06, "loss": 0.5854, "step": 2361 }, { "epoch": 0.387904666105557, "grad_norm": 0.3166935243544558, "learning_rate": 8.896496011854087e-06, "loss": 0.582, "step": 2362 }, { "epoch": 0.38806889331389977, "grad_norm": 0.2931378659260637, "learning_rate": 8.896395982770382e-06, "loss": 0.5595, "step": 2363 }, { "epoch": 0.3882331205222425, "grad_norm": 0.35930294339419266, "learning_rate": 8.896295905937492e-06, "loss": 0.5803, "step": 2364 }, { "epoch": 0.38839734773058526, "grad_norm": 0.3112712071807427, "learning_rate": 8.896195781356502e-06, "loss": 0.5941, "step": 2365 }, { "epoch": 0.388561574938928, "grad_norm": 0.2942703416825157, "learning_rate": 8.896095609028501e-06, "loss": 0.5783, "step": 2366 }, { "epoch": 0.38872580214727076, "grad_norm": 0.3352361065283778, "learning_rate": 8.895995388954577e-06, "loss": 0.559, "step": 2367 }, { "epoch": 0.3888900293556135, "grad_norm": 0.2917202118240349, "learning_rate": 8.895895121135819e-06, "loss": 0.5731, "step": 2368 }, { "epoch": 0.38905425656395626, "grad_norm": 0.30769072743532866, "learning_rate": 8.895794805573313e-06, "loss": 0.5623, "step": 2369 }, { "epoch": 0.38921848377229895, "grad_norm": 0.2893103915184653, "learning_rate": 8.89569444226815e-06, "loss": 0.5903, "step": 2370 }, { "epoch": 0.3893827109806417, "grad_norm": 0.29857635371328933, "learning_rate": 8.895594031221423e-06, "loss": 0.5766, "step": 2371 }, { "epoch": 0.38954693818898445, "grad_norm": 0.5167094917444112, "learning_rate": 8.895493572434218e-06, "loss": 0.5776, "step": 2372 }, { "epoch": 0.3897111653973272, "grad_norm": 0.358184481637941, "learning_rate": 8.89539306590763e-06, "loss": 0.595, "step": 2373 }, { "epoch": 0.38987539260566995, "grad_norm": 1.3947990141046052, "learning_rate": 8.895292511642748e-06, "loss": 0.5791, "step": 2374 }, { "epoch": 0.3900396198140127, "grad_norm": 0.33588084377988386, "learning_rate": 8.895191909640665e-06, "loss": 0.5948, "step": 2375 }, { "epoch": 0.39020384702235544, "grad_norm": 0.42192446454016386, "learning_rate": 8.895091259902472e-06, "loss": 0.5804, "step": 2376 }, { "epoch": 0.3903680742306982, "grad_norm": 0.3417045707043655, "learning_rate": 8.894990562429265e-06, "loss": 0.5712, "step": 2377 }, { "epoch": 0.39053230143904094, "grad_norm": 0.32637787783707956, "learning_rate": 8.894889817222138e-06, "loss": 0.59, "step": 2378 }, { "epoch": 0.39069652864738363, "grad_norm": 0.3589500141121288, "learning_rate": 8.894789024282181e-06, "loss": 0.578, "step": 2379 }, { "epoch": 0.3908607558557264, "grad_norm": 0.3401379685320087, "learning_rate": 8.894688183610494e-06, "loss": 0.5698, "step": 2380 }, { "epoch": 0.39102498306406913, "grad_norm": 0.31926923165948984, "learning_rate": 8.894587295208167e-06, "loss": 0.5883, "step": 2381 }, { "epoch": 0.3911892102724119, "grad_norm": 0.3215207946196231, "learning_rate": 8.8944863590763e-06, "loss": 0.5943, "step": 2382 }, { "epoch": 0.39135343748075463, "grad_norm": 0.3453997776935591, "learning_rate": 8.894385375215987e-06, "loss": 0.5744, "step": 2383 }, { "epoch": 0.3915176646890974, "grad_norm": 0.3461759429970046, "learning_rate": 8.894284343628326e-06, "loss": 0.5682, "step": 2384 }, { "epoch": 0.3916818918974401, "grad_norm": 0.33305618278830995, "learning_rate": 8.89418326431441e-06, "loss": 0.6009, "step": 2385 }, { "epoch": 0.3918461191057829, "grad_norm": 0.34941470397732544, "learning_rate": 8.894082137275344e-06, "loss": 0.5922, "step": 2386 }, { "epoch": 0.39201034631412557, "grad_norm": 0.33931275242971576, "learning_rate": 8.893980962512224e-06, "loss": 0.5559, "step": 2387 }, { "epoch": 0.3921745735224683, "grad_norm": 0.43197181831368625, "learning_rate": 8.893879740026146e-06, "loss": 0.5432, "step": 2388 }, { "epoch": 0.39233880073081107, "grad_norm": 0.4445672617305598, "learning_rate": 8.893778469818211e-06, "loss": 0.5849, "step": 2389 }, { "epoch": 0.3925030279391538, "grad_norm": 0.30662557274934144, "learning_rate": 8.893677151889517e-06, "loss": 0.5479, "step": 2390 }, { "epoch": 0.39266725514749656, "grad_norm": 0.35586055709357794, "learning_rate": 8.893575786241168e-06, "loss": 0.5713, "step": 2391 }, { "epoch": 0.3928314823558393, "grad_norm": 0.3416283784802478, "learning_rate": 8.893474372874264e-06, "loss": 0.5692, "step": 2392 }, { "epoch": 0.39299570956418206, "grad_norm": 0.37964865251282476, "learning_rate": 8.893372911789904e-06, "loss": 0.5615, "step": 2393 }, { "epoch": 0.3931599367725248, "grad_norm": 0.30654274690443206, "learning_rate": 8.893271402989193e-06, "loss": 0.5878, "step": 2394 }, { "epoch": 0.3933241639808675, "grad_norm": 0.3098855741105631, "learning_rate": 8.893169846473233e-06, "loss": 0.586, "step": 2395 }, { "epoch": 0.39348839118921025, "grad_norm": 0.3284469381158421, "learning_rate": 8.893068242243126e-06, "loss": 0.5656, "step": 2396 }, { "epoch": 0.393652618397553, "grad_norm": 0.3457270508908126, "learning_rate": 8.892966590299975e-06, "loss": 0.5804, "step": 2397 }, { "epoch": 0.39381684560589575, "grad_norm": 0.3824770008300639, "learning_rate": 8.892864890644882e-06, "loss": 0.5445, "step": 2398 }, { "epoch": 0.3939810728142385, "grad_norm": 0.2890234530985488, "learning_rate": 8.892763143278958e-06, "loss": 0.5591, "step": 2399 }, { "epoch": 0.39414530002258125, "grad_norm": 0.2999344078752705, "learning_rate": 8.892661348203304e-06, "loss": 0.5752, "step": 2400 }, { "epoch": 0.394309527230924, "grad_norm": 0.2966419849742581, "learning_rate": 8.892559505419023e-06, "loss": 0.5673, "step": 2401 }, { "epoch": 0.39447375443926674, "grad_norm": 0.3193420409937522, "learning_rate": 8.892457614927228e-06, "loss": 0.5612, "step": 2402 }, { "epoch": 0.3946379816476095, "grad_norm": 0.3291083690324307, "learning_rate": 8.89235567672902e-06, "loss": 0.5676, "step": 2403 }, { "epoch": 0.3948022088559522, "grad_norm": 0.3398061099802675, "learning_rate": 8.892253690825507e-06, "loss": 0.5447, "step": 2404 }, { "epoch": 0.39496643606429493, "grad_norm": 0.436882175126899, "learning_rate": 8.892151657217799e-06, "loss": 0.5661, "step": 2405 }, { "epoch": 0.3951306632726377, "grad_norm": 0.9383550668483664, "learning_rate": 8.892049575907003e-06, "loss": 0.5763, "step": 2406 }, { "epoch": 0.39529489048098043, "grad_norm": 0.3557356759695701, "learning_rate": 8.891947446894224e-06, "loss": 0.5721, "step": 2407 }, { "epoch": 0.3954591176893232, "grad_norm": 0.40604062140883007, "learning_rate": 8.891845270180578e-06, "loss": 0.577, "step": 2408 }, { "epoch": 0.39562334489766593, "grad_norm": 0.3646391321735781, "learning_rate": 8.89174304576717e-06, "loss": 0.5777, "step": 2409 }, { "epoch": 0.3957875721060087, "grad_norm": 0.3310293733714123, "learning_rate": 8.891640773655112e-06, "loss": 0.5613, "step": 2410 }, { "epoch": 0.3959517993143514, "grad_norm": 0.4039936546858141, "learning_rate": 8.891538453845515e-06, "loss": 0.5776, "step": 2411 }, { "epoch": 0.3961160265226941, "grad_norm": 0.39043339053206516, "learning_rate": 8.891436086339489e-06, "loss": 0.5647, "step": 2412 }, { "epoch": 0.39628025373103687, "grad_norm": 0.3213534464575592, "learning_rate": 8.891333671138146e-06, "loss": 0.5711, "step": 2413 }, { "epoch": 0.3964444809393796, "grad_norm": 0.31102891837891017, "learning_rate": 8.8912312082426e-06, "loss": 0.5837, "step": 2414 }, { "epoch": 0.39660870814772237, "grad_norm": 0.3046339082276663, "learning_rate": 8.891128697653962e-06, "loss": 0.5554, "step": 2415 }, { "epoch": 0.3967729353560651, "grad_norm": 0.31346347835751853, "learning_rate": 8.891026139373347e-06, "loss": 0.5792, "step": 2416 }, { "epoch": 0.39693716256440786, "grad_norm": 0.3290758388183897, "learning_rate": 8.890923533401866e-06, "loss": 0.5893, "step": 2417 }, { "epoch": 0.3971013897727506, "grad_norm": 0.32363239445426245, "learning_rate": 8.890820879740636e-06, "loss": 0.5926, "step": 2418 }, { "epoch": 0.39726561698109336, "grad_norm": 0.3397402640059377, "learning_rate": 8.890718178390772e-06, "loss": 0.5573, "step": 2419 }, { "epoch": 0.3974298441894361, "grad_norm": 0.33337105589432475, "learning_rate": 8.89061542935339e-06, "loss": 0.5788, "step": 2420 }, { "epoch": 0.3975940713977788, "grad_norm": 0.3040636722728571, "learning_rate": 8.890512632629603e-06, "loss": 0.5735, "step": 2421 }, { "epoch": 0.39775829860612155, "grad_norm": 0.3486589140163701, "learning_rate": 8.89040978822053e-06, "loss": 0.5698, "step": 2422 }, { "epoch": 0.3979225258144643, "grad_norm": 0.33530465006472016, "learning_rate": 8.890306896127285e-06, "loss": 0.5824, "step": 2423 }, { "epoch": 0.39808675302280705, "grad_norm": 0.416663784834214, "learning_rate": 8.890203956350989e-06, "loss": 0.5673, "step": 2424 }, { "epoch": 0.3982509802311498, "grad_norm": 0.31480558408985104, "learning_rate": 8.89010096889276e-06, "loss": 0.5696, "step": 2425 }, { "epoch": 0.39841520743949255, "grad_norm": 0.3087118076140605, "learning_rate": 8.889997933753713e-06, "loss": 0.5764, "step": 2426 }, { "epoch": 0.3985794346478353, "grad_norm": 0.33372436198265837, "learning_rate": 8.88989485093497e-06, "loss": 0.5898, "step": 2427 }, { "epoch": 0.39874366185617804, "grad_norm": 0.35881328155788444, "learning_rate": 8.88979172043765e-06, "loss": 0.5731, "step": 2428 }, { "epoch": 0.39890788906452074, "grad_norm": 0.30641486846057925, "learning_rate": 8.889688542262872e-06, "loss": 0.5621, "step": 2429 }, { "epoch": 0.3990721162728635, "grad_norm": 0.3188642436177513, "learning_rate": 8.889585316411759e-06, "loss": 0.5958, "step": 2430 }, { "epoch": 0.39923634348120624, "grad_norm": 0.3586864652327105, "learning_rate": 8.88948204288543e-06, "loss": 0.5696, "step": 2431 }, { "epoch": 0.399400570689549, "grad_norm": 0.32446607347051026, "learning_rate": 8.889378721685008e-06, "loss": 0.5801, "step": 2432 }, { "epoch": 0.39956479789789173, "grad_norm": 0.3091050200854687, "learning_rate": 8.889275352811614e-06, "loss": 0.5882, "step": 2433 }, { "epoch": 0.3997290251062345, "grad_norm": 0.4026782667136338, "learning_rate": 8.889171936266373e-06, "loss": 0.5563, "step": 2434 }, { "epoch": 0.39989325231457723, "grad_norm": 0.3830299878740365, "learning_rate": 8.889068472050405e-06, "loss": 0.5828, "step": 2435 }, { "epoch": 0.40005747952292, "grad_norm": 0.35592783342450557, "learning_rate": 8.888964960164833e-06, "loss": 0.5734, "step": 2436 }, { "epoch": 0.4002217067312627, "grad_norm": 0.31061412781069053, "learning_rate": 8.888861400610786e-06, "loss": 0.5967, "step": 2437 }, { "epoch": 0.4003859339396054, "grad_norm": 0.35688423964889243, "learning_rate": 8.888757793389384e-06, "loss": 0.5738, "step": 2438 }, { "epoch": 0.40055016114794817, "grad_norm": 0.30702503029090233, "learning_rate": 8.888654138501756e-06, "loss": 0.5848, "step": 2439 }, { "epoch": 0.4007143883562909, "grad_norm": 0.4147640287492882, "learning_rate": 8.888550435949027e-06, "loss": 0.5759, "step": 2440 }, { "epoch": 0.40087861556463367, "grad_norm": 0.3049971946792247, "learning_rate": 8.888446685732321e-06, "loss": 0.5723, "step": 2441 }, { "epoch": 0.4010428427729764, "grad_norm": 0.2979275672901426, "learning_rate": 8.888342887852767e-06, "loss": 0.5836, "step": 2442 }, { "epoch": 0.40120706998131916, "grad_norm": 0.31789230808393515, "learning_rate": 8.88823904231149e-06, "loss": 0.5758, "step": 2443 }, { "epoch": 0.4013712971896619, "grad_norm": 0.2969921000323856, "learning_rate": 8.888135149109623e-06, "loss": 0.5829, "step": 2444 }, { "epoch": 0.40153552439800466, "grad_norm": 0.32598594902859673, "learning_rate": 8.888031208248288e-06, "loss": 0.5533, "step": 2445 }, { "epoch": 0.40169975160634736, "grad_norm": 0.3055143376194248, "learning_rate": 8.887927219728618e-06, "loss": 0.5745, "step": 2446 }, { "epoch": 0.4018639788146901, "grad_norm": 0.3290788171475274, "learning_rate": 8.887823183551741e-06, "loss": 0.5758, "step": 2447 }, { "epoch": 0.40202820602303285, "grad_norm": 0.3092126257798601, "learning_rate": 8.887719099718788e-06, "loss": 0.5568, "step": 2448 }, { "epoch": 0.4021924332313756, "grad_norm": 0.31991904996752984, "learning_rate": 8.887614968230888e-06, "loss": 0.5766, "step": 2449 }, { "epoch": 0.40235666043971835, "grad_norm": 0.2857049219979474, "learning_rate": 8.887510789089173e-06, "loss": 0.5905, "step": 2450 }, { "epoch": 0.4025208876480611, "grad_norm": 0.3171922371979184, "learning_rate": 8.887406562294774e-06, "loss": 0.5537, "step": 2451 }, { "epoch": 0.40268511485640385, "grad_norm": 0.3223877063261562, "learning_rate": 8.887302287848822e-06, "loss": 0.578, "step": 2452 }, { "epoch": 0.4028493420647466, "grad_norm": 0.2973553918001752, "learning_rate": 8.887197965752452e-06, "loss": 0.5664, "step": 2453 }, { "epoch": 0.40301356927308934, "grad_norm": 0.2895585733719346, "learning_rate": 8.887093596006794e-06, "loss": 0.5725, "step": 2454 }, { "epoch": 0.40317779648143204, "grad_norm": 0.33434995224312225, "learning_rate": 8.886989178612985e-06, "loss": 0.5889, "step": 2455 }, { "epoch": 0.4033420236897748, "grad_norm": 0.31888170721341874, "learning_rate": 8.886884713572157e-06, "loss": 0.5661, "step": 2456 }, { "epoch": 0.40350625089811754, "grad_norm": 0.2937884224933003, "learning_rate": 8.886780200885444e-06, "loss": 0.5522, "step": 2457 }, { "epoch": 0.4036704781064603, "grad_norm": 0.3394152732680013, "learning_rate": 8.886675640553981e-06, "loss": 0.5622, "step": 2458 }, { "epoch": 0.40383470531480303, "grad_norm": 0.32314358205589594, "learning_rate": 8.886571032578906e-06, "loss": 0.5775, "step": 2459 }, { "epoch": 0.4039989325231458, "grad_norm": 0.40212521213858143, "learning_rate": 8.886466376961355e-06, "loss": 0.5769, "step": 2460 }, { "epoch": 0.40416315973148853, "grad_norm": 0.3058600193689216, "learning_rate": 8.886361673702463e-06, "loss": 0.582, "step": 2461 }, { "epoch": 0.4043273869398313, "grad_norm": 0.27062081866814136, "learning_rate": 8.886256922803368e-06, "loss": 0.5452, "step": 2462 }, { "epoch": 0.404491614148174, "grad_norm": 0.3095302292598992, "learning_rate": 8.886152124265205e-06, "loss": 0.5701, "step": 2463 }, { "epoch": 0.4046558413565167, "grad_norm": 0.3196683063197642, "learning_rate": 8.886047278089117e-06, "loss": 0.5688, "step": 2464 }, { "epoch": 0.40482006856485947, "grad_norm": 0.3747769365378931, "learning_rate": 8.885942384276238e-06, "loss": 0.5602, "step": 2465 }, { "epoch": 0.4049842957732022, "grad_norm": 0.3314657584893606, "learning_rate": 8.88583744282771e-06, "loss": 0.568, "step": 2466 }, { "epoch": 0.40514852298154497, "grad_norm": 0.2921241951052027, "learning_rate": 8.885732453744673e-06, "loss": 0.5694, "step": 2467 }, { "epoch": 0.4053127501898877, "grad_norm": 0.31684539227316827, "learning_rate": 8.885627417028266e-06, "loss": 0.5706, "step": 2468 }, { "epoch": 0.40547697739823046, "grad_norm": 0.29919740238742754, "learning_rate": 8.885522332679632e-06, "loss": 0.5611, "step": 2469 }, { "epoch": 0.4056412046065732, "grad_norm": 0.2927552985408856, "learning_rate": 8.88541720069991e-06, "loss": 0.5614, "step": 2470 }, { "epoch": 0.40580543181491596, "grad_norm": 0.33353867719732694, "learning_rate": 8.885312021090242e-06, "loss": 0.5638, "step": 2471 }, { "epoch": 0.40596965902325866, "grad_norm": 0.2925862099678924, "learning_rate": 8.885206793851771e-06, "loss": 0.5577, "step": 2472 }, { "epoch": 0.4061338862316014, "grad_norm": 0.31469465854680573, "learning_rate": 8.88510151898564e-06, "loss": 0.5658, "step": 2473 }, { "epoch": 0.40629811343994415, "grad_norm": 0.3254583303924738, "learning_rate": 8.884996196492992e-06, "loss": 0.5728, "step": 2474 }, { "epoch": 0.4064623406482869, "grad_norm": 0.3475809377362272, "learning_rate": 8.88489082637497e-06, "loss": 0.5621, "step": 2475 }, { "epoch": 0.40662656785662965, "grad_norm": 0.29271987278075334, "learning_rate": 8.88478540863272e-06, "loss": 0.5867, "step": 2476 }, { "epoch": 0.4067907950649724, "grad_norm": 0.29606614439458234, "learning_rate": 8.884679943267387e-06, "loss": 0.5621, "step": 2477 }, { "epoch": 0.40695502227331515, "grad_norm": 0.2990525558360778, "learning_rate": 8.884574430280117e-06, "loss": 0.5771, "step": 2478 }, { "epoch": 0.4071192494816579, "grad_norm": 0.34870302304095335, "learning_rate": 8.884468869672053e-06, "loss": 0.5587, "step": 2479 }, { "epoch": 0.4072834766900006, "grad_norm": 0.3581627456016973, "learning_rate": 8.884363261444344e-06, "loss": 0.566, "step": 2480 }, { "epoch": 0.40744770389834334, "grad_norm": 0.35359658435859415, "learning_rate": 8.884257605598137e-06, "loss": 0.5745, "step": 2481 }, { "epoch": 0.4076119311066861, "grad_norm": 0.31570900922366557, "learning_rate": 8.884151902134578e-06, "loss": 0.5746, "step": 2482 }, { "epoch": 0.40777615831502884, "grad_norm": 0.35211170928524965, "learning_rate": 8.884046151054815e-06, "loss": 0.5819, "step": 2483 }, { "epoch": 0.4079403855233716, "grad_norm": 0.3246151812151191, "learning_rate": 8.883940352359998e-06, "loss": 0.5808, "step": 2484 }, { "epoch": 0.40810461273171433, "grad_norm": 0.29286391071193524, "learning_rate": 8.883834506051277e-06, "loss": 0.5618, "step": 2485 }, { "epoch": 0.4082688399400571, "grad_norm": 0.38869425964352805, "learning_rate": 8.883728612129799e-06, "loss": 0.5944, "step": 2486 }, { "epoch": 0.40843306714839983, "grad_norm": 0.3357922498523062, "learning_rate": 8.883622670596715e-06, "loss": 0.5485, "step": 2487 }, { "epoch": 0.4085972943567426, "grad_norm": 0.3037126347902224, "learning_rate": 8.883516681453177e-06, "loss": 0.5801, "step": 2488 }, { "epoch": 0.4087615215650853, "grad_norm": 0.32469023718462664, "learning_rate": 8.883410644700335e-06, "loss": 0.5674, "step": 2489 }, { "epoch": 0.408925748773428, "grad_norm": 0.31429830880564624, "learning_rate": 8.88330456033934e-06, "loss": 0.5506, "step": 2490 }, { "epoch": 0.40908997598177077, "grad_norm": 0.28926492543203663, "learning_rate": 8.883198428371346e-06, "loss": 0.584, "step": 2491 }, { "epoch": 0.4092542031901135, "grad_norm": 0.33964002500396295, "learning_rate": 8.883092248797503e-06, "loss": 0.5619, "step": 2492 }, { "epoch": 0.40941843039845627, "grad_norm": 0.30098148533924857, "learning_rate": 8.882986021618967e-06, "loss": 0.5504, "step": 2493 }, { "epoch": 0.409582657606799, "grad_norm": 0.3575580059528442, "learning_rate": 8.88287974683689e-06, "loss": 0.5601, "step": 2494 }, { "epoch": 0.40974688481514177, "grad_norm": 0.32069671607176764, "learning_rate": 8.882773424452427e-06, "loss": 0.5295, "step": 2495 }, { "epoch": 0.4099111120234845, "grad_norm": 0.30232051278302446, "learning_rate": 8.882667054466731e-06, "loss": 0.5796, "step": 2496 }, { "epoch": 0.4100753392318272, "grad_norm": 0.3708374336609579, "learning_rate": 8.88256063688096e-06, "loss": 0.5743, "step": 2497 }, { "epoch": 0.41023956644016996, "grad_norm": 0.3032029622742079, "learning_rate": 8.88245417169627e-06, "loss": 0.5754, "step": 2498 }, { "epoch": 0.4104037936485127, "grad_norm": 0.2867613437909591, "learning_rate": 8.882347658913814e-06, "loss": 0.5713, "step": 2499 }, { "epoch": 0.41056802085685545, "grad_norm": 0.32926370873977717, "learning_rate": 8.882241098534751e-06, "loss": 0.5838, "step": 2500 }, { "epoch": 0.4107322480651982, "grad_norm": 0.3428683835495346, "learning_rate": 8.88213449056024e-06, "loss": 0.5651, "step": 2501 }, { "epoch": 0.41089647527354095, "grad_norm": 0.28973504149209034, "learning_rate": 8.882027834991435e-06, "loss": 0.5739, "step": 2502 }, { "epoch": 0.4110607024818837, "grad_norm": 0.2797815273126128, "learning_rate": 8.881921131829497e-06, "loss": 0.5723, "step": 2503 }, { "epoch": 0.41122492969022645, "grad_norm": 0.4145755184754829, "learning_rate": 8.881814381075583e-06, "loss": 0.5595, "step": 2504 }, { "epoch": 0.4113891568985692, "grad_norm": 0.3331349868005999, "learning_rate": 8.881707582730855e-06, "loss": 0.5694, "step": 2505 }, { "epoch": 0.4115533841069119, "grad_norm": 0.30051933126756675, "learning_rate": 8.881600736796473e-06, "loss": 0.5705, "step": 2506 }, { "epoch": 0.41171761131525464, "grad_norm": 0.33450947684165244, "learning_rate": 8.881493843273595e-06, "loss": 0.5733, "step": 2507 }, { "epoch": 0.4118818385235974, "grad_norm": 0.32963924885381335, "learning_rate": 8.881386902163382e-06, "loss": 0.5658, "step": 2508 }, { "epoch": 0.41204606573194014, "grad_norm": 0.35930224711053116, "learning_rate": 8.881279913466997e-06, "loss": 0.5658, "step": 2509 }, { "epoch": 0.4122102929402829, "grad_norm": 0.3704447240612607, "learning_rate": 8.881172877185601e-06, "loss": 0.5525, "step": 2510 }, { "epoch": 0.41237452014862563, "grad_norm": 0.30949854959121004, "learning_rate": 8.881065793320358e-06, "loss": 0.5693, "step": 2511 }, { "epoch": 0.4125387473569684, "grad_norm": 0.32021726342404155, "learning_rate": 8.880958661872431e-06, "loss": 0.5729, "step": 2512 }, { "epoch": 0.41270297456531113, "grad_norm": 0.298710533844321, "learning_rate": 8.88085148284298e-06, "loss": 0.5395, "step": 2513 }, { "epoch": 0.4128672017736538, "grad_norm": 0.3740569254074185, "learning_rate": 8.880744256233175e-06, "loss": 0.5472, "step": 2514 }, { "epoch": 0.4130314289819966, "grad_norm": 0.4018513304426621, "learning_rate": 8.880636982044176e-06, "loss": 0.5624, "step": 2515 }, { "epoch": 0.4131956561903393, "grad_norm": 0.40970239846974127, "learning_rate": 8.88052966027715e-06, "loss": 0.5564, "step": 2516 }, { "epoch": 0.41335988339868207, "grad_norm": 0.3117304225668632, "learning_rate": 8.88042229093326e-06, "loss": 0.54, "step": 2517 }, { "epoch": 0.4135241106070248, "grad_norm": 0.28437155849049806, "learning_rate": 8.880314874013674e-06, "loss": 0.5711, "step": 2518 }, { "epoch": 0.41368833781536757, "grad_norm": 0.3332562664647129, "learning_rate": 8.88020740951956e-06, "loss": 0.5559, "step": 2519 }, { "epoch": 0.4138525650237103, "grad_norm": 0.5483444520242049, "learning_rate": 8.880099897452086e-06, "loss": 0.5722, "step": 2520 }, { "epoch": 0.41401679223205307, "grad_norm": 0.3011635778727716, "learning_rate": 8.879992337812416e-06, "loss": 0.5684, "step": 2521 }, { "epoch": 0.4141810194403958, "grad_norm": 0.31450829980721595, "learning_rate": 8.879884730601718e-06, "loss": 0.5698, "step": 2522 }, { "epoch": 0.4143452466487385, "grad_norm": 0.31001996453014075, "learning_rate": 8.879777075821165e-06, "loss": 0.5342, "step": 2523 }, { "epoch": 0.41450947385708126, "grad_norm": 0.3139993732413725, "learning_rate": 8.879669373471923e-06, "loss": 0.5567, "step": 2524 }, { "epoch": 0.414673701065424, "grad_norm": 0.3203420354656011, "learning_rate": 8.879561623555163e-06, "loss": 0.5657, "step": 2525 }, { "epoch": 0.41483792827376675, "grad_norm": 0.3108136022021269, "learning_rate": 8.879453826072055e-06, "loss": 0.5815, "step": 2526 }, { "epoch": 0.4150021554821095, "grad_norm": 0.30562850254753904, "learning_rate": 8.879345981023769e-06, "loss": 0.5765, "step": 2527 }, { "epoch": 0.41516638269045225, "grad_norm": 0.3901832252063882, "learning_rate": 8.879238088411476e-06, "loss": 0.5606, "step": 2528 }, { "epoch": 0.415330609898795, "grad_norm": 0.3017152351908569, "learning_rate": 8.879130148236351e-06, "loss": 0.6051, "step": 2529 }, { "epoch": 0.41549483710713775, "grad_norm": 0.37648168466183224, "learning_rate": 8.879022160499563e-06, "loss": 0.557, "step": 2530 }, { "epoch": 0.41565906431548044, "grad_norm": 0.31899551450864727, "learning_rate": 8.878914125202287e-06, "loss": 0.5608, "step": 2531 }, { "epoch": 0.4158232915238232, "grad_norm": 0.2937937843776762, "learning_rate": 8.878806042345693e-06, "loss": 0.5608, "step": 2532 }, { "epoch": 0.41598751873216594, "grad_norm": 0.3343905076548578, "learning_rate": 8.878697911930959e-06, "loss": 0.5666, "step": 2533 }, { "epoch": 0.4161517459405087, "grad_norm": 0.37676843413898936, "learning_rate": 8.878589733959256e-06, "loss": 0.554, "step": 2534 }, { "epoch": 0.41631597314885144, "grad_norm": 0.3158506997271894, "learning_rate": 8.878481508431762e-06, "loss": 0.56, "step": 2535 }, { "epoch": 0.4164802003571942, "grad_norm": 0.31815058752542835, "learning_rate": 8.87837323534965e-06, "loss": 0.5791, "step": 2536 }, { "epoch": 0.41664442756553693, "grad_norm": 0.4371060424111749, "learning_rate": 8.878264914714098e-06, "loss": 0.5815, "step": 2537 }, { "epoch": 0.4168086547738797, "grad_norm": 0.2983880649857614, "learning_rate": 8.878156546526282e-06, "loss": 0.5604, "step": 2538 }, { "epoch": 0.41697288198222243, "grad_norm": 0.34073001572633294, "learning_rate": 8.878048130787376e-06, "loss": 0.5813, "step": 2539 }, { "epoch": 0.4171371091905651, "grad_norm": 0.3624505324000262, "learning_rate": 8.877939667498561e-06, "loss": 0.551, "step": 2540 }, { "epoch": 0.4173013363989079, "grad_norm": 0.3038346714696093, "learning_rate": 8.877831156661015e-06, "loss": 0.5392, "step": 2541 }, { "epoch": 0.4174655636072506, "grad_norm": 0.3447650018068754, "learning_rate": 8.877722598275915e-06, "loss": 0.5765, "step": 2542 }, { "epoch": 0.41762979081559337, "grad_norm": 0.30392547582003404, "learning_rate": 8.87761399234444e-06, "loss": 0.5616, "step": 2543 }, { "epoch": 0.4177940180239361, "grad_norm": 0.3887953041088847, "learning_rate": 8.87750533886777e-06, "loss": 0.5483, "step": 2544 }, { "epoch": 0.41795824523227887, "grad_norm": 0.3289178164106548, "learning_rate": 8.877396637847085e-06, "loss": 0.5337, "step": 2545 }, { "epoch": 0.4181224724406216, "grad_norm": 0.332748619678136, "learning_rate": 8.877287889283566e-06, "loss": 0.5682, "step": 2546 }, { "epoch": 0.41828669964896437, "grad_norm": 0.3044178943644956, "learning_rate": 8.877179093178394e-06, "loss": 0.5424, "step": 2547 }, { "epoch": 0.41845092685730706, "grad_norm": 0.3173245691997307, "learning_rate": 8.87707024953275e-06, "loss": 0.5936, "step": 2548 }, { "epoch": 0.4186151540656498, "grad_norm": 0.265613088884184, "learning_rate": 8.876961358347819e-06, "loss": 0.561, "step": 2549 }, { "epoch": 0.41877938127399256, "grad_norm": 0.3146875818714378, "learning_rate": 8.876852419624777e-06, "loss": 0.5787, "step": 2550 }, { "epoch": 0.4189436084823353, "grad_norm": 0.37599575888562486, "learning_rate": 8.876743433364814e-06, "loss": 0.5568, "step": 2551 }, { "epoch": 0.41910783569067805, "grad_norm": 0.3598518156246321, "learning_rate": 8.876634399569111e-06, "loss": 0.5856, "step": 2552 }, { "epoch": 0.4192720628990208, "grad_norm": 0.30311498096956535, "learning_rate": 8.876525318238852e-06, "loss": 0.5876, "step": 2553 }, { "epoch": 0.41943629010736355, "grad_norm": 0.3171555277490769, "learning_rate": 8.876416189375222e-06, "loss": 0.5866, "step": 2554 }, { "epoch": 0.4196005173157063, "grad_norm": 0.3690366608054597, "learning_rate": 8.876307012979409e-06, "loss": 0.5691, "step": 2555 }, { "epoch": 0.41976474452404905, "grad_norm": 0.2956348303710773, "learning_rate": 8.876197789052593e-06, "loss": 0.5774, "step": 2556 }, { "epoch": 0.41992897173239174, "grad_norm": 0.30313111772520873, "learning_rate": 8.876088517595964e-06, "loss": 0.5626, "step": 2557 }, { "epoch": 0.4200931989407345, "grad_norm": 0.29748543393772187, "learning_rate": 8.875979198610709e-06, "loss": 0.585, "step": 2558 }, { "epoch": 0.42025742614907724, "grad_norm": 0.35983239485183327, "learning_rate": 8.875869832098014e-06, "loss": 0.5698, "step": 2559 }, { "epoch": 0.42042165335742, "grad_norm": 0.2952903149801761, "learning_rate": 8.875760418059067e-06, "loss": 0.5602, "step": 2560 }, { "epoch": 0.42058588056576274, "grad_norm": 0.2748700755222291, "learning_rate": 8.875650956495058e-06, "loss": 0.5571, "step": 2561 }, { "epoch": 0.4207501077741055, "grad_norm": 0.30142460059542353, "learning_rate": 8.875541447407174e-06, "loss": 0.5815, "step": 2562 }, { "epoch": 0.42091433498244823, "grad_norm": 0.30208405047227427, "learning_rate": 8.875431890796603e-06, "loss": 0.5731, "step": 2563 }, { "epoch": 0.421078562190791, "grad_norm": 0.3118637667702491, "learning_rate": 8.875322286664538e-06, "loss": 0.5474, "step": 2564 }, { "epoch": 0.4212427893991337, "grad_norm": 0.317291042191685, "learning_rate": 8.87521263501217e-06, "loss": 0.5446, "step": 2565 }, { "epoch": 0.4214070166074764, "grad_norm": 0.35441933886963456, "learning_rate": 8.875102935840687e-06, "loss": 0.5516, "step": 2566 }, { "epoch": 0.4215712438158192, "grad_norm": 0.3204356969177786, "learning_rate": 8.874993189151281e-06, "loss": 0.5661, "step": 2567 }, { "epoch": 0.4217354710241619, "grad_norm": 0.30987334724965443, "learning_rate": 8.874883394945145e-06, "loss": 0.5629, "step": 2568 }, { "epoch": 0.42189969823250467, "grad_norm": 0.32477041715743676, "learning_rate": 8.87477355322347e-06, "loss": 0.5627, "step": 2569 }, { "epoch": 0.4220639254408474, "grad_norm": 0.2954548449250752, "learning_rate": 8.874663663987452e-06, "loss": 0.5503, "step": 2570 }, { "epoch": 0.42222815264919017, "grad_norm": 0.3245662283540012, "learning_rate": 8.874553727238281e-06, "loss": 0.5747, "step": 2571 }, { "epoch": 0.4223923798575329, "grad_norm": 0.3987350454378436, "learning_rate": 8.874443742977154e-06, "loss": 0.5561, "step": 2572 }, { "epoch": 0.4225566070658756, "grad_norm": 0.3345985015745256, "learning_rate": 8.874333711205264e-06, "loss": 0.5555, "step": 2573 }, { "epoch": 0.42272083427421836, "grad_norm": 0.28819563482433996, "learning_rate": 8.874223631923804e-06, "loss": 0.5392, "step": 2574 }, { "epoch": 0.4228850614825611, "grad_norm": 0.3202474861777573, "learning_rate": 8.874113505133974e-06, "loss": 0.565, "step": 2575 }, { "epoch": 0.42304928869090386, "grad_norm": 0.2790382154947942, "learning_rate": 8.874003330836966e-06, "loss": 0.5766, "step": 2576 }, { "epoch": 0.4232135158992466, "grad_norm": 0.44535350715320376, "learning_rate": 8.87389310903398e-06, "loss": 0.5744, "step": 2577 }, { "epoch": 0.42337774310758935, "grad_norm": 0.3184028856113947, "learning_rate": 8.87378283972621e-06, "loss": 0.5816, "step": 2578 }, { "epoch": 0.4235419703159321, "grad_norm": 0.2922792339457821, "learning_rate": 8.873672522914856e-06, "loss": 0.5509, "step": 2579 }, { "epoch": 0.42370619752427485, "grad_norm": 0.454554563345644, "learning_rate": 8.873562158601116e-06, "loss": 0.5594, "step": 2580 }, { "epoch": 0.4238704247326176, "grad_norm": 0.34907028522108435, "learning_rate": 8.873451746786186e-06, "loss": 0.5655, "step": 2581 }, { "epoch": 0.4240346519409603, "grad_norm": 0.3298719872719342, "learning_rate": 8.873341287471269e-06, "loss": 0.5629, "step": 2582 }, { "epoch": 0.42419887914930304, "grad_norm": 0.3389124908725632, "learning_rate": 8.873230780657562e-06, "loss": 0.5878, "step": 2583 }, { "epoch": 0.4243631063576458, "grad_norm": 0.37188737857676285, "learning_rate": 8.873120226346266e-06, "loss": 0.5831, "step": 2584 }, { "epoch": 0.42452733356598854, "grad_norm": 0.34890335602420697, "learning_rate": 8.873009624538582e-06, "loss": 0.5722, "step": 2585 }, { "epoch": 0.4246915607743313, "grad_norm": 0.328354898484246, "learning_rate": 8.872898975235711e-06, "loss": 0.5569, "step": 2586 }, { "epoch": 0.42485578798267404, "grad_norm": 0.312327677555695, "learning_rate": 8.872788278438854e-06, "loss": 0.5674, "step": 2587 }, { "epoch": 0.4250200151910168, "grad_norm": 0.32296344711606056, "learning_rate": 8.872677534149215e-06, "loss": 0.559, "step": 2588 }, { "epoch": 0.42518424239935954, "grad_norm": 0.36315755635387575, "learning_rate": 8.872566742367995e-06, "loss": 0.5895, "step": 2589 }, { "epoch": 0.42534846960770223, "grad_norm": 0.33097917263767107, "learning_rate": 8.8724559030964e-06, "loss": 0.578, "step": 2590 }, { "epoch": 0.425512696816045, "grad_norm": 0.315094597471828, "learning_rate": 8.87234501633563e-06, "loss": 0.5504, "step": 2591 }, { "epoch": 0.4256769240243877, "grad_norm": 0.45635596872249246, "learning_rate": 8.87223408208689e-06, "loss": 0.5482, "step": 2592 }, { "epoch": 0.4258411512327305, "grad_norm": 0.4193262239236399, "learning_rate": 8.872123100351389e-06, "loss": 0.5688, "step": 2593 }, { "epoch": 0.4260053784410732, "grad_norm": 0.4273899125136232, "learning_rate": 8.87201207113033e-06, "loss": 0.5796, "step": 2594 }, { "epoch": 0.426169605649416, "grad_norm": 0.3917537813124731, "learning_rate": 8.871900994424917e-06, "loss": 0.5666, "step": 2595 }, { "epoch": 0.4263338328577587, "grad_norm": 0.3335513432900561, "learning_rate": 8.871789870236358e-06, "loss": 0.5536, "step": 2596 }, { "epoch": 0.42649806006610147, "grad_norm": 0.3463487402113218, "learning_rate": 8.87167869856586e-06, "loss": 0.5663, "step": 2597 }, { "epoch": 0.4266622872744442, "grad_norm": 0.3872490703111275, "learning_rate": 8.87156747941463e-06, "loss": 0.5594, "step": 2598 }, { "epoch": 0.4268265144827869, "grad_norm": 0.31074493917689083, "learning_rate": 8.871456212783874e-06, "loss": 0.5922, "step": 2599 }, { "epoch": 0.42699074169112966, "grad_norm": 0.31115722805904655, "learning_rate": 8.871344898674806e-06, "loss": 0.5681, "step": 2600 }, { "epoch": 0.4271549688994724, "grad_norm": 0.30377485723430814, "learning_rate": 8.87123353708863e-06, "loss": 0.5751, "step": 2601 }, { "epoch": 0.42731919610781516, "grad_norm": 0.3443330184625631, "learning_rate": 8.871122128026559e-06, "loss": 0.5477, "step": 2602 }, { "epoch": 0.4274834233161579, "grad_norm": 0.3160677382382284, "learning_rate": 8.871010671489798e-06, "loss": 0.551, "step": 2603 }, { "epoch": 0.42764765052450066, "grad_norm": 0.3089935106670285, "learning_rate": 8.870899167479561e-06, "loss": 0.5573, "step": 2604 }, { "epoch": 0.4278118777328434, "grad_norm": 0.29792130232279923, "learning_rate": 8.87078761599706e-06, "loss": 0.5449, "step": 2605 }, { "epoch": 0.42797610494118615, "grad_norm": 0.3416750043491599, "learning_rate": 8.870676017043506e-06, "loss": 0.5558, "step": 2606 }, { "epoch": 0.42814033214952885, "grad_norm": 0.3137377292747842, "learning_rate": 8.870564370620109e-06, "loss": 0.5785, "step": 2607 }, { "epoch": 0.4283045593578716, "grad_norm": 0.337439787190629, "learning_rate": 8.870452676728082e-06, "loss": 0.5636, "step": 2608 }, { "epoch": 0.42846878656621434, "grad_norm": 0.3449765466085809, "learning_rate": 8.870340935368641e-06, "loss": 0.5613, "step": 2609 }, { "epoch": 0.4286330137745571, "grad_norm": 0.3239998293778881, "learning_rate": 8.870229146542996e-06, "loss": 0.5427, "step": 2610 }, { "epoch": 0.42879724098289984, "grad_norm": 0.3629879882971418, "learning_rate": 8.870117310252364e-06, "loss": 0.5485, "step": 2611 }, { "epoch": 0.4289614681912426, "grad_norm": 0.322609355069675, "learning_rate": 8.870005426497957e-06, "loss": 0.5946, "step": 2612 }, { "epoch": 0.42912569539958534, "grad_norm": 0.2760721542583468, "learning_rate": 8.869893495280993e-06, "loss": 0.545, "step": 2613 }, { "epoch": 0.4292899226079281, "grad_norm": 0.377724350549196, "learning_rate": 8.869781516602686e-06, "loss": 0.5727, "step": 2614 }, { "epoch": 0.42945414981627084, "grad_norm": 0.3233053561286611, "learning_rate": 8.869669490464253e-06, "loss": 0.5629, "step": 2615 }, { "epoch": 0.42961837702461353, "grad_norm": 0.35088644192144613, "learning_rate": 8.869557416866907e-06, "loss": 0.561, "step": 2616 }, { "epoch": 0.4297826042329563, "grad_norm": 0.3447050844031401, "learning_rate": 8.86944529581187e-06, "loss": 0.572, "step": 2617 }, { "epoch": 0.429946831441299, "grad_norm": 0.34469094004572337, "learning_rate": 8.86933312730036e-06, "loss": 0.5506, "step": 2618 }, { "epoch": 0.4301110586496418, "grad_norm": 0.327264522482107, "learning_rate": 8.869220911333591e-06, "loss": 0.5374, "step": 2619 }, { "epoch": 0.4302752858579845, "grad_norm": 0.32108065910291667, "learning_rate": 8.869108647912786e-06, "loss": 0.5562, "step": 2620 }, { "epoch": 0.4304395130663273, "grad_norm": 0.3075435330753905, "learning_rate": 8.868996337039163e-06, "loss": 0.5655, "step": 2621 }, { "epoch": 0.43060374027467, "grad_norm": 0.34795979712051084, "learning_rate": 8.868883978713939e-06, "loss": 0.5661, "step": 2622 }, { "epoch": 0.43076796748301277, "grad_norm": 0.36050340443618495, "learning_rate": 8.868771572938337e-06, "loss": 0.5624, "step": 2623 }, { "epoch": 0.43093219469135546, "grad_norm": 0.3918416774697456, "learning_rate": 8.868659119713579e-06, "loss": 0.5749, "step": 2624 }, { "epoch": 0.4310964218996982, "grad_norm": 0.3457358807465997, "learning_rate": 8.868546619040884e-06, "loss": 0.5643, "step": 2625 }, { "epoch": 0.43126064910804096, "grad_norm": 0.39287093615546537, "learning_rate": 8.868434070921473e-06, "loss": 0.5588, "step": 2626 }, { "epoch": 0.4314248763163837, "grad_norm": 0.33227836409954387, "learning_rate": 8.868321475356572e-06, "loss": 0.531, "step": 2627 }, { "epoch": 0.43158910352472646, "grad_norm": 0.30996877615907675, "learning_rate": 8.868208832347401e-06, "loss": 0.54, "step": 2628 }, { "epoch": 0.4317533307330692, "grad_norm": 0.29705507963891703, "learning_rate": 8.868096141895186e-06, "loss": 0.5647, "step": 2629 }, { "epoch": 0.43191755794141196, "grad_norm": 0.3145590224011055, "learning_rate": 8.867983404001147e-06, "loss": 0.575, "step": 2630 }, { "epoch": 0.4320817851497547, "grad_norm": 0.2984999228772706, "learning_rate": 8.867870618666512e-06, "loss": 0.564, "step": 2631 }, { "epoch": 0.43224601235809745, "grad_norm": 0.3948551709555983, "learning_rate": 8.867757785892506e-06, "loss": 0.56, "step": 2632 }, { "epoch": 0.43241023956644015, "grad_norm": 0.313192693394057, "learning_rate": 8.86764490568035e-06, "loss": 0.5537, "step": 2633 }, { "epoch": 0.4325744667747829, "grad_norm": 0.4653518718560589, "learning_rate": 8.867531978031276e-06, "loss": 0.5714, "step": 2634 }, { "epoch": 0.43273869398312564, "grad_norm": 0.3742502734817775, "learning_rate": 8.867419002946505e-06, "loss": 0.5631, "step": 2635 }, { "epoch": 0.4329029211914684, "grad_norm": 0.3232307295194687, "learning_rate": 8.867305980427268e-06, "loss": 0.5529, "step": 2636 }, { "epoch": 0.43306714839981114, "grad_norm": 0.2807991745055016, "learning_rate": 8.867192910474792e-06, "loss": 0.5406, "step": 2637 }, { "epoch": 0.4332313756081539, "grad_norm": 0.297296622670886, "learning_rate": 8.867079793090304e-06, "loss": 0.5506, "step": 2638 }, { "epoch": 0.43339560281649664, "grad_norm": 0.3542755078188722, "learning_rate": 8.866966628275032e-06, "loss": 0.5684, "step": 2639 }, { "epoch": 0.4335598300248394, "grad_norm": 0.2744566073730852, "learning_rate": 8.866853416030206e-06, "loss": 0.5396, "step": 2640 }, { "epoch": 0.4337240572331821, "grad_norm": 0.2959244532919851, "learning_rate": 8.866740156357056e-06, "loss": 0.5702, "step": 2641 }, { "epoch": 0.43388828444152483, "grad_norm": 0.3268923575661857, "learning_rate": 8.866626849256812e-06, "loss": 0.5568, "step": 2642 }, { "epoch": 0.4340525116498676, "grad_norm": 0.30432906161931816, "learning_rate": 8.866513494730702e-06, "loss": 0.54, "step": 2643 }, { "epoch": 0.4342167388582103, "grad_norm": 0.33743946703434036, "learning_rate": 8.866400092779963e-06, "loss": 0.5553, "step": 2644 }, { "epoch": 0.4343809660665531, "grad_norm": 0.2873723233387204, "learning_rate": 8.866286643405819e-06, "loss": 0.5603, "step": 2645 }, { "epoch": 0.4345451932748958, "grad_norm": 0.4160177869919669, "learning_rate": 8.866173146609509e-06, "loss": 0.5581, "step": 2646 }, { "epoch": 0.4347094204832386, "grad_norm": 0.3620215325482711, "learning_rate": 8.866059602392262e-06, "loss": 0.5538, "step": 2647 }, { "epoch": 0.4348736476915813, "grad_norm": 0.34398345578712186, "learning_rate": 8.865946010755313e-06, "loss": 0.5617, "step": 2648 }, { "epoch": 0.43503787489992407, "grad_norm": 0.4050944944383113, "learning_rate": 8.865832371699894e-06, "loss": 0.5628, "step": 2649 }, { "epoch": 0.43520210210826676, "grad_norm": 0.34906966357449254, "learning_rate": 8.86571868522724e-06, "loss": 0.5485, "step": 2650 }, { "epoch": 0.4353663293166095, "grad_norm": 0.2968598963085859, "learning_rate": 8.865604951338585e-06, "loss": 0.5592, "step": 2651 }, { "epoch": 0.43553055652495226, "grad_norm": 0.2987516697302997, "learning_rate": 8.865491170035166e-06, "loss": 0.5615, "step": 2652 }, { "epoch": 0.435694783733295, "grad_norm": 0.3500301965790752, "learning_rate": 8.865377341318218e-06, "loss": 0.5892, "step": 2653 }, { "epoch": 0.43585901094163776, "grad_norm": 0.3929052238635463, "learning_rate": 8.865263465188977e-06, "loss": 0.5495, "step": 2654 }, { "epoch": 0.4360232381499805, "grad_norm": 0.3113448223634506, "learning_rate": 8.865149541648679e-06, "loss": 0.5663, "step": 2655 }, { "epoch": 0.43618746535832326, "grad_norm": 0.31364147135723414, "learning_rate": 8.865035570698563e-06, "loss": 0.5871, "step": 2656 }, { "epoch": 0.436351692566666, "grad_norm": 0.31789727183776045, "learning_rate": 8.864921552339866e-06, "loss": 0.5898, "step": 2657 }, { "epoch": 0.4365159197750087, "grad_norm": 0.2991212298735117, "learning_rate": 8.864807486573827e-06, "loss": 0.5535, "step": 2658 }, { "epoch": 0.43668014698335145, "grad_norm": 0.31462832495510007, "learning_rate": 8.864693373401684e-06, "loss": 0.5674, "step": 2659 }, { "epoch": 0.4368443741916942, "grad_norm": 0.3690385907683935, "learning_rate": 8.864579212824676e-06, "loss": 0.5641, "step": 2660 }, { "epoch": 0.43700860140003694, "grad_norm": 0.30516447961861315, "learning_rate": 8.864465004844045e-06, "loss": 0.5619, "step": 2661 }, { "epoch": 0.4371728286083797, "grad_norm": 0.32974499017869374, "learning_rate": 8.864350749461027e-06, "loss": 0.5534, "step": 2662 }, { "epoch": 0.43733705581672244, "grad_norm": 0.5630108302746223, "learning_rate": 8.864236446676871e-06, "loss": 0.5881, "step": 2663 }, { "epoch": 0.4375012830250652, "grad_norm": 0.33965305823725117, "learning_rate": 8.864122096492808e-06, "loss": 0.5615, "step": 2664 }, { "epoch": 0.43766551023340794, "grad_norm": 0.2980421255968811, "learning_rate": 8.86400769891009e-06, "loss": 0.5415, "step": 2665 }, { "epoch": 0.4378297374417507, "grad_norm": 0.33674308958283006, "learning_rate": 8.863893253929951e-06, "loss": 0.5721, "step": 2666 }, { "epoch": 0.4379939646500934, "grad_norm": 0.37123811762089753, "learning_rate": 8.86377876155364e-06, "loss": 0.5859, "step": 2667 }, { "epoch": 0.43815819185843613, "grad_norm": 0.3360935049247868, "learning_rate": 8.863664221782397e-06, "loss": 0.5466, "step": 2668 }, { "epoch": 0.4383224190667789, "grad_norm": 0.3194927447043426, "learning_rate": 8.863549634617467e-06, "loss": 0.5725, "step": 2669 }, { "epoch": 0.4384866462751216, "grad_norm": 0.31690358845503236, "learning_rate": 8.863435000060097e-06, "loss": 0.5396, "step": 2670 }, { "epoch": 0.4386508734834644, "grad_norm": 0.3191798894096308, "learning_rate": 8.863320318111528e-06, "loss": 0.5577, "step": 2671 }, { "epoch": 0.4388151006918071, "grad_norm": 0.4654210766977454, "learning_rate": 8.863205588773007e-06, "loss": 0.5724, "step": 2672 }, { "epoch": 0.4389793279001499, "grad_norm": 0.3422961348923824, "learning_rate": 8.863090812045783e-06, "loss": 0.5847, "step": 2673 }, { "epoch": 0.4391435551084926, "grad_norm": 0.3121440943186785, "learning_rate": 8.862975987931097e-06, "loss": 0.5774, "step": 2674 }, { "epoch": 0.4393077823168353, "grad_norm": 0.3614058733108181, "learning_rate": 8.8628611164302e-06, "loss": 0.5538, "step": 2675 }, { "epoch": 0.43947200952517806, "grad_norm": 0.3305418031431199, "learning_rate": 8.862746197544341e-06, "loss": 0.5522, "step": 2676 }, { "epoch": 0.4396362367335208, "grad_norm": 0.31728827829254425, "learning_rate": 8.862631231274764e-06, "loss": 0.5637, "step": 2677 }, { "epoch": 0.43980046394186356, "grad_norm": 0.2984532306242673, "learning_rate": 8.862516217622721e-06, "loss": 0.5741, "step": 2678 }, { "epoch": 0.4399646911502063, "grad_norm": 0.30044448216886954, "learning_rate": 8.862401156589457e-06, "loss": 0.5649, "step": 2679 }, { "epoch": 0.44012891835854906, "grad_norm": 0.33355290409354743, "learning_rate": 8.862286048176227e-06, "loss": 0.5435, "step": 2680 }, { "epoch": 0.4402931455668918, "grad_norm": 0.305282881254181, "learning_rate": 8.862170892384278e-06, "loss": 0.5473, "step": 2681 }, { "epoch": 0.44045737277523456, "grad_norm": 0.31605141038354306, "learning_rate": 8.86205568921486e-06, "loss": 0.5179, "step": 2682 }, { "epoch": 0.4406215999835773, "grad_norm": 0.36881933440627185, "learning_rate": 8.861940438669227e-06, "loss": 0.591, "step": 2683 }, { "epoch": 0.44078582719192, "grad_norm": 0.3375529244437988, "learning_rate": 8.86182514074863e-06, "loss": 0.5791, "step": 2684 }, { "epoch": 0.44095005440026275, "grad_norm": 0.30314726667864034, "learning_rate": 8.861709795454319e-06, "loss": 0.5557, "step": 2685 }, { "epoch": 0.4411142816086055, "grad_norm": 0.3364679818060467, "learning_rate": 8.86159440278755e-06, "loss": 0.5733, "step": 2686 }, { "epoch": 0.44127850881694825, "grad_norm": 0.39220116051738163, "learning_rate": 8.861478962749572e-06, "loss": 0.5579, "step": 2687 }, { "epoch": 0.441442736025291, "grad_norm": 0.34252772123554887, "learning_rate": 8.861363475341642e-06, "loss": 0.5544, "step": 2688 }, { "epoch": 0.44160696323363374, "grad_norm": 0.33108150991015783, "learning_rate": 8.861247940565015e-06, "loss": 0.5761, "step": 2689 }, { "epoch": 0.4417711904419765, "grad_norm": 0.32834599672177206, "learning_rate": 8.861132358420943e-06, "loss": 0.5662, "step": 2690 }, { "epoch": 0.44193541765031924, "grad_norm": 0.32244811454221667, "learning_rate": 8.861016728910683e-06, "loss": 0.5671, "step": 2691 }, { "epoch": 0.44209964485866193, "grad_norm": 0.3597249482708189, "learning_rate": 8.860901052035492e-06, "loss": 0.5613, "step": 2692 }, { "epoch": 0.4422638720670047, "grad_norm": 0.37744052906600656, "learning_rate": 8.860785327796625e-06, "loss": 0.553, "step": 2693 }, { "epoch": 0.44242809927534743, "grad_norm": 0.3052348493632045, "learning_rate": 8.860669556195338e-06, "loss": 0.5457, "step": 2694 }, { "epoch": 0.4425923264836902, "grad_norm": 0.30573461825823206, "learning_rate": 8.860553737232889e-06, "loss": 0.5613, "step": 2695 }, { "epoch": 0.44275655369203293, "grad_norm": 0.3007325457721176, "learning_rate": 8.860437870910537e-06, "loss": 0.5534, "step": 2696 }, { "epoch": 0.4429207809003757, "grad_norm": 0.2869011044903269, "learning_rate": 8.86032195722954e-06, "loss": 0.5542, "step": 2697 }, { "epoch": 0.4430850081087184, "grad_norm": 0.5107075695902422, "learning_rate": 8.860205996191155e-06, "loss": 0.5081, "step": 2698 }, { "epoch": 0.4432492353170612, "grad_norm": 0.33253967123335504, "learning_rate": 8.860089987796643e-06, "loss": 0.5554, "step": 2699 }, { "epoch": 0.4434134625254039, "grad_norm": 0.42343864420968474, "learning_rate": 8.859973932047267e-06, "loss": 0.5545, "step": 2700 }, { "epoch": 0.4435776897337466, "grad_norm": 0.30685102611881854, "learning_rate": 8.85985782894428e-06, "loss": 0.5412, "step": 2701 }, { "epoch": 0.44374191694208936, "grad_norm": 0.3169560527008105, "learning_rate": 8.85974167848895e-06, "loss": 0.5784, "step": 2702 }, { "epoch": 0.4439061441504321, "grad_norm": 0.3084651380614442, "learning_rate": 8.859625480682535e-06, "loss": 0.5317, "step": 2703 }, { "epoch": 0.44407037135877486, "grad_norm": 0.441293933174961, "learning_rate": 8.8595092355263e-06, "loss": 0.543, "step": 2704 }, { "epoch": 0.4442345985671176, "grad_norm": 0.46942179938637835, "learning_rate": 8.859392943021504e-06, "loss": 0.5627, "step": 2705 }, { "epoch": 0.44439882577546036, "grad_norm": 0.293267545918469, "learning_rate": 8.859276603169412e-06, "loss": 0.5479, "step": 2706 }, { "epoch": 0.4445630529838031, "grad_norm": 0.45510996453392444, "learning_rate": 8.859160215971286e-06, "loss": 0.5778, "step": 2707 }, { "epoch": 0.44472728019214586, "grad_norm": 0.32926696538846945, "learning_rate": 8.859043781428393e-06, "loss": 0.5417, "step": 2708 }, { "epoch": 0.44489150740048855, "grad_norm": 0.33787931129775456, "learning_rate": 8.858927299541995e-06, "loss": 0.572, "step": 2709 }, { "epoch": 0.4450557346088313, "grad_norm": 0.40369557531155686, "learning_rate": 8.858810770313358e-06, "loss": 0.5943, "step": 2710 }, { "epoch": 0.44521996181717405, "grad_norm": 0.33877275852459676, "learning_rate": 8.858694193743747e-06, "loss": 0.5494, "step": 2711 }, { "epoch": 0.4453841890255168, "grad_norm": 0.3597032289417445, "learning_rate": 8.85857756983443e-06, "loss": 0.5811, "step": 2712 }, { "epoch": 0.44554841623385955, "grad_norm": 0.34694433353018983, "learning_rate": 8.858460898586671e-06, "loss": 0.5472, "step": 2713 }, { "epoch": 0.4457126434422023, "grad_norm": 0.3595141962113896, "learning_rate": 8.858344180001738e-06, "loss": 0.5738, "step": 2714 }, { "epoch": 0.44587687065054504, "grad_norm": 0.31463064675320646, "learning_rate": 8.8582274140809e-06, "loss": 0.5648, "step": 2715 }, { "epoch": 0.4460410978588878, "grad_norm": 0.34440317093930706, "learning_rate": 8.858110600825425e-06, "loss": 0.5654, "step": 2716 }, { "epoch": 0.44620532506723054, "grad_norm": 0.31860356038557297, "learning_rate": 8.857993740236582e-06, "loss": 0.5553, "step": 2717 }, { "epoch": 0.44636955227557323, "grad_norm": 0.3105546506846929, "learning_rate": 8.857876832315636e-06, "loss": 0.5647, "step": 2718 }, { "epoch": 0.446533779483916, "grad_norm": 0.33831142671969106, "learning_rate": 8.857759877063863e-06, "loss": 0.5479, "step": 2719 }, { "epoch": 0.44669800669225873, "grad_norm": 0.3358614278897194, "learning_rate": 8.857642874482528e-06, "loss": 0.5792, "step": 2720 }, { "epoch": 0.4468622339006015, "grad_norm": 1.239182763799466, "learning_rate": 8.857525824572906e-06, "loss": 0.5551, "step": 2721 }, { "epoch": 0.44702646110894423, "grad_norm": 0.41414189926956013, "learning_rate": 8.857408727336265e-06, "loss": 0.5596, "step": 2722 }, { "epoch": 0.447190688317287, "grad_norm": 0.3364069633037803, "learning_rate": 8.857291582773878e-06, "loss": 0.5507, "step": 2723 }, { "epoch": 0.4473549155256297, "grad_norm": 0.32342124716147996, "learning_rate": 8.857174390887019e-06, "loss": 0.5608, "step": 2724 }, { "epoch": 0.4475191427339725, "grad_norm": 0.34676827689048356, "learning_rate": 8.85705715167696e-06, "loss": 0.5582, "step": 2725 }, { "epoch": 0.44768336994231517, "grad_norm": 0.35153982996698413, "learning_rate": 8.856939865144971e-06, "loss": 0.5515, "step": 2726 }, { "epoch": 0.4478475971506579, "grad_norm": 0.2938845084834155, "learning_rate": 8.856822531292329e-06, "loss": 0.5429, "step": 2727 }, { "epoch": 0.44801182435900067, "grad_norm": 0.2910178771667892, "learning_rate": 8.856705150120308e-06, "loss": 0.5645, "step": 2728 }, { "epoch": 0.4481760515673434, "grad_norm": 0.2918827196955868, "learning_rate": 8.856587721630182e-06, "loss": 0.5451, "step": 2729 }, { "epoch": 0.44834027877568616, "grad_norm": 0.3394189888352664, "learning_rate": 8.856470245823227e-06, "loss": 0.5605, "step": 2730 }, { "epoch": 0.4485045059840289, "grad_norm": 0.33940159120607644, "learning_rate": 8.85635272270072e-06, "loss": 0.5491, "step": 2731 }, { "epoch": 0.44866873319237166, "grad_norm": 0.36680822587198164, "learning_rate": 8.856235152263938e-06, "loss": 0.5681, "step": 2732 }, { "epoch": 0.4488329604007144, "grad_norm": 0.3334810084593633, "learning_rate": 8.856117534514154e-06, "loss": 0.5606, "step": 2733 }, { "epoch": 0.44899718760905716, "grad_norm": 0.29740348475239803, "learning_rate": 8.855999869452647e-06, "loss": 0.5647, "step": 2734 }, { "epoch": 0.44916141481739985, "grad_norm": 0.32512754587660186, "learning_rate": 8.855882157080697e-06, "loss": 0.5652, "step": 2735 }, { "epoch": 0.4493256420257426, "grad_norm": 0.3561551765070881, "learning_rate": 8.85576439739958e-06, "loss": 0.5773, "step": 2736 }, { "epoch": 0.44948986923408535, "grad_norm": 0.3304245407015048, "learning_rate": 8.855646590410578e-06, "loss": 0.5666, "step": 2737 }, { "epoch": 0.4496540964424281, "grad_norm": 0.3332153957003678, "learning_rate": 8.855528736114969e-06, "loss": 0.5686, "step": 2738 }, { "epoch": 0.44981832365077085, "grad_norm": 0.3014454733597397, "learning_rate": 8.85541083451403e-06, "loss": 0.5547, "step": 2739 }, { "epoch": 0.4499825508591136, "grad_norm": 0.3068041232696349, "learning_rate": 8.855292885609045e-06, "loss": 0.5625, "step": 2740 }, { "epoch": 0.45014677806745634, "grad_norm": 0.3670493909898495, "learning_rate": 8.855174889401295e-06, "loss": 0.5724, "step": 2741 }, { "epoch": 0.4503110052757991, "grad_norm": 0.3600172039749998, "learning_rate": 8.85505684589206e-06, "loss": 0.552, "step": 2742 }, { "epoch": 0.4504752324841418, "grad_norm": 0.34510224190707856, "learning_rate": 8.854938755082624e-06, "loss": 0.5542, "step": 2743 }, { "epoch": 0.45063945969248453, "grad_norm": 0.3354210595356994, "learning_rate": 8.854820616974267e-06, "loss": 0.5706, "step": 2744 }, { "epoch": 0.4508036869008273, "grad_norm": 0.27484837623978225, "learning_rate": 8.854702431568276e-06, "loss": 0.5536, "step": 2745 }, { "epoch": 0.45096791410917003, "grad_norm": 0.2772875379629504, "learning_rate": 8.85458419886593e-06, "loss": 0.5519, "step": 2746 }, { "epoch": 0.4511321413175128, "grad_norm": 0.36745553176616597, "learning_rate": 8.854465918868516e-06, "loss": 0.5691, "step": 2747 }, { "epoch": 0.45129636852585553, "grad_norm": 0.9592406829660379, "learning_rate": 8.854347591577319e-06, "loss": 0.5453, "step": 2748 }, { "epoch": 0.4514605957341983, "grad_norm": 0.37135239233609174, "learning_rate": 8.85422921699362e-06, "loss": 0.5541, "step": 2749 }, { "epoch": 0.451624822942541, "grad_norm": 0.3242742529723335, "learning_rate": 8.85411079511871e-06, "loss": 0.5644, "step": 2750 }, { "epoch": 0.4517890501508838, "grad_norm": 0.323580754011984, "learning_rate": 8.853992325953872e-06, "loss": 0.5593, "step": 2751 }, { "epoch": 0.45195327735922647, "grad_norm": 0.3877844317699021, "learning_rate": 8.853873809500395e-06, "loss": 0.5353, "step": 2752 }, { "epoch": 0.4521175045675692, "grad_norm": 0.32987127792086013, "learning_rate": 8.853755245759564e-06, "loss": 0.547, "step": 2753 }, { "epoch": 0.45228173177591197, "grad_norm": 0.3243082577170755, "learning_rate": 8.853636634732668e-06, "loss": 0.5534, "step": 2754 }, { "epoch": 0.4524459589842547, "grad_norm": 0.31835431916955587, "learning_rate": 8.853517976420993e-06, "loss": 0.5437, "step": 2755 }, { "epoch": 0.45261018619259746, "grad_norm": 0.34976967541051596, "learning_rate": 8.85339927082583e-06, "loss": 0.5577, "step": 2756 }, { "epoch": 0.4527744134009402, "grad_norm": 0.2923139255600356, "learning_rate": 8.853280517948468e-06, "loss": 0.5569, "step": 2757 }, { "epoch": 0.45293864060928296, "grad_norm": 0.30552005384252057, "learning_rate": 8.853161717790197e-06, "loss": 0.5416, "step": 2758 }, { "epoch": 0.4531028678176257, "grad_norm": 0.3990802742968821, "learning_rate": 8.853042870352308e-06, "loss": 0.5465, "step": 2759 }, { "epoch": 0.4532670950259684, "grad_norm": 0.3461464555447581, "learning_rate": 8.852923975636089e-06, "loss": 0.5565, "step": 2760 }, { "epoch": 0.45343132223431115, "grad_norm": 0.4068062311659384, "learning_rate": 8.852805033642834e-06, "loss": 0.5293, "step": 2761 }, { "epoch": 0.4535955494426539, "grad_norm": 0.4050717095926476, "learning_rate": 8.852686044373831e-06, "loss": 0.5512, "step": 2762 }, { "epoch": 0.45375977665099665, "grad_norm": 0.3106886645730793, "learning_rate": 8.852567007830378e-06, "loss": 0.566, "step": 2763 }, { "epoch": 0.4539240038593394, "grad_norm": 0.3465009114201763, "learning_rate": 8.852447924013763e-06, "loss": 0.5429, "step": 2764 }, { "epoch": 0.45408823106768215, "grad_norm": 0.34100205206268885, "learning_rate": 8.852328792925284e-06, "loss": 0.5562, "step": 2765 }, { "epoch": 0.4542524582760249, "grad_norm": 0.3373659034184963, "learning_rate": 8.85220961456623e-06, "loss": 0.5508, "step": 2766 }, { "epoch": 0.45441668548436764, "grad_norm": 0.32662364480543243, "learning_rate": 8.852090388937899e-06, "loss": 0.5574, "step": 2767 }, { "epoch": 0.45458091269271034, "grad_norm": 0.29597270835282974, "learning_rate": 8.851971116041582e-06, "loss": 0.5619, "step": 2768 }, { "epoch": 0.4547451399010531, "grad_norm": 0.3269485670857866, "learning_rate": 8.85185179587858e-06, "loss": 0.5336, "step": 2769 }, { "epoch": 0.45490936710939583, "grad_norm": 0.3079376099727638, "learning_rate": 8.851732428450183e-06, "loss": 0.549, "step": 2770 }, { "epoch": 0.4550735943177386, "grad_norm": 0.3205362156914591, "learning_rate": 8.851613013757693e-06, "loss": 0.5441, "step": 2771 }, { "epoch": 0.45523782152608133, "grad_norm": 0.32782562576015545, "learning_rate": 8.851493551802403e-06, "loss": 0.5498, "step": 2772 }, { "epoch": 0.4554020487344241, "grad_norm": 0.36441285832394543, "learning_rate": 8.851374042585612e-06, "loss": 0.5473, "step": 2773 }, { "epoch": 0.45556627594276683, "grad_norm": 0.3225326006599982, "learning_rate": 8.851254486108616e-06, "loss": 0.5283, "step": 2774 }, { "epoch": 0.4557305031511096, "grad_norm": 0.29349747631366513, "learning_rate": 8.851134882372716e-06, "loss": 0.5582, "step": 2775 }, { "epoch": 0.4558947303594523, "grad_norm": 0.3107685467178685, "learning_rate": 8.851015231379211e-06, "loss": 0.5504, "step": 2776 }, { "epoch": 0.456058957567795, "grad_norm": 0.31413826669046374, "learning_rate": 8.8508955331294e-06, "loss": 0.5159, "step": 2777 }, { "epoch": 0.45622318477613777, "grad_norm": 0.2936011119768247, "learning_rate": 8.850775787624584e-06, "loss": 0.539, "step": 2778 }, { "epoch": 0.4563874119844805, "grad_norm": 0.3475480393438803, "learning_rate": 8.85065599486606e-06, "loss": 0.5642, "step": 2779 }, { "epoch": 0.45655163919282327, "grad_norm": 0.3098780378515171, "learning_rate": 8.850536154855132e-06, "loss": 0.5429, "step": 2780 }, { "epoch": 0.456715866401166, "grad_norm": 0.9929195452826118, "learning_rate": 8.850416267593102e-06, "loss": 0.5698, "step": 2781 }, { "epoch": 0.45688009360950876, "grad_norm": 0.3244883281188489, "learning_rate": 8.85029633308127e-06, "loss": 0.5742, "step": 2782 }, { "epoch": 0.4570443208178515, "grad_norm": 0.31113246989096616, "learning_rate": 8.85017635132094e-06, "loss": 0.5397, "step": 2783 }, { "epoch": 0.45720854802619426, "grad_norm": 0.31519864743587955, "learning_rate": 8.850056322313414e-06, "loss": 0.5792, "step": 2784 }, { "epoch": 0.45737277523453695, "grad_norm": 0.3145460315844349, "learning_rate": 8.849936246059998e-06, "loss": 0.5623, "step": 2785 }, { "epoch": 0.4575370024428797, "grad_norm": 0.38866288927128867, "learning_rate": 8.849816122561993e-06, "loss": 0.5444, "step": 2786 }, { "epoch": 0.45770122965122245, "grad_norm": 0.32096724495160234, "learning_rate": 8.849695951820707e-06, "loss": 0.5496, "step": 2787 }, { "epoch": 0.4578654568595652, "grad_norm": 0.37573771878896706, "learning_rate": 8.849575733837444e-06, "loss": 0.5418, "step": 2788 }, { "epoch": 0.45802968406790795, "grad_norm": 0.27925464921147647, "learning_rate": 8.849455468613506e-06, "loss": 0.5458, "step": 2789 }, { "epoch": 0.4581939112762507, "grad_norm": 0.36654175597987393, "learning_rate": 8.849335156150205e-06, "loss": 0.5602, "step": 2790 }, { "epoch": 0.45835813848459345, "grad_norm": 0.3155475116274339, "learning_rate": 8.849214796448844e-06, "loss": 0.5824, "step": 2791 }, { "epoch": 0.4585223656929362, "grad_norm": 0.3428136888193705, "learning_rate": 8.84909438951073e-06, "loss": 0.569, "step": 2792 }, { "epoch": 0.45868659290127894, "grad_norm": 0.30583378806886213, "learning_rate": 8.848973935337174e-06, "loss": 0.5545, "step": 2793 }, { "epoch": 0.45885082010962164, "grad_norm": 0.33735566982609017, "learning_rate": 8.848853433929482e-06, "loss": 0.5572, "step": 2794 }, { "epoch": 0.4590150473179644, "grad_norm": 0.31361528788938636, "learning_rate": 8.848732885288963e-06, "loss": 0.5697, "step": 2795 }, { "epoch": 0.45917927452630714, "grad_norm": 0.34545031796156106, "learning_rate": 8.848612289416926e-06, "loss": 0.5642, "step": 2796 }, { "epoch": 0.4593435017346499, "grad_norm": 0.2874345855470007, "learning_rate": 8.84849164631468e-06, "loss": 0.5849, "step": 2797 }, { "epoch": 0.45950772894299263, "grad_norm": 0.28314439970383, "learning_rate": 8.848370955983539e-06, "loss": 0.5566, "step": 2798 }, { "epoch": 0.4596719561513354, "grad_norm": 0.29925600402316954, "learning_rate": 8.848250218424809e-06, "loss": 0.5598, "step": 2799 }, { "epoch": 0.45983618335967813, "grad_norm": 0.5002875320880814, "learning_rate": 8.848129433639803e-06, "loss": 0.5434, "step": 2800 }, { "epoch": 0.4600004105680209, "grad_norm": 0.33733296405596075, "learning_rate": 8.848008601629834e-06, "loss": 0.5406, "step": 2801 }, { "epoch": 0.46016463777636357, "grad_norm": 0.3162926750411412, "learning_rate": 8.847887722396215e-06, "loss": 0.5539, "step": 2802 }, { "epoch": 0.4603288649847063, "grad_norm": 0.3281447732451233, "learning_rate": 8.847766795940256e-06, "loss": 0.5747, "step": 2803 }, { "epoch": 0.46049309219304907, "grad_norm": 0.2805684954794172, "learning_rate": 8.847645822263274e-06, "loss": 0.5665, "step": 2804 }, { "epoch": 0.4606573194013918, "grad_norm": 0.39677613691474567, "learning_rate": 8.847524801366579e-06, "loss": 0.5454, "step": 2805 }, { "epoch": 0.46082154660973457, "grad_norm": 0.29768665118106963, "learning_rate": 8.847403733251488e-06, "loss": 0.5619, "step": 2806 }, { "epoch": 0.4609857738180773, "grad_norm": 0.2719942035991359, "learning_rate": 8.847282617919317e-06, "loss": 0.5527, "step": 2807 }, { "epoch": 0.46115000102642006, "grad_norm": 0.30476150813606573, "learning_rate": 8.847161455371376e-06, "loss": 0.5661, "step": 2808 }, { "epoch": 0.4613142282347628, "grad_norm": 0.35212052120108744, "learning_rate": 8.847040245608987e-06, "loss": 0.54, "step": 2809 }, { "epoch": 0.46147845544310556, "grad_norm": 0.3856979779368862, "learning_rate": 8.846918988633464e-06, "loss": 0.5946, "step": 2810 }, { "epoch": 0.46164268265144826, "grad_norm": 0.28956054679877297, "learning_rate": 8.846797684446123e-06, "loss": 0.5461, "step": 2811 }, { "epoch": 0.461806909859791, "grad_norm": 0.3947231541849336, "learning_rate": 8.846676333048283e-06, "loss": 0.5781, "step": 2812 }, { "epoch": 0.46197113706813375, "grad_norm": 0.3223342533441565, "learning_rate": 8.84655493444126e-06, "loss": 0.5637, "step": 2813 }, { "epoch": 0.4621353642764765, "grad_norm": 0.31670445757816623, "learning_rate": 8.846433488626376e-06, "loss": 0.5548, "step": 2814 }, { "epoch": 0.46229959148481925, "grad_norm": 0.3614008756161484, "learning_rate": 8.846311995604947e-06, "loss": 0.5659, "step": 2815 }, { "epoch": 0.462463818693162, "grad_norm": 0.3072418723805468, "learning_rate": 8.846190455378293e-06, "loss": 0.5395, "step": 2816 }, { "epoch": 0.46262804590150475, "grad_norm": 0.3025970724441588, "learning_rate": 8.846068867947736e-06, "loss": 0.545, "step": 2817 }, { "epoch": 0.4627922731098475, "grad_norm": 0.4316806453711984, "learning_rate": 8.845947233314595e-06, "loss": 0.5384, "step": 2818 }, { "epoch": 0.4629565003181902, "grad_norm": 0.3162804536736171, "learning_rate": 8.845825551480192e-06, "loss": 0.5714, "step": 2819 }, { "epoch": 0.46312072752653294, "grad_norm": 0.3311951157979156, "learning_rate": 8.845703822445845e-06, "loss": 0.5648, "step": 2820 }, { "epoch": 0.4632849547348757, "grad_norm": 0.3077557161039132, "learning_rate": 8.84558204621288e-06, "loss": 0.5737, "step": 2821 }, { "epoch": 0.46344918194321844, "grad_norm": 0.4303477814296107, "learning_rate": 8.845460222782619e-06, "loss": 0.5653, "step": 2822 }, { "epoch": 0.4636134091515612, "grad_norm": 0.29648758662065006, "learning_rate": 8.845338352156384e-06, "loss": 0.561, "step": 2823 }, { "epoch": 0.46377763635990393, "grad_norm": 0.3183803997527791, "learning_rate": 8.8452164343355e-06, "loss": 0.5502, "step": 2824 }, { "epoch": 0.4639418635682467, "grad_norm": 0.3179317015924949, "learning_rate": 8.845094469321291e-06, "loss": 0.5111, "step": 2825 }, { "epoch": 0.46410609077658943, "grad_norm": 0.33356525833540174, "learning_rate": 8.84497245711508e-06, "loss": 0.5532, "step": 2826 }, { "epoch": 0.4642703179849322, "grad_norm": 0.35637651622204897, "learning_rate": 8.844850397718193e-06, "loss": 0.5457, "step": 2827 }, { "epoch": 0.4644345451932749, "grad_norm": 0.3510786554960796, "learning_rate": 8.844728291131956e-06, "loss": 0.542, "step": 2828 }, { "epoch": 0.4645987724016176, "grad_norm": 0.36430876563423503, "learning_rate": 8.844606137357697e-06, "loss": 0.5329, "step": 2829 }, { "epoch": 0.46476299960996037, "grad_norm": 0.28596151687840704, "learning_rate": 8.84448393639674e-06, "loss": 0.5466, "step": 2830 }, { "epoch": 0.4649272268183031, "grad_norm": 0.3332646439130557, "learning_rate": 8.844361688250412e-06, "loss": 0.5234, "step": 2831 }, { "epoch": 0.46509145402664587, "grad_norm": 0.29528217400431656, "learning_rate": 8.844239392920044e-06, "loss": 0.5533, "step": 2832 }, { "epoch": 0.4652556812349886, "grad_norm": 0.3045475455711421, "learning_rate": 8.844117050406958e-06, "loss": 0.5516, "step": 2833 }, { "epoch": 0.46541990844333136, "grad_norm": 0.3673341114723952, "learning_rate": 8.84399466071249e-06, "loss": 0.5628, "step": 2834 }, { "epoch": 0.4655841356516741, "grad_norm": 0.36352857411555345, "learning_rate": 8.843872223837964e-06, "loss": 0.5527, "step": 2835 }, { "epoch": 0.4657483628600168, "grad_norm": 0.37350387535762136, "learning_rate": 8.843749739784714e-06, "loss": 0.563, "step": 2836 }, { "epoch": 0.46591259006835956, "grad_norm": 0.31369805940776757, "learning_rate": 8.843627208554067e-06, "loss": 0.5599, "step": 2837 }, { "epoch": 0.4660768172767023, "grad_norm": 0.30836598525731435, "learning_rate": 8.843504630147356e-06, "loss": 0.5575, "step": 2838 }, { "epoch": 0.46624104448504505, "grad_norm": 0.3666029081716213, "learning_rate": 8.843382004565909e-06, "loss": 0.544, "step": 2839 }, { "epoch": 0.4664052716933878, "grad_norm": 0.32683584986669917, "learning_rate": 8.843259331811062e-06, "loss": 0.5926, "step": 2840 }, { "epoch": 0.46656949890173055, "grad_norm": 0.3935470331834398, "learning_rate": 8.843136611884145e-06, "loss": 0.5502, "step": 2841 }, { "epoch": 0.4667337261100733, "grad_norm": 0.32710167529269035, "learning_rate": 8.843013844786491e-06, "loss": 0.5599, "step": 2842 }, { "epoch": 0.46689795331841605, "grad_norm": 0.3596882455911912, "learning_rate": 8.842891030519434e-06, "loss": 0.551, "step": 2843 }, { "epoch": 0.4670621805267588, "grad_norm": 0.29837305973530415, "learning_rate": 8.842768169084309e-06, "loss": 0.5616, "step": 2844 }, { "epoch": 0.4672264077351015, "grad_norm": 0.36261886857746567, "learning_rate": 8.842645260482446e-06, "loss": 0.546, "step": 2845 }, { "epoch": 0.46739063494344424, "grad_norm": 0.3239781103541487, "learning_rate": 8.842522304715184e-06, "loss": 0.5493, "step": 2846 }, { "epoch": 0.467554862151787, "grad_norm": 0.3022618320478039, "learning_rate": 8.842399301783859e-06, "loss": 0.5563, "step": 2847 }, { "epoch": 0.46771908936012974, "grad_norm": 0.3324394490245069, "learning_rate": 8.842276251689804e-06, "loss": 0.5467, "step": 2848 }, { "epoch": 0.4678833165684725, "grad_norm": 0.34005289281067436, "learning_rate": 8.842153154434357e-06, "loss": 0.5507, "step": 2849 }, { "epoch": 0.46804754377681523, "grad_norm": 0.30758197825127814, "learning_rate": 8.842030010018855e-06, "loss": 0.5617, "step": 2850 }, { "epoch": 0.468211770985158, "grad_norm": 0.3485397690878994, "learning_rate": 8.841906818444634e-06, "loss": 0.5414, "step": 2851 }, { "epoch": 0.46837599819350073, "grad_norm": 0.28465131392759013, "learning_rate": 8.841783579713033e-06, "loss": 0.5549, "step": 2852 }, { "epoch": 0.4685402254018434, "grad_norm": 0.340827843779612, "learning_rate": 8.841660293825392e-06, "loss": 0.5542, "step": 2853 }, { "epoch": 0.4687044526101862, "grad_norm": 0.3642631778373032, "learning_rate": 8.841536960783047e-06, "loss": 0.5422, "step": 2854 }, { "epoch": 0.4688686798185289, "grad_norm": 0.3350444847989983, "learning_rate": 8.84141358058734e-06, "loss": 0.5582, "step": 2855 }, { "epoch": 0.46903290702687167, "grad_norm": 0.32037747228626573, "learning_rate": 8.84129015323961e-06, "loss": 0.5511, "step": 2856 }, { "epoch": 0.4691971342352144, "grad_norm": 0.32535957757017653, "learning_rate": 8.841166678741197e-06, "loss": 0.5542, "step": 2857 }, { "epoch": 0.46936136144355717, "grad_norm": 0.3612115613975925, "learning_rate": 8.841043157093444e-06, "loss": 0.5645, "step": 2858 }, { "epoch": 0.4695255886518999, "grad_norm": 0.3032820965069931, "learning_rate": 8.840919588297691e-06, "loss": 0.5561, "step": 2859 }, { "epoch": 0.46968981586024267, "grad_norm": 0.34091583425847477, "learning_rate": 8.84079597235528e-06, "loss": 0.5539, "step": 2860 }, { "epoch": 0.4698540430685854, "grad_norm": 0.3366327725164345, "learning_rate": 8.840672309267553e-06, "loss": 0.5512, "step": 2861 }, { "epoch": 0.4700182702769281, "grad_norm": 0.33509050371301097, "learning_rate": 8.840548599035857e-06, "loss": 0.5806, "step": 2862 }, { "epoch": 0.47018249748527086, "grad_norm": 0.3043078378641758, "learning_rate": 8.84042484166153e-06, "loss": 0.5549, "step": 2863 }, { "epoch": 0.4703467246936136, "grad_norm": 0.2911581650021685, "learning_rate": 8.840301037145919e-06, "loss": 0.5385, "step": 2864 }, { "epoch": 0.47051095190195635, "grad_norm": 0.33595882669664007, "learning_rate": 8.840177185490369e-06, "loss": 0.5284, "step": 2865 }, { "epoch": 0.4706751791102991, "grad_norm": 0.3617199149406533, "learning_rate": 8.840053286696224e-06, "loss": 0.5446, "step": 2866 }, { "epoch": 0.47083940631864185, "grad_norm": 0.30372519781604457, "learning_rate": 8.839929340764832e-06, "loss": 0.5728, "step": 2867 }, { "epoch": 0.4710036335269846, "grad_norm": 0.31933667718002556, "learning_rate": 8.839805347697536e-06, "loss": 0.5518, "step": 2868 }, { "epoch": 0.47116786073532735, "grad_norm": 0.3966424493600261, "learning_rate": 8.839681307495685e-06, "loss": 0.5743, "step": 2869 }, { "epoch": 0.47133208794367004, "grad_norm": 0.3348971739237751, "learning_rate": 8.839557220160626e-06, "loss": 0.5524, "step": 2870 }, { "epoch": 0.4714963151520128, "grad_norm": 0.3213892824965565, "learning_rate": 8.839433085693704e-06, "loss": 0.5628, "step": 2871 }, { "epoch": 0.47166054236035554, "grad_norm": 0.4390491443368386, "learning_rate": 8.839308904096272e-06, "loss": 0.5376, "step": 2872 }, { "epoch": 0.4718247695686983, "grad_norm": 0.32059743684089986, "learning_rate": 8.839184675369672e-06, "loss": 0.5614, "step": 2873 }, { "epoch": 0.47198899677704104, "grad_norm": 0.3058018710793769, "learning_rate": 8.83906039951526e-06, "loss": 0.5359, "step": 2874 }, { "epoch": 0.4721532239853838, "grad_norm": 0.2804240335544641, "learning_rate": 8.838936076534381e-06, "loss": 0.5442, "step": 2875 }, { "epoch": 0.47231745119372653, "grad_norm": 0.3746199995295414, "learning_rate": 8.83881170642839e-06, "loss": 0.5763, "step": 2876 }, { "epoch": 0.4724816784020693, "grad_norm": 0.43755114764082026, "learning_rate": 8.838687289198634e-06, "loss": 0.5677, "step": 2877 }, { "epoch": 0.47264590561041203, "grad_norm": 0.30021384425688985, "learning_rate": 8.838562824846464e-06, "loss": 0.5354, "step": 2878 }, { "epoch": 0.4728101328187547, "grad_norm": 0.2982429736947675, "learning_rate": 8.838438313373234e-06, "loss": 0.5523, "step": 2879 }, { "epoch": 0.4729743600270975, "grad_norm": 0.3139337209816409, "learning_rate": 8.838313754780297e-06, "loss": 0.5442, "step": 2880 }, { "epoch": 0.4731385872354402, "grad_norm": 0.38187862880767065, "learning_rate": 8.838189149069004e-06, "loss": 0.5596, "step": 2881 }, { "epoch": 0.47330281444378297, "grad_norm": 0.35478374350718284, "learning_rate": 8.838064496240706e-06, "loss": 0.5492, "step": 2882 }, { "epoch": 0.4734670416521257, "grad_norm": 0.3149790656321742, "learning_rate": 8.837939796296762e-06, "loss": 0.5453, "step": 2883 }, { "epoch": 0.47363126886046847, "grad_norm": 0.349800653583477, "learning_rate": 8.837815049238523e-06, "loss": 0.5303, "step": 2884 }, { "epoch": 0.4737954960688112, "grad_norm": 0.3199958228053064, "learning_rate": 8.837690255067346e-06, "loss": 0.5516, "step": 2885 }, { "epoch": 0.47395972327715397, "grad_norm": 0.3614633246024179, "learning_rate": 8.837565413784583e-06, "loss": 0.5503, "step": 2886 }, { "epoch": 0.47412395048549666, "grad_norm": 0.41022846573910837, "learning_rate": 8.837440525391593e-06, "loss": 0.5704, "step": 2887 }, { "epoch": 0.4742881776938394, "grad_norm": 0.32146513999845583, "learning_rate": 8.83731558988973e-06, "loss": 0.5593, "step": 2888 }, { "epoch": 0.47445240490218216, "grad_norm": 0.3522243215874715, "learning_rate": 8.837190607280355e-06, "loss": 0.5535, "step": 2889 }, { "epoch": 0.4746166321105249, "grad_norm": 0.34027532771479646, "learning_rate": 8.83706557756482e-06, "loss": 0.5422, "step": 2890 }, { "epoch": 0.47478085931886765, "grad_norm": 0.3531730299805744, "learning_rate": 8.836940500744489e-06, "loss": 0.5617, "step": 2891 }, { "epoch": 0.4749450865272104, "grad_norm": 0.3674374072994261, "learning_rate": 8.836815376820715e-06, "loss": 0.5592, "step": 2892 }, { "epoch": 0.47510931373555315, "grad_norm": 0.32970365360706744, "learning_rate": 8.836690205794858e-06, "loss": 0.5676, "step": 2893 }, { "epoch": 0.4752735409438959, "grad_norm": 0.39642473826995, "learning_rate": 8.836564987668281e-06, "loss": 0.5787, "step": 2894 }, { "epoch": 0.47543776815223865, "grad_norm": 0.2868884450572186, "learning_rate": 8.836439722442341e-06, "loss": 0.5578, "step": 2895 }, { "epoch": 0.47560199536058134, "grad_norm": 0.3071135606555718, "learning_rate": 8.8363144101184e-06, "loss": 0.5446, "step": 2896 }, { "epoch": 0.4757662225689241, "grad_norm": 0.4085356980863807, "learning_rate": 8.836189050697817e-06, "loss": 0.5494, "step": 2897 }, { "epoch": 0.47593044977726684, "grad_norm": 0.29666396729431577, "learning_rate": 8.836063644181954e-06, "loss": 0.544, "step": 2898 }, { "epoch": 0.4760946769856096, "grad_norm": 0.30255551130327596, "learning_rate": 8.835938190572174e-06, "loss": 0.5499, "step": 2899 }, { "epoch": 0.47625890419395234, "grad_norm": 0.2806984477691293, "learning_rate": 8.83581268986984e-06, "loss": 0.5648, "step": 2900 }, { "epoch": 0.4764231314022951, "grad_norm": 0.3263273596646186, "learning_rate": 8.835687142076314e-06, "loss": 0.5561, "step": 2901 }, { "epoch": 0.47658735861063783, "grad_norm": 0.3790842531002415, "learning_rate": 8.83556154719296e-06, "loss": 0.5744, "step": 2902 }, { "epoch": 0.4767515858189806, "grad_norm": 0.3417983950853606, "learning_rate": 8.835435905221142e-06, "loss": 0.5652, "step": 2903 }, { "epoch": 0.4769158130273233, "grad_norm": 0.29218721475019915, "learning_rate": 8.835310216162224e-06, "loss": 0.5554, "step": 2904 }, { "epoch": 0.477080040235666, "grad_norm": 0.31113339800938206, "learning_rate": 8.835184480017572e-06, "loss": 0.5521, "step": 2905 }, { "epoch": 0.4772442674440088, "grad_norm": 0.2985823425378832, "learning_rate": 8.835058696788552e-06, "loss": 0.5459, "step": 2906 }, { "epoch": 0.4774084946523515, "grad_norm": 0.35116196870091576, "learning_rate": 8.834932866476531e-06, "loss": 0.5569, "step": 2907 }, { "epoch": 0.47757272186069427, "grad_norm": 0.285228916766138, "learning_rate": 8.83480698908287e-06, "loss": 0.5476, "step": 2908 }, { "epoch": 0.477736949069037, "grad_norm": 0.32355675164680775, "learning_rate": 8.834681064608944e-06, "loss": 0.5616, "step": 2909 }, { "epoch": 0.47790117627737977, "grad_norm": 0.29250415203008606, "learning_rate": 8.834555093056114e-06, "loss": 0.5452, "step": 2910 }, { "epoch": 0.4780654034857225, "grad_norm": 0.4086216732635031, "learning_rate": 8.834429074425752e-06, "loss": 0.5887, "step": 2911 }, { "epoch": 0.47822963069406527, "grad_norm": 0.30486039535028525, "learning_rate": 8.834303008719226e-06, "loss": 0.5586, "step": 2912 }, { "epoch": 0.47839385790240796, "grad_norm": 0.392026031748003, "learning_rate": 8.834176895937906e-06, "loss": 0.5454, "step": 2913 }, { "epoch": 0.4785580851107507, "grad_norm": 0.32606961737462525, "learning_rate": 8.834050736083158e-06, "loss": 0.5565, "step": 2914 }, { "epoch": 0.47872231231909346, "grad_norm": 0.3567403007077362, "learning_rate": 8.833924529156357e-06, "loss": 0.5579, "step": 2915 }, { "epoch": 0.4788865395274362, "grad_norm": 0.2962722445309099, "learning_rate": 8.833798275158871e-06, "loss": 0.5517, "step": 2916 }, { "epoch": 0.47905076673577895, "grad_norm": 0.31457620394082064, "learning_rate": 8.83367197409207e-06, "loss": 0.5654, "step": 2917 }, { "epoch": 0.4792149939441217, "grad_norm": 0.36662242627563446, "learning_rate": 8.833545625957332e-06, "loss": 0.552, "step": 2918 }, { "epoch": 0.47937922115246445, "grad_norm": 0.28945439867043166, "learning_rate": 8.833419230756021e-06, "loss": 0.5611, "step": 2919 }, { "epoch": 0.4795434483608072, "grad_norm": 0.3090300015262434, "learning_rate": 8.833292788489517e-06, "loss": 0.57, "step": 2920 }, { "epoch": 0.4797076755691499, "grad_norm": 0.3367798755701906, "learning_rate": 8.833166299159187e-06, "loss": 0.5635, "step": 2921 }, { "epoch": 0.47987190277749264, "grad_norm": 0.3211307845952408, "learning_rate": 8.833039762766408e-06, "loss": 0.539, "step": 2922 }, { "epoch": 0.4800361299858354, "grad_norm": 0.3269164013348773, "learning_rate": 8.832913179312555e-06, "loss": 0.5481, "step": 2923 }, { "epoch": 0.48020035719417814, "grad_norm": 0.3193091915753343, "learning_rate": 8.832786548799002e-06, "loss": 0.5685, "step": 2924 }, { "epoch": 0.4803645844025209, "grad_norm": 0.29715881654797105, "learning_rate": 8.832659871227124e-06, "loss": 0.547, "step": 2925 }, { "epoch": 0.48052881161086364, "grad_norm": 0.3092509709334806, "learning_rate": 8.832533146598297e-06, "loss": 0.5407, "step": 2926 }, { "epoch": 0.4806930388192064, "grad_norm": 0.2997878748298698, "learning_rate": 8.832406374913896e-06, "loss": 0.5437, "step": 2927 }, { "epoch": 0.48085726602754914, "grad_norm": 0.36833955082132547, "learning_rate": 8.832279556175302e-06, "loss": 0.5591, "step": 2928 }, { "epoch": 0.4810214932358919, "grad_norm": 0.31188670229780074, "learning_rate": 8.832152690383887e-06, "loss": 0.5523, "step": 2929 }, { "epoch": 0.4811857204442346, "grad_norm": 0.31498192500648686, "learning_rate": 8.832025777541032e-06, "loss": 0.5367, "step": 2930 }, { "epoch": 0.4813499476525773, "grad_norm": 0.32468516735093983, "learning_rate": 8.831898817648116e-06, "loss": 0.5561, "step": 2931 }, { "epoch": 0.4815141748609201, "grad_norm": 0.33711950838323035, "learning_rate": 8.831771810706518e-06, "loss": 0.5711, "step": 2932 }, { "epoch": 0.4816784020692628, "grad_norm": 0.3467531673707492, "learning_rate": 8.831644756717614e-06, "loss": 0.576, "step": 2933 }, { "epoch": 0.48184262927760557, "grad_norm": 0.3587735987166933, "learning_rate": 8.831517655682787e-06, "loss": 0.5603, "step": 2934 }, { "epoch": 0.4820068564859483, "grad_norm": 0.3479101996126524, "learning_rate": 8.831390507603416e-06, "loss": 0.5607, "step": 2935 }, { "epoch": 0.48217108369429107, "grad_norm": 0.35567025483515724, "learning_rate": 8.831263312480883e-06, "loss": 0.5428, "step": 2936 }, { "epoch": 0.4823353109026338, "grad_norm": 0.34050479161407216, "learning_rate": 8.831136070316568e-06, "loss": 0.5335, "step": 2937 }, { "epoch": 0.4824995381109765, "grad_norm": 0.34844624545525676, "learning_rate": 8.831008781111855e-06, "loss": 0.5609, "step": 2938 }, { "epoch": 0.48266376531931926, "grad_norm": 0.3534685183853488, "learning_rate": 8.830881444868126e-06, "loss": 0.5601, "step": 2939 }, { "epoch": 0.482827992527662, "grad_norm": 0.3576134623563205, "learning_rate": 8.830754061586764e-06, "loss": 0.5622, "step": 2940 }, { "epoch": 0.48299221973600476, "grad_norm": 0.37002257796135896, "learning_rate": 8.83062663126915e-06, "loss": 0.556, "step": 2941 }, { "epoch": 0.4831564469443475, "grad_norm": 0.37832760131778265, "learning_rate": 8.830499153916671e-06, "loss": 0.5463, "step": 2942 }, { "epoch": 0.48332067415269026, "grad_norm": 0.3257646983345067, "learning_rate": 8.83037162953071e-06, "loss": 0.5626, "step": 2943 }, { "epoch": 0.483484901361033, "grad_norm": 0.34700830090226, "learning_rate": 8.830244058112655e-06, "loss": 0.5396, "step": 2944 }, { "epoch": 0.48364912856937575, "grad_norm": 0.31036138655536344, "learning_rate": 8.830116439663887e-06, "loss": 0.5197, "step": 2945 }, { "epoch": 0.4838133557777185, "grad_norm": 0.3565916198778085, "learning_rate": 8.829988774185794e-06, "loss": 0.5376, "step": 2946 }, { "epoch": 0.4839775829860612, "grad_norm": 0.3681633251091734, "learning_rate": 8.829861061679763e-06, "loss": 0.5404, "step": 2947 }, { "epoch": 0.48414181019440394, "grad_norm": 0.4760323163689969, "learning_rate": 8.829733302147182e-06, "loss": 0.5564, "step": 2948 }, { "epoch": 0.4843060374027467, "grad_norm": 0.3142530374093069, "learning_rate": 8.829605495589436e-06, "loss": 0.5424, "step": 2949 }, { "epoch": 0.48447026461108944, "grad_norm": 0.395721637651938, "learning_rate": 8.829477642007915e-06, "loss": 0.5428, "step": 2950 }, { "epoch": 0.4846344918194322, "grad_norm": 0.3317668352816898, "learning_rate": 8.82934974140401e-06, "loss": 0.5413, "step": 2951 }, { "epoch": 0.48479871902777494, "grad_norm": 0.4043660326447801, "learning_rate": 8.829221793779102e-06, "loss": 0.5645, "step": 2952 }, { "epoch": 0.4849629462361177, "grad_norm": 0.31928485059252737, "learning_rate": 8.82909379913459e-06, "loss": 0.5396, "step": 2953 }, { "epoch": 0.48512717344446044, "grad_norm": 0.30272961197459414, "learning_rate": 8.828965757471858e-06, "loss": 0.5611, "step": 2954 }, { "epoch": 0.48529140065280313, "grad_norm": 0.3219097780968554, "learning_rate": 8.8288376687923e-06, "loss": 0.5495, "step": 2955 }, { "epoch": 0.4854556278611459, "grad_norm": 0.3068594785573637, "learning_rate": 8.828709533097304e-06, "loss": 0.527, "step": 2956 }, { "epoch": 0.4856198550694886, "grad_norm": 0.3286151563554068, "learning_rate": 8.828581350388267e-06, "loss": 0.5453, "step": 2957 }, { "epoch": 0.4857840822778314, "grad_norm": 0.32326057654047574, "learning_rate": 8.828453120666574e-06, "loss": 0.5433, "step": 2958 }, { "epoch": 0.4859483094861741, "grad_norm": 0.341029084787711, "learning_rate": 8.828324843933625e-06, "loss": 0.5644, "step": 2959 }, { "epoch": 0.4861125366945169, "grad_norm": 0.34016806957422147, "learning_rate": 8.828196520190807e-06, "loss": 0.5556, "step": 2960 }, { "epoch": 0.4862767639028596, "grad_norm": 0.4028159993767315, "learning_rate": 8.828068149439518e-06, "loss": 0.5325, "step": 2961 }, { "epoch": 0.48644099111120237, "grad_norm": 0.2984402049931922, "learning_rate": 8.82793973168115e-06, "loss": 0.5751, "step": 2962 }, { "epoch": 0.48660521831954506, "grad_norm": 0.33616040754201904, "learning_rate": 8.827811266917099e-06, "loss": 0.5424, "step": 2963 }, { "epoch": 0.4867694455278878, "grad_norm": 0.333707218309075, "learning_rate": 8.827682755148757e-06, "loss": 0.5424, "step": 2964 }, { "epoch": 0.48693367273623056, "grad_norm": 0.3336325173722268, "learning_rate": 8.827554196377525e-06, "loss": 0.5617, "step": 2965 }, { "epoch": 0.4870978999445733, "grad_norm": 0.3299202105690531, "learning_rate": 8.827425590604796e-06, "loss": 0.5422, "step": 2966 }, { "epoch": 0.48726212715291606, "grad_norm": 0.3506142749597406, "learning_rate": 8.827296937831969e-06, "loss": 0.5609, "step": 2967 }, { "epoch": 0.4874263543612588, "grad_norm": 0.44404974906674444, "learning_rate": 8.82716823806044e-06, "loss": 0.5782, "step": 2968 }, { "epoch": 0.48759058156960156, "grad_norm": 0.30580865087991876, "learning_rate": 8.827039491291604e-06, "loss": 0.5581, "step": 2969 }, { "epoch": 0.4877548087779443, "grad_norm": 0.3048111574103392, "learning_rate": 8.826910697526862e-06, "loss": 0.5343, "step": 2970 }, { "epoch": 0.48791903598628705, "grad_norm": 0.3281501305555785, "learning_rate": 8.826781856767614e-06, "loss": 0.5627, "step": 2971 }, { "epoch": 0.48808326319462975, "grad_norm": 0.3026997019538971, "learning_rate": 8.826652969015258e-06, "loss": 0.579, "step": 2972 }, { "epoch": 0.4882474904029725, "grad_norm": 0.33950303423306283, "learning_rate": 8.826524034271194e-06, "loss": 0.5491, "step": 2973 }, { "epoch": 0.48841171761131524, "grad_norm": 0.3662426620093517, "learning_rate": 8.82639505253682e-06, "loss": 0.5609, "step": 2974 }, { "epoch": 0.488575944819658, "grad_norm": 0.3504254683156696, "learning_rate": 8.826266023813543e-06, "loss": 0.5358, "step": 2975 }, { "epoch": 0.48874017202800074, "grad_norm": 0.362624051096286, "learning_rate": 8.826136948102757e-06, "loss": 0.5515, "step": 2976 }, { "epoch": 0.4889043992363435, "grad_norm": 0.3303718575483711, "learning_rate": 8.82600782540587e-06, "loss": 0.5562, "step": 2977 }, { "epoch": 0.48906862644468624, "grad_norm": 0.35238845644920747, "learning_rate": 8.825878655724279e-06, "loss": 0.5385, "step": 2978 }, { "epoch": 0.489232853653029, "grad_norm": 0.31177874457750715, "learning_rate": 8.825749439059393e-06, "loss": 0.5396, "step": 2979 }, { "epoch": 0.4893970808613717, "grad_norm": 0.34109682668123126, "learning_rate": 8.825620175412609e-06, "loss": 0.5505, "step": 2980 }, { "epoch": 0.48956130806971443, "grad_norm": 0.3620326005023896, "learning_rate": 8.825490864785336e-06, "loss": 0.5778, "step": 2981 }, { "epoch": 0.4897255352780572, "grad_norm": 0.5979291508659067, "learning_rate": 8.825361507178977e-06, "loss": 0.5518, "step": 2982 }, { "epoch": 0.4898897624863999, "grad_norm": 0.29689712347868064, "learning_rate": 8.825232102594935e-06, "loss": 0.5817, "step": 2983 }, { "epoch": 0.4900539896947427, "grad_norm": 0.34548738955931996, "learning_rate": 8.825102651034617e-06, "loss": 0.5585, "step": 2984 }, { "epoch": 0.4902182169030854, "grad_norm": 0.3554958070227503, "learning_rate": 8.82497315249943e-06, "loss": 0.5488, "step": 2985 }, { "epoch": 0.4903824441114282, "grad_norm": 0.2904946631395032, "learning_rate": 8.82484360699078e-06, "loss": 0.5386, "step": 2986 }, { "epoch": 0.4905466713197709, "grad_norm": 0.3229198644119859, "learning_rate": 8.824714014510071e-06, "loss": 0.5558, "step": 2987 }, { "epoch": 0.49071089852811367, "grad_norm": 0.36384501410130143, "learning_rate": 8.824584375058713e-06, "loss": 0.5466, "step": 2988 }, { "epoch": 0.49087512573645636, "grad_norm": 0.3753164950066053, "learning_rate": 8.824454688638116e-06, "loss": 0.5401, "step": 2989 }, { "epoch": 0.4910393529447991, "grad_norm": 0.36709523119655635, "learning_rate": 8.824324955249685e-06, "loss": 0.5366, "step": 2990 }, { "epoch": 0.49120358015314186, "grad_norm": 0.36824250049715346, "learning_rate": 8.82419517489483e-06, "loss": 0.5468, "step": 2991 }, { "epoch": 0.4913678073614846, "grad_norm": 0.41026285040761973, "learning_rate": 8.824065347574962e-06, "loss": 0.5796, "step": 2992 }, { "epoch": 0.49153203456982736, "grad_norm": 0.36743089732858497, "learning_rate": 8.82393547329149e-06, "loss": 0.5691, "step": 2993 }, { "epoch": 0.4916962617781701, "grad_norm": 0.3573127649092164, "learning_rate": 8.823805552045824e-06, "loss": 0.5782, "step": 2994 }, { "epoch": 0.49186048898651286, "grad_norm": 0.4711373569935757, "learning_rate": 8.823675583839375e-06, "loss": 0.5448, "step": 2995 }, { "epoch": 0.4920247161948556, "grad_norm": 0.3467146120478822, "learning_rate": 8.823545568673556e-06, "loss": 0.5785, "step": 2996 }, { "epoch": 0.4921889434031983, "grad_norm": 0.35151193970461303, "learning_rate": 8.823415506549779e-06, "loss": 0.5703, "step": 2997 }, { "epoch": 0.49235317061154105, "grad_norm": 0.3620038956032762, "learning_rate": 8.823285397469455e-06, "loss": 0.5713, "step": 2998 }, { "epoch": 0.4925173978198838, "grad_norm": 0.3607731749175902, "learning_rate": 8.823155241434e-06, "loss": 0.5556, "step": 2999 }, { "epoch": 0.49268162502822654, "grad_norm": 0.32757022768540983, "learning_rate": 8.823025038444823e-06, "loss": 0.5531, "step": 3000 }, { "epoch": 0.4928458522365693, "grad_norm": 0.3781611992714109, "learning_rate": 8.822894788503342e-06, "loss": 0.5505, "step": 3001 }, { "epoch": 0.49301007944491204, "grad_norm": 0.3676888297654473, "learning_rate": 8.82276449161097e-06, "loss": 0.5646, "step": 3002 }, { "epoch": 0.4931743066532548, "grad_norm": 0.3467636715211463, "learning_rate": 8.822634147769123e-06, "loss": 0.538, "step": 3003 }, { "epoch": 0.49333853386159754, "grad_norm": 0.29828305218985435, "learning_rate": 8.822503756979217e-06, "loss": 0.5407, "step": 3004 }, { "epoch": 0.4935027610699403, "grad_norm": 0.3722421038012637, "learning_rate": 8.822373319242666e-06, "loss": 0.553, "step": 3005 }, { "epoch": 0.493666988278283, "grad_norm": 0.3964608753926012, "learning_rate": 8.822242834560888e-06, "loss": 0.5506, "step": 3006 }, { "epoch": 0.49383121548662573, "grad_norm": 0.3038259436819525, "learning_rate": 8.822112302935302e-06, "loss": 0.5528, "step": 3007 }, { "epoch": 0.4939954426949685, "grad_norm": 0.31767582517383786, "learning_rate": 8.821981724367322e-06, "loss": 0.5335, "step": 3008 }, { "epoch": 0.4941596699033112, "grad_norm": 0.2613418127902927, "learning_rate": 8.82185109885837e-06, "loss": 0.5279, "step": 3009 }, { "epoch": 0.494323897111654, "grad_norm": 0.3858067232180448, "learning_rate": 8.821720426409862e-06, "loss": 0.5577, "step": 3010 }, { "epoch": 0.4944881243199967, "grad_norm": 0.34249742513275216, "learning_rate": 8.821589707023218e-06, "loss": 0.5404, "step": 3011 }, { "epoch": 0.4946523515283395, "grad_norm": 0.3078017073081959, "learning_rate": 8.821458940699858e-06, "loss": 0.5441, "step": 3012 }, { "epoch": 0.4948165787366822, "grad_norm": 0.4729428717813342, "learning_rate": 8.821328127441202e-06, "loss": 0.5871, "step": 3013 }, { "epoch": 0.4949808059450249, "grad_norm": 0.344819112206208, "learning_rate": 8.821197267248673e-06, "loss": 0.5371, "step": 3014 }, { "epoch": 0.49514503315336766, "grad_norm": 0.5182067984676952, "learning_rate": 8.821066360123687e-06, "loss": 0.554, "step": 3015 }, { "epoch": 0.4953092603617104, "grad_norm": 0.5446474279763543, "learning_rate": 8.820935406067672e-06, "loss": 0.5833, "step": 3016 }, { "epoch": 0.49547348757005316, "grad_norm": 0.2829439671030965, "learning_rate": 8.820804405082045e-06, "loss": 0.5415, "step": 3017 }, { "epoch": 0.4956377147783959, "grad_norm": 0.3880941527869299, "learning_rate": 8.820673357168232e-06, "loss": 0.5579, "step": 3018 }, { "epoch": 0.49580194198673866, "grad_norm": 0.29511019081794293, "learning_rate": 8.820542262327655e-06, "loss": 0.54, "step": 3019 }, { "epoch": 0.4959661691950814, "grad_norm": 0.337603608717014, "learning_rate": 8.820411120561738e-06, "loss": 0.5414, "step": 3020 }, { "epoch": 0.49613039640342416, "grad_norm": 0.36921638040684235, "learning_rate": 8.820279931871906e-06, "loss": 0.5572, "step": 3021 }, { "epoch": 0.4962946236117669, "grad_norm": 0.34506745881486306, "learning_rate": 8.820148696259584e-06, "loss": 0.5594, "step": 3022 }, { "epoch": 0.4964588508201096, "grad_norm": 0.35208611833465026, "learning_rate": 8.820017413726196e-06, "loss": 0.5487, "step": 3023 }, { "epoch": 0.49662307802845235, "grad_norm": 0.3274229204168676, "learning_rate": 8.819886084273168e-06, "loss": 0.5649, "step": 3024 }, { "epoch": 0.4967873052367951, "grad_norm": 0.3208518695214698, "learning_rate": 8.819754707901928e-06, "loss": 0.5461, "step": 3025 }, { "epoch": 0.49695153244513784, "grad_norm": 0.3198716314130458, "learning_rate": 8.819623284613901e-06, "loss": 0.5608, "step": 3026 }, { "epoch": 0.4971157596534806, "grad_norm": 0.4017507102525848, "learning_rate": 8.819491814410516e-06, "loss": 0.5156, "step": 3027 }, { "epoch": 0.49727998686182334, "grad_norm": 0.3032437885755787, "learning_rate": 8.819360297293199e-06, "loss": 0.5362, "step": 3028 }, { "epoch": 0.4974442140701661, "grad_norm": 0.3414840714435411, "learning_rate": 8.81922873326338e-06, "loss": 0.5451, "step": 3029 }, { "epoch": 0.49760844127850884, "grad_norm": 0.35225110432503826, "learning_rate": 8.819097122322488e-06, "loss": 0.5209, "step": 3030 }, { "epoch": 0.49777266848685153, "grad_norm": 0.3239762292518163, "learning_rate": 8.81896546447195e-06, "loss": 0.572, "step": 3031 }, { "epoch": 0.4979368956951943, "grad_norm": 0.4545041404536738, "learning_rate": 8.8188337597132e-06, "loss": 0.5595, "step": 3032 }, { "epoch": 0.49810112290353703, "grad_norm": 0.35087259800795967, "learning_rate": 8.818702008047666e-06, "loss": 0.5503, "step": 3033 }, { "epoch": 0.4982653501118798, "grad_norm": 0.3518407108267486, "learning_rate": 8.818570209476777e-06, "loss": 0.5565, "step": 3034 }, { "epoch": 0.4984295773202225, "grad_norm": 0.377200938642081, "learning_rate": 8.81843836400197e-06, "loss": 0.5663, "step": 3035 }, { "epoch": 0.4985938045285653, "grad_norm": 0.33495384047527427, "learning_rate": 8.818306471624672e-06, "loss": 0.558, "step": 3036 }, { "epoch": 0.498758031736908, "grad_norm": 0.3370327966629263, "learning_rate": 8.818174532346315e-06, "loss": 0.5431, "step": 3037 }, { "epoch": 0.4989222589452508, "grad_norm": 0.31485961232651577, "learning_rate": 8.818042546168336e-06, "loss": 0.5452, "step": 3038 }, { "epoch": 0.4990864861535935, "grad_norm": 1.0283110791160432, "learning_rate": 8.817910513092168e-06, "loss": 0.555, "step": 3039 }, { "epoch": 0.4992507133619362, "grad_norm": 0.3370249745346664, "learning_rate": 8.81777843311924e-06, "loss": 0.5565, "step": 3040 }, { "epoch": 0.49941494057027896, "grad_norm": 0.3697422926403468, "learning_rate": 8.817646306250992e-06, "loss": 0.5376, "step": 3041 }, { "epoch": 0.4995791677786217, "grad_norm": 0.33821750551255797, "learning_rate": 8.817514132488858e-06, "loss": 0.5741, "step": 3042 }, { "epoch": 0.49974339498696446, "grad_norm": 0.333623396144671, "learning_rate": 8.817381911834272e-06, "loss": 0.537, "step": 3043 }, { "epoch": 0.4999076221953072, "grad_norm": 0.5552665670609216, "learning_rate": 8.817249644288669e-06, "loss": 0.5366, "step": 3044 }, { "epoch": 0.5000718494036499, "grad_norm": 0.3361543209358446, "learning_rate": 8.817117329853489e-06, "loss": 0.5571, "step": 3045 }, { "epoch": 0.5002360766119927, "grad_norm": 0.3305629109517608, "learning_rate": 8.816984968530167e-06, "loss": 0.5147, "step": 3046 }, { "epoch": 0.5004003038203354, "grad_norm": 0.3732536451895665, "learning_rate": 8.816852560320142e-06, "loss": 0.5593, "step": 3047 }, { "epoch": 0.5005645310286782, "grad_norm": 0.3253389659113194, "learning_rate": 8.816720105224851e-06, "loss": 0.5649, "step": 3048 }, { "epoch": 0.5007287582370209, "grad_norm": 0.5642123979492453, "learning_rate": 8.81658760324573e-06, "loss": 0.5608, "step": 3049 }, { "epoch": 0.5008929854453636, "grad_norm": 0.34823482199959965, "learning_rate": 8.816455054384224e-06, "loss": 0.5371, "step": 3050 }, { "epoch": 0.5010572126537064, "grad_norm": 0.2841807878282881, "learning_rate": 8.816322458641767e-06, "loss": 0.577, "step": 3051 }, { "epoch": 0.5012214398620491, "grad_norm": 0.42949070216811547, "learning_rate": 8.816189816019802e-06, "loss": 0.5596, "step": 3052 }, { "epoch": 0.5013856670703919, "grad_norm": 0.37820954212665636, "learning_rate": 8.816057126519769e-06, "loss": 0.5555, "step": 3053 }, { "epoch": 0.5015498942787346, "grad_norm": 0.33884019153259126, "learning_rate": 8.815924390143108e-06, "loss": 0.5125, "step": 3054 }, { "epoch": 0.5017141214870774, "grad_norm": 0.3024625122784817, "learning_rate": 8.815791606891265e-06, "loss": 0.5431, "step": 3055 }, { "epoch": 0.5018783486954201, "grad_norm": 0.31699884443526544, "learning_rate": 8.815658776765675e-06, "loss": 0.5392, "step": 3056 }, { "epoch": 0.5020425759037629, "grad_norm": 0.3264450614548681, "learning_rate": 8.815525899767788e-06, "loss": 0.5455, "step": 3057 }, { "epoch": 0.5022068031121056, "grad_norm": 0.3043521417126159, "learning_rate": 8.815392975899042e-06, "loss": 0.546, "step": 3058 }, { "epoch": 0.5023710303204484, "grad_norm": 0.2830419102096985, "learning_rate": 8.815260005160884e-06, "loss": 0.5405, "step": 3059 }, { "epoch": 0.5025352575287911, "grad_norm": 0.3197247856576631, "learning_rate": 8.815126987554755e-06, "loss": 0.5653, "step": 3060 }, { "epoch": 0.5026994847371338, "grad_norm": 0.3388722907872103, "learning_rate": 8.814993923082102e-06, "loss": 0.5546, "step": 3061 }, { "epoch": 0.5028637119454765, "grad_norm": 0.29425536996130774, "learning_rate": 8.81486081174437e-06, "loss": 0.5499, "step": 3062 }, { "epoch": 0.5030279391538193, "grad_norm": 0.34880282593568984, "learning_rate": 8.814727653543005e-06, "loss": 0.5848, "step": 3063 }, { "epoch": 0.503192166362162, "grad_norm": 0.3310747337461077, "learning_rate": 8.814594448479452e-06, "loss": 0.551, "step": 3064 }, { "epoch": 0.5033563935705048, "grad_norm": 0.3895534185008042, "learning_rate": 8.814461196555156e-06, "loss": 0.5314, "step": 3065 }, { "epoch": 0.5035206207788475, "grad_norm": 0.3101567026796809, "learning_rate": 8.81432789777157e-06, "loss": 0.5445, "step": 3066 }, { "epoch": 0.5036848479871903, "grad_norm": 0.28256966020552643, "learning_rate": 8.814194552130136e-06, "loss": 0.5586, "step": 3067 }, { "epoch": 0.503849075195533, "grad_norm": 0.29522749873378323, "learning_rate": 8.814061159632306e-06, "loss": 0.5617, "step": 3068 }, { "epoch": 0.5040133024038758, "grad_norm": 0.35241166914489735, "learning_rate": 8.813927720279526e-06, "loss": 0.5617, "step": 3069 }, { "epoch": 0.5041775296122185, "grad_norm": 0.3227727829669039, "learning_rate": 8.813794234073247e-06, "loss": 0.5513, "step": 3070 }, { "epoch": 0.5043417568205613, "grad_norm": 0.35211764287996394, "learning_rate": 8.813660701014918e-06, "loss": 0.553, "step": 3071 }, { "epoch": 0.504505984028904, "grad_norm": 0.2820677190638171, "learning_rate": 8.813527121105991e-06, "loss": 0.5366, "step": 3072 }, { "epoch": 0.5046702112372468, "grad_norm": 0.3628674686715253, "learning_rate": 8.813393494347915e-06, "loss": 0.5504, "step": 3073 }, { "epoch": 0.5048344384455895, "grad_norm": 0.3344707001677626, "learning_rate": 8.813259820742143e-06, "loss": 0.5411, "step": 3074 }, { "epoch": 0.5049986656539323, "grad_norm": 0.33865193193614024, "learning_rate": 8.813126100290124e-06, "loss": 0.543, "step": 3075 }, { "epoch": 0.505162892862275, "grad_norm": 0.34283850872753413, "learning_rate": 8.812992332993312e-06, "loss": 0.5304, "step": 3076 }, { "epoch": 0.5053271200706178, "grad_norm": 0.3125953167598692, "learning_rate": 8.81285851885316e-06, "loss": 0.5779, "step": 3077 }, { "epoch": 0.5054913472789604, "grad_norm": 0.3253589557042883, "learning_rate": 8.812724657871124e-06, "loss": 0.544, "step": 3078 }, { "epoch": 0.5056555744873031, "grad_norm": 0.32809582790623654, "learning_rate": 8.812590750048651e-06, "loss": 0.5484, "step": 3079 }, { "epoch": 0.5058198016956459, "grad_norm": 0.3000554576014139, "learning_rate": 8.8124567953872e-06, "loss": 0.5455, "step": 3080 }, { "epoch": 0.5059840289039886, "grad_norm": 0.4201987809012978, "learning_rate": 8.812322793888229e-06, "loss": 0.542, "step": 3081 }, { "epoch": 0.5061482561123314, "grad_norm": 0.3110305012514798, "learning_rate": 8.812188745553186e-06, "loss": 0.5443, "step": 3082 }, { "epoch": 0.5063124833206741, "grad_norm": 0.31745465529222217, "learning_rate": 8.812054650383533e-06, "loss": 0.5374, "step": 3083 }, { "epoch": 0.5064767105290169, "grad_norm": 0.28706529458952756, "learning_rate": 8.811920508380722e-06, "loss": 0.5796, "step": 3084 }, { "epoch": 0.5066409377373596, "grad_norm": 0.31694659994111607, "learning_rate": 8.811786319546213e-06, "loss": 0.568, "step": 3085 }, { "epoch": 0.5068051649457024, "grad_norm": 0.3311771175932345, "learning_rate": 8.81165208388146e-06, "loss": 0.541, "step": 3086 }, { "epoch": 0.5069693921540451, "grad_norm": 0.2894350217712809, "learning_rate": 8.811517801387926e-06, "loss": 0.5511, "step": 3087 }, { "epoch": 0.5071336193623879, "grad_norm": 0.3198518620985994, "learning_rate": 8.811383472067066e-06, "loss": 0.5487, "step": 3088 }, { "epoch": 0.5072978465707306, "grad_norm": 0.2919190229017156, "learning_rate": 8.811249095920339e-06, "loss": 0.5515, "step": 3089 }, { "epoch": 0.5074620737790734, "grad_norm": 0.3323556812127161, "learning_rate": 8.811114672949207e-06, "loss": 0.5363, "step": 3090 }, { "epoch": 0.5076263009874161, "grad_norm": 0.28621504004096965, "learning_rate": 8.810980203155126e-06, "loss": 0.5152, "step": 3091 }, { "epoch": 0.5077905281957589, "grad_norm": 0.30244379133479443, "learning_rate": 8.81084568653956e-06, "loss": 0.5592, "step": 3092 }, { "epoch": 0.5079547554041016, "grad_norm": 0.32967015947935796, "learning_rate": 8.810711123103967e-06, "loss": 0.5378, "step": 3093 }, { "epoch": 0.5081189826124444, "grad_norm": 0.32233218351029747, "learning_rate": 8.810576512849812e-06, "loss": 0.5701, "step": 3094 }, { "epoch": 0.508283209820787, "grad_norm": 0.3038805312926043, "learning_rate": 8.810441855778554e-06, "loss": 0.5369, "step": 3095 }, { "epoch": 0.5084474370291298, "grad_norm": 0.3675474725686056, "learning_rate": 8.810307151891658e-06, "loss": 0.5435, "step": 3096 }, { "epoch": 0.5086116642374725, "grad_norm": 0.3031891362387634, "learning_rate": 8.810172401190583e-06, "loss": 0.553, "step": 3097 }, { "epoch": 0.5087758914458153, "grad_norm": 0.3125909011850417, "learning_rate": 8.810037603676797e-06, "loss": 0.5349, "step": 3098 }, { "epoch": 0.508940118654158, "grad_norm": 0.5022551060954056, "learning_rate": 8.809902759351761e-06, "loss": 0.5645, "step": 3099 }, { "epoch": 0.5091043458625008, "grad_norm": 0.3055638918113333, "learning_rate": 8.809767868216941e-06, "loss": 0.548, "step": 3100 }, { "epoch": 0.5092685730708435, "grad_norm": 0.2874713939919956, "learning_rate": 8.809632930273801e-06, "loss": 0.561, "step": 3101 }, { "epoch": 0.5094328002791862, "grad_norm": 0.4250919250625114, "learning_rate": 8.809497945523808e-06, "loss": 0.5645, "step": 3102 }, { "epoch": 0.509597027487529, "grad_norm": 0.35550987220631075, "learning_rate": 8.809362913968428e-06, "loss": 0.5403, "step": 3103 }, { "epoch": 0.5097612546958717, "grad_norm": 0.3443953804934167, "learning_rate": 8.809227835609127e-06, "loss": 0.5413, "step": 3104 }, { "epoch": 0.5099254819042145, "grad_norm": 0.2839313401806877, "learning_rate": 8.80909271044737e-06, "loss": 0.5491, "step": 3105 }, { "epoch": 0.5100897091125572, "grad_norm": 0.2967153264099807, "learning_rate": 8.808957538484629e-06, "loss": 0.5488, "step": 3106 }, { "epoch": 0.5102539363209, "grad_norm": 0.28848535562090943, "learning_rate": 8.808822319722367e-06, "loss": 0.5588, "step": 3107 }, { "epoch": 0.5104181635292427, "grad_norm": 0.2830657128227046, "learning_rate": 8.808687054162057e-06, "loss": 0.557, "step": 3108 }, { "epoch": 0.5105823907375855, "grad_norm": 0.39693335600192026, "learning_rate": 8.808551741805167e-06, "loss": 0.5537, "step": 3109 }, { "epoch": 0.5107466179459282, "grad_norm": 0.3161563688662857, "learning_rate": 8.808416382653165e-06, "loss": 0.5418, "step": 3110 }, { "epoch": 0.510910845154271, "grad_norm": 0.299459534024022, "learning_rate": 8.808280976707522e-06, "loss": 0.5751, "step": 3111 }, { "epoch": 0.5110750723626136, "grad_norm": 0.3302741452682689, "learning_rate": 8.80814552396971e-06, "loss": 0.5722, "step": 3112 }, { "epoch": 0.5112392995709564, "grad_norm": 0.4430503083050463, "learning_rate": 8.808010024441198e-06, "loss": 0.5413, "step": 3113 }, { "epoch": 0.5114035267792991, "grad_norm": 0.32648783038577595, "learning_rate": 8.80787447812346e-06, "loss": 0.5405, "step": 3114 }, { "epoch": 0.5115677539876419, "grad_norm": 0.300303509573739, "learning_rate": 8.807738885017965e-06, "loss": 0.5455, "step": 3115 }, { "epoch": 0.5117319811959846, "grad_norm": 0.3694340171302308, "learning_rate": 8.807603245126187e-06, "loss": 0.545, "step": 3116 }, { "epoch": 0.5118962084043274, "grad_norm": 0.3788879023858456, "learning_rate": 8.807467558449603e-06, "loss": 0.5359, "step": 3117 }, { "epoch": 0.5120604356126701, "grad_norm": 0.353354588272609, "learning_rate": 8.80733182498968e-06, "loss": 0.5488, "step": 3118 }, { "epoch": 0.5122246628210129, "grad_norm": 0.3408319876373525, "learning_rate": 8.807196044747897e-06, "loss": 0.5485, "step": 3119 }, { "epoch": 0.5123888900293556, "grad_norm": 0.39035712683504437, "learning_rate": 8.807060217725726e-06, "loss": 0.5464, "step": 3120 }, { "epoch": 0.5125531172376984, "grad_norm": 0.3533902186234627, "learning_rate": 8.806924343924644e-06, "loss": 0.5289, "step": 3121 }, { "epoch": 0.5127173444460411, "grad_norm": 0.5125086187864211, "learning_rate": 8.806788423346127e-06, "loss": 0.5434, "step": 3122 }, { "epoch": 0.5128815716543839, "grad_norm": 0.7203539322855015, "learning_rate": 8.806652455991651e-06, "loss": 0.5588, "step": 3123 }, { "epoch": 0.5130457988627266, "grad_norm": 0.36552585640486723, "learning_rate": 8.80651644186269e-06, "loss": 0.5523, "step": 3124 }, { "epoch": 0.5132100260710694, "grad_norm": 0.31504873528001304, "learning_rate": 8.806380380960725e-06, "loss": 0.5414, "step": 3125 }, { "epoch": 0.5133742532794121, "grad_norm": 0.3246787661246523, "learning_rate": 8.806244273287233e-06, "loss": 0.5418, "step": 3126 }, { "epoch": 0.5135384804877549, "grad_norm": 0.3715899215366805, "learning_rate": 8.806108118843688e-06, "loss": 0.5657, "step": 3127 }, { "epoch": 0.5137027076960976, "grad_norm": 0.4586182447833959, "learning_rate": 8.805971917631575e-06, "loss": 0.5338, "step": 3128 }, { "epoch": 0.5138669349044402, "grad_norm": 0.2998730656450566, "learning_rate": 8.80583566965237e-06, "loss": 0.567, "step": 3129 }, { "epoch": 0.514031162112783, "grad_norm": 0.3358753869110285, "learning_rate": 8.805699374907553e-06, "loss": 0.5329, "step": 3130 }, { "epoch": 0.5141953893211257, "grad_norm": 0.3623082881810261, "learning_rate": 8.805563033398604e-06, "loss": 0.5481, "step": 3131 }, { "epoch": 0.5143596165294685, "grad_norm": 0.41586552825851203, "learning_rate": 8.805426645127005e-06, "loss": 0.5347, "step": 3132 }, { "epoch": 0.5145238437378112, "grad_norm": 0.3389036505262228, "learning_rate": 8.805290210094238e-06, "loss": 0.5432, "step": 3133 }, { "epoch": 0.514688070946154, "grad_norm": 0.33220166906284515, "learning_rate": 8.80515372830178e-06, "loss": 0.5737, "step": 3134 }, { "epoch": 0.5148522981544967, "grad_norm": 1.4240426803228405, "learning_rate": 8.80501719975112e-06, "loss": 0.563, "step": 3135 }, { "epoch": 0.5150165253628395, "grad_norm": 0.34283390659735813, "learning_rate": 8.804880624443737e-06, "loss": 0.5487, "step": 3136 }, { "epoch": 0.5151807525711822, "grad_norm": 0.3625409825436214, "learning_rate": 8.804744002381114e-06, "loss": 0.558, "step": 3137 }, { "epoch": 0.515344979779525, "grad_norm": 0.39081632339485356, "learning_rate": 8.804607333564737e-06, "loss": 0.5447, "step": 3138 }, { "epoch": 0.5155092069878677, "grad_norm": 0.3236707823666088, "learning_rate": 8.804470617996088e-06, "loss": 0.5647, "step": 3139 }, { "epoch": 0.5156734341962105, "grad_norm": 0.42790371855431897, "learning_rate": 8.804333855676653e-06, "loss": 0.5436, "step": 3140 }, { "epoch": 0.5158376614045532, "grad_norm": 0.3510945198863832, "learning_rate": 8.804197046607918e-06, "loss": 0.595, "step": 3141 }, { "epoch": 0.516001888612896, "grad_norm": 0.34592814315247516, "learning_rate": 8.80406019079137e-06, "loss": 0.5549, "step": 3142 }, { "epoch": 0.5161661158212387, "grad_norm": 0.3300648787481515, "learning_rate": 8.803923288228492e-06, "loss": 0.5512, "step": 3143 }, { "epoch": 0.5163303430295815, "grad_norm": 0.3367035380808404, "learning_rate": 8.803786338920773e-06, "loss": 0.5349, "step": 3144 }, { "epoch": 0.5164945702379242, "grad_norm": 0.40066400824553233, "learning_rate": 8.803649342869698e-06, "loss": 0.5556, "step": 3145 }, { "epoch": 0.5166587974462669, "grad_norm": 0.311816919024996, "learning_rate": 8.803512300076759e-06, "loss": 0.5589, "step": 3146 }, { "epoch": 0.5168230246546096, "grad_norm": 0.31924308074256, "learning_rate": 8.803375210543442e-06, "loss": 0.5502, "step": 3147 }, { "epoch": 0.5169872518629524, "grad_norm": 0.3624853958484826, "learning_rate": 8.803238074271237e-06, "loss": 0.5562, "step": 3148 }, { "epoch": 0.5171514790712951, "grad_norm": 0.3150090211544387, "learning_rate": 8.803100891261632e-06, "loss": 0.534, "step": 3149 }, { "epoch": 0.5173157062796379, "grad_norm": 0.30714145840950313, "learning_rate": 8.802963661516117e-06, "loss": 0.5609, "step": 3150 }, { "epoch": 0.5174799334879806, "grad_norm": 0.41808965641054635, "learning_rate": 8.802826385036183e-06, "loss": 0.5277, "step": 3151 }, { "epoch": 0.5176441606963234, "grad_norm": 0.33805758218512644, "learning_rate": 8.802689061823322e-06, "loss": 0.5744, "step": 3152 }, { "epoch": 0.5178083879046661, "grad_norm": 0.32757272518208763, "learning_rate": 8.802551691879024e-06, "loss": 0.5434, "step": 3153 }, { "epoch": 0.5179726151130088, "grad_norm": 0.30549027755496716, "learning_rate": 8.802414275204783e-06, "loss": 0.5388, "step": 3154 }, { "epoch": 0.5181368423213516, "grad_norm": 0.5121294354664127, "learning_rate": 8.802276811802089e-06, "loss": 0.5559, "step": 3155 }, { "epoch": 0.5183010695296943, "grad_norm": 0.4360850175903607, "learning_rate": 8.802139301672434e-06, "loss": 0.5508, "step": 3156 }, { "epoch": 0.5184652967380371, "grad_norm": 0.3109791520360334, "learning_rate": 8.802001744817315e-06, "loss": 0.5582, "step": 3157 }, { "epoch": 0.5186295239463798, "grad_norm": 0.34541983264627407, "learning_rate": 8.801864141238225e-06, "loss": 0.5425, "step": 3158 }, { "epoch": 0.5187937511547226, "grad_norm": 0.44077911149552174, "learning_rate": 8.801726490936658e-06, "loss": 0.5333, "step": 3159 }, { "epoch": 0.5189579783630653, "grad_norm": 0.3175152055000214, "learning_rate": 8.801588793914108e-06, "loss": 0.5347, "step": 3160 }, { "epoch": 0.5191222055714081, "grad_norm": 0.33110520939463356, "learning_rate": 8.801451050172072e-06, "loss": 0.5367, "step": 3161 }, { "epoch": 0.5192864327797508, "grad_norm": 0.6786090232297229, "learning_rate": 8.801313259712045e-06, "loss": 0.5379, "step": 3162 }, { "epoch": 0.5194506599880935, "grad_norm": 0.32345532898552576, "learning_rate": 8.801175422535524e-06, "loss": 0.5689, "step": 3163 }, { "epoch": 0.5196148871964362, "grad_norm": 0.29288324836950214, "learning_rate": 8.801037538644008e-06, "loss": 0.552, "step": 3164 }, { "epoch": 0.519779114404779, "grad_norm": 0.3060045843978916, "learning_rate": 8.80089960803899e-06, "loss": 0.549, "step": 3165 }, { "epoch": 0.5199433416131217, "grad_norm": 0.3997067399866202, "learning_rate": 8.800761630721973e-06, "loss": 0.5322, "step": 3166 }, { "epoch": 0.5201075688214645, "grad_norm": 0.2952695042762905, "learning_rate": 8.800623606694453e-06, "loss": 0.5631, "step": 3167 }, { "epoch": 0.5202717960298072, "grad_norm": 0.33202602894268374, "learning_rate": 8.800485535957928e-06, "loss": 0.5355, "step": 3168 }, { "epoch": 0.52043602323815, "grad_norm": 0.3053471478926584, "learning_rate": 8.8003474185139e-06, "loss": 0.5239, "step": 3169 }, { "epoch": 0.5206002504464927, "grad_norm": 0.44546827302209424, "learning_rate": 8.80020925436387e-06, "loss": 0.5446, "step": 3170 }, { "epoch": 0.5207644776548355, "grad_norm": 0.3232464754044946, "learning_rate": 8.800071043509333e-06, "loss": 0.5625, "step": 3171 }, { "epoch": 0.5209287048631782, "grad_norm": 0.31924534819311456, "learning_rate": 8.799932785951797e-06, "loss": 0.5548, "step": 3172 }, { "epoch": 0.521092932071521, "grad_norm": 0.8093812270735015, "learning_rate": 8.799794481692757e-06, "loss": 0.5512, "step": 3173 }, { "epoch": 0.5212571592798637, "grad_norm": 0.353912459104798, "learning_rate": 8.79965613073372e-06, "loss": 0.5355, "step": 3174 }, { "epoch": 0.5214213864882065, "grad_norm": 17.314905034002273, "learning_rate": 8.799517733076186e-06, "loss": 0.5549, "step": 3175 }, { "epoch": 0.5215856136965492, "grad_norm": 0.4253045024687482, "learning_rate": 8.799379288721663e-06, "loss": 0.5465, "step": 3176 }, { "epoch": 0.521749840904892, "grad_norm": 0.5363599916050024, "learning_rate": 8.799240797671648e-06, "loss": 0.5398, "step": 3177 }, { "epoch": 0.5219140681132347, "grad_norm": 0.5429239713433921, "learning_rate": 8.799102259927648e-06, "loss": 0.5547, "step": 3178 }, { "epoch": 0.5220782953215775, "grad_norm": 0.888409605205552, "learning_rate": 8.798963675491168e-06, "loss": 0.569, "step": 3179 }, { "epoch": 0.5222425225299201, "grad_norm": 0.8292470249496997, "learning_rate": 8.798825044363714e-06, "loss": 0.5491, "step": 3180 }, { "epoch": 0.5224067497382628, "grad_norm": 0.8524348696719072, "learning_rate": 8.79868636654679e-06, "loss": 0.5565, "step": 3181 }, { "epoch": 0.5225709769466056, "grad_norm": 0.7638273678405479, "learning_rate": 8.798547642041903e-06, "loss": 0.5655, "step": 3182 }, { "epoch": 0.5227352041549483, "grad_norm": 0.5794743123076325, "learning_rate": 8.798408870850557e-06, "loss": 0.5791, "step": 3183 }, { "epoch": 0.5228994313632911, "grad_norm": 0.6652418445715474, "learning_rate": 8.798270052974265e-06, "loss": 0.5817, "step": 3184 }, { "epoch": 0.5230636585716338, "grad_norm": 0.6033023612512277, "learning_rate": 8.79813118841453e-06, "loss": 0.5602, "step": 3185 }, { "epoch": 0.5232278857799766, "grad_norm": 0.7713085937031505, "learning_rate": 8.79799227717286e-06, "loss": 0.5467, "step": 3186 }, { "epoch": 0.5233921129883193, "grad_norm": 0.5954572400751407, "learning_rate": 8.797853319250767e-06, "loss": 0.5632, "step": 3187 }, { "epoch": 0.5235563401966621, "grad_norm": 0.47584740248527363, "learning_rate": 8.797714314649757e-06, "loss": 0.5594, "step": 3188 }, { "epoch": 0.5237205674050048, "grad_norm": 0.5639314104879384, "learning_rate": 8.797575263371343e-06, "loss": 0.5555, "step": 3189 }, { "epoch": 0.5238847946133476, "grad_norm": 0.5285776579354926, "learning_rate": 8.797436165417032e-06, "loss": 0.5671, "step": 3190 }, { "epoch": 0.5240490218216903, "grad_norm": 0.43872512757835813, "learning_rate": 8.797297020788336e-06, "loss": 0.5555, "step": 3191 }, { "epoch": 0.5242132490300331, "grad_norm": 0.5753312286290805, "learning_rate": 8.797157829486767e-06, "loss": 0.5585, "step": 3192 }, { "epoch": 0.5243774762383758, "grad_norm": 0.38476151861139246, "learning_rate": 8.797018591513837e-06, "loss": 0.5576, "step": 3193 }, { "epoch": 0.5245417034467186, "grad_norm": 0.43061245503098006, "learning_rate": 8.796879306871056e-06, "loss": 0.58, "step": 3194 }, { "epoch": 0.5247059306550613, "grad_norm": 0.3378612386322587, "learning_rate": 8.79673997555994e-06, "loss": 0.552, "step": 3195 }, { "epoch": 0.5248701578634041, "grad_norm": 0.3335234350164391, "learning_rate": 8.796600597581998e-06, "loss": 0.5599, "step": 3196 }, { "epoch": 0.5250343850717467, "grad_norm": 0.5837684658025116, "learning_rate": 8.796461172938749e-06, "loss": 0.564, "step": 3197 }, { "epoch": 0.5251986122800895, "grad_norm": 0.34310523436981255, "learning_rate": 8.796321701631702e-06, "loss": 0.541, "step": 3198 }, { "epoch": 0.5253628394884322, "grad_norm": 0.44627037677002795, "learning_rate": 8.796182183662376e-06, "loss": 0.5639, "step": 3199 }, { "epoch": 0.525527066696775, "grad_norm": 0.3762255974042171, "learning_rate": 8.796042619032283e-06, "loss": 0.5529, "step": 3200 }, { "epoch": 0.5256912939051177, "grad_norm": 0.3437555157560286, "learning_rate": 8.795903007742941e-06, "loss": 0.5632, "step": 3201 }, { "epoch": 0.5258555211134605, "grad_norm": 0.3317101998553545, "learning_rate": 8.795763349795866e-06, "loss": 0.5524, "step": 3202 }, { "epoch": 0.5260197483218032, "grad_norm": 0.3979361730906255, "learning_rate": 8.795623645192574e-06, "loss": 0.5464, "step": 3203 }, { "epoch": 0.526183975530146, "grad_norm": 0.3619951010888624, "learning_rate": 8.795483893934584e-06, "loss": 0.5404, "step": 3204 }, { "epoch": 0.5263482027384887, "grad_norm": 0.3117407096018107, "learning_rate": 8.795344096023411e-06, "loss": 0.5376, "step": 3205 }, { "epoch": 0.5265124299468315, "grad_norm": 0.3293924524709304, "learning_rate": 8.795204251460576e-06, "loss": 0.5306, "step": 3206 }, { "epoch": 0.5266766571551742, "grad_norm": 0.41805058863513644, "learning_rate": 8.795064360247598e-06, "loss": 0.5646, "step": 3207 }, { "epoch": 0.526840884363517, "grad_norm": 0.3826750759835131, "learning_rate": 8.794924422385995e-06, "loss": 0.5386, "step": 3208 }, { "epoch": 0.5270051115718597, "grad_norm": 0.3430815145020039, "learning_rate": 8.794784437877286e-06, "loss": 0.5521, "step": 3209 }, { "epoch": 0.5271693387802024, "grad_norm": 0.5506840435783226, "learning_rate": 8.794644406722993e-06, "loss": 0.5466, "step": 3210 }, { "epoch": 0.5273335659885452, "grad_norm": 0.2992384367198502, "learning_rate": 8.794504328924636e-06, "loss": 0.5506, "step": 3211 }, { "epoch": 0.5274977931968879, "grad_norm": 0.3029752344219687, "learning_rate": 8.794364204483736e-06, "loss": 0.5484, "step": 3212 }, { "epoch": 0.5276620204052307, "grad_norm": 0.2948964578526369, "learning_rate": 8.794224033401818e-06, "loss": 0.5462, "step": 3213 }, { "epoch": 0.5278262476135733, "grad_norm": 0.366601371536449, "learning_rate": 8.794083815680402e-06, "loss": 0.5599, "step": 3214 }, { "epoch": 0.5279904748219161, "grad_norm": 0.3498632389083059, "learning_rate": 8.79394355132101e-06, "loss": 0.5383, "step": 3215 }, { "epoch": 0.5281547020302588, "grad_norm": 0.31505091855348366, "learning_rate": 8.793803240325165e-06, "loss": 0.5631, "step": 3216 }, { "epoch": 0.5283189292386016, "grad_norm": 0.42965003549284014, "learning_rate": 8.793662882694394e-06, "loss": 0.5419, "step": 3217 }, { "epoch": 0.5284831564469443, "grad_norm": 0.29650964340269054, "learning_rate": 8.79352247843022e-06, "loss": 0.5551, "step": 3218 }, { "epoch": 0.5286473836552871, "grad_norm": 0.29571046480753704, "learning_rate": 8.793382027534167e-06, "loss": 0.5402, "step": 3219 }, { "epoch": 0.5288116108636298, "grad_norm": 0.3534211821619501, "learning_rate": 8.79324153000776e-06, "loss": 0.56, "step": 3220 }, { "epoch": 0.5289758380719726, "grad_norm": 0.3105684543180674, "learning_rate": 8.793100985852527e-06, "loss": 0.5344, "step": 3221 }, { "epoch": 0.5291400652803153, "grad_norm": 0.32396286394780704, "learning_rate": 8.792960395069993e-06, "loss": 0.5344, "step": 3222 }, { "epoch": 0.5293042924886581, "grad_norm": 0.40299790637446253, "learning_rate": 8.792819757661686e-06, "loss": 0.5337, "step": 3223 }, { "epoch": 0.5294685196970008, "grad_norm": 0.2948307048801837, "learning_rate": 8.792679073629132e-06, "loss": 0.5372, "step": 3224 }, { "epoch": 0.5296327469053436, "grad_norm": 0.33790169922370855, "learning_rate": 8.792538342973862e-06, "loss": 0.5311, "step": 3225 }, { "epoch": 0.5297969741136863, "grad_norm": 0.30645198851218913, "learning_rate": 8.792397565697399e-06, "loss": 0.543, "step": 3226 }, { "epoch": 0.5299612013220291, "grad_norm": 0.3628053695130395, "learning_rate": 8.792256741801277e-06, "loss": 0.5559, "step": 3227 }, { "epoch": 0.5301254285303718, "grad_norm": 0.27857795489535925, "learning_rate": 8.792115871287025e-06, "loss": 0.53, "step": 3228 }, { "epoch": 0.5302896557387146, "grad_norm": 0.3071228087006437, "learning_rate": 8.79197495415617e-06, "loss": 0.5541, "step": 3229 }, { "epoch": 0.5304538829470573, "grad_norm": 0.3325204943756406, "learning_rate": 8.791833990410246e-06, "loss": 0.5422, "step": 3230 }, { "epoch": 0.5306181101554, "grad_norm": 0.3669488938803181, "learning_rate": 8.79169298005078e-06, "loss": 0.5363, "step": 3231 }, { "epoch": 0.5307823373637427, "grad_norm": 0.27794447125357075, "learning_rate": 8.791551923079308e-06, "loss": 0.5678, "step": 3232 }, { "epoch": 0.5309465645720854, "grad_norm": 0.3167738475291457, "learning_rate": 8.791410819497359e-06, "loss": 0.54, "step": 3233 }, { "epoch": 0.5311107917804282, "grad_norm": 0.39694196446332014, "learning_rate": 8.791269669306465e-06, "loss": 0.5468, "step": 3234 }, { "epoch": 0.5312750189887709, "grad_norm": 0.32582945645680483, "learning_rate": 8.791128472508163e-06, "loss": 0.5498, "step": 3235 }, { "epoch": 0.5314392461971137, "grad_norm": 0.30304838745817103, "learning_rate": 8.790987229103981e-06, "loss": 0.5399, "step": 3236 }, { "epoch": 0.5316034734054564, "grad_norm": 0.3316043075793242, "learning_rate": 8.790845939095456e-06, "loss": 0.5433, "step": 3237 }, { "epoch": 0.5317677006137992, "grad_norm": 0.3544344684280817, "learning_rate": 8.790704602484125e-06, "loss": 0.5629, "step": 3238 }, { "epoch": 0.5319319278221419, "grad_norm": 0.2916093235123261, "learning_rate": 8.790563219271518e-06, "loss": 0.5545, "step": 3239 }, { "epoch": 0.5320961550304847, "grad_norm": 0.45549622656485855, "learning_rate": 8.790421789459175e-06, "loss": 0.5399, "step": 3240 }, { "epoch": 0.5322603822388274, "grad_norm": 0.33583247440668695, "learning_rate": 8.79028031304863e-06, "loss": 0.5452, "step": 3241 }, { "epoch": 0.5324246094471702, "grad_norm": 0.30414857412185614, "learning_rate": 8.79013879004142e-06, "loss": 0.5533, "step": 3242 }, { "epoch": 0.5325888366555129, "grad_norm": 0.3661648266690582, "learning_rate": 8.78999722043908e-06, "loss": 0.5316, "step": 3243 }, { "epoch": 0.5327530638638557, "grad_norm": 0.2943779954560458, "learning_rate": 8.78985560424315e-06, "loss": 0.5439, "step": 3244 }, { "epoch": 0.5329172910721984, "grad_norm": 0.6613520472875823, "learning_rate": 8.789713941455168e-06, "loss": 0.5534, "step": 3245 }, { "epoch": 0.5330815182805412, "grad_norm": 0.37859318983449436, "learning_rate": 8.789572232076671e-06, "loss": 0.5552, "step": 3246 }, { "epoch": 0.5332457454888838, "grad_norm": 0.32920543399704716, "learning_rate": 8.789430476109201e-06, "loss": 0.5392, "step": 3247 }, { "epoch": 0.5334099726972266, "grad_norm": 0.36207175639388633, "learning_rate": 8.789288673554296e-06, "loss": 0.5584, "step": 3248 }, { "epoch": 0.5335741999055693, "grad_norm": 0.3265540316957601, "learning_rate": 8.789146824413494e-06, "loss": 0.5497, "step": 3249 }, { "epoch": 0.5337384271139121, "grad_norm": 0.7945446107991943, "learning_rate": 8.789004928688339e-06, "loss": 0.5217, "step": 3250 }, { "epoch": 0.5339026543222548, "grad_norm": 0.3216038639309901, "learning_rate": 8.78886298638037e-06, "loss": 0.5409, "step": 3251 }, { "epoch": 0.5340668815305976, "grad_norm": 0.29287927055837376, "learning_rate": 8.78872099749113e-06, "loss": 0.5481, "step": 3252 }, { "epoch": 0.5342311087389403, "grad_norm": 0.29317563619901427, "learning_rate": 8.78857896202216e-06, "loss": 0.548, "step": 3253 }, { "epoch": 0.5343953359472831, "grad_norm": 0.739804214926253, "learning_rate": 8.788436879975003e-06, "loss": 0.5477, "step": 3254 }, { "epoch": 0.5345595631556258, "grad_norm": 0.3462570710581254, "learning_rate": 8.788294751351201e-06, "loss": 0.5446, "step": 3255 }, { "epoch": 0.5347237903639686, "grad_norm": 0.35379963473107207, "learning_rate": 8.7881525761523e-06, "loss": 0.5492, "step": 3256 }, { "epoch": 0.5348880175723113, "grad_norm": 0.6842229018746369, "learning_rate": 8.788010354379844e-06, "loss": 0.5425, "step": 3257 }, { "epoch": 0.535052244780654, "grad_norm": 0.3078870199163517, "learning_rate": 8.787868086035374e-06, "loss": 0.5589, "step": 3258 }, { "epoch": 0.5352164719889968, "grad_norm": 0.3013589290107262, "learning_rate": 8.78772577112044e-06, "loss": 0.5458, "step": 3259 }, { "epoch": 0.5353806991973395, "grad_norm": 0.29817945458234185, "learning_rate": 8.787583409636587e-06, "loss": 0.5515, "step": 3260 }, { "epoch": 0.5355449264056823, "grad_norm": 0.5011852342901882, "learning_rate": 8.787441001585356e-06, "loss": 0.5421, "step": 3261 }, { "epoch": 0.535709153614025, "grad_norm": 0.3648740653731427, "learning_rate": 8.787298546968301e-06, "loss": 0.5653, "step": 3262 }, { "epoch": 0.5358733808223678, "grad_norm": 0.29821976509943804, "learning_rate": 8.787156045786963e-06, "loss": 0.5378, "step": 3263 }, { "epoch": 0.5360376080307104, "grad_norm": 0.3126612686731939, "learning_rate": 8.787013498042896e-06, "loss": 0.5413, "step": 3264 }, { "epoch": 0.5362018352390532, "grad_norm": 0.4119496533178666, "learning_rate": 8.786870903737641e-06, "loss": 0.5681, "step": 3265 }, { "epoch": 0.5363660624473959, "grad_norm": 0.3168742881680851, "learning_rate": 8.78672826287275e-06, "loss": 0.5539, "step": 3266 }, { "epoch": 0.5365302896557387, "grad_norm": 0.31014627546022744, "learning_rate": 8.786585575449775e-06, "loss": 0.5537, "step": 3267 }, { "epoch": 0.5366945168640814, "grad_norm": 0.4724267454615436, "learning_rate": 8.786442841470261e-06, "loss": 0.55, "step": 3268 }, { "epoch": 0.5368587440724242, "grad_norm": 0.33371309506311503, "learning_rate": 8.786300060935761e-06, "loss": 0.5482, "step": 3269 }, { "epoch": 0.5370229712807669, "grad_norm": 0.3795166104581432, "learning_rate": 8.786157233847827e-06, "loss": 0.5255, "step": 3270 }, { "epoch": 0.5371871984891097, "grad_norm": 0.4632986698184538, "learning_rate": 8.786014360208008e-06, "loss": 0.5316, "step": 3271 }, { "epoch": 0.5373514256974524, "grad_norm": 0.3120794912916995, "learning_rate": 8.785871440017854e-06, "loss": 0.5272, "step": 3272 }, { "epoch": 0.5375156529057952, "grad_norm": 0.3266812219370469, "learning_rate": 8.785728473278922e-06, "loss": 0.5395, "step": 3273 }, { "epoch": 0.5376798801141379, "grad_norm": 0.3667428118373131, "learning_rate": 8.78558545999276e-06, "loss": 0.5372, "step": 3274 }, { "epoch": 0.5378441073224807, "grad_norm": 0.3770025621124436, "learning_rate": 8.785442400160925e-06, "loss": 0.5321, "step": 3275 }, { "epoch": 0.5380083345308234, "grad_norm": 0.3108514386539361, "learning_rate": 8.785299293784968e-06, "loss": 0.581, "step": 3276 }, { "epoch": 0.5381725617391662, "grad_norm": 0.32926200102815384, "learning_rate": 8.785156140866444e-06, "loss": 0.5551, "step": 3277 }, { "epoch": 0.5383367889475089, "grad_norm": 0.29382084808673803, "learning_rate": 8.785012941406911e-06, "loss": 0.5505, "step": 3278 }, { "epoch": 0.5385010161558517, "grad_norm": 0.296915713846914, "learning_rate": 8.78486969540792e-06, "loss": 0.5403, "step": 3279 }, { "epoch": 0.5386652433641944, "grad_norm": 0.3364779005663321, "learning_rate": 8.784726402871028e-06, "loss": 0.5507, "step": 3280 }, { "epoch": 0.538829470572537, "grad_norm": 0.31712966506772305, "learning_rate": 8.78458306379779e-06, "loss": 0.5481, "step": 3281 }, { "epoch": 0.5389936977808798, "grad_norm": 0.4194875222865034, "learning_rate": 8.784439678189769e-06, "loss": 0.5419, "step": 3282 }, { "epoch": 0.5391579249892225, "grad_norm": 0.36369565463198017, "learning_rate": 8.784296246048515e-06, "loss": 0.538, "step": 3283 }, { "epoch": 0.5393221521975653, "grad_norm": 0.33327635860855287, "learning_rate": 8.784152767375589e-06, "loss": 0.5786, "step": 3284 }, { "epoch": 0.539486379405908, "grad_norm": 0.31341028441266205, "learning_rate": 8.784009242172548e-06, "loss": 0.5383, "step": 3285 }, { "epoch": 0.5396506066142508, "grad_norm": 0.363543182076021, "learning_rate": 8.783865670440954e-06, "loss": 0.524, "step": 3286 }, { "epoch": 0.5398148338225935, "grad_norm": 0.3023715111443661, "learning_rate": 8.783722052182361e-06, "loss": 0.54, "step": 3287 }, { "epoch": 0.5399790610309363, "grad_norm": 0.3172844189567488, "learning_rate": 8.783578387398333e-06, "loss": 0.5319, "step": 3288 }, { "epoch": 0.540143288239279, "grad_norm": 0.315659171219246, "learning_rate": 8.78343467609043e-06, "loss": 0.5714, "step": 3289 }, { "epoch": 0.5403075154476218, "grad_norm": 0.7465434186042995, "learning_rate": 8.783290918260212e-06, "loss": 0.5532, "step": 3290 }, { "epoch": 0.5404717426559645, "grad_norm": 0.3801930075543929, "learning_rate": 8.78314711390924e-06, "loss": 0.5595, "step": 3291 }, { "epoch": 0.5406359698643073, "grad_norm": 0.35849452095029644, "learning_rate": 8.783003263039077e-06, "loss": 0.558, "step": 3292 }, { "epoch": 0.54080019707265, "grad_norm": 0.3286836188938242, "learning_rate": 8.782859365651284e-06, "loss": 0.549, "step": 3293 }, { "epoch": 0.5409644242809928, "grad_norm": 0.3544088100080713, "learning_rate": 8.782715421747424e-06, "loss": 0.5263, "step": 3294 }, { "epoch": 0.5411286514893355, "grad_norm": 0.3012499735294153, "learning_rate": 8.782571431329062e-06, "loss": 0.5493, "step": 3295 }, { "epoch": 0.5412928786976783, "grad_norm": 0.2862519026155833, "learning_rate": 8.78242739439776e-06, "loss": 0.5529, "step": 3296 }, { "epoch": 0.541457105906021, "grad_norm": 0.34212920789775564, "learning_rate": 8.782283310955084e-06, "loss": 0.5156, "step": 3297 }, { "epoch": 0.5416213331143637, "grad_norm": 0.2874868127529915, "learning_rate": 8.782139181002598e-06, "loss": 0.5347, "step": 3298 }, { "epoch": 0.5417855603227064, "grad_norm": 0.30620036288558916, "learning_rate": 8.781995004541866e-06, "loss": 0.5562, "step": 3299 }, { "epoch": 0.5419497875310492, "grad_norm": 0.3328961656461178, "learning_rate": 8.781850781574458e-06, "loss": 0.5402, "step": 3300 }, { "epoch": 0.5421140147393919, "grad_norm": 0.4321499881343302, "learning_rate": 8.781706512101936e-06, "loss": 0.5424, "step": 3301 }, { "epoch": 0.5422782419477347, "grad_norm": 0.2959437492978648, "learning_rate": 8.781562196125868e-06, "loss": 0.5616, "step": 3302 }, { "epoch": 0.5424424691560774, "grad_norm": 0.34091956215548647, "learning_rate": 8.781417833647823e-06, "loss": 0.533, "step": 3303 }, { "epoch": 0.5426066963644202, "grad_norm": 0.2969866679071415, "learning_rate": 8.781273424669368e-06, "loss": 0.5295, "step": 3304 }, { "epoch": 0.5427709235727629, "grad_norm": 0.33197289823642345, "learning_rate": 8.78112896919207e-06, "loss": 0.5319, "step": 3305 }, { "epoch": 0.5429351507811057, "grad_norm": 0.30111409556440877, "learning_rate": 8.780984467217503e-06, "loss": 0.5497, "step": 3306 }, { "epoch": 0.5430993779894484, "grad_norm": 0.3166579827962453, "learning_rate": 8.78083991874723e-06, "loss": 0.5573, "step": 3307 }, { "epoch": 0.5432636051977912, "grad_norm": 0.39090836439548915, "learning_rate": 8.780695323782823e-06, "loss": 0.5588, "step": 3308 }, { "epoch": 0.5434278324061339, "grad_norm": 0.41882139487374753, "learning_rate": 8.780550682325853e-06, "loss": 0.5407, "step": 3309 }, { "epoch": 0.5435920596144767, "grad_norm": 0.351388516687854, "learning_rate": 8.780405994377893e-06, "loss": 0.561, "step": 3310 }, { "epoch": 0.5437562868228194, "grad_norm": 0.5146337082689596, "learning_rate": 8.780261259940511e-06, "loss": 0.5512, "step": 3311 }, { "epoch": 0.5439205140311622, "grad_norm": 0.3632997394671109, "learning_rate": 8.780116479015283e-06, "loss": 0.5423, "step": 3312 }, { "epoch": 0.5440847412395049, "grad_norm": 0.31828930879474143, "learning_rate": 8.779971651603776e-06, "loss": 0.5395, "step": 3313 }, { "epoch": 0.5442489684478476, "grad_norm": 0.3052865887699414, "learning_rate": 8.779826777707568e-06, "loss": 0.5373, "step": 3314 }, { "epoch": 0.5444131956561903, "grad_norm": 0.28649306849827694, "learning_rate": 8.77968185732823e-06, "loss": 0.5406, "step": 3315 }, { "epoch": 0.544577422864533, "grad_norm": 0.3262919874735506, "learning_rate": 8.779536890467336e-06, "loss": 0.5552, "step": 3316 }, { "epoch": 0.5447416500728758, "grad_norm": 0.28064816936119535, "learning_rate": 8.77939187712646e-06, "loss": 0.5448, "step": 3317 }, { "epoch": 0.5449058772812185, "grad_norm": 0.2905610645984247, "learning_rate": 8.77924681730718e-06, "loss": 0.5514, "step": 3318 }, { "epoch": 0.5450701044895613, "grad_norm": 0.3582455870421614, "learning_rate": 8.779101711011067e-06, "loss": 0.5543, "step": 3319 }, { "epoch": 0.545234331697904, "grad_norm": 0.2956549075881862, "learning_rate": 8.7789565582397e-06, "loss": 0.528, "step": 3320 }, { "epoch": 0.5453985589062468, "grad_norm": 0.32122379476319896, "learning_rate": 8.778811358994655e-06, "loss": 0.5133, "step": 3321 }, { "epoch": 0.5455627861145895, "grad_norm": 0.2971563406062923, "learning_rate": 8.77866611327751e-06, "loss": 0.5245, "step": 3322 }, { "epoch": 0.5457270133229323, "grad_norm": 0.3102675621709384, "learning_rate": 8.77852082108984e-06, "loss": 0.538, "step": 3323 }, { "epoch": 0.545891240531275, "grad_norm": 0.2870978359981991, "learning_rate": 8.778375482433226e-06, "loss": 0.5384, "step": 3324 }, { "epoch": 0.5460554677396178, "grad_norm": 0.2900289483988109, "learning_rate": 8.778230097309243e-06, "loss": 0.542, "step": 3325 }, { "epoch": 0.5462196949479605, "grad_norm": 0.32092240664718624, "learning_rate": 8.778084665719473e-06, "loss": 0.5591, "step": 3326 }, { "epoch": 0.5463839221563033, "grad_norm": 0.3126772582717247, "learning_rate": 8.777939187665495e-06, "loss": 0.5469, "step": 3327 }, { "epoch": 0.546548149364646, "grad_norm": 0.3857668440217911, "learning_rate": 8.777793663148888e-06, "loss": 0.5633, "step": 3328 }, { "epoch": 0.5467123765729888, "grad_norm": 0.28195626253107636, "learning_rate": 8.777648092171232e-06, "loss": 0.5439, "step": 3329 }, { "epoch": 0.5468766037813315, "grad_norm": 0.36064556386630914, "learning_rate": 8.777502474734109e-06, "loss": 0.5489, "step": 3330 }, { "epoch": 0.5470408309896743, "grad_norm": 0.33236569642633484, "learning_rate": 8.777356810839102e-06, "loss": 0.5251, "step": 3331 }, { "epoch": 0.5472050581980169, "grad_norm": 0.5230949434342357, "learning_rate": 8.77721110048779e-06, "loss": 0.5652, "step": 3332 }, { "epoch": 0.5473692854063597, "grad_norm": 0.2952566421849118, "learning_rate": 8.77706534368176e-06, "loss": 0.5377, "step": 3333 }, { "epoch": 0.5475335126147024, "grad_norm": 0.35344945699330593, "learning_rate": 8.77691954042259e-06, "loss": 0.5225, "step": 3334 }, { "epoch": 0.5476977398230451, "grad_norm": 0.32436302971883846, "learning_rate": 8.776773690711866e-06, "loss": 0.5521, "step": 3335 }, { "epoch": 0.5478619670313879, "grad_norm": 0.30968709714872467, "learning_rate": 8.776627794551174e-06, "loss": 0.5473, "step": 3336 }, { "epoch": 0.5480261942397306, "grad_norm": 0.292355032883547, "learning_rate": 8.776481851942094e-06, "loss": 0.5296, "step": 3337 }, { "epoch": 0.5481904214480734, "grad_norm": 0.30650548477899137, "learning_rate": 8.776335862886216e-06, "loss": 0.5486, "step": 3338 }, { "epoch": 0.5483546486564161, "grad_norm": 0.30828921012684446, "learning_rate": 8.776189827385121e-06, "loss": 0.5424, "step": 3339 }, { "epoch": 0.5485188758647589, "grad_norm": 0.30623149799210697, "learning_rate": 8.776043745440398e-06, "loss": 0.5563, "step": 3340 }, { "epoch": 0.5486831030731016, "grad_norm": 0.320878900454465, "learning_rate": 8.775897617053633e-06, "loss": 0.5434, "step": 3341 }, { "epoch": 0.5488473302814444, "grad_norm": 0.3160907340766199, "learning_rate": 8.775751442226412e-06, "loss": 0.5266, "step": 3342 }, { "epoch": 0.5490115574897871, "grad_norm": 0.2940009688601457, "learning_rate": 8.775605220960325e-06, "loss": 0.5591, "step": 3343 }, { "epoch": 0.5491757846981299, "grad_norm": 0.2756229954361331, "learning_rate": 8.775458953256958e-06, "loss": 0.5422, "step": 3344 }, { "epoch": 0.5493400119064726, "grad_norm": 0.4017540228900945, "learning_rate": 8.7753126391179e-06, "loss": 0.5378, "step": 3345 }, { "epoch": 0.5495042391148154, "grad_norm": 0.30553436345975726, "learning_rate": 8.775166278544742e-06, "loss": 0.5207, "step": 3346 }, { "epoch": 0.5496684663231581, "grad_norm": 0.3855818478613099, "learning_rate": 8.775019871539071e-06, "loss": 0.5377, "step": 3347 }, { "epoch": 0.5498326935315009, "grad_norm": 0.26781930874015525, "learning_rate": 8.774873418102477e-06, "loss": 0.5154, "step": 3348 }, { "epoch": 0.5499969207398435, "grad_norm": 0.5037312157429292, "learning_rate": 8.774726918236553e-06, "loss": 0.5387, "step": 3349 }, { "epoch": 0.5501611479481863, "grad_norm": 0.37168947952148945, "learning_rate": 8.774580371942888e-06, "loss": 0.5503, "step": 3350 }, { "epoch": 0.550325375156529, "grad_norm": 0.33315012217900974, "learning_rate": 8.774433779223076e-06, "loss": 0.5407, "step": 3351 }, { "epoch": 0.5504896023648718, "grad_norm": 0.3247936341233711, "learning_rate": 8.774287140078708e-06, "loss": 0.5522, "step": 3352 }, { "epoch": 0.5506538295732145, "grad_norm": 0.4965710840359514, "learning_rate": 8.774140454511375e-06, "loss": 0.5416, "step": 3353 }, { "epoch": 0.5508180567815573, "grad_norm": 0.33188955600441644, "learning_rate": 8.773993722522672e-06, "loss": 0.5555, "step": 3354 }, { "epoch": 0.5509822839899, "grad_norm": 0.418317451310977, "learning_rate": 8.773846944114192e-06, "loss": 0.5425, "step": 3355 }, { "epoch": 0.5511465111982428, "grad_norm": 0.2825039647828183, "learning_rate": 8.773700119287528e-06, "loss": 0.5707, "step": 3356 }, { "epoch": 0.5513107384065855, "grad_norm": 0.3538687145304638, "learning_rate": 8.773553248044278e-06, "loss": 0.5685, "step": 3357 }, { "epoch": 0.5514749656149283, "grad_norm": 0.42920655053335216, "learning_rate": 8.773406330386034e-06, "loss": 0.5295, "step": 3358 }, { "epoch": 0.551639192823271, "grad_norm": 0.32412456816316443, "learning_rate": 8.773259366314393e-06, "loss": 0.5574, "step": 3359 }, { "epoch": 0.5518034200316138, "grad_norm": 0.2797144018953076, "learning_rate": 8.77311235583095e-06, "loss": 0.5669, "step": 3360 }, { "epoch": 0.5519676472399565, "grad_norm": 0.37403853631233064, "learning_rate": 8.772965298937305e-06, "loss": 0.5285, "step": 3361 }, { "epoch": 0.5521318744482993, "grad_norm": 0.28490995583623363, "learning_rate": 8.772818195635052e-06, "loss": 0.5413, "step": 3362 }, { "epoch": 0.552296101656642, "grad_norm": 0.32396743410008255, "learning_rate": 8.772671045925788e-06, "loss": 0.5585, "step": 3363 }, { "epoch": 0.5524603288649848, "grad_norm": 0.3075785289042439, "learning_rate": 8.772523849811114e-06, "loss": 0.5535, "step": 3364 }, { "epoch": 0.5526245560733275, "grad_norm": 0.4027775951653619, "learning_rate": 8.772376607292627e-06, "loss": 0.55, "step": 3365 }, { "epoch": 0.5527887832816701, "grad_norm": 0.385492345624032, "learning_rate": 8.772229318371927e-06, "loss": 0.5506, "step": 3366 }, { "epoch": 0.5529530104900129, "grad_norm": 0.33761364893706824, "learning_rate": 8.772081983050611e-06, "loss": 0.5321, "step": 3367 }, { "epoch": 0.5531172376983556, "grad_norm": 0.3403673283677389, "learning_rate": 8.771934601330285e-06, "loss": 0.5451, "step": 3368 }, { "epoch": 0.5532814649066984, "grad_norm": 0.35126406168503904, "learning_rate": 8.771787173212545e-06, "loss": 0.5395, "step": 3369 }, { "epoch": 0.5534456921150411, "grad_norm": 0.36250462085917223, "learning_rate": 8.771639698698993e-06, "loss": 0.5538, "step": 3370 }, { "epoch": 0.5536099193233839, "grad_norm": 0.28299198501272577, "learning_rate": 8.77149217779123e-06, "loss": 0.5542, "step": 3371 }, { "epoch": 0.5537741465317266, "grad_norm": 0.3021177801777963, "learning_rate": 8.77134461049086e-06, "loss": 0.5455, "step": 3372 }, { "epoch": 0.5539383737400694, "grad_norm": 0.33274453416677413, "learning_rate": 8.771196996799486e-06, "loss": 0.5501, "step": 3373 }, { "epoch": 0.5541026009484121, "grad_norm": 0.31709314075267925, "learning_rate": 8.77104933671871e-06, "loss": 0.5319, "step": 3374 }, { "epoch": 0.5542668281567549, "grad_norm": 0.38946982014827064, "learning_rate": 8.770901630250137e-06, "loss": 0.5438, "step": 3375 }, { "epoch": 0.5544310553650976, "grad_norm": 0.3106343141375796, "learning_rate": 8.77075387739537e-06, "loss": 0.537, "step": 3376 }, { "epoch": 0.5545952825734404, "grad_norm": 0.40028112123615217, "learning_rate": 8.770606078156013e-06, "loss": 0.5553, "step": 3377 }, { "epoch": 0.5547595097817831, "grad_norm": 0.3187911975290353, "learning_rate": 8.770458232533672e-06, "loss": 0.5554, "step": 3378 }, { "epoch": 0.5549237369901259, "grad_norm": 0.3378804598434121, "learning_rate": 8.770310340529954e-06, "loss": 0.5533, "step": 3379 }, { "epoch": 0.5550879641984686, "grad_norm": 0.3401695168568818, "learning_rate": 8.770162402146465e-06, "loss": 0.5408, "step": 3380 }, { "epoch": 0.5552521914068114, "grad_norm": 0.29459484154398957, "learning_rate": 8.77001441738481e-06, "loss": 0.5198, "step": 3381 }, { "epoch": 0.5554164186151541, "grad_norm": 0.3459913273509143, "learning_rate": 8.769866386246596e-06, "loss": 0.5393, "step": 3382 }, { "epoch": 0.5555806458234968, "grad_norm": 0.3211088301865988, "learning_rate": 8.769718308733434e-06, "loss": 0.5494, "step": 3383 }, { "epoch": 0.5557448730318395, "grad_norm": 0.3254018054944106, "learning_rate": 8.769570184846929e-06, "loss": 0.5429, "step": 3384 }, { "epoch": 0.5559091002401823, "grad_norm": 0.3114123751834169, "learning_rate": 8.769422014588692e-06, "loss": 0.5479, "step": 3385 }, { "epoch": 0.556073327448525, "grad_norm": 0.3236795283927829, "learning_rate": 8.769273797960331e-06, "loss": 0.5375, "step": 3386 }, { "epoch": 0.5562375546568677, "grad_norm": 0.4880689140211738, "learning_rate": 8.769125534963457e-06, "loss": 0.5387, "step": 3387 }, { "epoch": 0.5564017818652105, "grad_norm": 0.31117332262719577, "learning_rate": 8.768977225599679e-06, "loss": 0.5455, "step": 3388 }, { "epoch": 0.5565660090735532, "grad_norm": 0.2786661939353369, "learning_rate": 8.768828869870609e-06, "loss": 0.5511, "step": 3389 }, { "epoch": 0.556730236281896, "grad_norm": 0.31635464932598195, "learning_rate": 8.768680467777857e-06, "loss": 0.5336, "step": 3390 }, { "epoch": 0.5568944634902387, "grad_norm": 0.29765570525059454, "learning_rate": 8.768532019323034e-06, "loss": 0.5561, "step": 3391 }, { "epoch": 0.5570586906985815, "grad_norm": 0.3243154243746134, "learning_rate": 8.768383524507754e-06, "loss": 0.5381, "step": 3392 }, { "epoch": 0.5572229179069242, "grad_norm": 0.2893446275664096, "learning_rate": 8.76823498333363e-06, "loss": 0.5556, "step": 3393 }, { "epoch": 0.557387145115267, "grad_norm": 0.3273044449065161, "learning_rate": 8.768086395802274e-06, "loss": 0.5476, "step": 3394 }, { "epoch": 0.5575513723236097, "grad_norm": 0.3267822607622496, "learning_rate": 8.767937761915302e-06, "loss": 0.5467, "step": 3395 }, { "epoch": 0.5577155995319525, "grad_norm": 0.3321830374515757, "learning_rate": 8.767789081674324e-06, "loss": 0.5442, "step": 3396 }, { "epoch": 0.5578798267402952, "grad_norm": 0.361249384322939, "learning_rate": 8.767640355080962e-06, "loss": 0.5374, "step": 3397 }, { "epoch": 0.558044053948638, "grad_norm": 0.3154351904554402, "learning_rate": 8.767491582136823e-06, "loss": 0.536, "step": 3398 }, { "epoch": 0.5582082811569807, "grad_norm": 0.2980191259728029, "learning_rate": 8.767342762843529e-06, "loss": 0.5449, "step": 3399 }, { "epoch": 0.5583725083653234, "grad_norm": 0.39564078503411465, "learning_rate": 8.767193897202692e-06, "loss": 0.5377, "step": 3400 }, { "epoch": 0.5585367355736661, "grad_norm": 0.3517990839488776, "learning_rate": 8.767044985215933e-06, "loss": 0.5404, "step": 3401 }, { "epoch": 0.5587009627820089, "grad_norm": 0.4155353751005951, "learning_rate": 8.766896026884868e-06, "loss": 0.5333, "step": 3402 }, { "epoch": 0.5588651899903516, "grad_norm": 0.3017636592925083, "learning_rate": 8.766747022211112e-06, "loss": 0.5676, "step": 3403 }, { "epoch": 0.5590294171986944, "grad_norm": 0.33995408666281135, "learning_rate": 8.766597971196285e-06, "loss": 0.5374, "step": 3404 }, { "epoch": 0.5591936444070371, "grad_norm": 0.2795836141353966, "learning_rate": 8.766448873842009e-06, "loss": 0.5438, "step": 3405 }, { "epoch": 0.5593578716153799, "grad_norm": 0.35753173927761156, "learning_rate": 8.766299730149898e-06, "loss": 0.5552, "step": 3406 }, { "epoch": 0.5595220988237226, "grad_norm": 0.3248618335926127, "learning_rate": 8.766150540121576e-06, "loss": 0.532, "step": 3407 }, { "epoch": 0.5596863260320654, "grad_norm": 0.36621704636882757, "learning_rate": 8.766001303758661e-06, "loss": 0.5654, "step": 3408 }, { "epoch": 0.5598505532404081, "grad_norm": 0.3491811217207003, "learning_rate": 8.765852021062774e-06, "loss": 0.5283, "step": 3409 }, { "epoch": 0.5600147804487509, "grad_norm": 0.3545037992141468, "learning_rate": 8.765702692035539e-06, "loss": 0.548, "step": 3410 }, { "epoch": 0.5601790076570936, "grad_norm": 0.2763757716992341, "learning_rate": 8.765553316678574e-06, "loss": 0.5585, "step": 3411 }, { "epoch": 0.5603432348654364, "grad_norm": 0.30894970057288423, "learning_rate": 8.765403894993503e-06, "loss": 0.5697, "step": 3412 }, { "epoch": 0.5605074620737791, "grad_norm": 0.30395462426760694, "learning_rate": 8.765254426981951e-06, "loss": 0.5392, "step": 3413 }, { "epoch": 0.5606716892821219, "grad_norm": 0.2938324142755099, "learning_rate": 8.765104912645538e-06, "loss": 0.5445, "step": 3414 }, { "epoch": 0.5608359164904646, "grad_norm": 0.33410474841600196, "learning_rate": 8.76495535198589e-06, "loss": 0.533, "step": 3415 }, { "epoch": 0.5610001436988074, "grad_norm": 0.3183614582303699, "learning_rate": 8.764805745004632e-06, "loss": 0.5414, "step": 3416 }, { "epoch": 0.56116437090715, "grad_norm": 0.3107793870235241, "learning_rate": 8.764656091703386e-06, "loss": 0.5563, "step": 3417 }, { "epoch": 0.5613285981154927, "grad_norm": 0.28256412488593424, "learning_rate": 8.76450639208378e-06, "loss": 0.5213, "step": 3418 }, { "epoch": 0.5614928253238355, "grad_norm": 0.3291646446986307, "learning_rate": 8.764356646147437e-06, "loss": 0.5298, "step": 3419 }, { "epoch": 0.5616570525321782, "grad_norm": 0.29015251711779344, "learning_rate": 8.764206853895987e-06, "loss": 0.5451, "step": 3420 }, { "epoch": 0.561821279740521, "grad_norm": 0.3669840500783633, "learning_rate": 8.764057015331054e-06, "loss": 0.5599, "step": 3421 }, { "epoch": 0.5619855069488637, "grad_norm": 0.39324670708032294, "learning_rate": 8.763907130454267e-06, "loss": 0.5375, "step": 3422 }, { "epoch": 0.5621497341572065, "grad_norm": 0.31065568764508283, "learning_rate": 8.763757199267253e-06, "loss": 0.5408, "step": 3423 }, { "epoch": 0.5623139613655492, "grad_norm": 0.3830704592320809, "learning_rate": 8.763607221771643e-06, "loss": 0.5215, "step": 3424 }, { "epoch": 0.562478188573892, "grad_norm": 0.38348921195787256, "learning_rate": 8.763457197969061e-06, "loss": 0.5264, "step": 3425 }, { "epoch": 0.5626424157822347, "grad_norm": 0.37148155697446655, "learning_rate": 8.76330712786114e-06, "loss": 0.5582, "step": 3426 }, { "epoch": 0.5628066429905775, "grad_norm": 0.4273592131591784, "learning_rate": 8.76315701144951e-06, "loss": 0.5652, "step": 3427 }, { "epoch": 0.5629708701989202, "grad_norm": 0.36466575092615616, "learning_rate": 8.7630068487358e-06, "loss": 0.5272, "step": 3428 }, { "epoch": 0.563135097407263, "grad_norm": 0.3627460014212349, "learning_rate": 8.762856639721642e-06, "loss": 0.5487, "step": 3429 }, { "epoch": 0.5632993246156057, "grad_norm": 0.8673504397637322, "learning_rate": 8.762706384408666e-06, "loss": 0.5456, "step": 3430 }, { "epoch": 0.5634635518239485, "grad_norm": 0.32618874303521883, "learning_rate": 8.762556082798505e-06, "loss": 0.5374, "step": 3431 }, { "epoch": 0.5636277790322912, "grad_norm": 0.3913592007145045, "learning_rate": 8.76240573489279e-06, "loss": 0.5377, "step": 3432 }, { "epoch": 0.563792006240634, "grad_norm": 0.34399391895825815, "learning_rate": 8.762255340693156e-06, "loss": 0.5405, "step": 3433 }, { "epoch": 0.5639562334489766, "grad_norm": 0.307384899591098, "learning_rate": 8.762104900201235e-06, "loss": 0.5346, "step": 3434 }, { "epoch": 0.5641204606573194, "grad_norm": 0.2968959663698474, "learning_rate": 8.761954413418663e-06, "loss": 0.5568, "step": 3435 }, { "epoch": 0.5642846878656621, "grad_norm": 0.3949150892211441, "learning_rate": 8.761803880347073e-06, "loss": 0.5486, "step": 3436 }, { "epoch": 0.5644489150740049, "grad_norm": 0.33926890627687606, "learning_rate": 8.761653300988097e-06, "loss": 0.5197, "step": 3437 }, { "epoch": 0.5646131422823476, "grad_norm": 0.3172017138331228, "learning_rate": 8.761502675343375e-06, "loss": 0.5471, "step": 3438 }, { "epoch": 0.5647773694906904, "grad_norm": 0.418356801651984, "learning_rate": 8.761352003414541e-06, "loss": 0.5321, "step": 3439 }, { "epoch": 0.5649415966990331, "grad_norm": 0.3079378893504073, "learning_rate": 8.761201285203232e-06, "loss": 0.5321, "step": 3440 }, { "epoch": 0.5651058239073758, "grad_norm": 0.37210351753249876, "learning_rate": 8.761050520711083e-06, "loss": 0.5329, "step": 3441 }, { "epoch": 0.5652700511157186, "grad_norm": 0.2805648721097993, "learning_rate": 8.760899709939735e-06, "loss": 0.5186, "step": 3442 }, { "epoch": 0.5654342783240613, "grad_norm": 0.47212938527808185, "learning_rate": 8.760748852890824e-06, "loss": 0.5474, "step": 3443 }, { "epoch": 0.5655985055324041, "grad_norm": 0.342366746899832, "learning_rate": 8.760597949565988e-06, "loss": 0.5356, "step": 3444 }, { "epoch": 0.5657627327407468, "grad_norm": 0.30910313660004984, "learning_rate": 8.760446999966866e-06, "loss": 0.5389, "step": 3445 }, { "epoch": 0.5659269599490896, "grad_norm": 0.3183907922282123, "learning_rate": 8.760296004095098e-06, "loss": 0.526, "step": 3446 }, { "epoch": 0.5660911871574323, "grad_norm": 0.3900742021203354, "learning_rate": 8.760144961952324e-06, "loss": 0.538, "step": 3447 }, { "epoch": 0.5662554143657751, "grad_norm": 0.4494570545768727, "learning_rate": 8.759993873540184e-06, "loss": 0.5409, "step": 3448 }, { "epoch": 0.5664196415741178, "grad_norm": 0.3344372117969043, "learning_rate": 8.75984273886032e-06, "loss": 0.5416, "step": 3449 }, { "epoch": 0.5665838687824606, "grad_norm": 0.28815799555494404, "learning_rate": 8.759691557914372e-06, "loss": 0.5321, "step": 3450 }, { "epoch": 0.5667480959908032, "grad_norm": 0.3427716429030344, "learning_rate": 8.759540330703983e-06, "loss": 0.5218, "step": 3451 }, { "epoch": 0.566912323199146, "grad_norm": 0.3535468139773774, "learning_rate": 8.759389057230795e-06, "loss": 0.5311, "step": 3452 }, { "epoch": 0.5670765504074887, "grad_norm": 0.37238689749646064, "learning_rate": 8.759237737496451e-06, "loss": 0.5626, "step": 3453 }, { "epoch": 0.5672407776158315, "grad_norm": 0.4913391693392319, "learning_rate": 8.759086371502595e-06, "loss": 0.5505, "step": 3454 }, { "epoch": 0.5674050048241742, "grad_norm": 0.37181041963439404, "learning_rate": 8.75893495925087e-06, "loss": 0.5414, "step": 3455 }, { "epoch": 0.567569232032517, "grad_norm": 0.31509365349628793, "learning_rate": 8.758783500742922e-06, "loss": 0.55, "step": 3456 }, { "epoch": 0.5677334592408597, "grad_norm": 0.3368764395024524, "learning_rate": 8.758631995980395e-06, "loss": 0.5384, "step": 3457 }, { "epoch": 0.5678976864492025, "grad_norm": 0.42083886653211133, "learning_rate": 8.758480444964933e-06, "loss": 0.55, "step": 3458 }, { "epoch": 0.5680619136575452, "grad_norm": 0.30635925898925725, "learning_rate": 8.758328847698185e-06, "loss": 0.5211, "step": 3459 }, { "epoch": 0.568226140865888, "grad_norm": 0.31708343966830854, "learning_rate": 8.758177204181797e-06, "loss": 0.5579, "step": 3460 }, { "epoch": 0.5683903680742307, "grad_norm": 0.36409414468119855, "learning_rate": 8.758025514417415e-06, "loss": 0.5375, "step": 3461 }, { "epoch": 0.5685545952825735, "grad_norm": 0.3112449064143783, "learning_rate": 8.757873778406686e-06, "loss": 0.5374, "step": 3462 }, { "epoch": 0.5687188224909162, "grad_norm": 0.3608556254888521, "learning_rate": 8.757721996151258e-06, "loss": 0.5528, "step": 3463 }, { "epoch": 0.568883049699259, "grad_norm": 0.3114835902741878, "learning_rate": 8.757570167652781e-06, "loss": 0.5478, "step": 3464 }, { "epoch": 0.5690472769076017, "grad_norm": 0.4473767595398761, "learning_rate": 8.757418292912902e-06, "loss": 0.5407, "step": 3465 }, { "epoch": 0.5692115041159445, "grad_norm": 0.30253085478675174, "learning_rate": 8.757266371933272e-06, "loss": 0.5603, "step": 3466 }, { "epoch": 0.5693757313242872, "grad_norm": 0.3115092905114874, "learning_rate": 8.75711440471554e-06, "loss": 0.5284, "step": 3467 }, { "epoch": 0.5695399585326298, "grad_norm": 0.30839473910587395, "learning_rate": 8.756962391261358e-06, "loss": 0.5443, "step": 3468 }, { "epoch": 0.5697041857409726, "grad_norm": 0.28102093112639204, "learning_rate": 8.756810331572375e-06, "loss": 0.5476, "step": 3469 }, { "epoch": 0.5698684129493153, "grad_norm": 0.3302176543184028, "learning_rate": 8.756658225650245e-06, "loss": 0.5431, "step": 3470 }, { "epoch": 0.5700326401576581, "grad_norm": 0.3365665104771197, "learning_rate": 8.75650607349662e-06, "loss": 0.5262, "step": 3471 }, { "epoch": 0.5701968673660008, "grad_norm": 0.3067227588817041, "learning_rate": 8.75635387511315e-06, "loss": 0.5384, "step": 3472 }, { "epoch": 0.5703610945743436, "grad_norm": 0.33218329012978515, "learning_rate": 8.756201630501487e-06, "loss": 0.5332, "step": 3473 }, { "epoch": 0.5705253217826863, "grad_norm": 0.3713379909457561, "learning_rate": 8.756049339663288e-06, "loss": 0.5466, "step": 3474 }, { "epoch": 0.5706895489910291, "grad_norm": 0.34441687097450313, "learning_rate": 8.755897002600207e-06, "loss": 0.5541, "step": 3475 }, { "epoch": 0.5708537761993718, "grad_norm": 0.7228301451209881, "learning_rate": 8.755744619313896e-06, "loss": 0.5376, "step": 3476 }, { "epoch": 0.5710180034077146, "grad_norm": 0.2999297342940657, "learning_rate": 8.755592189806012e-06, "loss": 0.5443, "step": 3477 }, { "epoch": 0.5711822306160573, "grad_norm": 0.39362953316784804, "learning_rate": 8.75543971407821e-06, "loss": 0.535, "step": 3478 }, { "epoch": 0.5713464578244001, "grad_norm": 0.4111607829748149, "learning_rate": 8.755287192132145e-06, "loss": 0.5611, "step": 3479 }, { "epoch": 0.5715106850327428, "grad_norm": 0.29266563450602223, "learning_rate": 8.755134623969477e-06, "loss": 0.5634, "step": 3480 }, { "epoch": 0.5716749122410856, "grad_norm": 0.32732512271978664, "learning_rate": 8.75498200959186e-06, "loss": 0.543, "step": 3481 }, { "epoch": 0.5718391394494283, "grad_norm": 0.46875550296652735, "learning_rate": 8.754829349000948e-06, "loss": 0.5457, "step": 3482 }, { "epoch": 0.5720033666577711, "grad_norm": 0.33783116396267615, "learning_rate": 8.754676642198407e-06, "loss": 0.526, "step": 3483 }, { "epoch": 0.5721675938661138, "grad_norm": 0.5346132082744581, "learning_rate": 8.75452388918589e-06, "loss": 0.5329, "step": 3484 }, { "epoch": 0.5723318210744565, "grad_norm": 0.2764576251732501, "learning_rate": 8.754371089965058e-06, "loss": 0.5519, "step": 3485 }, { "epoch": 0.5724960482827992, "grad_norm": 0.2688202687009742, "learning_rate": 8.75421824453757e-06, "loss": 0.5399, "step": 3486 }, { "epoch": 0.572660275491142, "grad_norm": 0.3093609334958058, "learning_rate": 8.754065352905087e-06, "loss": 0.5277, "step": 3487 }, { "epoch": 0.5728245026994847, "grad_norm": 0.39119778064404526, "learning_rate": 8.753912415069269e-06, "loss": 0.5498, "step": 3488 }, { "epoch": 0.5729887299078275, "grad_norm": 0.2955026812716169, "learning_rate": 8.753759431031775e-06, "loss": 0.5499, "step": 3489 }, { "epoch": 0.5731529571161702, "grad_norm": 0.3113087610249926, "learning_rate": 8.753606400794271e-06, "loss": 0.5452, "step": 3490 }, { "epoch": 0.573317184324513, "grad_norm": 0.3611344907617437, "learning_rate": 8.753453324358416e-06, "loss": 0.5348, "step": 3491 }, { "epoch": 0.5734814115328557, "grad_norm": 0.3344023185058532, "learning_rate": 8.753300201725872e-06, "loss": 0.5429, "step": 3492 }, { "epoch": 0.5736456387411984, "grad_norm": 0.304956411481048, "learning_rate": 8.753147032898303e-06, "loss": 0.5482, "step": 3493 }, { "epoch": 0.5738098659495412, "grad_norm": 0.30165741270268126, "learning_rate": 8.752993817877373e-06, "loss": 0.5328, "step": 3494 }, { "epoch": 0.573974093157884, "grad_norm": 0.33741706948502814, "learning_rate": 8.752840556664747e-06, "loss": 0.5189, "step": 3495 }, { "epoch": 0.5741383203662267, "grad_norm": 0.3701100738767769, "learning_rate": 8.752687249262087e-06, "loss": 0.5623, "step": 3496 }, { "epoch": 0.5743025475745694, "grad_norm": 0.3169045858154691, "learning_rate": 8.75253389567106e-06, "loss": 0.5277, "step": 3497 }, { "epoch": 0.5744667747829122, "grad_norm": 0.3854837052550634, "learning_rate": 8.75238049589333e-06, "loss": 0.5461, "step": 3498 }, { "epoch": 0.5746310019912549, "grad_norm": 0.35234654736307097, "learning_rate": 8.752227049930566e-06, "loss": 0.5478, "step": 3499 }, { "epoch": 0.5747952291995977, "grad_norm": 0.43861512127403823, "learning_rate": 8.75207355778443e-06, "loss": 0.5158, "step": 3500 }, { "epoch": 0.5749594564079404, "grad_norm": 0.5687892409034156, "learning_rate": 8.751920019456594e-06, "loss": 0.5426, "step": 3501 }, { "epoch": 0.5751236836162831, "grad_norm": 0.321497927550389, "learning_rate": 8.75176643494872e-06, "loss": 0.5448, "step": 3502 }, { "epoch": 0.5752879108246258, "grad_norm": 0.3619816521146462, "learning_rate": 8.751612804262483e-06, "loss": 0.539, "step": 3503 }, { "epoch": 0.5754521380329686, "grad_norm": 0.32956518716772837, "learning_rate": 8.751459127399548e-06, "loss": 0.5492, "step": 3504 }, { "epoch": 0.5756163652413113, "grad_norm": 0.3437166953073345, "learning_rate": 8.751305404361582e-06, "loss": 0.5374, "step": 3505 }, { "epoch": 0.5757805924496541, "grad_norm": 0.32115500488296933, "learning_rate": 8.751151635150255e-06, "loss": 0.5278, "step": 3506 }, { "epoch": 0.5759448196579968, "grad_norm": 0.347119267249423, "learning_rate": 8.750997819767241e-06, "loss": 0.5529, "step": 3507 }, { "epoch": 0.5761090468663396, "grad_norm": 0.28964439064190384, "learning_rate": 8.75084395821421e-06, "loss": 0.5135, "step": 3508 }, { "epoch": 0.5762732740746823, "grad_norm": 0.3267292561169078, "learning_rate": 8.750690050492828e-06, "loss": 0.5232, "step": 3509 }, { "epoch": 0.5764375012830251, "grad_norm": 0.31301380438742404, "learning_rate": 8.750536096604772e-06, "loss": 0.5444, "step": 3510 }, { "epoch": 0.5766017284913678, "grad_norm": 0.3211593964134133, "learning_rate": 8.750382096551711e-06, "loss": 0.5541, "step": 3511 }, { "epoch": 0.5767659556997106, "grad_norm": 0.2826221728840829, "learning_rate": 8.750228050335319e-06, "loss": 0.5424, "step": 3512 }, { "epoch": 0.5769301829080533, "grad_norm": 0.436341101746728, "learning_rate": 8.750073957957269e-06, "loss": 0.5366, "step": 3513 }, { "epoch": 0.5770944101163961, "grad_norm": 0.38049927921661986, "learning_rate": 8.749919819419234e-06, "loss": 0.5241, "step": 3514 }, { "epoch": 0.5772586373247388, "grad_norm": 0.32937459946163317, "learning_rate": 8.749765634722889e-06, "loss": 0.5361, "step": 3515 }, { "epoch": 0.5774228645330816, "grad_norm": 0.3247301226127869, "learning_rate": 8.749611403869907e-06, "loss": 0.5296, "step": 3516 }, { "epoch": 0.5775870917414243, "grad_norm": 0.3040428174782979, "learning_rate": 8.749457126861965e-06, "loss": 0.5287, "step": 3517 }, { "epoch": 0.5777513189497671, "grad_norm": 0.33533069093525125, "learning_rate": 8.749302803700735e-06, "loss": 0.5494, "step": 3518 }, { "epoch": 0.5779155461581097, "grad_norm": 0.3993043789739712, "learning_rate": 8.7491484343879e-06, "loss": 0.5533, "step": 3519 }, { "epoch": 0.5780797733664524, "grad_norm": 0.37167067761157563, "learning_rate": 8.748994018925129e-06, "loss": 0.5457, "step": 3520 }, { "epoch": 0.5782440005747952, "grad_norm": 0.29226128233424004, "learning_rate": 8.748839557314105e-06, "loss": 0.5426, "step": 3521 }, { "epoch": 0.5784082277831379, "grad_norm": 0.3363682700498467, "learning_rate": 8.748685049556502e-06, "loss": 0.5322, "step": 3522 }, { "epoch": 0.5785724549914807, "grad_norm": 0.6507199888257422, "learning_rate": 8.748530495653999e-06, "loss": 0.553, "step": 3523 }, { "epoch": 0.5787366821998234, "grad_norm": 0.33296678939243896, "learning_rate": 8.748375895608275e-06, "loss": 0.5268, "step": 3524 }, { "epoch": 0.5789009094081662, "grad_norm": 0.29283720540204305, "learning_rate": 8.748221249421009e-06, "loss": 0.5463, "step": 3525 }, { "epoch": 0.5790651366165089, "grad_norm": 0.3050167584945107, "learning_rate": 8.74806655709388e-06, "loss": 0.549, "step": 3526 }, { "epoch": 0.5792293638248517, "grad_norm": 0.34419849382255013, "learning_rate": 8.74791181862857e-06, "loss": 0.5378, "step": 3527 }, { "epoch": 0.5793935910331944, "grad_norm": 0.3661112951777841, "learning_rate": 8.747757034026757e-06, "loss": 0.5397, "step": 3528 }, { "epoch": 0.5795578182415372, "grad_norm": 0.29887150740963486, "learning_rate": 8.747602203290124e-06, "loss": 0.5328, "step": 3529 }, { "epoch": 0.5797220454498799, "grad_norm": 0.46824862966054376, "learning_rate": 8.747447326420352e-06, "loss": 0.5405, "step": 3530 }, { "epoch": 0.5798862726582227, "grad_norm": 0.29399612928502583, "learning_rate": 8.747292403419123e-06, "loss": 0.5435, "step": 3531 }, { "epoch": 0.5800504998665654, "grad_norm": 0.310842296672758, "learning_rate": 8.74713743428812e-06, "loss": 0.5184, "step": 3532 }, { "epoch": 0.5802147270749082, "grad_norm": 0.4061885724828876, "learning_rate": 8.746982419029025e-06, "loss": 0.5321, "step": 3533 }, { "epoch": 0.5803789542832509, "grad_norm": 0.4215721203763714, "learning_rate": 8.746827357643524e-06, "loss": 0.5578, "step": 3534 }, { "epoch": 0.5805431814915937, "grad_norm": 0.320833189661272, "learning_rate": 8.746672250133299e-06, "loss": 0.5444, "step": 3535 }, { "epoch": 0.5807074086999363, "grad_norm": 0.30776465075514364, "learning_rate": 8.746517096500034e-06, "loss": 0.554, "step": 3536 }, { "epoch": 0.5808716359082791, "grad_norm": 0.3897926875015399, "learning_rate": 8.746361896745416e-06, "loss": 0.5526, "step": 3537 }, { "epoch": 0.5810358631166218, "grad_norm": 0.31542044643027906, "learning_rate": 8.74620665087113e-06, "loss": 0.5212, "step": 3538 }, { "epoch": 0.5812000903249646, "grad_norm": 0.33704459885406324, "learning_rate": 8.746051358878863e-06, "loss": 0.5216, "step": 3539 }, { "epoch": 0.5813643175333073, "grad_norm": 0.3362121370815365, "learning_rate": 8.745896020770298e-06, "loss": 0.5601, "step": 3540 }, { "epoch": 0.58152854474165, "grad_norm": 0.39125110963458015, "learning_rate": 8.745740636547128e-06, "loss": 0.5317, "step": 3541 }, { "epoch": 0.5816927719499928, "grad_norm": 0.30491752320771665, "learning_rate": 8.745585206211037e-06, "loss": 0.5494, "step": 3542 }, { "epoch": 0.5818569991583356, "grad_norm": 0.4110951329926245, "learning_rate": 8.745429729763711e-06, "loss": 0.5618, "step": 3543 }, { "epoch": 0.5820212263666783, "grad_norm": 0.343279992216792, "learning_rate": 8.745274207206844e-06, "loss": 0.5209, "step": 3544 }, { "epoch": 0.582185453575021, "grad_norm": 0.32229992763426724, "learning_rate": 8.745118638542121e-06, "loss": 0.5143, "step": 3545 }, { "epoch": 0.5823496807833638, "grad_norm": 0.41394397944438854, "learning_rate": 8.744963023771233e-06, "loss": 0.5621, "step": 3546 }, { "epoch": 0.5825139079917065, "grad_norm": 0.4164012584557086, "learning_rate": 8.74480736289587e-06, "loss": 0.554, "step": 3547 }, { "epoch": 0.5826781352000493, "grad_norm": 0.3226663377624633, "learning_rate": 8.744651655917724e-06, "loss": 0.549, "step": 3548 }, { "epoch": 0.582842362408392, "grad_norm": 0.37795580482924596, "learning_rate": 8.744495902838483e-06, "loss": 0.5176, "step": 3549 }, { "epoch": 0.5830065896167348, "grad_norm": 0.3317217876574988, "learning_rate": 8.744340103659841e-06, "loss": 0.5235, "step": 3550 }, { "epoch": 0.5831708168250775, "grad_norm": 0.32555740709280667, "learning_rate": 8.74418425838349e-06, "loss": 0.5547, "step": 3551 }, { "epoch": 0.5833350440334203, "grad_norm": 0.4069952879795932, "learning_rate": 8.744028367011122e-06, "loss": 0.5065, "step": 3552 }, { "epoch": 0.5834992712417629, "grad_norm": 0.32360525266398305, "learning_rate": 8.74387242954443e-06, "loss": 0.5353, "step": 3553 }, { "epoch": 0.5836634984501057, "grad_norm": 0.3948501021982254, "learning_rate": 8.74371644598511e-06, "loss": 0.5251, "step": 3554 }, { "epoch": 0.5838277256584484, "grad_norm": 0.3584701735245281, "learning_rate": 8.743560416334852e-06, "loss": 0.5521, "step": 3555 }, { "epoch": 0.5839919528667912, "grad_norm": 0.31069610623115584, "learning_rate": 8.743404340595352e-06, "loss": 0.5252, "step": 3556 }, { "epoch": 0.5841561800751339, "grad_norm": 0.3453463165092735, "learning_rate": 8.74324821876831e-06, "loss": 0.5388, "step": 3557 }, { "epoch": 0.5843204072834767, "grad_norm": 0.3017781549374568, "learning_rate": 8.743092050855413e-06, "loss": 0.5183, "step": 3558 }, { "epoch": 0.5844846344918194, "grad_norm": 0.40474828712089234, "learning_rate": 8.742935836858363e-06, "loss": 0.5571, "step": 3559 }, { "epoch": 0.5846488617001622, "grad_norm": 0.2922900982199023, "learning_rate": 8.742779576778857e-06, "loss": 0.5438, "step": 3560 }, { "epoch": 0.5848130889085049, "grad_norm": 0.3191290542154945, "learning_rate": 8.742623270618588e-06, "loss": 0.4971, "step": 3561 }, { "epoch": 0.5849773161168477, "grad_norm": 0.30540507514538917, "learning_rate": 8.74246691837926e-06, "loss": 0.5271, "step": 3562 }, { "epoch": 0.5851415433251904, "grad_norm": 0.31493701820865183, "learning_rate": 8.742310520062563e-06, "loss": 0.5414, "step": 3563 }, { "epoch": 0.5853057705335332, "grad_norm": 0.31388160368167506, "learning_rate": 8.742154075670202e-06, "loss": 0.5328, "step": 3564 }, { "epoch": 0.5854699977418759, "grad_norm": 0.31964141957864006, "learning_rate": 8.741997585203874e-06, "loss": 0.5393, "step": 3565 }, { "epoch": 0.5856342249502187, "grad_norm": 0.32701350425663833, "learning_rate": 8.741841048665279e-06, "loss": 0.5322, "step": 3566 }, { "epoch": 0.5857984521585614, "grad_norm": 0.3203015136579183, "learning_rate": 8.741684466056116e-06, "loss": 0.5055, "step": 3567 }, { "epoch": 0.5859626793669042, "grad_norm": 0.3208442953143098, "learning_rate": 8.741527837378086e-06, "loss": 0.5459, "step": 3568 }, { "epoch": 0.5861269065752469, "grad_norm": 0.37630098391133593, "learning_rate": 8.74137116263289e-06, "loss": 0.5428, "step": 3569 }, { "epoch": 0.5862911337835895, "grad_norm": 0.299680115481499, "learning_rate": 8.741214441822231e-06, "loss": 0.541, "step": 3570 }, { "epoch": 0.5864553609919323, "grad_norm": 0.3896245426246468, "learning_rate": 8.741057674947812e-06, "loss": 0.5301, "step": 3571 }, { "epoch": 0.586619588200275, "grad_norm": 0.28228508118203716, "learning_rate": 8.740900862011332e-06, "loss": 0.5237, "step": 3572 }, { "epoch": 0.5867838154086178, "grad_norm": 0.26998847124009234, "learning_rate": 8.740744003014498e-06, "loss": 0.5622, "step": 3573 }, { "epoch": 0.5869480426169605, "grad_norm": 0.31146496471387847, "learning_rate": 8.74058709795901e-06, "loss": 0.5439, "step": 3574 }, { "epoch": 0.5871122698253033, "grad_norm": 0.35384476669187864, "learning_rate": 8.740430146846576e-06, "loss": 0.5403, "step": 3575 }, { "epoch": 0.587276497033646, "grad_norm": 0.3270939587667193, "learning_rate": 8.740273149678897e-06, "loss": 0.53, "step": 3576 }, { "epoch": 0.5874407242419888, "grad_norm": 0.29676668095238884, "learning_rate": 8.74011610645768e-06, "loss": 0.5135, "step": 3577 }, { "epoch": 0.5876049514503315, "grad_norm": 0.28217328906910305, "learning_rate": 8.739959017184629e-06, "loss": 0.5421, "step": 3578 }, { "epoch": 0.5877691786586743, "grad_norm": 0.26291304157258816, "learning_rate": 8.739801881861453e-06, "loss": 0.5278, "step": 3579 }, { "epoch": 0.587933405867017, "grad_norm": 0.3132802190163169, "learning_rate": 8.739644700489856e-06, "loss": 0.5206, "step": 3580 }, { "epoch": 0.5880976330753598, "grad_norm": 0.3025007721387032, "learning_rate": 8.73948747307155e-06, "loss": 0.532, "step": 3581 }, { "epoch": 0.5882618602837025, "grad_norm": 0.26167893751892235, "learning_rate": 8.739330199608235e-06, "loss": 0.5501, "step": 3582 }, { "epoch": 0.5884260874920453, "grad_norm": 0.2802592450047528, "learning_rate": 8.739172880101624e-06, "loss": 0.5373, "step": 3583 }, { "epoch": 0.588590314700388, "grad_norm": 0.3227658244340501, "learning_rate": 8.739015514553425e-06, "loss": 0.5564, "step": 3584 }, { "epoch": 0.5887545419087308, "grad_norm": 0.31112797368771505, "learning_rate": 8.738858102965348e-06, "loss": 0.5329, "step": 3585 }, { "epoch": 0.5889187691170735, "grad_norm": 0.36689048311245726, "learning_rate": 8.7387006453391e-06, "loss": 0.534, "step": 3586 }, { "epoch": 0.5890829963254162, "grad_norm": 0.30855317467713334, "learning_rate": 8.738543141676393e-06, "loss": 0.5394, "step": 3587 }, { "epoch": 0.5892472235337589, "grad_norm": 0.35880633381663496, "learning_rate": 8.738385591978936e-06, "loss": 0.5273, "step": 3588 }, { "epoch": 0.5894114507421017, "grad_norm": 0.3353793135243243, "learning_rate": 8.738227996248444e-06, "loss": 0.5352, "step": 3589 }, { "epoch": 0.5895756779504444, "grad_norm": 0.29390223003626537, "learning_rate": 8.738070354486626e-06, "loss": 0.553, "step": 3590 }, { "epoch": 0.5897399051587872, "grad_norm": 0.27791226239243794, "learning_rate": 8.737912666695192e-06, "loss": 0.5389, "step": 3591 }, { "epoch": 0.5899041323671299, "grad_norm": 0.27245500324821437, "learning_rate": 8.73775493287586e-06, "loss": 0.5266, "step": 3592 }, { "epoch": 0.5900683595754727, "grad_norm": 0.8524051814576923, "learning_rate": 8.737597153030338e-06, "loss": 0.5444, "step": 3593 }, { "epoch": 0.5902325867838154, "grad_norm": 0.3002304556760417, "learning_rate": 8.73743932716034e-06, "loss": 0.5337, "step": 3594 }, { "epoch": 0.5903968139921582, "grad_norm": 0.30605452136769806, "learning_rate": 8.737281455267585e-06, "loss": 0.5386, "step": 3595 }, { "epoch": 0.5905610412005009, "grad_norm": 0.34386930237760743, "learning_rate": 8.737123537353783e-06, "loss": 0.5194, "step": 3596 }, { "epoch": 0.5907252684088437, "grad_norm": 0.3312859202395492, "learning_rate": 8.73696557342065e-06, "loss": 0.5592, "step": 3597 }, { "epoch": 0.5908894956171864, "grad_norm": 0.3658728879871337, "learning_rate": 8.736807563469905e-06, "loss": 0.5572, "step": 3598 }, { "epoch": 0.5910537228255291, "grad_norm": 0.2881831918233398, "learning_rate": 8.736649507503257e-06, "loss": 0.5371, "step": 3599 }, { "epoch": 0.5912179500338719, "grad_norm": 0.36645575351131854, "learning_rate": 8.736491405522431e-06, "loss": 0.5454, "step": 3600 }, { "epoch": 0.5913821772422146, "grad_norm": 0.3179909063540978, "learning_rate": 8.73633325752914e-06, "loss": 0.551, "step": 3601 }, { "epoch": 0.5915464044505574, "grad_norm": 0.38250970177758387, "learning_rate": 8.7361750635251e-06, "loss": 0.5431, "step": 3602 }, { "epoch": 0.5917106316589001, "grad_norm": 0.2894129002667864, "learning_rate": 8.736016823512031e-06, "loss": 0.5438, "step": 3603 }, { "epoch": 0.5918748588672428, "grad_norm": 0.2894750736546933, "learning_rate": 8.735858537491652e-06, "loss": 0.5393, "step": 3604 }, { "epoch": 0.5920390860755855, "grad_norm": 0.3680922889819331, "learning_rate": 8.735700205465683e-06, "loss": 0.5356, "step": 3605 }, { "epoch": 0.5922033132839283, "grad_norm": 0.3025193777662114, "learning_rate": 8.73554182743584e-06, "loss": 0.5447, "step": 3606 }, { "epoch": 0.592367540492271, "grad_norm": 0.3136810875159733, "learning_rate": 8.735383403403849e-06, "loss": 0.5303, "step": 3607 }, { "epoch": 0.5925317677006138, "grad_norm": 0.38311420078716235, "learning_rate": 8.735224933371423e-06, "loss": 0.5266, "step": 3608 }, { "epoch": 0.5926959949089565, "grad_norm": 0.28233496253123336, "learning_rate": 8.73506641734029e-06, "loss": 0.5449, "step": 3609 }, { "epoch": 0.5928602221172993, "grad_norm": 0.3227273047099139, "learning_rate": 8.734907855312168e-06, "loss": 0.5411, "step": 3610 }, { "epoch": 0.593024449325642, "grad_norm": 0.27668525367867175, "learning_rate": 8.734749247288782e-06, "loss": 0.5377, "step": 3611 }, { "epoch": 0.5931886765339848, "grad_norm": 0.28813590933697675, "learning_rate": 8.734590593271851e-06, "loss": 0.5364, "step": 3612 }, { "epoch": 0.5933529037423275, "grad_norm": 0.3242787415516203, "learning_rate": 8.7344318932631e-06, "loss": 0.5472, "step": 3613 }, { "epoch": 0.5935171309506703, "grad_norm": 0.30987179115108054, "learning_rate": 8.734273147264252e-06, "loss": 0.5264, "step": 3614 }, { "epoch": 0.593681358159013, "grad_norm": 0.28439528277004245, "learning_rate": 8.734114355277033e-06, "loss": 0.5309, "step": 3615 }, { "epoch": 0.5938455853673558, "grad_norm": 0.309183288887301, "learning_rate": 8.733955517303165e-06, "loss": 0.5451, "step": 3616 }, { "epoch": 0.5940098125756985, "grad_norm": 0.2947685618065334, "learning_rate": 8.733796633344375e-06, "loss": 0.5292, "step": 3617 }, { "epoch": 0.5941740397840413, "grad_norm": 0.3046893076201151, "learning_rate": 8.733637703402387e-06, "loss": 0.5221, "step": 3618 }, { "epoch": 0.594338266992384, "grad_norm": 0.32928639712865154, "learning_rate": 8.733478727478931e-06, "loss": 0.528, "step": 3619 }, { "epoch": 0.5945024942007266, "grad_norm": 0.4061734180068937, "learning_rate": 8.733319705575728e-06, "loss": 0.5405, "step": 3620 }, { "epoch": 0.5946667214090694, "grad_norm": 0.2870014695047263, "learning_rate": 8.73316063769451e-06, "loss": 0.5378, "step": 3621 }, { "epoch": 0.5948309486174121, "grad_norm": 0.35349649731908983, "learning_rate": 8.733001523837003e-06, "loss": 0.5466, "step": 3622 }, { "epoch": 0.5949951758257549, "grad_norm": 0.3189730804637844, "learning_rate": 8.732842364004932e-06, "loss": 0.5431, "step": 3623 }, { "epoch": 0.5951594030340976, "grad_norm": 0.3457512242859419, "learning_rate": 8.73268315820003e-06, "loss": 0.553, "step": 3624 }, { "epoch": 0.5953236302424404, "grad_norm": 0.40936712834610717, "learning_rate": 8.732523906424025e-06, "loss": 0.5194, "step": 3625 }, { "epoch": 0.5954878574507831, "grad_norm": 0.332203955122217, "learning_rate": 8.732364608678644e-06, "loss": 0.5539, "step": 3626 }, { "epoch": 0.5956520846591259, "grad_norm": 0.2911967123741656, "learning_rate": 8.732205264965622e-06, "loss": 0.5376, "step": 3627 }, { "epoch": 0.5958163118674686, "grad_norm": 0.287762468996504, "learning_rate": 8.732045875286685e-06, "loss": 0.5344, "step": 3628 }, { "epoch": 0.5959805390758114, "grad_norm": 0.34854621639857275, "learning_rate": 8.731886439643566e-06, "loss": 0.5423, "step": 3629 }, { "epoch": 0.5961447662841541, "grad_norm": 0.3698751509326475, "learning_rate": 8.731726958037998e-06, "loss": 0.529, "step": 3630 }, { "epoch": 0.5963089934924969, "grad_norm": 0.27934921610336444, "learning_rate": 8.731567430471711e-06, "loss": 0.5443, "step": 3631 }, { "epoch": 0.5964732207008396, "grad_norm": 0.3193564893222012, "learning_rate": 8.731407856946438e-06, "loss": 0.5296, "step": 3632 }, { "epoch": 0.5966374479091824, "grad_norm": 0.30388255767050054, "learning_rate": 8.731248237463913e-06, "loss": 0.5581, "step": 3633 }, { "epoch": 0.5968016751175251, "grad_norm": 0.34394183718979676, "learning_rate": 8.73108857202587e-06, "loss": 0.5245, "step": 3634 }, { "epoch": 0.5969659023258679, "grad_norm": 0.3368222367193206, "learning_rate": 8.730928860634041e-06, "loss": 0.5364, "step": 3635 }, { "epoch": 0.5971301295342106, "grad_norm": 0.6386666188329989, "learning_rate": 8.730769103290162e-06, "loss": 0.5315, "step": 3636 }, { "epoch": 0.5972943567425533, "grad_norm": 0.39752248221935366, "learning_rate": 8.73060929999597e-06, "loss": 0.5453, "step": 3637 }, { "epoch": 0.597458583950896, "grad_norm": 0.32135827832074965, "learning_rate": 8.730449450753197e-06, "loss": 0.5373, "step": 3638 }, { "epoch": 0.5976228111592388, "grad_norm": 0.2997430977323944, "learning_rate": 8.73028955556358e-06, "loss": 0.5314, "step": 3639 }, { "epoch": 0.5977870383675815, "grad_norm": 0.3172118412359191, "learning_rate": 8.730129614428858e-06, "loss": 0.5356, "step": 3640 }, { "epoch": 0.5979512655759243, "grad_norm": 0.2722040870504818, "learning_rate": 8.729969627350766e-06, "loss": 0.5503, "step": 3641 }, { "epoch": 0.598115492784267, "grad_norm": 0.29389145308594455, "learning_rate": 8.72980959433104e-06, "loss": 0.5079, "step": 3642 }, { "epoch": 0.5982797199926098, "grad_norm": 0.2885264804569022, "learning_rate": 8.729649515371423e-06, "loss": 0.556, "step": 3643 }, { "epoch": 0.5984439472009525, "grad_norm": 0.3008793211955447, "learning_rate": 8.729489390473649e-06, "loss": 0.5529, "step": 3644 }, { "epoch": 0.5986081744092953, "grad_norm": 0.31658137204584463, "learning_rate": 8.729329219639462e-06, "loss": 0.5335, "step": 3645 }, { "epoch": 0.598772401617638, "grad_norm": 0.34838847739756235, "learning_rate": 8.729169002870596e-06, "loss": 0.5502, "step": 3646 }, { "epoch": 0.5989366288259808, "grad_norm": 0.3756933229117239, "learning_rate": 8.729008740168793e-06, "loss": 0.5294, "step": 3647 }, { "epoch": 0.5991008560343235, "grad_norm": 0.34519823363513086, "learning_rate": 8.728848431535795e-06, "loss": 0.5284, "step": 3648 }, { "epoch": 0.5992650832426663, "grad_norm": 0.3167375361570294, "learning_rate": 8.728688076973344e-06, "loss": 0.5358, "step": 3649 }, { "epoch": 0.599429310451009, "grad_norm": 0.3306748583479528, "learning_rate": 8.728527676483178e-06, "loss": 0.5293, "step": 3650 }, { "epoch": 0.5995935376593517, "grad_norm": 0.3708640363221295, "learning_rate": 8.728367230067043e-06, "loss": 0.5427, "step": 3651 }, { "epoch": 0.5997577648676945, "grad_norm": 0.3431078530708263, "learning_rate": 8.728206737726678e-06, "loss": 0.5445, "step": 3652 }, { "epoch": 0.5999219920760372, "grad_norm": 0.32219217359231533, "learning_rate": 8.728046199463829e-06, "loss": 0.5324, "step": 3653 }, { "epoch": 0.6000862192843799, "grad_norm": 0.32057168261411606, "learning_rate": 8.727885615280237e-06, "loss": 0.5547, "step": 3654 }, { "epoch": 0.6002504464927226, "grad_norm": 0.2767525924131096, "learning_rate": 8.72772498517765e-06, "loss": 0.5452, "step": 3655 }, { "epoch": 0.6004146737010654, "grad_norm": 0.326894122884887, "learning_rate": 8.727564309157807e-06, "loss": 0.5449, "step": 3656 }, { "epoch": 0.6005789009094081, "grad_norm": 0.3048109582358382, "learning_rate": 8.727403587222457e-06, "loss": 0.541, "step": 3657 }, { "epoch": 0.6007431281177509, "grad_norm": 0.3520073738268126, "learning_rate": 8.727242819373347e-06, "loss": 0.5617, "step": 3658 }, { "epoch": 0.6009073553260936, "grad_norm": 0.3499362485696832, "learning_rate": 8.72708200561222e-06, "loss": 0.546, "step": 3659 }, { "epoch": 0.6010715825344364, "grad_norm": 0.29907581210466505, "learning_rate": 8.726921145940824e-06, "loss": 0.5367, "step": 3660 }, { "epoch": 0.6012358097427791, "grad_norm": 0.35970980950843484, "learning_rate": 8.726760240360904e-06, "loss": 0.5456, "step": 3661 }, { "epoch": 0.6014000369511219, "grad_norm": 0.3035374882855592, "learning_rate": 8.726599288874211e-06, "loss": 0.4972, "step": 3662 }, { "epoch": 0.6015642641594646, "grad_norm": 0.3077833132019388, "learning_rate": 8.72643829148249e-06, "loss": 0.5381, "step": 3663 }, { "epoch": 0.6017284913678074, "grad_norm": 0.27870036599180303, "learning_rate": 8.726277248187491e-06, "loss": 0.5331, "step": 3664 }, { "epoch": 0.6018927185761501, "grad_norm": 0.3980801030680251, "learning_rate": 8.726116158990964e-06, "loss": 0.5534, "step": 3665 }, { "epoch": 0.6020569457844929, "grad_norm": 0.306821683865838, "learning_rate": 8.725955023894657e-06, "loss": 0.5404, "step": 3666 }, { "epoch": 0.6022211729928356, "grad_norm": 0.30355098474208053, "learning_rate": 8.725793842900319e-06, "loss": 0.5446, "step": 3667 }, { "epoch": 0.6023854002011784, "grad_norm": 0.32484342105558917, "learning_rate": 8.725632616009704e-06, "loss": 0.5136, "step": 3668 }, { "epoch": 0.6025496274095211, "grad_norm": 0.35942539543304525, "learning_rate": 8.725471343224562e-06, "loss": 0.5522, "step": 3669 }, { "epoch": 0.6027138546178639, "grad_norm": 0.35114276088233815, "learning_rate": 8.725310024546642e-06, "loss": 0.5439, "step": 3670 }, { "epoch": 0.6028780818262065, "grad_norm": 0.33071150493599055, "learning_rate": 8.7251486599777e-06, "loss": 0.5394, "step": 3671 }, { "epoch": 0.6030423090345493, "grad_norm": 0.2914014429336388, "learning_rate": 8.724987249519485e-06, "loss": 0.5283, "step": 3672 }, { "epoch": 0.603206536242892, "grad_norm": 0.2987885928184587, "learning_rate": 8.724825793173752e-06, "loss": 0.5484, "step": 3673 }, { "epoch": 0.6033707634512347, "grad_norm": 0.35620845704867954, "learning_rate": 8.724664290942254e-06, "loss": 0.5232, "step": 3674 }, { "epoch": 0.6035349906595775, "grad_norm": 0.8502964437012199, "learning_rate": 8.724502742826746e-06, "loss": 0.5313, "step": 3675 }, { "epoch": 0.6036992178679202, "grad_norm": 0.35180116837141656, "learning_rate": 8.724341148828982e-06, "loss": 0.5405, "step": 3676 }, { "epoch": 0.603863445076263, "grad_norm": 0.3428233815348743, "learning_rate": 8.724179508950717e-06, "loss": 0.5556, "step": 3677 }, { "epoch": 0.6040276722846057, "grad_norm": 0.35078504166460134, "learning_rate": 8.724017823193706e-06, "loss": 0.5383, "step": 3678 }, { "epoch": 0.6041918994929485, "grad_norm": 0.7235745909791813, "learning_rate": 8.723856091559704e-06, "loss": 0.5448, "step": 3679 }, { "epoch": 0.6043561267012912, "grad_norm": 0.6140340695491158, "learning_rate": 8.723694314050472e-06, "loss": 0.5273, "step": 3680 }, { "epoch": 0.604520353909634, "grad_norm": 0.3498517666631241, "learning_rate": 8.723532490667763e-06, "loss": 0.5331, "step": 3681 }, { "epoch": 0.6046845811179767, "grad_norm": 0.32938150991708076, "learning_rate": 8.723370621413335e-06, "loss": 0.5391, "step": 3682 }, { "epoch": 0.6048488083263195, "grad_norm": 0.3697450699685494, "learning_rate": 8.723208706288946e-06, "loss": 0.5395, "step": 3683 }, { "epoch": 0.6050130355346622, "grad_norm": 0.32905386542315357, "learning_rate": 8.723046745296357e-06, "loss": 0.5269, "step": 3684 }, { "epoch": 0.605177262743005, "grad_norm": 0.30086869065685895, "learning_rate": 8.722884738437327e-06, "loss": 0.5128, "step": 3685 }, { "epoch": 0.6053414899513477, "grad_norm": 0.3113050683890726, "learning_rate": 8.722722685713612e-06, "loss": 0.5231, "step": 3686 }, { "epoch": 0.6055057171596905, "grad_norm": 0.36319048353923766, "learning_rate": 8.722560587126975e-06, "loss": 0.5269, "step": 3687 }, { "epoch": 0.6056699443680331, "grad_norm": 0.37114379821384086, "learning_rate": 8.722398442679174e-06, "loss": 0.5407, "step": 3688 }, { "epoch": 0.6058341715763759, "grad_norm": 0.3219171912615586, "learning_rate": 8.722236252371974e-06, "loss": 0.5359, "step": 3689 }, { "epoch": 0.6059983987847186, "grad_norm": 0.34569270952073616, "learning_rate": 8.722074016207131e-06, "loss": 0.5413, "step": 3690 }, { "epoch": 0.6061626259930614, "grad_norm": 0.34523412105687357, "learning_rate": 8.721911734186412e-06, "loss": 0.5198, "step": 3691 }, { "epoch": 0.6063268532014041, "grad_norm": 0.35292293229945165, "learning_rate": 8.721749406311578e-06, "loss": 0.5664, "step": 3692 }, { "epoch": 0.6064910804097469, "grad_norm": 0.39140784378450616, "learning_rate": 8.721587032584391e-06, "loss": 0.5174, "step": 3693 }, { "epoch": 0.6066553076180896, "grad_norm": 0.29287997907577673, "learning_rate": 8.721424613006616e-06, "loss": 0.5242, "step": 3694 }, { "epoch": 0.6068195348264324, "grad_norm": 0.32102245081740655, "learning_rate": 8.721262147580016e-06, "loss": 0.5078, "step": 3695 }, { "epoch": 0.6069837620347751, "grad_norm": 0.329148098587207, "learning_rate": 8.721099636306357e-06, "loss": 0.5179, "step": 3696 }, { "epoch": 0.6071479892431179, "grad_norm": 0.43195330263064485, "learning_rate": 8.720937079187402e-06, "loss": 0.5486, "step": 3697 }, { "epoch": 0.6073122164514606, "grad_norm": 0.2719468750025907, "learning_rate": 8.720774476224918e-06, "loss": 0.519, "step": 3698 }, { "epoch": 0.6074764436598034, "grad_norm": 0.335292603650919, "learning_rate": 8.72061182742067e-06, "loss": 0.5406, "step": 3699 }, { "epoch": 0.6076406708681461, "grad_norm": 0.3269121070823391, "learning_rate": 8.720449132776424e-06, "loss": 0.523, "step": 3700 }, { "epoch": 0.6078048980764889, "grad_norm": 0.35219972464970845, "learning_rate": 8.72028639229395e-06, "loss": 0.5228, "step": 3701 }, { "epoch": 0.6079691252848316, "grad_norm": 0.3391824315436888, "learning_rate": 8.720123605975012e-06, "loss": 0.5416, "step": 3702 }, { "epoch": 0.6081333524931744, "grad_norm": 0.2759373012282259, "learning_rate": 8.71996077382138e-06, "loss": 0.5291, "step": 3703 }, { "epoch": 0.6082975797015171, "grad_norm": 0.29854298935572593, "learning_rate": 8.719797895834823e-06, "loss": 0.5513, "step": 3704 }, { "epoch": 0.6084618069098597, "grad_norm": 0.29485850608627, "learning_rate": 8.719634972017109e-06, "loss": 0.5346, "step": 3705 }, { "epoch": 0.6086260341182025, "grad_norm": 0.29127554479034234, "learning_rate": 8.719472002370007e-06, "loss": 0.5288, "step": 3706 }, { "epoch": 0.6087902613265452, "grad_norm": 0.35738849044273974, "learning_rate": 8.719308986895288e-06, "loss": 0.5316, "step": 3707 }, { "epoch": 0.608954488534888, "grad_norm": 0.32317977559171934, "learning_rate": 8.719145925594722e-06, "loss": 0.5285, "step": 3708 }, { "epoch": 0.6091187157432307, "grad_norm": 0.323539384053774, "learning_rate": 8.718982818470081e-06, "loss": 0.5224, "step": 3709 }, { "epoch": 0.6092829429515735, "grad_norm": 0.3306250256102783, "learning_rate": 8.718819665523135e-06, "loss": 0.5515, "step": 3710 }, { "epoch": 0.6094471701599162, "grad_norm": 0.3057541843863994, "learning_rate": 8.718656466755657e-06, "loss": 0.5364, "step": 3711 }, { "epoch": 0.609611397368259, "grad_norm": 0.32189324965811217, "learning_rate": 8.718493222169417e-06, "loss": 0.56, "step": 3712 }, { "epoch": 0.6097756245766017, "grad_norm": 0.2930020470373635, "learning_rate": 8.718329931766193e-06, "loss": 0.5491, "step": 3713 }, { "epoch": 0.6099398517849445, "grad_norm": 0.37017958751656427, "learning_rate": 8.718166595547755e-06, "loss": 0.5273, "step": 3714 }, { "epoch": 0.6101040789932872, "grad_norm": 0.38151259250123476, "learning_rate": 8.718003213515876e-06, "loss": 0.5495, "step": 3715 }, { "epoch": 0.61026830620163, "grad_norm": 0.2899120782134563, "learning_rate": 8.717839785672334e-06, "loss": 0.5387, "step": 3716 }, { "epoch": 0.6104325334099727, "grad_norm": 0.3129274447525849, "learning_rate": 8.7176763120189e-06, "loss": 0.5236, "step": 3717 }, { "epoch": 0.6105967606183155, "grad_norm": 0.3245883613657934, "learning_rate": 8.717512792557355e-06, "loss": 0.5265, "step": 3718 }, { "epoch": 0.6107609878266582, "grad_norm": 0.3046206158653733, "learning_rate": 8.71734922728947e-06, "loss": 0.5493, "step": 3719 }, { "epoch": 0.610925215035001, "grad_norm": 0.3414330525019781, "learning_rate": 8.717185616217022e-06, "loss": 0.5487, "step": 3720 }, { "epoch": 0.6110894422433437, "grad_norm": 0.30157466649864123, "learning_rate": 8.71702195934179e-06, "loss": 0.5214, "step": 3721 }, { "epoch": 0.6112536694516864, "grad_norm": 0.2939600687346926, "learning_rate": 8.71685825666555e-06, "loss": 0.534, "step": 3722 }, { "epoch": 0.6114178966600291, "grad_norm": 0.3532357313126545, "learning_rate": 8.716694508190081e-06, "loss": 0.522, "step": 3723 }, { "epoch": 0.6115821238683719, "grad_norm": 0.2888952148557769, "learning_rate": 8.716530713917162e-06, "loss": 0.5391, "step": 3724 }, { "epoch": 0.6117463510767146, "grad_norm": 0.34438682455747893, "learning_rate": 8.716366873848569e-06, "loss": 0.5399, "step": 3725 }, { "epoch": 0.6119105782850573, "grad_norm": 0.364981358716848, "learning_rate": 8.716202987986084e-06, "loss": 0.5346, "step": 3726 }, { "epoch": 0.6120748054934001, "grad_norm": 0.4694648437859305, "learning_rate": 8.716039056331487e-06, "loss": 0.5249, "step": 3727 }, { "epoch": 0.6122390327017428, "grad_norm": 0.3210375908279311, "learning_rate": 8.715875078886557e-06, "loss": 0.5237, "step": 3728 }, { "epoch": 0.6124032599100856, "grad_norm": 0.34752632577085785, "learning_rate": 8.715711055653077e-06, "loss": 0.5405, "step": 3729 }, { "epoch": 0.6125674871184283, "grad_norm": 0.40203728653194964, "learning_rate": 8.715546986632826e-06, "loss": 0.5461, "step": 3730 }, { "epoch": 0.6127317143267711, "grad_norm": 0.3049524203199884, "learning_rate": 8.715382871827587e-06, "loss": 0.5383, "step": 3731 }, { "epoch": 0.6128959415351138, "grad_norm": 0.43035534035034767, "learning_rate": 8.715218711239143e-06, "loss": 0.5377, "step": 3732 }, { "epoch": 0.6130601687434566, "grad_norm": 0.304376981147912, "learning_rate": 8.715054504869277e-06, "loss": 0.5365, "step": 3733 }, { "epoch": 0.6132243959517993, "grad_norm": 0.3352498186486189, "learning_rate": 8.714890252719772e-06, "loss": 0.5405, "step": 3734 }, { "epoch": 0.6133886231601421, "grad_norm": 0.37635670121733, "learning_rate": 8.714725954792413e-06, "loss": 0.55, "step": 3735 }, { "epoch": 0.6135528503684848, "grad_norm": 0.3193338992698321, "learning_rate": 8.714561611088982e-06, "loss": 0.5254, "step": 3736 }, { "epoch": 0.6137170775768276, "grad_norm": 0.28862874206631534, "learning_rate": 8.714397221611264e-06, "loss": 0.5122, "step": 3737 }, { "epoch": 0.6138813047851703, "grad_norm": 0.36269165943906717, "learning_rate": 8.714232786361049e-06, "loss": 0.551, "step": 3738 }, { "epoch": 0.614045531993513, "grad_norm": 0.43221986090902803, "learning_rate": 8.714068305340117e-06, "loss": 0.5409, "step": 3739 }, { "epoch": 0.6142097592018557, "grad_norm": 0.30739719342988214, "learning_rate": 8.713903778550258e-06, "loss": 0.5617, "step": 3740 }, { "epoch": 0.6143739864101985, "grad_norm": 0.3447769130969769, "learning_rate": 8.713739205993259e-06, "loss": 0.5031, "step": 3741 }, { "epoch": 0.6145382136185412, "grad_norm": 0.36911964009951104, "learning_rate": 8.713574587670906e-06, "loss": 0.5487, "step": 3742 }, { "epoch": 0.614702440826884, "grad_norm": 0.2882172434812275, "learning_rate": 8.713409923584986e-06, "loss": 0.5187, "step": 3743 }, { "epoch": 0.6148666680352267, "grad_norm": 0.4483918078459419, "learning_rate": 8.71324521373729e-06, "loss": 0.5568, "step": 3744 }, { "epoch": 0.6150308952435695, "grad_norm": 0.3979105216794056, "learning_rate": 8.713080458129606e-06, "loss": 0.5288, "step": 3745 }, { "epoch": 0.6151951224519122, "grad_norm": 0.32051012765081943, "learning_rate": 8.712915656763723e-06, "loss": 0.5374, "step": 3746 }, { "epoch": 0.615359349660255, "grad_norm": 0.4134079464940972, "learning_rate": 8.71275080964143e-06, "loss": 0.5364, "step": 3747 }, { "epoch": 0.6155235768685977, "grad_norm": 0.3025117236117526, "learning_rate": 8.71258591676452e-06, "loss": 0.5315, "step": 3748 }, { "epoch": 0.6156878040769405, "grad_norm": 0.5045071173403394, "learning_rate": 8.71242097813478e-06, "loss": 0.517, "step": 3749 }, { "epoch": 0.6158520312852832, "grad_norm": 0.36659414586697897, "learning_rate": 8.712255993754007e-06, "loss": 0.5323, "step": 3750 }, { "epoch": 0.616016258493626, "grad_norm": 0.3413997188779241, "learning_rate": 8.712090963623987e-06, "loss": 0.5415, "step": 3751 }, { "epoch": 0.6161804857019687, "grad_norm": 0.30485270030431405, "learning_rate": 8.711925887746516e-06, "loss": 0.5422, "step": 3752 }, { "epoch": 0.6163447129103115, "grad_norm": 0.3781760980970153, "learning_rate": 8.711760766123385e-06, "loss": 0.5226, "step": 3753 }, { "epoch": 0.6165089401186542, "grad_norm": 0.32080829914928155, "learning_rate": 8.71159559875639e-06, "loss": 0.5307, "step": 3754 }, { "epoch": 0.616673167326997, "grad_norm": 0.2814514445391986, "learning_rate": 8.711430385647321e-06, "loss": 0.518, "step": 3755 }, { "epoch": 0.6168373945353396, "grad_norm": 0.3194129509030034, "learning_rate": 8.711265126797976e-06, "loss": 0.5458, "step": 3756 }, { "epoch": 0.6170016217436823, "grad_norm": 0.31273916859958956, "learning_rate": 8.711099822210148e-06, "loss": 0.529, "step": 3757 }, { "epoch": 0.6171658489520251, "grad_norm": 0.4807127966621582, "learning_rate": 8.710934471885632e-06, "loss": 0.5691, "step": 3758 }, { "epoch": 0.6173300761603678, "grad_norm": 0.35464260050427854, "learning_rate": 8.710769075826227e-06, "loss": 0.5455, "step": 3759 }, { "epoch": 0.6174943033687106, "grad_norm": 0.33236503232750314, "learning_rate": 8.710603634033725e-06, "loss": 0.5364, "step": 3760 }, { "epoch": 0.6176585305770533, "grad_norm": 0.3464239772480392, "learning_rate": 8.710438146509925e-06, "loss": 0.5267, "step": 3761 }, { "epoch": 0.6178227577853961, "grad_norm": 0.3150829346496259, "learning_rate": 8.710272613256623e-06, "loss": 0.5238, "step": 3762 }, { "epoch": 0.6179869849937388, "grad_norm": 0.33797103760143943, "learning_rate": 8.710107034275621e-06, "loss": 0.5101, "step": 3763 }, { "epoch": 0.6181512122020816, "grad_norm": 0.3460563845624605, "learning_rate": 8.709941409568712e-06, "loss": 0.5402, "step": 3764 }, { "epoch": 0.6183154394104243, "grad_norm": 0.31892852721514725, "learning_rate": 8.709775739137698e-06, "loss": 0.5443, "step": 3765 }, { "epoch": 0.6184796666187671, "grad_norm": 0.33910830608309395, "learning_rate": 8.709610022984379e-06, "loss": 0.5288, "step": 3766 }, { "epoch": 0.6186438938271098, "grad_norm": 0.5006130609576214, "learning_rate": 8.709444261110551e-06, "loss": 0.5698, "step": 3767 }, { "epoch": 0.6188081210354526, "grad_norm": 0.34041891842889027, "learning_rate": 8.70927845351802e-06, "loss": 0.5306, "step": 3768 }, { "epoch": 0.6189723482437953, "grad_norm": 0.32959334265653095, "learning_rate": 8.70911260020858e-06, "loss": 0.5027, "step": 3769 }, { "epoch": 0.6191365754521381, "grad_norm": 0.33632145365800753, "learning_rate": 8.708946701184038e-06, "loss": 0.5622, "step": 3770 }, { "epoch": 0.6193008026604808, "grad_norm": 0.3284090276692645, "learning_rate": 8.708780756446193e-06, "loss": 0.5258, "step": 3771 }, { "epoch": 0.6194650298688236, "grad_norm": 0.27420040378984273, "learning_rate": 8.70861476599685e-06, "loss": 0.5217, "step": 3772 }, { "epoch": 0.6196292570771662, "grad_norm": 0.37164897196678104, "learning_rate": 8.708448729837807e-06, "loss": 0.5001, "step": 3773 }, { "epoch": 0.619793484285509, "grad_norm": 0.3198369016389374, "learning_rate": 8.708282647970872e-06, "loss": 0.5501, "step": 3774 }, { "epoch": 0.6199577114938517, "grad_norm": 0.4734567044434302, "learning_rate": 8.708116520397847e-06, "loss": 0.5429, "step": 3775 }, { "epoch": 0.6201219387021945, "grad_norm": 0.3675363210937101, "learning_rate": 8.707950347120536e-06, "loss": 0.5343, "step": 3776 }, { "epoch": 0.6202861659105372, "grad_norm": 0.3130171423294823, "learning_rate": 8.707784128140745e-06, "loss": 0.5626, "step": 3777 }, { "epoch": 0.62045039311888, "grad_norm": 0.33656999876962657, "learning_rate": 8.707617863460276e-06, "loss": 0.5397, "step": 3778 }, { "epoch": 0.6206146203272227, "grad_norm": 0.378701611013499, "learning_rate": 8.70745155308094e-06, "loss": 0.5236, "step": 3779 }, { "epoch": 0.6207788475355654, "grad_norm": 0.40263268075287956, "learning_rate": 8.70728519700454e-06, "loss": 0.5522, "step": 3780 }, { "epoch": 0.6209430747439082, "grad_norm": 0.31609267720649964, "learning_rate": 8.707118795232882e-06, "loss": 0.5341, "step": 3781 }, { "epoch": 0.621107301952251, "grad_norm": 0.33076045899366674, "learning_rate": 8.706952347767776e-06, "loss": 0.5141, "step": 3782 }, { "epoch": 0.6212715291605937, "grad_norm": 0.3119390105211355, "learning_rate": 8.706785854611027e-06, "loss": 0.5395, "step": 3783 }, { "epoch": 0.6214357563689364, "grad_norm": 0.32407867611523294, "learning_rate": 8.706619315764446e-06, "loss": 0.5663, "step": 3784 }, { "epoch": 0.6215999835772792, "grad_norm": 0.3366621208808585, "learning_rate": 8.70645273122984e-06, "loss": 0.5303, "step": 3785 }, { "epoch": 0.6217642107856219, "grad_norm": 0.334100898833225, "learning_rate": 8.706286101009021e-06, "loss": 0.5346, "step": 3786 }, { "epoch": 0.6219284379939647, "grad_norm": 0.2938793811806174, "learning_rate": 8.706119425103793e-06, "loss": 0.5509, "step": 3787 }, { "epoch": 0.6220926652023074, "grad_norm": 0.30688080144004615, "learning_rate": 8.705952703515972e-06, "loss": 0.5409, "step": 3788 }, { "epoch": 0.6222568924106502, "grad_norm": 0.30865811818944405, "learning_rate": 8.705785936247364e-06, "loss": 0.5298, "step": 3789 }, { "epoch": 0.6224211196189928, "grad_norm": 0.34419297569497387, "learning_rate": 8.705619123299786e-06, "loss": 0.547, "step": 3790 }, { "epoch": 0.6225853468273356, "grad_norm": 0.36703178884640025, "learning_rate": 8.705452264675045e-06, "loss": 0.5413, "step": 3791 }, { "epoch": 0.6227495740356783, "grad_norm": 0.3923267640978763, "learning_rate": 8.705285360374955e-06, "loss": 0.5323, "step": 3792 }, { "epoch": 0.6229138012440211, "grad_norm": 0.33842940645185976, "learning_rate": 8.705118410401329e-06, "loss": 0.5302, "step": 3793 }, { "epoch": 0.6230780284523638, "grad_norm": 0.8608072591706694, "learning_rate": 8.704951414755978e-06, "loss": 0.5415, "step": 3794 }, { "epoch": 0.6232422556607066, "grad_norm": 0.2936882961967509, "learning_rate": 8.704784373440719e-06, "loss": 0.5353, "step": 3795 }, { "epoch": 0.6234064828690493, "grad_norm": 0.289643466614854, "learning_rate": 8.704617286457365e-06, "loss": 0.5305, "step": 3796 }, { "epoch": 0.6235707100773921, "grad_norm": 0.45597156884259565, "learning_rate": 8.70445015380773e-06, "loss": 0.5335, "step": 3797 }, { "epoch": 0.6237349372857348, "grad_norm": 0.37801086646828497, "learning_rate": 8.70428297549363e-06, "loss": 0.5075, "step": 3798 }, { "epoch": 0.6238991644940776, "grad_norm": 0.2812051698019951, "learning_rate": 8.70411575151688e-06, "loss": 0.5319, "step": 3799 }, { "epoch": 0.6240633917024203, "grad_norm": 0.3145164176523809, "learning_rate": 8.703948481879296e-06, "loss": 0.5154, "step": 3800 }, { "epoch": 0.6242276189107631, "grad_norm": 0.3293466939126531, "learning_rate": 8.703781166582696e-06, "loss": 0.5329, "step": 3801 }, { "epoch": 0.6243918461191058, "grad_norm": 0.3241613646790027, "learning_rate": 8.703613805628897e-06, "loss": 0.5366, "step": 3802 }, { "epoch": 0.6245560733274486, "grad_norm": 0.30109707531054103, "learning_rate": 8.703446399019716e-06, "loss": 0.5267, "step": 3803 }, { "epoch": 0.6247203005357913, "grad_norm": 0.3159230311428853, "learning_rate": 8.703278946756972e-06, "loss": 0.5408, "step": 3804 }, { "epoch": 0.624884527744134, "grad_norm": 0.32223834189608774, "learning_rate": 8.703111448842482e-06, "loss": 0.5587, "step": 3805 }, { "epoch": 0.6250487549524768, "grad_norm": 90.67267353364257, "learning_rate": 8.702943905278067e-06, "loss": 0.528, "step": 3806 }, { "epoch": 0.6252129821608194, "grad_norm": 0.43270448510011983, "learning_rate": 8.702776316065547e-06, "loss": 0.5133, "step": 3807 }, { "epoch": 0.6253772093691622, "grad_norm": 0.6509586015003804, "learning_rate": 8.70260868120674e-06, "loss": 0.5407, "step": 3808 }, { "epoch": 0.6255414365775049, "grad_norm": 1.4183420052563982, "learning_rate": 8.702441000703468e-06, "loss": 0.563, "step": 3809 }, { "epoch": 0.6257056637858477, "grad_norm": 1.8921901405577501, "learning_rate": 8.702273274557552e-06, "loss": 0.5751, "step": 3810 }, { "epoch": 0.6258698909941904, "grad_norm": 1.4172223283468877, "learning_rate": 8.702105502770813e-06, "loss": 0.545, "step": 3811 }, { "epoch": 0.6260341182025332, "grad_norm": 1.3874445788703256, "learning_rate": 8.701937685345076e-06, "loss": 0.573, "step": 3812 }, { "epoch": 0.6261983454108759, "grad_norm": 1.4763769040707089, "learning_rate": 8.70176982228216e-06, "loss": 0.5626, "step": 3813 }, { "epoch": 0.6263625726192187, "grad_norm": 1.2332840709413573, "learning_rate": 8.701601913583891e-06, "loss": 0.5505, "step": 3814 }, { "epoch": 0.6265267998275614, "grad_norm": 1.082496847586964, "learning_rate": 8.70143395925209e-06, "loss": 0.5308, "step": 3815 }, { "epoch": 0.6266910270359042, "grad_norm": 1.3361911741976398, "learning_rate": 8.701265959288584e-06, "loss": 0.5584, "step": 3816 }, { "epoch": 0.6268552542442469, "grad_norm": 0.9853921793041766, "learning_rate": 8.701097913695196e-06, "loss": 0.5561, "step": 3817 }, { "epoch": 0.6270194814525897, "grad_norm": 0.6673756700635366, "learning_rate": 8.70092982247375e-06, "loss": 0.5216, "step": 3818 }, { "epoch": 0.6271837086609324, "grad_norm": 0.7139398117801092, "learning_rate": 8.700761685626074e-06, "loss": 0.5466, "step": 3819 }, { "epoch": 0.6273479358692752, "grad_norm": 0.6201194455263372, "learning_rate": 8.700593503153993e-06, "loss": 0.5595, "step": 3820 }, { "epoch": 0.6275121630776179, "grad_norm": 0.6338582846012263, "learning_rate": 8.700425275059334e-06, "loss": 0.5416, "step": 3821 }, { "epoch": 0.6276763902859607, "grad_norm": 0.620727987673737, "learning_rate": 8.700257001343924e-06, "loss": 0.5448, "step": 3822 }, { "epoch": 0.6278406174943034, "grad_norm": 0.6572165151879532, "learning_rate": 8.70008868200959e-06, "loss": 0.5585, "step": 3823 }, { "epoch": 0.6280048447026461, "grad_norm": 0.7069587357491588, "learning_rate": 8.69992031705816e-06, "loss": 0.5505, "step": 3824 }, { "epoch": 0.6281690719109888, "grad_norm": 0.5887814051268679, "learning_rate": 8.699751906491464e-06, "loss": 0.5453, "step": 3825 }, { "epoch": 0.6283332991193316, "grad_norm": 0.5949769393475107, "learning_rate": 8.69958345031133e-06, "loss": 0.5485, "step": 3826 }, { "epoch": 0.6284975263276743, "grad_norm": 0.45559918621982776, "learning_rate": 8.699414948519588e-06, "loss": 0.5507, "step": 3827 }, { "epoch": 0.628661753536017, "grad_norm": 0.5066535100919715, "learning_rate": 8.699246401118067e-06, "loss": 0.5456, "step": 3828 }, { "epoch": 0.6288259807443598, "grad_norm": 0.603987704362724, "learning_rate": 8.699077808108598e-06, "loss": 0.5367, "step": 3829 }, { "epoch": 0.6289902079527026, "grad_norm": 0.5753829717531779, "learning_rate": 8.698909169493014e-06, "loss": 0.5354, "step": 3830 }, { "epoch": 0.6291544351610453, "grad_norm": 0.48583557088688634, "learning_rate": 8.698740485273147e-06, "loss": 0.5613, "step": 3831 }, { "epoch": 0.629318662369388, "grad_norm": 0.4923545359065946, "learning_rate": 8.698571755450826e-06, "loss": 0.5264, "step": 3832 }, { "epoch": 0.6294828895777308, "grad_norm": 0.5021171246250056, "learning_rate": 8.698402980027884e-06, "loss": 0.5536, "step": 3833 }, { "epoch": 0.6296471167860735, "grad_norm": 0.6308522128061763, "learning_rate": 8.698234159006155e-06, "loss": 0.5574, "step": 3834 }, { "epoch": 0.6298113439944163, "grad_norm": 0.5317056736086443, "learning_rate": 8.698065292387474e-06, "loss": 0.5362, "step": 3835 }, { "epoch": 0.629975571202759, "grad_norm": 0.41775683407473374, "learning_rate": 8.697896380173673e-06, "loss": 0.5521, "step": 3836 }, { "epoch": 0.6301397984111018, "grad_norm": 0.35903545755316413, "learning_rate": 8.697727422366586e-06, "loss": 0.56, "step": 3837 }, { "epoch": 0.6303040256194445, "grad_norm": 0.38789311643013547, "learning_rate": 8.69755841896805e-06, "loss": 0.525, "step": 3838 }, { "epoch": 0.6304682528277873, "grad_norm": 0.5183715290306259, "learning_rate": 8.697389369979901e-06, "loss": 0.5372, "step": 3839 }, { "epoch": 0.63063248003613, "grad_norm": 0.4307370872715388, "learning_rate": 8.697220275403972e-06, "loss": 0.5309, "step": 3840 }, { "epoch": 0.6307967072444727, "grad_norm": 0.3726029619618114, "learning_rate": 8.697051135242103e-06, "loss": 0.5386, "step": 3841 }, { "epoch": 0.6309609344528154, "grad_norm": 0.4780072696531049, "learning_rate": 8.696881949496127e-06, "loss": 0.5471, "step": 3842 }, { "epoch": 0.6311251616611582, "grad_norm": 0.34995870047191846, "learning_rate": 8.696712718167884e-06, "loss": 0.5402, "step": 3843 }, { "epoch": 0.6312893888695009, "grad_norm": 0.411103673149213, "learning_rate": 8.696543441259215e-06, "loss": 0.5433, "step": 3844 }, { "epoch": 0.6314536160778437, "grad_norm": 0.3587501781903258, "learning_rate": 8.696374118771952e-06, "loss": 0.5486, "step": 3845 }, { "epoch": 0.6316178432861864, "grad_norm": 0.33772335432480305, "learning_rate": 8.69620475070794e-06, "loss": 0.5315, "step": 3846 }, { "epoch": 0.6317820704945292, "grad_norm": 0.3390924628374558, "learning_rate": 8.696035337069013e-06, "loss": 0.5622, "step": 3847 }, { "epoch": 0.6319462977028719, "grad_norm": 0.4403930593852699, "learning_rate": 8.695865877857015e-06, "loss": 0.5013, "step": 3848 }, { "epoch": 0.6321105249112147, "grad_norm": 0.41806220799017596, "learning_rate": 8.695696373073787e-06, "loss": 0.5464, "step": 3849 }, { "epoch": 0.6322747521195574, "grad_norm": 0.4085362195583205, "learning_rate": 8.695526822721167e-06, "loss": 0.5217, "step": 3850 }, { "epoch": 0.6324389793279002, "grad_norm": 0.3689830215891882, "learning_rate": 8.695357226800999e-06, "loss": 0.5503, "step": 3851 }, { "epoch": 0.6326032065362429, "grad_norm": 0.3731211509083733, "learning_rate": 8.695187585315122e-06, "loss": 0.5305, "step": 3852 }, { "epoch": 0.6327674337445857, "grad_norm": 0.3781825465409617, "learning_rate": 8.695017898265381e-06, "loss": 0.536, "step": 3853 }, { "epoch": 0.6329316609529284, "grad_norm": 0.6074947040600762, "learning_rate": 8.694848165653618e-06, "loss": 0.5354, "step": 3854 }, { "epoch": 0.6330958881612712, "grad_norm": 0.33740040810301675, "learning_rate": 8.694678387481678e-06, "loss": 0.5216, "step": 3855 }, { "epoch": 0.6332601153696139, "grad_norm": 0.3720129842158853, "learning_rate": 8.694508563751404e-06, "loss": 0.5304, "step": 3856 }, { "epoch": 0.6334243425779567, "grad_norm": 0.34325179252543697, "learning_rate": 8.694338694464639e-06, "loss": 0.5401, "step": 3857 }, { "epoch": 0.6335885697862993, "grad_norm": 0.3983565060019772, "learning_rate": 8.694168779623229e-06, "loss": 0.5368, "step": 3858 }, { "epoch": 0.633752796994642, "grad_norm": 0.4136266171320773, "learning_rate": 8.69399881922902e-06, "loss": 0.5129, "step": 3859 }, { "epoch": 0.6339170242029848, "grad_norm": 0.5371242412763911, "learning_rate": 8.693828813283856e-06, "loss": 0.5441, "step": 3860 }, { "epoch": 0.6340812514113275, "grad_norm": 0.3961247480341962, "learning_rate": 8.693658761789587e-06, "loss": 0.5676, "step": 3861 }, { "epoch": 0.6342454786196703, "grad_norm": 0.39617220623101806, "learning_rate": 8.693488664748058e-06, "loss": 0.5468, "step": 3862 }, { "epoch": 0.634409705828013, "grad_norm": 0.40097095211622075, "learning_rate": 8.693318522161114e-06, "loss": 0.5169, "step": 3863 }, { "epoch": 0.6345739330363558, "grad_norm": 0.38894572895815444, "learning_rate": 8.693148334030607e-06, "loss": 0.5135, "step": 3864 }, { "epoch": 0.6347381602446985, "grad_norm": 0.3763156946705871, "learning_rate": 8.692978100358384e-06, "loss": 0.5529, "step": 3865 }, { "epoch": 0.6349023874530413, "grad_norm": 0.4886376067594294, "learning_rate": 8.692807821146292e-06, "loss": 0.5406, "step": 3866 }, { "epoch": 0.635066614661384, "grad_norm": 0.35352227694749655, "learning_rate": 8.692637496396181e-06, "loss": 0.535, "step": 3867 }, { "epoch": 0.6352308418697268, "grad_norm": 0.4983938112065874, "learning_rate": 8.692467126109904e-06, "loss": 0.5408, "step": 3868 }, { "epoch": 0.6353950690780695, "grad_norm": 0.31829193416012663, "learning_rate": 8.692296710289309e-06, "loss": 0.534, "step": 3869 }, { "epoch": 0.6355592962864123, "grad_norm": 0.3525883829667812, "learning_rate": 8.692126248936246e-06, "loss": 0.5417, "step": 3870 }, { "epoch": 0.635723523494755, "grad_norm": 0.36401514044093797, "learning_rate": 8.691955742052567e-06, "loss": 0.5371, "step": 3871 }, { "epoch": 0.6358877507030978, "grad_norm": 0.3969898970872966, "learning_rate": 8.691785189640127e-06, "loss": 0.5472, "step": 3872 }, { "epoch": 0.6360519779114405, "grad_norm": 0.33217592523461514, "learning_rate": 8.691614591700774e-06, "loss": 0.5308, "step": 3873 }, { "epoch": 0.6362162051197833, "grad_norm": 0.42246583922748293, "learning_rate": 8.691443948236361e-06, "loss": 0.5424, "step": 3874 }, { "epoch": 0.6363804323281259, "grad_norm": 0.3403711310191984, "learning_rate": 8.691273259248745e-06, "loss": 0.5514, "step": 3875 }, { "epoch": 0.6365446595364687, "grad_norm": 0.3248716492797271, "learning_rate": 8.691102524739778e-06, "loss": 0.5387, "step": 3876 }, { "epoch": 0.6367088867448114, "grad_norm": 0.4398054414005426, "learning_rate": 8.690931744711313e-06, "loss": 0.543, "step": 3877 }, { "epoch": 0.6368731139531542, "grad_norm": 0.42677973526179014, "learning_rate": 8.690760919165206e-06, "loss": 0.5536, "step": 3878 }, { "epoch": 0.6370373411614969, "grad_norm": 0.40237646554369005, "learning_rate": 8.690590048103313e-06, "loss": 0.5328, "step": 3879 }, { "epoch": 0.6372015683698397, "grad_norm": 0.2960670627284331, "learning_rate": 8.690419131527489e-06, "loss": 0.569, "step": 3880 }, { "epoch": 0.6373657955781824, "grad_norm": 0.4324403747553011, "learning_rate": 8.69024816943959e-06, "loss": 0.541, "step": 3881 }, { "epoch": 0.6375300227865252, "grad_norm": 0.3444517645451572, "learning_rate": 8.690077161841473e-06, "loss": 0.5449, "step": 3882 }, { "epoch": 0.6376942499948679, "grad_norm": 0.34750063901480577, "learning_rate": 8.689906108734994e-06, "loss": 0.5387, "step": 3883 }, { "epoch": 0.6378584772032106, "grad_norm": 0.30817723250833423, "learning_rate": 8.689735010122015e-06, "loss": 0.5284, "step": 3884 }, { "epoch": 0.6380227044115534, "grad_norm": 0.3530145511907926, "learning_rate": 8.68956386600439e-06, "loss": 0.5128, "step": 3885 }, { "epoch": 0.6381869316198961, "grad_norm": 0.40882371198815415, "learning_rate": 8.68939267638398e-06, "loss": 0.5395, "step": 3886 }, { "epoch": 0.6383511588282389, "grad_norm": 0.36352754615313715, "learning_rate": 8.689221441262645e-06, "loss": 0.5224, "step": 3887 }, { "epoch": 0.6385153860365816, "grad_norm": 0.2907383919549112, "learning_rate": 8.689050160642242e-06, "loss": 0.5336, "step": 3888 }, { "epoch": 0.6386796132449244, "grad_norm": 0.34749893951758054, "learning_rate": 8.688878834524634e-06, "loss": 0.5389, "step": 3889 }, { "epoch": 0.6388438404532671, "grad_norm": 0.40513911865191865, "learning_rate": 8.688707462911679e-06, "loss": 0.5158, "step": 3890 }, { "epoch": 0.6390080676616099, "grad_norm": 0.3030479591332535, "learning_rate": 8.688536045805241e-06, "loss": 0.5539, "step": 3891 }, { "epoch": 0.6391722948699525, "grad_norm": 0.33893944587583974, "learning_rate": 8.688364583207181e-06, "loss": 0.5337, "step": 3892 }, { "epoch": 0.6393365220782953, "grad_norm": 0.3315789532550877, "learning_rate": 8.68819307511936e-06, "loss": 0.5402, "step": 3893 }, { "epoch": 0.639500749286638, "grad_norm": 0.4070342888416627, "learning_rate": 8.68802152154364e-06, "loss": 0.5243, "step": 3894 }, { "epoch": 0.6396649764949808, "grad_norm": 0.30937112791655846, "learning_rate": 8.687849922481888e-06, "loss": 0.5215, "step": 3895 }, { "epoch": 0.6398292037033235, "grad_norm": 0.38064616880789337, "learning_rate": 8.687678277935965e-06, "loss": 0.5338, "step": 3896 }, { "epoch": 0.6399934309116663, "grad_norm": 0.47604423697526016, "learning_rate": 8.687506587907736e-06, "loss": 0.5311, "step": 3897 }, { "epoch": 0.640157658120009, "grad_norm": 0.410893019484912, "learning_rate": 8.687334852399064e-06, "loss": 0.5234, "step": 3898 }, { "epoch": 0.6403218853283518, "grad_norm": 0.41715319002167833, "learning_rate": 8.687163071411817e-06, "loss": 0.5223, "step": 3899 }, { "epoch": 0.6404861125366945, "grad_norm": 0.3342019449167628, "learning_rate": 8.686991244947861e-06, "loss": 0.5272, "step": 3900 }, { "epoch": 0.6406503397450373, "grad_norm": 0.30869298822600383, "learning_rate": 8.68681937300906e-06, "loss": 0.5458, "step": 3901 }, { "epoch": 0.64081456695338, "grad_norm": 0.4454509392864555, "learning_rate": 8.68664745559728e-06, "loss": 0.5255, "step": 3902 }, { "epoch": 0.6409787941617228, "grad_norm": 0.33893845187399224, "learning_rate": 8.686475492714389e-06, "loss": 0.5355, "step": 3903 }, { "epoch": 0.6411430213700655, "grad_norm": 0.36988459118997447, "learning_rate": 8.686303484362257e-06, "loss": 0.5415, "step": 3904 }, { "epoch": 0.6413072485784083, "grad_norm": 0.3705192814061367, "learning_rate": 8.686131430542749e-06, "loss": 0.5456, "step": 3905 }, { "epoch": 0.641471475786751, "grad_norm": 0.35173078787427914, "learning_rate": 8.685959331257736e-06, "loss": 0.5217, "step": 3906 }, { "epoch": 0.6416357029950938, "grad_norm": 0.2957226125413411, "learning_rate": 8.685787186509084e-06, "loss": 0.533, "step": 3907 }, { "epoch": 0.6417999302034365, "grad_norm": 0.7762910142753189, "learning_rate": 8.685614996298667e-06, "loss": 0.536, "step": 3908 }, { "epoch": 0.6419641574117791, "grad_norm": 0.32793594523929287, "learning_rate": 8.685442760628354e-06, "loss": 0.5203, "step": 3909 }, { "epoch": 0.6421283846201219, "grad_norm": 0.29157387769874193, "learning_rate": 8.685270479500013e-06, "loss": 0.5282, "step": 3910 }, { "epoch": 0.6422926118284646, "grad_norm": 0.35421199782762086, "learning_rate": 8.685098152915517e-06, "loss": 0.5606, "step": 3911 }, { "epoch": 0.6424568390368074, "grad_norm": 0.3948593371824962, "learning_rate": 8.684925780876737e-06, "loss": 0.5283, "step": 3912 }, { "epoch": 0.6426210662451501, "grad_norm": 0.3382510623900333, "learning_rate": 8.684753363385547e-06, "loss": 0.5291, "step": 3913 }, { "epoch": 0.6427852934534929, "grad_norm": 0.34658199700209985, "learning_rate": 8.684580900443818e-06, "loss": 0.5205, "step": 3914 }, { "epoch": 0.6429495206618356, "grad_norm": 0.347988158080561, "learning_rate": 8.684408392053423e-06, "loss": 0.5284, "step": 3915 }, { "epoch": 0.6431137478701784, "grad_norm": 0.3289779514425289, "learning_rate": 8.684235838216237e-06, "loss": 0.5491, "step": 3916 }, { "epoch": 0.6432779750785211, "grad_norm": 0.417822963644472, "learning_rate": 8.684063238934131e-06, "loss": 0.5272, "step": 3917 }, { "epoch": 0.6434422022868639, "grad_norm": 0.3819259777641144, "learning_rate": 8.683890594208982e-06, "loss": 0.5391, "step": 3918 }, { "epoch": 0.6436064294952066, "grad_norm": 0.3245771331101448, "learning_rate": 8.683717904042665e-06, "loss": 0.5288, "step": 3919 }, { "epoch": 0.6437706567035494, "grad_norm": 0.3232520982859475, "learning_rate": 8.683545168437057e-06, "loss": 0.546, "step": 3920 }, { "epoch": 0.6439348839118921, "grad_norm": 0.3237295073427335, "learning_rate": 8.683372387394031e-06, "loss": 0.5479, "step": 3921 }, { "epoch": 0.6440991111202349, "grad_norm": 0.3807149827338743, "learning_rate": 8.683199560915464e-06, "loss": 0.5288, "step": 3922 }, { "epoch": 0.6442633383285776, "grad_norm": 0.3063169142380427, "learning_rate": 8.683026689003236e-06, "loss": 0.5211, "step": 3923 }, { "epoch": 0.6444275655369204, "grad_norm": 0.43993293159188923, "learning_rate": 8.682853771659222e-06, "loss": 0.5373, "step": 3924 }, { "epoch": 0.6445917927452631, "grad_norm": 0.3216930168200892, "learning_rate": 8.6826808088853e-06, "loss": 0.5395, "step": 3925 }, { "epoch": 0.6447560199536058, "grad_norm": 0.45364275257594294, "learning_rate": 8.68250780068335e-06, "loss": 0.5586, "step": 3926 }, { "epoch": 0.6449202471619485, "grad_norm": 0.34204207855157476, "learning_rate": 8.682334747055251e-06, "loss": 0.5261, "step": 3927 }, { "epoch": 0.6450844743702913, "grad_norm": 0.47713121914216133, "learning_rate": 8.682161648002881e-06, "loss": 0.5333, "step": 3928 }, { "epoch": 0.645248701578634, "grad_norm": 0.4187590920449625, "learning_rate": 8.681988503528119e-06, "loss": 0.5417, "step": 3929 }, { "epoch": 0.6454129287869768, "grad_norm": 0.47497527294790776, "learning_rate": 8.68181531363285e-06, "loss": 0.5462, "step": 3930 }, { "epoch": 0.6455771559953195, "grad_norm": 0.3535004979395706, "learning_rate": 8.681642078318952e-06, "loss": 0.546, "step": 3931 }, { "epoch": 0.6457413832036623, "grad_norm": 0.359940360159505, "learning_rate": 8.681468797588304e-06, "loss": 0.5273, "step": 3932 }, { "epoch": 0.645905610412005, "grad_norm": 0.3685906434372928, "learning_rate": 8.681295471442793e-06, "loss": 0.5349, "step": 3933 }, { "epoch": 0.6460698376203478, "grad_norm": 0.4558508535400147, "learning_rate": 8.6811220998843e-06, "loss": 0.5183, "step": 3934 }, { "epoch": 0.6462340648286905, "grad_norm": 0.44492975761566206, "learning_rate": 8.680948682914706e-06, "loss": 0.5257, "step": 3935 }, { "epoch": 0.6463982920370333, "grad_norm": 0.3440786536061186, "learning_rate": 8.680775220535897e-06, "loss": 0.5169, "step": 3936 }, { "epoch": 0.646562519245376, "grad_norm": 0.41451230027792896, "learning_rate": 8.680601712749755e-06, "loss": 0.536, "step": 3937 }, { "epoch": 0.6467267464537187, "grad_norm": 0.34734955497252606, "learning_rate": 8.680428159558167e-06, "loss": 0.563, "step": 3938 }, { "epoch": 0.6468909736620615, "grad_norm": 0.4474330702145904, "learning_rate": 8.680254560963014e-06, "loss": 0.5298, "step": 3939 }, { "epoch": 0.6470552008704042, "grad_norm": 0.4773063602198883, "learning_rate": 8.680080916966183e-06, "loss": 0.5524, "step": 3940 }, { "epoch": 0.647219428078747, "grad_norm": 0.3754853566580427, "learning_rate": 8.679907227569562e-06, "loss": 0.5538, "step": 3941 }, { "epoch": 0.6473836552870897, "grad_norm": 0.3432933716810742, "learning_rate": 8.679733492775035e-06, "loss": 0.5241, "step": 3942 }, { "epoch": 0.6475478824954324, "grad_norm": 0.4450073864802975, "learning_rate": 8.679559712584492e-06, "loss": 0.5283, "step": 3943 }, { "epoch": 0.6477121097037751, "grad_norm": 0.3940289611879266, "learning_rate": 8.679385886999818e-06, "loss": 0.543, "step": 3944 }, { "epoch": 0.6478763369121179, "grad_norm": 0.31865120983857287, "learning_rate": 8.6792120160229e-06, "loss": 0.5276, "step": 3945 }, { "epoch": 0.6480405641204606, "grad_norm": 0.37514316444909035, "learning_rate": 8.679038099655629e-06, "loss": 0.5283, "step": 3946 }, { "epoch": 0.6482047913288034, "grad_norm": 0.38267915121977825, "learning_rate": 8.678864137899892e-06, "loss": 0.5355, "step": 3947 }, { "epoch": 0.6483690185371461, "grad_norm": 0.3566722105246468, "learning_rate": 8.678690130757579e-06, "loss": 0.5222, "step": 3948 }, { "epoch": 0.6485332457454889, "grad_norm": 0.36829644006372725, "learning_rate": 8.67851607823058e-06, "loss": 0.5476, "step": 3949 }, { "epoch": 0.6486974729538316, "grad_norm": 0.4250247596289241, "learning_rate": 8.678341980320785e-06, "loss": 0.5248, "step": 3950 }, { "epoch": 0.6488617001621744, "grad_norm": 0.35853628855086594, "learning_rate": 8.678167837030085e-06, "loss": 0.5204, "step": 3951 }, { "epoch": 0.6490259273705171, "grad_norm": 0.3992651417520165, "learning_rate": 8.677993648360371e-06, "loss": 0.5544, "step": 3952 }, { "epoch": 0.6491901545788599, "grad_norm": 0.3157472097393151, "learning_rate": 8.677819414313537e-06, "loss": 0.52, "step": 3953 }, { "epoch": 0.6493543817872026, "grad_norm": 0.35866185315676313, "learning_rate": 8.677645134891472e-06, "loss": 0.5252, "step": 3954 }, { "epoch": 0.6495186089955454, "grad_norm": 0.3720242063110086, "learning_rate": 8.677470810096072e-06, "loss": 0.5543, "step": 3955 }, { "epoch": 0.6496828362038881, "grad_norm": 0.33452108930498453, "learning_rate": 8.677296439929228e-06, "loss": 0.5281, "step": 3956 }, { "epoch": 0.6498470634122309, "grad_norm": 0.36791969880247277, "learning_rate": 8.677122024392837e-06, "loss": 0.543, "step": 3957 }, { "epoch": 0.6500112906205736, "grad_norm": 0.4421337612938926, "learning_rate": 8.676947563488789e-06, "loss": 0.5399, "step": 3958 }, { "epoch": 0.6501755178289164, "grad_norm": 0.317527166513708, "learning_rate": 8.67677305721898e-06, "loss": 0.5314, "step": 3959 }, { "epoch": 0.650339745037259, "grad_norm": 1.3753223596720887, "learning_rate": 8.676598505585308e-06, "loss": 0.5365, "step": 3960 }, { "epoch": 0.6505039722456017, "grad_norm": 0.35311319928316154, "learning_rate": 8.676423908589667e-06, "loss": 0.5168, "step": 3961 }, { "epoch": 0.6506681994539445, "grad_norm": 0.36668233413529516, "learning_rate": 8.676249266233952e-06, "loss": 0.5094, "step": 3962 }, { "epoch": 0.6508324266622872, "grad_norm": 0.34926006262255677, "learning_rate": 8.676074578520061e-06, "loss": 0.5266, "step": 3963 }, { "epoch": 0.65099665387063, "grad_norm": 0.3230850253272737, "learning_rate": 8.675899845449892e-06, "loss": 0.5222, "step": 3964 }, { "epoch": 0.6511608810789727, "grad_norm": 0.3178205063481531, "learning_rate": 8.675725067025343e-06, "loss": 0.5334, "step": 3965 }, { "epoch": 0.6513251082873155, "grad_norm": 0.3604238765156544, "learning_rate": 8.67555024324831e-06, "loss": 0.5571, "step": 3966 }, { "epoch": 0.6514893354956582, "grad_norm": 0.4148764805152981, "learning_rate": 8.675375374120695e-06, "loss": 0.5082, "step": 3967 }, { "epoch": 0.651653562704001, "grad_norm": 0.45696787695365004, "learning_rate": 8.675200459644393e-06, "loss": 0.5309, "step": 3968 }, { "epoch": 0.6518177899123437, "grad_norm": 0.32008682200636956, "learning_rate": 8.675025499821309e-06, "loss": 0.5424, "step": 3969 }, { "epoch": 0.6519820171206865, "grad_norm": 0.36765540872038655, "learning_rate": 8.674850494653338e-06, "loss": 0.5279, "step": 3970 }, { "epoch": 0.6521462443290292, "grad_norm": 0.4053643104635402, "learning_rate": 8.674675444142385e-06, "loss": 0.5384, "step": 3971 }, { "epoch": 0.652310471537372, "grad_norm": 0.39808760881236877, "learning_rate": 8.674500348290349e-06, "loss": 0.5518, "step": 3972 }, { "epoch": 0.6524746987457147, "grad_norm": 0.44611158326975175, "learning_rate": 8.674325207099131e-06, "loss": 0.5523, "step": 3973 }, { "epoch": 0.6526389259540575, "grad_norm": 0.3183684377023937, "learning_rate": 8.674150020570635e-06, "loss": 0.5302, "step": 3974 }, { "epoch": 0.6528031531624002, "grad_norm": 0.34196266717367213, "learning_rate": 8.673974788706762e-06, "loss": 0.5159, "step": 3975 }, { "epoch": 0.652967380370743, "grad_norm": 0.2900554832151464, "learning_rate": 8.673799511509418e-06, "loss": 0.5334, "step": 3976 }, { "epoch": 0.6531316075790856, "grad_norm": 0.3386359848323148, "learning_rate": 8.673624188980503e-06, "loss": 0.5537, "step": 3977 }, { "epoch": 0.6532958347874284, "grad_norm": 0.39546656599547636, "learning_rate": 8.673448821121923e-06, "loss": 0.5237, "step": 3978 }, { "epoch": 0.6534600619957711, "grad_norm": 0.3317443593450704, "learning_rate": 8.673273407935584e-06, "loss": 0.5053, "step": 3979 }, { "epoch": 0.6536242892041139, "grad_norm": 0.418188518330638, "learning_rate": 8.67309794942339e-06, "loss": 0.5492, "step": 3980 }, { "epoch": 0.6537885164124566, "grad_norm": 0.369270618448298, "learning_rate": 8.672922445587245e-06, "loss": 0.5104, "step": 3981 }, { "epoch": 0.6539527436207994, "grad_norm": 0.38664898700693945, "learning_rate": 8.672746896429058e-06, "loss": 0.5316, "step": 3982 }, { "epoch": 0.6541169708291421, "grad_norm": 0.33549576162982075, "learning_rate": 8.672571301950733e-06, "loss": 0.5425, "step": 3983 }, { "epoch": 0.6542811980374849, "grad_norm": 0.40001762129435364, "learning_rate": 8.67239566215418e-06, "loss": 0.5307, "step": 3984 }, { "epoch": 0.6544454252458276, "grad_norm": 0.3913057314551714, "learning_rate": 8.672219977041304e-06, "loss": 0.5247, "step": 3985 }, { "epoch": 0.6546096524541704, "grad_norm": 0.31883678483466016, "learning_rate": 8.672044246614013e-06, "loss": 0.5077, "step": 3986 }, { "epoch": 0.6547738796625131, "grad_norm": 0.29753011492717113, "learning_rate": 8.67186847087422e-06, "loss": 0.5299, "step": 3987 }, { "epoch": 0.6549381068708559, "grad_norm": 0.30891222511550015, "learning_rate": 8.671692649823828e-06, "loss": 0.5183, "step": 3988 }, { "epoch": 0.6551023340791986, "grad_norm": 0.36057053254078036, "learning_rate": 8.671516783464751e-06, "loss": 0.5522, "step": 3989 }, { "epoch": 0.6552665612875413, "grad_norm": 0.30740628581649376, "learning_rate": 8.671340871798895e-06, "loss": 0.5421, "step": 3990 }, { "epoch": 0.6554307884958841, "grad_norm": 0.3390539225771164, "learning_rate": 8.671164914828174e-06, "loss": 0.5367, "step": 3991 }, { "epoch": 0.6555950157042268, "grad_norm": 0.3027722746444447, "learning_rate": 8.670988912554501e-06, "loss": 0.5447, "step": 3992 }, { "epoch": 0.6557592429125696, "grad_norm": 0.8625813728280358, "learning_rate": 8.670812864979783e-06, "loss": 0.5196, "step": 3993 }, { "epoch": 0.6559234701209122, "grad_norm": 0.4270267910110388, "learning_rate": 8.670636772105932e-06, "loss": 0.5183, "step": 3994 }, { "epoch": 0.656087697329255, "grad_norm": 0.30519084206610886, "learning_rate": 8.670460633934864e-06, "loss": 0.5346, "step": 3995 }, { "epoch": 0.6562519245375977, "grad_norm": 0.3523099685252621, "learning_rate": 8.67028445046849e-06, "loss": 0.5239, "step": 3996 }, { "epoch": 0.6564161517459405, "grad_norm": 0.31211800772194953, "learning_rate": 8.670108221708725e-06, "loss": 0.5328, "step": 3997 }, { "epoch": 0.6565803789542832, "grad_norm": 0.4759892153046304, "learning_rate": 8.669931947657481e-06, "loss": 0.5338, "step": 3998 }, { "epoch": 0.656744606162626, "grad_norm": 0.4637487119659366, "learning_rate": 8.669755628316673e-06, "loss": 0.5278, "step": 3999 }, { "epoch": 0.6569088333709687, "grad_norm": 0.32994096949419766, "learning_rate": 8.669579263688216e-06, "loss": 0.5261, "step": 4000 }, { "epoch": 0.6570730605793115, "grad_norm": 0.3356383750274373, "learning_rate": 8.669402853774026e-06, "loss": 0.5187, "step": 4001 }, { "epoch": 0.6572372877876542, "grad_norm": 0.6717913507630792, "learning_rate": 8.66922639857602e-06, "loss": 0.5238, "step": 4002 }, { "epoch": 0.657401514995997, "grad_norm": 0.3221690604002636, "learning_rate": 8.669049898096114e-06, "loss": 0.5559, "step": 4003 }, { "epoch": 0.6575657422043397, "grad_norm": 0.3081158006782644, "learning_rate": 8.668873352336221e-06, "loss": 0.5057, "step": 4004 }, { "epoch": 0.6577299694126825, "grad_norm": 0.32070325867119254, "learning_rate": 8.668696761298266e-06, "loss": 0.5211, "step": 4005 }, { "epoch": 0.6578941966210252, "grad_norm": 0.31266643294309454, "learning_rate": 8.66852012498416e-06, "loss": 0.5343, "step": 4006 }, { "epoch": 0.658058423829368, "grad_norm": 0.2863747219818306, "learning_rate": 8.668343443395824e-06, "loss": 0.5046, "step": 4007 }, { "epoch": 0.6582226510377107, "grad_norm": 0.659439189835058, "learning_rate": 8.668166716535179e-06, "loss": 0.5289, "step": 4008 }, { "epoch": 0.6583868782460535, "grad_norm": 0.34529192696537236, "learning_rate": 8.66798994440414e-06, "loss": 0.5471, "step": 4009 }, { "epoch": 0.6585511054543961, "grad_norm": 0.2888681021457298, "learning_rate": 8.667813127004631e-06, "loss": 0.5263, "step": 4010 }, { "epoch": 0.6587153326627389, "grad_norm": 0.32980455433855144, "learning_rate": 8.667636264338571e-06, "loss": 0.5192, "step": 4011 }, { "epoch": 0.6588795598710816, "grad_norm": 0.3485846589730208, "learning_rate": 8.66745935640788e-06, "loss": 0.5181, "step": 4012 }, { "epoch": 0.6590437870794243, "grad_norm": 0.31578509817687833, "learning_rate": 8.667282403214481e-06, "loss": 0.5237, "step": 4013 }, { "epoch": 0.6592080142877671, "grad_norm": 0.3229530274985868, "learning_rate": 8.667105404760295e-06, "loss": 0.5371, "step": 4014 }, { "epoch": 0.6593722414961098, "grad_norm": 0.30354742074792096, "learning_rate": 8.666928361047245e-06, "loss": 0.5191, "step": 4015 }, { "epoch": 0.6595364687044526, "grad_norm": 0.3509968909891949, "learning_rate": 8.666751272077251e-06, "loss": 0.5442, "step": 4016 }, { "epoch": 0.6597006959127953, "grad_norm": 0.3153934088828136, "learning_rate": 8.66657413785224e-06, "loss": 0.5269, "step": 4017 }, { "epoch": 0.6598649231211381, "grad_norm": 0.3170905924730358, "learning_rate": 8.666396958374135e-06, "loss": 0.5015, "step": 4018 }, { "epoch": 0.6600291503294808, "grad_norm": 0.3199674654985588, "learning_rate": 8.66621973364486e-06, "loss": 0.5137, "step": 4019 }, { "epoch": 0.6601933775378236, "grad_norm": 0.3528630549071056, "learning_rate": 8.666042463666338e-06, "loss": 0.5343, "step": 4020 }, { "epoch": 0.6603576047461663, "grad_norm": 0.4338982051253656, "learning_rate": 8.665865148440497e-06, "loss": 0.5273, "step": 4021 }, { "epoch": 0.6605218319545091, "grad_norm": 0.4618783895698222, "learning_rate": 8.665687787969262e-06, "loss": 0.5149, "step": 4022 }, { "epoch": 0.6606860591628518, "grad_norm": 0.3659376755669406, "learning_rate": 8.66551038225456e-06, "loss": 0.5292, "step": 4023 }, { "epoch": 0.6608502863711946, "grad_norm": 0.4078323654587953, "learning_rate": 8.665332931298317e-06, "loss": 0.5056, "step": 4024 }, { "epoch": 0.6610145135795373, "grad_norm": 0.3036588935862725, "learning_rate": 8.66515543510246e-06, "loss": 0.5361, "step": 4025 }, { "epoch": 0.6611787407878801, "grad_norm": 0.33300491685531275, "learning_rate": 8.664977893668914e-06, "loss": 0.5145, "step": 4026 }, { "epoch": 0.6613429679962227, "grad_norm": 0.3164346698371754, "learning_rate": 8.664800306999613e-06, "loss": 0.5422, "step": 4027 }, { "epoch": 0.6615071952045655, "grad_norm": 0.5384011020648434, "learning_rate": 8.664622675096482e-06, "loss": 0.5228, "step": 4028 }, { "epoch": 0.6616714224129082, "grad_norm": 0.36002664128745665, "learning_rate": 8.664444997961454e-06, "loss": 0.5283, "step": 4029 }, { "epoch": 0.661835649621251, "grad_norm": 0.34992110750106104, "learning_rate": 8.664267275596453e-06, "loss": 0.5273, "step": 4030 }, { "epoch": 0.6619998768295937, "grad_norm": 0.33045355618722133, "learning_rate": 8.664089508003413e-06, "loss": 0.5309, "step": 4031 }, { "epoch": 0.6621641040379365, "grad_norm": 0.3008997087087797, "learning_rate": 8.663911695184265e-06, "loss": 0.5214, "step": 4032 }, { "epoch": 0.6623283312462792, "grad_norm": 0.2974714182775004, "learning_rate": 8.663733837140939e-06, "loss": 0.5514, "step": 4033 }, { "epoch": 0.662492558454622, "grad_norm": 0.32714662729303373, "learning_rate": 8.663555933875366e-06, "loss": 0.5474, "step": 4034 }, { "epoch": 0.6626567856629647, "grad_norm": 0.3179914587930655, "learning_rate": 8.663377985389478e-06, "loss": 0.5253, "step": 4035 }, { "epoch": 0.6628210128713075, "grad_norm": 0.34607268067750174, "learning_rate": 8.663199991685212e-06, "loss": 0.5385, "step": 4036 }, { "epoch": 0.6629852400796502, "grad_norm": 0.38888766148880033, "learning_rate": 8.663021952764496e-06, "loss": 0.5179, "step": 4037 }, { "epoch": 0.663149467287993, "grad_norm": 0.34136523068815844, "learning_rate": 8.662843868629267e-06, "loss": 0.5359, "step": 4038 }, { "epoch": 0.6633136944963357, "grad_norm": 0.36005173204058966, "learning_rate": 8.662665739281458e-06, "loss": 0.5454, "step": 4039 }, { "epoch": 0.6634779217046785, "grad_norm": 0.3518704195072764, "learning_rate": 8.662487564723002e-06, "loss": 0.5415, "step": 4040 }, { "epoch": 0.6636421489130212, "grad_norm": 0.3015583584370122, "learning_rate": 8.662309344955838e-06, "loss": 0.5281, "step": 4041 }, { "epoch": 0.663806376121364, "grad_norm": 0.2901950538717286, "learning_rate": 8.662131079981897e-06, "loss": 0.5156, "step": 4042 }, { "epoch": 0.6639706033297067, "grad_norm": 0.3280991639172377, "learning_rate": 8.661952769803119e-06, "loss": 0.5402, "step": 4043 }, { "epoch": 0.6641348305380493, "grad_norm": 0.310899012651114, "learning_rate": 8.661774414421438e-06, "loss": 0.5298, "step": 4044 }, { "epoch": 0.6642990577463921, "grad_norm": 0.34032542764121404, "learning_rate": 8.661596013838793e-06, "loss": 0.5329, "step": 4045 }, { "epoch": 0.6644632849547348, "grad_norm": 0.3048173603991897, "learning_rate": 8.66141756805712e-06, "loss": 0.5272, "step": 4046 }, { "epoch": 0.6646275121630776, "grad_norm": 0.3861615964125312, "learning_rate": 8.661239077078358e-06, "loss": 0.5392, "step": 4047 }, { "epoch": 0.6647917393714203, "grad_norm": 0.37328731816957406, "learning_rate": 8.661060540904447e-06, "loss": 0.5333, "step": 4048 }, { "epoch": 0.6649559665797631, "grad_norm": 0.33058996782192235, "learning_rate": 8.660881959537324e-06, "loss": 0.533, "step": 4049 }, { "epoch": 0.6651201937881058, "grad_norm": 0.3452711619202778, "learning_rate": 8.66070333297893e-06, "loss": 0.5183, "step": 4050 }, { "epoch": 0.6652844209964486, "grad_norm": 0.3659269032483274, "learning_rate": 8.660524661231202e-06, "loss": 0.5348, "step": 4051 }, { "epoch": 0.6654486482047913, "grad_norm": 0.31385515871837055, "learning_rate": 8.660345944296083e-06, "loss": 0.5471, "step": 4052 }, { "epoch": 0.6656128754131341, "grad_norm": 0.3140491300896386, "learning_rate": 8.660167182175515e-06, "loss": 0.5315, "step": 4053 }, { "epoch": 0.6657771026214768, "grad_norm": 0.30202244601958633, "learning_rate": 8.659988374871436e-06, "loss": 0.5386, "step": 4054 }, { "epoch": 0.6659413298298196, "grad_norm": 0.3905089298298893, "learning_rate": 8.659809522385794e-06, "loss": 0.5208, "step": 4055 }, { "epoch": 0.6661055570381623, "grad_norm": 0.30906324457562473, "learning_rate": 8.659630624720525e-06, "loss": 0.5518, "step": 4056 }, { "epoch": 0.6662697842465051, "grad_norm": 0.4354062962749167, "learning_rate": 8.659451681877577e-06, "loss": 0.5515, "step": 4057 }, { "epoch": 0.6664340114548478, "grad_norm": 0.29273028085860114, "learning_rate": 8.65927269385889e-06, "loss": 0.514, "step": 4058 }, { "epoch": 0.6665982386631906, "grad_norm": 0.42766559142493105, "learning_rate": 8.659093660666411e-06, "loss": 0.5504, "step": 4059 }, { "epoch": 0.6667624658715333, "grad_norm": 0.4006447442117922, "learning_rate": 8.658914582302082e-06, "loss": 0.5121, "step": 4060 }, { "epoch": 0.666926693079876, "grad_norm": 0.3034528462862328, "learning_rate": 8.658735458767848e-06, "loss": 0.5228, "step": 4061 }, { "epoch": 0.6670909202882187, "grad_norm": 0.4233032717301708, "learning_rate": 8.658556290065655e-06, "loss": 0.5336, "step": 4062 }, { "epoch": 0.6672551474965615, "grad_norm": 0.45100471286674626, "learning_rate": 8.65837707619745e-06, "loss": 0.5082, "step": 4063 }, { "epoch": 0.6674193747049042, "grad_norm": 1.0766468981490969, "learning_rate": 8.658197817165181e-06, "loss": 0.5224, "step": 4064 }, { "epoch": 0.667583601913247, "grad_norm": 0.33427942116827647, "learning_rate": 8.658018512970788e-06, "loss": 0.5121, "step": 4065 }, { "epoch": 0.6677478291215897, "grad_norm": 0.3459647160115218, "learning_rate": 8.657839163616227e-06, "loss": 0.5368, "step": 4066 }, { "epoch": 0.6679120563299324, "grad_norm": 0.36718583835824414, "learning_rate": 8.657659769103439e-06, "loss": 0.5273, "step": 4067 }, { "epoch": 0.6680762835382752, "grad_norm": 0.3013228326013261, "learning_rate": 8.657480329434378e-06, "loss": 0.561, "step": 4068 }, { "epoch": 0.6682405107466179, "grad_norm": 0.31179364960946604, "learning_rate": 8.657300844610988e-06, "loss": 0.5286, "step": 4069 }, { "epoch": 0.6684047379549607, "grad_norm": 0.37872253313407145, "learning_rate": 8.657121314635221e-06, "loss": 0.5431, "step": 4070 }, { "epoch": 0.6685689651633034, "grad_norm": 0.33270200758353, "learning_rate": 8.656941739509027e-06, "loss": 0.5331, "step": 4071 }, { "epoch": 0.6687331923716462, "grad_norm": 0.4963399150092201, "learning_rate": 8.656762119234356e-06, "loss": 0.5292, "step": 4072 }, { "epoch": 0.6688974195799889, "grad_norm": 0.29276600299262023, "learning_rate": 8.656582453813157e-06, "loss": 0.5015, "step": 4073 }, { "epoch": 0.6690616467883317, "grad_norm": 0.3227354735508351, "learning_rate": 8.656402743247385e-06, "loss": 0.5265, "step": 4074 }, { "epoch": 0.6692258739966744, "grad_norm": 0.3580768588600473, "learning_rate": 8.65622298753899e-06, "loss": 0.5258, "step": 4075 }, { "epoch": 0.6693901012050172, "grad_norm": 0.3428832595500862, "learning_rate": 8.656043186689923e-06, "loss": 0.5283, "step": 4076 }, { "epoch": 0.6695543284133599, "grad_norm": 0.43434915808781277, "learning_rate": 8.655863340702139e-06, "loss": 0.5317, "step": 4077 }, { "epoch": 0.6697185556217026, "grad_norm": 0.3118494196364416, "learning_rate": 8.65568344957759e-06, "loss": 0.5465, "step": 4078 }, { "epoch": 0.6698827828300453, "grad_norm": 0.281140918903688, "learning_rate": 8.65550351331823e-06, "loss": 0.5173, "step": 4079 }, { "epoch": 0.6700470100383881, "grad_norm": 0.31825315533295956, "learning_rate": 8.655323531926013e-06, "loss": 0.5469, "step": 4080 }, { "epoch": 0.6702112372467308, "grad_norm": 0.45548632455563126, "learning_rate": 8.655143505402893e-06, "loss": 0.5542, "step": 4081 }, { "epoch": 0.6703754644550736, "grad_norm": 0.3272018942287102, "learning_rate": 8.654963433750829e-06, "loss": 0.5121, "step": 4082 }, { "epoch": 0.6705396916634163, "grad_norm": 0.29951992766048624, "learning_rate": 8.654783316971773e-06, "loss": 0.5263, "step": 4083 }, { "epoch": 0.6707039188717591, "grad_norm": 0.3251815306475271, "learning_rate": 8.654603155067682e-06, "loss": 0.5348, "step": 4084 }, { "epoch": 0.6708681460801018, "grad_norm": 0.3957221610064905, "learning_rate": 8.654422948040515e-06, "loss": 0.5044, "step": 4085 }, { "epoch": 0.6710323732884446, "grad_norm": 0.30994219622654656, "learning_rate": 8.654242695892224e-06, "loss": 0.5088, "step": 4086 }, { "epoch": 0.6711966004967873, "grad_norm": 0.3344417178331218, "learning_rate": 8.654062398624772e-06, "loss": 0.517, "step": 4087 }, { "epoch": 0.6713608277051301, "grad_norm": 0.3001680261537345, "learning_rate": 8.653882056240116e-06, "loss": 0.5182, "step": 4088 }, { "epoch": 0.6715250549134728, "grad_norm": 0.3372489161830878, "learning_rate": 8.653701668740214e-06, "loss": 0.524, "step": 4089 }, { "epoch": 0.6716892821218156, "grad_norm": 0.32761295082448727, "learning_rate": 8.653521236127023e-06, "loss": 0.5537, "step": 4090 }, { "epoch": 0.6718535093301583, "grad_norm": 0.45024104103317986, "learning_rate": 8.653340758402508e-06, "loss": 0.5146, "step": 4091 }, { "epoch": 0.672017736538501, "grad_norm": 0.3375009611373258, "learning_rate": 8.653160235568622e-06, "loss": 0.515, "step": 4092 }, { "epoch": 0.6721819637468438, "grad_norm": 0.30487466834246935, "learning_rate": 8.652979667627333e-06, "loss": 0.507, "step": 4093 }, { "epoch": 0.6723461909551866, "grad_norm": 0.3390407856209974, "learning_rate": 8.6527990545806e-06, "loss": 0.5235, "step": 4094 }, { "epoch": 0.6725104181635292, "grad_norm": 0.44647654142082505, "learning_rate": 8.65261839643038e-06, "loss": 0.5302, "step": 4095 }, { "epoch": 0.6726746453718719, "grad_norm": 0.4409501511793635, "learning_rate": 8.65243769317864e-06, "loss": 0.5076, "step": 4096 }, { "epoch": 0.6728388725802147, "grad_norm": 0.3545266642369537, "learning_rate": 8.652256944827341e-06, "loss": 0.5139, "step": 4097 }, { "epoch": 0.6730030997885574, "grad_norm": 0.3919734598018783, "learning_rate": 8.652076151378446e-06, "loss": 0.5267, "step": 4098 }, { "epoch": 0.6731673269969002, "grad_norm": 0.38540276913674354, "learning_rate": 8.65189531283392e-06, "loss": 0.5229, "step": 4099 }, { "epoch": 0.6733315542052429, "grad_norm": 0.2910524183414479, "learning_rate": 8.651714429195725e-06, "loss": 0.542, "step": 4100 }, { "epoch": 0.6734957814135857, "grad_norm": 0.43159118996051093, "learning_rate": 8.651533500465828e-06, "loss": 0.5326, "step": 4101 }, { "epoch": 0.6736600086219284, "grad_norm": 0.33738359200446183, "learning_rate": 8.651352526646191e-06, "loss": 0.5403, "step": 4102 }, { "epoch": 0.6738242358302712, "grad_norm": 0.3315680073734393, "learning_rate": 8.651171507738783e-06, "loss": 0.516, "step": 4103 }, { "epoch": 0.6739884630386139, "grad_norm": 0.3589770475880881, "learning_rate": 8.650990443745567e-06, "loss": 0.5414, "step": 4104 }, { "epoch": 0.6741526902469567, "grad_norm": 0.35588571798248414, "learning_rate": 8.65080933466851e-06, "loss": 0.5215, "step": 4105 }, { "epoch": 0.6743169174552994, "grad_norm": 0.3033052472495628, "learning_rate": 8.65062818050958e-06, "loss": 0.5435, "step": 4106 }, { "epoch": 0.6744811446636422, "grad_norm": 0.6316488676677522, "learning_rate": 8.650446981270744e-06, "loss": 0.5224, "step": 4107 }, { "epoch": 0.6746453718719849, "grad_norm": 0.3855301938414688, "learning_rate": 8.650265736953972e-06, "loss": 0.54, "step": 4108 }, { "epoch": 0.6748095990803277, "grad_norm": 0.32885699707547555, "learning_rate": 8.65008444756123e-06, "loss": 0.5318, "step": 4109 }, { "epoch": 0.6749738262886704, "grad_norm": 0.35338886666359093, "learning_rate": 8.649903113094487e-06, "loss": 0.528, "step": 4110 }, { "epoch": 0.6751380534970132, "grad_norm": 0.36097457735813854, "learning_rate": 8.649721733555715e-06, "loss": 0.5244, "step": 4111 }, { "epoch": 0.6753022807053558, "grad_norm": 0.31060975134752294, "learning_rate": 8.64954030894688e-06, "loss": 0.5215, "step": 4112 }, { "epoch": 0.6754665079136986, "grad_norm": 0.3011457205831512, "learning_rate": 8.649358839269955e-06, "loss": 0.508, "step": 4113 }, { "epoch": 0.6756307351220413, "grad_norm": 0.3418984691313553, "learning_rate": 8.649177324526913e-06, "loss": 0.5235, "step": 4114 }, { "epoch": 0.675794962330384, "grad_norm": 0.43582773294360444, "learning_rate": 8.64899576471972e-06, "loss": 0.5541, "step": 4115 }, { "epoch": 0.6759591895387268, "grad_norm": 0.35149410264601055, "learning_rate": 8.648814159850354e-06, "loss": 0.5347, "step": 4116 }, { "epoch": 0.6761234167470695, "grad_norm": 0.368562017515984, "learning_rate": 8.648632509920781e-06, "loss": 0.542, "step": 4117 }, { "epoch": 0.6762876439554123, "grad_norm": 0.323205499569439, "learning_rate": 8.64845081493298e-06, "loss": 0.5271, "step": 4118 }, { "epoch": 0.676451871163755, "grad_norm": 0.412816416007039, "learning_rate": 8.64826907488892e-06, "loss": 0.5231, "step": 4119 }, { "epoch": 0.6766160983720978, "grad_norm": 0.2996965923663482, "learning_rate": 8.648087289790578e-06, "loss": 0.5389, "step": 4120 }, { "epoch": 0.6767803255804405, "grad_norm": 0.3222559504790764, "learning_rate": 8.647905459639926e-06, "loss": 0.5224, "step": 4121 }, { "epoch": 0.6769445527887833, "grad_norm": 0.30650977456731987, "learning_rate": 8.647723584438939e-06, "loss": 0.5262, "step": 4122 }, { "epoch": 0.677108779997126, "grad_norm": 0.5112994720395476, "learning_rate": 8.647541664189593e-06, "loss": 0.5178, "step": 4123 }, { "epoch": 0.6772730072054688, "grad_norm": 0.3522817205412864, "learning_rate": 8.647359698893867e-06, "loss": 0.5199, "step": 4124 }, { "epoch": 0.6774372344138115, "grad_norm": 0.3024109450912323, "learning_rate": 8.647177688553731e-06, "loss": 0.5135, "step": 4125 }, { "epoch": 0.6776014616221543, "grad_norm": 0.3174797317269863, "learning_rate": 8.646995633171165e-06, "loss": 0.5123, "step": 4126 }, { "epoch": 0.677765688830497, "grad_norm": 0.3490783488508878, "learning_rate": 8.646813532748147e-06, "loss": 0.5354, "step": 4127 }, { "epoch": 0.6779299160388398, "grad_norm": 0.44894277917191494, "learning_rate": 8.646631387286655e-06, "loss": 0.5048, "step": 4128 }, { "epoch": 0.6780941432471824, "grad_norm": 1.0038385015849884, "learning_rate": 8.646449196788664e-06, "loss": 0.5346, "step": 4129 }, { "epoch": 0.6782583704555252, "grad_norm": 0.28209868548502737, "learning_rate": 8.646266961256158e-06, "loss": 0.5092, "step": 4130 }, { "epoch": 0.6784225976638679, "grad_norm": 0.32056520106324327, "learning_rate": 8.646084680691112e-06, "loss": 0.521, "step": 4131 }, { "epoch": 0.6785868248722107, "grad_norm": 0.30892792961083615, "learning_rate": 8.645902355095507e-06, "loss": 0.5233, "step": 4132 }, { "epoch": 0.6787510520805534, "grad_norm": 0.3004522844978485, "learning_rate": 8.645719984471325e-06, "loss": 0.5519, "step": 4133 }, { "epoch": 0.6789152792888962, "grad_norm": 0.37531256935974994, "learning_rate": 8.645537568820544e-06, "loss": 0.522, "step": 4134 }, { "epoch": 0.6790795064972389, "grad_norm": 0.4463922435591353, "learning_rate": 8.645355108145146e-06, "loss": 0.5076, "step": 4135 }, { "epoch": 0.6792437337055817, "grad_norm": 0.2871378370372213, "learning_rate": 8.645172602447113e-06, "loss": 0.5084, "step": 4136 }, { "epoch": 0.6794079609139244, "grad_norm": 0.39296031580485435, "learning_rate": 8.644990051728428e-06, "loss": 0.5329, "step": 4137 }, { "epoch": 0.6795721881222672, "grad_norm": 0.3206991930440626, "learning_rate": 8.644807455991071e-06, "loss": 0.5254, "step": 4138 }, { "epoch": 0.6797364153306099, "grad_norm": 0.312091021839642, "learning_rate": 8.644624815237029e-06, "loss": 0.5262, "step": 4139 }, { "epoch": 0.6799006425389527, "grad_norm": 0.2924739456006572, "learning_rate": 8.644442129468284e-06, "loss": 0.5002, "step": 4140 }, { "epoch": 0.6800648697472954, "grad_norm": 0.3458651635031552, "learning_rate": 8.64425939868682e-06, "loss": 0.5326, "step": 4141 }, { "epoch": 0.6802290969556382, "grad_norm": 0.6053301737235616, "learning_rate": 8.644076622894621e-06, "loss": 0.5496, "step": 4142 }, { "epoch": 0.6803933241639809, "grad_norm": 0.3242956814925467, "learning_rate": 8.643893802093671e-06, "loss": 0.5138, "step": 4143 }, { "epoch": 0.6805575513723237, "grad_norm": 0.2991928853840647, "learning_rate": 8.64371093628596e-06, "loss": 0.5154, "step": 4144 }, { "epoch": 0.6807217785806664, "grad_norm": 0.2870604350100677, "learning_rate": 8.64352802547347e-06, "loss": 0.5152, "step": 4145 }, { "epoch": 0.680886005789009, "grad_norm": 0.2885442702491027, "learning_rate": 8.64334506965819e-06, "loss": 0.5375, "step": 4146 }, { "epoch": 0.6810502329973518, "grad_norm": 0.36928363961336547, "learning_rate": 8.643162068842105e-06, "loss": 0.5292, "step": 4147 }, { "epoch": 0.6812144602056945, "grad_norm": 0.3343679239764359, "learning_rate": 8.642979023027203e-06, "loss": 0.5281, "step": 4148 }, { "epoch": 0.6813786874140373, "grad_norm": 0.3116320481253324, "learning_rate": 8.642795932215472e-06, "loss": 0.5394, "step": 4149 }, { "epoch": 0.68154291462238, "grad_norm": 0.2847083185415691, "learning_rate": 8.642612796408904e-06, "loss": 0.5315, "step": 4150 }, { "epoch": 0.6817071418307228, "grad_norm": 0.3789055568649402, "learning_rate": 8.642429615609483e-06, "loss": 0.5175, "step": 4151 }, { "epoch": 0.6818713690390655, "grad_norm": 0.35801662699820525, "learning_rate": 8.642246389819202e-06, "loss": 0.5237, "step": 4152 }, { "epoch": 0.6820355962474083, "grad_norm": 0.2999952779090304, "learning_rate": 8.642063119040049e-06, "loss": 0.5155, "step": 4153 }, { "epoch": 0.682199823455751, "grad_norm": 0.4117495657601018, "learning_rate": 8.641879803274016e-06, "loss": 0.5248, "step": 4154 }, { "epoch": 0.6823640506640938, "grad_norm": 0.38786034920878637, "learning_rate": 8.641696442523093e-06, "loss": 0.5205, "step": 4155 }, { "epoch": 0.6825282778724365, "grad_norm": 0.3532378510900068, "learning_rate": 8.641513036789273e-06, "loss": 0.5291, "step": 4156 }, { "epoch": 0.6826925050807793, "grad_norm": 0.2924513736687471, "learning_rate": 8.641329586074545e-06, "loss": 0.517, "step": 4157 }, { "epoch": 0.682856732289122, "grad_norm": 0.3274374660126311, "learning_rate": 8.641146090380903e-06, "loss": 0.5282, "step": 4158 }, { "epoch": 0.6830209594974648, "grad_norm": 0.35013113932458, "learning_rate": 8.64096254971034e-06, "loss": 0.5332, "step": 4159 }, { "epoch": 0.6831851867058075, "grad_norm": 0.3457641571260403, "learning_rate": 8.640778964064852e-06, "loss": 0.5289, "step": 4160 }, { "epoch": 0.6833494139141503, "grad_norm": 0.27620805752207256, "learning_rate": 8.640595333446427e-06, "loss": 0.5458, "step": 4161 }, { "epoch": 0.683513641122493, "grad_norm": 0.33925143437931815, "learning_rate": 8.640411657857066e-06, "loss": 0.5537, "step": 4162 }, { "epoch": 0.6836778683308357, "grad_norm": 0.35668433810410605, "learning_rate": 8.64022793729876e-06, "loss": 0.5283, "step": 4163 }, { "epoch": 0.6838420955391784, "grad_norm": 0.28380844419386303, "learning_rate": 8.640044171773503e-06, "loss": 0.5207, "step": 4164 }, { "epoch": 0.6840063227475212, "grad_norm": 0.36428431243227716, "learning_rate": 8.639860361283295e-06, "loss": 0.5371, "step": 4165 }, { "epoch": 0.6841705499558639, "grad_norm": 0.3638178524463825, "learning_rate": 8.63967650583013e-06, "loss": 0.5396, "step": 4166 }, { "epoch": 0.6843347771642067, "grad_norm": 0.34929302756951347, "learning_rate": 8.639492605416005e-06, "loss": 0.5368, "step": 4167 }, { "epoch": 0.6844990043725494, "grad_norm": 0.3769355480143036, "learning_rate": 8.639308660042918e-06, "loss": 0.5357, "step": 4168 }, { "epoch": 0.6846632315808922, "grad_norm": 0.2722486789490862, "learning_rate": 8.639124669712867e-06, "loss": 0.5201, "step": 4169 }, { "epoch": 0.6848274587892349, "grad_norm": 0.30186430431604144, "learning_rate": 8.63894063442785e-06, "loss": 0.5302, "step": 4170 }, { "epoch": 0.6849916859975776, "grad_norm": 0.2866787935647283, "learning_rate": 8.638756554189863e-06, "loss": 0.501, "step": 4171 }, { "epoch": 0.6851559132059204, "grad_norm": 0.38690681773281876, "learning_rate": 8.63857242900091e-06, "loss": 0.526, "step": 4172 }, { "epoch": 0.6853201404142631, "grad_norm": 0.3231383434901005, "learning_rate": 8.638388258862987e-06, "loss": 0.5325, "step": 4173 }, { "epoch": 0.6854843676226059, "grad_norm": 0.43681580478235355, "learning_rate": 8.638204043778097e-06, "loss": 0.5387, "step": 4174 }, { "epoch": 0.6856485948309486, "grad_norm": 0.3668910158632165, "learning_rate": 8.63801978374824e-06, "loss": 0.5356, "step": 4175 }, { "epoch": 0.6858128220392914, "grad_norm": 0.3455507180236111, "learning_rate": 8.637835478775417e-06, "loss": 0.5044, "step": 4176 }, { "epoch": 0.6859770492476341, "grad_norm": 0.3556586108295262, "learning_rate": 8.637651128861629e-06, "loss": 0.5325, "step": 4177 }, { "epoch": 0.6861412764559769, "grad_norm": 0.29839684395296556, "learning_rate": 8.637466734008879e-06, "loss": 0.5468, "step": 4178 }, { "epoch": 0.6863055036643196, "grad_norm": 0.3371453476125466, "learning_rate": 8.637282294219168e-06, "loss": 0.508, "step": 4179 }, { "epoch": 0.6864697308726623, "grad_norm": 0.48442277629443187, "learning_rate": 8.637097809494504e-06, "loss": 0.5469, "step": 4180 }, { "epoch": 0.686633958081005, "grad_norm": 0.3501409566376713, "learning_rate": 8.636913279836884e-06, "loss": 0.5394, "step": 4181 }, { "epoch": 0.6867981852893478, "grad_norm": 0.3153330952861787, "learning_rate": 8.636728705248319e-06, "loss": 0.5287, "step": 4182 }, { "epoch": 0.6869624124976905, "grad_norm": 0.30647381190495343, "learning_rate": 8.636544085730808e-06, "loss": 0.509, "step": 4183 }, { "epoch": 0.6871266397060333, "grad_norm": 0.32085086991778444, "learning_rate": 8.636359421286358e-06, "loss": 0.5338, "step": 4184 }, { "epoch": 0.687290866914376, "grad_norm": 0.29831140561366876, "learning_rate": 8.636174711916977e-06, "loss": 0.5275, "step": 4185 }, { "epoch": 0.6874550941227188, "grad_norm": 0.3066929012205568, "learning_rate": 8.635989957624669e-06, "loss": 0.5218, "step": 4186 }, { "epoch": 0.6876193213310615, "grad_norm": 0.33259903521685663, "learning_rate": 8.635805158411438e-06, "loss": 0.5006, "step": 4187 }, { "epoch": 0.6877835485394043, "grad_norm": 0.2893515986506582, "learning_rate": 8.635620314279297e-06, "loss": 0.5177, "step": 4188 }, { "epoch": 0.687947775747747, "grad_norm": 0.33727758187688006, "learning_rate": 8.63543542523025e-06, "loss": 0.5136, "step": 4189 }, { "epoch": 0.6881120029560898, "grad_norm": 0.31013060657131736, "learning_rate": 8.635250491266304e-06, "loss": 0.5238, "step": 4190 }, { "epoch": 0.6882762301644325, "grad_norm": 0.38573616889290285, "learning_rate": 8.63506551238947e-06, "loss": 0.5505, "step": 4191 }, { "epoch": 0.6884404573727753, "grad_norm": 0.313359998521622, "learning_rate": 8.634880488601756e-06, "loss": 0.5277, "step": 4192 }, { "epoch": 0.688604684581118, "grad_norm": 0.349773895723136, "learning_rate": 8.634695419905173e-06, "loss": 0.5349, "step": 4193 }, { "epoch": 0.6887689117894608, "grad_norm": 0.44196567700319817, "learning_rate": 8.634510306301728e-06, "loss": 0.5137, "step": 4194 }, { "epoch": 0.6889331389978035, "grad_norm": 0.4226507437950207, "learning_rate": 8.634325147793434e-06, "loss": 0.5528, "step": 4195 }, { "epoch": 0.6890973662061463, "grad_norm": 0.293215751832506, "learning_rate": 8.6341399443823e-06, "loss": 0.5337, "step": 4196 }, { "epoch": 0.6892615934144889, "grad_norm": 0.28090267043332035, "learning_rate": 8.63395469607034e-06, "loss": 0.5268, "step": 4197 }, { "epoch": 0.6894258206228316, "grad_norm": 0.35600281186827526, "learning_rate": 8.633769402859566e-06, "loss": 0.5323, "step": 4198 }, { "epoch": 0.6895900478311744, "grad_norm": 0.34343124065854136, "learning_rate": 8.633584064751989e-06, "loss": 0.5365, "step": 4199 }, { "epoch": 0.6897542750395171, "grad_norm": 0.32470237327045537, "learning_rate": 8.63339868174962e-06, "loss": 0.5258, "step": 4200 }, { "epoch": 0.6899185022478599, "grad_norm": 0.3868201193984014, "learning_rate": 8.633213253854476e-06, "loss": 0.5275, "step": 4201 }, { "epoch": 0.6900827294562026, "grad_norm": 0.4445413871679448, "learning_rate": 8.63302778106857e-06, "loss": 0.5337, "step": 4202 }, { "epoch": 0.6902469566645454, "grad_norm": 0.48265095727302576, "learning_rate": 8.632842263393915e-06, "loss": 0.523, "step": 4203 }, { "epoch": 0.6904111838728881, "grad_norm": 0.47827943743664814, "learning_rate": 8.632656700832527e-06, "loss": 0.5416, "step": 4204 }, { "epoch": 0.6905754110812309, "grad_norm": 0.31881673938269417, "learning_rate": 8.63247109338642e-06, "loss": 0.5456, "step": 4205 }, { "epoch": 0.6907396382895736, "grad_norm": 0.36208368158027626, "learning_rate": 8.632285441057614e-06, "loss": 0.5353, "step": 4206 }, { "epoch": 0.6909038654979164, "grad_norm": 0.3044211762704708, "learning_rate": 8.632099743848121e-06, "loss": 0.5347, "step": 4207 }, { "epoch": 0.6910680927062591, "grad_norm": 0.43847455160193893, "learning_rate": 8.631914001759958e-06, "loss": 0.5257, "step": 4208 }, { "epoch": 0.6912323199146019, "grad_norm": 0.3791569545248908, "learning_rate": 8.631728214795145e-06, "loss": 0.5228, "step": 4209 }, { "epoch": 0.6913965471229446, "grad_norm": 0.31693174954761305, "learning_rate": 8.6315423829557e-06, "loss": 0.5511, "step": 4210 }, { "epoch": 0.6915607743312874, "grad_norm": 0.30018018314139316, "learning_rate": 8.631356506243637e-06, "loss": 0.5272, "step": 4211 }, { "epoch": 0.6917250015396301, "grad_norm": 0.35174861259968854, "learning_rate": 8.63117058466098e-06, "loss": 0.5475, "step": 4212 }, { "epoch": 0.6918892287479729, "grad_norm": 0.307674247694834, "learning_rate": 8.630984618209743e-06, "loss": 0.4938, "step": 4213 }, { "epoch": 0.6920534559563155, "grad_norm": 0.36394072159220425, "learning_rate": 8.630798606891951e-06, "loss": 0.5274, "step": 4214 }, { "epoch": 0.6922176831646583, "grad_norm": 0.33159368131380446, "learning_rate": 8.630612550709622e-06, "loss": 0.5469, "step": 4215 }, { "epoch": 0.692381910373001, "grad_norm": 0.3251253917776796, "learning_rate": 8.630426449664776e-06, "loss": 0.547, "step": 4216 }, { "epoch": 0.6925461375813438, "grad_norm": 0.3349215700548793, "learning_rate": 8.630240303759436e-06, "loss": 0.5146, "step": 4217 }, { "epoch": 0.6927103647896865, "grad_norm": 0.320126842055859, "learning_rate": 8.630054112995621e-06, "loss": 0.5317, "step": 4218 }, { "epoch": 0.6928745919980293, "grad_norm": 0.3873589155863503, "learning_rate": 8.629867877375356e-06, "loss": 0.5117, "step": 4219 }, { "epoch": 0.693038819206372, "grad_norm": 0.3407313212235653, "learning_rate": 8.629681596900663e-06, "loss": 0.5329, "step": 4220 }, { "epoch": 0.6932030464147148, "grad_norm": 0.295578297140357, "learning_rate": 8.629495271573565e-06, "loss": 0.5033, "step": 4221 }, { "epoch": 0.6933672736230575, "grad_norm": 0.42228340319703683, "learning_rate": 8.629308901396083e-06, "loss": 0.4983, "step": 4222 }, { "epoch": 0.6935315008314002, "grad_norm": 0.4463823743156875, "learning_rate": 8.629122486370245e-06, "loss": 0.513, "step": 4223 }, { "epoch": 0.693695728039743, "grad_norm": 0.31355091465461943, "learning_rate": 8.628936026498075e-06, "loss": 0.5326, "step": 4224 }, { "epoch": 0.6938599552480857, "grad_norm": 0.33679477663537927, "learning_rate": 8.628749521781598e-06, "loss": 0.5448, "step": 4225 }, { "epoch": 0.6940241824564285, "grad_norm": 0.35080468703776435, "learning_rate": 8.628562972222838e-06, "loss": 0.4956, "step": 4226 }, { "epoch": 0.6941884096647712, "grad_norm": 0.29616783212095005, "learning_rate": 8.628376377823823e-06, "loss": 0.5487, "step": 4227 }, { "epoch": 0.694352636873114, "grad_norm": 0.3512870033644539, "learning_rate": 8.628189738586577e-06, "loss": 0.5192, "step": 4228 }, { "epoch": 0.6945168640814567, "grad_norm": 0.7499849953119065, "learning_rate": 8.628003054513129e-06, "loss": 0.5412, "step": 4229 }, { "epoch": 0.6946810912897995, "grad_norm": 0.34197986132326763, "learning_rate": 8.627816325605509e-06, "loss": 0.541, "step": 4230 }, { "epoch": 0.6948453184981421, "grad_norm": 0.35850041319838344, "learning_rate": 8.627629551865741e-06, "loss": 0.5424, "step": 4231 }, { "epoch": 0.6950095457064849, "grad_norm": 0.38240384165529273, "learning_rate": 8.627442733295855e-06, "loss": 0.5417, "step": 4232 }, { "epoch": 0.6951737729148276, "grad_norm": 0.3236064766935071, "learning_rate": 8.62725586989788e-06, "loss": 0.5352, "step": 4233 }, { "epoch": 0.6953380001231704, "grad_norm": 0.3556556217013173, "learning_rate": 8.627068961673844e-06, "loss": 0.5187, "step": 4234 }, { "epoch": 0.6955022273315131, "grad_norm": 0.3423798038428814, "learning_rate": 8.62688200862578e-06, "loss": 0.5366, "step": 4235 }, { "epoch": 0.6956664545398559, "grad_norm": 0.28350851553753786, "learning_rate": 8.626695010755719e-06, "loss": 0.522, "step": 4236 }, { "epoch": 0.6958306817481986, "grad_norm": 0.3257472077657691, "learning_rate": 8.626507968065687e-06, "loss": 0.5215, "step": 4237 }, { "epoch": 0.6959949089565414, "grad_norm": 0.31021167794491405, "learning_rate": 8.62632088055772e-06, "loss": 0.5088, "step": 4238 }, { "epoch": 0.6961591361648841, "grad_norm": 0.3166485566164785, "learning_rate": 8.626133748233847e-06, "loss": 0.5213, "step": 4239 }, { "epoch": 0.6963233633732269, "grad_norm": 0.29145841027820457, "learning_rate": 8.625946571096106e-06, "loss": 0.4946, "step": 4240 }, { "epoch": 0.6964875905815696, "grad_norm": 0.319921850998031, "learning_rate": 8.625759349146521e-06, "loss": 0.5386, "step": 4241 }, { "epoch": 0.6966518177899124, "grad_norm": 0.308972089997359, "learning_rate": 8.625572082387132e-06, "loss": 0.5413, "step": 4242 }, { "epoch": 0.6968160449982551, "grad_norm": 0.2835317133915667, "learning_rate": 8.625384770819972e-06, "loss": 0.534, "step": 4243 }, { "epoch": 0.6969802722065979, "grad_norm": 0.4233385444079988, "learning_rate": 8.625197414447073e-06, "loss": 0.5229, "step": 4244 }, { "epoch": 0.6971444994149406, "grad_norm": 0.4137599078113359, "learning_rate": 8.625010013270474e-06, "loss": 0.4999, "step": 4245 }, { "epoch": 0.6973087266232834, "grad_norm": 0.333057995130589, "learning_rate": 8.624822567292205e-06, "loss": 0.5021, "step": 4246 }, { "epoch": 0.6974729538316261, "grad_norm": 0.3373896893862306, "learning_rate": 8.624635076514307e-06, "loss": 0.544, "step": 4247 }, { "epoch": 0.6976371810399687, "grad_norm": 0.3598055058817781, "learning_rate": 8.624447540938813e-06, "loss": 0.5192, "step": 4248 }, { "epoch": 0.6978014082483115, "grad_norm": 0.33727018756814525, "learning_rate": 8.62425996056776e-06, "loss": 0.5226, "step": 4249 }, { "epoch": 0.6979656354566542, "grad_norm": 0.30647169364146576, "learning_rate": 8.624072335403188e-06, "loss": 0.5102, "step": 4250 }, { "epoch": 0.698129862664997, "grad_norm": 0.2931637111302099, "learning_rate": 8.62388466544713e-06, "loss": 0.5347, "step": 4251 }, { "epoch": 0.6982940898733397, "grad_norm": 0.41161972319441537, "learning_rate": 8.623696950701629e-06, "loss": 0.498, "step": 4252 }, { "epoch": 0.6984583170816825, "grad_norm": 0.34023286853888174, "learning_rate": 8.623509191168722e-06, "loss": 0.5267, "step": 4253 }, { "epoch": 0.6986225442900252, "grad_norm": 0.4900155873432433, "learning_rate": 8.623321386850449e-06, "loss": 0.5373, "step": 4254 }, { "epoch": 0.698786771498368, "grad_norm": 0.39734723306453845, "learning_rate": 8.623133537748847e-06, "loss": 0.5201, "step": 4255 }, { "epoch": 0.6989509987067107, "grad_norm": 0.3420535819762841, "learning_rate": 8.622945643865959e-06, "loss": 0.535, "step": 4256 }, { "epoch": 0.6991152259150535, "grad_norm": 0.34538252628053706, "learning_rate": 8.622757705203825e-06, "loss": 0.5332, "step": 4257 }, { "epoch": 0.6992794531233962, "grad_norm": 0.30859870827439856, "learning_rate": 8.622569721764487e-06, "loss": 0.5099, "step": 4258 }, { "epoch": 0.699443680331739, "grad_norm": 0.3291857838753656, "learning_rate": 8.622381693549985e-06, "loss": 0.5341, "step": 4259 }, { "epoch": 0.6996079075400817, "grad_norm": 0.36292305907656036, "learning_rate": 8.62219362056236e-06, "loss": 0.5301, "step": 4260 }, { "epoch": 0.6997721347484245, "grad_norm": 0.3350119610981238, "learning_rate": 8.622005502803659e-06, "loss": 0.5331, "step": 4261 }, { "epoch": 0.6999363619567672, "grad_norm": 0.35928706753215134, "learning_rate": 8.621817340275921e-06, "loss": 0.5262, "step": 4262 }, { "epoch": 0.70010058916511, "grad_norm": 0.29995114712883486, "learning_rate": 8.621629132981194e-06, "loss": 0.5107, "step": 4263 }, { "epoch": 0.7002648163734527, "grad_norm": 0.32459709306920986, "learning_rate": 8.621440880921519e-06, "loss": 0.5436, "step": 4264 }, { "epoch": 0.7004290435817954, "grad_norm": 0.27867869217003954, "learning_rate": 8.621252584098938e-06, "loss": 0.5332, "step": 4265 }, { "epoch": 0.7005932707901381, "grad_norm": 0.34422747615332705, "learning_rate": 8.621064242515503e-06, "loss": 0.5191, "step": 4266 }, { "epoch": 0.7007574979984809, "grad_norm": 0.29331677428471403, "learning_rate": 8.620875856173253e-06, "loss": 0.5079, "step": 4267 }, { "epoch": 0.7009217252068236, "grad_norm": 0.3121609634949846, "learning_rate": 8.620687425074238e-06, "loss": 0.5136, "step": 4268 }, { "epoch": 0.7010859524151664, "grad_norm": 0.3838693596047789, "learning_rate": 8.620498949220502e-06, "loss": 0.5175, "step": 4269 }, { "epoch": 0.7012501796235091, "grad_norm": 0.3399582939068176, "learning_rate": 8.620310428614094e-06, "loss": 0.5303, "step": 4270 }, { "epoch": 0.7014144068318519, "grad_norm": 0.3882307518798506, "learning_rate": 8.620121863257062e-06, "loss": 0.5358, "step": 4271 }, { "epoch": 0.7015786340401946, "grad_norm": 0.327844178142628, "learning_rate": 8.619933253151452e-06, "loss": 0.5179, "step": 4272 }, { "epoch": 0.7017428612485374, "grad_norm": 0.3059441881333392, "learning_rate": 8.619744598299315e-06, "loss": 0.5003, "step": 4273 }, { "epoch": 0.7019070884568801, "grad_norm": 0.33289088821210483, "learning_rate": 8.619555898702695e-06, "loss": 0.5203, "step": 4274 }, { "epoch": 0.7020713156652229, "grad_norm": 0.40764636675339916, "learning_rate": 8.619367154363647e-06, "loss": 0.5287, "step": 4275 }, { "epoch": 0.7022355428735656, "grad_norm": 0.32231801015454054, "learning_rate": 8.61917836528422e-06, "loss": 0.5228, "step": 4276 }, { "epoch": 0.7023997700819083, "grad_norm": 0.36811692912587884, "learning_rate": 8.618989531466462e-06, "loss": 0.5104, "step": 4277 }, { "epoch": 0.7025639972902511, "grad_norm": 0.3432330040499813, "learning_rate": 8.618800652912425e-06, "loss": 0.5105, "step": 4278 }, { "epoch": 0.7027282244985938, "grad_norm": 0.3163500424266403, "learning_rate": 8.618611729624161e-06, "loss": 0.536, "step": 4279 }, { "epoch": 0.7028924517069366, "grad_norm": 0.3334439933008529, "learning_rate": 8.61842276160372e-06, "loss": 0.5218, "step": 4280 }, { "epoch": 0.7030566789152793, "grad_norm": 0.3651496955106728, "learning_rate": 8.618233748853159e-06, "loss": 0.5291, "step": 4281 }, { "epoch": 0.703220906123622, "grad_norm": 0.3909489697860476, "learning_rate": 8.618044691374524e-06, "loss": 0.5138, "step": 4282 }, { "epoch": 0.7033851333319647, "grad_norm": 0.3432299333346993, "learning_rate": 8.617855589169873e-06, "loss": 0.5296, "step": 4283 }, { "epoch": 0.7035493605403075, "grad_norm": 0.32106670987419533, "learning_rate": 8.617666442241261e-06, "loss": 0.5538, "step": 4284 }, { "epoch": 0.7037135877486502, "grad_norm": 0.278987267758817, "learning_rate": 8.617477250590737e-06, "loss": 0.5351, "step": 4285 }, { "epoch": 0.703877814956993, "grad_norm": 0.30910724383387306, "learning_rate": 8.617288014220362e-06, "loss": 0.5326, "step": 4286 }, { "epoch": 0.7040420421653357, "grad_norm": 0.323362918561536, "learning_rate": 8.617098733132187e-06, "loss": 0.5348, "step": 4287 }, { "epoch": 0.7042062693736785, "grad_norm": 0.3078079738940741, "learning_rate": 8.616909407328268e-06, "loss": 0.5458, "step": 4288 }, { "epoch": 0.7043704965820212, "grad_norm": 0.2844380907240079, "learning_rate": 8.616720036810664e-06, "loss": 0.5177, "step": 4289 }, { "epoch": 0.704534723790364, "grad_norm": 0.32375350229388816, "learning_rate": 8.61653062158143e-06, "loss": 0.5277, "step": 4290 }, { "epoch": 0.7046989509987067, "grad_norm": 0.3375882355708296, "learning_rate": 8.616341161642622e-06, "loss": 0.5172, "step": 4291 }, { "epoch": 0.7048631782070495, "grad_norm": 0.2865337715238847, "learning_rate": 8.616151656996301e-06, "loss": 0.5367, "step": 4292 }, { "epoch": 0.7050274054153922, "grad_norm": 0.3144427358498577, "learning_rate": 8.615962107644523e-06, "loss": 0.4955, "step": 4293 }, { "epoch": 0.705191632623735, "grad_norm": 0.3656931892003649, "learning_rate": 8.615772513589345e-06, "loss": 0.5221, "step": 4294 }, { "epoch": 0.7053558598320777, "grad_norm": 0.2958582370426801, "learning_rate": 8.61558287483283e-06, "loss": 0.5537, "step": 4295 }, { "epoch": 0.7055200870404205, "grad_norm": 0.3768598902611741, "learning_rate": 8.615393191377035e-06, "loss": 0.5235, "step": 4296 }, { "epoch": 0.7056843142487632, "grad_norm": 0.3347727814313395, "learning_rate": 8.615203463224021e-06, "loss": 0.5174, "step": 4297 }, { "epoch": 0.705848541457106, "grad_norm": 0.38916588034781613, "learning_rate": 8.615013690375849e-06, "loss": 0.539, "step": 4298 }, { "epoch": 0.7060127686654486, "grad_norm": 0.27779637428046317, "learning_rate": 8.61482387283458e-06, "loss": 0.5066, "step": 4299 }, { "epoch": 0.7061769958737913, "grad_norm": 0.5400534178669388, "learning_rate": 8.614634010602274e-06, "loss": 0.508, "step": 4300 }, { "epoch": 0.7063412230821341, "grad_norm": 0.37039212228978896, "learning_rate": 8.614444103680995e-06, "loss": 0.5402, "step": 4301 }, { "epoch": 0.7065054502904768, "grad_norm": 0.279075184659078, "learning_rate": 8.614254152072805e-06, "loss": 0.5422, "step": 4302 }, { "epoch": 0.7066696774988196, "grad_norm": 0.32756207172905416, "learning_rate": 8.614064155779767e-06, "loss": 0.5286, "step": 4303 }, { "epoch": 0.7068339047071623, "grad_norm": 0.34922911866740997, "learning_rate": 8.613874114803945e-06, "loss": 0.5264, "step": 4304 }, { "epoch": 0.7069981319155051, "grad_norm": 0.31257689692201895, "learning_rate": 8.613684029147401e-06, "loss": 0.5178, "step": 4305 }, { "epoch": 0.7071623591238478, "grad_norm": 0.30307569125011224, "learning_rate": 8.613493898812202e-06, "loss": 0.5124, "step": 4306 }, { "epoch": 0.7073265863321906, "grad_norm": 0.36069449916053775, "learning_rate": 8.613303723800413e-06, "loss": 0.5282, "step": 4307 }, { "epoch": 0.7074908135405333, "grad_norm": 0.3092916348181574, "learning_rate": 8.6131135041141e-06, "loss": 0.5305, "step": 4308 }, { "epoch": 0.7076550407488761, "grad_norm": 0.40891948696883457, "learning_rate": 8.612923239755325e-06, "loss": 0.5214, "step": 4309 }, { "epoch": 0.7078192679572188, "grad_norm": 0.31616164432034843, "learning_rate": 8.612732930726157e-06, "loss": 0.5194, "step": 4310 }, { "epoch": 0.7079834951655616, "grad_norm": 0.3000505637839565, "learning_rate": 8.612542577028663e-06, "loss": 0.524, "step": 4311 }, { "epoch": 0.7081477223739043, "grad_norm": 0.3129782682534076, "learning_rate": 8.612352178664911e-06, "loss": 0.5238, "step": 4312 }, { "epoch": 0.7083119495822471, "grad_norm": 0.3206955929245123, "learning_rate": 8.612161735636968e-06, "loss": 0.5261, "step": 4313 }, { "epoch": 0.7084761767905898, "grad_norm": 0.37716743758695925, "learning_rate": 8.611971247946904e-06, "loss": 0.4876, "step": 4314 }, { "epoch": 0.7086404039989326, "grad_norm": 0.40831633759201236, "learning_rate": 8.611780715596786e-06, "loss": 0.5295, "step": 4315 }, { "epoch": 0.7088046312072752, "grad_norm": 0.304318562490442, "learning_rate": 8.611590138588685e-06, "loss": 0.5323, "step": 4316 }, { "epoch": 0.708968858415618, "grad_norm": 0.3975476450385827, "learning_rate": 8.611399516924666e-06, "loss": 0.5175, "step": 4317 }, { "epoch": 0.7091330856239607, "grad_norm": 0.30548294363985984, "learning_rate": 8.611208850606806e-06, "loss": 0.5389, "step": 4318 }, { "epoch": 0.7092973128323035, "grad_norm": 0.3777837973806069, "learning_rate": 8.611018139637175e-06, "loss": 0.5311, "step": 4319 }, { "epoch": 0.7094615400406462, "grad_norm": 0.2973347910181853, "learning_rate": 8.61082738401784e-06, "loss": 0.5402, "step": 4320 }, { "epoch": 0.709625767248989, "grad_norm": 0.36469127314551625, "learning_rate": 8.610636583750874e-06, "loss": 0.5209, "step": 4321 }, { "epoch": 0.7097899944573317, "grad_norm": 0.28293959836732613, "learning_rate": 8.610445738838352e-06, "loss": 0.5153, "step": 4322 }, { "epoch": 0.7099542216656745, "grad_norm": 0.42306390379784664, "learning_rate": 8.610254849282345e-06, "loss": 0.5531, "step": 4323 }, { "epoch": 0.7101184488740172, "grad_norm": 0.28628622993663794, "learning_rate": 8.610063915084926e-06, "loss": 0.5414, "step": 4324 }, { "epoch": 0.71028267608236, "grad_norm": 0.281324125785271, "learning_rate": 8.609872936248168e-06, "loss": 0.5226, "step": 4325 }, { "epoch": 0.7104469032907027, "grad_norm": 0.37477344881709673, "learning_rate": 8.609681912774149e-06, "loss": 0.542, "step": 4326 }, { "epoch": 0.7106111304990455, "grad_norm": 0.3206181721344887, "learning_rate": 8.609490844664938e-06, "loss": 0.5223, "step": 4327 }, { "epoch": 0.7107753577073882, "grad_norm": 0.3339242114148264, "learning_rate": 8.609299731922616e-06, "loss": 0.5106, "step": 4328 }, { "epoch": 0.710939584915731, "grad_norm": 0.2983358499194098, "learning_rate": 8.609108574549254e-06, "loss": 0.5397, "step": 4329 }, { "epoch": 0.7111038121240737, "grad_norm": 0.33380596620035463, "learning_rate": 8.608917372546931e-06, "loss": 0.5411, "step": 4330 }, { "epoch": 0.7112680393324164, "grad_norm": 0.2698264997063511, "learning_rate": 8.608726125917721e-06, "loss": 0.5073, "step": 4331 }, { "epoch": 0.7114322665407592, "grad_norm": 0.3947051740162191, "learning_rate": 8.608534834663705e-06, "loss": 0.5249, "step": 4332 }, { "epoch": 0.7115964937491018, "grad_norm": 0.2906445128721743, "learning_rate": 8.608343498786958e-06, "loss": 0.5403, "step": 4333 }, { "epoch": 0.7117607209574446, "grad_norm": 0.30410226624743664, "learning_rate": 8.60815211828956e-06, "loss": 0.5335, "step": 4334 }, { "epoch": 0.7119249481657873, "grad_norm": 0.28918039186839667, "learning_rate": 8.607960693173585e-06, "loss": 0.5053, "step": 4335 }, { "epoch": 0.7120891753741301, "grad_norm": 0.39784143895681123, "learning_rate": 8.607769223441118e-06, "loss": 0.5263, "step": 4336 }, { "epoch": 0.7122534025824728, "grad_norm": 0.3816380356849168, "learning_rate": 8.607577709094234e-06, "loss": 0.536, "step": 4337 }, { "epoch": 0.7124176297908156, "grad_norm": 0.48876799069675336, "learning_rate": 8.607386150135016e-06, "loss": 0.52, "step": 4338 }, { "epoch": 0.7125818569991583, "grad_norm": 0.30419187380004425, "learning_rate": 8.607194546565541e-06, "loss": 0.5172, "step": 4339 }, { "epoch": 0.7127460842075011, "grad_norm": 0.3046540779190067, "learning_rate": 8.607002898387894e-06, "loss": 0.5255, "step": 4340 }, { "epoch": 0.7129103114158438, "grad_norm": 0.3697888338261436, "learning_rate": 8.606811205604155e-06, "loss": 0.5025, "step": 4341 }, { "epoch": 0.7130745386241866, "grad_norm": 0.32176447793422863, "learning_rate": 8.606619468216403e-06, "loss": 0.4727, "step": 4342 }, { "epoch": 0.7132387658325293, "grad_norm": 0.372657494023084, "learning_rate": 8.606427686226727e-06, "loss": 0.5283, "step": 4343 }, { "epoch": 0.7134029930408721, "grad_norm": 0.3190889106654982, "learning_rate": 8.606235859637206e-06, "loss": 0.5326, "step": 4344 }, { "epoch": 0.7135672202492148, "grad_norm": 0.2928457979647746, "learning_rate": 8.606043988449922e-06, "loss": 0.5128, "step": 4345 }, { "epoch": 0.7137314474575576, "grad_norm": 0.2706617064341812, "learning_rate": 8.60585207266696e-06, "loss": 0.5224, "step": 4346 }, { "epoch": 0.7138956746659003, "grad_norm": 0.3205314934694761, "learning_rate": 8.605660112290406e-06, "loss": 0.5028, "step": 4347 }, { "epoch": 0.7140599018742431, "grad_norm": 0.33291126764659723, "learning_rate": 8.605468107322342e-06, "loss": 0.5144, "step": 4348 }, { "epoch": 0.7142241290825858, "grad_norm": 0.319633960734903, "learning_rate": 8.605276057764857e-06, "loss": 0.5165, "step": 4349 }, { "epoch": 0.7143883562909285, "grad_norm": 0.4054703641916312, "learning_rate": 8.605083963620034e-06, "loss": 0.5242, "step": 4350 }, { "epoch": 0.7145525834992712, "grad_norm": 0.3103957865806984, "learning_rate": 8.604891824889961e-06, "loss": 0.5218, "step": 4351 }, { "epoch": 0.714716810707614, "grad_norm": 0.3180642837392691, "learning_rate": 8.604699641576723e-06, "loss": 0.528, "step": 4352 }, { "epoch": 0.7148810379159567, "grad_norm": 0.3133787583026555, "learning_rate": 8.604507413682409e-06, "loss": 0.507, "step": 4353 }, { "epoch": 0.7150452651242994, "grad_norm": 0.33877895632971905, "learning_rate": 8.604315141209108e-06, "loss": 0.5238, "step": 4354 }, { "epoch": 0.7152094923326422, "grad_norm": 0.38840289489043733, "learning_rate": 8.604122824158905e-06, "loss": 0.5504, "step": 4355 }, { "epoch": 0.7153737195409849, "grad_norm": 0.3955477841512037, "learning_rate": 8.603930462533889e-06, "loss": 0.4993, "step": 4356 }, { "epoch": 0.7155379467493277, "grad_norm": 0.27883260117502756, "learning_rate": 8.603738056336152e-06, "loss": 0.5121, "step": 4357 }, { "epoch": 0.7157021739576704, "grad_norm": 0.2850443117354454, "learning_rate": 8.603545605567782e-06, "loss": 0.525, "step": 4358 }, { "epoch": 0.7158664011660132, "grad_norm": 0.43399050909543463, "learning_rate": 8.603353110230868e-06, "loss": 0.5265, "step": 4359 }, { "epoch": 0.7160306283743559, "grad_norm": 0.31443791711799157, "learning_rate": 8.603160570327504e-06, "loss": 0.535, "step": 4360 }, { "epoch": 0.7161948555826987, "grad_norm": 0.2939906689274626, "learning_rate": 8.602967985859779e-06, "loss": 0.5262, "step": 4361 }, { "epoch": 0.7163590827910414, "grad_norm": 0.3551573997835369, "learning_rate": 8.602775356829783e-06, "loss": 0.5221, "step": 4362 }, { "epoch": 0.7165233099993842, "grad_norm": 0.37242204799922163, "learning_rate": 8.602582683239612e-06, "loss": 0.5108, "step": 4363 }, { "epoch": 0.7166875372077269, "grad_norm": 0.3301229591082884, "learning_rate": 8.602389965091357e-06, "loss": 0.5033, "step": 4364 }, { "epoch": 0.7168517644160697, "grad_norm": 0.5596674893832613, "learning_rate": 8.602197202387109e-06, "loss": 0.5257, "step": 4365 }, { "epoch": 0.7170159916244124, "grad_norm": 0.32003899380645795, "learning_rate": 8.602004395128963e-06, "loss": 0.5096, "step": 4366 }, { "epoch": 0.7171802188327551, "grad_norm": 0.32101527283833253, "learning_rate": 8.601811543319016e-06, "loss": 0.5154, "step": 4367 }, { "epoch": 0.7173444460410978, "grad_norm": 0.3582105517657474, "learning_rate": 8.601618646959359e-06, "loss": 0.5368, "step": 4368 }, { "epoch": 0.7175086732494406, "grad_norm": 0.3834938893158892, "learning_rate": 8.601425706052086e-06, "loss": 0.5245, "step": 4369 }, { "epoch": 0.7176729004577833, "grad_norm": 0.31214498157447496, "learning_rate": 8.601232720599298e-06, "loss": 0.5392, "step": 4370 }, { "epoch": 0.7178371276661261, "grad_norm": 0.2968901286066325, "learning_rate": 8.601039690603085e-06, "loss": 0.5512, "step": 4371 }, { "epoch": 0.7180013548744688, "grad_norm": 0.3226767453514702, "learning_rate": 8.600846616065546e-06, "loss": 0.5344, "step": 4372 }, { "epoch": 0.7181655820828116, "grad_norm": 0.28203986767629385, "learning_rate": 8.600653496988781e-06, "loss": 0.5252, "step": 4373 }, { "epoch": 0.7183298092911543, "grad_norm": 0.3263527602745717, "learning_rate": 8.60046033337488e-06, "loss": 0.5381, "step": 4374 }, { "epoch": 0.7184940364994971, "grad_norm": 0.3830517652455408, "learning_rate": 8.600267125225948e-06, "loss": 0.5363, "step": 4375 }, { "epoch": 0.7186582637078398, "grad_norm": 0.3124628134298113, "learning_rate": 8.60007387254408e-06, "loss": 0.5039, "step": 4376 }, { "epoch": 0.7188224909161826, "grad_norm": 0.4344770371599259, "learning_rate": 8.599880575331379e-06, "loss": 0.5167, "step": 4377 }, { "epoch": 0.7189867181245253, "grad_norm": 0.44022478012369026, "learning_rate": 8.599687233589938e-06, "loss": 0.5214, "step": 4378 }, { "epoch": 0.719150945332868, "grad_norm": 0.3688156748206383, "learning_rate": 8.599493847321862e-06, "loss": 0.5206, "step": 4379 }, { "epoch": 0.7193151725412108, "grad_norm": 0.32935829629851043, "learning_rate": 8.599300416529249e-06, "loss": 0.5141, "step": 4380 }, { "epoch": 0.7194793997495535, "grad_norm": 0.29232395104565306, "learning_rate": 8.599106941214199e-06, "loss": 0.5259, "step": 4381 }, { "epoch": 0.7196436269578963, "grad_norm": 0.3230601984030344, "learning_rate": 8.598913421378815e-06, "loss": 0.5157, "step": 4382 }, { "epoch": 0.7198078541662389, "grad_norm": 0.3366778464764349, "learning_rate": 8.5987198570252e-06, "loss": 0.5163, "step": 4383 }, { "epoch": 0.7199720813745817, "grad_norm": 0.2822882327510804, "learning_rate": 8.598526248155453e-06, "loss": 0.5302, "step": 4384 }, { "epoch": 0.7201363085829244, "grad_norm": 0.34913151065251047, "learning_rate": 8.598332594771678e-06, "loss": 0.5239, "step": 4385 }, { "epoch": 0.7203005357912672, "grad_norm": 0.30615999757517137, "learning_rate": 8.598138896875982e-06, "loss": 0.5333, "step": 4386 }, { "epoch": 0.7204647629996099, "grad_norm": 0.3338081110150123, "learning_rate": 8.597945154470462e-06, "loss": 0.55, "step": 4387 }, { "epoch": 0.7206289902079527, "grad_norm": 0.27003702556268694, "learning_rate": 8.597751367557229e-06, "loss": 0.5179, "step": 4388 }, { "epoch": 0.7207932174162954, "grad_norm": 0.3166293407081746, "learning_rate": 8.59755753613838e-06, "loss": 0.5193, "step": 4389 }, { "epoch": 0.7209574446246382, "grad_norm": 0.2970707814051773, "learning_rate": 8.597363660216028e-06, "loss": 0.5089, "step": 4390 }, { "epoch": 0.7211216718329809, "grad_norm": 0.4042859510774983, "learning_rate": 8.597169739792276e-06, "loss": 0.5184, "step": 4391 }, { "epoch": 0.7212858990413237, "grad_norm": 0.36079226884819154, "learning_rate": 8.596975774869229e-06, "loss": 0.5115, "step": 4392 }, { "epoch": 0.7214501262496664, "grad_norm": 0.2960898042956307, "learning_rate": 8.596781765448994e-06, "loss": 0.5214, "step": 4393 }, { "epoch": 0.7216143534580092, "grad_norm": 0.3067528269205476, "learning_rate": 8.596587711533677e-06, "loss": 0.5404, "step": 4394 }, { "epoch": 0.7217785806663519, "grad_norm": 0.5567244165497772, "learning_rate": 8.596393613125387e-06, "loss": 0.5322, "step": 4395 }, { "epoch": 0.7219428078746947, "grad_norm": 0.31549950117169867, "learning_rate": 8.596199470226234e-06, "loss": 0.5202, "step": 4396 }, { "epoch": 0.7221070350830374, "grad_norm": 0.3558221536624943, "learning_rate": 8.596005282838324e-06, "loss": 0.5098, "step": 4397 }, { "epoch": 0.7222712622913802, "grad_norm": 0.3104817339740221, "learning_rate": 8.595811050963767e-06, "loss": 0.5341, "step": 4398 }, { "epoch": 0.7224354894997229, "grad_norm": 0.32786752772407324, "learning_rate": 8.59561677460467e-06, "loss": 0.5089, "step": 4399 }, { "epoch": 0.7225997167080656, "grad_norm": 0.4047878987733635, "learning_rate": 8.595422453763149e-06, "loss": 0.5458, "step": 4400 }, { "epoch": 0.7227639439164083, "grad_norm": 0.2858382446467845, "learning_rate": 8.595228088441307e-06, "loss": 0.5354, "step": 4401 }, { "epoch": 0.722928171124751, "grad_norm": 0.32023432751026115, "learning_rate": 8.595033678641261e-06, "loss": 0.5254, "step": 4402 }, { "epoch": 0.7230923983330938, "grad_norm": 0.3045500747617058, "learning_rate": 8.594839224365119e-06, "loss": 0.521, "step": 4403 }, { "epoch": 0.7232566255414365, "grad_norm": 0.29098460712999963, "learning_rate": 8.594644725614995e-06, "loss": 0.5173, "step": 4404 }, { "epoch": 0.7234208527497793, "grad_norm": 0.30702617159676615, "learning_rate": 8.594450182393002e-06, "loss": 0.5294, "step": 4405 }, { "epoch": 0.723585079958122, "grad_norm": 0.3657684663544443, "learning_rate": 8.59425559470125e-06, "loss": 0.5244, "step": 4406 }, { "epoch": 0.7237493071664648, "grad_norm": 0.33389501184795556, "learning_rate": 8.594060962541855e-06, "loss": 0.5379, "step": 4407 }, { "epoch": 0.7239135343748075, "grad_norm": 0.3401852810246631, "learning_rate": 8.593866285916928e-06, "loss": 0.523, "step": 4408 }, { "epoch": 0.7240777615831503, "grad_norm": 0.31282359408606597, "learning_rate": 8.593671564828587e-06, "loss": 0.5356, "step": 4409 }, { "epoch": 0.724241988791493, "grad_norm": 0.2828881931783121, "learning_rate": 8.593476799278945e-06, "loss": 0.5176, "step": 4410 }, { "epoch": 0.7244062159998358, "grad_norm": 0.28740377845431986, "learning_rate": 8.593281989270117e-06, "loss": 0.5182, "step": 4411 }, { "epoch": 0.7245704432081785, "grad_norm": 0.32739805868398925, "learning_rate": 8.59308713480422e-06, "loss": 0.5234, "step": 4412 }, { "epoch": 0.7247346704165213, "grad_norm": 0.31856973622465923, "learning_rate": 8.59289223588337e-06, "loss": 0.5102, "step": 4413 }, { "epoch": 0.724898897624864, "grad_norm": 0.30433947412820167, "learning_rate": 8.592697292509682e-06, "loss": 0.5323, "step": 4414 }, { "epoch": 0.7250631248332068, "grad_norm": 0.3093281266377262, "learning_rate": 8.592502304685274e-06, "loss": 0.4877, "step": 4415 }, { "epoch": 0.7252273520415495, "grad_norm": 0.4192937983246652, "learning_rate": 8.592307272412267e-06, "loss": 0.5315, "step": 4416 }, { "epoch": 0.7253915792498922, "grad_norm": 0.43350275798950666, "learning_rate": 8.592112195692776e-06, "loss": 0.5063, "step": 4417 }, { "epoch": 0.7255558064582349, "grad_norm": 0.4678623945111824, "learning_rate": 8.591917074528921e-06, "loss": 0.5424, "step": 4418 }, { "epoch": 0.7257200336665777, "grad_norm": 0.287898271867044, "learning_rate": 8.59172190892282e-06, "loss": 0.5038, "step": 4419 }, { "epoch": 0.7258842608749204, "grad_norm": 0.29822897607011906, "learning_rate": 8.591526698876592e-06, "loss": 0.5165, "step": 4420 }, { "epoch": 0.7260484880832632, "grad_norm": 0.3109623634429174, "learning_rate": 8.591331444392361e-06, "loss": 0.5296, "step": 4421 }, { "epoch": 0.7262127152916059, "grad_norm": 0.3741682736438768, "learning_rate": 8.591136145472244e-06, "loss": 0.5166, "step": 4422 }, { "epoch": 0.7263769424999487, "grad_norm": 0.3281684045665565, "learning_rate": 8.590940802118363e-06, "loss": 0.4932, "step": 4423 }, { "epoch": 0.7265411697082914, "grad_norm": 0.2967006165698348, "learning_rate": 8.59074541433284e-06, "loss": 0.5341, "step": 4424 }, { "epoch": 0.7267053969166342, "grad_norm": 0.41241388629963116, "learning_rate": 8.590549982117798e-06, "loss": 0.5022, "step": 4425 }, { "epoch": 0.7268696241249769, "grad_norm": 0.3291196432580282, "learning_rate": 8.590354505475357e-06, "loss": 0.5139, "step": 4426 }, { "epoch": 0.7270338513333197, "grad_norm": 0.32679147888924837, "learning_rate": 8.590158984407644e-06, "loss": 0.5137, "step": 4427 }, { "epoch": 0.7271980785416624, "grad_norm": 0.28783281690844026, "learning_rate": 8.589963418916778e-06, "loss": 0.5296, "step": 4428 }, { "epoch": 0.7273623057500052, "grad_norm": 0.31121651109770526, "learning_rate": 8.589767809004886e-06, "loss": 0.5407, "step": 4429 }, { "epoch": 0.7275265329583479, "grad_norm": 0.3231129949957135, "learning_rate": 8.589572154674093e-06, "loss": 0.5187, "step": 4430 }, { "epoch": 0.7276907601666907, "grad_norm": 0.35344471569362507, "learning_rate": 8.589376455926521e-06, "loss": 0.5165, "step": 4431 }, { "epoch": 0.7278549873750334, "grad_norm": 0.30854070183351756, "learning_rate": 8.589180712764298e-06, "loss": 0.5286, "step": 4432 }, { "epoch": 0.7280192145833762, "grad_norm": 0.3771827531386895, "learning_rate": 8.58898492518955e-06, "loss": 0.5381, "step": 4433 }, { "epoch": 0.7281834417917188, "grad_norm": 0.3842022509196105, "learning_rate": 8.588789093204402e-06, "loss": 0.5166, "step": 4434 }, { "epoch": 0.7283476690000615, "grad_norm": 0.3276679515164658, "learning_rate": 8.58859321681098e-06, "loss": 0.5236, "step": 4435 }, { "epoch": 0.7285118962084043, "grad_norm": 0.32004257785016976, "learning_rate": 8.588397296011416e-06, "loss": 0.5196, "step": 4436 }, { "epoch": 0.728676123416747, "grad_norm": 0.33663249793293937, "learning_rate": 8.588201330807833e-06, "loss": 0.5278, "step": 4437 }, { "epoch": 0.7288403506250898, "grad_norm": 0.33869601572570573, "learning_rate": 8.588005321202361e-06, "loss": 0.5294, "step": 4438 }, { "epoch": 0.7290045778334325, "grad_norm": 0.29466669461762524, "learning_rate": 8.587809267197132e-06, "loss": 0.5347, "step": 4439 }, { "epoch": 0.7291688050417753, "grad_norm": 0.32128430605037284, "learning_rate": 8.587613168794269e-06, "loss": 0.5244, "step": 4440 }, { "epoch": 0.729333032250118, "grad_norm": 0.32825756800992645, "learning_rate": 8.587417025995909e-06, "loss": 0.5339, "step": 4441 }, { "epoch": 0.7294972594584608, "grad_norm": 0.3301350950702061, "learning_rate": 8.587220838804176e-06, "loss": 0.5109, "step": 4442 }, { "epoch": 0.7296614866668035, "grad_norm": 0.28104238952110344, "learning_rate": 8.587024607221203e-06, "loss": 0.5046, "step": 4443 }, { "epoch": 0.7298257138751463, "grad_norm": 0.37224012508447873, "learning_rate": 8.586828331249123e-06, "loss": 0.5169, "step": 4444 }, { "epoch": 0.729989941083489, "grad_norm": 0.39243426907646256, "learning_rate": 8.586632010890065e-06, "loss": 0.4953, "step": 4445 }, { "epoch": 0.7301541682918318, "grad_norm": 0.3699144774546227, "learning_rate": 8.586435646146164e-06, "loss": 0.5161, "step": 4446 }, { "epoch": 0.7303183955001745, "grad_norm": 0.403502931249784, "learning_rate": 8.586239237019552e-06, "loss": 0.5144, "step": 4447 }, { "epoch": 0.7304826227085173, "grad_norm": 0.33530858772743866, "learning_rate": 8.586042783512361e-06, "loss": 0.5065, "step": 4448 }, { "epoch": 0.73064684991686, "grad_norm": 0.36473692233901733, "learning_rate": 8.585846285626724e-06, "loss": 0.5289, "step": 4449 }, { "epoch": 0.7308110771252028, "grad_norm": 0.39727298026229163, "learning_rate": 8.585649743364778e-06, "loss": 0.5261, "step": 4450 }, { "epoch": 0.7309753043335454, "grad_norm": 0.34627397363700524, "learning_rate": 8.585453156728655e-06, "loss": 0.4968, "step": 4451 }, { "epoch": 0.7311395315418882, "grad_norm": 0.3024856634002474, "learning_rate": 8.58525652572049e-06, "loss": 0.5308, "step": 4452 }, { "epoch": 0.7313037587502309, "grad_norm": 0.3052356152052224, "learning_rate": 8.585059850342422e-06, "loss": 0.4987, "step": 4453 }, { "epoch": 0.7314679859585737, "grad_norm": 0.3868676594902889, "learning_rate": 8.584863130596584e-06, "loss": 0.5319, "step": 4454 }, { "epoch": 0.7316322131669164, "grad_norm": 0.3114269144053163, "learning_rate": 8.584666366485115e-06, "loss": 0.5191, "step": 4455 }, { "epoch": 0.7317964403752591, "grad_norm": 0.32173171845094, "learning_rate": 8.584469558010148e-06, "loss": 0.5146, "step": 4456 }, { "epoch": 0.7319606675836019, "grad_norm": 0.32749605605918813, "learning_rate": 8.584272705173824e-06, "loss": 0.5223, "step": 4457 }, { "epoch": 0.7321248947919446, "grad_norm": 0.35873045536107073, "learning_rate": 8.58407580797828e-06, "loss": 0.5307, "step": 4458 }, { "epoch": 0.7322891220002874, "grad_norm": 0.33696054538783504, "learning_rate": 8.583878866425656e-06, "loss": 0.5112, "step": 4459 }, { "epoch": 0.7324533492086301, "grad_norm": 0.322150007734754, "learning_rate": 8.583681880518088e-06, "loss": 0.5408, "step": 4460 }, { "epoch": 0.7326175764169729, "grad_norm": 0.3577905586167644, "learning_rate": 8.583484850257717e-06, "loss": 0.5247, "step": 4461 }, { "epoch": 0.7327818036253156, "grad_norm": 0.3890314954445434, "learning_rate": 8.583287775646683e-06, "loss": 0.5403, "step": 4462 }, { "epoch": 0.7329460308336584, "grad_norm": 0.322614607637845, "learning_rate": 8.583090656687126e-06, "loss": 0.5116, "step": 4463 }, { "epoch": 0.7331102580420011, "grad_norm": 0.37697488192045586, "learning_rate": 8.582893493381187e-06, "loss": 0.5333, "step": 4464 }, { "epoch": 0.7332744852503439, "grad_norm": 0.37182964027775867, "learning_rate": 8.58269628573101e-06, "loss": 0.5285, "step": 4465 }, { "epoch": 0.7334387124586866, "grad_norm": 0.31262978823905135, "learning_rate": 8.582499033738732e-06, "loss": 0.5342, "step": 4466 }, { "epoch": 0.7336029396670294, "grad_norm": 0.29094147937733944, "learning_rate": 8.582301737406498e-06, "loss": 0.522, "step": 4467 }, { "epoch": 0.733767166875372, "grad_norm": 0.3595441468442823, "learning_rate": 8.582104396736453e-06, "loss": 0.5367, "step": 4468 }, { "epoch": 0.7339313940837148, "grad_norm": 0.32354047052265966, "learning_rate": 8.581907011730735e-06, "loss": 0.5146, "step": 4469 }, { "epoch": 0.7340956212920575, "grad_norm": 0.3434044743236067, "learning_rate": 8.581709582391492e-06, "loss": 0.5308, "step": 4470 }, { "epoch": 0.7342598485004003, "grad_norm": 0.2966580125366594, "learning_rate": 8.581512108720868e-06, "loss": 0.5352, "step": 4471 }, { "epoch": 0.734424075708743, "grad_norm": 0.3034642435029345, "learning_rate": 8.581314590721006e-06, "loss": 0.529, "step": 4472 }, { "epoch": 0.7345883029170858, "grad_norm": 0.3172501587579415, "learning_rate": 8.581117028394052e-06, "loss": 0.4962, "step": 4473 }, { "epoch": 0.7347525301254285, "grad_norm": 0.3057272334680113, "learning_rate": 8.580919421742153e-06, "loss": 0.5362, "step": 4474 }, { "epoch": 0.7349167573337713, "grad_norm": 0.27472219505829215, "learning_rate": 8.580721770767452e-06, "loss": 0.5451, "step": 4475 }, { "epoch": 0.735080984542114, "grad_norm": 0.3101713531189586, "learning_rate": 8.580524075472099e-06, "loss": 0.5333, "step": 4476 }, { "epoch": 0.7352452117504568, "grad_norm": 0.3294719891527552, "learning_rate": 8.58032633585824e-06, "loss": 0.5193, "step": 4477 }, { "epoch": 0.7354094389587995, "grad_norm": 2.171350755473058, "learning_rate": 8.580128551928021e-06, "loss": 0.4927, "step": 4478 }, { "epoch": 0.7355736661671423, "grad_norm": 0.3730561619746024, "learning_rate": 8.579930723683592e-06, "loss": 0.4969, "step": 4479 }, { "epoch": 0.735737893375485, "grad_norm": 0.5268860411236578, "learning_rate": 8.579732851127102e-06, "loss": 0.5256, "step": 4480 }, { "epoch": 0.7359021205838278, "grad_norm": 0.3387582635476446, "learning_rate": 8.5795349342607e-06, "loss": 0.5631, "step": 4481 }, { "epoch": 0.7360663477921705, "grad_norm": 0.28861855998881153, "learning_rate": 8.579336973086535e-06, "loss": 0.5199, "step": 4482 }, { "epoch": 0.7362305750005133, "grad_norm": 0.2942221387932248, "learning_rate": 8.579138967606755e-06, "loss": 0.5036, "step": 4483 }, { "epoch": 0.736394802208856, "grad_norm": 1.0358662167933195, "learning_rate": 8.578940917823514e-06, "loss": 0.5111, "step": 4484 }, { "epoch": 0.7365590294171986, "grad_norm": 0.37438050679631024, "learning_rate": 8.578742823738961e-06, "loss": 0.5173, "step": 4485 }, { "epoch": 0.7367232566255414, "grad_norm": 0.3277582159694442, "learning_rate": 8.578544685355248e-06, "loss": 0.528, "step": 4486 }, { "epoch": 0.7368874838338841, "grad_norm": 0.30091880038912744, "learning_rate": 8.578346502674526e-06, "loss": 0.5231, "step": 4487 }, { "epoch": 0.7370517110422269, "grad_norm": 0.3074065546946728, "learning_rate": 8.578148275698951e-06, "loss": 0.5222, "step": 4488 }, { "epoch": 0.7372159382505696, "grad_norm": 0.354491887670448, "learning_rate": 8.577950004430672e-06, "loss": 0.5327, "step": 4489 }, { "epoch": 0.7373801654589124, "grad_norm": 0.29587883852898034, "learning_rate": 8.577751688871842e-06, "loss": 0.5161, "step": 4490 }, { "epoch": 0.7375443926672551, "grad_norm": 0.3460943354301366, "learning_rate": 8.577553329024618e-06, "loss": 0.5146, "step": 4491 }, { "epoch": 0.7377086198755979, "grad_norm": 0.2967950619579812, "learning_rate": 8.577354924891155e-06, "loss": 0.53, "step": 4492 }, { "epoch": 0.7378728470839406, "grad_norm": 0.342330627577297, "learning_rate": 8.577156476473603e-06, "loss": 0.5101, "step": 4493 }, { "epoch": 0.7380370742922834, "grad_norm": 0.2936759776996891, "learning_rate": 8.576957983774123e-06, "loss": 0.5217, "step": 4494 }, { "epoch": 0.7382013015006261, "grad_norm": 0.3769017994359706, "learning_rate": 8.576759446794865e-06, "loss": 0.5267, "step": 4495 }, { "epoch": 0.7383655287089689, "grad_norm": 0.3243258117086145, "learning_rate": 8.57656086553799e-06, "loss": 0.5333, "step": 4496 }, { "epoch": 0.7385297559173116, "grad_norm": 0.288283887436931, "learning_rate": 8.576362240005653e-06, "loss": 0.5028, "step": 4497 }, { "epoch": 0.7386939831256544, "grad_norm": 0.47367175940020345, "learning_rate": 8.576163570200013e-06, "loss": 0.5243, "step": 4498 }, { "epoch": 0.7388582103339971, "grad_norm": 0.33129672486938455, "learning_rate": 8.575964856123224e-06, "loss": 0.5116, "step": 4499 }, { "epoch": 0.7390224375423399, "grad_norm": 0.30974731627218893, "learning_rate": 8.575766097777447e-06, "loss": 0.5142, "step": 4500 }, { "epoch": 0.7391866647506826, "grad_norm": 0.32922854453451555, "learning_rate": 8.575567295164842e-06, "loss": 0.5181, "step": 4501 }, { "epoch": 0.7393508919590253, "grad_norm": 0.3561359117611993, "learning_rate": 8.575368448287564e-06, "loss": 0.5343, "step": 4502 }, { "epoch": 0.739515119167368, "grad_norm": 0.30225794184268934, "learning_rate": 8.575169557147775e-06, "loss": 0.5407, "step": 4503 }, { "epoch": 0.7396793463757108, "grad_norm": 0.3190486485860462, "learning_rate": 8.574970621747636e-06, "loss": 0.5242, "step": 4504 }, { "epoch": 0.7398435735840535, "grad_norm": 1.4215271237322333, "learning_rate": 8.57477164208931e-06, "loss": 0.4993, "step": 4505 }, { "epoch": 0.7400078007923963, "grad_norm": 0.303708837722683, "learning_rate": 8.574572618174951e-06, "loss": 0.5197, "step": 4506 }, { "epoch": 0.740172028000739, "grad_norm": 0.3312313092554724, "learning_rate": 8.574373550006724e-06, "loss": 0.5142, "step": 4507 }, { "epoch": 0.7403362552090818, "grad_norm": 0.3313425846200784, "learning_rate": 8.574174437586794e-06, "loss": 0.5084, "step": 4508 }, { "epoch": 0.7405004824174245, "grad_norm": 0.34204198699638977, "learning_rate": 8.573975280917321e-06, "loss": 0.5148, "step": 4509 }, { "epoch": 0.7406647096257672, "grad_norm": 0.2988610461822118, "learning_rate": 8.573776080000466e-06, "loss": 0.5205, "step": 4510 }, { "epoch": 0.74082893683411, "grad_norm": 0.28717571034744444, "learning_rate": 8.573576834838397e-06, "loss": 0.528, "step": 4511 }, { "epoch": 0.7409931640424527, "grad_norm": 0.317583520589486, "learning_rate": 8.573377545433275e-06, "loss": 0.5248, "step": 4512 }, { "epoch": 0.7411573912507955, "grad_norm": 0.32758612071497484, "learning_rate": 8.573178211787266e-06, "loss": 0.5128, "step": 4513 }, { "epoch": 0.7413216184591382, "grad_norm": 0.33237284591262994, "learning_rate": 8.572978833902531e-06, "loss": 0.5349, "step": 4514 }, { "epoch": 0.741485845667481, "grad_norm": 0.42198291061829296, "learning_rate": 8.572779411781242e-06, "loss": 0.5221, "step": 4515 }, { "epoch": 0.7416500728758237, "grad_norm": 0.31109245246405137, "learning_rate": 8.57257994542556e-06, "loss": 0.5072, "step": 4516 }, { "epoch": 0.7418143000841665, "grad_norm": 0.2980861107722376, "learning_rate": 8.572380434837653e-06, "loss": 0.5141, "step": 4517 }, { "epoch": 0.7419785272925092, "grad_norm": 0.30924696499077564, "learning_rate": 8.572180880019688e-06, "loss": 0.5352, "step": 4518 }, { "epoch": 0.7421427545008519, "grad_norm": 0.339786312302817, "learning_rate": 8.571981280973832e-06, "loss": 0.5196, "step": 4519 }, { "epoch": 0.7423069817091946, "grad_norm": 0.274055301165695, "learning_rate": 8.571781637702254e-06, "loss": 0.5105, "step": 4520 }, { "epoch": 0.7424712089175374, "grad_norm": 0.34291339425641154, "learning_rate": 8.571581950207121e-06, "loss": 0.5071, "step": 4521 }, { "epoch": 0.7426354361258801, "grad_norm": 0.2971810938064332, "learning_rate": 8.571382218490602e-06, "loss": 0.5295, "step": 4522 }, { "epoch": 0.7427996633342229, "grad_norm": 0.27086010154061957, "learning_rate": 8.571182442554865e-06, "loss": 0.4966, "step": 4523 }, { "epoch": 0.7429638905425656, "grad_norm": 0.3187861637022362, "learning_rate": 8.570982622402082e-06, "loss": 0.5025, "step": 4524 }, { "epoch": 0.7431281177509084, "grad_norm": 0.304476818924289, "learning_rate": 8.570782758034423e-06, "loss": 0.5017, "step": 4525 }, { "epoch": 0.7432923449592511, "grad_norm": 0.3673418689445969, "learning_rate": 8.570582849454057e-06, "loss": 0.521, "step": 4526 }, { "epoch": 0.7434565721675939, "grad_norm": 0.31022619613586055, "learning_rate": 8.570382896663158e-06, "loss": 0.5205, "step": 4527 }, { "epoch": 0.7436207993759366, "grad_norm": 0.29831412465170415, "learning_rate": 8.570182899663896e-06, "loss": 0.538, "step": 4528 }, { "epoch": 0.7437850265842794, "grad_norm": 0.26560917646771326, "learning_rate": 8.569982858458441e-06, "loss": 0.4942, "step": 4529 }, { "epoch": 0.7439492537926221, "grad_norm": 0.40343919917248544, "learning_rate": 8.56978277304897e-06, "loss": 0.5388, "step": 4530 }, { "epoch": 0.7441134810009649, "grad_norm": 0.3538356979890078, "learning_rate": 8.569582643437653e-06, "loss": 0.5234, "step": 4531 }, { "epoch": 0.7442777082093076, "grad_norm": 0.3502814342255026, "learning_rate": 8.569382469626664e-06, "loss": 0.5098, "step": 4532 }, { "epoch": 0.7444419354176504, "grad_norm": 0.33687403724482856, "learning_rate": 8.56918225161818e-06, "loss": 0.5133, "step": 4533 }, { "epoch": 0.7446061626259931, "grad_norm": 0.28226515273483066, "learning_rate": 8.56898198941437e-06, "loss": 0.51, "step": 4534 }, { "epoch": 0.7447703898343359, "grad_norm": 0.30469490349467954, "learning_rate": 8.568781683017414e-06, "loss": 0.5227, "step": 4535 }, { "epoch": 0.7449346170426785, "grad_norm": 0.35862422640377734, "learning_rate": 8.568581332429486e-06, "loss": 0.5168, "step": 4536 }, { "epoch": 0.7450988442510212, "grad_norm": 0.2818516886467351, "learning_rate": 8.568380937652761e-06, "loss": 0.5075, "step": 4537 }, { "epoch": 0.745263071459364, "grad_norm": 0.2918812274214278, "learning_rate": 8.568180498689417e-06, "loss": 0.5286, "step": 4538 }, { "epoch": 0.7454272986677067, "grad_norm": 0.3570737420094653, "learning_rate": 8.56798001554163e-06, "loss": 0.5322, "step": 4539 }, { "epoch": 0.7455915258760495, "grad_norm": 0.2910121417077147, "learning_rate": 8.567779488211577e-06, "loss": 0.5371, "step": 4540 }, { "epoch": 0.7457557530843922, "grad_norm": 0.3164570259557555, "learning_rate": 8.567578916701437e-06, "loss": 0.5149, "step": 4541 }, { "epoch": 0.745919980292735, "grad_norm": 0.31432087465115854, "learning_rate": 8.567378301013388e-06, "loss": 0.5174, "step": 4542 }, { "epoch": 0.7460842075010777, "grad_norm": 0.3332922788821445, "learning_rate": 8.56717764114961e-06, "loss": 0.4993, "step": 4543 }, { "epoch": 0.7462484347094205, "grad_norm": 0.2942101993605978, "learning_rate": 8.56697693711228e-06, "loss": 0.5338, "step": 4544 }, { "epoch": 0.7464126619177632, "grad_norm": 0.3123029158168016, "learning_rate": 8.566776188903579e-06, "loss": 0.53, "step": 4545 }, { "epoch": 0.746576889126106, "grad_norm": 0.33334606207740836, "learning_rate": 8.566575396525688e-06, "loss": 0.5401, "step": 4546 }, { "epoch": 0.7467411163344487, "grad_norm": 0.3499456470338676, "learning_rate": 8.566374559980787e-06, "loss": 0.5053, "step": 4547 }, { "epoch": 0.7469053435427915, "grad_norm": 0.5894023063619104, "learning_rate": 8.566173679271057e-06, "loss": 0.5044, "step": 4548 }, { "epoch": 0.7470695707511342, "grad_norm": 0.3020765404041951, "learning_rate": 8.565972754398682e-06, "loss": 0.5099, "step": 4549 }, { "epoch": 0.747233797959477, "grad_norm": 0.3171528202979881, "learning_rate": 8.565771785365841e-06, "loss": 0.5044, "step": 4550 }, { "epoch": 0.7473980251678197, "grad_norm": 0.36820579002695975, "learning_rate": 8.565570772174718e-06, "loss": 0.5264, "step": 4551 }, { "epoch": 0.7475622523761625, "grad_norm": 0.3652549638401894, "learning_rate": 8.565369714827497e-06, "loss": 0.517, "step": 4552 }, { "epoch": 0.7477264795845051, "grad_norm": 0.28535511401718994, "learning_rate": 8.565168613326362e-06, "loss": 0.4884, "step": 4553 }, { "epoch": 0.7478907067928479, "grad_norm": 0.33634032346122994, "learning_rate": 8.564967467673494e-06, "loss": 0.5246, "step": 4554 }, { "epoch": 0.7480549340011906, "grad_norm": 0.28810968260779946, "learning_rate": 8.564766277871081e-06, "loss": 0.507, "step": 4555 }, { "epoch": 0.7482191612095334, "grad_norm": 0.2992743113260256, "learning_rate": 8.564565043921308e-06, "loss": 0.5188, "step": 4556 }, { "epoch": 0.7483833884178761, "grad_norm": 0.3558728320611078, "learning_rate": 8.564363765826358e-06, "loss": 0.5265, "step": 4557 }, { "epoch": 0.7485476156262189, "grad_norm": 0.292448000228156, "learning_rate": 8.564162443588421e-06, "loss": 0.5252, "step": 4558 }, { "epoch": 0.7487118428345616, "grad_norm": 0.3439831596581984, "learning_rate": 8.56396107720968e-06, "loss": 0.5187, "step": 4559 }, { "epoch": 0.7488760700429044, "grad_norm": 0.40810472098558376, "learning_rate": 8.563759666692323e-06, "loss": 0.524, "step": 4560 }, { "epoch": 0.7490402972512471, "grad_norm": 0.2845210850491724, "learning_rate": 8.563558212038538e-06, "loss": 0.5223, "step": 4561 }, { "epoch": 0.7492045244595898, "grad_norm": 0.361454778137837, "learning_rate": 8.563356713250513e-06, "loss": 0.5032, "step": 4562 }, { "epoch": 0.7493687516679326, "grad_norm": 0.3123393644294071, "learning_rate": 8.563155170330436e-06, "loss": 0.5003, "step": 4563 }, { "epoch": 0.7495329788762753, "grad_norm": 0.2893188997077662, "learning_rate": 8.562953583280497e-06, "loss": 0.5123, "step": 4564 }, { "epoch": 0.7496972060846181, "grad_norm": 0.325060447014177, "learning_rate": 8.562751952102883e-06, "loss": 0.5326, "step": 4565 }, { "epoch": 0.7498614332929608, "grad_norm": 0.38334438203554844, "learning_rate": 8.562550276799788e-06, "loss": 0.5499, "step": 4566 }, { "epoch": 0.7500256605013036, "grad_norm": 0.2895758675873873, "learning_rate": 8.5623485573734e-06, "loss": 0.5258, "step": 4567 }, { "epoch": 0.7501898877096463, "grad_norm": 0.34262490892337305, "learning_rate": 8.562146793825907e-06, "loss": 0.5252, "step": 4568 }, { "epoch": 0.7503541149179891, "grad_norm": 0.3077035016491017, "learning_rate": 8.561944986159505e-06, "loss": 0.5358, "step": 4569 }, { "epoch": 0.7505183421263317, "grad_norm": 0.3032102839545749, "learning_rate": 8.561743134376384e-06, "loss": 0.5255, "step": 4570 }, { "epoch": 0.7506825693346745, "grad_norm": 0.33693998727707863, "learning_rate": 8.561541238478735e-06, "loss": 0.5141, "step": 4571 }, { "epoch": 0.7508467965430172, "grad_norm": 0.2780956966036318, "learning_rate": 8.561339298468753e-06, "loss": 0.5382, "step": 4572 }, { "epoch": 0.75101102375136, "grad_norm": 0.33119142853483213, "learning_rate": 8.56113731434863e-06, "loss": 0.5032, "step": 4573 }, { "epoch": 0.7511752509597027, "grad_norm": 0.2969192531504866, "learning_rate": 8.560935286120562e-06, "loss": 0.5012, "step": 4574 }, { "epoch": 0.7513394781680455, "grad_norm": 0.30932229204461903, "learning_rate": 8.560733213786741e-06, "loss": 0.5228, "step": 4575 }, { "epoch": 0.7515037053763882, "grad_norm": 0.6375810468219024, "learning_rate": 8.56053109734936e-06, "loss": 0.5347, "step": 4576 }, { "epoch": 0.751667932584731, "grad_norm": 0.3602747428237164, "learning_rate": 8.56032893681062e-06, "loss": 0.5155, "step": 4577 }, { "epoch": 0.7518321597930737, "grad_norm": 0.3344243202645859, "learning_rate": 8.560126732172709e-06, "loss": 0.5525, "step": 4578 }, { "epoch": 0.7519963870014165, "grad_norm": 0.6821768214109283, "learning_rate": 8.559924483437827e-06, "loss": 0.5184, "step": 4579 }, { "epoch": 0.7521606142097592, "grad_norm": 0.35181079249212044, "learning_rate": 8.559722190608174e-06, "loss": 0.5069, "step": 4580 }, { "epoch": 0.752324841418102, "grad_norm": 0.32076296100754514, "learning_rate": 8.55951985368594e-06, "loss": 0.5245, "step": 4581 }, { "epoch": 0.7524890686264447, "grad_norm": 0.34037129519099774, "learning_rate": 8.55931747267333e-06, "loss": 0.5204, "step": 4582 }, { "epoch": 0.7526532958347875, "grad_norm": 0.35839103085610946, "learning_rate": 8.559115047572537e-06, "loss": 0.5028, "step": 4583 }, { "epoch": 0.7528175230431302, "grad_norm": 0.32243397643030064, "learning_rate": 8.55891257838576e-06, "loss": 0.5202, "step": 4584 }, { "epoch": 0.752981750251473, "grad_norm": 0.3207564345064311, "learning_rate": 8.5587100651152e-06, "loss": 0.51, "step": 4585 }, { "epoch": 0.7531459774598157, "grad_norm": 0.28960495569847716, "learning_rate": 8.558507507763055e-06, "loss": 0.5065, "step": 4586 }, { "epoch": 0.7533102046681583, "grad_norm": 0.32541735259990995, "learning_rate": 8.558304906331525e-06, "loss": 0.5082, "step": 4587 }, { "epoch": 0.7534744318765011, "grad_norm": 0.4295942413931405, "learning_rate": 8.558102260822812e-06, "loss": 0.5032, "step": 4588 }, { "epoch": 0.7536386590848438, "grad_norm": 0.3270938372241511, "learning_rate": 8.557899571239115e-06, "loss": 0.5189, "step": 4589 }, { "epoch": 0.7538028862931866, "grad_norm": 0.4618734327758789, "learning_rate": 8.557696837582636e-06, "loss": 0.5126, "step": 4590 }, { "epoch": 0.7539671135015293, "grad_norm": 0.2856325652395653, "learning_rate": 8.557494059855579e-06, "loss": 0.495, "step": 4591 }, { "epoch": 0.7541313407098721, "grad_norm": 0.2922332368335058, "learning_rate": 8.557291238060142e-06, "loss": 0.5173, "step": 4592 }, { "epoch": 0.7542955679182148, "grad_norm": 0.26960221791122096, "learning_rate": 8.557088372198532e-06, "loss": 0.5192, "step": 4593 }, { "epoch": 0.7544597951265576, "grad_norm": 0.29754034652447925, "learning_rate": 8.55688546227295e-06, "loss": 0.5044, "step": 4594 }, { "epoch": 0.7546240223349003, "grad_norm": 0.302344635843857, "learning_rate": 8.556682508285601e-06, "loss": 0.5439, "step": 4595 }, { "epoch": 0.7547882495432431, "grad_norm": 0.3497725787114854, "learning_rate": 8.556479510238688e-06, "loss": 0.5342, "step": 4596 }, { "epoch": 0.7549524767515858, "grad_norm": 0.29555475207440246, "learning_rate": 8.556276468134418e-06, "loss": 0.5399, "step": 4597 }, { "epoch": 0.7551167039599286, "grad_norm": 0.2604097571549104, "learning_rate": 8.556073381974992e-06, "loss": 0.509, "step": 4598 }, { "epoch": 0.7552809311682713, "grad_norm": 0.33188600308422034, "learning_rate": 8.555870251762619e-06, "loss": 0.5198, "step": 4599 }, { "epoch": 0.7554451583766141, "grad_norm": 0.3258458849112466, "learning_rate": 8.555667077499506e-06, "loss": 0.5124, "step": 4600 }, { "epoch": 0.7556093855849568, "grad_norm": 0.34142640335866914, "learning_rate": 8.555463859187858e-06, "loss": 0.5383, "step": 4601 }, { "epoch": 0.7557736127932996, "grad_norm": 0.32818633170502004, "learning_rate": 8.555260596829882e-06, "loss": 0.533, "step": 4602 }, { "epoch": 0.7559378400016423, "grad_norm": 0.3873086927551184, "learning_rate": 8.555057290427787e-06, "loss": 0.5217, "step": 4603 }, { "epoch": 0.756102067209985, "grad_norm": 0.33476337551175733, "learning_rate": 8.55485393998378e-06, "loss": 0.5138, "step": 4604 }, { "epoch": 0.7562662944183277, "grad_norm": 0.3318686735908207, "learning_rate": 8.554650545500068e-06, "loss": 0.5022, "step": 4605 }, { "epoch": 0.7564305216266705, "grad_norm": 0.326403174472294, "learning_rate": 8.554447106978865e-06, "loss": 0.5214, "step": 4606 }, { "epoch": 0.7565947488350132, "grad_norm": 0.39247588112022863, "learning_rate": 8.554243624422373e-06, "loss": 0.5156, "step": 4607 }, { "epoch": 0.756758976043356, "grad_norm": 0.326663427806945, "learning_rate": 8.55404009783281e-06, "loss": 0.5398, "step": 4608 }, { "epoch": 0.7569232032516987, "grad_norm": 0.3459223648779705, "learning_rate": 8.553836527212381e-06, "loss": 0.4895, "step": 4609 }, { "epoch": 0.7570874304600415, "grad_norm": 0.2941823147267791, "learning_rate": 8.5536329125633e-06, "loss": 0.518, "step": 4610 }, { "epoch": 0.7572516576683842, "grad_norm": 0.30618428789154545, "learning_rate": 8.553429253887778e-06, "loss": 0.5023, "step": 4611 }, { "epoch": 0.757415884876727, "grad_norm": 0.31806576498609396, "learning_rate": 8.553225551188025e-06, "loss": 0.5371, "step": 4612 }, { "epoch": 0.7575801120850697, "grad_norm": 0.35278860041835625, "learning_rate": 8.553021804466254e-06, "loss": 0.5085, "step": 4613 }, { "epoch": 0.7577443392934124, "grad_norm": 0.3214722436543048, "learning_rate": 8.55281801372468e-06, "loss": 0.5145, "step": 4614 }, { "epoch": 0.7579085665017552, "grad_norm": 0.32654098045991403, "learning_rate": 8.552614178965514e-06, "loss": 0.5254, "step": 4615 }, { "epoch": 0.758072793710098, "grad_norm": 0.3281384004825425, "learning_rate": 8.552410300190972e-06, "loss": 0.5087, "step": 4616 }, { "epoch": 0.7582370209184407, "grad_norm": 0.3279057768090118, "learning_rate": 8.552206377403265e-06, "loss": 0.5073, "step": 4617 }, { "epoch": 0.7584012481267834, "grad_norm": 0.33926049968773087, "learning_rate": 8.552002410604613e-06, "loss": 0.525, "step": 4618 }, { "epoch": 0.7585654753351262, "grad_norm": 0.2795274039873359, "learning_rate": 8.551798399797226e-06, "loss": 0.5183, "step": 4619 }, { "epoch": 0.7587297025434689, "grad_norm": 0.3206834642081961, "learning_rate": 8.551594344983322e-06, "loss": 0.5422, "step": 4620 }, { "epoch": 0.7588939297518116, "grad_norm": 0.3058743058502909, "learning_rate": 8.551390246165118e-06, "loss": 0.4945, "step": 4621 }, { "epoch": 0.7590581569601543, "grad_norm": 0.299835948991702, "learning_rate": 8.551186103344828e-06, "loss": 0.5012, "step": 4622 }, { "epoch": 0.7592223841684971, "grad_norm": 0.2883728461560533, "learning_rate": 8.550981916524673e-06, "loss": 0.5257, "step": 4623 }, { "epoch": 0.7593866113768398, "grad_norm": 0.2793782805803841, "learning_rate": 8.550777685706869e-06, "loss": 0.5489, "step": 4624 }, { "epoch": 0.7595508385851826, "grad_norm": 0.2638335021783196, "learning_rate": 8.550573410893633e-06, "loss": 0.5093, "step": 4625 }, { "epoch": 0.7597150657935253, "grad_norm": 0.3190328077240095, "learning_rate": 8.550369092087185e-06, "loss": 0.525, "step": 4626 }, { "epoch": 0.7598792930018681, "grad_norm": 0.3627572414267673, "learning_rate": 8.550164729289743e-06, "loss": 0.5023, "step": 4627 }, { "epoch": 0.7600435202102108, "grad_norm": 0.3706630695321476, "learning_rate": 8.549960322503529e-06, "loss": 0.5316, "step": 4628 }, { "epoch": 0.7602077474185536, "grad_norm": 0.33057504555303346, "learning_rate": 8.54975587173076e-06, "loss": 0.5226, "step": 4629 }, { "epoch": 0.7603719746268963, "grad_norm": 0.30309061540235327, "learning_rate": 8.549551376973658e-06, "loss": 0.5269, "step": 4630 }, { "epoch": 0.7605362018352391, "grad_norm": 0.31184472191583695, "learning_rate": 8.549346838234442e-06, "loss": 0.5076, "step": 4631 }, { "epoch": 0.7607004290435818, "grad_norm": 0.29095482111017995, "learning_rate": 8.549142255515338e-06, "loss": 0.4765, "step": 4632 }, { "epoch": 0.7608646562519246, "grad_norm": 0.2922099969230343, "learning_rate": 8.548937628818564e-06, "loss": 0.5138, "step": 4633 }, { "epoch": 0.7610288834602673, "grad_norm": 0.3400598354912872, "learning_rate": 8.548732958146344e-06, "loss": 0.5167, "step": 4634 }, { "epoch": 0.7611931106686101, "grad_norm": 0.3381885179424978, "learning_rate": 8.5485282435009e-06, "loss": 0.5004, "step": 4635 }, { "epoch": 0.7613573378769528, "grad_norm": 0.315306221932674, "learning_rate": 8.548323484884457e-06, "loss": 0.5548, "step": 4636 }, { "epoch": 0.7615215650852956, "grad_norm": 0.3346000660174875, "learning_rate": 8.548118682299237e-06, "loss": 0.5271, "step": 4637 }, { "epoch": 0.7616857922936382, "grad_norm": 0.4439712033523865, "learning_rate": 8.547913835747465e-06, "loss": 0.5002, "step": 4638 }, { "epoch": 0.761850019501981, "grad_norm": 0.31153812213147325, "learning_rate": 8.547708945231369e-06, "loss": 0.52, "step": 4639 }, { "epoch": 0.7620142467103237, "grad_norm": 0.340893810594224, "learning_rate": 8.54750401075317e-06, "loss": 0.518, "step": 4640 }, { "epoch": 0.7621784739186664, "grad_norm": 0.2955501959425951, "learning_rate": 8.547299032315092e-06, "loss": 0.5409, "step": 4641 }, { "epoch": 0.7623427011270092, "grad_norm": 0.28838767397812765, "learning_rate": 8.547094009919367e-06, "loss": 0.4977, "step": 4642 }, { "epoch": 0.7625069283353519, "grad_norm": 0.36043013999166806, "learning_rate": 8.546888943568222e-06, "loss": 0.5148, "step": 4643 }, { "epoch": 0.7626711555436947, "grad_norm": 0.2841530706289285, "learning_rate": 8.546683833263877e-06, "loss": 0.5261, "step": 4644 }, { "epoch": 0.7628353827520374, "grad_norm": 0.3517544924162603, "learning_rate": 8.546478679008567e-06, "loss": 0.5239, "step": 4645 }, { "epoch": 0.7629996099603802, "grad_norm": 0.3226642978400009, "learning_rate": 8.546273480804516e-06, "loss": 0.5214, "step": 4646 }, { "epoch": 0.7631638371687229, "grad_norm": 0.3350012172177672, "learning_rate": 8.546068238653956e-06, "loss": 0.5265, "step": 4647 }, { "epoch": 0.7633280643770657, "grad_norm": 0.3365204860158755, "learning_rate": 8.54586295255911e-06, "loss": 0.5224, "step": 4648 }, { "epoch": 0.7634922915854084, "grad_norm": 0.30134315958920094, "learning_rate": 8.545657622522215e-06, "loss": 0.543, "step": 4649 }, { "epoch": 0.7636565187937512, "grad_norm": 0.33019521783542843, "learning_rate": 8.545452248545498e-06, "loss": 0.5183, "step": 4650 }, { "epoch": 0.7638207460020939, "grad_norm": 0.37232931232505206, "learning_rate": 8.545246830631188e-06, "loss": 0.5175, "step": 4651 }, { "epoch": 0.7639849732104367, "grad_norm": 0.32383868334851396, "learning_rate": 8.545041368781517e-06, "loss": 0.5364, "step": 4652 }, { "epoch": 0.7641492004187794, "grad_norm": 0.44489749436817166, "learning_rate": 8.544835862998718e-06, "loss": 0.5201, "step": 4653 }, { "epoch": 0.7643134276271222, "grad_norm": 0.3077556378016318, "learning_rate": 8.544630313285022e-06, "loss": 0.5124, "step": 4654 }, { "epoch": 0.7644776548354648, "grad_norm": 0.4603259411059813, "learning_rate": 8.544424719642661e-06, "loss": 0.5183, "step": 4655 }, { "epoch": 0.7646418820438076, "grad_norm": 0.2832805233868871, "learning_rate": 8.54421908207387e-06, "loss": 0.53, "step": 4656 }, { "epoch": 0.7648061092521503, "grad_norm": 0.29170501882490907, "learning_rate": 8.54401340058088e-06, "loss": 0.5308, "step": 4657 }, { "epoch": 0.7649703364604931, "grad_norm": 0.2950625968091386, "learning_rate": 8.543807675165924e-06, "loss": 0.5321, "step": 4658 }, { "epoch": 0.7651345636688358, "grad_norm": 0.3632685280310422, "learning_rate": 8.543601905831239e-06, "loss": 0.5116, "step": 4659 }, { "epoch": 0.7652987908771786, "grad_norm": 0.3393323236915578, "learning_rate": 8.54339609257906e-06, "loss": 0.5123, "step": 4660 }, { "epoch": 0.7654630180855213, "grad_norm": 0.4729462883437287, "learning_rate": 8.543190235411619e-06, "loss": 0.519, "step": 4661 }, { "epoch": 0.7656272452938641, "grad_norm": 0.3293233461617647, "learning_rate": 8.542984334331155e-06, "loss": 0.5236, "step": 4662 }, { "epoch": 0.7657914725022068, "grad_norm": 0.36363391812587026, "learning_rate": 8.542778389339906e-06, "loss": 0.5094, "step": 4663 }, { "epoch": 0.7659556997105496, "grad_norm": 0.30508096976913734, "learning_rate": 8.542572400440103e-06, "loss": 0.5097, "step": 4664 }, { "epoch": 0.7661199269188923, "grad_norm": 0.31527353025695465, "learning_rate": 8.542366367633988e-06, "loss": 0.5233, "step": 4665 }, { "epoch": 0.766284154127235, "grad_norm": 0.3002322251825842, "learning_rate": 8.542160290923796e-06, "loss": 0.4995, "step": 4666 }, { "epoch": 0.7664483813355778, "grad_norm": 0.35868121348697857, "learning_rate": 8.541954170311768e-06, "loss": 0.4835, "step": 4667 }, { "epoch": 0.7666126085439205, "grad_norm": 0.2816569076089125, "learning_rate": 8.541748005800139e-06, "loss": 0.5175, "step": 4668 }, { "epoch": 0.7667768357522633, "grad_norm": 0.2794544666938337, "learning_rate": 8.54154179739115e-06, "loss": 0.5097, "step": 4669 }, { "epoch": 0.766941062960606, "grad_norm": 0.29808345975734646, "learning_rate": 8.541335545087043e-06, "loss": 0.4996, "step": 4670 }, { "epoch": 0.7671052901689488, "grad_norm": 0.28395721446353817, "learning_rate": 8.541129248890053e-06, "loss": 0.5332, "step": 4671 }, { "epoch": 0.7672695173772914, "grad_norm": 0.3514233085142723, "learning_rate": 8.540922908802425e-06, "loss": 0.4993, "step": 4672 }, { "epoch": 0.7674337445856342, "grad_norm": 0.41429693067925427, "learning_rate": 8.540716524826398e-06, "loss": 0.5151, "step": 4673 }, { "epoch": 0.7675979717939769, "grad_norm": 0.3520524597657039, "learning_rate": 8.540510096964215e-06, "loss": 0.5223, "step": 4674 }, { "epoch": 0.7677621990023197, "grad_norm": 0.3328068751348965, "learning_rate": 8.540303625218115e-06, "loss": 0.5111, "step": 4675 }, { "epoch": 0.7679264262106624, "grad_norm": 0.31064697105471334, "learning_rate": 8.540097109590346e-06, "loss": 0.5042, "step": 4676 }, { "epoch": 0.7680906534190052, "grad_norm": 0.39821289781714075, "learning_rate": 8.539890550083144e-06, "loss": 0.5076, "step": 4677 }, { "epoch": 0.7682548806273479, "grad_norm": 0.3141655423218781, "learning_rate": 8.539683946698758e-06, "loss": 0.5134, "step": 4678 }, { "epoch": 0.7684191078356907, "grad_norm": 0.46016224300055375, "learning_rate": 8.539477299439429e-06, "loss": 0.5196, "step": 4679 }, { "epoch": 0.7685833350440334, "grad_norm": 0.4735631016426876, "learning_rate": 8.539270608307402e-06, "loss": 0.5306, "step": 4680 }, { "epoch": 0.7687475622523762, "grad_norm": 0.2952547172783303, "learning_rate": 8.539063873304922e-06, "loss": 0.4949, "step": 4681 }, { "epoch": 0.7689117894607189, "grad_norm": 0.3079803366854746, "learning_rate": 8.538857094434234e-06, "loss": 0.5281, "step": 4682 }, { "epoch": 0.7690760166690617, "grad_norm": 0.3159192970003273, "learning_rate": 8.538650271697586e-06, "loss": 0.49, "step": 4683 }, { "epoch": 0.7692402438774044, "grad_norm": 0.31282817662971113, "learning_rate": 8.538443405097223e-06, "loss": 0.5307, "step": 4684 }, { "epoch": 0.7694044710857472, "grad_norm": 0.32242372926036034, "learning_rate": 8.538236494635389e-06, "loss": 0.5396, "step": 4685 }, { "epoch": 0.7695686982940899, "grad_norm": 0.48535860275289644, "learning_rate": 8.538029540314334e-06, "loss": 0.5167, "step": 4686 }, { "epoch": 0.7697329255024327, "grad_norm": 0.30109361527460454, "learning_rate": 8.537822542136306e-06, "loss": 0.5317, "step": 4687 }, { "epoch": 0.7698971527107754, "grad_norm": 0.4440781157107436, "learning_rate": 8.537615500103553e-06, "loss": 0.5044, "step": 4688 }, { "epoch": 0.770061379919118, "grad_norm": 0.3322199954679528, "learning_rate": 8.537408414218323e-06, "loss": 0.5235, "step": 4689 }, { "epoch": 0.7702256071274608, "grad_norm": 0.2736769475032565, "learning_rate": 8.537201284482864e-06, "loss": 0.5071, "step": 4690 }, { "epoch": 0.7703898343358035, "grad_norm": 0.3251526213165742, "learning_rate": 8.536994110899428e-06, "loss": 0.4937, "step": 4691 }, { "epoch": 0.7705540615441463, "grad_norm": 0.3412682423457841, "learning_rate": 8.536786893470264e-06, "loss": 0.5148, "step": 4692 }, { "epoch": 0.770718288752489, "grad_norm": 0.3158500123302273, "learning_rate": 8.536579632197622e-06, "loss": 0.4904, "step": 4693 }, { "epoch": 0.7708825159608318, "grad_norm": 0.38296860314735637, "learning_rate": 8.536372327083755e-06, "loss": 0.5299, "step": 4694 }, { "epoch": 0.7710467431691745, "grad_norm": 0.31222738840079106, "learning_rate": 8.536164978130913e-06, "loss": 0.535, "step": 4695 }, { "epoch": 0.7712109703775173, "grad_norm": 0.843025114686952, "learning_rate": 8.535957585341349e-06, "loss": 0.5245, "step": 4696 }, { "epoch": 0.77137519758586, "grad_norm": 0.32381269833214565, "learning_rate": 8.535750148717312e-06, "loss": 0.5388, "step": 4697 }, { "epoch": 0.7715394247942028, "grad_norm": 0.41423456394537234, "learning_rate": 8.53554266826106e-06, "loss": 0.5049, "step": 4698 }, { "epoch": 0.7717036520025455, "grad_norm": 0.2918748934972015, "learning_rate": 8.535335143974844e-06, "loss": 0.5151, "step": 4699 }, { "epoch": 0.7718678792108883, "grad_norm": 0.2785197474123093, "learning_rate": 8.535127575860917e-06, "loss": 0.523, "step": 4700 }, { "epoch": 0.772032106419231, "grad_norm": 0.4162412664492259, "learning_rate": 8.534919963921536e-06, "loss": 0.5231, "step": 4701 }, { "epoch": 0.7721963336275738, "grad_norm": 0.2769221462291233, "learning_rate": 8.534712308158954e-06, "loss": 0.5286, "step": 4702 }, { "epoch": 0.7723605608359165, "grad_norm": 0.30009896167616434, "learning_rate": 8.534504608575426e-06, "loss": 0.5155, "step": 4703 }, { "epoch": 0.7725247880442593, "grad_norm": 0.28533441325104786, "learning_rate": 8.53429686517321e-06, "loss": 0.4966, "step": 4704 }, { "epoch": 0.772689015252602, "grad_norm": 1.2238805912739166, "learning_rate": 8.534089077954558e-06, "loss": 0.5149, "step": 4705 }, { "epoch": 0.7728532424609447, "grad_norm": 0.29952221829630815, "learning_rate": 8.533881246921732e-06, "loss": 0.5181, "step": 4706 }, { "epoch": 0.7730174696692874, "grad_norm": 0.320317196375448, "learning_rate": 8.533673372076987e-06, "loss": 0.492, "step": 4707 }, { "epoch": 0.7731816968776302, "grad_norm": 0.3663326715952758, "learning_rate": 8.53346545342258e-06, "loss": 0.5393, "step": 4708 }, { "epoch": 0.7733459240859729, "grad_norm": 0.3036228367230328, "learning_rate": 8.533257490960768e-06, "loss": 0.5426, "step": 4709 }, { "epoch": 0.7735101512943157, "grad_norm": 0.2865700530075475, "learning_rate": 8.533049484693813e-06, "loss": 0.5117, "step": 4710 }, { "epoch": 0.7736743785026584, "grad_norm": 0.31601286020177577, "learning_rate": 8.532841434623974e-06, "loss": 0.533, "step": 4711 }, { "epoch": 0.7738386057110012, "grad_norm": 0.3364380677143077, "learning_rate": 8.532633340753507e-06, "loss": 0.53, "step": 4712 }, { "epoch": 0.7740028329193439, "grad_norm": 0.35339859314853217, "learning_rate": 8.532425203084675e-06, "loss": 0.5104, "step": 4713 }, { "epoch": 0.7741670601276867, "grad_norm": 0.5951590155877503, "learning_rate": 8.532217021619738e-06, "loss": 0.5445, "step": 4714 }, { "epoch": 0.7743312873360294, "grad_norm": 0.29684524677217816, "learning_rate": 8.532008796360957e-06, "loss": 0.515, "step": 4715 }, { "epoch": 0.7744955145443722, "grad_norm": 0.3610512910167456, "learning_rate": 8.531800527310594e-06, "loss": 0.4889, "step": 4716 }, { "epoch": 0.7746597417527149, "grad_norm": 0.2866312562125442, "learning_rate": 8.53159221447091e-06, "loss": 0.5028, "step": 4717 }, { "epoch": 0.7748239689610577, "grad_norm": 0.2883691284842144, "learning_rate": 8.531383857844169e-06, "loss": 0.5257, "step": 4718 }, { "epoch": 0.7749881961694004, "grad_norm": 0.32116600915112437, "learning_rate": 8.53117545743263e-06, "loss": 0.511, "step": 4719 }, { "epoch": 0.7751524233777431, "grad_norm": 0.36579551244838965, "learning_rate": 8.530967013238562e-06, "loss": 0.5279, "step": 4720 }, { "epoch": 0.7753166505860859, "grad_norm": 0.335274958914863, "learning_rate": 8.530758525264226e-06, "loss": 0.5219, "step": 4721 }, { "epoch": 0.7754808777944286, "grad_norm": 0.35908456979911946, "learning_rate": 8.530549993511886e-06, "loss": 0.5122, "step": 4722 }, { "epoch": 0.7756451050027713, "grad_norm": 0.3303814149214648, "learning_rate": 8.53034141798381e-06, "loss": 0.5188, "step": 4723 }, { "epoch": 0.775809332211114, "grad_norm": 0.552579852685583, "learning_rate": 8.530132798682258e-06, "loss": 0.5499, "step": 4724 }, { "epoch": 0.7759735594194568, "grad_norm": 0.6038296709572685, "learning_rate": 8.529924135609499e-06, "loss": 0.5315, "step": 4725 }, { "epoch": 0.7761377866277995, "grad_norm": 0.3259739996544789, "learning_rate": 8.5297154287678e-06, "loss": 0.4987, "step": 4726 }, { "epoch": 0.7763020138361423, "grad_norm": 0.2899897486092834, "learning_rate": 8.529506678159426e-06, "loss": 0.5278, "step": 4727 }, { "epoch": 0.776466241044485, "grad_norm": 0.2904572716926567, "learning_rate": 8.529297883786645e-06, "loss": 0.501, "step": 4728 }, { "epoch": 0.7766304682528278, "grad_norm": 0.31236889089317593, "learning_rate": 8.529089045651726e-06, "loss": 0.4995, "step": 4729 }, { "epoch": 0.7767946954611705, "grad_norm": 0.38956907875588986, "learning_rate": 8.528880163756935e-06, "loss": 0.5115, "step": 4730 }, { "epoch": 0.7769589226695133, "grad_norm": 0.34404235566924035, "learning_rate": 8.52867123810454e-06, "loss": 0.5264, "step": 4731 }, { "epoch": 0.777123149877856, "grad_norm": 0.3443712963310854, "learning_rate": 8.528462268696812e-06, "loss": 0.512, "step": 4732 }, { "epoch": 0.7772873770861988, "grad_norm": 0.28851683888501506, "learning_rate": 8.528253255536022e-06, "loss": 0.504, "step": 4733 }, { "epoch": 0.7774516042945415, "grad_norm": 0.3181508096343241, "learning_rate": 8.528044198624438e-06, "loss": 0.5244, "step": 4734 }, { "epoch": 0.7776158315028843, "grad_norm": 0.3867330975953575, "learning_rate": 8.527835097964331e-06, "loss": 0.5271, "step": 4735 }, { "epoch": 0.777780058711227, "grad_norm": 0.3066079602945166, "learning_rate": 8.527625953557972e-06, "loss": 0.4989, "step": 4736 }, { "epoch": 0.7779442859195698, "grad_norm": 0.3851892733779904, "learning_rate": 8.527416765407633e-06, "loss": 0.5198, "step": 4737 }, { "epoch": 0.7781085131279125, "grad_norm": 0.31990920459193956, "learning_rate": 8.527207533515583e-06, "loss": 0.5015, "step": 4738 }, { "epoch": 0.7782727403362553, "grad_norm": 0.2720719422478482, "learning_rate": 8.5269982578841e-06, "loss": 0.4928, "step": 4739 }, { "epoch": 0.7784369675445979, "grad_norm": 0.7093090861871565, "learning_rate": 8.526788938515451e-06, "loss": 0.511, "step": 4740 }, { "epoch": 0.7786011947529407, "grad_norm": 0.28949201274533776, "learning_rate": 8.526579575411914e-06, "loss": 0.5134, "step": 4741 }, { "epoch": 0.7787654219612834, "grad_norm": 0.41192958031795, "learning_rate": 8.526370168575762e-06, "loss": 0.5001, "step": 4742 }, { "epoch": 0.7789296491696261, "grad_norm": 0.3955354645563975, "learning_rate": 8.526160718009267e-06, "loss": 0.4977, "step": 4743 }, { "epoch": 0.7790938763779689, "grad_norm": 0.27548066782513797, "learning_rate": 8.525951223714705e-06, "loss": 0.5406, "step": 4744 }, { "epoch": 0.7792581035863116, "grad_norm": 0.34792018231763794, "learning_rate": 8.525741685694353e-06, "loss": 0.5035, "step": 4745 }, { "epoch": 0.7794223307946544, "grad_norm": 0.2558824257167838, "learning_rate": 8.525532103950485e-06, "loss": 0.5181, "step": 4746 }, { "epoch": 0.7795865580029971, "grad_norm": 0.37147364997022847, "learning_rate": 8.52532247848538e-06, "loss": 0.5063, "step": 4747 }, { "epoch": 0.7797507852113399, "grad_norm": 0.26862453917752677, "learning_rate": 8.525112809301308e-06, "loss": 0.5384, "step": 4748 }, { "epoch": 0.7799150124196826, "grad_norm": 0.31769694408212135, "learning_rate": 8.524903096400554e-06, "loss": 0.5187, "step": 4749 }, { "epoch": 0.7800792396280254, "grad_norm": 0.298163868790542, "learning_rate": 8.524693339785392e-06, "loss": 0.507, "step": 4750 }, { "epoch": 0.7802434668363681, "grad_norm": 0.2933975935484392, "learning_rate": 8.524483539458099e-06, "loss": 0.5265, "step": 4751 }, { "epoch": 0.7804076940447109, "grad_norm": 0.3179903130943498, "learning_rate": 8.524273695420957e-06, "loss": 0.5126, "step": 4752 }, { "epoch": 0.7805719212530536, "grad_norm": 0.26716291057570807, "learning_rate": 8.524063807676241e-06, "loss": 0.5204, "step": 4753 }, { "epoch": 0.7807361484613964, "grad_norm": 0.31106935340054614, "learning_rate": 8.523853876226236e-06, "loss": 0.5321, "step": 4754 }, { "epoch": 0.7809003756697391, "grad_norm": 0.31817775903788903, "learning_rate": 8.523643901073217e-06, "loss": 0.5051, "step": 4755 }, { "epoch": 0.7810646028780819, "grad_norm": 0.3039687088510633, "learning_rate": 8.523433882219467e-06, "loss": 0.4994, "step": 4756 }, { "epoch": 0.7812288300864245, "grad_norm": 0.2839010503466107, "learning_rate": 8.523223819667267e-06, "loss": 0.5216, "step": 4757 }, { "epoch": 0.7813930572947673, "grad_norm": 0.4120389386069269, "learning_rate": 8.523013713418897e-06, "loss": 0.481, "step": 4758 }, { "epoch": 0.78155728450311, "grad_norm": 0.3062612421267718, "learning_rate": 8.522803563476641e-06, "loss": 0.4983, "step": 4759 }, { "epoch": 0.7817215117114528, "grad_norm": 0.28296796391139384, "learning_rate": 8.52259336984278e-06, "loss": 0.5013, "step": 4760 }, { "epoch": 0.7818857389197955, "grad_norm": 0.3046360962097441, "learning_rate": 8.522383132519597e-06, "loss": 0.53, "step": 4761 }, { "epoch": 0.7820499661281383, "grad_norm": 0.3142566255762791, "learning_rate": 8.522172851509375e-06, "loss": 0.5181, "step": 4762 }, { "epoch": 0.782214193336481, "grad_norm": 0.33770779393011047, "learning_rate": 8.5219625268144e-06, "loss": 0.5003, "step": 4763 }, { "epoch": 0.7823784205448238, "grad_norm": 0.35718367865642603, "learning_rate": 8.521752158436954e-06, "loss": 0.4931, "step": 4764 }, { "epoch": 0.7825426477531665, "grad_norm": 0.3941435431475404, "learning_rate": 8.521541746379323e-06, "loss": 0.5216, "step": 4765 }, { "epoch": 0.7827068749615093, "grad_norm": 0.2865143701939515, "learning_rate": 8.521331290643791e-06, "loss": 0.5063, "step": 4766 }, { "epoch": 0.782871102169852, "grad_norm": 0.28411967137007504, "learning_rate": 8.521120791232646e-06, "loss": 0.5222, "step": 4767 }, { "epoch": 0.7830353293781948, "grad_norm": 0.3053542974959439, "learning_rate": 8.520910248148174e-06, "loss": 0.5365, "step": 4768 }, { "epoch": 0.7831995565865375, "grad_norm": 0.3912844265585773, "learning_rate": 8.52069966139266e-06, "loss": 0.5257, "step": 4769 }, { "epoch": 0.7833637837948803, "grad_norm": 0.34825980844701676, "learning_rate": 8.52048903096839e-06, "loss": 0.5452, "step": 4770 }, { "epoch": 0.783528011003223, "grad_norm": 0.2853114021603045, "learning_rate": 8.520278356877654e-06, "loss": 0.5073, "step": 4771 }, { "epoch": 0.7836922382115658, "grad_norm": 0.3703869120538064, "learning_rate": 8.52006763912274e-06, "loss": 0.4969, "step": 4772 }, { "epoch": 0.7838564654199084, "grad_norm": 0.27611234565761733, "learning_rate": 8.519856877705937e-06, "loss": 0.5344, "step": 4773 }, { "epoch": 0.7840206926282511, "grad_norm": 0.28744438714816317, "learning_rate": 8.519646072629533e-06, "loss": 0.4983, "step": 4774 }, { "epoch": 0.7841849198365939, "grad_norm": 0.3552014651411184, "learning_rate": 8.519435223895817e-06, "loss": 0.5018, "step": 4775 }, { "epoch": 0.7843491470449366, "grad_norm": 0.2757339930350272, "learning_rate": 8.519224331507081e-06, "loss": 0.5222, "step": 4776 }, { "epoch": 0.7845133742532794, "grad_norm": 0.3417843165815299, "learning_rate": 8.519013395465614e-06, "loss": 0.5093, "step": 4777 }, { "epoch": 0.7846776014616221, "grad_norm": 0.3300629702593418, "learning_rate": 8.518802415773707e-06, "loss": 0.5405, "step": 4778 }, { "epoch": 0.7848418286699649, "grad_norm": 0.3143676247665393, "learning_rate": 8.518591392433653e-06, "loss": 0.5189, "step": 4779 }, { "epoch": 0.7850060558783076, "grad_norm": 0.33319228445074417, "learning_rate": 8.518380325447741e-06, "loss": 0.5412, "step": 4780 }, { "epoch": 0.7851702830866504, "grad_norm": 0.5097647567354313, "learning_rate": 8.518169214818265e-06, "loss": 0.4978, "step": 4781 }, { "epoch": 0.7853345102949931, "grad_norm": 0.31219081932027304, "learning_rate": 8.51795806054752e-06, "loss": 0.5069, "step": 4782 }, { "epoch": 0.7854987375033359, "grad_norm": 0.3588349702401945, "learning_rate": 8.517746862637797e-06, "loss": 0.5079, "step": 4783 }, { "epoch": 0.7856629647116786, "grad_norm": 0.3102582288830573, "learning_rate": 8.517535621091388e-06, "loss": 0.5205, "step": 4784 }, { "epoch": 0.7858271919200214, "grad_norm": 0.2918198592895266, "learning_rate": 8.517324335910591e-06, "loss": 0.5156, "step": 4785 }, { "epoch": 0.7859914191283641, "grad_norm": 0.33430126386274583, "learning_rate": 8.5171130070977e-06, "loss": 0.5051, "step": 4786 }, { "epoch": 0.7861556463367069, "grad_norm": 0.3254535703108297, "learning_rate": 8.516901634655008e-06, "loss": 0.5253, "step": 4787 }, { "epoch": 0.7863198735450496, "grad_norm": 0.3484996970943442, "learning_rate": 8.516690218584811e-06, "loss": 0.5093, "step": 4788 }, { "epoch": 0.7864841007533924, "grad_norm": 0.263519147067759, "learning_rate": 8.51647875888941e-06, "loss": 0.5106, "step": 4789 }, { "epoch": 0.786648327961735, "grad_norm": 0.45546638774831777, "learning_rate": 8.516267255571094e-06, "loss": 0.5176, "step": 4790 }, { "epoch": 0.7868125551700778, "grad_norm": 0.3449889074977041, "learning_rate": 8.516055708632166e-06, "loss": 0.5301, "step": 4791 }, { "epoch": 0.7869767823784205, "grad_norm": 0.3698943802374998, "learning_rate": 8.515844118074923e-06, "loss": 0.5229, "step": 4792 }, { "epoch": 0.7871410095867633, "grad_norm": 0.33462868726553185, "learning_rate": 8.51563248390166e-06, "loss": 0.5161, "step": 4793 }, { "epoch": 0.787305236795106, "grad_norm": 0.3123568050312096, "learning_rate": 8.515420806114677e-06, "loss": 0.5109, "step": 4794 }, { "epoch": 0.7874694640034487, "grad_norm": 0.34422885124330654, "learning_rate": 8.515209084716275e-06, "loss": 0.4919, "step": 4795 }, { "epoch": 0.7876336912117915, "grad_norm": 0.3889682489845154, "learning_rate": 8.514997319708751e-06, "loss": 0.4851, "step": 4796 }, { "epoch": 0.7877979184201342, "grad_norm": 0.36985116050350514, "learning_rate": 8.514785511094408e-06, "loss": 0.5292, "step": 4797 }, { "epoch": 0.787962145628477, "grad_norm": 0.32305288714276936, "learning_rate": 8.514573658875541e-06, "loss": 0.4837, "step": 4798 }, { "epoch": 0.7881263728368197, "grad_norm": 0.41138982582893036, "learning_rate": 8.514361763054456e-06, "loss": 0.5034, "step": 4799 }, { "epoch": 0.7882906000451625, "grad_norm": 0.35851481494533577, "learning_rate": 8.514149823633453e-06, "loss": 0.53, "step": 4800 }, { "epoch": 0.7884548272535052, "grad_norm": 0.3239257397621457, "learning_rate": 8.513937840614832e-06, "loss": 0.5206, "step": 4801 }, { "epoch": 0.788619054461848, "grad_norm": 0.49352899282790785, "learning_rate": 8.513725814000898e-06, "loss": 0.5201, "step": 4802 }, { "epoch": 0.7887832816701907, "grad_norm": 0.29476675219482074, "learning_rate": 8.513513743793954e-06, "loss": 0.4968, "step": 4803 }, { "epoch": 0.7889475088785335, "grad_norm": 0.3763905524883607, "learning_rate": 8.513301629996299e-06, "loss": 0.4996, "step": 4804 }, { "epoch": 0.7891117360868762, "grad_norm": 0.5222654404707635, "learning_rate": 8.513089472610242e-06, "loss": 0.5233, "step": 4805 }, { "epoch": 0.789275963295219, "grad_norm": 0.33072041061279667, "learning_rate": 8.512877271638084e-06, "loss": 0.4858, "step": 4806 }, { "epoch": 0.7894401905035616, "grad_norm": 0.28791095300849756, "learning_rate": 8.51266502708213e-06, "loss": 0.5171, "step": 4807 }, { "epoch": 0.7896044177119044, "grad_norm": 0.29139790932119547, "learning_rate": 8.512452738944686e-06, "loss": 0.5295, "step": 4808 }, { "epoch": 0.7897686449202471, "grad_norm": 0.2964080244353324, "learning_rate": 8.51224040722806e-06, "loss": 0.4971, "step": 4809 }, { "epoch": 0.7899328721285899, "grad_norm": 0.32197530233101723, "learning_rate": 8.512028031934554e-06, "loss": 0.4932, "step": 4810 }, { "epoch": 0.7900970993369326, "grad_norm": 0.2843080931858888, "learning_rate": 8.511815613066475e-06, "loss": 0.5005, "step": 4811 }, { "epoch": 0.7902613265452754, "grad_norm": 0.535477832250208, "learning_rate": 8.511603150626132e-06, "loss": 0.5186, "step": 4812 }, { "epoch": 0.7904255537536181, "grad_norm": 0.29692623047749966, "learning_rate": 8.511390644615833e-06, "loss": 0.5124, "step": 4813 }, { "epoch": 0.7905897809619609, "grad_norm": 0.8690043401486638, "learning_rate": 8.511178095037885e-06, "loss": 0.5288, "step": 4814 }, { "epoch": 0.7907540081703036, "grad_norm": 0.3356252094797372, "learning_rate": 8.510965501894595e-06, "loss": 0.5202, "step": 4815 }, { "epoch": 0.7909182353786464, "grad_norm": 0.2574155639032546, "learning_rate": 8.510752865188275e-06, "loss": 0.4958, "step": 4816 }, { "epoch": 0.7910824625869891, "grad_norm": 0.26080904931135435, "learning_rate": 8.510540184921233e-06, "loss": 0.4926, "step": 4817 }, { "epoch": 0.7912466897953319, "grad_norm": 0.3428360730324517, "learning_rate": 8.510327461095777e-06, "loss": 0.5051, "step": 4818 }, { "epoch": 0.7914109170036746, "grad_norm": 0.4252710145129881, "learning_rate": 8.510114693714219e-06, "loss": 0.5369, "step": 4819 }, { "epoch": 0.7915751442120174, "grad_norm": 0.29726020666126923, "learning_rate": 8.509901882778872e-06, "loss": 0.4963, "step": 4820 }, { "epoch": 0.7917393714203601, "grad_norm": 0.34172269458973087, "learning_rate": 8.509689028292045e-06, "loss": 0.539, "step": 4821 }, { "epoch": 0.7919035986287029, "grad_norm": 0.30840633192280165, "learning_rate": 8.509476130256049e-06, "loss": 0.4956, "step": 4822 }, { "epoch": 0.7920678258370456, "grad_norm": 0.27131424112280017, "learning_rate": 8.509263188673198e-06, "loss": 0.5096, "step": 4823 }, { "epoch": 0.7922320530453882, "grad_norm": 0.3743847578318213, "learning_rate": 8.509050203545803e-06, "loss": 0.5065, "step": 4824 }, { "epoch": 0.792396280253731, "grad_norm": 0.3795053796958781, "learning_rate": 8.50883717487618e-06, "loss": 0.5179, "step": 4825 }, { "epoch": 0.7925605074620737, "grad_norm": 0.30026452426018335, "learning_rate": 8.508624102666642e-06, "loss": 0.5137, "step": 4826 }, { "epoch": 0.7927247346704165, "grad_norm": 0.31326093789198417, "learning_rate": 8.508410986919499e-06, "loss": 0.5146, "step": 4827 }, { "epoch": 0.7928889618787592, "grad_norm": 0.3620854669484693, "learning_rate": 8.508197827637073e-06, "loss": 0.5227, "step": 4828 }, { "epoch": 0.793053189087102, "grad_norm": 0.3014049980906719, "learning_rate": 8.507984624821672e-06, "loss": 0.5094, "step": 4829 }, { "epoch": 0.7932174162954447, "grad_norm": 0.3847631333967564, "learning_rate": 8.507771378475614e-06, "loss": 0.5218, "step": 4830 }, { "epoch": 0.7933816435037875, "grad_norm": 0.2873710411856193, "learning_rate": 8.507558088601218e-06, "loss": 0.4793, "step": 4831 }, { "epoch": 0.7935458707121302, "grad_norm": 0.2906589330262389, "learning_rate": 8.507344755200797e-06, "loss": 0.5164, "step": 4832 }, { "epoch": 0.793710097920473, "grad_norm": 0.2843661884302468, "learning_rate": 8.507131378276671e-06, "loss": 0.5149, "step": 4833 }, { "epoch": 0.7938743251288157, "grad_norm": 0.3119387145742007, "learning_rate": 8.506917957831153e-06, "loss": 0.4946, "step": 4834 }, { "epoch": 0.7940385523371585, "grad_norm": 1.3155836455625542, "learning_rate": 8.506704493866567e-06, "loss": 0.5234, "step": 4835 }, { "epoch": 0.7942027795455012, "grad_norm": 0.33312679287940616, "learning_rate": 8.506490986385225e-06, "loss": 0.503, "step": 4836 }, { "epoch": 0.794367006753844, "grad_norm": 0.2994563085360433, "learning_rate": 8.506277435389452e-06, "loss": 0.5368, "step": 4837 }, { "epoch": 0.7945312339621867, "grad_norm": 0.37734204973357055, "learning_rate": 8.506063840881562e-06, "loss": 0.5047, "step": 4838 }, { "epoch": 0.7946954611705295, "grad_norm": 0.3317028490234112, "learning_rate": 8.505850202863878e-06, "loss": 0.5231, "step": 4839 }, { "epoch": 0.7948596883788722, "grad_norm": 0.3180236812745663, "learning_rate": 8.50563652133872e-06, "loss": 0.498, "step": 4840 }, { "epoch": 0.7950239155872149, "grad_norm": 0.31286873557820605, "learning_rate": 8.505422796308408e-06, "loss": 0.5154, "step": 4841 }, { "epoch": 0.7951881427955576, "grad_norm": 0.3322648618516919, "learning_rate": 8.505209027775263e-06, "loss": 0.5144, "step": 4842 }, { "epoch": 0.7953523700039004, "grad_norm": 0.3246666933401929, "learning_rate": 8.504995215741608e-06, "loss": 0.509, "step": 4843 }, { "epoch": 0.7955165972122431, "grad_norm": 0.29849518101027295, "learning_rate": 8.504781360209765e-06, "loss": 0.5007, "step": 4844 }, { "epoch": 0.7956808244205859, "grad_norm": 0.5686947232248678, "learning_rate": 8.504567461182056e-06, "loss": 0.5184, "step": 4845 }, { "epoch": 0.7958450516289286, "grad_norm": 0.3001488106983265, "learning_rate": 8.504353518660804e-06, "loss": 0.5114, "step": 4846 }, { "epoch": 0.7960092788372714, "grad_norm": 0.2899684109515469, "learning_rate": 8.504139532648333e-06, "loss": 0.5319, "step": 4847 }, { "epoch": 0.7961735060456141, "grad_norm": 0.34811202789673956, "learning_rate": 8.503925503146968e-06, "loss": 0.5048, "step": 4848 }, { "epoch": 0.7963377332539568, "grad_norm": 0.28213312593019935, "learning_rate": 8.503711430159031e-06, "loss": 0.5225, "step": 4849 }, { "epoch": 0.7965019604622996, "grad_norm": 0.31306875930476336, "learning_rate": 8.50349731368685e-06, "loss": 0.5086, "step": 4850 }, { "epoch": 0.7966661876706423, "grad_norm": 0.2767900885059024, "learning_rate": 8.50328315373275e-06, "loss": 0.5246, "step": 4851 }, { "epoch": 0.7968304148789851, "grad_norm": 0.3163293048450247, "learning_rate": 8.503068950299054e-06, "loss": 0.5234, "step": 4852 }, { "epoch": 0.7969946420873278, "grad_norm": 0.337058184039474, "learning_rate": 8.502854703388094e-06, "loss": 0.5123, "step": 4853 }, { "epoch": 0.7971588692956706, "grad_norm": 0.5143054872690204, "learning_rate": 8.502640413002191e-06, "loss": 0.5047, "step": 4854 }, { "epoch": 0.7973230965040133, "grad_norm": 0.4630950984648018, "learning_rate": 8.502426079143675e-06, "loss": 0.5085, "step": 4855 }, { "epoch": 0.7974873237123561, "grad_norm": 0.329732576829339, "learning_rate": 8.502211701814876e-06, "loss": 0.5182, "step": 4856 }, { "epoch": 0.7976515509206988, "grad_norm": 0.3156590174293186, "learning_rate": 8.501997281018118e-06, "loss": 0.5087, "step": 4857 }, { "epoch": 0.7978157781290415, "grad_norm": 0.3275346397983897, "learning_rate": 8.501782816755732e-06, "loss": 0.5282, "step": 4858 }, { "epoch": 0.7979800053373842, "grad_norm": 0.3255908453673845, "learning_rate": 8.50156830903005e-06, "loss": 0.534, "step": 4859 }, { "epoch": 0.798144232545727, "grad_norm": 0.41658664353399205, "learning_rate": 8.501353757843398e-06, "loss": 0.5289, "step": 4860 }, { "epoch": 0.7983084597540697, "grad_norm": 0.33432186201094094, "learning_rate": 8.501139163198106e-06, "loss": 0.4954, "step": 4861 }, { "epoch": 0.7984726869624125, "grad_norm": 0.3844040400667963, "learning_rate": 8.500924525096508e-06, "loss": 0.5358, "step": 4862 }, { "epoch": 0.7986369141707552, "grad_norm": 0.28735140226035877, "learning_rate": 8.500709843540931e-06, "loss": 0.5387, "step": 4863 }, { "epoch": 0.798801141379098, "grad_norm": 0.4156195104655946, "learning_rate": 8.500495118533712e-06, "loss": 0.5146, "step": 4864 }, { "epoch": 0.7989653685874407, "grad_norm": 0.4438432023377831, "learning_rate": 8.500280350077176e-06, "loss": 0.5063, "step": 4865 }, { "epoch": 0.7991295957957835, "grad_norm": 0.313254971038212, "learning_rate": 8.500065538173663e-06, "loss": 0.5168, "step": 4866 }, { "epoch": 0.7992938230041262, "grad_norm": 0.31318752813737577, "learning_rate": 8.499850682825502e-06, "loss": 0.5017, "step": 4867 }, { "epoch": 0.799458050212469, "grad_norm": 0.29010297933150886, "learning_rate": 8.499635784035026e-06, "loss": 0.5076, "step": 4868 }, { "epoch": 0.7996222774208117, "grad_norm": 0.34097058759666493, "learning_rate": 8.499420841804572e-06, "loss": 0.5075, "step": 4869 }, { "epoch": 0.7997865046291545, "grad_norm": 0.34264820615195757, "learning_rate": 8.499205856136473e-06, "loss": 0.5353, "step": 4870 }, { "epoch": 0.7999507318374972, "grad_norm": 0.31214009809327814, "learning_rate": 8.498990827033062e-06, "loss": 0.5176, "step": 4871 }, { "epoch": 0.80011495904584, "grad_norm": 0.2919647666804401, "learning_rate": 8.498775754496677e-06, "loss": 0.5113, "step": 4872 }, { "epoch": 0.8002791862541827, "grad_norm": 0.29216191333093305, "learning_rate": 8.498560638529654e-06, "loss": 0.4954, "step": 4873 }, { "epoch": 0.8004434134625255, "grad_norm": 0.3648942224661276, "learning_rate": 8.498345479134327e-06, "loss": 0.5205, "step": 4874 }, { "epoch": 0.8006076406708681, "grad_norm": 0.30774809099478256, "learning_rate": 8.498130276313035e-06, "loss": 0.5478, "step": 4875 }, { "epoch": 0.8007718678792108, "grad_norm": 0.3454301234007663, "learning_rate": 8.497915030068113e-06, "loss": 0.4924, "step": 4876 }, { "epoch": 0.8009360950875536, "grad_norm": 0.3290857865234024, "learning_rate": 8.497699740401901e-06, "loss": 0.5202, "step": 4877 }, { "epoch": 0.8011003222958963, "grad_norm": 0.3546434922240655, "learning_rate": 8.497484407316737e-06, "loss": 0.4984, "step": 4878 }, { "epoch": 0.8012645495042391, "grad_norm": 0.33323543843021725, "learning_rate": 8.49726903081496e-06, "loss": 0.5027, "step": 4879 }, { "epoch": 0.8014287767125818, "grad_norm": 0.26386927946032285, "learning_rate": 8.497053610898908e-06, "loss": 0.5337, "step": 4880 }, { "epoch": 0.8015930039209246, "grad_norm": 0.40317599119391767, "learning_rate": 8.49683814757092e-06, "loss": 0.5328, "step": 4881 }, { "epoch": 0.8017572311292673, "grad_norm": 0.29720970041618555, "learning_rate": 8.49662264083334e-06, "loss": 0.5172, "step": 4882 }, { "epoch": 0.8019214583376101, "grad_norm": 0.30483337934954274, "learning_rate": 8.496407090688505e-06, "loss": 0.5347, "step": 4883 }, { "epoch": 0.8020856855459528, "grad_norm": 0.3390354026182259, "learning_rate": 8.496191497138757e-06, "loss": 0.5032, "step": 4884 }, { "epoch": 0.8022499127542956, "grad_norm": 0.46548930989663456, "learning_rate": 8.495975860186437e-06, "loss": 0.5125, "step": 4885 }, { "epoch": 0.8024141399626383, "grad_norm": 0.3909156993177809, "learning_rate": 8.495760179833888e-06, "loss": 0.5321, "step": 4886 }, { "epoch": 0.8025783671709811, "grad_norm": 0.30815593000960556, "learning_rate": 8.495544456083453e-06, "loss": 0.5152, "step": 4887 }, { "epoch": 0.8027425943793238, "grad_norm": 0.43576154208490675, "learning_rate": 8.495328688937473e-06, "loss": 0.5118, "step": 4888 }, { "epoch": 0.8029068215876666, "grad_norm": 0.3362982778923814, "learning_rate": 8.495112878398292e-06, "loss": 0.5199, "step": 4889 }, { "epoch": 0.8030710487960093, "grad_norm": 0.35648097356314234, "learning_rate": 8.494897024468255e-06, "loss": 0.5203, "step": 4890 }, { "epoch": 0.8032352760043521, "grad_norm": 0.40099724052233265, "learning_rate": 8.494681127149706e-06, "loss": 0.5142, "step": 4891 }, { "epoch": 0.8033995032126947, "grad_norm": 0.38363888188790995, "learning_rate": 8.49446518644499e-06, "loss": 0.5151, "step": 4892 }, { "epoch": 0.8035637304210375, "grad_norm": 0.32707620079419586, "learning_rate": 8.494249202356452e-06, "loss": 0.5143, "step": 4893 }, { "epoch": 0.8037279576293802, "grad_norm": 0.28527511913999304, "learning_rate": 8.494033174886438e-06, "loss": 0.5169, "step": 4894 }, { "epoch": 0.803892184837723, "grad_norm": 0.2832345525798682, "learning_rate": 8.493817104037294e-06, "loss": 0.5078, "step": 4895 }, { "epoch": 0.8040564120460657, "grad_norm": 1.4074803625802637, "learning_rate": 8.493600989811366e-06, "loss": 0.5151, "step": 4896 }, { "epoch": 0.8042206392544085, "grad_norm": 0.31620078421037934, "learning_rate": 8.493384832211003e-06, "loss": 0.5338, "step": 4897 }, { "epoch": 0.8043848664627512, "grad_norm": 0.36798937850075064, "learning_rate": 8.49316863123855e-06, "loss": 0.5047, "step": 4898 }, { "epoch": 0.804549093671094, "grad_norm": 0.3155938826874778, "learning_rate": 8.49295238689636e-06, "loss": 0.5316, "step": 4899 }, { "epoch": 0.8047133208794367, "grad_norm": 0.3090401650499103, "learning_rate": 8.492736099186776e-06, "loss": 0.4977, "step": 4900 }, { "epoch": 0.8048775480877794, "grad_norm": 0.4837767806205548, "learning_rate": 8.492519768112152e-06, "loss": 0.5188, "step": 4901 }, { "epoch": 0.8050417752961222, "grad_norm": 0.2831082962206799, "learning_rate": 8.492303393674834e-06, "loss": 0.5166, "step": 4902 }, { "epoch": 0.805206002504465, "grad_norm": 0.3364271799232852, "learning_rate": 8.492086975877173e-06, "loss": 0.5032, "step": 4903 }, { "epoch": 0.8053702297128077, "grad_norm": 0.3039287965588309, "learning_rate": 8.49187051472152e-06, "loss": 0.4997, "step": 4904 }, { "epoch": 0.8055344569211504, "grad_norm": 0.34438901266088634, "learning_rate": 8.491654010210224e-06, "loss": 0.5224, "step": 4905 }, { "epoch": 0.8056986841294932, "grad_norm": 0.410477956559512, "learning_rate": 8.49143746234564e-06, "loss": 0.521, "step": 4906 }, { "epoch": 0.8058629113378359, "grad_norm": 0.33178147394701, "learning_rate": 8.491220871130119e-06, "loss": 0.5141, "step": 4907 }, { "epoch": 0.8060271385461787, "grad_norm": 0.31226511617932345, "learning_rate": 8.49100423656601e-06, "loss": 0.5098, "step": 4908 }, { "epoch": 0.8061913657545213, "grad_norm": 0.3314940198037555, "learning_rate": 8.49078755865567e-06, "loss": 0.4966, "step": 4909 }, { "epoch": 0.8063555929628641, "grad_norm": 0.4213880727332896, "learning_rate": 8.490570837401452e-06, "loss": 0.5326, "step": 4910 }, { "epoch": 0.8065198201712068, "grad_norm": 0.25770417899130643, "learning_rate": 8.490354072805707e-06, "loss": 0.5258, "step": 4911 }, { "epoch": 0.8066840473795496, "grad_norm": 0.29938701046613064, "learning_rate": 8.49013726487079e-06, "loss": 0.5288, "step": 4912 }, { "epoch": 0.8068482745878923, "grad_norm": 0.3284235847108112, "learning_rate": 8.489920413599059e-06, "loss": 0.5331, "step": 4913 }, { "epoch": 0.8070125017962351, "grad_norm": 0.3092253263939411, "learning_rate": 8.489703518992865e-06, "loss": 0.5127, "step": 4914 }, { "epoch": 0.8071767290045778, "grad_norm": 0.31053867213043934, "learning_rate": 8.489486581054565e-06, "loss": 0.5181, "step": 4915 }, { "epoch": 0.8073409562129206, "grad_norm": 0.2781424039531393, "learning_rate": 8.489269599786516e-06, "loss": 0.4907, "step": 4916 }, { "epoch": 0.8075051834212633, "grad_norm": 0.29524804072816085, "learning_rate": 8.489052575191074e-06, "loss": 0.5155, "step": 4917 }, { "epoch": 0.8076694106296061, "grad_norm": 0.4577844332755319, "learning_rate": 8.488835507270597e-06, "loss": 0.5027, "step": 4918 }, { "epoch": 0.8078336378379488, "grad_norm": 0.30584168281729424, "learning_rate": 8.488618396027442e-06, "loss": 0.521, "step": 4919 }, { "epoch": 0.8079978650462916, "grad_norm": 0.3880806237530359, "learning_rate": 8.488401241463966e-06, "loss": 0.5352, "step": 4920 }, { "epoch": 0.8081620922546343, "grad_norm": 0.2955963334280968, "learning_rate": 8.48818404358253e-06, "loss": 0.5111, "step": 4921 }, { "epoch": 0.8083263194629771, "grad_norm": 0.3345446561322464, "learning_rate": 8.48796680238549e-06, "loss": 0.522, "step": 4922 }, { "epoch": 0.8084905466713198, "grad_norm": 0.6481791245070249, "learning_rate": 8.487749517875208e-06, "loss": 0.5065, "step": 4923 }, { "epoch": 0.8086547738796626, "grad_norm": 0.30909560781645284, "learning_rate": 8.487532190054043e-06, "loss": 0.5039, "step": 4924 }, { "epoch": 0.8088190010880053, "grad_norm": 0.34142963673856785, "learning_rate": 8.487314818924353e-06, "loss": 0.5092, "step": 4925 }, { "epoch": 0.808983228296348, "grad_norm": 0.4889539171715669, "learning_rate": 8.487097404488502e-06, "loss": 0.502, "step": 4926 }, { "epoch": 0.8091474555046907, "grad_norm": 0.5818910886026137, "learning_rate": 8.486879946748852e-06, "loss": 0.4925, "step": 4927 }, { "epoch": 0.8093116827130334, "grad_norm": 0.2932001298911658, "learning_rate": 8.486662445707762e-06, "loss": 0.5166, "step": 4928 }, { "epoch": 0.8094759099213762, "grad_norm": 0.29941350737145134, "learning_rate": 8.486444901367594e-06, "loss": 0.5076, "step": 4929 }, { "epoch": 0.8096401371297189, "grad_norm": 0.36587344576528646, "learning_rate": 8.486227313730716e-06, "loss": 0.4944, "step": 4930 }, { "epoch": 0.8098043643380617, "grad_norm": 0.3008403149194141, "learning_rate": 8.486009682799484e-06, "loss": 0.5398, "step": 4931 }, { "epoch": 0.8099685915464044, "grad_norm": 0.32000631815715636, "learning_rate": 8.485792008576269e-06, "loss": 0.5143, "step": 4932 }, { "epoch": 0.8101328187547472, "grad_norm": 0.4554555718255998, "learning_rate": 8.485574291063427e-06, "loss": 0.5106, "step": 4933 }, { "epoch": 0.8102970459630899, "grad_norm": 0.3396815075083648, "learning_rate": 8.48535653026333e-06, "loss": 0.5248, "step": 4934 }, { "epoch": 0.8104612731714327, "grad_norm": 0.29132372652667043, "learning_rate": 8.485138726178337e-06, "loss": 0.5149, "step": 4935 }, { "epoch": 0.8106255003797754, "grad_norm": 0.33433360634314313, "learning_rate": 8.484920878810818e-06, "loss": 0.5275, "step": 4936 }, { "epoch": 0.8107897275881182, "grad_norm": 0.3804802811332157, "learning_rate": 8.484702988163138e-06, "loss": 0.5372, "step": 4937 }, { "epoch": 0.8109539547964609, "grad_norm": 0.29590088600146536, "learning_rate": 8.484485054237663e-06, "loss": 0.5194, "step": 4938 }, { "epoch": 0.8111181820048037, "grad_norm": 0.30898279514885435, "learning_rate": 8.484267077036761e-06, "loss": 0.5067, "step": 4939 }, { "epoch": 0.8112824092131464, "grad_norm": 0.3169244925099661, "learning_rate": 8.484049056562796e-06, "loss": 0.5271, "step": 4940 }, { "epoch": 0.8114466364214892, "grad_norm": 0.2927706493730989, "learning_rate": 8.483830992818141e-06, "loss": 0.5497, "step": 4941 }, { "epoch": 0.8116108636298319, "grad_norm": 0.3785507710524582, "learning_rate": 8.483612885805161e-06, "loss": 0.495, "step": 4942 }, { "epoch": 0.8117750908381746, "grad_norm": 0.2886097711844155, "learning_rate": 8.483394735526226e-06, "loss": 0.5161, "step": 4943 }, { "epoch": 0.8119393180465173, "grad_norm": 0.3721913462493364, "learning_rate": 8.483176541983706e-06, "loss": 0.5196, "step": 4944 }, { "epoch": 0.8121035452548601, "grad_norm": 0.30708790688915927, "learning_rate": 8.482958305179967e-06, "loss": 0.516, "step": 4945 }, { "epoch": 0.8122677724632028, "grad_norm": 0.3761360983371278, "learning_rate": 8.482740025117385e-06, "loss": 0.5023, "step": 4946 }, { "epoch": 0.8124319996715456, "grad_norm": 0.3213489583168228, "learning_rate": 8.482521701798326e-06, "loss": 0.5272, "step": 4947 }, { "epoch": 0.8125962268798883, "grad_norm": 0.3341586201507787, "learning_rate": 8.482303335225164e-06, "loss": 0.5461, "step": 4948 }, { "epoch": 0.812760454088231, "grad_norm": 0.29858786103982876, "learning_rate": 8.48208492540027e-06, "loss": 0.5105, "step": 4949 }, { "epoch": 0.8129246812965738, "grad_norm": 0.32960934334045244, "learning_rate": 8.481866472326015e-06, "loss": 0.4996, "step": 4950 }, { "epoch": 0.8130889085049166, "grad_norm": 0.2762505138441524, "learning_rate": 8.481647976004773e-06, "loss": 0.4963, "step": 4951 }, { "epoch": 0.8132531357132593, "grad_norm": 0.3157606414855119, "learning_rate": 8.481429436438916e-06, "loss": 0.5259, "step": 4952 }, { "epoch": 0.813417362921602, "grad_norm": 0.31727752076093524, "learning_rate": 8.481210853630819e-06, "loss": 0.4996, "step": 4953 }, { "epoch": 0.8135815901299448, "grad_norm": 0.3480649578971305, "learning_rate": 8.480992227582854e-06, "loss": 0.5198, "step": 4954 }, { "epoch": 0.8137458173382875, "grad_norm": 0.6437859463406534, "learning_rate": 8.480773558297396e-06, "loss": 0.5263, "step": 4955 }, { "epoch": 0.8139100445466303, "grad_norm": 0.5095893397019333, "learning_rate": 8.480554845776823e-06, "loss": 0.5211, "step": 4956 }, { "epoch": 0.814074271754973, "grad_norm": 0.3655560668561918, "learning_rate": 8.480336090023506e-06, "loss": 0.4943, "step": 4957 }, { "epoch": 0.8142384989633158, "grad_norm": 0.29143515911184514, "learning_rate": 8.480117291039825e-06, "loss": 0.5029, "step": 4958 }, { "epoch": 0.8144027261716585, "grad_norm": 0.3366487975682201, "learning_rate": 8.479898448828154e-06, "loss": 0.5425, "step": 4959 }, { "epoch": 0.8145669533800012, "grad_norm": 0.3642127674595192, "learning_rate": 8.479679563390868e-06, "loss": 0.5197, "step": 4960 }, { "epoch": 0.8147311805883439, "grad_norm": 0.2796960538183455, "learning_rate": 8.479460634730347e-06, "loss": 0.5093, "step": 4961 }, { "epoch": 0.8148954077966867, "grad_norm": 0.3228600225851728, "learning_rate": 8.47924166284897e-06, "loss": 0.5218, "step": 4962 }, { "epoch": 0.8150596350050294, "grad_norm": 0.4189371272712448, "learning_rate": 8.479022647749112e-06, "loss": 0.536, "step": 4963 }, { "epoch": 0.8152238622133722, "grad_norm": 0.3709891099591292, "learning_rate": 8.478803589433154e-06, "loss": 0.4873, "step": 4964 }, { "epoch": 0.8153880894217149, "grad_norm": 0.2950453878548158, "learning_rate": 8.478584487903475e-06, "loss": 0.5154, "step": 4965 }, { "epoch": 0.8155523166300577, "grad_norm": 0.35668420853045724, "learning_rate": 8.478365343162452e-06, "loss": 0.5237, "step": 4966 }, { "epoch": 0.8157165438384004, "grad_norm": 0.5139188172905095, "learning_rate": 8.478146155212469e-06, "loss": 0.5194, "step": 4967 }, { "epoch": 0.8158807710467432, "grad_norm": 0.3158036394353838, "learning_rate": 8.477926924055905e-06, "loss": 0.5001, "step": 4968 }, { "epoch": 0.8160449982550859, "grad_norm": 0.35621905036856805, "learning_rate": 8.477707649695139e-06, "loss": 0.5315, "step": 4969 }, { "epoch": 0.8162092254634287, "grad_norm": 0.4811902163500481, "learning_rate": 8.477488332132554e-06, "loss": 0.4975, "step": 4970 }, { "epoch": 0.8163734526717714, "grad_norm": 0.2924221106772756, "learning_rate": 8.477268971370535e-06, "loss": 0.5045, "step": 4971 }, { "epoch": 0.8165376798801142, "grad_norm": 0.3111977388086835, "learning_rate": 8.47704956741146e-06, "loss": 0.5046, "step": 4972 }, { "epoch": 0.8167019070884569, "grad_norm": 0.5014288256969606, "learning_rate": 8.476830120257715e-06, "loss": 0.522, "step": 4973 }, { "epoch": 0.8168661342967997, "grad_norm": 0.30421993520314305, "learning_rate": 8.47661062991168e-06, "loss": 0.5172, "step": 4974 }, { "epoch": 0.8170303615051424, "grad_norm": 0.3200743075995799, "learning_rate": 8.476391096375744e-06, "loss": 0.4989, "step": 4975 }, { "epoch": 0.8171945887134852, "grad_norm": 0.3458207012656067, "learning_rate": 8.476171519652288e-06, "loss": 0.5056, "step": 4976 }, { "epoch": 0.8173588159218278, "grad_norm": 0.2890057786017369, "learning_rate": 8.475951899743695e-06, "loss": 0.5013, "step": 4977 }, { "epoch": 0.8175230431301705, "grad_norm": 0.3296551060524736, "learning_rate": 8.475732236652356e-06, "loss": 0.51, "step": 4978 }, { "epoch": 0.8176872703385133, "grad_norm": 0.3430824710511646, "learning_rate": 8.47551253038065e-06, "loss": 0.4904, "step": 4979 }, { "epoch": 0.817851497546856, "grad_norm": 0.2904594269875365, "learning_rate": 8.475292780930968e-06, "loss": 0.5092, "step": 4980 }, { "epoch": 0.8180157247551988, "grad_norm": 0.30751543008238613, "learning_rate": 8.475072988305696e-06, "loss": 0.523, "step": 4981 }, { "epoch": 0.8181799519635415, "grad_norm": 0.47299931672604006, "learning_rate": 8.474853152507219e-06, "loss": 0.4972, "step": 4982 }, { "epoch": 0.8183441791718843, "grad_norm": 0.3891091435955476, "learning_rate": 8.474633273537927e-06, "loss": 0.5199, "step": 4983 }, { "epoch": 0.818508406380227, "grad_norm": 0.3165875924441726, "learning_rate": 8.474413351400206e-06, "loss": 0.5225, "step": 4984 }, { "epoch": 0.8186726335885698, "grad_norm": 0.3692974632226017, "learning_rate": 8.474193386096447e-06, "loss": 0.5208, "step": 4985 }, { "epoch": 0.8188368607969125, "grad_norm": 0.7791784938729102, "learning_rate": 8.473973377629036e-06, "loss": 0.5032, "step": 4986 }, { "epoch": 0.8190010880052553, "grad_norm": 0.4044083364461391, "learning_rate": 8.473753326000367e-06, "loss": 0.5233, "step": 4987 }, { "epoch": 0.819165315213598, "grad_norm": 0.4033049001473696, "learning_rate": 8.473533231212827e-06, "loss": 0.5099, "step": 4988 }, { "epoch": 0.8193295424219408, "grad_norm": 0.3596693458396118, "learning_rate": 8.473313093268805e-06, "loss": 0.5225, "step": 4989 }, { "epoch": 0.8194937696302835, "grad_norm": 0.41076914940960707, "learning_rate": 8.473092912170692e-06, "loss": 0.5124, "step": 4990 }, { "epoch": 0.8196579968386263, "grad_norm": 0.3401356269581785, "learning_rate": 8.472872687920884e-06, "loss": 0.506, "step": 4991 }, { "epoch": 0.819822224046969, "grad_norm": 0.47125293670893087, "learning_rate": 8.472652420521768e-06, "loss": 0.5247, "step": 4992 }, { "epoch": 0.8199864512553118, "grad_norm": 0.28918564354635806, "learning_rate": 8.472432109975739e-06, "loss": 0.5196, "step": 4993 }, { "epoch": 0.8201506784636544, "grad_norm": 0.36219635279432766, "learning_rate": 8.47221175628519e-06, "loss": 0.5192, "step": 4994 }, { "epoch": 0.8203149056719972, "grad_norm": 0.34955506029391137, "learning_rate": 8.47199135945251e-06, "loss": 0.51, "step": 4995 }, { "epoch": 0.8204791328803399, "grad_norm": 0.45748560279916733, "learning_rate": 8.471770919480099e-06, "loss": 0.4931, "step": 4996 }, { "epoch": 0.8206433600886827, "grad_norm": 0.9575608714826659, "learning_rate": 8.471550436370348e-06, "loss": 0.5599, "step": 4997 }, { "epoch": 0.8208075872970254, "grad_norm": 0.2853618278425937, "learning_rate": 8.47132991012565e-06, "loss": 0.5248, "step": 4998 }, { "epoch": 0.8209718145053682, "grad_norm": 0.3093195551711788, "learning_rate": 8.471109340748404e-06, "loss": 0.5099, "step": 4999 }, { "epoch": 0.8211360417137109, "grad_norm": 0.319549610457436, "learning_rate": 8.470888728241e-06, "loss": 0.5118, "step": 5000 }, { "epoch": 0.8213002689220537, "grad_norm": 0.27962509644036526, "learning_rate": 8.47066807260584e-06, "loss": 0.5468, "step": 5001 }, { "epoch": 0.8214644961303964, "grad_norm": 0.2948514500194613, "learning_rate": 8.470447373845318e-06, "loss": 0.5016, "step": 5002 }, { "epoch": 0.8216287233387392, "grad_norm": 0.34033167071439674, "learning_rate": 8.470226631961833e-06, "loss": 0.5065, "step": 5003 }, { "epoch": 0.8217929505470819, "grad_norm": 0.2879423318987142, "learning_rate": 8.470005846957777e-06, "loss": 0.5033, "step": 5004 }, { "epoch": 0.8219571777554247, "grad_norm": 0.29707502090044696, "learning_rate": 8.469785018835555e-06, "loss": 0.4962, "step": 5005 }, { "epoch": 0.8221214049637674, "grad_norm": 0.32249529594575727, "learning_rate": 8.46956414759756e-06, "loss": 0.5189, "step": 5006 }, { "epoch": 0.8222856321721101, "grad_norm": 0.44102538660174917, "learning_rate": 8.469343233246193e-06, "loss": 0.5341, "step": 5007 }, { "epoch": 0.8224498593804529, "grad_norm": 0.36244736256653914, "learning_rate": 8.469122275783853e-06, "loss": 0.5041, "step": 5008 }, { "epoch": 0.8226140865887956, "grad_norm": 0.43767800046863825, "learning_rate": 8.46890127521294e-06, "loss": 0.5344, "step": 5009 }, { "epoch": 0.8227783137971384, "grad_norm": 0.7840909278204317, "learning_rate": 8.468680231535856e-06, "loss": 0.5084, "step": 5010 }, { "epoch": 0.822942541005481, "grad_norm": 0.39755023280098944, "learning_rate": 8.468459144754998e-06, "loss": 0.5206, "step": 5011 }, { "epoch": 0.8231067682138238, "grad_norm": 0.3169144920240181, "learning_rate": 8.468238014872769e-06, "loss": 0.5044, "step": 5012 }, { "epoch": 0.8232709954221665, "grad_norm": 0.32759349000574406, "learning_rate": 8.468016841891572e-06, "loss": 0.5082, "step": 5013 }, { "epoch": 0.8234352226305093, "grad_norm": 0.3040957401155583, "learning_rate": 8.467795625813808e-06, "loss": 0.5149, "step": 5014 }, { "epoch": 0.823599449838852, "grad_norm": 0.5909867028738016, "learning_rate": 8.46757436664188e-06, "loss": 0.5066, "step": 5015 }, { "epoch": 0.8237636770471948, "grad_norm": 0.3331879110597187, "learning_rate": 8.46735306437819e-06, "loss": 0.5252, "step": 5016 }, { "epoch": 0.8239279042555375, "grad_norm": 0.28786272922468276, "learning_rate": 8.467131719025143e-06, "loss": 0.5219, "step": 5017 }, { "epoch": 0.8240921314638803, "grad_norm": 0.37116912026207094, "learning_rate": 8.466910330585142e-06, "loss": 0.5163, "step": 5018 }, { "epoch": 0.824256358672223, "grad_norm": 0.41794447544277624, "learning_rate": 8.466688899060593e-06, "loss": 0.5218, "step": 5019 }, { "epoch": 0.8244205858805658, "grad_norm": 0.3212900078391455, "learning_rate": 8.466467424453898e-06, "loss": 0.5098, "step": 5020 }, { "epoch": 0.8245848130889085, "grad_norm": 0.3470167865008464, "learning_rate": 8.466245906767464e-06, "loss": 0.4971, "step": 5021 }, { "epoch": 0.8247490402972513, "grad_norm": 0.3205638094446894, "learning_rate": 8.4660243460037e-06, "loss": 0.5236, "step": 5022 }, { "epoch": 0.824913267505594, "grad_norm": 0.31070966884605533, "learning_rate": 8.465802742165007e-06, "loss": 0.4918, "step": 5023 }, { "epoch": 0.8250774947139368, "grad_norm": 0.3138870801849995, "learning_rate": 8.465581095253795e-06, "loss": 0.5128, "step": 5024 }, { "epoch": 0.8252417219222795, "grad_norm": 0.45582958216423314, "learning_rate": 8.465359405272471e-06, "loss": 0.5094, "step": 5025 }, { "epoch": 0.8254059491306223, "grad_norm": 0.32263746700503765, "learning_rate": 8.465137672223444e-06, "loss": 0.5176, "step": 5026 }, { "epoch": 0.825570176338965, "grad_norm": 0.36571660406047724, "learning_rate": 8.464915896109118e-06, "loss": 0.5176, "step": 5027 }, { "epoch": 0.8257344035473076, "grad_norm": 0.37715049533934497, "learning_rate": 8.464694076931907e-06, "loss": 0.5048, "step": 5028 }, { "epoch": 0.8258986307556504, "grad_norm": 0.3525169971936548, "learning_rate": 8.464472214694216e-06, "loss": 0.4956, "step": 5029 }, { "epoch": 0.8260628579639931, "grad_norm": 0.34935023750783545, "learning_rate": 8.464250309398457e-06, "loss": 0.5043, "step": 5030 }, { "epoch": 0.8262270851723359, "grad_norm": 0.3003527457744416, "learning_rate": 8.464028361047037e-06, "loss": 0.5098, "step": 5031 }, { "epoch": 0.8263913123806786, "grad_norm": 0.3211925859594905, "learning_rate": 8.463806369642373e-06, "loss": 0.5151, "step": 5032 }, { "epoch": 0.8265555395890214, "grad_norm": 0.2881298613603624, "learning_rate": 8.46358433518687e-06, "loss": 0.522, "step": 5033 }, { "epoch": 0.8267197667973641, "grad_norm": 0.3088604066956432, "learning_rate": 8.46336225768294e-06, "loss": 0.5042, "step": 5034 }, { "epoch": 0.8268839940057069, "grad_norm": 0.3207947009742772, "learning_rate": 8.463140137132997e-06, "loss": 0.4924, "step": 5035 }, { "epoch": 0.8270482212140496, "grad_norm": 0.30583600515085474, "learning_rate": 8.462917973539454e-06, "loss": 0.5144, "step": 5036 }, { "epoch": 0.8272124484223924, "grad_norm": 0.3273592428080974, "learning_rate": 8.462695766904724e-06, "loss": 0.5046, "step": 5037 }, { "epoch": 0.8273766756307351, "grad_norm": 0.24219539916900257, "learning_rate": 8.462473517231217e-06, "loss": 0.4942, "step": 5038 }, { "epoch": 0.8275409028390779, "grad_norm": 0.30598944645398374, "learning_rate": 8.462251224521349e-06, "loss": 0.5089, "step": 5039 }, { "epoch": 0.8277051300474206, "grad_norm": 0.2890067799934394, "learning_rate": 8.462028888777536e-06, "loss": 0.5153, "step": 5040 }, { "epoch": 0.8278693572557634, "grad_norm": 0.31820577059903815, "learning_rate": 8.461806510002189e-06, "loss": 0.4943, "step": 5041 }, { "epoch": 0.8280335844641061, "grad_norm": 0.3160940376849428, "learning_rate": 8.461584088197726e-06, "loss": 0.5148, "step": 5042 }, { "epoch": 0.8281978116724489, "grad_norm": 0.30044612758537664, "learning_rate": 8.461361623366564e-06, "loss": 0.498, "step": 5043 }, { "epoch": 0.8283620388807916, "grad_norm": 0.26764067211180775, "learning_rate": 8.461139115511116e-06, "loss": 0.4952, "step": 5044 }, { "epoch": 0.8285262660891343, "grad_norm": 0.31280182664520434, "learning_rate": 8.4609165646338e-06, "loss": 0.5031, "step": 5045 }, { "epoch": 0.828690493297477, "grad_norm": 1.0443570125444503, "learning_rate": 8.460693970737033e-06, "loss": 0.5144, "step": 5046 }, { "epoch": 0.8288547205058198, "grad_norm": 0.31610380504111363, "learning_rate": 8.460471333823232e-06, "loss": 0.5046, "step": 5047 }, { "epoch": 0.8290189477141625, "grad_norm": 0.2824804649298298, "learning_rate": 8.460248653894818e-06, "loss": 0.5478, "step": 5048 }, { "epoch": 0.8291831749225053, "grad_norm": 0.345837308750286, "learning_rate": 8.460025930954206e-06, "loss": 0.511, "step": 5049 }, { "epoch": 0.829347402130848, "grad_norm": 0.3016406355661234, "learning_rate": 8.459803165003815e-06, "loss": 0.5182, "step": 5050 }, { "epoch": 0.8295116293391908, "grad_norm": 0.3562213213518067, "learning_rate": 8.459580356046067e-06, "loss": 0.5082, "step": 5051 }, { "epoch": 0.8296758565475335, "grad_norm": 0.3025481909002121, "learning_rate": 8.459357504083381e-06, "loss": 0.5105, "step": 5052 }, { "epoch": 0.8298400837558763, "grad_norm": 0.33732616817813776, "learning_rate": 8.459134609118175e-06, "loss": 0.5031, "step": 5053 }, { "epoch": 0.830004310964219, "grad_norm": 0.330670296404074, "learning_rate": 8.458911671152874e-06, "loss": 0.491, "step": 5054 }, { "epoch": 0.8301685381725618, "grad_norm": 0.31696394533350386, "learning_rate": 8.458688690189897e-06, "loss": 0.5151, "step": 5055 }, { "epoch": 0.8303327653809045, "grad_norm": 0.35470601501695254, "learning_rate": 8.458465666231665e-06, "loss": 0.5185, "step": 5056 }, { "epoch": 0.8304969925892473, "grad_norm": 0.2745766999857397, "learning_rate": 8.4582425992806e-06, "loss": 0.5128, "step": 5057 }, { "epoch": 0.83066121979759, "grad_norm": 0.2910543164432107, "learning_rate": 8.458019489339129e-06, "loss": 0.5187, "step": 5058 }, { "epoch": 0.8308254470059327, "grad_norm": 0.3428610977610601, "learning_rate": 8.457796336409672e-06, "loss": 0.5007, "step": 5059 }, { "epoch": 0.8309896742142755, "grad_norm": 0.27773717516347307, "learning_rate": 8.45757314049465e-06, "loss": 0.4994, "step": 5060 }, { "epoch": 0.8311539014226182, "grad_norm": 0.28802939482408896, "learning_rate": 8.457349901596492e-06, "loss": 0.5205, "step": 5061 }, { "epoch": 0.8313181286309609, "grad_norm": 0.26938147503957355, "learning_rate": 8.45712661971762e-06, "loss": 0.4928, "step": 5062 }, { "epoch": 0.8314823558393036, "grad_norm": 0.27567436454998057, "learning_rate": 8.456903294860462e-06, "loss": 0.506, "step": 5063 }, { "epoch": 0.8316465830476464, "grad_norm": 0.2871225248215651, "learning_rate": 8.456679927027438e-06, "loss": 0.5058, "step": 5064 }, { "epoch": 0.8318108102559891, "grad_norm": 0.3209137982397029, "learning_rate": 8.45645651622098e-06, "loss": 0.5263, "step": 5065 }, { "epoch": 0.8319750374643319, "grad_norm": 0.2785837470184095, "learning_rate": 8.456233062443508e-06, "loss": 0.51, "step": 5066 }, { "epoch": 0.8321392646726746, "grad_norm": 0.29409807614294214, "learning_rate": 8.456009565697455e-06, "loss": 0.5561, "step": 5067 }, { "epoch": 0.8323034918810174, "grad_norm": 0.28712716258345833, "learning_rate": 8.455786025985244e-06, "loss": 0.5115, "step": 5068 }, { "epoch": 0.8324677190893601, "grad_norm": 0.284167141367049, "learning_rate": 8.455562443309308e-06, "loss": 0.5049, "step": 5069 }, { "epoch": 0.8326319462977029, "grad_norm": 0.29551746610441004, "learning_rate": 8.455338817672069e-06, "loss": 0.4867, "step": 5070 }, { "epoch": 0.8327961735060456, "grad_norm": 0.27646057535916885, "learning_rate": 8.455115149075961e-06, "loss": 0.5164, "step": 5071 }, { "epoch": 0.8329604007143884, "grad_norm": 0.3010201736022869, "learning_rate": 8.45489143752341e-06, "loss": 0.5152, "step": 5072 }, { "epoch": 0.8331246279227311, "grad_norm": 0.2622201228558617, "learning_rate": 8.454667683016847e-06, "loss": 0.4946, "step": 5073 }, { "epoch": 0.8332888551310739, "grad_norm": 0.3100844241068556, "learning_rate": 8.454443885558702e-06, "loss": 0.5018, "step": 5074 }, { "epoch": 0.8334530823394166, "grad_norm": 0.2609866015590737, "learning_rate": 8.454220045151407e-06, "loss": 0.5066, "step": 5075 }, { "epoch": 0.8336173095477594, "grad_norm": 0.443640168900632, "learning_rate": 8.45399616179739e-06, "loss": 0.4883, "step": 5076 }, { "epoch": 0.8337815367561021, "grad_norm": 0.3412324134114467, "learning_rate": 8.453772235499085e-06, "loss": 0.5173, "step": 5077 }, { "epoch": 0.8339457639644449, "grad_norm": 0.31007454373042503, "learning_rate": 8.453548266258924e-06, "loss": 0.5246, "step": 5078 }, { "epoch": 0.8341099911727875, "grad_norm": 0.3047252056284528, "learning_rate": 8.45332425407934e-06, "loss": 0.521, "step": 5079 }, { "epoch": 0.8342742183811303, "grad_norm": 0.3046193883490871, "learning_rate": 8.453100198962764e-06, "loss": 0.5287, "step": 5080 }, { "epoch": 0.834438445589473, "grad_norm": 0.2786460052072658, "learning_rate": 8.45287610091163e-06, "loss": 0.5062, "step": 5081 }, { "epoch": 0.8346026727978157, "grad_norm": 0.3078651792888881, "learning_rate": 8.452651959928374e-06, "loss": 0.5048, "step": 5082 }, { "epoch": 0.8347669000061585, "grad_norm": 0.2877663514705134, "learning_rate": 8.452427776015428e-06, "loss": 0.5403, "step": 5083 }, { "epoch": 0.8349311272145012, "grad_norm": 0.275191748660642, "learning_rate": 8.452203549175226e-06, "loss": 0.5112, "step": 5084 }, { "epoch": 0.835095354422844, "grad_norm": 0.29317499292240806, "learning_rate": 8.451979279410207e-06, "loss": 0.5207, "step": 5085 }, { "epoch": 0.8352595816311867, "grad_norm": 0.28347051510346993, "learning_rate": 8.451754966722804e-06, "loss": 0.4999, "step": 5086 }, { "epoch": 0.8354238088395295, "grad_norm": 0.3021476025483518, "learning_rate": 8.451530611115456e-06, "loss": 0.5157, "step": 5087 }, { "epoch": 0.8355880360478722, "grad_norm": 0.32453355242197585, "learning_rate": 8.451306212590595e-06, "loss": 0.4964, "step": 5088 }, { "epoch": 0.835752263256215, "grad_norm": 0.2786831375180388, "learning_rate": 8.451081771150663e-06, "loss": 0.5112, "step": 5089 }, { "epoch": 0.8359164904645577, "grad_norm": 0.3315162275707279, "learning_rate": 8.450857286798095e-06, "loss": 0.5169, "step": 5090 }, { "epoch": 0.8360807176729005, "grad_norm": 0.43424131015762185, "learning_rate": 8.450632759535329e-06, "loss": 0.4969, "step": 5091 }, { "epoch": 0.8362449448812432, "grad_norm": 0.34337420336609564, "learning_rate": 8.450408189364805e-06, "loss": 0.522, "step": 5092 }, { "epoch": 0.836409172089586, "grad_norm": 0.27471053663336875, "learning_rate": 8.450183576288962e-06, "loss": 0.5141, "step": 5093 }, { "epoch": 0.8365733992979287, "grad_norm": 0.416085438847666, "learning_rate": 8.449958920310237e-06, "loss": 0.5061, "step": 5094 }, { "epoch": 0.8367376265062715, "grad_norm": 0.32310592092724405, "learning_rate": 8.449734221431073e-06, "loss": 0.5235, "step": 5095 }, { "epoch": 0.8369018537146141, "grad_norm": 0.3186821997518407, "learning_rate": 8.449509479653911e-06, "loss": 0.4872, "step": 5096 }, { "epoch": 0.8370660809229569, "grad_norm": 0.2843584333397972, "learning_rate": 8.449284694981187e-06, "loss": 0.5301, "step": 5097 }, { "epoch": 0.8372303081312996, "grad_norm": 0.3032449654190625, "learning_rate": 8.449059867415348e-06, "loss": 0.5181, "step": 5098 }, { "epoch": 0.8373945353396424, "grad_norm": 0.33532208528088053, "learning_rate": 8.448834996958833e-06, "loss": 0.5166, "step": 5099 }, { "epoch": 0.8375587625479851, "grad_norm": 0.3208069728155115, "learning_rate": 8.448610083614085e-06, "loss": 0.5185, "step": 5100 }, { "epoch": 0.8377229897563279, "grad_norm": 0.2805736230950408, "learning_rate": 8.448385127383546e-06, "loss": 0.5269, "step": 5101 }, { "epoch": 0.8378872169646706, "grad_norm": 0.28498221869287943, "learning_rate": 8.448160128269659e-06, "loss": 0.5032, "step": 5102 }, { "epoch": 0.8380514441730134, "grad_norm": 0.3092573990050545, "learning_rate": 8.44793508627487e-06, "loss": 0.5213, "step": 5103 }, { "epoch": 0.8382156713813561, "grad_norm": 0.2936327617561994, "learning_rate": 8.447710001401622e-06, "loss": 0.4974, "step": 5104 }, { "epoch": 0.8383798985896989, "grad_norm": 0.3591294236741196, "learning_rate": 8.447484873652358e-06, "loss": 0.5023, "step": 5105 }, { "epoch": 0.8385441257980416, "grad_norm": 0.3441918054279239, "learning_rate": 8.447259703029525e-06, "loss": 0.5147, "step": 5106 }, { "epoch": 0.8387083530063844, "grad_norm": 0.3430452362801982, "learning_rate": 8.447034489535569e-06, "loss": 0.5152, "step": 5107 }, { "epoch": 0.8388725802147271, "grad_norm": 0.31263270317206515, "learning_rate": 8.446809233172934e-06, "loss": 0.524, "step": 5108 }, { "epoch": 0.8390368074230699, "grad_norm": 0.28639111012314905, "learning_rate": 8.446583933944067e-06, "loss": 0.519, "step": 5109 }, { "epoch": 0.8392010346314126, "grad_norm": 0.3446313615810392, "learning_rate": 8.446358591851417e-06, "loss": 0.5128, "step": 5110 }, { "epoch": 0.8393652618397553, "grad_norm": 0.5348002296317965, "learning_rate": 8.446133206897429e-06, "loss": 0.4922, "step": 5111 }, { "epoch": 0.8395294890480981, "grad_norm": 0.5051111534121978, "learning_rate": 8.445907779084553e-06, "loss": 0.5418, "step": 5112 }, { "epoch": 0.8396937162564407, "grad_norm": 0.30406474946147793, "learning_rate": 8.445682308415235e-06, "loss": 0.4946, "step": 5113 }, { "epoch": 0.8398579434647835, "grad_norm": 0.3380086818662066, "learning_rate": 8.445456794891925e-06, "loss": 0.5255, "step": 5114 }, { "epoch": 0.8400221706731262, "grad_norm": 0.3438745728098006, "learning_rate": 8.445231238517073e-06, "loss": 0.5423, "step": 5115 }, { "epoch": 0.840186397881469, "grad_norm": 0.40381447460269726, "learning_rate": 8.44500563929313e-06, "loss": 0.5137, "step": 5116 }, { "epoch": 0.8403506250898117, "grad_norm": 0.40787796480711397, "learning_rate": 8.444779997222541e-06, "loss": 0.5064, "step": 5117 }, { "epoch": 0.8405148522981545, "grad_norm": 0.36288902748934776, "learning_rate": 8.444554312307763e-06, "loss": 0.5082, "step": 5118 }, { "epoch": 0.8406790795064972, "grad_norm": 0.30047433656216793, "learning_rate": 8.444328584551243e-06, "loss": 0.512, "step": 5119 }, { "epoch": 0.84084330671484, "grad_norm": 0.3750624604659782, "learning_rate": 8.444102813955435e-06, "loss": 0.5186, "step": 5120 }, { "epoch": 0.8410075339231827, "grad_norm": 0.34600270284100215, "learning_rate": 8.443877000522788e-06, "loss": 0.4973, "step": 5121 }, { "epoch": 0.8411717611315255, "grad_norm": 0.3139983714728081, "learning_rate": 8.443651144255756e-06, "loss": 0.5256, "step": 5122 }, { "epoch": 0.8413359883398682, "grad_norm": 0.3346488739033981, "learning_rate": 8.443425245156795e-06, "loss": 0.5163, "step": 5123 }, { "epoch": 0.841500215548211, "grad_norm": 0.2948288518207391, "learning_rate": 8.443199303228355e-06, "loss": 0.5234, "step": 5124 }, { "epoch": 0.8416644427565537, "grad_norm": 0.33203493477375534, "learning_rate": 8.44297331847289e-06, "loss": 0.5105, "step": 5125 }, { "epoch": 0.8418286699648965, "grad_norm": 0.30379991266832673, "learning_rate": 8.442747290892856e-06, "loss": 0.5144, "step": 5126 }, { "epoch": 0.8419928971732392, "grad_norm": 0.28029415277526454, "learning_rate": 8.44252122049071e-06, "loss": 0.508, "step": 5127 }, { "epoch": 0.842157124381582, "grad_norm": 0.49847696844628037, "learning_rate": 8.4422951072689e-06, "loss": 0.5056, "step": 5128 }, { "epoch": 0.8423213515899247, "grad_norm": 0.5120725889382312, "learning_rate": 8.44206895122989e-06, "loss": 0.5298, "step": 5129 }, { "epoch": 0.8424855787982674, "grad_norm": 0.2882998585510167, "learning_rate": 8.44184275237613e-06, "loss": 0.5023, "step": 5130 }, { "epoch": 0.8426498060066101, "grad_norm": 0.3236394026994081, "learning_rate": 8.441616510710082e-06, "loss": 0.5098, "step": 5131 }, { "epoch": 0.8428140332149529, "grad_norm": 0.3631167452727695, "learning_rate": 8.441390226234199e-06, "loss": 0.5206, "step": 5132 }, { "epoch": 0.8429782604232956, "grad_norm": 0.3183503409285738, "learning_rate": 8.441163898950941e-06, "loss": 0.5229, "step": 5133 }, { "epoch": 0.8431424876316383, "grad_norm": 0.30255751019464133, "learning_rate": 8.440937528862766e-06, "loss": 0.5029, "step": 5134 }, { "epoch": 0.8433067148399811, "grad_norm": 0.3335196583075735, "learning_rate": 8.440711115972131e-06, "loss": 0.504, "step": 5135 }, { "epoch": 0.8434709420483238, "grad_norm": 0.32001255656113226, "learning_rate": 8.440484660281496e-06, "loss": 0.495, "step": 5136 }, { "epoch": 0.8436351692566666, "grad_norm": 0.36895012575981045, "learning_rate": 8.440258161793321e-06, "loss": 0.5073, "step": 5137 }, { "epoch": 0.8437993964650093, "grad_norm": 0.2755984017532756, "learning_rate": 8.440031620510068e-06, "loss": 0.5289, "step": 5138 }, { "epoch": 0.8439636236733521, "grad_norm": 0.3473947843063845, "learning_rate": 8.439805036434191e-06, "loss": 0.5169, "step": 5139 }, { "epoch": 0.8441278508816948, "grad_norm": 0.30833567846849247, "learning_rate": 8.439578409568158e-06, "loss": 0.5049, "step": 5140 }, { "epoch": 0.8442920780900376, "grad_norm": 0.2868463061560813, "learning_rate": 8.439351739914427e-06, "loss": 0.5024, "step": 5141 }, { "epoch": 0.8444563052983803, "grad_norm": 0.27930508177210783, "learning_rate": 8.439125027475459e-06, "loss": 0.5055, "step": 5142 }, { "epoch": 0.8446205325067231, "grad_norm": 0.38485774036475545, "learning_rate": 8.438898272253719e-06, "loss": 0.511, "step": 5143 }, { "epoch": 0.8447847597150658, "grad_norm": 0.30246859023528366, "learning_rate": 8.438671474251667e-06, "loss": 0.5309, "step": 5144 }, { "epoch": 0.8449489869234086, "grad_norm": 0.324011948208211, "learning_rate": 8.43844463347177e-06, "loss": 0.506, "step": 5145 }, { "epoch": 0.8451132141317512, "grad_norm": 0.3072522424422487, "learning_rate": 8.438217749916488e-06, "loss": 0.5089, "step": 5146 }, { "epoch": 0.845277441340094, "grad_norm": 0.44271916061839944, "learning_rate": 8.437990823588285e-06, "loss": 0.5399, "step": 5147 }, { "epoch": 0.8454416685484367, "grad_norm": 0.4356296662687189, "learning_rate": 8.43776385448963e-06, "loss": 0.5228, "step": 5148 }, { "epoch": 0.8456058957567795, "grad_norm": 0.2668006225000898, "learning_rate": 8.437536842622982e-06, "loss": 0.5014, "step": 5149 }, { "epoch": 0.8457701229651222, "grad_norm": 0.26564245328819, "learning_rate": 8.437309787990813e-06, "loss": 0.5096, "step": 5150 }, { "epoch": 0.845934350173465, "grad_norm": 0.2983680257581951, "learning_rate": 8.437082690595584e-06, "loss": 0.5136, "step": 5151 }, { "epoch": 0.8460985773818077, "grad_norm": 0.40578399435243784, "learning_rate": 8.436855550439765e-06, "loss": 0.4978, "step": 5152 }, { "epoch": 0.8462628045901505, "grad_norm": 0.44301373104727876, "learning_rate": 8.43662836752582e-06, "loss": 0.4988, "step": 5153 }, { "epoch": 0.8464270317984932, "grad_norm": 0.39247545992276484, "learning_rate": 8.436401141856218e-06, "loss": 0.5338, "step": 5154 }, { "epoch": 0.846591259006836, "grad_norm": 0.2681976914938843, "learning_rate": 8.436173873433428e-06, "loss": 0.5075, "step": 5155 }, { "epoch": 0.8467554862151787, "grad_norm": 0.3673054253397242, "learning_rate": 8.435946562259917e-06, "loss": 0.4936, "step": 5156 }, { "epoch": 0.8469197134235215, "grad_norm": 0.3534936525961469, "learning_rate": 8.435719208338153e-06, "loss": 0.5091, "step": 5157 }, { "epoch": 0.8470839406318642, "grad_norm": 0.3079026562799358, "learning_rate": 8.435491811670605e-06, "loss": 0.5126, "step": 5158 }, { "epoch": 0.847248167840207, "grad_norm": 0.37457392062869027, "learning_rate": 8.435264372259745e-06, "loss": 0.511, "step": 5159 }, { "epoch": 0.8474123950485497, "grad_norm": 0.31250952877477833, "learning_rate": 8.435036890108042e-06, "loss": 0.5055, "step": 5160 }, { "epoch": 0.8475766222568925, "grad_norm": 0.34748176886976945, "learning_rate": 8.434809365217968e-06, "loss": 0.5216, "step": 5161 }, { "epoch": 0.8477408494652352, "grad_norm": 0.4337562987176165, "learning_rate": 8.434581797591992e-06, "loss": 0.5325, "step": 5162 }, { "epoch": 0.8479050766735778, "grad_norm": 0.30289333346415104, "learning_rate": 8.434354187232587e-06, "loss": 0.5138, "step": 5163 }, { "epoch": 0.8480693038819206, "grad_norm": 0.28790463141112965, "learning_rate": 8.434126534142223e-06, "loss": 0.5058, "step": 5164 }, { "epoch": 0.8482335310902633, "grad_norm": 0.28706340640753114, "learning_rate": 8.433898838323375e-06, "loss": 0.5226, "step": 5165 }, { "epoch": 0.8483977582986061, "grad_norm": 0.32648676816606786, "learning_rate": 8.433671099778517e-06, "loss": 0.5049, "step": 5166 }, { "epoch": 0.8485619855069488, "grad_norm": 0.3414586375904832, "learning_rate": 8.43344331851012e-06, "loss": 0.4967, "step": 5167 }, { "epoch": 0.8487262127152916, "grad_norm": 0.3822562701157267, "learning_rate": 8.433215494520657e-06, "loss": 0.4928, "step": 5168 }, { "epoch": 0.8488904399236343, "grad_norm": 0.30473631955247477, "learning_rate": 8.432987627812606e-06, "loss": 0.5051, "step": 5169 }, { "epoch": 0.8490546671319771, "grad_norm": 0.4222486533363501, "learning_rate": 8.432759718388437e-06, "loss": 0.5026, "step": 5170 }, { "epoch": 0.8492188943403198, "grad_norm": 0.435952572835449, "learning_rate": 8.43253176625063e-06, "loss": 0.5014, "step": 5171 }, { "epoch": 0.8493831215486626, "grad_norm": 0.35308923943760095, "learning_rate": 8.432303771401659e-06, "loss": 0.5239, "step": 5172 }, { "epoch": 0.8495473487570053, "grad_norm": 0.3010694457237855, "learning_rate": 8.432075733844e-06, "loss": 0.5149, "step": 5173 }, { "epoch": 0.8497115759653481, "grad_norm": 0.32228750187138333, "learning_rate": 8.43184765358013e-06, "loss": 0.5011, "step": 5174 }, { "epoch": 0.8498758031736908, "grad_norm": 0.34908233140622946, "learning_rate": 8.431619530612525e-06, "loss": 0.5016, "step": 5175 }, { "epoch": 0.8500400303820336, "grad_norm": 0.3444780549516735, "learning_rate": 8.431391364943665e-06, "loss": 0.4882, "step": 5176 }, { "epoch": 0.8502042575903763, "grad_norm": 0.36189762611863124, "learning_rate": 8.431163156576028e-06, "loss": 0.5351, "step": 5177 }, { "epoch": 0.8503684847987191, "grad_norm": 0.31406121921663654, "learning_rate": 8.430934905512087e-06, "loss": 0.5083, "step": 5178 }, { "epoch": 0.8505327120070618, "grad_norm": 0.33779577449560827, "learning_rate": 8.43070661175433e-06, "loss": 0.5108, "step": 5179 }, { "epoch": 0.8506969392154045, "grad_norm": 0.31857681875078253, "learning_rate": 8.430478275305228e-06, "loss": 0.5219, "step": 5180 }, { "epoch": 0.8508611664237472, "grad_norm": 0.33539172162870345, "learning_rate": 8.430249896167269e-06, "loss": 0.5095, "step": 5181 }, { "epoch": 0.85102539363209, "grad_norm": 0.3775500012400237, "learning_rate": 8.430021474342928e-06, "loss": 0.5084, "step": 5182 }, { "epoch": 0.8511896208404327, "grad_norm": 0.3059924527973876, "learning_rate": 8.429793009834685e-06, "loss": 0.51, "step": 5183 }, { "epoch": 0.8513538480487755, "grad_norm": 0.3871570428014681, "learning_rate": 8.429564502645026e-06, "loss": 0.527, "step": 5184 }, { "epoch": 0.8515180752571182, "grad_norm": 0.3048202893164031, "learning_rate": 8.429335952776428e-06, "loss": 0.5057, "step": 5185 }, { "epoch": 0.851682302465461, "grad_norm": 0.3039632803794553, "learning_rate": 8.429107360231377e-06, "loss": 0.4986, "step": 5186 }, { "epoch": 0.8518465296738037, "grad_norm": 0.5060909240230959, "learning_rate": 8.428878725012354e-06, "loss": 0.5102, "step": 5187 }, { "epoch": 0.8520107568821464, "grad_norm": 0.3212393034431408, "learning_rate": 8.428650047121843e-06, "loss": 0.5046, "step": 5188 }, { "epoch": 0.8521749840904892, "grad_norm": 0.27915733547122695, "learning_rate": 8.428421326562328e-06, "loss": 0.5198, "step": 5189 }, { "epoch": 0.852339211298832, "grad_norm": 0.28708430427289955, "learning_rate": 8.42819256333629e-06, "loss": 0.5145, "step": 5190 }, { "epoch": 0.8525034385071747, "grad_norm": 0.30120292472904436, "learning_rate": 8.427963757446218e-06, "loss": 0.5093, "step": 5191 }, { "epoch": 0.8526676657155174, "grad_norm": 0.35635732289785643, "learning_rate": 8.427734908894594e-06, "loss": 0.5047, "step": 5192 }, { "epoch": 0.8528318929238602, "grad_norm": 0.28154755868785863, "learning_rate": 8.427506017683905e-06, "loss": 0.4954, "step": 5193 }, { "epoch": 0.8529961201322029, "grad_norm": 0.31810663307518133, "learning_rate": 8.427277083816636e-06, "loss": 0.5163, "step": 5194 }, { "epoch": 0.8531603473405457, "grad_norm": 0.4674936251067023, "learning_rate": 8.427048107295275e-06, "loss": 0.5103, "step": 5195 }, { "epoch": 0.8533245745488884, "grad_norm": 0.3264717854724029, "learning_rate": 8.426819088122307e-06, "loss": 0.5157, "step": 5196 }, { "epoch": 0.8534888017572311, "grad_norm": 0.34619937788013283, "learning_rate": 8.42659002630022e-06, "loss": 0.5148, "step": 5197 }, { "epoch": 0.8536530289655738, "grad_norm": 0.30500362594914693, "learning_rate": 8.426360921831503e-06, "loss": 0.4945, "step": 5198 }, { "epoch": 0.8538172561739166, "grad_norm": 0.35639512512192423, "learning_rate": 8.426131774718641e-06, "loss": 0.5236, "step": 5199 }, { "epoch": 0.8539814833822593, "grad_norm": 0.32749143917043566, "learning_rate": 8.425902584964129e-06, "loss": 0.516, "step": 5200 }, { "epoch": 0.8541457105906021, "grad_norm": 0.355371873683865, "learning_rate": 8.425673352570448e-06, "loss": 0.5243, "step": 5201 }, { "epoch": 0.8543099377989448, "grad_norm": 0.3431264317700648, "learning_rate": 8.425444077540094e-06, "loss": 0.5081, "step": 5202 }, { "epoch": 0.8544741650072876, "grad_norm": 0.46083194627277885, "learning_rate": 8.425214759875558e-06, "loss": 0.5187, "step": 5203 }, { "epoch": 0.8546383922156303, "grad_norm": 0.3628692906065567, "learning_rate": 8.424985399579323e-06, "loss": 0.4859, "step": 5204 }, { "epoch": 0.8548026194239731, "grad_norm": 0.3008814972088375, "learning_rate": 8.424755996653889e-06, "loss": 0.4949, "step": 5205 }, { "epoch": 0.8549668466323158, "grad_norm": 0.33859496251143534, "learning_rate": 8.424526551101741e-06, "loss": 0.5117, "step": 5206 }, { "epoch": 0.8551310738406586, "grad_norm": 0.3178485647764328, "learning_rate": 8.424297062925375e-06, "loss": 0.5097, "step": 5207 }, { "epoch": 0.8552953010490013, "grad_norm": 0.310539171285264, "learning_rate": 8.42406753212728e-06, "loss": 0.5085, "step": 5208 }, { "epoch": 0.8554595282573441, "grad_norm": 0.3222299566904057, "learning_rate": 8.423837958709952e-06, "loss": 0.5099, "step": 5209 }, { "epoch": 0.8556237554656868, "grad_norm": 0.3062954314262607, "learning_rate": 8.423608342675883e-06, "loss": 0.4782, "step": 5210 }, { "epoch": 0.8557879826740296, "grad_norm": 0.3855658097772442, "learning_rate": 8.423378684027568e-06, "loss": 0.525, "step": 5211 }, { "epoch": 0.8559522098823723, "grad_norm": 0.4804967081262836, "learning_rate": 8.4231489827675e-06, "loss": 0.5052, "step": 5212 }, { "epoch": 0.856116437090715, "grad_norm": 0.3271596419138035, "learning_rate": 8.422919238898173e-06, "loss": 0.4708, "step": 5213 }, { "epoch": 0.8562806642990577, "grad_norm": 0.31481541934023216, "learning_rate": 8.422689452422084e-06, "loss": 0.5051, "step": 5214 }, { "epoch": 0.8564448915074004, "grad_norm": 0.3099269647615455, "learning_rate": 8.42245962334173e-06, "loss": 0.4819, "step": 5215 }, { "epoch": 0.8566091187157432, "grad_norm": 0.3438652387948608, "learning_rate": 8.422229751659602e-06, "loss": 0.4947, "step": 5216 }, { "epoch": 0.8567733459240859, "grad_norm": 0.3459932973707239, "learning_rate": 8.421999837378202e-06, "loss": 0.53, "step": 5217 }, { "epoch": 0.8569375731324287, "grad_norm": 0.27299285950949304, "learning_rate": 8.421769880500025e-06, "loss": 0.5281, "step": 5218 }, { "epoch": 0.8571018003407714, "grad_norm": 0.27523955025295577, "learning_rate": 8.421539881027568e-06, "loss": 0.4841, "step": 5219 }, { "epoch": 0.8572660275491142, "grad_norm": 0.34338702265752363, "learning_rate": 8.42130983896333e-06, "loss": 0.5255, "step": 5220 }, { "epoch": 0.8574302547574569, "grad_norm": 0.3695909557488081, "learning_rate": 8.421079754309808e-06, "loss": 0.5017, "step": 5221 }, { "epoch": 0.8575944819657997, "grad_norm": 0.26262217495334517, "learning_rate": 8.420849627069504e-06, "loss": 0.5295, "step": 5222 }, { "epoch": 0.8577587091741424, "grad_norm": 0.3373161131890121, "learning_rate": 8.420619457244915e-06, "loss": 0.5172, "step": 5223 }, { "epoch": 0.8579229363824852, "grad_norm": 0.4176117431308755, "learning_rate": 8.42038924483854e-06, "loss": 0.5118, "step": 5224 }, { "epoch": 0.8580871635908279, "grad_norm": 0.4681792027400935, "learning_rate": 8.420158989852881e-06, "loss": 0.5068, "step": 5225 }, { "epoch": 0.8582513907991707, "grad_norm": 0.2916379992376806, "learning_rate": 8.41992869229044e-06, "loss": 0.4982, "step": 5226 }, { "epoch": 0.8584156180075134, "grad_norm": 0.27631425561265743, "learning_rate": 8.419698352153715e-06, "loss": 0.492, "step": 5227 }, { "epoch": 0.8585798452158562, "grad_norm": 0.2795686993363181, "learning_rate": 8.41946796944521e-06, "loss": 0.5012, "step": 5228 }, { "epoch": 0.8587440724241989, "grad_norm": 0.3963207725944165, "learning_rate": 8.419237544167427e-06, "loss": 0.4981, "step": 5229 }, { "epoch": 0.8589082996325417, "grad_norm": 0.3328663936121557, "learning_rate": 8.419007076322869e-06, "loss": 0.5219, "step": 5230 }, { "epoch": 0.8590725268408843, "grad_norm": 0.3493391789677639, "learning_rate": 8.418776565914036e-06, "loss": 0.5091, "step": 5231 }, { "epoch": 0.8592367540492271, "grad_norm": 0.29098994266551437, "learning_rate": 8.418546012943436e-06, "loss": 0.5031, "step": 5232 }, { "epoch": 0.8594009812575698, "grad_norm": 0.37668531359547847, "learning_rate": 8.41831541741357e-06, "loss": 0.5135, "step": 5233 }, { "epoch": 0.8595652084659126, "grad_norm": 0.29737232046529355, "learning_rate": 8.418084779326944e-06, "loss": 0.5127, "step": 5234 }, { "epoch": 0.8597294356742553, "grad_norm": 0.3572696957219688, "learning_rate": 8.417854098686062e-06, "loss": 0.5112, "step": 5235 }, { "epoch": 0.859893662882598, "grad_norm": 0.29116505908127643, "learning_rate": 8.41762337549343e-06, "loss": 0.5032, "step": 5236 }, { "epoch": 0.8600578900909408, "grad_norm": 0.3227422175765217, "learning_rate": 8.417392609751553e-06, "loss": 0.5192, "step": 5237 }, { "epoch": 0.8602221172992836, "grad_norm": 0.3052186718364593, "learning_rate": 8.417161801462939e-06, "loss": 0.5033, "step": 5238 }, { "epoch": 0.8603863445076263, "grad_norm": 0.2792044493744665, "learning_rate": 8.416930950630094e-06, "loss": 0.5113, "step": 5239 }, { "epoch": 0.860550571715969, "grad_norm": 0.28137907424947856, "learning_rate": 8.416700057255524e-06, "loss": 0.485, "step": 5240 }, { "epoch": 0.8607147989243118, "grad_norm": 0.3848989139038607, "learning_rate": 8.416469121341739e-06, "loss": 0.5146, "step": 5241 }, { "epoch": 0.8608790261326545, "grad_norm": 0.3647020656817828, "learning_rate": 8.416238142891246e-06, "loss": 0.5123, "step": 5242 }, { "epoch": 0.8610432533409973, "grad_norm": 0.30122311372081473, "learning_rate": 8.416007121906553e-06, "loss": 0.5191, "step": 5243 }, { "epoch": 0.86120748054934, "grad_norm": 0.3073748720310311, "learning_rate": 8.41577605839017e-06, "loss": 0.5236, "step": 5244 }, { "epoch": 0.8613717077576828, "grad_norm": 0.2905245125656662, "learning_rate": 8.415544952344607e-06, "loss": 0.4822, "step": 5245 }, { "epoch": 0.8615359349660255, "grad_norm": 0.30468902373544227, "learning_rate": 8.415313803772374e-06, "loss": 0.4926, "step": 5246 }, { "epoch": 0.8617001621743683, "grad_norm": 0.31458419828369694, "learning_rate": 8.415082612675979e-06, "loss": 0.5063, "step": 5247 }, { "epoch": 0.8618643893827109, "grad_norm": 0.30256781957016937, "learning_rate": 8.414851379057936e-06, "loss": 0.5143, "step": 5248 }, { "epoch": 0.8620286165910537, "grad_norm": 0.3512098795701238, "learning_rate": 8.414620102920755e-06, "loss": 0.5257, "step": 5249 }, { "epoch": 0.8621928437993964, "grad_norm": 0.33016065512395715, "learning_rate": 8.414388784266948e-06, "loss": 0.5003, "step": 5250 }, { "epoch": 0.8623570710077392, "grad_norm": 0.349765377162814, "learning_rate": 8.41415742309903e-06, "loss": 0.5098, "step": 5251 }, { "epoch": 0.8625212982160819, "grad_norm": 0.3232602041674713, "learning_rate": 8.413926019419508e-06, "loss": 0.5323, "step": 5252 }, { "epoch": 0.8626855254244247, "grad_norm": 0.33022614357718966, "learning_rate": 8.4136945732309e-06, "loss": 0.504, "step": 5253 }, { "epoch": 0.8628497526327674, "grad_norm": 0.40419642120733285, "learning_rate": 8.413463084535718e-06, "loss": 0.5134, "step": 5254 }, { "epoch": 0.8630139798411102, "grad_norm": 0.2947040903999055, "learning_rate": 8.413231553336478e-06, "loss": 0.52, "step": 5255 }, { "epoch": 0.8631782070494529, "grad_norm": 0.3128924063031786, "learning_rate": 8.412999979635692e-06, "loss": 0.5085, "step": 5256 }, { "epoch": 0.8633424342577957, "grad_norm": 0.34440429823573077, "learning_rate": 8.412768363435875e-06, "loss": 0.4934, "step": 5257 }, { "epoch": 0.8635066614661384, "grad_norm": 0.3223141456692689, "learning_rate": 8.412536704739547e-06, "loss": 0.506, "step": 5258 }, { "epoch": 0.8636708886744812, "grad_norm": 0.3620705564585329, "learning_rate": 8.41230500354922e-06, "loss": 0.5231, "step": 5259 }, { "epoch": 0.8638351158828239, "grad_norm": 0.40002625330809766, "learning_rate": 8.41207325986741e-06, "loss": 0.5201, "step": 5260 }, { "epoch": 0.8639993430911667, "grad_norm": 0.3658458873894559, "learning_rate": 8.411841473696637e-06, "loss": 0.5202, "step": 5261 }, { "epoch": 0.8641635702995094, "grad_norm": 0.4622292620162486, "learning_rate": 8.411609645039415e-06, "loss": 0.4848, "step": 5262 }, { "epoch": 0.8643277975078522, "grad_norm": 0.34047389699532343, "learning_rate": 8.411377773898267e-06, "loss": 0.5357, "step": 5263 }, { "epoch": 0.8644920247161949, "grad_norm": 0.33676241125712014, "learning_rate": 8.411145860275706e-06, "loss": 0.4958, "step": 5264 }, { "epoch": 0.8646562519245375, "grad_norm": 0.31135847431054403, "learning_rate": 8.410913904174252e-06, "loss": 0.5062, "step": 5265 }, { "epoch": 0.8648204791328803, "grad_norm": 0.39107972410690034, "learning_rate": 8.410681905596426e-06, "loss": 0.5082, "step": 5266 }, { "epoch": 0.864984706341223, "grad_norm": 0.3025022980375748, "learning_rate": 8.410449864544748e-06, "loss": 0.5117, "step": 5267 }, { "epoch": 0.8651489335495658, "grad_norm": 0.27122718992549955, "learning_rate": 8.410217781021736e-06, "loss": 0.5079, "step": 5268 }, { "epoch": 0.8653131607579085, "grad_norm": 0.33819644861136894, "learning_rate": 8.409985655029912e-06, "loss": 0.5044, "step": 5269 }, { "epoch": 0.8654773879662513, "grad_norm": 0.3125457178886905, "learning_rate": 8.409753486571795e-06, "loss": 0.4664, "step": 5270 }, { "epoch": 0.865641615174594, "grad_norm": 0.3283184967907039, "learning_rate": 8.409521275649912e-06, "loss": 0.5255, "step": 5271 }, { "epoch": 0.8658058423829368, "grad_norm": 0.35440138371082225, "learning_rate": 8.40928902226678e-06, "loss": 0.5087, "step": 5272 }, { "epoch": 0.8659700695912795, "grad_norm": 1.3917466044548144, "learning_rate": 8.409056726424922e-06, "loss": 0.4998, "step": 5273 }, { "epoch": 0.8661342967996223, "grad_norm": 0.2904222163965924, "learning_rate": 8.408824388126863e-06, "loss": 0.4879, "step": 5274 }, { "epoch": 0.866298524007965, "grad_norm": 0.3338819728127975, "learning_rate": 8.408592007375125e-06, "loss": 0.5197, "step": 5275 }, { "epoch": 0.8664627512163078, "grad_norm": 0.3069337778776626, "learning_rate": 8.408359584172234e-06, "loss": 0.4761, "step": 5276 }, { "epoch": 0.8666269784246505, "grad_norm": 0.3198664615484879, "learning_rate": 8.40812711852071e-06, "loss": 0.5159, "step": 5277 }, { "epoch": 0.8667912056329933, "grad_norm": 0.2928415185108605, "learning_rate": 8.407894610423082e-06, "loss": 0.4881, "step": 5278 }, { "epoch": 0.866955432841336, "grad_norm": 0.2942661306167567, "learning_rate": 8.407662059881872e-06, "loss": 0.5326, "step": 5279 }, { "epoch": 0.8671196600496788, "grad_norm": 0.33386555344083646, "learning_rate": 8.407429466899608e-06, "loss": 0.5147, "step": 5280 }, { "epoch": 0.8672838872580215, "grad_norm": 0.35711923571285464, "learning_rate": 8.407196831478817e-06, "loss": 0.5014, "step": 5281 }, { "epoch": 0.8674481144663642, "grad_norm": 0.3719107496609669, "learning_rate": 8.406964153622023e-06, "loss": 0.5145, "step": 5282 }, { "epoch": 0.8676123416747069, "grad_norm": 0.336945720581192, "learning_rate": 8.406731433331756e-06, "loss": 0.5096, "step": 5283 }, { "epoch": 0.8677765688830497, "grad_norm": 0.3087115536368896, "learning_rate": 8.40649867061054e-06, "loss": 0.5227, "step": 5284 }, { "epoch": 0.8679407960913924, "grad_norm": 0.47804182837248027, "learning_rate": 8.406265865460905e-06, "loss": 0.5085, "step": 5285 }, { "epoch": 0.8681050232997352, "grad_norm": 0.3378849373597841, "learning_rate": 8.406033017885381e-06, "loss": 0.4883, "step": 5286 }, { "epoch": 0.8682692505080779, "grad_norm": 0.4126831812504061, "learning_rate": 8.405800127886493e-06, "loss": 0.5181, "step": 5287 }, { "epoch": 0.8684334777164207, "grad_norm": 0.27899762274396317, "learning_rate": 8.405567195466775e-06, "loss": 0.5227, "step": 5288 }, { "epoch": 0.8685977049247634, "grad_norm": 0.33191781981524426, "learning_rate": 8.405334220628754e-06, "loss": 0.4971, "step": 5289 }, { "epoch": 0.8687619321331062, "grad_norm": 0.29662629916763467, "learning_rate": 8.405101203374962e-06, "loss": 0.5116, "step": 5290 }, { "epoch": 0.8689261593414489, "grad_norm": 0.32905380917085053, "learning_rate": 8.404868143707927e-06, "loss": 0.5111, "step": 5291 }, { "epoch": 0.8690903865497916, "grad_norm": 0.3228454402336175, "learning_rate": 8.404635041630184e-06, "loss": 0.533, "step": 5292 }, { "epoch": 0.8692546137581344, "grad_norm": 0.3159360456674922, "learning_rate": 8.404401897144262e-06, "loss": 0.5142, "step": 5293 }, { "epoch": 0.8694188409664771, "grad_norm": 0.31946115452138973, "learning_rate": 8.404168710252692e-06, "loss": 0.5095, "step": 5294 }, { "epoch": 0.8695830681748199, "grad_norm": 0.7962735316090845, "learning_rate": 8.403935480958011e-06, "loss": 0.5373, "step": 5295 }, { "epoch": 0.8697472953831626, "grad_norm": 1.2910079032447723, "learning_rate": 8.40370220926275e-06, "loss": 0.4944, "step": 5296 }, { "epoch": 0.8699115225915054, "grad_norm": 0.3078680638961182, "learning_rate": 8.40346889516944e-06, "loss": 0.4875, "step": 5297 }, { "epoch": 0.8700757497998481, "grad_norm": 0.35891347383172, "learning_rate": 8.40323553868062e-06, "loss": 0.5185, "step": 5298 }, { "epoch": 0.8702399770081908, "grad_norm": 0.5653737358384439, "learning_rate": 8.40300213979882e-06, "loss": 0.51, "step": 5299 }, { "epoch": 0.8704042042165335, "grad_norm": 0.32168689984896887, "learning_rate": 8.402768698526577e-06, "loss": 0.5194, "step": 5300 }, { "epoch": 0.8705684314248763, "grad_norm": 0.3381898859178391, "learning_rate": 8.402535214866426e-06, "loss": 0.5038, "step": 5301 }, { "epoch": 0.870732658633219, "grad_norm": 0.34810336533021025, "learning_rate": 8.402301688820903e-06, "loss": 0.5198, "step": 5302 }, { "epoch": 0.8708968858415618, "grad_norm": 0.3556606225523487, "learning_rate": 8.402068120392545e-06, "loss": 0.51, "step": 5303 }, { "epoch": 0.8710611130499045, "grad_norm": 0.2772688917845348, "learning_rate": 8.401834509583889e-06, "loss": 0.5163, "step": 5304 }, { "epoch": 0.8712253402582473, "grad_norm": 0.3070635849318607, "learning_rate": 8.40160085639747e-06, "loss": 0.5107, "step": 5305 }, { "epoch": 0.87138956746659, "grad_norm": 0.35390726184331905, "learning_rate": 8.401367160835826e-06, "loss": 0.5043, "step": 5306 }, { "epoch": 0.8715537946749328, "grad_norm": 0.33187000412444934, "learning_rate": 8.401133422901497e-06, "loss": 0.5116, "step": 5307 }, { "epoch": 0.8717180218832755, "grad_norm": 0.40659711524377584, "learning_rate": 8.400899642597022e-06, "loss": 0.5036, "step": 5308 }, { "epoch": 0.8718822490916183, "grad_norm": 0.3851914077124121, "learning_rate": 8.400665819924938e-06, "loss": 0.5056, "step": 5309 }, { "epoch": 0.872046476299961, "grad_norm": 0.32665644167454533, "learning_rate": 8.400431954887785e-06, "loss": 0.513, "step": 5310 }, { "epoch": 0.8722107035083038, "grad_norm": 0.41519115858739336, "learning_rate": 8.400198047488105e-06, "loss": 0.5219, "step": 5311 }, { "epoch": 0.8723749307166465, "grad_norm": 0.3552816056869143, "learning_rate": 8.399964097728436e-06, "loss": 0.516, "step": 5312 }, { "epoch": 0.8725391579249893, "grad_norm": 0.2861691798039096, "learning_rate": 8.39973010561132e-06, "loss": 0.4861, "step": 5313 }, { "epoch": 0.872703385133332, "grad_norm": 0.32524768948820126, "learning_rate": 8.399496071139298e-06, "loss": 0.4899, "step": 5314 }, { "epoch": 0.8728676123416748, "grad_norm": 0.34057732140952096, "learning_rate": 8.39926199431491e-06, "loss": 0.508, "step": 5315 }, { "epoch": 0.8730318395500174, "grad_norm": 0.4133044624728016, "learning_rate": 8.399027875140703e-06, "loss": 0.4852, "step": 5316 }, { "epoch": 0.8731960667583601, "grad_norm": 0.3126250586763464, "learning_rate": 8.398793713619218e-06, "loss": 0.4831, "step": 5317 }, { "epoch": 0.8733602939667029, "grad_norm": 0.35913370774447195, "learning_rate": 8.398559509752995e-06, "loss": 0.5068, "step": 5318 }, { "epoch": 0.8735245211750456, "grad_norm": 0.3425673312783436, "learning_rate": 8.398325263544582e-06, "loss": 0.5124, "step": 5319 }, { "epoch": 0.8736887483833884, "grad_norm": 0.31258714176169194, "learning_rate": 8.39809097499652e-06, "loss": 0.5134, "step": 5320 }, { "epoch": 0.8738529755917311, "grad_norm": 0.30534161830143747, "learning_rate": 8.397856644111356e-06, "loss": 0.5052, "step": 5321 }, { "epoch": 0.8740172028000739, "grad_norm": 0.3471453883343114, "learning_rate": 8.397622270891632e-06, "loss": 0.475, "step": 5322 }, { "epoch": 0.8741814300084166, "grad_norm": 0.30377985143302766, "learning_rate": 8.397387855339896e-06, "loss": 0.4997, "step": 5323 }, { "epoch": 0.8743456572167594, "grad_norm": 0.32085464444851813, "learning_rate": 8.397153397458694e-06, "loss": 0.5079, "step": 5324 }, { "epoch": 0.8745098844251021, "grad_norm": 0.3914308128149654, "learning_rate": 8.396918897250571e-06, "loss": 0.5051, "step": 5325 }, { "epoch": 0.8746741116334449, "grad_norm": 0.3968388154681836, "learning_rate": 8.396684354718076e-06, "loss": 0.4986, "step": 5326 }, { "epoch": 0.8748383388417876, "grad_norm": 0.2997877334792993, "learning_rate": 8.396449769863754e-06, "loss": 0.5121, "step": 5327 }, { "epoch": 0.8750025660501304, "grad_norm": 0.4294864886076223, "learning_rate": 8.396215142690154e-06, "loss": 0.5012, "step": 5328 }, { "epoch": 0.8751667932584731, "grad_norm": 0.31195660001293835, "learning_rate": 8.395980473199826e-06, "loss": 0.4977, "step": 5329 }, { "epoch": 0.8753310204668159, "grad_norm": 0.3503078869614596, "learning_rate": 8.395745761395314e-06, "loss": 0.515, "step": 5330 }, { "epoch": 0.8754952476751586, "grad_norm": 0.35642303207734, "learning_rate": 8.395511007279172e-06, "loss": 0.4936, "step": 5331 }, { "epoch": 0.8756594748835014, "grad_norm": 0.37437379291643125, "learning_rate": 8.395276210853946e-06, "loss": 0.5064, "step": 5332 }, { "epoch": 0.875823702091844, "grad_norm": 0.32548744308565203, "learning_rate": 8.39504137212219e-06, "loss": 0.5251, "step": 5333 }, { "epoch": 0.8759879293001868, "grad_norm": 0.3368536214240922, "learning_rate": 8.394806491086453e-06, "loss": 0.4841, "step": 5334 }, { "epoch": 0.8761521565085295, "grad_norm": 0.32379612932847374, "learning_rate": 8.394571567749283e-06, "loss": 0.519, "step": 5335 }, { "epoch": 0.8763163837168723, "grad_norm": 0.254907543345491, "learning_rate": 8.394336602113235e-06, "loss": 0.4934, "step": 5336 }, { "epoch": 0.876480610925215, "grad_norm": 0.292578050848311, "learning_rate": 8.39410159418086e-06, "loss": 0.5038, "step": 5337 }, { "epoch": 0.8766448381335578, "grad_norm": 0.33068895234319823, "learning_rate": 8.393866543954713e-06, "loss": 0.4894, "step": 5338 }, { "epoch": 0.8768090653419005, "grad_norm": 0.3022738707416738, "learning_rate": 8.393631451437341e-06, "loss": 0.495, "step": 5339 }, { "epoch": 0.8769732925502433, "grad_norm": 0.3309924116679411, "learning_rate": 8.393396316631301e-06, "loss": 0.4907, "step": 5340 }, { "epoch": 0.877137519758586, "grad_norm": 0.3072778428978678, "learning_rate": 8.393161139539147e-06, "loss": 0.5083, "step": 5341 }, { "epoch": 0.8773017469669288, "grad_norm": 0.4056601024135022, "learning_rate": 8.392925920163433e-06, "loss": 0.494, "step": 5342 }, { "epoch": 0.8774659741752715, "grad_norm": 0.3292865631147971, "learning_rate": 8.392690658506713e-06, "loss": 0.477, "step": 5343 }, { "epoch": 0.8776302013836143, "grad_norm": 0.3173888394810557, "learning_rate": 8.392455354571542e-06, "loss": 0.5042, "step": 5344 }, { "epoch": 0.877794428591957, "grad_norm": 0.41650203746899644, "learning_rate": 8.392220008360478e-06, "loss": 0.5128, "step": 5345 }, { "epoch": 0.8779586558002997, "grad_norm": 0.3128633850645757, "learning_rate": 8.391984619876073e-06, "loss": 0.5117, "step": 5346 }, { "epoch": 0.8781228830086425, "grad_norm": 0.3739198066157396, "learning_rate": 8.391749189120889e-06, "loss": 0.4983, "step": 5347 }, { "epoch": 0.8782871102169852, "grad_norm": 0.3270565163589913, "learning_rate": 8.391513716097476e-06, "loss": 0.5094, "step": 5348 }, { "epoch": 0.878451337425328, "grad_norm": 0.36481461717522407, "learning_rate": 8.391278200808398e-06, "loss": 0.4884, "step": 5349 }, { "epoch": 0.8786155646336706, "grad_norm": 0.3380444447356424, "learning_rate": 8.39104264325621e-06, "loss": 0.4973, "step": 5350 }, { "epoch": 0.8787797918420134, "grad_norm": 0.3226705104713618, "learning_rate": 8.390807043443468e-06, "loss": 0.5139, "step": 5351 }, { "epoch": 0.8789440190503561, "grad_norm": 0.3562909994144863, "learning_rate": 8.390571401372737e-06, "loss": 0.5125, "step": 5352 }, { "epoch": 0.8791082462586989, "grad_norm": 0.3465108100828412, "learning_rate": 8.39033571704657e-06, "loss": 0.5038, "step": 5353 }, { "epoch": 0.8792724734670416, "grad_norm": 0.46935542963470755, "learning_rate": 8.390099990467531e-06, "loss": 0.5063, "step": 5354 }, { "epoch": 0.8794367006753844, "grad_norm": 0.34137317758615526, "learning_rate": 8.389864221638179e-06, "loss": 0.514, "step": 5355 }, { "epoch": 0.8796009278837271, "grad_norm": 0.29825938926351886, "learning_rate": 8.389628410561074e-06, "loss": 0.5368, "step": 5356 }, { "epoch": 0.8797651550920699, "grad_norm": 0.32769587710392506, "learning_rate": 8.389392557238777e-06, "loss": 0.5395, "step": 5357 }, { "epoch": 0.8799293823004126, "grad_norm": 0.3103698388540546, "learning_rate": 8.389156661673851e-06, "loss": 0.4951, "step": 5358 }, { "epoch": 0.8800936095087554, "grad_norm": 0.31391936442953045, "learning_rate": 8.388920723868858e-06, "loss": 0.5139, "step": 5359 }, { "epoch": 0.8802578367170981, "grad_norm": 0.30702069935676596, "learning_rate": 8.388684743826358e-06, "loss": 0.4914, "step": 5360 }, { "epoch": 0.8804220639254409, "grad_norm": 0.287494636924883, "learning_rate": 8.388448721548916e-06, "loss": 0.5036, "step": 5361 }, { "epoch": 0.8805862911337836, "grad_norm": 0.3079894511670955, "learning_rate": 8.388212657039097e-06, "loss": 0.4936, "step": 5362 }, { "epoch": 0.8807505183421264, "grad_norm": 0.3360880269155661, "learning_rate": 8.387976550299462e-06, "loss": 0.5035, "step": 5363 }, { "epoch": 0.8809147455504691, "grad_norm": 0.5102037605942567, "learning_rate": 8.387740401332574e-06, "loss": 0.5013, "step": 5364 }, { "epoch": 0.8810789727588119, "grad_norm": 0.3313606109020479, "learning_rate": 8.387504210141003e-06, "loss": 0.4879, "step": 5365 }, { "epoch": 0.8812431999671546, "grad_norm": 0.3999436482907233, "learning_rate": 8.387267976727312e-06, "loss": 0.5208, "step": 5366 }, { "epoch": 0.8814074271754972, "grad_norm": 0.3326458033025108, "learning_rate": 8.387031701094066e-06, "loss": 0.5074, "step": 5367 }, { "epoch": 0.88157165438384, "grad_norm": 0.34094614319822397, "learning_rate": 8.386795383243828e-06, "loss": 0.5159, "step": 5368 }, { "epoch": 0.8817358815921827, "grad_norm": 0.3706442446081152, "learning_rate": 8.386559023179172e-06, "loss": 0.4928, "step": 5369 }, { "epoch": 0.8819001088005255, "grad_norm": 0.35070846954591484, "learning_rate": 8.38632262090266e-06, "loss": 0.5259, "step": 5370 }, { "epoch": 0.8820643360088682, "grad_norm": 0.3703793978322178, "learning_rate": 8.386086176416859e-06, "loss": 0.521, "step": 5371 }, { "epoch": 0.882228563217211, "grad_norm": 0.3957430663808368, "learning_rate": 8.38584968972434e-06, "loss": 0.5108, "step": 5372 }, { "epoch": 0.8823927904255537, "grad_norm": 0.3077741513865519, "learning_rate": 8.385613160827672e-06, "loss": 0.5007, "step": 5373 }, { "epoch": 0.8825570176338965, "grad_norm": 0.32405883564522314, "learning_rate": 8.385376589729419e-06, "loss": 0.4945, "step": 5374 }, { "epoch": 0.8827212448422392, "grad_norm": 0.32194303637237365, "learning_rate": 8.385139976432155e-06, "loss": 0.5216, "step": 5375 }, { "epoch": 0.882885472050582, "grad_norm": 0.34449583253230837, "learning_rate": 8.384903320938449e-06, "loss": 0.5024, "step": 5376 }, { "epoch": 0.8830496992589247, "grad_norm": 0.3035329723939267, "learning_rate": 8.38466662325087e-06, "loss": 0.5021, "step": 5377 }, { "epoch": 0.8832139264672675, "grad_norm": 0.34319045438113116, "learning_rate": 8.384429883371989e-06, "loss": 0.5162, "step": 5378 }, { "epoch": 0.8833781536756102, "grad_norm": 0.36034984090698774, "learning_rate": 8.384193101304377e-06, "loss": 0.5028, "step": 5379 }, { "epoch": 0.883542380883953, "grad_norm": 0.33601207342080214, "learning_rate": 8.383956277050609e-06, "loss": 0.4913, "step": 5380 }, { "epoch": 0.8837066080922957, "grad_norm": 0.394729287186722, "learning_rate": 8.383719410613254e-06, "loss": 0.5089, "step": 5381 }, { "epoch": 0.8838708353006385, "grad_norm": 0.33148220436620357, "learning_rate": 8.383482501994884e-06, "loss": 0.4929, "step": 5382 }, { "epoch": 0.8840350625089812, "grad_norm": 0.4144935425174578, "learning_rate": 8.383245551198074e-06, "loss": 0.5209, "step": 5383 }, { "epoch": 0.8841992897173239, "grad_norm": 0.29606041865345, "learning_rate": 8.383008558225395e-06, "loss": 0.4981, "step": 5384 }, { "epoch": 0.8843635169256666, "grad_norm": 0.31134244297032126, "learning_rate": 8.382771523079424e-06, "loss": 0.4992, "step": 5385 }, { "epoch": 0.8845277441340094, "grad_norm": 0.3370958117903781, "learning_rate": 8.382534445762735e-06, "loss": 0.4937, "step": 5386 }, { "epoch": 0.8846919713423521, "grad_norm": 0.3001698439254947, "learning_rate": 8.3822973262779e-06, "loss": 0.5329, "step": 5387 }, { "epoch": 0.8848561985506949, "grad_norm": 0.30084477565026285, "learning_rate": 8.382060164627499e-06, "loss": 0.5059, "step": 5388 }, { "epoch": 0.8850204257590376, "grad_norm": 0.3346426570065145, "learning_rate": 8.381822960814102e-06, "loss": 0.5013, "step": 5389 }, { "epoch": 0.8851846529673804, "grad_norm": 0.29458072214294884, "learning_rate": 8.381585714840291e-06, "loss": 0.505, "step": 5390 }, { "epoch": 0.8853488801757231, "grad_norm": 0.3378818988362336, "learning_rate": 8.38134842670864e-06, "loss": 0.5187, "step": 5391 }, { "epoch": 0.8855131073840659, "grad_norm": 0.35910179479581733, "learning_rate": 8.381111096421725e-06, "loss": 0.5095, "step": 5392 }, { "epoch": 0.8856773345924086, "grad_norm": 0.31216769263690824, "learning_rate": 8.380873723982126e-06, "loss": 0.5111, "step": 5393 }, { "epoch": 0.8858415618007514, "grad_norm": 0.4289924367030972, "learning_rate": 8.380636309392419e-06, "loss": 0.4956, "step": 5394 }, { "epoch": 0.8860057890090941, "grad_norm": 0.4956517822724945, "learning_rate": 8.380398852655184e-06, "loss": 0.4938, "step": 5395 }, { "epoch": 0.8861700162174369, "grad_norm": 0.28831824779869475, "learning_rate": 8.380161353773e-06, "loss": 0.5125, "step": 5396 }, { "epoch": 0.8863342434257796, "grad_norm": 0.3097895844490996, "learning_rate": 8.379923812748447e-06, "loss": 0.513, "step": 5397 }, { "epoch": 0.8864984706341223, "grad_norm": 0.3296849530356679, "learning_rate": 8.379686229584103e-06, "loss": 0.4992, "step": 5398 }, { "epoch": 0.8866626978424651, "grad_norm": 0.40568229779401993, "learning_rate": 8.37944860428255e-06, "loss": 0.5006, "step": 5399 }, { "epoch": 0.8868269250508078, "grad_norm": 0.34325966725882706, "learning_rate": 8.379210936846368e-06, "loss": 0.4764, "step": 5400 }, { "epoch": 0.8869911522591505, "grad_norm": 0.4758915504276449, "learning_rate": 8.378973227278139e-06, "loss": 0.5055, "step": 5401 }, { "epoch": 0.8871553794674932, "grad_norm": 0.297381855982727, "learning_rate": 8.378735475580444e-06, "loss": 0.498, "step": 5402 }, { "epoch": 0.887319606675836, "grad_norm": 0.4933418017370094, "learning_rate": 8.378497681755865e-06, "loss": 0.4995, "step": 5403 }, { "epoch": 0.8874838338841787, "grad_norm": 0.3983723166968519, "learning_rate": 8.378259845806986e-06, "loss": 0.5365, "step": 5404 }, { "epoch": 0.8876480610925215, "grad_norm": 0.3143111854125081, "learning_rate": 8.378021967736388e-06, "loss": 0.5197, "step": 5405 }, { "epoch": 0.8878122883008642, "grad_norm": 0.3801638714521817, "learning_rate": 8.377784047546657e-06, "loss": 0.4944, "step": 5406 }, { "epoch": 0.887976515509207, "grad_norm": 0.34407817181065525, "learning_rate": 8.377546085240376e-06, "loss": 0.4994, "step": 5407 }, { "epoch": 0.8881407427175497, "grad_norm": 0.3133043465281556, "learning_rate": 8.37730808082013e-06, "loss": 0.4759, "step": 5408 }, { "epoch": 0.8883049699258925, "grad_norm": 0.2965789041034004, "learning_rate": 8.377070034288505e-06, "loss": 0.5046, "step": 5409 }, { "epoch": 0.8884691971342352, "grad_norm": 0.326618198478491, "learning_rate": 8.376831945648081e-06, "loss": 0.5124, "step": 5410 }, { "epoch": 0.888633424342578, "grad_norm": 0.3053859757388855, "learning_rate": 8.37659381490145e-06, "loss": 0.4893, "step": 5411 }, { "epoch": 0.8887976515509207, "grad_norm": 0.2954348959691266, "learning_rate": 8.376355642051196e-06, "loss": 0.5098, "step": 5412 }, { "epoch": 0.8889618787592635, "grad_norm": 0.3135215141770848, "learning_rate": 8.376117427099907e-06, "loss": 0.4979, "step": 5413 }, { "epoch": 0.8891261059676062, "grad_norm": 0.3271102608994896, "learning_rate": 8.375879170050167e-06, "loss": 0.5178, "step": 5414 }, { "epoch": 0.889290333175949, "grad_norm": 0.38229922813402745, "learning_rate": 8.375640870904568e-06, "loss": 0.5232, "step": 5415 }, { "epoch": 0.8894545603842917, "grad_norm": 0.3797870401260684, "learning_rate": 8.375402529665694e-06, "loss": 0.4986, "step": 5416 }, { "epoch": 0.8896187875926345, "grad_norm": 0.31200316783816984, "learning_rate": 8.375164146336137e-06, "loss": 0.5241, "step": 5417 }, { "epoch": 0.8897830148009771, "grad_norm": 0.3064704900631936, "learning_rate": 8.374925720918485e-06, "loss": 0.4955, "step": 5418 }, { "epoch": 0.8899472420093198, "grad_norm": 0.3058769827785828, "learning_rate": 8.374687253415326e-06, "loss": 0.4883, "step": 5419 }, { "epoch": 0.8901114692176626, "grad_norm": 0.2843411368389142, "learning_rate": 8.374448743829252e-06, "loss": 0.5217, "step": 5420 }, { "epoch": 0.8902756964260053, "grad_norm": 0.3728611557769457, "learning_rate": 8.37421019216285e-06, "loss": 0.4938, "step": 5421 }, { "epoch": 0.8904399236343481, "grad_norm": 0.3011377477606396, "learning_rate": 8.373971598418717e-06, "loss": 0.5077, "step": 5422 }, { "epoch": 0.8906041508426908, "grad_norm": 0.36206134781906424, "learning_rate": 8.373732962599441e-06, "loss": 0.494, "step": 5423 }, { "epoch": 0.8907683780510336, "grad_norm": 0.3908084739505783, "learning_rate": 8.373494284707613e-06, "loss": 0.5201, "step": 5424 }, { "epoch": 0.8909326052593763, "grad_norm": 0.2749425323407986, "learning_rate": 8.373255564745824e-06, "loss": 0.4962, "step": 5425 }, { "epoch": 0.8910968324677191, "grad_norm": 0.3303930882629094, "learning_rate": 8.373016802716673e-06, "loss": 0.5098, "step": 5426 }, { "epoch": 0.8912610596760618, "grad_norm": 0.2998241441933591, "learning_rate": 8.372777998622745e-06, "loss": 0.4869, "step": 5427 }, { "epoch": 0.8914252868844046, "grad_norm": 0.29451950284439987, "learning_rate": 8.37253915246664e-06, "loss": 0.5264, "step": 5428 }, { "epoch": 0.8915895140927473, "grad_norm": 0.3063418491635172, "learning_rate": 8.37230026425095e-06, "loss": 0.5038, "step": 5429 }, { "epoch": 0.8917537413010901, "grad_norm": 0.2996944732030016, "learning_rate": 8.372061333978266e-06, "loss": 0.5014, "step": 5430 }, { "epoch": 0.8919179685094328, "grad_norm": 0.3424116018767127, "learning_rate": 8.37182236165119e-06, "loss": 0.5089, "step": 5431 }, { "epoch": 0.8920821957177756, "grad_norm": 0.29079573719915974, "learning_rate": 8.371583347272314e-06, "loss": 0.5219, "step": 5432 }, { "epoch": 0.8922464229261183, "grad_norm": 0.3304927425490326, "learning_rate": 8.37134429084423e-06, "loss": 0.5338, "step": 5433 }, { "epoch": 0.8924106501344611, "grad_norm": 0.4902664749685111, "learning_rate": 8.371105192369541e-06, "loss": 0.5078, "step": 5434 }, { "epoch": 0.8925748773428037, "grad_norm": 0.39179044375461103, "learning_rate": 8.37086605185084e-06, "loss": 0.514, "step": 5435 }, { "epoch": 0.8927391045511465, "grad_norm": 0.3111137817750461, "learning_rate": 8.370626869290725e-06, "loss": 0.4722, "step": 5436 }, { "epoch": 0.8929033317594892, "grad_norm": 0.30758120367970715, "learning_rate": 8.370387644691796e-06, "loss": 0.495, "step": 5437 }, { "epoch": 0.893067558967832, "grad_norm": 0.3982671596775974, "learning_rate": 8.370148378056647e-06, "loss": 0.5242, "step": 5438 }, { "epoch": 0.8932317861761747, "grad_norm": 0.27220767951031977, "learning_rate": 8.369909069387879e-06, "loss": 0.5019, "step": 5439 }, { "epoch": 0.8933960133845175, "grad_norm": 0.3798596024471178, "learning_rate": 8.369669718688093e-06, "loss": 0.5175, "step": 5440 }, { "epoch": 0.8935602405928602, "grad_norm": 0.46454027331217806, "learning_rate": 8.369430325959884e-06, "loss": 0.5056, "step": 5441 }, { "epoch": 0.893724467801203, "grad_norm": 0.2694505235037501, "learning_rate": 8.369190891205858e-06, "loss": 0.5006, "step": 5442 }, { "epoch": 0.8938886950095457, "grad_norm": 0.2990622026346461, "learning_rate": 8.36895141442861e-06, "loss": 0.5154, "step": 5443 }, { "epoch": 0.8940529222178885, "grad_norm": 0.30742433459438284, "learning_rate": 8.368711895630743e-06, "loss": 0.5201, "step": 5444 }, { "epoch": 0.8942171494262312, "grad_norm": 0.2877974512432656, "learning_rate": 8.36847233481486e-06, "loss": 0.4951, "step": 5445 }, { "epoch": 0.894381376634574, "grad_norm": 0.3799216470015632, "learning_rate": 8.368232731983559e-06, "loss": 0.524, "step": 5446 }, { "epoch": 0.8945456038429167, "grad_norm": 0.28985046386333174, "learning_rate": 8.367993087139446e-06, "loss": 0.4789, "step": 5447 }, { "epoch": 0.8947098310512595, "grad_norm": 0.2770590893279945, "learning_rate": 8.367753400285122e-06, "loss": 0.5151, "step": 5448 }, { "epoch": 0.8948740582596022, "grad_norm": 0.3125926454125177, "learning_rate": 8.367513671423191e-06, "loss": 0.5164, "step": 5449 }, { "epoch": 0.895038285467945, "grad_norm": 0.2789030307629401, "learning_rate": 8.367273900556256e-06, "loss": 0.4886, "step": 5450 }, { "epoch": 0.8952025126762877, "grad_norm": 0.3147777575769946, "learning_rate": 8.367034087686924e-06, "loss": 0.5288, "step": 5451 }, { "epoch": 0.8953667398846303, "grad_norm": 0.3087500424646868, "learning_rate": 8.366794232817795e-06, "loss": 0.493, "step": 5452 }, { "epoch": 0.8955309670929731, "grad_norm": 0.2880004076961026, "learning_rate": 8.366554335951474e-06, "loss": 0.4967, "step": 5453 }, { "epoch": 0.8956951943013158, "grad_norm": 0.26914699822687915, "learning_rate": 8.366314397090572e-06, "loss": 0.4957, "step": 5454 }, { "epoch": 0.8958594215096586, "grad_norm": 0.853277733033621, "learning_rate": 8.36607441623769e-06, "loss": 0.512, "step": 5455 }, { "epoch": 0.8960236487180013, "grad_norm": 0.28789407556256835, "learning_rate": 8.365834393395438e-06, "loss": 0.5066, "step": 5456 }, { "epoch": 0.8961878759263441, "grad_norm": 0.2684557559113861, "learning_rate": 8.36559432856642e-06, "loss": 0.5182, "step": 5457 }, { "epoch": 0.8963521031346868, "grad_norm": 0.3644698809817585, "learning_rate": 8.365354221753245e-06, "loss": 0.5137, "step": 5458 }, { "epoch": 0.8965163303430296, "grad_norm": 0.27994902706665303, "learning_rate": 8.36511407295852e-06, "loss": 0.4943, "step": 5459 }, { "epoch": 0.8966805575513723, "grad_norm": 0.35579239769018195, "learning_rate": 8.364873882184851e-06, "loss": 0.4925, "step": 5460 }, { "epoch": 0.8968447847597151, "grad_norm": 0.35041238641861094, "learning_rate": 8.364633649434853e-06, "loss": 0.5022, "step": 5461 }, { "epoch": 0.8970090119680578, "grad_norm": 0.3580120860072093, "learning_rate": 8.364393374711128e-06, "loss": 0.5104, "step": 5462 }, { "epoch": 0.8971732391764006, "grad_norm": 0.27954324217496795, "learning_rate": 8.364153058016292e-06, "loss": 0.5193, "step": 5463 }, { "epoch": 0.8973374663847433, "grad_norm": 0.3101665856935753, "learning_rate": 8.363912699352949e-06, "loss": 0.5095, "step": 5464 }, { "epoch": 0.8975016935930861, "grad_norm": 0.28512021161180856, "learning_rate": 8.363672298723714e-06, "loss": 0.4987, "step": 5465 }, { "epoch": 0.8976659208014288, "grad_norm": 0.35211494571368435, "learning_rate": 8.363431856131196e-06, "loss": 0.4955, "step": 5466 }, { "epoch": 0.8978301480097716, "grad_norm": 0.32744803108370507, "learning_rate": 8.363191371578006e-06, "loss": 0.5022, "step": 5467 }, { "epoch": 0.8979943752181143, "grad_norm": 0.3249957296286912, "learning_rate": 8.36295084506676e-06, "loss": 0.4945, "step": 5468 }, { "epoch": 0.898158602426457, "grad_norm": 0.28285293262085576, "learning_rate": 8.362710276600065e-06, "loss": 0.5094, "step": 5469 }, { "epoch": 0.8983228296347997, "grad_norm": 0.27983567395929104, "learning_rate": 8.362469666180536e-06, "loss": 0.4874, "step": 5470 }, { "epoch": 0.8984870568431425, "grad_norm": 0.397188968187713, "learning_rate": 8.362229013810786e-06, "loss": 0.5007, "step": 5471 }, { "epoch": 0.8986512840514852, "grad_norm": 0.29935214259494414, "learning_rate": 8.361988319493429e-06, "loss": 0.4832, "step": 5472 }, { "epoch": 0.898815511259828, "grad_norm": 0.35287779460582697, "learning_rate": 8.36174758323108e-06, "loss": 0.5194, "step": 5473 }, { "epoch": 0.8989797384681707, "grad_norm": 0.3208882613202515, "learning_rate": 8.361506805026352e-06, "loss": 0.4964, "step": 5474 }, { "epoch": 0.8991439656765134, "grad_norm": 0.3071412681372098, "learning_rate": 8.361265984881862e-06, "loss": 0.4833, "step": 5475 }, { "epoch": 0.8993081928848562, "grad_norm": 0.2924027689988717, "learning_rate": 8.361025122800223e-06, "loss": 0.5363, "step": 5476 }, { "epoch": 0.8994724200931989, "grad_norm": 0.2815585011141773, "learning_rate": 8.360784218784054e-06, "loss": 0.502, "step": 5477 }, { "epoch": 0.8996366473015417, "grad_norm": 0.361696972613681, "learning_rate": 8.360543272835968e-06, "loss": 0.5054, "step": 5478 }, { "epoch": 0.8998008745098844, "grad_norm": 0.2999832471190714, "learning_rate": 8.360302284958586e-06, "loss": 0.4908, "step": 5479 }, { "epoch": 0.8999651017182272, "grad_norm": 0.4261624509261548, "learning_rate": 8.360061255154521e-06, "loss": 0.4898, "step": 5480 }, { "epoch": 0.9001293289265699, "grad_norm": 0.2855915803003546, "learning_rate": 8.359820183426395e-06, "loss": 0.5164, "step": 5481 }, { "epoch": 0.9002935561349127, "grad_norm": 0.29396206548228376, "learning_rate": 8.359579069776822e-06, "loss": 0.5047, "step": 5482 }, { "epoch": 0.9004577833432554, "grad_norm": 0.3239709685752758, "learning_rate": 8.359337914208424e-06, "loss": 0.4881, "step": 5483 }, { "epoch": 0.9006220105515982, "grad_norm": 0.3189529432048531, "learning_rate": 8.35909671672382e-06, "loss": 0.5118, "step": 5484 }, { "epoch": 0.9007862377599409, "grad_norm": 0.3015118477260375, "learning_rate": 8.358855477325628e-06, "loss": 0.5252, "step": 5485 }, { "epoch": 0.9009504649682836, "grad_norm": 0.3220454001782521, "learning_rate": 8.35861419601647e-06, "loss": 0.5245, "step": 5486 }, { "epoch": 0.9011146921766263, "grad_norm": 0.29201083259315386, "learning_rate": 8.358372872798964e-06, "loss": 0.5238, "step": 5487 }, { "epoch": 0.9012789193849691, "grad_norm": 0.3027181699860984, "learning_rate": 8.358131507675735e-06, "loss": 0.4992, "step": 5488 }, { "epoch": 0.9014431465933118, "grad_norm": 0.3017338774888951, "learning_rate": 8.357890100649397e-06, "loss": 0.5002, "step": 5489 }, { "epoch": 0.9016073738016546, "grad_norm": 0.2942002751508648, "learning_rate": 8.357648651722582e-06, "loss": 0.503, "step": 5490 }, { "epoch": 0.9017716010099973, "grad_norm": 1.0776946000844136, "learning_rate": 8.357407160897904e-06, "loss": 0.5231, "step": 5491 }, { "epoch": 0.9019358282183401, "grad_norm": 0.2969948419282714, "learning_rate": 8.357165628177992e-06, "loss": 0.5096, "step": 5492 }, { "epoch": 0.9021000554266828, "grad_norm": 0.3085209220722856, "learning_rate": 8.356924053565463e-06, "loss": 0.4987, "step": 5493 }, { "epoch": 0.9022642826350256, "grad_norm": 0.3732207561485276, "learning_rate": 8.356682437062946e-06, "loss": 0.5137, "step": 5494 }, { "epoch": 0.9024285098433683, "grad_norm": 0.3006585094430494, "learning_rate": 8.356440778673063e-06, "loss": 0.4939, "step": 5495 }, { "epoch": 0.9025927370517111, "grad_norm": 0.33128076755712377, "learning_rate": 8.356199078398437e-06, "loss": 0.4933, "step": 5496 }, { "epoch": 0.9027569642600538, "grad_norm": 0.2931680063313428, "learning_rate": 8.355957336241697e-06, "loss": 0.4963, "step": 5497 }, { "epoch": 0.9029211914683966, "grad_norm": 0.43709410205234245, "learning_rate": 8.355715552205467e-06, "loss": 0.5172, "step": 5498 }, { "epoch": 0.9030854186767393, "grad_norm": 0.5518303131475227, "learning_rate": 8.355473726292373e-06, "loss": 0.5146, "step": 5499 }, { "epoch": 0.903249645885082, "grad_norm": 0.35951046327730357, "learning_rate": 8.35523185850504e-06, "loss": 0.5053, "step": 5500 }, { "epoch": 0.9034138730934248, "grad_norm": 0.31330180644732103, "learning_rate": 8.354989948846096e-06, "loss": 0.5169, "step": 5501 }, { "epoch": 0.9035781003017676, "grad_norm": 0.2881362333682155, "learning_rate": 8.354747997318168e-06, "loss": 0.49, "step": 5502 }, { "epoch": 0.9037423275101102, "grad_norm": 0.360101900220015, "learning_rate": 8.354506003923884e-06, "loss": 0.5072, "step": 5503 }, { "epoch": 0.9039065547184529, "grad_norm": 0.31153369530916664, "learning_rate": 8.354263968665873e-06, "loss": 0.4925, "step": 5504 }, { "epoch": 0.9040707819267957, "grad_norm": 0.37296961938079903, "learning_rate": 8.354021891546764e-06, "loss": 0.5135, "step": 5505 }, { "epoch": 0.9042350091351384, "grad_norm": 0.9154141358636969, "learning_rate": 8.353779772569184e-06, "loss": 0.5015, "step": 5506 }, { "epoch": 0.9043992363434812, "grad_norm": 0.296837819666743, "learning_rate": 8.353537611735765e-06, "loss": 0.5172, "step": 5507 }, { "epoch": 0.9045634635518239, "grad_norm": 0.5177522705968512, "learning_rate": 8.353295409049137e-06, "loss": 0.5139, "step": 5508 }, { "epoch": 0.9047276907601667, "grad_norm": 0.5204967918982784, "learning_rate": 8.353053164511928e-06, "loss": 0.5154, "step": 5509 }, { "epoch": 0.9048919179685094, "grad_norm": 0.27139361175624743, "learning_rate": 8.352810878126771e-06, "loss": 0.4869, "step": 5510 }, { "epoch": 0.9050561451768522, "grad_norm": 0.349204517447571, "learning_rate": 8.352568549896298e-06, "loss": 0.4964, "step": 5511 }, { "epoch": 0.9052203723851949, "grad_norm": 0.3148885616058754, "learning_rate": 8.352326179823139e-06, "loss": 0.4921, "step": 5512 }, { "epoch": 0.9053845995935377, "grad_norm": 0.5633992988398933, "learning_rate": 8.352083767909929e-06, "loss": 0.5095, "step": 5513 }, { "epoch": 0.9055488268018804, "grad_norm": 0.3212531165331761, "learning_rate": 8.351841314159298e-06, "loss": 0.5034, "step": 5514 }, { "epoch": 0.9057130540102232, "grad_norm": 0.45538311186140923, "learning_rate": 8.351598818573881e-06, "loss": 0.4843, "step": 5515 }, { "epoch": 0.9058772812185659, "grad_norm": 0.2766324918567951, "learning_rate": 8.351356281156313e-06, "loss": 0.5163, "step": 5516 }, { "epoch": 0.9060415084269087, "grad_norm": 0.36399910518378154, "learning_rate": 8.351113701909225e-06, "loss": 0.5107, "step": 5517 }, { "epoch": 0.9062057356352514, "grad_norm": 0.2757312212678587, "learning_rate": 8.350871080835253e-06, "loss": 0.4934, "step": 5518 }, { "epoch": 0.9063699628435941, "grad_norm": 0.2858011061130112, "learning_rate": 8.350628417937031e-06, "loss": 0.5012, "step": 5519 }, { "epoch": 0.9065341900519368, "grad_norm": 0.4340611250040619, "learning_rate": 8.350385713217198e-06, "loss": 0.5228, "step": 5520 }, { "epoch": 0.9066984172602796, "grad_norm": 0.3087231221133075, "learning_rate": 8.350142966678389e-06, "loss": 0.4983, "step": 5521 }, { "epoch": 0.9068626444686223, "grad_norm": 0.3000003148489532, "learning_rate": 8.349900178323235e-06, "loss": 0.493, "step": 5522 }, { "epoch": 0.907026871676965, "grad_norm": 0.32176413494489486, "learning_rate": 8.349657348154382e-06, "loss": 0.5153, "step": 5523 }, { "epoch": 0.9071910988853078, "grad_norm": 0.33979379386785885, "learning_rate": 8.34941447617446e-06, "loss": 0.5057, "step": 5524 }, { "epoch": 0.9073553260936505, "grad_norm": 0.2904116241815765, "learning_rate": 8.349171562386111e-06, "loss": 0.5207, "step": 5525 }, { "epoch": 0.9075195533019933, "grad_norm": 0.3719685326435547, "learning_rate": 8.348928606791971e-06, "loss": 0.483, "step": 5526 }, { "epoch": 0.907683780510336, "grad_norm": 0.42582862430775315, "learning_rate": 8.348685609394678e-06, "loss": 0.4974, "step": 5527 }, { "epoch": 0.9078480077186788, "grad_norm": 0.37620185257746835, "learning_rate": 8.348442570196875e-06, "loss": 0.5158, "step": 5528 }, { "epoch": 0.9080122349270215, "grad_norm": 0.34756089762014936, "learning_rate": 8.348199489201198e-06, "loss": 0.4865, "step": 5529 }, { "epoch": 0.9081764621353643, "grad_norm": 0.3427940825007974, "learning_rate": 8.34795636641029e-06, "loss": 0.5037, "step": 5530 }, { "epoch": 0.908340689343707, "grad_norm": 0.34160888168112385, "learning_rate": 8.347713201826788e-06, "loss": 0.5134, "step": 5531 }, { "epoch": 0.9085049165520498, "grad_norm": 0.30878978550613373, "learning_rate": 8.347469995453336e-06, "loss": 0.5287, "step": 5532 }, { "epoch": 0.9086691437603925, "grad_norm": 0.3362932220872595, "learning_rate": 8.347226747292575e-06, "loss": 0.5171, "step": 5533 }, { "epoch": 0.9088333709687353, "grad_norm": 0.6424167138285161, "learning_rate": 8.346983457347146e-06, "loss": 0.513, "step": 5534 }, { "epoch": 0.908997598177078, "grad_norm": 0.32394177404105495, "learning_rate": 8.346740125619689e-06, "loss": 0.4576, "step": 5535 }, { "epoch": 0.9091618253854207, "grad_norm": 0.34000863262833253, "learning_rate": 8.346496752112854e-06, "loss": 0.5224, "step": 5536 }, { "epoch": 0.9093260525937634, "grad_norm": 0.3247810713170286, "learning_rate": 8.346253336829277e-06, "loss": 0.4893, "step": 5537 }, { "epoch": 0.9094902798021062, "grad_norm": 0.2939508899368689, "learning_rate": 8.346009879771605e-06, "loss": 0.51, "step": 5538 }, { "epoch": 0.9096545070104489, "grad_norm": 0.32148698533969494, "learning_rate": 8.345766380942483e-06, "loss": 0.5037, "step": 5539 }, { "epoch": 0.9098187342187917, "grad_norm": 0.28369865166116354, "learning_rate": 8.345522840344553e-06, "loss": 0.4952, "step": 5540 }, { "epoch": 0.9099829614271344, "grad_norm": 0.5346924787068149, "learning_rate": 8.345279257980461e-06, "loss": 0.5126, "step": 5541 }, { "epoch": 0.9101471886354772, "grad_norm": 0.32168574288406493, "learning_rate": 8.345035633852855e-06, "loss": 0.5086, "step": 5542 }, { "epoch": 0.9103114158438199, "grad_norm": 0.3307362293505546, "learning_rate": 8.344791967964377e-06, "loss": 0.5107, "step": 5543 }, { "epoch": 0.9104756430521627, "grad_norm": 0.3365560145974131, "learning_rate": 8.344548260317678e-06, "loss": 0.488, "step": 5544 }, { "epoch": 0.9106398702605054, "grad_norm": 0.3480169970867232, "learning_rate": 8.3443045109154e-06, "loss": 0.4893, "step": 5545 }, { "epoch": 0.9108040974688482, "grad_norm": 0.299993591213752, "learning_rate": 8.344060719760193e-06, "loss": 0.4908, "step": 5546 }, { "epoch": 0.9109683246771909, "grad_norm": 0.2999257482434843, "learning_rate": 8.343816886854707e-06, "loss": 0.4876, "step": 5547 }, { "epoch": 0.9111325518855337, "grad_norm": 0.3924513462074901, "learning_rate": 8.343573012201584e-06, "loss": 0.5141, "step": 5548 }, { "epoch": 0.9112967790938764, "grad_norm": 0.3399115729754476, "learning_rate": 8.34332909580348e-06, "loss": 0.5082, "step": 5549 }, { "epoch": 0.9114610063022192, "grad_norm": 0.3194858626705532, "learning_rate": 8.343085137663037e-06, "loss": 0.4766, "step": 5550 }, { "epoch": 0.9116252335105619, "grad_norm": 0.47154597792158837, "learning_rate": 8.342841137782912e-06, "loss": 0.5031, "step": 5551 }, { "epoch": 0.9117894607189047, "grad_norm": 0.4575805099261349, "learning_rate": 8.342597096165748e-06, "loss": 0.51, "step": 5552 }, { "epoch": 0.9119536879272473, "grad_norm": 0.4071492592305457, "learning_rate": 8.342353012814202e-06, "loss": 0.4831, "step": 5553 }, { "epoch": 0.91211791513559, "grad_norm": 0.3173333243154672, "learning_rate": 8.34210888773092e-06, "loss": 0.4953, "step": 5554 }, { "epoch": 0.9122821423439328, "grad_norm": 0.3570551254398815, "learning_rate": 8.341864720918558e-06, "loss": 0.5008, "step": 5555 }, { "epoch": 0.9124463695522755, "grad_norm": 0.3179412588931241, "learning_rate": 8.341620512379762e-06, "loss": 0.5091, "step": 5556 }, { "epoch": 0.9126105967606183, "grad_norm": 0.3702640163571449, "learning_rate": 8.341376262117189e-06, "loss": 0.4887, "step": 5557 }, { "epoch": 0.912774823968961, "grad_norm": 0.33515234301086044, "learning_rate": 8.341131970133491e-06, "loss": 0.5023, "step": 5558 }, { "epoch": 0.9129390511773038, "grad_norm": 0.29906964223061433, "learning_rate": 8.34088763643132e-06, "loss": 0.4972, "step": 5559 }, { "epoch": 0.9131032783856465, "grad_norm": 0.3263066136291013, "learning_rate": 8.340643261013328e-06, "loss": 0.4997, "step": 5560 }, { "epoch": 0.9132675055939893, "grad_norm": 0.27989015681060403, "learning_rate": 8.340398843882175e-06, "loss": 0.5007, "step": 5561 }, { "epoch": 0.913431732802332, "grad_norm": 0.4458218715249904, "learning_rate": 8.340154385040511e-06, "loss": 0.4915, "step": 5562 }, { "epoch": 0.9135959600106748, "grad_norm": 0.30774662912355444, "learning_rate": 8.339909884490991e-06, "loss": 0.4783, "step": 5563 }, { "epoch": 0.9137601872190175, "grad_norm": 0.2993000222057823, "learning_rate": 8.339665342236275e-06, "loss": 0.5053, "step": 5564 }, { "epoch": 0.9139244144273603, "grad_norm": 0.3966672210293373, "learning_rate": 8.339420758279012e-06, "loss": 0.5183, "step": 5565 }, { "epoch": 0.914088641635703, "grad_norm": 0.30596909940584427, "learning_rate": 8.339176132621864e-06, "loss": 0.494, "step": 5566 }, { "epoch": 0.9142528688440458, "grad_norm": 0.35348430779350265, "learning_rate": 8.338931465267485e-06, "loss": 0.5163, "step": 5567 }, { "epoch": 0.9144170960523885, "grad_norm": 0.27033879796104393, "learning_rate": 8.338686756218534e-06, "loss": 0.5094, "step": 5568 }, { "epoch": 0.9145813232607313, "grad_norm": 0.3913309376275306, "learning_rate": 8.338442005477667e-06, "loss": 0.5038, "step": 5569 }, { "epoch": 0.9147455504690739, "grad_norm": 0.28689614433794935, "learning_rate": 8.338197213047544e-06, "loss": 0.5067, "step": 5570 }, { "epoch": 0.9149097776774167, "grad_norm": 0.4050157925870621, "learning_rate": 8.337952378930823e-06, "loss": 0.4983, "step": 5571 }, { "epoch": 0.9150740048857594, "grad_norm": 0.2865568953025675, "learning_rate": 8.337707503130163e-06, "loss": 0.4963, "step": 5572 }, { "epoch": 0.9152382320941022, "grad_norm": 0.29733174921046923, "learning_rate": 8.337462585648224e-06, "loss": 0.5153, "step": 5573 }, { "epoch": 0.9154024593024449, "grad_norm": 0.3180051252198621, "learning_rate": 8.337217626487665e-06, "loss": 0.5151, "step": 5574 }, { "epoch": 0.9155666865107877, "grad_norm": 0.40773067814807856, "learning_rate": 8.336972625651149e-06, "loss": 0.508, "step": 5575 }, { "epoch": 0.9157309137191304, "grad_norm": 0.3232392270623946, "learning_rate": 8.336727583141335e-06, "loss": 0.5015, "step": 5576 }, { "epoch": 0.9158951409274732, "grad_norm": 0.5050168357579791, "learning_rate": 8.336482498960883e-06, "loss": 0.5041, "step": 5577 }, { "epoch": 0.9160593681358159, "grad_norm": 0.36104906234395595, "learning_rate": 8.336237373112456e-06, "loss": 0.4821, "step": 5578 }, { "epoch": 0.9162235953441586, "grad_norm": 0.3076149792819011, "learning_rate": 8.33599220559872e-06, "loss": 0.5098, "step": 5579 }, { "epoch": 0.9163878225525014, "grad_norm": 0.6724914596205441, "learning_rate": 8.335746996422332e-06, "loss": 0.504, "step": 5580 }, { "epoch": 0.9165520497608441, "grad_norm": 0.42408137374830124, "learning_rate": 8.335501745585959e-06, "loss": 0.504, "step": 5581 }, { "epoch": 0.9167162769691869, "grad_norm": 0.41731179466139035, "learning_rate": 8.335256453092263e-06, "loss": 0.5103, "step": 5582 }, { "epoch": 0.9168805041775296, "grad_norm": 0.47722812707169454, "learning_rate": 8.335011118943908e-06, "loss": 0.4966, "step": 5583 }, { "epoch": 0.9170447313858724, "grad_norm": 0.4352879061716675, "learning_rate": 8.33476574314356e-06, "loss": 0.4979, "step": 5584 }, { "epoch": 0.9172089585942151, "grad_norm": 0.341870163144751, "learning_rate": 8.334520325693881e-06, "loss": 0.5084, "step": 5585 }, { "epoch": 0.9173731858025579, "grad_norm": 0.5324312522979684, "learning_rate": 8.334274866597541e-06, "loss": 0.4798, "step": 5586 }, { "epoch": 0.9175374130109005, "grad_norm": 0.39344851183289753, "learning_rate": 8.334029365857202e-06, "loss": 0.4974, "step": 5587 }, { "epoch": 0.9177016402192433, "grad_norm": 0.46426361762009455, "learning_rate": 8.333783823475533e-06, "loss": 0.5035, "step": 5588 }, { "epoch": 0.917865867427586, "grad_norm": 0.4075244707525329, "learning_rate": 8.333538239455199e-06, "loss": 0.48, "step": 5589 }, { "epoch": 0.9180300946359288, "grad_norm": 0.36870409810649557, "learning_rate": 8.333292613798868e-06, "loss": 0.505, "step": 5590 }, { "epoch": 0.9181943218442715, "grad_norm": 0.34786470966404165, "learning_rate": 8.333046946509209e-06, "loss": 0.4902, "step": 5591 }, { "epoch": 0.9183585490526143, "grad_norm": 0.2921529860594467, "learning_rate": 8.332801237588886e-06, "loss": 0.4824, "step": 5592 }, { "epoch": 0.918522776260957, "grad_norm": 0.6192703487224838, "learning_rate": 8.332555487040574e-06, "loss": 0.4948, "step": 5593 }, { "epoch": 0.9186870034692998, "grad_norm": 0.3317052036236938, "learning_rate": 8.332309694866937e-06, "loss": 0.5057, "step": 5594 }, { "epoch": 0.9188512306776425, "grad_norm": 0.38647318368047534, "learning_rate": 8.332063861070646e-06, "loss": 0.5057, "step": 5595 }, { "epoch": 0.9190154578859853, "grad_norm": 0.3435710297493405, "learning_rate": 8.331817985654374e-06, "loss": 0.4992, "step": 5596 }, { "epoch": 0.919179685094328, "grad_norm": 0.34725817180505014, "learning_rate": 8.331572068620785e-06, "loss": 0.4929, "step": 5597 }, { "epoch": 0.9193439123026708, "grad_norm": 0.3235167989235117, "learning_rate": 8.331326109972556e-06, "loss": 0.5095, "step": 5598 }, { "epoch": 0.9195081395110135, "grad_norm": 0.39005161313201875, "learning_rate": 8.331080109712355e-06, "loss": 0.4961, "step": 5599 }, { "epoch": 0.9196723667193563, "grad_norm": 0.3454792276254286, "learning_rate": 8.330834067842853e-06, "loss": 0.5285, "step": 5600 }, { "epoch": 0.919836593927699, "grad_norm": 1.0221632544154688, "learning_rate": 8.330587984366726e-06, "loss": 0.4986, "step": 5601 }, { "epoch": 0.9200008211360418, "grad_norm": 0.2994832026081828, "learning_rate": 8.330341859286645e-06, "loss": 0.4923, "step": 5602 }, { "epoch": 0.9201650483443845, "grad_norm": 0.4052390833737057, "learning_rate": 8.330095692605283e-06, "loss": 0.5122, "step": 5603 }, { "epoch": 0.9203292755527271, "grad_norm": 0.28648054829708214, "learning_rate": 8.329849484325313e-06, "loss": 0.5106, "step": 5604 }, { "epoch": 0.9204935027610699, "grad_norm": 0.3063542399597326, "learning_rate": 8.32960323444941e-06, "loss": 0.5021, "step": 5605 }, { "epoch": 0.9206577299694126, "grad_norm": 0.29985373636301893, "learning_rate": 8.329356942980245e-06, "loss": 0.5086, "step": 5606 }, { "epoch": 0.9208219571777554, "grad_norm": 0.35166974021523667, "learning_rate": 8.329110609920499e-06, "loss": 0.5017, "step": 5607 }, { "epoch": 0.9209861843860981, "grad_norm": 0.3991541658144142, "learning_rate": 8.328864235272845e-06, "loss": 0.5246, "step": 5608 }, { "epoch": 0.9211504115944409, "grad_norm": 0.3259044403152944, "learning_rate": 8.328617819039955e-06, "loss": 0.4865, "step": 5609 }, { "epoch": 0.9213146388027836, "grad_norm": 0.30778544588909174, "learning_rate": 8.328371361224512e-06, "loss": 0.4978, "step": 5610 }, { "epoch": 0.9214788660111264, "grad_norm": 0.5084722341526301, "learning_rate": 8.328124861829188e-06, "loss": 0.5107, "step": 5611 }, { "epoch": 0.9216430932194691, "grad_norm": 0.3117471003397797, "learning_rate": 8.327878320856662e-06, "loss": 0.4869, "step": 5612 }, { "epoch": 0.9218073204278119, "grad_norm": 0.34543706062762425, "learning_rate": 8.32763173830961e-06, "loss": 0.515, "step": 5613 }, { "epoch": 0.9219715476361546, "grad_norm": 0.334776273461246, "learning_rate": 8.327385114190714e-06, "loss": 0.4968, "step": 5614 }, { "epoch": 0.9221357748444974, "grad_norm": 0.32328953533067045, "learning_rate": 8.327138448502649e-06, "loss": 0.4855, "step": 5615 }, { "epoch": 0.9223000020528401, "grad_norm": 0.3302809998928379, "learning_rate": 8.326891741248094e-06, "loss": 0.5031, "step": 5616 }, { "epoch": 0.9224642292611829, "grad_norm": 0.3127553399557638, "learning_rate": 8.32664499242973e-06, "loss": 0.4988, "step": 5617 }, { "epoch": 0.9226284564695256, "grad_norm": 0.4586672526902129, "learning_rate": 8.326398202050236e-06, "loss": 0.4693, "step": 5618 }, { "epoch": 0.9227926836778684, "grad_norm": 0.3313144730744136, "learning_rate": 8.326151370112294e-06, "loss": 0.5186, "step": 5619 }, { "epoch": 0.9229569108862111, "grad_norm": 0.3316385292266702, "learning_rate": 8.325904496618583e-06, "loss": 0.5033, "step": 5620 }, { "epoch": 0.9231211380945538, "grad_norm": 0.29228573535246216, "learning_rate": 8.325657581571784e-06, "loss": 0.5004, "step": 5621 }, { "epoch": 0.9232853653028965, "grad_norm": 0.4226834910779713, "learning_rate": 8.325410624974582e-06, "loss": 0.493, "step": 5622 }, { "epoch": 0.9234495925112393, "grad_norm": 0.3458030476801539, "learning_rate": 8.325163626829656e-06, "loss": 0.5041, "step": 5623 }, { "epoch": 0.923613819719582, "grad_norm": 0.4242642019903402, "learning_rate": 8.324916587139689e-06, "loss": 0.5033, "step": 5624 }, { "epoch": 0.9237780469279248, "grad_norm": 0.3100029512474492, "learning_rate": 8.324669505907365e-06, "loss": 0.5062, "step": 5625 }, { "epoch": 0.9239422741362675, "grad_norm": 0.3185639380577177, "learning_rate": 8.324422383135368e-06, "loss": 0.4924, "step": 5626 }, { "epoch": 0.9241065013446103, "grad_norm": 0.4050609577398203, "learning_rate": 8.32417521882638e-06, "loss": 0.4899, "step": 5627 }, { "epoch": 0.924270728552953, "grad_norm": 0.31533095420755064, "learning_rate": 8.323928012983087e-06, "loss": 0.4909, "step": 5628 }, { "epoch": 0.9244349557612958, "grad_norm": 0.2842521067234476, "learning_rate": 8.323680765608173e-06, "loss": 0.4873, "step": 5629 }, { "epoch": 0.9245991829696385, "grad_norm": 0.3258802618679284, "learning_rate": 8.323433476704325e-06, "loss": 0.5076, "step": 5630 }, { "epoch": 0.9247634101779812, "grad_norm": 0.299409442711691, "learning_rate": 8.323186146274228e-06, "loss": 0.4988, "step": 5631 }, { "epoch": 0.924927637386324, "grad_norm": 0.3156360159559121, "learning_rate": 8.322938774320568e-06, "loss": 0.4989, "step": 5632 }, { "epoch": 0.9250918645946667, "grad_norm": 0.2938555458815616, "learning_rate": 8.32269136084603e-06, "loss": 0.5171, "step": 5633 }, { "epoch": 0.9252560918030095, "grad_norm": 0.40501414098775324, "learning_rate": 8.322443905853303e-06, "loss": 0.526, "step": 5634 }, { "epoch": 0.9254203190113522, "grad_norm": 0.3931817021892204, "learning_rate": 8.322196409345074e-06, "loss": 0.4976, "step": 5635 }, { "epoch": 0.925584546219695, "grad_norm": 0.4093911838085258, "learning_rate": 8.321948871324033e-06, "loss": 0.5009, "step": 5636 }, { "epoch": 0.9257487734280377, "grad_norm": 0.3601362068180492, "learning_rate": 8.321701291792867e-06, "loss": 0.5002, "step": 5637 }, { "epoch": 0.9259130006363804, "grad_norm": 0.8262370739905388, "learning_rate": 8.321453670754264e-06, "loss": 0.4923, "step": 5638 }, { "epoch": 0.9260772278447231, "grad_norm": 1.323739662827969, "learning_rate": 8.321206008210914e-06, "loss": 0.5188, "step": 5639 }, { "epoch": 0.9262414550530659, "grad_norm": 0.2963276769726195, "learning_rate": 8.320958304165509e-06, "loss": 0.5017, "step": 5640 }, { "epoch": 0.9264056822614086, "grad_norm": 0.35472151961006554, "learning_rate": 8.320710558620736e-06, "loss": 0.5008, "step": 5641 }, { "epoch": 0.9265699094697514, "grad_norm": 0.47096437311419886, "learning_rate": 8.320462771579287e-06, "loss": 0.5187, "step": 5642 }, { "epoch": 0.9267341366780941, "grad_norm": 0.3116469536361895, "learning_rate": 8.320214943043856e-06, "loss": 0.4931, "step": 5643 }, { "epoch": 0.9268983638864369, "grad_norm": 0.34571874140096176, "learning_rate": 8.31996707301713e-06, "loss": 0.5038, "step": 5644 }, { "epoch": 0.9270625910947796, "grad_norm": 0.3000670197452908, "learning_rate": 8.319719161501803e-06, "loss": 0.5081, "step": 5645 }, { "epoch": 0.9272268183031224, "grad_norm": 0.5315823597866184, "learning_rate": 8.319471208500568e-06, "loss": 0.5268, "step": 5646 }, { "epoch": 0.9273910455114651, "grad_norm": 0.3194174964969078, "learning_rate": 8.319223214016118e-06, "loss": 0.4853, "step": 5647 }, { "epoch": 0.9275552727198079, "grad_norm": 0.2909221729139333, "learning_rate": 8.318975178051146e-06, "loss": 0.4761, "step": 5648 }, { "epoch": 0.9277194999281506, "grad_norm": 0.3266591840424489, "learning_rate": 8.318727100608347e-06, "loss": 0.4968, "step": 5649 }, { "epoch": 0.9278837271364934, "grad_norm": 0.3951634535641033, "learning_rate": 8.318478981690415e-06, "loss": 0.4913, "step": 5650 }, { "epoch": 0.9280479543448361, "grad_norm": 0.5327103196781549, "learning_rate": 8.318230821300044e-06, "loss": 0.5045, "step": 5651 }, { "epoch": 0.9282121815531789, "grad_norm": 0.3387983201320291, "learning_rate": 8.317982619439927e-06, "loss": 0.496, "step": 5652 }, { "epoch": 0.9283764087615216, "grad_norm": 0.3534333758409748, "learning_rate": 8.317734376112766e-06, "loss": 0.4819, "step": 5653 }, { "epoch": 0.9285406359698644, "grad_norm": 0.45450659603234983, "learning_rate": 8.317486091321253e-06, "loss": 0.4933, "step": 5654 }, { "epoch": 0.928704863178207, "grad_norm": 0.49711559052900334, "learning_rate": 8.317237765068083e-06, "loss": 0.5089, "step": 5655 }, { "epoch": 0.9288690903865497, "grad_norm": 0.33609533448723666, "learning_rate": 8.316989397355956e-06, "loss": 0.5134, "step": 5656 }, { "epoch": 0.9290333175948925, "grad_norm": 0.4638457318413463, "learning_rate": 8.31674098818757e-06, "loss": 0.4927, "step": 5657 }, { "epoch": 0.9291975448032352, "grad_norm": 0.3490282675574865, "learning_rate": 8.31649253756562e-06, "loss": 0.4798, "step": 5658 }, { "epoch": 0.929361772011578, "grad_norm": 0.37016474674914635, "learning_rate": 8.316244045492809e-06, "loss": 0.5155, "step": 5659 }, { "epoch": 0.9295259992199207, "grad_norm": 0.37075033799182244, "learning_rate": 8.31599551197183e-06, "loss": 0.5202, "step": 5660 }, { "epoch": 0.9296902264282635, "grad_norm": 0.3424732929894227, "learning_rate": 8.315746937005386e-06, "loss": 0.52, "step": 5661 }, { "epoch": 0.9298544536366062, "grad_norm": 0.304676215982256, "learning_rate": 8.315498320596177e-06, "loss": 0.4924, "step": 5662 }, { "epoch": 0.930018680844949, "grad_norm": 0.3090516157537145, "learning_rate": 8.315249662746901e-06, "loss": 0.5067, "step": 5663 }, { "epoch": 0.9301829080532917, "grad_norm": 0.35434684092220853, "learning_rate": 8.315000963460261e-06, "loss": 0.5053, "step": 5664 }, { "epoch": 0.9303471352616345, "grad_norm": 0.3235687017825308, "learning_rate": 8.314752222738956e-06, "loss": 0.5004, "step": 5665 }, { "epoch": 0.9305113624699772, "grad_norm": 0.323592417972458, "learning_rate": 8.31450344058569e-06, "loss": 0.5178, "step": 5666 }, { "epoch": 0.93067558967832, "grad_norm": 0.3269011607386471, "learning_rate": 8.314254617003163e-06, "loss": 0.5111, "step": 5667 }, { "epoch": 0.9308398168866627, "grad_norm": 0.44163641425139216, "learning_rate": 8.31400575199408e-06, "loss": 0.5087, "step": 5668 }, { "epoch": 0.9310040440950055, "grad_norm": 0.544952712160666, "learning_rate": 8.313756845561139e-06, "loss": 0.5166, "step": 5669 }, { "epoch": 0.9311682713033482, "grad_norm": 0.31794568803873646, "learning_rate": 8.313507897707047e-06, "loss": 0.5212, "step": 5670 }, { "epoch": 0.931332498511691, "grad_norm": 0.39736597639619714, "learning_rate": 8.313258908434507e-06, "loss": 0.509, "step": 5671 }, { "epoch": 0.9314967257200336, "grad_norm": 0.30840079244447743, "learning_rate": 8.313009877746226e-06, "loss": 0.4957, "step": 5672 }, { "epoch": 0.9316609529283764, "grad_norm": 0.34739304809928856, "learning_rate": 8.312760805644905e-06, "loss": 0.4971, "step": 5673 }, { "epoch": 0.9318251801367191, "grad_norm": 0.335988944225995, "learning_rate": 8.312511692133251e-06, "loss": 0.5009, "step": 5674 }, { "epoch": 0.9319894073450619, "grad_norm": 0.3893509807380632, "learning_rate": 8.312262537213966e-06, "loss": 0.5073, "step": 5675 }, { "epoch": 0.9321536345534046, "grad_norm": 0.33293820028332555, "learning_rate": 8.312013340889763e-06, "loss": 0.4899, "step": 5676 }, { "epoch": 0.9323178617617474, "grad_norm": 0.4913326187606124, "learning_rate": 8.311764103163342e-06, "loss": 0.5175, "step": 5677 }, { "epoch": 0.9324820889700901, "grad_norm": 0.34344484845465234, "learning_rate": 8.311514824037414e-06, "loss": 0.4871, "step": 5678 }, { "epoch": 0.9326463161784329, "grad_norm": 0.31321528574452057, "learning_rate": 8.311265503514684e-06, "loss": 0.5137, "step": 5679 }, { "epoch": 0.9328105433867756, "grad_norm": 0.28952544103066263, "learning_rate": 8.311016141597862e-06, "loss": 0.4889, "step": 5680 }, { "epoch": 0.9329747705951184, "grad_norm": 0.3679457217525709, "learning_rate": 8.310766738289653e-06, "loss": 0.49, "step": 5681 }, { "epoch": 0.9331389978034611, "grad_norm": 2.1742606439642436, "learning_rate": 8.31051729359277e-06, "loss": 0.5173, "step": 5682 }, { "epoch": 0.9333032250118038, "grad_norm": 0.3115240268193473, "learning_rate": 8.310267807509918e-06, "loss": 0.5, "step": 5683 }, { "epoch": 0.9334674522201466, "grad_norm": 0.4170080800009593, "learning_rate": 8.310018280043811e-06, "loss": 0.53, "step": 5684 }, { "epoch": 0.9336316794284893, "grad_norm": 0.33607485508133855, "learning_rate": 8.309768711197156e-06, "loss": 0.5004, "step": 5685 }, { "epoch": 0.9337959066368321, "grad_norm": 0.34227008895253036, "learning_rate": 8.309519100972664e-06, "loss": 0.514, "step": 5686 }, { "epoch": 0.9339601338451748, "grad_norm": 0.32230259816889123, "learning_rate": 8.30926944937305e-06, "loss": 0.4983, "step": 5687 }, { "epoch": 0.9341243610535176, "grad_norm": 0.31003054600860314, "learning_rate": 8.309019756401016e-06, "loss": 0.4976, "step": 5688 }, { "epoch": 0.9342885882618602, "grad_norm": 0.33146582662481516, "learning_rate": 8.308770022059285e-06, "loss": 0.5141, "step": 5689 }, { "epoch": 0.934452815470203, "grad_norm": 0.29520921099226544, "learning_rate": 8.30852024635056e-06, "loss": 0.5035, "step": 5690 }, { "epoch": 0.9346170426785457, "grad_norm": 0.2898500067975092, "learning_rate": 8.308270429277562e-06, "loss": 0.491, "step": 5691 }, { "epoch": 0.9347812698868885, "grad_norm": 0.514172034927915, "learning_rate": 8.308020570842998e-06, "loss": 0.5117, "step": 5692 }, { "epoch": 0.9349454970952312, "grad_norm": 0.32118782445662386, "learning_rate": 8.307770671049586e-06, "loss": 0.5002, "step": 5693 }, { "epoch": 0.935109724303574, "grad_norm": 0.28434844389192415, "learning_rate": 8.307520729900037e-06, "loss": 0.5111, "step": 5694 }, { "epoch": 0.9352739515119167, "grad_norm": 0.3150859794798156, "learning_rate": 8.307270747397067e-06, "loss": 0.5157, "step": 5695 }, { "epoch": 0.9354381787202595, "grad_norm": 0.337073001654949, "learning_rate": 8.30702072354339e-06, "loss": 0.4861, "step": 5696 }, { "epoch": 0.9356024059286022, "grad_norm": 0.3450124523657821, "learning_rate": 8.306770658341723e-06, "loss": 0.4864, "step": 5697 }, { "epoch": 0.935766633136945, "grad_norm": 0.3124292867346078, "learning_rate": 8.306520551794781e-06, "loss": 0.5167, "step": 5698 }, { "epoch": 0.9359308603452877, "grad_norm": 0.35958452699114296, "learning_rate": 8.30627040390528e-06, "loss": 0.4719, "step": 5699 }, { "epoch": 0.9360950875536305, "grad_norm": 0.39144058349399646, "learning_rate": 8.306020214675938e-06, "loss": 0.4831, "step": 5700 }, { "epoch": 0.9362593147619732, "grad_norm": 0.32711266635221053, "learning_rate": 8.305769984109473e-06, "loss": 0.5025, "step": 5701 }, { "epoch": 0.936423541970316, "grad_norm": 0.32745055724297306, "learning_rate": 8.3055197122086e-06, "loss": 0.5015, "step": 5702 }, { "epoch": 0.9365877691786587, "grad_norm": 0.2867180288316144, "learning_rate": 8.30526939897604e-06, "loss": 0.5111, "step": 5703 }, { "epoch": 0.9367519963870015, "grad_norm": 0.295602851080418, "learning_rate": 8.30501904441451e-06, "loss": 0.4967, "step": 5704 }, { "epoch": 0.9369162235953442, "grad_norm": 0.4263974761885831, "learning_rate": 8.30476864852673e-06, "loss": 0.4912, "step": 5705 }, { "epoch": 0.9370804508036868, "grad_norm": 0.33887320596973486, "learning_rate": 8.304518211315417e-06, "loss": 0.4882, "step": 5706 }, { "epoch": 0.9372446780120296, "grad_norm": 0.3675882652000894, "learning_rate": 8.304267732783296e-06, "loss": 0.4911, "step": 5707 }, { "epoch": 0.9374089052203723, "grad_norm": 0.44201220610549874, "learning_rate": 8.304017212933082e-06, "loss": 0.5259, "step": 5708 }, { "epoch": 0.9375731324287151, "grad_norm": 0.4404115118052987, "learning_rate": 8.303766651767501e-06, "loss": 0.5017, "step": 5709 }, { "epoch": 0.9377373596370578, "grad_norm": 0.3166832436259222, "learning_rate": 8.30351604928927e-06, "loss": 0.498, "step": 5710 }, { "epoch": 0.9379015868454006, "grad_norm": 0.3220410348280446, "learning_rate": 8.303265405501113e-06, "loss": 0.4816, "step": 5711 }, { "epoch": 0.9380658140537433, "grad_norm": 0.49047811853744966, "learning_rate": 8.303014720405753e-06, "loss": 0.513, "step": 5712 }, { "epoch": 0.9382300412620861, "grad_norm": 0.33189355575641755, "learning_rate": 8.302763994005908e-06, "loss": 0.5082, "step": 5713 }, { "epoch": 0.9383942684704288, "grad_norm": 0.3758239947437896, "learning_rate": 8.30251322630431e-06, "loss": 0.5046, "step": 5714 }, { "epoch": 0.9385584956787716, "grad_norm": 0.33422695693160437, "learning_rate": 8.302262417303673e-06, "loss": 0.4978, "step": 5715 }, { "epoch": 0.9387227228871143, "grad_norm": 0.3216246403940893, "learning_rate": 8.302011567006726e-06, "loss": 0.4972, "step": 5716 }, { "epoch": 0.9388869500954571, "grad_norm": 0.3375123906537756, "learning_rate": 8.301760675416193e-06, "loss": 0.5055, "step": 5717 }, { "epoch": 0.9390511773037998, "grad_norm": 0.3245098375695823, "learning_rate": 8.301509742534797e-06, "loss": 0.4923, "step": 5718 }, { "epoch": 0.9392154045121426, "grad_norm": 0.300498447635204, "learning_rate": 8.301258768365269e-06, "loss": 0.5084, "step": 5719 }, { "epoch": 0.9393796317204853, "grad_norm": 0.2916667872950895, "learning_rate": 8.301007752910327e-06, "loss": 0.4728, "step": 5720 }, { "epoch": 0.9395438589288281, "grad_norm": 0.29636711926991816, "learning_rate": 8.300756696172703e-06, "loss": 0.5363, "step": 5721 }, { "epoch": 0.9397080861371708, "grad_norm": 0.36656193950960636, "learning_rate": 8.300505598155121e-06, "loss": 0.5059, "step": 5722 }, { "epoch": 0.9398723133455135, "grad_norm": 0.3305934017377256, "learning_rate": 8.30025445886031e-06, "loss": 0.4864, "step": 5723 }, { "epoch": 0.9400365405538562, "grad_norm": 0.2750640832912153, "learning_rate": 8.300003278290996e-06, "loss": 0.4961, "step": 5724 }, { "epoch": 0.940200767762199, "grad_norm": 0.2876805407058977, "learning_rate": 8.299752056449908e-06, "loss": 0.4967, "step": 5725 }, { "epoch": 0.9403649949705417, "grad_norm": 0.35466903617984374, "learning_rate": 8.299500793339775e-06, "loss": 0.4968, "step": 5726 }, { "epoch": 0.9405292221788845, "grad_norm": 0.340814806284325, "learning_rate": 8.299249488963322e-06, "loss": 0.4908, "step": 5727 }, { "epoch": 0.9406934493872272, "grad_norm": 0.3322242158557963, "learning_rate": 8.298998143323286e-06, "loss": 0.5069, "step": 5728 }, { "epoch": 0.94085767659557, "grad_norm": 0.2999082292928264, "learning_rate": 8.298746756422389e-06, "loss": 0.4959, "step": 5729 }, { "epoch": 0.9410219038039127, "grad_norm": 0.31878824304806774, "learning_rate": 8.298495328263367e-06, "loss": 0.5125, "step": 5730 }, { "epoch": 0.9411861310122555, "grad_norm": 0.36476084540179676, "learning_rate": 8.298243858848947e-06, "loss": 0.4991, "step": 5731 }, { "epoch": 0.9413503582205982, "grad_norm": 0.2634703515079286, "learning_rate": 8.297992348181862e-06, "loss": 0.5131, "step": 5732 }, { "epoch": 0.941514585428941, "grad_norm": 0.2739454213919614, "learning_rate": 8.297740796264845e-06, "loss": 0.5084, "step": 5733 }, { "epoch": 0.9416788126372837, "grad_norm": 0.32154129766877954, "learning_rate": 8.297489203100623e-06, "loss": 0.49, "step": 5734 }, { "epoch": 0.9418430398456265, "grad_norm": 0.2974470814025031, "learning_rate": 8.297237568691936e-06, "loss": 0.4825, "step": 5735 }, { "epoch": 0.9420072670539692, "grad_norm": 0.3876497388425138, "learning_rate": 8.29698589304151e-06, "loss": 0.523, "step": 5736 }, { "epoch": 0.942171494262312, "grad_norm": 0.28791684142287477, "learning_rate": 8.296734176152083e-06, "loss": 0.4948, "step": 5737 }, { "epoch": 0.9423357214706547, "grad_norm": 0.26299740923860576, "learning_rate": 8.296482418026387e-06, "loss": 0.5154, "step": 5738 }, { "epoch": 0.9424999486789974, "grad_norm": 0.3163157616250974, "learning_rate": 8.296230618667156e-06, "loss": 0.4981, "step": 5739 }, { "epoch": 0.9426641758873401, "grad_norm": 0.3183861930352154, "learning_rate": 8.295978778077128e-06, "loss": 0.4857, "step": 5740 }, { "epoch": 0.9428284030956828, "grad_norm": 0.32525277310277495, "learning_rate": 8.295726896259033e-06, "loss": 0.5191, "step": 5741 }, { "epoch": 0.9429926303040256, "grad_norm": 0.3938453035546823, "learning_rate": 8.29547497321561e-06, "loss": 0.49, "step": 5742 }, { "epoch": 0.9431568575123683, "grad_norm": 0.2898850112140784, "learning_rate": 8.295223008949595e-06, "loss": 0.4781, "step": 5743 }, { "epoch": 0.9433210847207111, "grad_norm": 0.3301700391413957, "learning_rate": 8.294971003463724e-06, "loss": 0.4957, "step": 5744 }, { "epoch": 0.9434853119290538, "grad_norm": 0.2804840088969759, "learning_rate": 8.294718956760736e-06, "loss": 0.486, "step": 5745 }, { "epoch": 0.9436495391373966, "grad_norm": 0.30848614862924606, "learning_rate": 8.294466868843363e-06, "loss": 0.4949, "step": 5746 }, { "epoch": 0.9438137663457393, "grad_norm": 0.2778952083425503, "learning_rate": 8.294214739714348e-06, "loss": 0.4947, "step": 5747 }, { "epoch": 0.9439779935540821, "grad_norm": 0.2912022792909345, "learning_rate": 8.293962569376428e-06, "loss": 0.5038, "step": 5748 }, { "epoch": 0.9441422207624248, "grad_norm": 0.31372201934122734, "learning_rate": 8.293710357832344e-06, "loss": 0.5014, "step": 5749 }, { "epoch": 0.9443064479707676, "grad_norm": 0.4827497389908659, "learning_rate": 8.29345810508483e-06, "loss": 0.4936, "step": 5750 }, { "epoch": 0.9444706751791103, "grad_norm": 0.2978087052404848, "learning_rate": 8.29320581113663e-06, "loss": 0.5146, "step": 5751 }, { "epoch": 0.9446349023874531, "grad_norm": 0.29384689410191, "learning_rate": 8.292953475990481e-06, "loss": 0.518, "step": 5752 }, { "epoch": 0.9447991295957958, "grad_norm": 0.3121394187485199, "learning_rate": 8.292701099649129e-06, "loss": 0.4906, "step": 5753 }, { "epoch": 0.9449633568041386, "grad_norm": 0.2965066015713293, "learning_rate": 8.292448682115309e-06, "loss": 0.5148, "step": 5754 }, { "epoch": 0.9451275840124813, "grad_norm": 0.3857468121891203, "learning_rate": 8.292196223391766e-06, "loss": 0.4877, "step": 5755 }, { "epoch": 0.9452918112208241, "grad_norm": 1.0082520752566388, "learning_rate": 8.29194372348124e-06, "loss": 0.4954, "step": 5756 }, { "epoch": 0.9454560384291667, "grad_norm": 0.2987206706456987, "learning_rate": 8.291691182386476e-06, "loss": 0.5066, "step": 5757 }, { "epoch": 0.9456202656375094, "grad_norm": 0.43792593819559156, "learning_rate": 8.291438600110214e-06, "loss": 0.5064, "step": 5758 }, { "epoch": 0.9457844928458522, "grad_norm": 0.3036885285849899, "learning_rate": 8.291185976655199e-06, "loss": 0.4887, "step": 5759 }, { "epoch": 0.945948720054195, "grad_norm": 0.39977706995255674, "learning_rate": 8.290933312024174e-06, "loss": 0.5063, "step": 5760 }, { "epoch": 0.9461129472625377, "grad_norm": 0.30397770713157646, "learning_rate": 8.290680606219883e-06, "loss": 0.5182, "step": 5761 }, { "epoch": 0.9462771744708804, "grad_norm": 0.29067054573184586, "learning_rate": 8.290427859245072e-06, "loss": 0.4953, "step": 5762 }, { "epoch": 0.9464414016792232, "grad_norm": 0.2807311725585339, "learning_rate": 8.290175071102486e-06, "loss": 0.5128, "step": 5763 }, { "epoch": 0.9466056288875659, "grad_norm": 0.35856042580615943, "learning_rate": 8.289922241794869e-06, "loss": 0.5088, "step": 5764 }, { "epoch": 0.9467698560959087, "grad_norm": 0.36928919153283474, "learning_rate": 8.289669371324966e-06, "loss": 0.5089, "step": 5765 }, { "epoch": 0.9469340833042514, "grad_norm": 0.6615780615800806, "learning_rate": 8.289416459695527e-06, "loss": 0.5067, "step": 5766 }, { "epoch": 0.9470983105125942, "grad_norm": 0.28510177778702833, "learning_rate": 8.289163506909297e-06, "loss": 0.482, "step": 5767 }, { "epoch": 0.9472625377209369, "grad_norm": 0.30217945823341685, "learning_rate": 8.288910512969021e-06, "loss": 0.5077, "step": 5768 }, { "epoch": 0.9474267649292797, "grad_norm": 0.278858026357913, "learning_rate": 8.288657477877452e-06, "loss": 0.4868, "step": 5769 }, { "epoch": 0.9475909921376224, "grad_norm": 0.3424671698610464, "learning_rate": 8.288404401637332e-06, "loss": 0.5011, "step": 5770 }, { "epoch": 0.9477552193459652, "grad_norm": 0.29888762243279215, "learning_rate": 8.288151284251417e-06, "loss": 0.5014, "step": 5771 }, { "epoch": 0.9479194465543079, "grad_norm": 0.3767666704556441, "learning_rate": 8.28789812572245e-06, "loss": 0.4968, "step": 5772 }, { "epoch": 0.9480836737626507, "grad_norm": 0.3007862052726042, "learning_rate": 8.287644926053182e-06, "loss": 0.5111, "step": 5773 }, { "epoch": 0.9482479009709933, "grad_norm": 0.36206526827161356, "learning_rate": 8.287391685246363e-06, "loss": 0.5185, "step": 5774 }, { "epoch": 0.9484121281793361, "grad_norm": 0.29609444550203556, "learning_rate": 8.287138403304746e-06, "loss": 0.4892, "step": 5775 }, { "epoch": 0.9485763553876788, "grad_norm": 0.2949042867632768, "learning_rate": 8.286885080231079e-06, "loss": 0.5162, "step": 5776 }, { "epoch": 0.9487405825960216, "grad_norm": 0.3347524409023741, "learning_rate": 8.286631716028112e-06, "loss": 0.4835, "step": 5777 }, { "epoch": 0.9489048098043643, "grad_norm": 0.2867731768433254, "learning_rate": 8.286378310698603e-06, "loss": 0.5115, "step": 5778 }, { "epoch": 0.9490690370127071, "grad_norm": 0.3303083901828599, "learning_rate": 8.286124864245298e-06, "loss": 0.4904, "step": 5779 }, { "epoch": 0.9492332642210498, "grad_norm": 0.30811639311477274, "learning_rate": 8.285871376670953e-06, "loss": 0.4935, "step": 5780 }, { "epoch": 0.9493974914293926, "grad_norm": 0.3030673745127514, "learning_rate": 8.285617847978318e-06, "loss": 0.5014, "step": 5781 }, { "epoch": 0.9495617186377353, "grad_norm": 0.3174059059837985, "learning_rate": 8.285364278170152e-06, "loss": 0.5008, "step": 5782 }, { "epoch": 0.9497259458460781, "grad_norm": 0.32230661304949565, "learning_rate": 8.285110667249202e-06, "loss": 0.4981, "step": 5783 }, { "epoch": 0.9498901730544208, "grad_norm": 0.2954900836401079, "learning_rate": 8.284857015218228e-06, "loss": 0.5152, "step": 5784 }, { "epoch": 0.9500544002627636, "grad_norm": 0.38341331561161734, "learning_rate": 8.284603322079982e-06, "loss": 0.516, "step": 5785 }, { "epoch": 0.9502186274711063, "grad_norm": 0.288180037663649, "learning_rate": 8.284349587837222e-06, "loss": 0.4971, "step": 5786 }, { "epoch": 0.950382854679449, "grad_norm": 0.2639278006502859, "learning_rate": 8.284095812492701e-06, "loss": 0.4973, "step": 5787 }, { "epoch": 0.9505470818877918, "grad_norm": 0.36465823278362897, "learning_rate": 8.283841996049176e-06, "loss": 0.5043, "step": 5788 }, { "epoch": 0.9507113090961345, "grad_norm": 0.40869908171798774, "learning_rate": 8.283588138509406e-06, "loss": 0.497, "step": 5789 }, { "epoch": 0.9508755363044773, "grad_norm": 0.2948081143710643, "learning_rate": 8.283334239876145e-06, "loss": 0.5012, "step": 5790 }, { "epoch": 0.9510397635128199, "grad_norm": 0.30131869054135385, "learning_rate": 8.283080300152151e-06, "loss": 0.5096, "step": 5791 }, { "epoch": 0.9512039907211627, "grad_norm": 0.2893470713777381, "learning_rate": 8.282826319340185e-06, "loss": 0.4803, "step": 5792 }, { "epoch": 0.9513682179295054, "grad_norm": 0.2924402635990607, "learning_rate": 8.282572297443002e-06, "loss": 0.5045, "step": 5793 }, { "epoch": 0.9515324451378482, "grad_norm": 0.30826834323579505, "learning_rate": 8.282318234463361e-06, "loss": 0.4942, "step": 5794 }, { "epoch": 0.9516966723461909, "grad_norm": 0.2575049688426361, "learning_rate": 8.282064130404025e-06, "loss": 0.5073, "step": 5795 }, { "epoch": 0.9518608995545337, "grad_norm": 0.2616141640965354, "learning_rate": 8.281809985267752e-06, "loss": 0.5015, "step": 5796 }, { "epoch": 0.9520251267628764, "grad_norm": 0.3188316003808188, "learning_rate": 8.2815557990573e-06, "loss": 0.519, "step": 5797 }, { "epoch": 0.9521893539712192, "grad_norm": 0.2517430104882772, "learning_rate": 8.281301571775431e-06, "loss": 0.4858, "step": 5798 }, { "epoch": 0.9523535811795619, "grad_norm": 0.31257213248047166, "learning_rate": 8.28104730342491e-06, "loss": 0.4955, "step": 5799 }, { "epoch": 0.9525178083879047, "grad_norm": 0.3406386793193978, "learning_rate": 8.280792994008492e-06, "loss": 0.4923, "step": 5800 }, { "epoch": 0.9526820355962474, "grad_norm": 0.2913983953202928, "learning_rate": 8.280538643528944e-06, "loss": 0.5039, "step": 5801 }, { "epoch": 0.9528462628045902, "grad_norm": 0.28772973551798076, "learning_rate": 8.280284251989024e-06, "loss": 0.506, "step": 5802 }, { "epoch": 0.9530104900129329, "grad_norm": 0.30481361757707226, "learning_rate": 8.280029819391499e-06, "loss": 0.5027, "step": 5803 }, { "epoch": 0.9531747172212757, "grad_norm": 0.31008301473548017, "learning_rate": 8.279775345739133e-06, "loss": 0.4902, "step": 5804 }, { "epoch": 0.9533389444296184, "grad_norm": 0.31681938852800007, "learning_rate": 8.279520831034688e-06, "loss": 0.4983, "step": 5805 }, { "epoch": 0.9535031716379612, "grad_norm": 0.36114831329488967, "learning_rate": 8.279266275280926e-06, "loss": 0.5168, "step": 5806 }, { "epoch": 0.9536673988463039, "grad_norm": 0.35047545093351945, "learning_rate": 8.279011678480614e-06, "loss": 0.5074, "step": 5807 }, { "epoch": 0.9538316260546466, "grad_norm": 0.3312605893409433, "learning_rate": 8.27875704063652e-06, "loss": 0.495, "step": 5808 }, { "epoch": 0.9539958532629893, "grad_norm": 0.5613178656403057, "learning_rate": 8.278502361751403e-06, "loss": 0.5045, "step": 5809 }, { "epoch": 0.954160080471332, "grad_norm": 0.28894635191610946, "learning_rate": 8.278247641828035e-06, "loss": 0.4889, "step": 5810 }, { "epoch": 0.9543243076796748, "grad_norm": 0.33802860483632036, "learning_rate": 8.27799288086918e-06, "loss": 0.4807, "step": 5811 }, { "epoch": 0.9544885348880175, "grad_norm": 0.3051254564753109, "learning_rate": 8.277738078877606e-06, "loss": 0.4955, "step": 5812 }, { "epoch": 0.9546527620963603, "grad_norm": 0.31460451693320945, "learning_rate": 8.277483235856079e-06, "loss": 0.4888, "step": 5813 }, { "epoch": 0.954816989304703, "grad_norm": 0.3011782131412546, "learning_rate": 8.277228351807367e-06, "loss": 0.4981, "step": 5814 }, { "epoch": 0.9549812165130458, "grad_norm": 0.3790567372061715, "learning_rate": 8.276973426734238e-06, "loss": 0.5033, "step": 5815 }, { "epoch": 0.9551454437213885, "grad_norm": 0.29233332389067523, "learning_rate": 8.276718460639464e-06, "loss": 0.5025, "step": 5816 }, { "epoch": 0.9553096709297313, "grad_norm": 0.28425808137423847, "learning_rate": 8.276463453525809e-06, "loss": 0.4782, "step": 5817 }, { "epoch": 0.955473898138074, "grad_norm": 0.3215019093899259, "learning_rate": 8.276208405396048e-06, "loss": 0.5106, "step": 5818 }, { "epoch": 0.9556381253464168, "grad_norm": 0.38444114269586827, "learning_rate": 8.275953316252946e-06, "loss": 0.495, "step": 5819 }, { "epoch": 0.9558023525547595, "grad_norm": 0.2708150920738918, "learning_rate": 8.275698186099278e-06, "loss": 0.4782, "step": 5820 }, { "epoch": 0.9559665797631023, "grad_norm": 0.574284020674942, "learning_rate": 8.27544301493781e-06, "loss": 0.5015, "step": 5821 }, { "epoch": 0.956130806971445, "grad_norm": 0.3558660647373787, "learning_rate": 8.27518780277132e-06, "loss": 0.4873, "step": 5822 }, { "epoch": 0.9562950341797878, "grad_norm": 0.2853300694877287, "learning_rate": 8.274932549602575e-06, "loss": 0.5086, "step": 5823 }, { "epoch": 0.9564592613881305, "grad_norm": 0.291071201624435, "learning_rate": 8.274677255434348e-06, "loss": 0.5011, "step": 5824 }, { "epoch": 0.9566234885964732, "grad_norm": 0.3247243607708094, "learning_rate": 8.274421920269412e-06, "loss": 0.5057, "step": 5825 }, { "epoch": 0.9567877158048159, "grad_norm": 0.26119747354567163, "learning_rate": 8.274166544110541e-06, "loss": 0.4932, "step": 5826 }, { "epoch": 0.9569519430131587, "grad_norm": 0.3785889300892869, "learning_rate": 8.273911126960507e-06, "loss": 0.4927, "step": 5827 }, { "epoch": 0.9571161702215014, "grad_norm": 0.38237933514300165, "learning_rate": 8.273655668822086e-06, "loss": 0.4997, "step": 5828 }, { "epoch": 0.9572803974298442, "grad_norm": 0.3519938306470608, "learning_rate": 8.273400169698051e-06, "loss": 0.4969, "step": 5829 }, { "epoch": 0.9574446246381869, "grad_norm": 0.2765721380041462, "learning_rate": 8.27314462959118e-06, "loss": 0.4796, "step": 5830 }, { "epoch": 0.9576088518465297, "grad_norm": 0.3239066959022419, "learning_rate": 8.272889048504244e-06, "loss": 0.4818, "step": 5831 }, { "epoch": 0.9577730790548724, "grad_norm": 0.28322233184270795, "learning_rate": 8.272633426440021e-06, "loss": 0.4928, "step": 5832 }, { "epoch": 0.9579373062632152, "grad_norm": 0.2633683826750771, "learning_rate": 8.272377763401287e-06, "loss": 0.4911, "step": 5833 }, { "epoch": 0.9581015334715579, "grad_norm": 0.29772403363255917, "learning_rate": 8.27212205939082e-06, "loss": 0.4798, "step": 5834 }, { "epoch": 0.9582657606799007, "grad_norm": 0.3413359910040477, "learning_rate": 8.271866314411395e-06, "loss": 0.5078, "step": 5835 }, { "epoch": 0.9584299878882434, "grad_norm": 0.34028950114887185, "learning_rate": 8.271610528465792e-06, "loss": 0.5045, "step": 5836 }, { "epoch": 0.9585942150965862, "grad_norm": 0.27175870163911775, "learning_rate": 8.271354701556786e-06, "loss": 0.5135, "step": 5837 }, { "epoch": 0.9587584423049289, "grad_norm": 0.3075850133116066, "learning_rate": 8.27109883368716e-06, "loss": 0.4803, "step": 5838 }, { "epoch": 0.9589226695132717, "grad_norm": 0.3351562770791254, "learning_rate": 8.270842924859688e-06, "loss": 0.4922, "step": 5839 }, { "epoch": 0.9590868967216144, "grad_norm": 0.2860224701466842, "learning_rate": 8.270586975077154e-06, "loss": 0.5001, "step": 5840 }, { "epoch": 0.9592511239299572, "grad_norm": 0.2810690644684735, "learning_rate": 8.270330984342334e-06, "loss": 0.517, "step": 5841 }, { "epoch": 0.9594153511382998, "grad_norm": 0.3647064774101022, "learning_rate": 8.27007495265801e-06, "loss": 0.5175, "step": 5842 }, { "epoch": 0.9595795783466425, "grad_norm": 0.29633901351977443, "learning_rate": 8.269818880026963e-06, "loss": 0.5043, "step": 5843 }, { "epoch": 0.9597438055549853, "grad_norm": 0.31863430795181835, "learning_rate": 8.269562766451974e-06, "loss": 0.5044, "step": 5844 }, { "epoch": 0.959908032763328, "grad_norm": 0.288021139963858, "learning_rate": 8.269306611935826e-06, "loss": 0.486, "step": 5845 }, { "epoch": 0.9600722599716708, "grad_norm": 0.40002756696662767, "learning_rate": 8.269050416481298e-06, "loss": 0.4917, "step": 5846 }, { "epoch": 0.9602364871800135, "grad_norm": 0.2847656625043193, "learning_rate": 8.268794180091175e-06, "loss": 0.5, "step": 5847 }, { "epoch": 0.9604007143883563, "grad_norm": 0.2876921231037368, "learning_rate": 8.268537902768239e-06, "loss": 0.4946, "step": 5848 }, { "epoch": 0.960564941596699, "grad_norm": 0.3600375948303123, "learning_rate": 8.268281584515273e-06, "loss": 0.5072, "step": 5849 }, { "epoch": 0.9607291688050418, "grad_norm": 0.27348127589456944, "learning_rate": 8.268025225335063e-06, "loss": 0.5142, "step": 5850 }, { "epoch": 0.9608933960133845, "grad_norm": 0.29609853935174174, "learning_rate": 8.267768825230392e-06, "loss": 0.4903, "step": 5851 }, { "epoch": 0.9610576232217273, "grad_norm": 0.3485982841735851, "learning_rate": 8.267512384204043e-06, "loss": 0.5078, "step": 5852 }, { "epoch": 0.96122185043007, "grad_norm": 0.31985530611185276, "learning_rate": 8.267255902258804e-06, "loss": 0.4809, "step": 5853 }, { "epoch": 0.9613860776384128, "grad_norm": 0.2827682502321915, "learning_rate": 8.266999379397458e-06, "loss": 0.5171, "step": 5854 }, { "epoch": 0.9615503048467555, "grad_norm": 0.3558347178567936, "learning_rate": 8.266742815622794e-06, "loss": 0.5205, "step": 5855 }, { "epoch": 0.9617145320550983, "grad_norm": 0.31569356701732815, "learning_rate": 8.266486210937595e-06, "loss": 0.488, "step": 5856 }, { "epoch": 0.961878759263441, "grad_norm": 0.280486740752955, "learning_rate": 8.266229565344651e-06, "loss": 0.5223, "step": 5857 }, { "epoch": 0.9620429864717838, "grad_norm": 0.30616030690303003, "learning_rate": 8.265972878846751e-06, "loss": 0.495, "step": 5858 }, { "epoch": 0.9622072136801264, "grad_norm": 0.2971057720358319, "learning_rate": 8.265716151446677e-06, "loss": 0.4886, "step": 5859 }, { "epoch": 0.9623714408884692, "grad_norm": 0.4860466387593598, "learning_rate": 8.26545938314722e-06, "loss": 0.4974, "step": 5860 }, { "epoch": 0.9625356680968119, "grad_norm": 0.31491956302389884, "learning_rate": 8.265202573951172e-06, "loss": 0.4915, "step": 5861 }, { "epoch": 0.9626998953051547, "grad_norm": 0.5128107426900643, "learning_rate": 8.26494572386132e-06, "loss": 0.5052, "step": 5862 }, { "epoch": 0.9628641225134974, "grad_norm": 0.28645208841438113, "learning_rate": 8.264688832880453e-06, "loss": 0.4772, "step": 5863 }, { "epoch": 0.9630283497218401, "grad_norm": 0.3334911284628625, "learning_rate": 8.264431901011358e-06, "loss": 0.5136, "step": 5864 }, { "epoch": 0.9631925769301829, "grad_norm": 0.272860391882876, "learning_rate": 8.264174928256832e-06, "loss": 0.5046, "step": 5865 }, { "epoch": 0.9633568041385256, "grad_norm": 0.3606448704975928, "learning_rate": 8.263917914619662e-06, "loss": 0.4904, "step": 5866 }, { "epoch": 0.9635210313468684, "grad_norm": 0.3258598539123226, "learning_rate": 8.263660860102641e-06, "loss": 0.4854, "step": 5867 }, { "epoch": 0.9636852585552111, "grad_norm": 0.28589834527541785, "learning_rate": 8.26340376470856e-06, "loss": 0.5102, "step": 5868 }, { "epoch": 0.9638494857635539, "grad_norm": 0.2781441103589388, "learning_rate": 8.26314662844021e-06, "loss": 0.5007, "step": 5869 }, { "epoch": 0.9640137129718966, "grad_norm": 0.2598080441340957, "learning_rate": 8.262889451300386e-06, "loss": 0.4703, "step": 5870 }, { "epoch": 0.9641779401802394, "grad_norm": 0.3118870916093809, "learning_rate": 8.26263223329188e-06, "loss": 0.4806, "step": 5871 }, { "epoch": 0.9643421673885821, "grad_norm": 0.28664634469887024, "learning_rate": 8.262374974417486e-06, "loss": 0.5147, "step": 5872 }, { "epoch": 0.9645063945969249, "grad_norm": 0.29088471375612124, "learning_rate": 8.26211767468e-06, "loss": 0.4965, "step": 5873 }, { "epoch": 0.9646706218052676, "grad_norm": 0.2934428904263493, "learning_rate": 8.261860334082212e-06, "loss": 0.4904, "step": 5874 }, { "epoch": 0.9648348490136104, "grad_norm": 0.3262898836978391, "learning_rate": 8.26160295262692e-06, "loss": 0.4977, "step": 5875 }, { "epoch": 0.964999076221953, "grad_norm": 0.28309291470955317, "learning_rate": 8.26134553031692e-06, "loss": 0.4942, "step": 5876 }, { "epoch": 0.9651633034302958, "grad_norm": 0.2727890246165504, "learning_rate": 8.261088067155008e-06, "loss": 0.4947, "step": 5877 }, { "epoch": 0.9653275306386385, "grad_norm": 0.28695720943171166, "learning_rate": 8.260830563143976e-06, "loss": 0.5002, "step": 5878 }, { "epoch": 0.9654917578469813, "grad_norm": 0.5454995618657891, "learning_rate": 8.260573018286626e-06, "loss": 0.4919, "step": 5879 }, { "epoch": 0.965655985055324, "grad_norm": 0.3329320093126202, "learning_rate": 8.260315432585754e-06, "loss": 0.5237, "step": 5880 }, { "epoch": 0.9658202122636668, "grad_norm": 0.3253192711895212, "learning_rate": 8.260057806044155e-06, "loss": 0.5048, "step": 5881 }, { "epoch": 0.9659844394720095, "grad_norm": 0.30670573862884587, "learning_rate": 8.259800138664628e-06, "loss": 0.4797, "step": 5882 }, { "epoch": 0.9661486666803523, "grad_norm": 0.30624801488520037, "learning_rate": 8.259542430449975e-06, "loss": 0.4847, "step": 5883 }, { "epoch": 0.966312893888695, "grad_norm": 0.29449002020199405, "learning_rate": 8.259284681402992e-06, "loss": 0.5065, "step": 5884 }, { "epoch": 0.9664771210970378, "grad_norm": 0.30474092018275056, "learning_rate": 8.259026891526478e-06, "loss": 0.4815, "step": 5885 }, { "epoch": 0.9666413483053805, "grad_norm": 0.284568010057758, "learning_rate": 8.258769060823232e-06, "loss": 0.486, "step": 5886 }, { "epoch": 0.9668055755137233, "grad_norm": 0.32753086529797554, "learning_rate": 8.258511189296057e-06, "loss": 0.483, "step": 5887 }, { "epoch": 0.966969802722066, "grad_norm": 0.2446509694431007, "learning_rate": 8.258253276947752e-06, "loss": 0.4698, "step": 5888 }, { "epoch": 0.9671340299304088, "grad_norm": 0.28462449616187135, "learning_rate": 8.257995323781122e-06, "loss": 0.4873, "step": 5889 }, { "epoch": 0.9672982571387515, "grad_norm": 0.26788096167237824, "learning_rate": 8.257737329798961e-06, "loss": 0.5108, "step": 5890 }, { "epoch": 0.9674624843470943, "grad_norm": 0.27341395472584296, "learning_rate": 8.257479295004079e-06, "loss": 0.4932, "step": 5891 }, { "epoch": 0.967626711555437, "grad_norm": 0.30169532415899364, "learning_rate": 8.257221219399272e-06, "loss": 0.4656, "step": 5892 }, { "epoch": 0.9677909387637796, "grad_norm": 2.138078420857035, "learning_rate": 8.256963102987349e-06, "loss": 0.5071, "step": 5893 }, { "epoch": 0.9679551659721224, "grad_norm": 0.29300939357485084, "learning_rate": 8.256704945771108e-06, "loss": 0.4886, "step": 5894 }, { "epoch": 0.9681193931804651, "grad_norm": 0.2819584830142459, "learning_rate": 8.256446747753356e-06, "loss": 0.4812, "step": 5895 }, { "epoch": 0.9682836203888079, "grad_norm": 0.30045867899014606, "learning_rate": 8.256188508936896e-06, "loss": 0.4915, "step": 5896 }, { "epoch": 0.9684478475971506, "grad_norm": 0.7846435366223166, "learning_rate": 8.255930229324535e-06, "loss": 0.5115, "step": 5897 }, { "epoch": 0.9686120748054934, "grad_norm": 0.330147198674231, "learning_rate": 8.255671908919075e-06, "loss": 0.4656, "step": 5898 }, { "epoch": 0.9687763020138361, "grad_norm": 0.2768259882158433, "learning_rate": 8.255413547723323e-06, "loss": 0.5064, "step": 5899 }, { "epoch": 0.9689405292221789, "grad_norm": 0.3766778545533828, "learning_rate": 8.255155145740084e-06, "loss": 0.5089, "step": 5900 }, { "epoch": 0.9691047564305216, "grad_norm": 0.4340928286077416, "learning_rate": 8.254896702972167e-06, "loss": 0.5171, "step": 5901 }, { "epoch": 0.9692689836388644, "grad_norm": 0.32064135591269965, "learning_rate": 8.254638219422378e-06, "loss": 0.4786, "step": 5902 }, { "epoch": 0.9694332108472071, "grad_norm": 0.35896251714917476, "learning_rate": 8.254379695093523e-06, "loss": 0.4763, "step": 5903 }, { "epoch": 0.9695974380555499, "grad_norm": 0.28803051965097765, "learning_rate": 8.25412112998841e-06, "loss": 0.498, "step": 5904 }, { "epoch": 0.9697616652638926, "grad_norm": 0.2843031109294082, "learning_rate": 8.253862524109849e-06, "loss": 0.5165, "step": 5905 }, { "epoch": 0.9699258924722354, "grad_norm": 0.2703477212769139, "learning_rate": 8.253603877460647e-06, "loss": 0.5026, "step": 5906 }, { "epoch": 0.9700901196805781, "grad_norm": 0.37540094540547747, "learning_rate": 8.253345190043613e-06, "loss": 0.4745, "step": 5907 }, { "epoch": 0.9702543468889209, "grad_norm": 0.34686637153638844, "learning_rate": 8.253086461861561e-06, "loss": 0.5061, "step": 5908 }, { "epoch": 0.9704185740972635, "grad_norm": 0.28355130217507657, "learning_rate": 8.252827692917295e-06, "loss": 0.4877, "step": 5909 }, { "epoch": 0.9705828013056063, "grad_norm": 0.3208759472372884, "learning_rate": 8.252568883213628e-06, "loss": 0.5004, "step": 5910 }, { "epoch": 0.970747028513949, "grad_norm": 0.27097365741813967, "learning_rate": 8.25231003275337e-06, "loss": 0.5167, "step": 5911 }, { "epoch": 0.9709112557222918, "grad_norm": 0.26847747863284094, "learning_rate": 8.252051141539335e-06, "loss": 0.5085, "step": 5912 }, { "epoch": 0.9710754829306345, "grad_norm": 0.2744476278316739, "learning_rate": 8.251792209574333e-06, "loss": 0.4946, "step": 5913 }, { "epoch": 0.9712397101389773, "grad_norm": 0.2949039780636973, "learning_rate": 8.251533236861175e-06, "loss": 0.5, "step": 5914 }, { "epoch": 0.97140393734732, "grad_norm": 0.2546736861392502, "learning_rate": 8.251274223402676e-06, "loss": 0.4998, "step": 5915 }, { "epoch": 0.9715681645556627, "grad_norm": 0.33011871520716857, "learning_rate": 8.251015169201649e-06, "loss": 0.4979, "step": 5916 }, { "epoch": 0.9717323917640055, "grad_norm": 0.36791096342909235, "learning_rate": 8.250756074260903e-06, "loss": 0.5164, "step": 5917 }, { "epoch": 0.9718966189723482, "grad_norm": 0.2940151689618452, "learning_rate": 8.25049693858326e-06, "loss": 0.4945, "step": 5918 }, { "epoch": 0.972060846180691, "grad_norm": 0.30790865804305934, "learning_rate": 8.250237762171527e-06, "loss": 0.4901, "step": 5919 }, { "epoch": 0.9722250733890337, "grad_norm": 0.30633815270914644, "learning_rate": 8.249978545028526e-06, "loss": 0.4894, "step": 5920 }, { "epoch": 0.9723893005973765, "grad_norm": 0.29364052405576624, "learning_rate": 8.249719287157066e-06, "loss": 0.5001, "step": 5921 }, { "epoch": 0.9725535278057192, "grad_norm": 0.368512399902772, "learning_rate": 8.249459988559965e-06, "loss": 0.4937, "step": 5922 }, { "epoch": 0.972717755014062, "grad_norm": 0.2963704816857771, "learning_rate": 8.249200649240041e-06, "loss": 0.4981, "step": 5923 }, { "epoch": 0.9728819822224047, "grad_norm": 0.3420546608598498, "learning_rate": 8.248941269200109e-06, "loss": 0.4944, "step": 5924 }, { "epoch": 0.9730462094307475, "grad_norm": 0.26229608477987965, "learning_rate": 8.248681848442985e-06, "loss": 0.5222, "step": 5925 }, { "epoch": 0.9732104366390901, "grad_norm": 0.3123298058628242, "learning_rate": 8.248422386971489e-06, "loss": 0.4972, "step": 5926 }, { "epoch": 0.9733746638474329, "grad_norm": 0.2899224102116319, "learning_rate": 8.248162884788437e-06, "loss": 0.5155, "step": 5927 }, { "epoch": 0.9735388910557756, "grad_norm": 0.2642905460836601, "learning_rate": 8.24790334189665e-06, "loss": 0.5054, "step": 5928 }, { "epoch": 0.9737031182641184, "grad_norm": 0.2811233637741677, "learning_rate": 8.247643758298943e-06, "loss": 0.5033, "step": 5929 }, { "epoch": 0.9738673454724611, "grad_norm": 0.3921209165058693, "learning_rate": 8.24738413399814e-06, "loss": 0.4756, "step": 5930 }, { "epoch": 0.9740315726808039, "grad_norm": 0.4300011495238529, "learning_rate": 8.247124468997057e-06, "loss": 0.4726, "step": 5931 }, { "epoch": 0.9741957998891466, "grad_norm": 0.32118620284015836, "learning_rate": 8.246864763298516e-06, "loss": 0.5117, "step": 5932 }, { "epoch": 0.9743600270974894, "grad_norm": 0.2675433845486991, "learning_rate": 8.246605016905338e-06, "loss": 0.4906, "step": 5933 }, { "epoch": 0.9745242543058321, "grad_norm": 0.3604906603250053, "learning_rate": 8.246345229820341e-06, "loss": 0.4758, "step": 5934 }, { "epoch": 0.9746884815141749, "grad_norm": 0.2552549150791266, "learning_rate": 8.246085402046351e-06, "loss": 0.4811, "step": 5935 }, { "epoch": 0.9748527087225176, "grad_norm": 0.27213223431139516, "learning_rate": 8.245825533586188e-06, "loss": 0.5028, "step": 5936 }, { "epoch": 0.9750169359308604, "grad_norm": 0.2923501485405211, "learning_rate": 8.245565624442674e-06, "loss": 0.4761, "step": 5937 }, { "epoch": 0.9751811631392031, "grad_norm": 0.25637703091485053, "learning_rate": 8.245305674618631e-06, "loss": 0.4957, "step": 5938 }, { "epoch": 0.9753453903475459, "grad_norm": 0.29585731786389724, "learning_rate": 8.245045684116885e-06, "loss": 0.5024, "step": 5939 }, { "epoch": 0.9755096175558886, "grad_norm": 0.3097492958132811, "learning_rate": 8.244785652940257e-06, "loss": 0.4911, "step": 5940 }, { "epoch": 0.9756738447642314, "grad_norm": 0.2796684436959236, "learning_rate": 8.244525581091574e-06, "loss": 0.4873, "step": 5941 }, { "epoch": 0.9758380719725741, "grad_norm": 0.4901344389329459, "learning_rate": 8.244265468573657e-06, "loss": 0.4952, "step": 5942 }, { "epoch": 0.9760022991809167, "grad_norm": 0.29449038922514387, "learning_rate": 8.244005315389335e-06, "loss": 0.5011, "step": 5943 }, { "epoch": 0.9761665263892595, "grad_norm": 0.2500820874976813, "learning_rate": 8.24374512154143e-06, "loss": 0.4833, "step": 5944 }, { "epoch": 0.9763307535976022, "grad_norm": 0.2831071561547265, "learning_rate": 8.24348488703277e-06, "loss": 0.5094, "step": 5945 }, { "epoch": 0.976494980805945, "grad_norm": 0.322064842547227, "learning_rate": 8.243224611866182e-06, "loss": 0.5126, "step": 5946 }, { "epoch": 0.9766592080142877, "grad_norm": 0.2683354525422334, "learning_rate": 8.242964296044494e-06, "loss": 0.4987, "step": 5947 }, { "epoch": 0.9768234352226305, "grad_norm": 0.2992209909395012, "learning_rate": 8.242703939570527e-06, "loss": 0.5231, "step": 5948 }, { "epoch": 0.9769876624309732, "grad_norm": 0.3050729492504926, "learning_rate": 8.242443542447115e-06, "loss": 0.5136, "step": 5949 }, { "epoch": 0.977151889639316, "grad_norm": 0.29155035443597166, "learning_rate": 8.242183104677083e-06, "loss": 0.4942, "step": 5950 }, { "epoch": 0.9773161168476587, "grad_norm": 0.25878300704709084, "learning_rate": 8.24192262626326e-06, "loss": 0.4816, "step": 5951 }, { "epoch": 0.9774803440560015, "grad_norm": 0.3087053142553389, "learning_rate": 8.241662107208478e-06, "loss": 0.5206, "step": 5952 }, { "epoch": 0.9776445712643442, "grad_norm": 0.4975144518325369, "learning_rate": 8.241401547515563e-06, "loss": 0.4931, "step": 5953 }, { "epoch": 0.977808798472687, "grad_norm": 0.3519450335069974, "learning_rate": 8.241140947187347e-06, "loss": 0.5137, "step": 5954 }, { "epoch": 0.9779730256810297, "grad_norm": 0.27425219345136576, "learning_rate": 8.240880306226659e-06, "loss": 0.4938, "step": 5955 }, { "epoch": 0.9781372528893725, "grad_norm": 0.313446231366959, "learning_rate": 8.24061962463633e-06, "loss": 0.4833, "step": 5956 }, { "epoch": 0.9783014800977152, "grad_norm": 0.31047102932394477, "learning_rate": 8.240358902419192e-06, "loss": 0.5167, "step": 5957 }, { "epoch": 0.978465707306058, "grad_norm": 0.3358474346649635, "learning_rate": 8.240098139578076e-06, "loss": 0.4916, "step": 5958 }, { "epoch": 0.9786299345144007, "grad_norm": 0.2916965129858983, "learning_rate": 8.239837336115814e-06, "loss": 0.5071, "step": 5959 }, { "epoch": 0.9787941617227434, "grad_norm": 0.3787787228362414, "learning_rate": 8.23957649203524e-06, "loss": 0.5196, "step": 5960 }, { "epoch": 0.9789583889310861, "grad_norm": 0.3064291953677091, "learning_rate": 8.239315607339186e-06, "loss": 0.4962, "step": 5961 }, { "epoch": 0.9791226161394289, "grad_norm": 0.2839286416723755, "learning_rate": 8.239054682030485e-06, "loss": 0.4926, "step": 5962 }, { "epoch": 0.9792868433477716, "grad_norm": 0.28085264039852126, "learning_rate": 8.238793716111971e-06, "loss": 0.5022, "step": 5963 }, { "epoch": 0.9794510705561144, "grad_norm": 0.33066819531167013, "learning_rate": 8.23853270958648e-06, "loss": 0.4936, "step": 5964 }, { "epoch": 0.9796152977644571, "grad_norm": 0.26488196400063624, "learning_rate": 8.238271662456844e-06, "loss": 0.4916, "step": 5965 }, { "epoch": 0.9797795249727999, "grad_norm": 0.3473924170842142, "learning_rate": 8.2380105747259e-06, "loss": 0.5109, "step": 5966 }, { "epoch": 0.9799437521811426, "grad_norm": 0.3409154975608138, "learning_rate": 8.237749446396485e-06, "loss": 0.4985, "step": 5967 }, { "epoch": 0.9801079793894854, "grad_norm": 0.3418439073219856, "learning_rate": 8.237488277471433e-06, "loss": 0.4969, "step": 5968 }, { "epoch": 0.9802722065978281, "grad_norm": 0.2569144853172575, "learning_rate": 8.237227067953581e-06, "loss": 0.4764, "step": 5969 }, { "epoch": 0.9804364338061708, "grad_norm": 0.31028003969795925, "learning_rate": 8.236965817845766e-06, "loss": 0.4886, "step": 5970 }, { "epoch": 0.9806006610145136, "grad_norm": 0.254519739291749, "learning_rate": 8.236704527150826e-06, "loss": 0.4989, "step": 5971 }, { "epoch": 0.9807648882228563, "grad_norm": 0.4203026362129426, "learning_rate": 8.236443195871597e-06, "loss": 0.4991, "step": 5972 }, { "epoch": 0.9809291154311991, "grad_norm": 0.26955631129226254, "learning_rate": 8.23618182401092e-06, "loss": 0.5171, "step": 5973 }, { "epoch": 0.9810933426395418, "grad_norm": 0.2675493381061797, "learning_rate": 8.235920411571632e-06, "loss": 0.5036, "step": 5974 }, { "epoch": 0.9812575698478846, "grad_norm": 0.33819000674277316, "learning_rate": 8.235658958556573e-06, "loss": 0.4782, "step": 5975 }, { "epoch": 0.9814217970562273, "grad_norm": 0.2732732202700863, "learning_rate": 8.235397464968581e-06, "loss": 0.517, "step": 5976 }, { "epoch": 0.98158602426457, "grad_norm": 0.2927136697546449, "learning_rate": 8.235135930810499e-06, "loss": 0.4975, "step": 5977 }, { "epoch": 0.9817502514729127, "grad_norm": 0.29528840625993147, "learning_rate": 8.234874356085165e-06, "loss": 0.5094, "step": 5978 }, { "epoch": 0.9819144786812555, "grad_norm": 0.28678695381696284, "learning_rate": 8.234612740795422e-06, "loss": 0.5042, "step": 5979 }, { "epoch": 0.9820787058895982, "grad_norm": 0.2947556284863859, "learning_rate": 8.23435108494411e-06, "loss": 0.4788, "step": 5980 }, { "epoch": 0.982242933097941, "grad_norm": 0.32944895594123025, "learning_rate": 8.23408938853407e-06, "loss": 0.486, "step": 5981 }, { "epoch": 0.9824071603062837, "grad_norm": 0.2849669802030251, "learning_rate": 8.233827651568146e-06, "loss": 0.5017, "step": 5982 }, { "epoch": 0.9825713875146265, "grad_norm": 0.34083531204598366, "learning_rate": 8.23356587404918e-06, "loss": 0.4901, "step": 5983 }, { "epoch": 0.9827356147229692, "grad_norm": 0.44565048901596793, "learning_rate": 8.233304055980015e-06, "loss": 0.5058, "step": 5984 }, { "epoch": 0.982899841931312, "grad_norm": 0.3147569511729347, "learning_rate": 8.233042197363495e-06, "loss": 0.5062, "step": 5985 }, { "epoch": 0.9830640691396547, "grad_norm": 0.30713689506371955, "learning_rate": 8.232780298202464e-06, "loss": 0.4924, "step": 5986 }, { "epoch": 0.9832282963479975, "grad_norm": 0.35891472000084595, "learning_rate": 8.232518358499768e-06, "loss": 0.4979, "step": 5987 }, { "epoch": 0.9833925235563402, "grad_norm": 0.2610523577041328, "learning_rate": 8.232256378258248e-06, "loss": 0.4785, "step": 5988 }, { "epoch": 0.983556750764683, "grad_norm": 0.29778668994780577, "learning_rate": 8.231994357480754e-06, "loss": 0.5078, "step": 5989 }, { "epoch": 0.9837209779730257, "grad_norm": 0.37794507997054505, "learning_rate": 8.231732296170127e-06, "loss": 0.4875, "step": 5990 }, { "epoch": 0.9838852051813685, "grad_norm": 0.3080732467400163, "learning_rate": 8.231470194329218e-06, "loss": 0.5156, "step": 5991 }, { "epoch": 0.9840494323897112, "grad_norm": 0.31909404424037935, "learning_rate": 8.23120805196087e-06, "loss": 0.4996, "step": 5992 }, { "epoch": 0.984213659598054, "grad_norm": 0.3035152405076065, "learning_rate": 8.230945869067931e-06, "loss": 0.4752, "step": 5993 }, { "epoch": 0.9843778868063966, "grad_norm": 0.3463376211275316, "learning_rate": 8.23068364565325e-06, "loss": 0.4903, "step": 5994 }, { "epoch": 0.9845421140147393, "grad_norm": 0.30248850655697274, "learning_rate": 8.230421381719674e-06, "loss": 0.5164, "step": 5995 }, { "epoch": 0.9847063412230821, "grad_norm": 0.27197297877835336, "learning_rate": 8.230159077270053e-06, "loss": 0.5114, "step": 5996 }, { "epoch": 0.9848705684314248, "grad_norm": 0.39503979748818113, "learning_rate": 8.229896732307233e-06, "loss": 0.4937, "step": 5997 }, { "epoch": 0.9850347956397676, "grad_norm": 0.27231037109643863, "learning_rate": 8.229634346834064e-06, "loss": 0.4854, "step": 5998 }, { "epoch": 0.9851990228481103, "grad_norm": 0.2743727883763561, "learning_rate": 8.229371920853399e-06, "loss": 0.4894, "step": 5999 }, { "epoch": 0.9853632500564531, "grad_norm": 0.3098154188220738, "learning_rate": 8.229109454368082e-06, "loss": 0.4931, "step": 6000 }, { "epoch": 0.9855274772647958, "grad_norm": 0.31105934974202104, "learning_rate": 8.22884694738097e-06, "loss": 0.5277, "step": 6001 }, { "epoch": 0.9856917044731386, "grad_norm": 0.30333660429551784, "learning_rate": 8.22858439989491e-06, "loss": 0.4962, "step": 6002 }, { "epoch": 0.9858559316814813, "grad_norm": 0.28194793432475335, "learning_rate": 8.228321811912757e-06, "loss": 0.4693, "step": 6003 }, { "epoch": 0.9860201588898241, "grad_norm": 0.27521067135499894, "learning_rate": 8.22805918343736e-06, "loss": 0.4978, "step": 6004 }, { "epoch": 0.9861843860981668, "grad_norm": 0.3516121931850756, "learning_rate": 8.227796514471571e-06, "loss": 0.4923, "step": 6005 }, { "epoch": 0.9863486133065096, "grad_norm": 0.26710061007145536, "learning_rate": 8.227533805018245e-06, "loss": 0.5155, "step": 6006 }, { "epoch": 0.9865128405148523, "grad_norm": 0.29224389368396897, "learning_rate": 8.227271055080236e-06, "loss": 0.4866, "step": 6007 }, { "epoch": 0.9866770677231951, "grad_norm": 0.3534119652282733, "learning_rate": 8.227008264660396e-06, "loss": 0.5215, "step": 6008 }, { "epoch": 0.9868412949315378, "grad_norm": 0.2726025456910224, "learning_rate": 8.226745433761578e-06, "loss": 0.5074, "step": 6009 }, { "epoch": 0.9870055221398806, "grad_norm": 0.3227584179564337, "learning_rate": 8.226482562386638e-06, "loss": 0.476, "step": 6010 }, { "epoch": 0.9871697493482232, "grad_norm": 0.2584528878550159, "learning_rate": 8.226219650538432e-06, "loss": 0.5026, "step": 6011 }, { "epoch": 0.987333976556566, "grad_norm": 0.2983429827813741, "learning_rate": 8.225956698219814e-06, "loss": 0.5065, "step": 6012 }, { "epoch": 0.9874982037649087, "grad_norm": 0.26958664034702273, "learning_rate": 8.225693705433639e-06, "loss": 0.5034, "step": 6013 }, { "epoch": 0.9876624309732515, "grad_norm": 0.3463746350662079, "learning_rate": 8.225430672182768e-06, "loss": 0.4991, "step": 6014 }, { "epoch": 0.9878266581815942, "grad_norm": 0.48975988570215695, "learning_rate": 8.225167598470052e-06, "loss": 0.4868, "step": 6015 }, { "epoch": 0.987990885389937, "grad_norm": 0.28846554880418807, "learning_rate": 8.22490448429835e-06, "loss": 0.4941, "step": 6016 }, { "epoch": 0.9881551125982797, "grad_norm": 0.25948125490643514, "learning_rate": 8.224641329670522e-06, "loss": 0.4822, "step": 6017 }, { "epoch": 0.9883193398066225, "grad_norm": 0.4795604048369903, "learning_rate": 8.224378134589426e-06, "loss": 0.4985, "step": 6018 }, { "epoch": 0.9884835670149652, "grad_norm": 0.27508472004944673, "learning_rate": 8.224114899057917e-06, "loss": 0.4824, "step": 6019 }, { "epoch": 0.988647794223308, "grad_norm": 0.2941581038633356, "learning_rate": 8.223851623078856e-06, "loss": 0.4916, "step": 6020 }, { "epoch": 0.9888120214316507, "grad_norm": 0.2857124408871979, "learning_rate": 8.223588306655105e-06, "loss": 0.4978, "step": 6021 }, { "epoch": 0.9889762486399934, "grad_norm": 0.28161753210610235, "learning_rate": 8.22332494978952e-06, "loss": 0.4917, "step": 6022 }, { "epoch": 0.9891404758483362, "grad_norm": 0.3404370427089407, "learning_rate": 8.223061552484962e-06, "loss": 0.5161, "step": 6023 }, { "epoch": 0.989304703056679, "grad_norm": 0.29723498104859414, "learning_rate": 8.222798114744294e-06, "loss": 0.4953, "step": 6024 }, { "epoch": 0.9894689302650217, "grad_norm": 0.25821568853288496, "learning_rate": 8.222534636570375e-06, "loss": 0.4889, "step": 6025 }, { "epoch": 0.9896331574733644, "grad_norm": 0.34018544169293335, "learning_rate": 8.222271117966067e-06, "loss": 0.4987, "step": 6026 }, { "epoch": 0.9897973846817072, "grad_norm": 0.4406773775755572, "learning_rate": 8.222007558934234e-06, "loss": 0.5073, "step": 6027 }, { "epoch": 0.9899616118900498, "grad_norm": 0.5805674365221384, "learning_rate": 8.221743959477735e-06, "loss": 0.4827, "step": 6028 }, { "epoch": 0.9901258390983926, "grad_norm": 0.2996142802822292, "learning_rate": 8.221480319599435e-06, "loss": 0.5045, "step": 6029 }, { "epoch": 0.9902900663067353, "grad_norm": 0.29344698377790773, "learning_rate": 8.221216639302199e-06, "loss": 0.4996, "step": 6030 }, { "epoch": 0.9904542935150781, "grad_norm": 0.32978471469877835, "learning_rate": 8.220952918588888e-06, "loss": 0.4832, "step": 6031 }, { "epoch": 0.9906185207234208, "grad_norm": 0.348936338155547, "learning_rate": 8.220689157462367e-06, "loss": 0.4999, "step": 6032 }, { "epoch": 0.9907827479317636, "grad_norm": 0.2929668547648481, "learning_rate": 8.220425355925503e-06, "loss": 0.4946, "step": 6033 }, { "epoch": 0.9909469751401063, "grad_norm": 0.28969490861086555, "learning_rate": 8.220161513981157e-06, "loss": 0.508, "step": 6034 }, { "epoch": 0.9911112023484491, "grad_norm": 0.30362512496995825, "learning_rate": 8.219897631632197e-06, "loss": 0.5036, "step": 6035 }, { "epoch": 0.9912754295567918, "grad_norm": 0.6155640604324444, "learning_rate": 8.21963370888149e-06, "loss": 0.4919, "step": 6036 }, { "epoch": 0.9914396567651346, "grad_norm": 0.37171675217371414, "learning_rate": 8.219369745731901e-06, "loss": 0.5144, "step": 6037 }, { "epoch": 0.9916038839734773, "grad_norm": 0.285698244962252, "learning_rate": 8.219105742186297e-06, "loss": 0.5015, "step": 6038 }, { "epoch": 0.9917681111818201, "grad_norm": 0.35864388593535235, "learning_rate": 8.218841698247545e-06, "loss": 0.512, "step": 6039 }, { "epoch": 0.9919323383901628, "grad_norm": 0.277329370636313, "learning_rate": 8.218577613918514e-06, "loss": 0.4877, "step": 6040 }, { "epoch": 0.9920965655985056, "grad_norm": 0.27621896049336875, "learning_rate": 8.21831348920207e-06, "loss": 0.4903, "step": 6041 }, { "epoch": 0.9922607928068483, "grad_norm": 0.3112911663410362, "learning_rate": 8.218049324101086e-06, "loss": 0.4834, "step": 6042 }, { "epoch": 0.9924250200151911, "grad_norm": 0.3014841204227243, "learning_rate": 8.217785118618426e-06, "loss": 0.4922, "step": 6043 }, { "epoch": 0.9925892472235338, "grad_norm": 0.2767958028017139, "learning_rate": 8.217520872756962e-06, "loss": 0.4973, "step": 6044 }, { "epoch": 0.9927534744318764, "grad_norm": 0.2918501643305302, "learning_rate": 8.217256586519567e-06, "loss": 0.5088, "step": 6045 }, { "epoch": 0.9929177016402192, "grad_norm": 0.32321579020808244, "learning_rate": 8.216992259909105e-06, "loss": 0.4937, "step": 6046 }, { "epoch": 0.993081928848562, "grad_norm": 0.29135334956770775, "learning_rate": 8.21672789292845e-06, "loss": 0.4867, "step": 6047 }, { "epoch": 0.9932461560569047, "grad_norm": 0.2755437262202141, "learning_rate": 8.216463485580474e-06, "loss": 0.5026, "step": 6048 }, { "epoch": 0.9934103832652474, "grad_norm": 0.28258408765853643, "learning_rate": 8.216199037868048e-06, "loss": 0.4922, "step": 6049 }, { "epoch": 0.9935746104735902, "grad_norm": 0.2856610733524225, "learning_rate": 8.215934549794043e-06, "loss": 0.4934, "step": 6050 }, { "epoch": 0.9937388376819329, "grad_norm": 0.26939264608863184, "learning_rate": 8.215670021361335e-06, "loss": 0.5089, "step": 6051 }, { "epoch": 0.9939030648902757, "grad_norm": 0.26228522266134563, "learning_rate": 8.215405452572793e-06, "loss": 0.5082, "step": 6052 }, { "epoch": 0.9940672920986184, "grad_norm": 0.2865666730968439, "learning_rate": 8.215140843431293e-06, "loss": 0.4986, "step": 6053 }, { "epoch": 0.9942315193069612, "grad_norm": 0.29112888158896566, "learning_rate": 8.21487619393971e-06, "loss": 0.4927, "step": 6054 }, { "epoch": 0.9943957465153039, "grad_norm": 0.32408251237526065, "learning_rate": 8.214611504100914e-06, "loss": 0.4819, "step": 6055 }, { "epoch": 0.9945599737236467, "grad_norm": 0.2980623923723772, "learning_rate": 8.214346773917784e-06, "loss": 0.4951, "step": 6056 }, { "epoch": 0.9947242009319894, "grad_norm": 0.3298708840715456, "learning_rate": 8.214082003393193e-06, "loss": 0.4933, "step": 6057 }, { "epoch": 0.9948884281403322, "grad_norm": 0.2886454071627483, "learning_rate": 8.213817192530015e-06, "loss": 0.5084, "step": 6058 }, { "epoch": 0.9950526553486749, "grad_norm": 0.26521275064187566, "learning_rate": 8.213552341331133e-06, "loss": 0.5027, "step": 6059 }, { "epoch": 0.9952168825570177, "grad_norm": 0.35831906260849744, "learning_rate": 8.213287449799416e-06, "loss": 0.5002, "step": 6060 }, { "epoch": 0.9953811097653604, "grad_norm": 0.49904049416821655, "learning_rate": 8.213022517937744e-06, "loss": 0.4946, "step": 6061 }, { "epoch": 0.9955453369737031, "grad_norm": 0.273090783481902, "learning_rate": 8.212757545748994e-06, "loss": 0.5072, "step": 6062 }, { "epoch": 0.9957095641820458, "grad_norm": 0.3750463965518032, "learning_rate": 8.212492533236046e-06, "loss": 0.493, "step": 6063 }, { "epoch": 0.9958737913903886, "grad_norm": 0.2788278001894012, "learning_rate": 8.212227480401774e-06, "loss": 0.5033, "step": 6064 }, { "epoch": 0.9960380185987313, "grad_norm": 0.25147500943496537, "learning_rate": 8.211962387249062e-06, "loss": 0.5144, "step": 6065 }, { "epoch": 0.9962022458070741, "grad_norm": 0.32790265255873924, "learning_rate": 8.211697253780785e-06, "loss": 0.5175, "step": 6066 }, { "epoch": 0.9963664730154168, "grad_norm": 0.3162485512645619, "learning_rate": 8.211432079999824e-06, "loss": 0.4795, "step": 6067 }, { "epoch": 0.9965307002237596, "grad_norm": 0.31059518886608145, "learning_rate": 8.211166865909058e-06, "loss": 0.4959, "step": 6068 }, { "epoch": 0.9966949274321023, "grad_norm": 0.34734654159685363, "learning_rate": 8.210901611511371e-06, "loss": 0.4801, "step": 6069 }, { "epoch": 0.996859154640445, "grad_norm": 0.2947683331373121, "learning_rate": 8.21063631680964e-06, "loss": 0.5083, "step": 6070 }, { "epoch": 0.9970233818487878, "grad_norm": 0.315138018398247, "learning_rate": 8.210370981806748e-06, "loss": 0.5118, "step": 6071 }, { "epoch": 0.9971876090571306, "grad_norm": 0.33194756505125095, "learning_rate": 8.210105606505577e-06, "loss": 0.4917, "step": 6072 }, { "epoch": 0.9973518362654733, "grad_norm": 0.3193908762263436, "learning_rate": 8.209840190909009e-06, "loss": 0.4971, "step": 6073 }, { "epoch": 0.997516063473816, "grad_norm": 0.2793014691048366, "learning_rate": 8.209574735019925e-06, "loss": 0.4966, "step": 6074 }, { "epoch": 0.9976802906821588, "grad_norm": 0.2966439416928268, "learning_rate": 8.20930923884121e-06, "loss": 0.5136, "step": 6075 }, { "epoch": 0.9978445178905015, "grad_norm": 0.33803139976143903, "learning_rate": 8.209043702375749e-06, "loss": 0.5076, "step": 6076 }, { "epoch": 0.9980087450988443, "grad_norm": 0.3729413860774098, "learning_rate": 8.208778125626423e-06, "loss": 0.4941, "step": 6077 }, { "epoch": 0.998172972307187, "grad_norm": 0.29849360362127925, "learning_rate": 8.208512508596118e-06, "loss": 0.5089, "step": 6078 }, { "epoch": 0.9983371995155297, "grad_norm": 0.3708493783333557, "learning_rate": 8.208246851287717e-06, "loss": 0.5017, "step": 6079 }, { "epoch": 0.9985014267238724, "grad_norm": 0.2648040375203547, "learning_rate": 8.207981153704108e-06, "loss": 0.5087, "step": 6080 }, { "epoch": 0.9986656539322152, "grad_norm": 0.330039886934558, "learning_rate": 8.207715415848176e-06, "loss": 0.4689, "step": 6081 }, { "epoch": 0.9988298811405579, "grad_norm": 0.30651609282300624, "learning_rate": 8.207449637722806e-06, "loss": 0.4954, "step": 6082 }, { "epoch": 0.9989941083489007, "grad_norm": 0.3019876534538926, "learning_rate": 8.207183819330884e-06, "loss": 0.5002, "step": 6083 }, { "epoch": 0.9991583355572434, "grad_norm": 0.2769342143142423, "learning_rate": 8.206917960675301e-06, "loss": 0.5214, "step": 6084 }, { "epoch": 0.9993225627655862, "grad_norm": 0.3556437500448538, "learning_rate": 8.20665206175894e-06, "loss": 0.4916, "step": 6085 }, { "epoch": 0.9994867899739289, "grad_norm": 0.32734441118412566, "learning_rate": 8.206386122584692e-06, "loss": 0.476, "step": 6086 }, { "epoch": 0.9996510171822717, "grad_norm": 0.33157470358823943, "learning_rate": 8.206120143155443e-06, "loss": 0.5219, "step": 6087 }, { "epoch": 0.9998152443906144, "grad_norm": 0.3207924796762015, "learning_rate": 8.205854123474083e-06, "loss": 0.4969, "step": 6088 }, { "epoch": 0.9999794715989572, "grad_norm": 0.268248980958867, "learning_rate": 8.205588063543502e-06, "loss": 0.4925, "step": 6089 }, { "epoch": 1.0001436988072998, "grad_norm": 0.2701398571042544, "learning_rate": 8.205321963366588e-06, "loss": 0.4835, "step": 6090 }, { "epoch": 1.0003079260156427, "grad_norm": 0.29488775351168806, "learning_rate": 8.205055822946233e-06, "loss": 0.4957, "step": 6091 }, { "epoch": 1.0004721532239853, "grad_norm": 0.3105970052078029, "learning_rate": 8.204789642285324e-06, "loss": 0.4934, "step": 6092 }, { "epoch": 1.0006363804323282, "grad_norm": 0.3507102653544947, "learning_rate": 8.204523421386757e-06, "loss": 0.4722, "step": 6093 }, { "epoch": 1.0008006076406708, "grad_norm": 0.30553103864347814, "learning_rate": 8.20425716025342e-06, "loss": 0.4892, "step": 6094 }, { "epoch": 1.0009648348490137, "grad_norm": 0.3301092842380056, "learning_rate": 8.203990858888206e-06, "loss": 0.4748, "step": 6095 }, { "epoch": 1.0011290620573563, "grad_norm": 0.3285339943190183, "learning_rate": 8.203724517294007e-06, "loss": 0.5096, "step": 6096 }, { "epoch": 1.0012932892656992, "grad_norm": 0.4022453312728853, "learning_rate": 8.203458135473716e-06, "loss": 0.482, "step": 6097 }, { "epoch": 1.0014575164740418, "grad_norm": 0.3226651889997907, "learning_rate": 8.203191713430225e-06, "loss": 0.4779, "step": 6098 }, { "epoch": 1.0016217436823847, "grad_norm": 0.32939182814617934, "learning_rate": 8.20292525116643e-06, "loss": 0.4972, "step": 6099 }, { "epoch": 1.0017859708907273, "grad_norm": 0.3599122148730225, "learning_rate": 8.202658748685223e-06, "loss": 0.4785, "step": 6100 }, { "epoch": 1.0019501980990702, "grad_norm": 0.26857771384618684, "learning_rate": 8.202392205989498e-06, "loss": 0.4919, "step": 6101 }, { "epoch": 1.0021144253074128, "grad_norm": 0.25228675296021535, "learning_rate": 8.202125623082151e-06, "loss": 0.4978, "step": 6102 }, { "epoch": 1.0022786525157557, "grad_norm": 0.2786770686176153, "learning_rate": 8.20185899996608e-06, "loss": 0.5098, "step": 6103 }, { "epoch": 1.0024428797240983, "grad_norm": 0.27270211292454544, "learning_rate": 8.201592336644176e-06, "loss": 0.5052, "step": 6104 }, { "epoch": 1.002607106932441, "grad_norm": 0.287144611347033, "learning_rate": 8.201325633119337e-06, "loss": 0.5038, "step": 6105 }, { "epoch": 1.0027713341407838, "grad_norm": 0.336224498870284, "learning_rate": 8.201058889394461e-06, "loss": 0.4697, "step": 6106 }, { "epoch": 1.0029355613491264, "grad_norm": 0.39484648633479735, "learning_rate": 8.200792105472442e-06, "loss": 0.5035, "step": 6107 }, { "epoch": 1.0030997885574693, "grad_norm": 0.3712949110335043, "learning_rate": 8.200525281356183e-06, "loss": 0.4759, "step": 6108 }, { "epoch": 1.003264015765812, "grad_norm": 0.270083642715779, "learning_rate": 8.200258417048578e-06, "loss": 0.5062, "step": 6109 }, { "epoch": 1.0034282429741548, "grad_norm": 0.35149759365651945, "learning_rate": 8.199991512552525e-06, "loss": 0.485, "step": 6110 }, { "epoch": 1.0035924701824974, "grad_norm": 0.27952436185145424, "learning_rate": 8.199724567870923e-06, "loss": 0.4849, "step": 6111 }, { "epoch": 1.0037566973908403, "grad_norm": 0.31188337577290126, "learning_rate": 8.199457583006671e-06, "loss": 0.5232, "step": 6112 }, { "epoch": 1.003920924599183, "grad_norm": 0.36877171718415647, "learning_rate": 8.199190557962673e-06, "loss": 0.4813, "step": 6113 }, { "epoch": 1.0040851518075258, "grad_norm": 0.29489431860375687, "learning_rate": 8.198923492741825e-06, "loss": 0.5065, "step": 6114 }, { "epoch": 1.0042493790158684, "grad_norm": 0.2871256782320963, "learning_rate": 8.198656387347028e-06, "loss": 0.4967, "step": 6115 }, { "epoch": 1.0044136062242113, "grad_norm": 0.3253782587546533, "learning_rate": 8.198389241781185e-06, "loss": 0.5157, "step": 6116 }, { "epoch": 1.004577833432554, "grad_norm": 0.2866600487798448, "learning_rate": 8.198122056047195e-06, "loss": 0.5116, "step": 6117 }, { "epoch": 1.0047420606408968, "grad_norm": 0.3204737775521936, "learning_rate": 8.197854830147961e-06, "loss": 0.4931, "step": 6118 }, { "epoch": 1.0049062878492394, "grad_norm": 0.305333919369849, "learning_rate": 8.197587564086386e-06, "loss": 0.495, "step": 6119 }, { "epoch": 1.0050705150575823, "grad_norm": 0.3240184222494894, "learning_rate": 8.19732025786537e-06, "loss": 0.4901, "step": 6120 }, { "epoch": 1.005234742265925, "grad_norm": 0.2586365842115682, "learning_rate": 8.197052911487823e-06, "loss": 0.5028, "step": 6121 }, { "epoch": 1.0053989694742675, "grad_norm": 0.7875093702083333, "learning_rate": 8.19678552495664e-06, "loss": 0.4758, "step": 6122 }, { "epoch": 1.0055631966826104, "grad_norm": 0.2959337138717507, "learning_rate": 8.19651809827473e-06, "loss": 0.5094, "step": 6123 }, { "epoch": 1.005727423890953, "grad_norm": 0.36942449603120836, "learning_rate": 8.196250631444996e-06, "loss": 0.497, "step": 6124 }, { "epoch": 1.005891651099296, "grad_norm": 0.2877734449312031, "learning_rate": 8.195983124470346e-06, "loss": 0.4854, "step": 6125 }, { "epoch": 1.0060558783076385, "grad_norm": 0.285412931493051, "learning_rate": 8.19571557735368e-06, "loss": 0.4869, "step": 6126 }, { "epoch": 1.0062201055159814, "grad_norm": 0.29427375436401393, "learning_rate": 8.195447990097908e-06, "loss": 0.4873, "step": 6127 }, { "epoch": 1.006384332724324, "grad_norm": 0.3929275658433873, "learning_rate": 8.195180362705935e-06, "loss": 0.4779, "step": 6128 }, { "epoch": 1.006548559932667, "grad_norm": 0.34591623515733444, "learning_rate": 8.194912695180668e-06, "loss": 0.494, "step": 6129 }, { "epoch": 1.0067127871410095, "grad_norm": 0.30622477678448884, "learning_rate": 8.194644987525013e-06, "loss": 0.4757, "step": 6130 }, { "epoch": 1.0068770143493524, "grad_norm": 0.29008846351927825, "learning_rate": 8.194377239741879e-06, "loss": 0.4972, "step": 6131 }, { "epoch": 1.007041241557695, "grad_norm": 0.2901644160895777, "learning_rate": 8.194109451834172e-06, "loss": 0.4962, "step": 6132 }, { "epoch": 1.007205468766038, "grad_norm": 0.39246471775157815, "learning_rate": 8.193841623804803e-06, "loss": 0.5022, "step": 6133 }, { "epoch": 1.0073696959743805, "grad_norm": 0.33816828096835855, "learning_rate": 8.193573755656681e-06, "loss": 0.4755, "step": 6134 }, { "epoch": 1.0075339231827234, "grad_norm": 0.2882989726136812, "learning_rate": 8.193305847392713e-06, "loss": 0.4939, "step": 6135 }, { "epoch": 1.007698150391066, "grad_norm": 0.8477706290899284, "learning_rate": 8.193037899015809e-06, "loss": 0.4873, "step": 6136 }, { "epoch": 1.0078623775994089, "grad_norm": 0.4657233345201176, "learning_rate": 8.19276991052888e-06, "loss": 0.489, "step": 6137 }, { "epoch": 1.0080266048077515, "grad_norm": 0.3198771301938268, "learning_rate": 8.192501881934838e-06, "loss": 0.5226, "step": 6138 }, { "epoch": 1.0081908320160942, "grad_norm": 0.37111830598748036, "learning_rate": 8.192233813236591e-06, "loss": 0.4778, "step": 6139 }, { "epoch": 1.008355059224437, "grad_norm": 0.29241114461127055, "learning_rate": 8.191965704437053e-06, "loss": 0.4817, "step": 6140 }, { "epoch": 1.0085192864327797, "grad_norm": 0.3005976492011157, "learning_rate": 8.191697555539135e-06, "loss": 0.4732, "step": 6141 }, { "epoch": 1.0086835136411225, "grad_norm": 0.2967960749197657, "learning_rate": 8.19142936654575e-06, "loss": 0.4941, "step": 6142 }, { "epoch": 1.0088477408494652, "grad_norm": 0.28884899020507654, "learning_rate": 8.191161137459809e-06, "loss": 0.4748, "step": 6143 }, { "epoch": 1.009011968057808, "grad_norm": 0.525761885715074, "learning_rate": 8.190892868284228e-06, "loss": 0.4712, "step": 6144 }, { "epoch": 1.0091761952661507, "grad_norm": 0.31060904710486237, "learning_rate": 8.190624559021916e-06, "loss": 0.4727, "step": 6145 }, { "epoch": 1.0093404224744935, "grad_norm": 0.24722628302120625, "learning_rate": 8.190356209675793e-06, "loss": 0.4859, "step": 6146 }, { "epoch": 1.0095046496828362, "grad_norm": 0.3700887463419303, "learning_rate": 8.19008782024877e-06, "loss": 0.4817, "step": 6147 }, { "epoch": 1.009668876891179, "grad_norm": 0.28728444716481255, "learning_rate": 8.189819390743762e-06, "loss": 0.4808, "step": 6148 }, { "epoch": 1.0098331040995217, "grad_norm": 0.29078167293740487, "learning_rate": 8.189550921163685e-06, "loss": 0.4983, "step": 6149 }, { "epoch": 1.0099973313078645, "grad_norm": 0.3636550521570456, "learning_rate": 8.189282411511457e-06, "loss": 0.471, "step": 6150 }, { "epoch": 1.0101615585162071, "grad_norm": 1.2961617563754146, "learning_rate": 8.18901386178999e-06, "loss": 0.499, "step": 6151 }, { "epoch": 1.01032578572455, "grad_norm": 0.31558961032524924, "learning_rate": 8.188745272002206e-06, "loss": 0.5054, "step": 6152 }, { "epoch": 1.0104900129328926, "grad_norm": 0.3187404488115447, "learning_rate": 8.188476642151016e-06, "loss": 0.4903, "step": 6153 }, { "epoch": 1.0106542401412355, "grad_norm": 0.4396331603761133, "learning_rate": 8.188207972239343e-06, "loss": 0.5123, "step": 6154 }, { "epoch": 1.0108184673495781, "grad_norm": 0.3098010717106893, "learning_rate": 8.187939262270101e-06, "loss": 0.5034, "step": 6155 }, { "epoch": 1.0109826945579208, "grad_norm": 0.5654184895396511, "learning_rate": 8.18767051224621e-06, "loss": 0.494, "step": 6156 }, { "epoch": 1.0111469217662636, "grad_norm": 0.3515168501435642, "learning_rate": 8.18740172217059e-06, "loss": 0.4978, "step": 6157 }, { "epoch": 1.0113111489746063, "grad_norm": 0.3442198924300222, "learning_rate": 8.18713289204616e-06, "loss": 0.4948, "step": 6158 }, { "epoch": 1.0114753761829491, "grad_norm": 0.2753283941014209, "learning_rate": 8.186864021875836e-06, "loss": 0.5203, "step": 6159 }, { "epoch": 1.0116396033912918, "grad_norm": 0.27913090907463, "learning_rate": 8.186595111662544e-06, "loss": 0.4882, "step": 6160 }, { "epoch": 1.0118038305996346, "grad_norm": 0.2762978360605873, "learning_rate": 8.186326161409202e-06, "loss": 0.4912, "step": 6161 }, { "epoch": 1.0119680578079773, "grad_norm": 0.286562286465793, "learning_rate": 8.186057171118731e-06, "loss": 0.4877, "step": 6162 }, { "epoch": 1.0121322850163201, "grad_norm": 0.33537335818622976, "learning_rate": 8.185788140794053e-06, "loss": 0.4776, "step": 6163 }, { "epoch": 1.0122965122246628, "grad_norm": 0.39338628713320906, "learning_rate": 8.18551907043809e-06, "loss": 0.5107, "step": 6164 }, { "epoch": 1.0124607394330056, "grad_norm": 0.2926564624467248, "learning_rate": 8.18524996005376e-06, "loss": 0.5054, "step": 6165 }, { "epoch": 1.0126249666413483, "grad_norm": 0.3033449050165952, "learning_rate": 8.184980809643992e-06, "loss": 0.5057, "step": 6166 }, { "epoch": 1.0127891938496911, "grad_norm": 0.34123205757993946, "learning_rate": 8.184711619211708e-06, "loss": 0.4784, "step": 6167 }, { "epoch": 1.0129534210580338, "grad_norm": 0.4648784639014738, "learning_rate": 8.18444238875983e-06, "loss": 0.5068, "step": 6168 }, { "epoch": 1.0131176482663766, "grad_norm": 0.3461201369794482, "learning_rate": 8.184173118291282e-06, "loss": 0.4855, "step": 6169 }, { "epoch": 1.0132818754747193, "grad_norm": 0.3150133852802901, "learning_rate": 8.183903807808989e-06, "loss": 0.4996, "step": 6170 }, { "epoch": 1.0134461026830621, "grad_norm": 0.401520522556826, "learning_rate": 8.183634457315875e-06, "loss": 0.4812, "step": 6171 }, { "epoch": 1.0136103298914048, "grad_norm": 0.3923158095446052, "learning_rate": 8.18336506681487e-06, "loss": 0.4672, "step": 6172 }, { "epoch": 1.0137745570997474, "grad_norm": 0.3516625662622814, "learning_rate": 8.183095636308895e-06, "loss": 0.5008, "step": 6173 }, { "epoch": 1.0139387843080903, "grad_norm": 0.3250018573957792, "learning_rate": 8.182826165800877e-06, "loss": 0.5109, "step": 6174 }, { "epoch": 1.014103011516433, "grad_norm": 0.28774650547836594, "learning_rate": 8.182556655293743e-06, "loss": 0.477, "step": 6175 }, { "epoch": 1.0142672387247758, "grad_norm": 0.3476177081030143, "learning_rate": 8.182287104790421e-06, "loss": 0.4912, "step": 6176 }, { "epoch": 1.0144314659331184, "grad_norm": 0.41693100296127283, "learning_rate": 8.182017514293839e-06, "loss": 0.5074, "step": 6177 }, { "epoch": 1.0145956931414613, "grad_norm": 0.3266041736928415, "learning_rate": 8.181747883806924e-06, "loss": 0.4978, "step": 6178 }, { "epoch": 1.014759920349804, "grad_norm": 0.3506095610631528, "learning_rate": 8.181478213332604e-06, "loss": 0.4827, "step": 6179 }, { "epoch": 1.0149241475581467, "grad_norm": 0.2756591884041051, "learning_rate": 8.181208502873809e-06, "loss": 0.4806, "step": 6180 }, { "epoch": 1.0150883747664894, "grad_norm": 0.2498743413864132, "learning_rate": 8.180938752433467e-06, "loss": 0.4937, "step": 6181 }, { "epoch": 1.0152526019748322, "grad_norm": 0.32540974513603127, "learning_rate": 8.180668962014509e-06, "loss": 0.5095, "step": 6182 }, { "epoch": 1.0154168291831749, "grad_norm": 0.29822184379944644, "learning_rate": 8.180399131619865e-06, "loss": 0.4799, "step": 6183 }, { "epoch": 1.0155810563915177, "grad_norm": 0.29922201319644454, "learning_rate": 8.180129261252465e-06, "loss": 0.4823, "step": 6184 }, { "epoch": 1.0157452835998604, "grad_norm": 0.3408181411078289, "learning_rate": 8.17985935091524e-06, "loss": 0.508, "step": 6185 }, { "epoch": 1.0159095108082032, "grad_norm": 0.3017704206443717, "learning_rate": 8.179589400611124e-06, "loss": 0.4762, "step": 6186 }, { "epoch": 1.0160737380165459, "grad_norm": 0.28087313141512144, "learning_rate": 8.179319410343046e-06, "loss": 0.4864, "step": 6187 }, { "epoch": 1.0162379652248887, "grad_norm": 0.343968512634284, "learning_rate": 8.17904938011394e-06, "loss": 0.518, "step": 6188 }, { "epoch": 1.0164021924332314, "grad_norm": 0.35943975526034677, "learning_rate": 8.178779309926736e-06, "loss": 0.4914, "step": 6189 }, { "epoch": 1.016566419641574, "grad_norm": 0.30449402360044453, "learning_rate": 8.17850919978437e-06, "loss": 0.5151, "step": 6190 }, { "epoch": 1.0167306468499169, "grad_norm": 0.4024272798430009, "learning_rate": 8.178239049689776e-06, "loss": 0.5001, "step": 6191 }, { "epoch": 1.0168948740582595, "grad_norm": 0.42775826191503635, "learning_rate": 8.177968859645886e-06, "loss": 0.4927, "step": 6192 }, { "epoch": 1.0170591012666024, "grad_norm": 0.3054641612310703, "learning_rate": 8.177698629655635e-06, "loss": 0.4729, "step": 6193 }, { "epoch": 1.017223328474945, "grad_norm": 0.3662068760964567, "learning_rate": 8.177428359721959e-06, "loss": 0.4779, "step": 6194 }, { "epoch": 1.0173875556832879, "grad_norm": 0.30360510720936423, "learning_rate": 8.177158049847793e-06, "loss": 0.4873, "step": 6195 }, { "epoch": 1.0175517828916305, "grad_norm": 0.3055845582263593, "learning_rate": 8.176887700036074e-06, "loss": 0.5044, "step": 6196 }, { "epoch": 1.0177160100999734, "grad_norm": 0.30314495024705285, "learning_rate": 8.176617310289734e-06, "loss": 0.5077, "step": 6197 }, { "epoch": 1.017880237308316, "grad_norm": 0.27451553538847134, "learning_rate": 8.176346880611716e-06, "loss": 0.4883, "step": 6198 }, { "epoch": 1.0180444645166589, "grad_norm": 0.29349998728599647, "learning_rate": 8.176076411004954e-06, "loss": 0.4968, "step": 6199 }, { "epoch": 1.0182086917250015, "grad_norm": 0.690790330399613, "learning_rate": 8.175805901472382e-06, "loss": 0.5199, "step": 6200 }, { "epoch": 1.0183729189333444, "grad_norm": 0.39758600380845943, "learning_rate": 8.175535352016944e-06, "loss": 0.5017, "step": 6201 }, { "epoch": 1.018537146141687, "grad_norm": 0.30757472221998394, "learning_rate": 8.175264762641575e-06, "loss": 0.4902, "step": 6202 }, { "epoch": 1.0187013733500299, "grad_norm": 0.2635507175010998, "learning_rate": 8.174994133349214e-06, "loss": 0.4966, "step": 6203 }, { "epoch": 1.0188656005583725, "grad_norm": 0.345398957493199, "learning_rate": 8.174723464142802e-06, "loss": 0.4957, "step": 6204 }, { "epoch": 1.0190298277667154, "grad_norm": 0.27142952139291604, "learning_rate": 8.174452755025279e-06, "loss": 0.4826, "step": 6205 }, { "epoch": 1.019194054975058, "grad_norm": 0.2915572983361061, "learning_rate": 8.174182005999583e-06, "loss": 0.4927, "step": 6206 }, { "epoch": 1.0193582821834006, "grad_norm": 0.27599364042760827, "learning_rate": 8.173911217068654e-06, "loss": 0.5096, "step": 6207 }, { "epoch": 1.0195225093917435, "grad_norm": 0.29414455181960725, "learning_rate": 8.173640388235436e-06, "loss": 0.5038, "step": 6208 }, { "epoch": 1.0196867366000861, "grad_norm": 0.342711202223622, "learning_rate": 8.173369519502868e-06, "loss": 0.4912, "step": 6209 }, { "epoch": 1.019850963808429, "grad_norm": 0.2670830751046088, "learning_rate": 8.173098610873893e-06, "loss": 0.4981, "step": 6210 }, { "epoch": 1.0200151910167716, "grad_norm": 0.2999878589401095, "learning_rate": 8.172827662351455e-06, "loss": 0.4759, "step": 6211 }, { "epoch": 1.0201794182251145, "grad_norm": 0.31873822598064333, "learning_rate": 8.172556673938493e-06, "loss": 0.4731, "step": 6212 }, { "epoch": 1.0203436454334571, "grad_norm": 0.2742035415040891, "learning_rate": 8.172285645637952e-06, "loss": 0.4674, "step": 6213 }, { "epoch": 1.0205078726418, "grad_norm": 0.31574870554300666, "learning_rate": 8.172014577452778e-06, "loss": 0.4775, "step": 6214 }, { "epoch": 1.0206720998501426, "grad_norm": 0.31740606222826007, "learning_rate": 8.17174346938591e-06, "loss": 0.5123, "step": 6215 }, { "epoch": 1.0208363270584855, "grad_norm": 0.27353324703910803, "learning_rate": 8.171472321440297e-06, "loss": 0.4837, "step": 6216 }, { "epoch": 1.0210005542668281, "grad_norm": 0.2944184923332211, "learning_rate": 8.171201133618883e-06, "loss": 0.4803, "step": 6217 }, { "epoch": 1.021164781475171, "grad_norm": 0.3013537117744429, "learning_rate": 8.170929905924613e-06, "loss": 0.4995, "step": 6218 }, { "epoch": 1.0213290086835136, "grad_norm": 0.44433352396382897, "learning_rate": 8.17065863836043e-06, "loss": 0.4857, "step": 6219 }, { "epoch": 1.0214932358918565, "grad_norm": 0.363216757817997, "learning_rate": 8.170387330929286e-06, "loss": 0.4831, "step": 6220 }, { "epoch": 1.0216574631001991, "grad_norm": 0.2788261710336551, "learning_rate": 8.170115983634123e-06, "loss": 0.484, "step": 6221 }, { "epoch": 1.021821690308542, "grad_norm": 0.31049045914006534, "learning_rate": 8.169844596477889e-06, "loss": 0.4982, "step": 6222 }, { "epoch": 1.0219859175168846, "grad_norm": 0.31160712706194205, "learning_rate": 8.169573169463534e-06, "loss": 0.5045, "step": 6223 }, { "epoch": 1.0221501447252272, "grad_norm": 0.26787311763186605, "learning_rate": 8.169301702594002e-06, "loss": 0.4912, "step": 6224 }, { "epoch": 1.02231437193357, "grad_norm": 0.28728606908000814, "learning_rate": 8.169030195872242e-06, "loss": 0.5129, "step": 6225 }, { "epoch": 1.0224785991419127, "grad_norm": 0.4256925386572884, "learning_rate": 8.168758649301207e-06, "loss": 0.4969, "step": 6226 }, { "epoch": 1.0226428263502556, "grad_norm": 0.2743511180790035, "learning_rate": 8.168487062883842e-06, "loss": 0.4909, "step": 6227 }, { "epoch": 1.0228070535585982, "grad_norm": 0.3205816659724719, "learning_rate": 8.168215436623099e-06, "loss": 0.5116, "step": 6228 }, { "epoch": 1.022971280766941, "grad_norm": 0.2797160146405526, "learning_rate": 8.167943770521928e-06, "loss": 0.4889, "step": 6229 }, { "epoch": 1.0231355079752837, "grad_norm": 0.3298401016695871, "learning_rate": 8.167672064583277e-06, "loss": 0.4948, "step": 6230 }, { "epoch": 1.0232997351836266, "grad_norm": 0.2820165824015003, "learning_rate": 8.1674003188101e-06, "loss": 0.4816, "step": 6231 }, { "epoch": 1.0234639623919692, "grad_norm": 0.7516619960945273, "learning_rate": 8.167128533205348e-06, "loss": 0.4978, "step": 6232 }, { "epoch": 1.023628189600312, "grad_norm": 0.3242252762838541, "learning_rate": 8.16685670777197e-06, "loss": 0.4701, "step": 6233 }, { "epoch": 1.0237924168086547, "grad_norm": 0.28078186626292395, "learning_rate": 8.166584842512922e-06, "loss": 0.5035, "step": 6234 }, { "epoch": 1.0239566440169976, "grad_norm": 0.2892632624945348, "learning_rate": 8.166312937431154e-06, "loss": 0.482, "step": 6235 }, { "epoch": 1.0241208712253402, "grad_norm": 0.30068090471091746, "learning_rate": 8.166040992529623e-06, "loss": 0.4935, "step": 6236 }, { "epoch": 1.024285098433683, "grad_norm": 0.30986046486983354, "learning_rate": 8.165769007811278e-06, "loss": 0.503, "step": 6237 }, { "epoch": 1.0244493256420257, "grad_norm": 0.2652243823608064, "learning_rate": 8.165496983279075e-06, "loss": 0.4862, "step": 6238 }, { "epoch": 1.0246135528503686, "grad_norm": 0.38129905728032715, "learning_rate": 8.165224918935968e-06, "loss": 0.5033, "step": 6239 }, { "epoch": 1.0247777800587112, "grad_norm": 0.3330210906119507, "learning_rate": 8.164952814784913e-06, "loss": 0.4925, "step": 6240 }, { "epoch": 1.0249420072670539, "grad_norm": 0.6285421145833283, "learning_rate": 8.164680670828864e-06, "loss": 0.4926, "step": 6241 }, { "epoch": 1.0251062344753967, "grad_norm": 0.28295504725253984, "learning_rate": 8.164408487070778e-06, "loss": 0.4815, "step": 6242 }, { "epoch": 1.0252704616837394, "grad_norm": 0.34828429823959156, "learning_rate": 8.164136263513609e-06, "loss": 0.4895, "step": 6243 }, { "epoch": 1.0254346888920822, "grad_norm": 0.33668507831640215, "learning_rate": 8.163864000160318e-06, "loss": 0.5016, "step": 6244 }, { "epoch": 1.0255989161004249, "grad_norm": 0.3371227103810928, "learning_rate": 8.163591697013857e-06, "loss": 0.4818, "step": 6245 }, { "epoch": 1.0257631433087677, "grad_norm": 0.37006258498862343, "learning_rate": 8.163319354077188e-06, "loss": 0.4906, "step": 6246 }, { "epoch": 1.0259273705171104, "grad_norm": 0.3321561343583107, "learning_rate": 8.163046971353263e-06, "loss": 0.4972, "step": 6247 }, { "epoch": 1.0260915977254532, "grad_norm": 0.33758474607503053, "learning_rate": 8.162774548845047e-06, "loss": 0.4886, "step": 6248 }, { "epoch": 1.0262558249337959, "grad_norm": 0.5719844368152448, "learning_rate": 8.162502086555494e-06, "loss": 0.4776, "step": 6249 }, { "epoch": 1.0264200521421387, "grad_norm": 0.3230093959766538, "learning_rate": 8.162229584487566e-06, "loss": 0.4895, "step": 6250 }, { "epoch": 1.0265842793504814, "grad_norm": 0.27381092738906676, "learning_rate": 8.16195704264422e-06, "loss": 0.4846, "step": 6251 }, { "epoch": 1.0267485065588242, "grad_norm": 0.3576355296217209, "learning_rate": 8.16168446102842e-06, "loss": 0.5213, "step": 6252 }, { "epoch": 1.0269127337671669, "grad_norm": 0.5889585564078031, "learning_rate": 8.161411839643121e-06, "loss": 0.5044, "step": 6253 }, { "epoch": 1.0270769609755097, "grad_norm": 0.2979480569514252, "learning_rate": 8.161139178491291e-06, "loss": 0.4715, "step": 6254 }, { "epoch": 1.0272411881838523, "grad_norm": 0.2771883322556505, "learning_rate": 8.160866477575885e-06, "loss": 0.4982, "step": 6255 }, { "epoch": 1.0274054153921952, "grad_norm": 0.27340407742016054, "learning_rate": 8.160593736899869e-06, "loss": 0.494, "step": 6256 }, { "epoch": 1.0275696426005378, "grad_norm": 0.3568661465565456, "learning_rate": 8.1603209564662e-06, "loss": 0.5152, "step": 6257 }, { "epoch": 1.0277338698088805, "grad_norm": 0.27665099872106325, "learning_rate": 8.160048136277846e-06, "loss": 0.5109, "step": 6258 }, { "epoch": 1.0278980970172233, "grad_norm": 0.31119032990878753, "learning_rate": 8.15977527633777e-06, "loss": 0.5023, "step": 6259 }, { "epoch": 1.028062324225566, "grad_norm": 0.316524222391938, "learning_rate": 8.159502376648932e-06, "loss": 0.4826, "step": 6260 }, { "epoch": 1.0282265514339088, "grad_norm": 0.3633517365497554, "learning_rate": 8.159229437214298e-06, "loss": 0.4991, "step": 6261 }, { "epoch": 1.0283907786422515, "grad_norm": 0.2734262319354803, "learning_rate": 8.158956458036833e-06, "loss": 0.4845, "step": 6262 }, { "epoch": 1.0285550058505943, "grad_norm": 0.3625579511734066, "learning_rate": 8.158683439119499e-06, "loss": 0.4943, "step": 6263 }, { "epoch": 1.028719233058937, "grad_norm": 0.4547336209639623, "learning_rate": 8.158410380465264e-06, "loss": 0.4927, "step": 6264 }, { "epoch": 1.0288834602672798, "grad_norm": 0.2978449984905939, "learning_rate": 8.158137282077095e-06, "loss": 0.5134, "step": 6265 }, { "epoch": 1.0290476874756225, "grad_norm": 0.3792401828910724, "learning_rate": 8.157864143957952e-06, "loss": 0.4817, "step": 6266 }, { "epoch": 1.0292119146839653, "grad_norm": 0.2943182894634262, "learning_rate": 8.157590966110808e-06, "loss": 0.4708, "step": 6267 }, { "epoch": 1.029376141892308, "grad_norm": 0.29181766258028285, "learning_rate": 8.157317748538628e-06, "loss": 0.499, "step": 6268 }, { "epoch": 1.0295403691006508, "grad_norm": 0.35172680386374233, "learning_rate": 8.157044491244378e-06, "loss": 0.488, "step": 6269 }, { "epoch": 1.0297045963089935, "grad_norm": 0.4369680201670247, "learning_rate": 8.156771194231026e-06, "loss": 0.4881, "step": 6270 }, { "epoch": 1.0298688235173363, "grad_norm": 0.32279653310986156, "learning_rate": 8.156497857501543e-06, "loss": 0.511, "step": 6271 }, { "epoch": 1.030033050725679, "grad_norm": 0.29216682293506124, "learning_rate": 8.156224481058893e-06, "loss": 0.5069, "step": 6272 }, { "epoch": 1.0301972779340218, "grad_norm": 0.3398346029618168, "learning_rate": 8.155951064906052e-06, "loss": 0.5047, "step": 6273 }, { "epoch": 1.0303615051423645, "grad_norm": 0.2996315228865986, "learning_rate": 8.155677609045982e-06, "loss": 0.4937, "step": 6274 }, { "epoch": 1.030525732350707, "grad_norm": 0.36267652125254585, "learning_rate": 8.155404113481658e-06, "loss": 0.5147, "step": 6275 }, { "epoch": 1.03068995955905, "grad_norm": 0.3672582502381371, "learning_rate": 8.155130578216048e-06, "loss": 0.5099, "step": 6276 }, { "epoch": 1.0308541867673926, "grad_norm": 0.30163867575416514, "learning_rate": 8.154857003252125e-06, "loss": 0.5172, "step": 6277 }, { "epoch": 1.0310184139757355, "grad_norm": 0.3176725462606026, "learning_rate": 8.154583388592858e-06, "loss": 0.5058, "step": 6278 }, { "epoch": 1.031182641184078, "grad_norm": 0.28021080159548745, "learning_rate": 8.154309734241219e-06, "loss": 0.4831, "step": 6279 }, { "epoch": 1.031346868392421, "grad_norm": 0.3046852140004648, "learning_rate": 8.154036040200182e-06, "loss": 0.4731, "step": 6280 }, { "epoch": 1.0315110956007636, "grad_norm": 0.34338511193132853, "learning_rate": 8.153762306472718e-06, "loss": 0.4932, "step": 6281 }, { "epoch": 1.0316753228091065, "grad_norm": 0.33288344314950746, "learning_rate": 8.153488533061803e-06, "loss": 0.4729, "step": 6282 }, { "epoch": 1.031839550017449, "grad_norm": 0.28550101428400565, "learning_rate": 8.153214719970404e-06, "loss": 0.5082, "step": 6283 }, { "epoch": 1.032003777225792, "grad_norm": 0.3258304073804269, "learning_rate": 8.152940867201502e-06, "loss": 0.4795, "step": 6284 }, { "epoch": 1.0321680044341346, "grad_norm": 0.2961807049535856, "learning_rate": 8.152666974758068e-06, "loss": 0.4791, "step": 6285 }, { "epoch": 1.0323322316424774, "grad_norm": 0.3435258210556305, "learning_rate": 8.152393042643075e-06, "loss": 0.5016, "step": 6286 }, { "epoch": 1.03249645885082, "grad_norm": 0.3171183540701704, "learning_rate": 8.1521190708595e-06, "loss": 0.5012, "step": 6287 }, { "epoch": 1.032660686059163, "grad_norm": 0.3163255037337862, "learning_rate": 8.15184505941032e-06, "loss": 0.4734, "step": 6288 }, { "epoch": 1.0328249132675056, "grad_norm": 0.28360770056732126, "learning_rate": 8.15157100829851e-06, "loss": 0.5327, "step": 6289 }, { "epoch": 1.0329891404758484, "grad_norm": 0.3295009314736953, "learning_rate": 8.151296917527048e-06, "loss": 0.4601, "step": 6290 }, { "epoch": 1.033153367684191, "grad_norm": 0.3109452542130047, "learning_rate": 8.151022787098904e-06, "loss": 0.4886, "step": 6291 }, { "epoch": 1.0333175948925337, "grad_norm": 0.3422775777883866, "learning_rate": 8.150748617017064e-06, "loss": 0.4804, "step": 6292 }, { "epoch": 1.0334818221008766, "grad_norm": 0.32723287713230975, "learning_rate": 8.150474407284502e-06, "loss": 0.5031, "step": 6293 }, { "epoch": 1.0336460493092192, "grad_norm": 0.3649000496586239, "learning_rate": 8.150200157904194e-06, "loss": 0.4876, "step": 6294 }, { "epoch": 1.033810276517562, "grad_norm": 0.2602417938122742, "learning_rate": 8.149925868879123e-06, "loss": 0.4804, "step": 6295 }, { "epoch": 1.0339745037259047, "grad_norm": 0.2759579393389584, "learning_rate": 8.149651540212267e-06, "loss": 0.4981, "step": 6296 }, { "epoch": 1.0341387309342476, "grad_norm": 0.2666833973790393, "learning_rate": 8.149377171906601e-06, "loss": 0.4779, "step": 6297 }, { "epoch": 1.0343029581425902, "grad_norm": 0.42156453734766997, "learning_rate": 8.149102763965112e-06, "loss": 0.5211, "step": 6298 }, { "epoch": 1.034467185350933, "grad_norm": 0.2823997950925321, "learning_rate": 8.148828316390776e-06, "loss": 0.496, "step": 6299 }, { "epoch": 1.0346314125592757, "grad_norm": 0.34982207290752043, "learning_rate": 8.148553829186573e-06, "loss": 0.4689, "step": 6300 }, { "epoch": 1.0347956397676186, "grad_norm": 0.391208131768673, "learning_rate": 8.148279302355487e-06, "loss": 0.4973, "step": 6301 }, { "epoch": 1.0349598669759612, "grad_norm": 0.28742279855847286, "learning_rate": 8.148004735900498e-06, "loss": 0.511, "step": 6302 }, { "epoch": 1.035124094184304, "grad_norm": 0.30432930318759355, "learning_rate": 8.147730129824588e-06, "loss": 0.4845, "step": 6303 }, { "epoch": 1.0352883213926467, "grad_norm": 0.2696241666956859, "learning_rate": 8.14745548413074e-06, "loss": 0.4746, "step": 6304 }, { "epoch": 1.0354525486009896, "grad_norm": 0.3517664508179872, "learning_rate": 8.147180798821937e-06, "loss": 0.5045, "step": 6305 }, { "epoch": 1.0356167758093322, "grad_norm": 0.4605530445297964, "learning_rate": 8.146906073901163e-06, "loss": 0.4981, "step": 6306 }, { "epoch": 1.035781003017675, "grad_norm": 0.6338064223676042, "learning_rate": 8.1466313093714e-06, "loss": 0.4754, "step": 6307 }, { "epoch": 1.0359452302260177, "grad_norm": 0.30453246058597266, "learning_rate": 8.146356505235634e-06, "loss": 0.484, "step": 6308 }, { "epoch": 1.0361094574343603, "grad_norm": 0.3257172020653211, "learning_rate": 8.146081661496848e-06, "loss": 0.4794, "step": 6309 }, { "epoch": 1.0362736846427032, "grad_norm": 0.34488299780916387, "learning_rate": 8.145806778158027e-06, "loss": 0.4687, "step": 6310 }, { "epoch": 1.0364379118510458, "grad_norm": 0.3213369359566095, "learning_rate": 8.14553185522216e-06, "loss": 0.4761, "step": 6311 }, { "epoch": 1.0366021390593887, "grad_norm": 0.32874328293494665, "learning_rate": 8.145256892692229e-06, "loss": 0.5035, "step": 6312 }, { "epoch": 1.0367663662677313, "grad_norm": 0.2970117333301102, "learning_rate": 8.14498189057122e-06, "loss": 0.4938, "step": 6313 }, { "epoch": 1.0369305934760742, "grad_norm": 0.38096806888632473, "learning_rate": 8.144706848862123e-06, "loss": 0.4961, "step": 6314 }, { "epoch": 1.0370948206844168, "grad_norm": 0.28450513054266885, "learning_rate": 8.144431767567925e-06, "loss": 0.4956, "step": 6315 }, { "epoch": 1.0372590478927597, "grad_norm": 0.33008924689826424, "learning_rate": 8.14415664669161e-06, "loss": 0.4889, "step": 6316 }, { "epoch": 1.0374232751011023, "grad_norm": 0.31032871022062886, "learning_rate": 8.14388148623617e-06, "loss": 0.4774, "step": 6317 }, { "epoch": 1.0375875023094452, "grad_norm": 0.3226996695818067, "learning_rate": 8.143606286204592e-06, "loss": 0.4722, "step": 6318 }, { "epoch": 1.0377517295177878, "grad_norm": 0.28015504286106274, "learning_rate": 8.143331046599863e-06, "loss": 0.4974, "step": 6319 }, { "epoch": 1.0379159567261307, "grad_norm": 0.3216228193671261, "learning_rate": 8.143055767424978e-06, "loss": 0.5035, "step": 6320 }, { "epoch": 1.0380801839344733, "grad_norm": 0.32400022353714, "learning_rate": 8.14278044868292e-06, "loss": 0.4915, "step": 6321 }, { "epoch": 1.0382444111428162, "grad_norm": 0.3889307559896388, "learning_rate": 8.142505090376683e-06, "loss": 0.5154, "step": 6322 }, { "epoch": 1.0384086383511588, "grad_norm": 0.31872095687613694, "learning_rate": 8.142229692509258e-06, "loss": 0.4746, "step": 6323 }, { "epoch": 1.0385728655595017, "grad_norm": 0.3825206261235205, "learning_rate": 8.141954255083633e-06, "loss": 0.5269, "step": 6324 }, { "epoch": 1.0387370927678443, "grad_norm": 0.3205615672991652, "learning_rate": 8.141678778102804e-06, "loss": 0.4983, "step": 6325 }, { "epoch": 1.038901319976187, "grad_norm": 0.4040202032633061, "learning_rate": 8.141403261569759e-06, "loss": 0.4822, "step": 6326 }, { "epoch": 1.0390655471845298, "grad_norm": 0.37001803297905866, "learning_rate": 8.141127705487492e-06, "loss": 0.4972, "step": 6327 }, { "epoch": 1.0392297743928725, "grad_norm": 0.33923188659094977, "learning_rate": 8.140852109858997e-06, "loss": 0.511, "step": 6328 }, { "epoch": 1.0393940016012153, "grad_norm": 0.29837615842553383, "learning_rate": 8.140576474687264e-06, "loss": 0.4919, "step": 6329 }, { "epoch": 1.039558228809558, "grad_norm": 0.2711498146698936, "learning_rate": 8.140300799975289e-06, "loss": 0.4858, "step": 6330 }, { "epoch": 1.0397224560179008, "grad_norm": 0.323007121971415, "learning_rate": 8.140025085726067e-06, "loss": 0.5001, "step": 6331 }, { "epoch": 1.0398866832262434, "grad_norm": 0.4009112749843193, "learning_rate": 8.139749331942591e-06, "loss": 0.4872, "step": 6332 }, { "epoch": 1.0400509104345863, "grad_norm": 0.26720307728953957, "learning_rate": 8.139473538627855e-06, "loss": 0.4931, "step": 6333 }, { "epoch": 1.040215137642929, "grad_norm": 0.29601146133600026, "learning_rate": 8.139197705784857e-06, "loss": 0.4857, "step": 6334 }, { "epoch": 1.0403793648512718, "grad_norm": 0.3176755739657561, "learning_rate": 8.13892183341659e-06, "loss": 0.4981, "step": 6335 }, { "epoch": 1.0405435920596144, "grad_norm": 0.31923052268403973, "learning_rate": 8.138645921526053e-06, "loss": 0.4985, "step": 6336 }, { "epoch": 1.0407078192679573, "grad_norm": 0.27262402169127714, "learning_rate": 8.138369970116242e-06, "loss": 0.4889, "step": 6337 }, { "epoch": 1.0408720464763, "grad_norm": 0.2981051983207571, "learning_rate": 8.13809397919015e-06, "loss": 0.4818, "step": 6338 }, { "epoch": 1.0410362736846428, "grad_norm": 0.26663296072728127, "learning_rate": 8.137817948750781e-06, "loss": 0.4983, "step": 6339 }, { "epoch": 1.0412005008929854, "grad_norm": 0.3322672618709718, "learning_rate": 8.13754187880113e-06, "loss": 0.4972, "step": 6340 }, { "epoch": 1.0413647281013283, "grad_norm": 0.3164200711690184, "learning_rate": 8.137265769344193e-06, "loss": 0.4834, "step": 6341 }, { "epoch": 1.041528955309671, "grad_norm": 0.29570874323950064, "learning_rate": 8.136989620382973e-06, "loss": 0.4938, "step": 6342 }, { "epoch": 1.0416931825180136, "grad_norm": 0.3395825507914, "learning_rate": 8.136713431920469e-06, "loss": 0.4713, "step": 6343 }, { "epoch": 1.0418574097263564, "grad_norm": 0.3596792089972651, "learning_rate": 8.136437203959677e-06, "loss": 0.4718, "step": 6344 }, { "epoch": 1.042021636934699, "grad_norm": 0.2913957363979316, "learning_rate": 8.1361609365036e-06, "loss": 0.49, "step": 6345 }, { "epoch": 1.042185864143042, "grad_norm": 0.30153632831779337, "learning_rate": 8.135884629555236e-06, "loss": 0.4872, "step": 6346 }, { "epoch": 1.0423500913513846, "grad_norm": 0.3027949712850462, "learning_rate": 8.135608283117589e-06, "loss": 0.5091, "step": 6347 }, { "epoch": 1.0425143185597274, "grad_norm": 0.3667060182197534, "learning_rate": 8.135331897193659e-06, "loss": 0.5129, "step": 6348 }, { "epoch": 1.04267854576807, "grad_norm": 0.34954670908517377, "learning_rate": 8.135055471786448e-06, "loss": 0.4907, "step": 6349 }, { "epoch": 1.042842772976413, "grad_norm": 0.5746944717000683, "learning_rate": 8.134779006898958e-06, "loss": 0.5251, "step": 6350 }, { "epoch": 1.0430070001847556, "grad_norm": 0.43380942060730043, "learning_rate": 8.134502502534192e-06, "loss": 0.4859, "step": 6351 }, { "epoch": 1.0431712273930984, "grad_norm": 0.3356139791441698, "learning_rate": 8.134225958695153e-06, "loss": 0.4771, "step": 6352 }, { "epoch": 1.043335454601441, "grad_norm": 0.3385240491576104, "learning_rate": 8.133949375384844e-06, "loss": 0.4863, "step": 6353 }, { "epoch": 1.043499681809784, "grad_norm": 0.3291980594155157, "learning_rate": 8.13367275260627e-06, "loss": 0.5003, "step": 6354 }, { "epoch": 1.0436639090181266, "grad_norm": 0.3383124118965275, "learning_rate": 8.133396090362435e-06, "loss": 0.4945, "step": 6355 }, { "epoch": 1.0438281362264694, "grad_norm": 0.5204865829726072, "learning_rate": 8.133119388656344e-06, "loss": 0.4808, "step": 6356 }, { "epoch": 1.043992363434812, "grad_norm": 0.2809584481919922, "learning_rate": 8.132842647491002e-06, "loss": 0.4964, "step": 6357 }, { "epoch": 1.044156590643155, "grad_norm": 0.3785537153231929, "learning_rate": 8.132565866869414e-06, "loss": 0.4898, "step": 6358 }, { "epoch": 1.0443208178514976, "grad_norm": 0.2990071029129028, "learning_rate": 8.132289046794584e-06, "loss": 0.4954, "step": 6359 }, { "epoch": 1.0444850450598402, "grad_norm": 0.38818656992221234, "learning_rate": 8.132012187269526e-06, "loss": 0.5099, "step": 6360 }, { "epoch": 1.044649272268183, "grad_norm": 0.3580751562409733, "learning_rate": 8.13173528829724e-06, "loss": 0.4925, "step": 6361 }, { "epoch": 1.0448134994765257, "grad_norm": 0.4102291661116885, "learning_rate": 8.131458349880735e-06, "loss": 0.4965, "step": 6362 }, { "epoch": 1.0449777266848685, "grad_norm": 0.3294150987730053, "learning_rate": 8.13118137202302e-06, "loss": 0.5011, "step": 6363 }, { "epoch": 1.0451419538932112, "grad_norm": 0.31692405906232507, "learning_rate": 8.130904354727103e-06, "loss": 0.4875, "step": 6364 }, { "epoch": 1.045306181101554, "grad_norm": 0.2950992571838892, "learning_rate": 8.130627297995991e-06, "loss": 0.5003, "step": 6365 }, { "epoch": 1.0454704083098967, "grad_norm": 0.45984806915384435, "learning_rate": 8.130350201832697e-06, "loss": 0.4856, "step": 6366 }, { "epoch": 1.0456346355182395, "grad_norm": 0.4105295951420729, "learning_rate": 8.130073066240226e-06, "loss": 0.5028, "step": 6367 }, { "epoch": 1.0457988627265822, "grad_norm": 0.36942653174057527, "learning_rate": 8.129795891221592e-06, "loss": 0.4869, "step": 6368 }, { "epoch": 1.045963089934925, "grad_norm": 0.34164681035447897, "learning_rate": 8.1295186767798e-06, "loss": 0.501, "step": 6369 }, { "epoch": 1.0461273171432677, "grad_norm": 0.6678599138574988, "learning_rate": 8.12924142291787e-06, "loss": 0.4662, "step": 6370 }, { "epoch": 1.0462915443516105, "grad_norm": 0.5031081494107003, "learning_rate": 8.128964129638802e-06, "loss": 0.4945, "step": 6371 }, { "epoch": 1.0464557715599532, "grad_norm": 0.2899303698213951, "learning_rate": 8.128686796945615e-06, "loss": 0.4913, "step": 6372 }, { "epoch": 1.046619998768296, "grad_norm": 0.3061299471747233, "learning_rate": 8.128409424841319e-06, "loss": 0.5124, "step": 6373 }, { "epoch": 1.0467842259766387, "grad_norm": 0.3186445589777642, "learning_rate": 8.128132013328928e-06, "loss": 0.5102, "step": 6374 }, { "epoch": 1.0469484531849815, "grad_norm": 0.30979226756641537, "learning_rate": 8.127854562411452e-06, "loss": 0.4907, "step": 6375 }, { "epoch": 1.0471126803933242, "grad_norm": 0.35666387304376157, "learning_rate": 8.127577072091906e-06, "loss": 0.4862, "step": 6376 }, { "epoch": 1.0472769076016668, "grad_norm": 0.33999127955278113, "learning_rate": 8.127299542373306e-06, "loss": 0.5005, "step": 6377 }, { "epoch": 1.0474411348100097, "grad_norm": 0.32517006313544056, "learning_rate": 8.127021973258664e-06, "loss": 0.5225, "step": 6378 }, { "epoch": 1.0476053620183523, "grad_norm": 0.30386677497274767, "learning_rate": 8.126744364750991e-06, "loss": 0.5187, "step": 6379 }, { "epoch": 1.0477695892266952, "grad_norm": 0.3028445243411251, "learning_rate": 8.12646671685331e-06, "loss": 0.4911, "step": 6380 }, { "epoch": 1.0479338164350378, "grad_norm": 0.3348978408227218, "learning_rate": 8.126189029568631e-06, "loss": 0.5013, "step": 6381 }, { "epoch": 1.0480980436433807, "grad_norm": 0.31603848477950536, "learning_rate": 8.125911302899973e-06, "loss": 0.4846, "step": 6382 }, { "epoch": 1.0482622708517233, "grad_norm": 0.2986142070302935, "learning_rate": 8.125633536850349e-06, "loss": 0.4859, "step": 6383 }, { "epoch": 1.0484264980600662, "grad_norm": 0.267336828835043, "learning_rate": 8.125355731422778e-06, "loss": 0.5089, "step": 6384 }, { "epoch": 1.0485907252684088, "grad_norm": 0.26584039989022573, "learning_rate": 8.125077886620277e-06, "loss": 0.4855, "step": 6385 }, { "epoch": 1.0487549524767517, "grad_norm": 0.2942213330070643, "learning_rate": 8.124800002445864e-06, "loss": 0.4914, "step": 6386 }, { "epoch": 1.0489191796850943, "grad_norm": 0.34842284141271185, "learning_rate": 8.124522078902556e-06, "loss": 0.4798, "step": 6387 }, { "epoch": 1.0490834068934372, "grad_norm": 0.28931993160219377, "learning_rate": 8.124244115993372e-06, "loss": 0.4917, "step": 6388 }, { "epoch": 1.0492476341017798, "grad_norm": 0.2914300210726281, "learning_rate": 8.123966113721331e-06, "loss": 0.4918, "step": 6389 }, { "epoch": 1.0494118613101227, "grad_norm": 0.3163077994522434, "learning_rate": 8.123688072089455e-06, "loss": 0.5009, "step": 6390 }, { "epoch": 1.0495760885184653, "grad_norm": 0.2867811327270028, "learning_rate": 8.123409991100758e-06, "loss": 0.4716, "step": 6391 }, { "epoch": 1.0497403157268081, "grad_norm": 0.30351396153490035, "learning_rate": 8.123131870758266e-06, "loss": 0.5001, "step": 6392 }, { "epoch": 1.0499045429351508, "grad_norm": 0.26456114320785307, "learning_rate": 8.122853711064997e-06, "loss": 0.5115, "step": 6393 }, { "epoch": 1.0500687701434934, "grad_norm": 0.3490191334110551, "learning_rate": 8.12257551202397e-06, "loss": 0.5033, "step": 6394 }, { "epoch": 1.0502329973518363, "grad_norm": 0.32342546862186944, "learning_rate": 8.122297273638212e-06, "loss": 0.4858, "step": 6395 }, { "epoch": 1.050397224560179, "grad_norm": 0.32261052856924055, "learning_rate": 8.122018995910738e-06, "loss": 0.4902, "step": 6396 }, { "epoch": 1.0505614517685218, "grad_norm": 0.32048637806924013, "learning_rate": 8.121740678844576e-06, "loss": 0.4695, "step": 6397 }, { "epoch": 1.0507256789768644, "grad_norm": 0.3235467650978821, "learning_rate": 8.121462322442749e-06, "loss": 0.5074, "step": 6398 }, { "epoch": 1.0508899061852073, "grad_norm": 0.3222995364524542, "learning_rate": 8.121183926708274e-06, "loss": 0.5017, "step": 6399 }, { "epoch": 1.05105413339355, "grad_norm": 0.3099918502023668, "learning_rate": 8.120905491644181e-06, "loss": 0.4903, "step": 6400 }, { "epoch": 1.0512183606018928, "grad_norm": 0.2991011827463223, "learning_rate": 8.120627017253492e-06, "loss": 0.4863, "step": 6401 }, { "epoch": 1.0513825878102354, "grad_norm": 0.3032089180759681, "learning_rate": 8.12034850353923e-06, "loss": 0.5018, "step": 6402 }, { "epoch": 1.0515468150185783, "grad_norm": 0.30778202378615055, "learning_rate": 8.120069950504426e-06, "loss": 0.4971, "step": 6403 }, { "epoch": 1.051711042226921, "grad_norm": 0.33858123870495077, "learning_rate": 8.119791358152097e-06, "loss": 0.5004, "step": 6404 }, { "epoch": 1.0518752694352638, "grad_norm": 0.31106722488997146, "learning_rate": 8.119512726485272e-06, "loss": 0.4889, "step": 6405 }, { "epoch": 1.0520394966436064, "grad_norm": 0.28103243070829426, "learning_rate": 8.119234055506979e-06, "loss": 0.4964, "step": 6406 }, { "epoch": 1.0522037238519493, "grad_norm": 0.30513334246236656, "learning_rate": 8.118955345220243e-06, "loss": 0.5038, "step": 6407 }, { "epoch": 1.052367951060292, "grad_norm": 0.2703130936598874, "learning_rate": 8.11867659562809e-06, "loss": 0.4753, "step": 6408 }, { "epoch": 1.0525321782686348, "grad_norm": 0.34695632434730056, "learning_rate": 8.118397806733549e-06, "loss": 0.4753, "step": 6409 }, { "epoch": 1.0526964054769774, "grad_norm": 0.35036872667227215, "learning_rate": 8.11811897853965e-06, "loss": 0.4835, "step": 6410 }, { "epoch": 1.05286063268532, "grad_norm": 0.2752474487062583, "learning_rate": 8.117840111049418e-06, "loss": 0.4795, "step": 6411 }, { "epoch": 1.053024859893663, "grad_norm": 0.3954558426656484, "learning_rate": 8.117561204265881e-06, "loss": 0.4736, "step": 6412 }, { "epoch": 1.0531890871020055, "grad_norm": 0.3024301299880532, "learning_rate": 8.117282258192073e-06, "loss": 0.4901, "step": 6413 }, { "epoch": 1.0533533143103484, "grad_norm": 0.36585136371914173, "learning_rate": 8.117003272831018e-06, "loss": 0.4861, "step": 6414 }, { "epoch": 1.053517541518691, "grad_norm": 0.30042123148773786, "learning_rate": 8.116724248185751e-06, "loss": 0.5021, "step": 6415 }, { "epoch": 1.053681768727034, "grad_norm": 0.3273045503653264, "learning_rate": 8.1164451842593e-06, "loss": 0.5005, "step": 6416 }, { "epoch": 1.0538459959353765, "grad_norm": 0.3148844502941428, "learning_rate": 8.116166081054698e-06, "loss": 0.4799, "step": 6417 }, { "epoch": 1.0540102231437194, "grad_norm": 0.30624673877227687, "learning_rate": 8.11588693857497e-06, "loss": 0.5072, "step": 6418 }, { "epoch": 1.054174450352062, "grad_norm": 0.3166765249654235, "learning_rate": 8.115607756823156e-06, "loss": 0.4966, "step": 6419 }, { "epoch": 1.054338677560405, "grad_norm": 0.2659242128969254, "learning_rate": 8.115328535802283e-06, "loss": 0.488, "step": 6420 }, { "epoch": 1.0545029047687475, "grad_norm": 0.25926394594099467, "learning_rate": 8.115049275515386e-06, "loss": 0.4734, "step": 6421 }, { "epoch": 1.0546671319770904, "grad_norm": 0.36846139712139503, "learning_rate": 8.114769975965496e-06, "loss": 0.52, "step": 6422 }, { "epoch": 1.054831359185433, "grad_norm": 0.28318609317997545, "learning_rate": 8.114490637155648e-06, "loss": 0.4973, "step": 6423 }, { "epoch": 1.0549955863937759, "grad_norm": 0.3365623874313242, "learning_rate": 8.114211259088875e-06, "loss": 0.4806, "step": 6424 }, { "epoch": 1.0551598136021185, "grad_norm": 0.37523450137919945, "learning_rate": 8.113931841768212e-06, "loss": 0.4829, "step": 6425 }, { "epoch": 1.0553240408104614, "grad_norm": 0.29403851701146794, "learning_rate": 8.113652385196695e-06, "loss": 0.4704, "step": 6426 }, { "epoch": 1.055488268018804, "grad_norm": 0.2945693218797296, "learning_rate": 8.113372889377357e-06, "loss": 0.4648, "step": 6427 }, { "epoch": 1.0556524952271467, "grad_norm": 0.3418559064297287, "learning_rate": 8.113093354313234e-06, "loss": 0.498, "step": 6428 }, { "epoch": 1.0558167224354895, "grad_norm": 0.3366328880212634, "learning_rate": 8.112813780007362e-06, "loss": 0.4891, "step": 6429 }, { "epoch": 1.0559809496438322, "grad_norm": 0.3358091705998052, "learning_rate": 8.112534166462778e-06, "loss": 0.4675, "step": 6430 }, { "epoch": 1.056145176852175, "grad_norm": 0.31678677491645746, "learning_rate": 8.112254513682519e-06, "loss": 0.4944, "step": 6431 }, { "epoch": 1.0563094040605177, "grad_norm": 0.2648603428506825, "learning_rate": 8.111974821669623e-06, "loss": 0.4974, "step": 6432 }, { "epoch": 1.0564736312688605, "grad_norm": 0.2982763427295238, "learning_rate": 8.111695090427125e-06, "loss": 0.4994, "step": 6433 }, { "epoch": 1.0566378584772032, "grad_norm": 0.3244793940093539, "learning_rate": 8.111415319958066e-06, "loss": 0.505, "step": 6434 }, { "epoch": 1.056802085685546, "grad_norm": 0.33260643099417697, "learning_rate": 8.111135510265483e-06, "loss": 0.5006, "step": 6435 }, { "epoch": 1.0569663128938886, "grad_norm": 0.2925676158677974, "learning_rate": 8.110855661352416e-06, "loss": 0.4819, "step": 6436 }, { "epoch": 1.0571305401022315, "grad_norm": 0.3493362290759631, "learning_rate": 8.110575773221903e-06, "loss": 0.4902, "step": 6437 }, { "epoch": 1.0572947673105741, "grad_norm": 0.3925359125596493, "learning_rate": 8.110295845876985e-06, "loss": 0.4894, "step": 6438 }, { "epoch": 1.057458994518917, "grad_norm": 0.2901191149400785, "learning_rate": 8.110015879320703e-06, "loss": 0.4952, "step": 6439 }, { "epoch": 1.0576232217272596, "grad_norm": 0.31226298482160286, "learning_rate": 8.109735873556097e-06, "loss": 0.4765, "step": 6440 }, { "epoch": 1.0577874489356025, "grad_norm": 0.2608839794267408, "learning_rate": 8.109455828586206e-06, "loss": 0.4841, "step": 6441 }, { "epoch": 1.0579516761439451, "grad_norm": 0.41179817747126496, "learning_rate": 8.109175744414074e-06, "loss": 0.4995, "step": 6442 }, { "epoch": 1.058115903352288, "grad_norm": 0.27599730187609833, "learning_rate": 8.108895621042743e-06, "loss": 0.4825, "step": 6443 }, { "epoch": 1.0582801305606306, "grad_norm": 0.38717551812691675, "learning_rate": 8.108615458475256e-06, "loss": 0.4693, "step": 6444 }, { "epoch": 1.0584443577689733, "grad_norm": 0.309503198292017, "learning_rate": 8.108335256714653e-06, "loss": 0.5025, "step": 6445 }, { "epoch": 1.0586085849773161, "grad_norm": 0.3527154897099522, "learning_rate": 8.108055015763979e-06, "loss": 0.4978, "step": 6446 }, { "epoch": 1.0587728121856588, "grad_norm": 0.34243211145802427, "learning_rate": 8.10777473562628e-06, "loss": 0.4918, "step": 6447 }, { "epoch": 1.0589370393940016, "grad_norm": 0.5315935088433437, "learning_rate": 8.107494416304595e-06, "loss": 0.5002, "step": 6448 }, { "epoch": 1.0591012666023443, "grad_norm": 0.34448608785796825, "learning_rate": 8.107214057801971e-06, "loss": 0.4802, "step": 6449 }, { "epoch": 1.0592654938106871, "grad_norm": 0.2679657284532223, "learning_rate": 8.106933660121455e-06, "loss": 0.4862, "step": 6450 }, { "epoch": 1.0594297210190298, "grad_norm": 0.3044665667323601, "learning_rate": 8.10665322326609e-06, "loss": 0.4964, "step": 6451 }, { "epoch": 1.0595939482273726, "grad_norm": 0.44302506304396316, "learning_rate": 8.106372747238923e-06, "loss": 0.5033, "step": 6452 }, { "epoch": 1.0597581754357153, "grad_norm": 0.3007104836080406, "learning_rate": 8.106092232043002e-06, "loss": 0.5041, "step": 6453 }, { "epoch": 1.0599224026440581, "grad_norm": 0.30669849052499437, "learning_rate": 8.105811677681367e-06, "loss": 0.4813, "step": 6454 }, { "epoch": 1.0600866298524008, "grad_norm": 0.33585983656673835, "learning_rate": 8.105531084157072e-06, "loss": 0.5003, "step": 6455 }, { "epoch": 1.0602508570607436, "grad_norm": 0.35016795972792997, "learning_rate": 8.105250451473162e-06, "loss": 0.4867, "step": 6456 }, { "epoch": 1.0604150842690863, "grad_norm": 0.3019447943301932, "learning_rate": 8.104969779632685e-06, "loss": 0.4829, "step": 6457 }, { "epoch": 1.0605793114774291, "grad_norm": 0.44307691839299485, "learning_rate": 8.10468906863869e-06, "loss": 0.4817, "step": 6458 }, { "epoch": 1.0607435386857718, "grad_norm": 0.7749990416144014, "learning_rate": 8.104408318494224e-06, "loss": 0.4748, "step": 6459 }, { "epoch": 1.0609077658941146, "grad_norm": 0.3369575113116707, "learning_rate": 8.104127529202338e-06, "loss": 0.4745, "step": 6460 }, { "epoch": 1.0610719931024573, "grad_norm": 0.30228965862062723, "learning_rate": 8.103846700766081e-06, "loss": 0.5156, "step": 6461 }, { "epoch": 1.0612362203108, "grad_norm": 0.28323225167550203, "learning_rate": 8.103565833188503e-06, "loss": 0.4891, "step": 6462 }, { "epoch": 1.0614004475191428, "grad_norm": 0.35102381961251805, "learning_rate": 8.103284926472654e-06, "loss": 0.5034, "step": 6463 }, { "epoch": 1.0615646747274854, "grad_norm": 0.31904575779002, "learning_rate": 8.103003980621585e-06, "loss": 0.4735, "step": 6464 }, { "epoch": 1.0617289019358283, "grad_norm": 0.37108455696765513, "learning_rate": 8.10272299563835e-06, "loss": 0.4903, "step": 6465 }, { "epoch": 1.0618931291441709, "grad_norm": 0.3043493567023591, "learning_rate": 8.102441971525999e-06, "loss": 0.4817, "step": 6466 }, { "epoch": 1.0620573563525137, "grad_norm": 0.2969183688186469, "learning_rate": 8.10216090828758e-06, "loss": 0.4812, "step": 6467 }, { "epoch": 1.0622215835608564, "grad_norm": 0.2940431258639217, "learning_rate": 8.101879805926152e-06, "loss": 0.468, "step": 6468 }, { "epoch": 1.0623858107691992, "grad_norm": 0.3572806332667413, "learning_rate": 8.101598664444765e-06, "loss": 0.4983, "step": 6469 }, { "epoch": 1.0625500379775419, "grad_norm": 0.32983325411802555, "learning_rate": 8.101317483846475e-06, "loss": 0.4785, "step": 6470 }, { "epoch": 1.0627142651858847, "grad_norm": 0.2903993681330892, "learning_rate": 8.101036264134332e-06, "loss": 0.4691, "step": 6471 }, { "epoch": 1.0628784923942274, "grad_norm": 0.2829052100129413, "learning_rate": 8.100755005311392e-06, "loss": 0.5018, "step": 6472 }, { "epoch": 1.0630427196025702, "grad_norm": 0.3091537259100867, "learning_rate": 8.10047370738071e-06, "loss": 0.4959, "step": 6473 }, { "epoch": 1.0632069468109129, "grad_norm": 0.3072357245938547, "learning_rate": 8.100192370345343e-06, "loss": 0.5023, "step": 6474 }, { "epoch": 1.0633711740192557, "grad_norm": 0.29934615692093325, "learning_rate": 8.099910994208345e-06, "loss": 0.4843, "step": 6475 }, { "epoch": 1.0635354012275984, "grad_norm": 0.29597394203379296, "learning_rate": 8.09962957897277e-06, "loss": 0.4746, "step": 6476 }, { "epoch": 1.0636996284359412, "grad_norm": 0.28468282474338635, "learning_rate": 8.099348124641676e-06, "loss": 0.475, "step": 6477 }, { "epoch": 1.0638638556442839, "grad_norm": 0.33136689956285587, "learning_rate": 8.09906663121812e-06, "loss": 0.4736, "step": 6478 }, { "epoch": 1.0640280828526265, "grad_norm": 0.31234683664303364, "learning_rate": 8.09878509870516e-06, "loss": 0.4958, "step": 6479 }, { "epoch": 1.0641923100609694, "grad_norm": 0.4152643175893618, "learning_rate": 8.098503527105852e-06, "loss": 0.4807, "step": 6480 }, { "epoch": 1.064356537269312, "grad_norm": 0.35436166148095855, "learning_rate": 8.098221916423257e-06, "loss": 0.4767, "step": 6481 }, { "epoch": 1.0645207644776549, "grad_norm": 0.2786918130841321, "learning_rate": 8.097940266660431e-06, "loss": 0.4951, "step": 6482 }, { "epoch": 1.0646849916859975, "grad_norm": 0.2984696445751109, "learning_rate": 8.097658577820436e-06, "loss": 0.5136, "step": 6483 }, { "epoch": 1.0648492188943404, "grad_norm": 0.2860516275936091, "learning_rate": 8.097376849906326e-06, "loss": 0.5039, "step": 6484 }, { "epoch": 1.065013446102683, "grad_norm": 0.35011048090627467, "learning_rate": 8.097095082921165e-06, "loss": 0.4975, "step": 6485 }, { "epoch": 1.0651776733110259, "grad_norm": 0.3063508579204193, "learning_rate": 8.096813276868014e-06, "loss": 0.4957, "step": 6486 }, { "epoch": 1.0653419005193685, "grad_norm": 0.4053799879807205, "learning_rate": 8.09653143174993e-06, "loss": 0.5002, "step": 6487 }, { "epoch": 1.0655061277277114, "grad_norm": 0.32329174991449955, "learning_rate": 8.096249547569976e-06, "loss": 0.4846, "step": 6488 }, { "epoch": 1.065670354936054, "grad_norm": 0.24985790032644659, "learning_rate": 8.095967624331216e-06, "loss": 0.4756, "step": 6489 }, { "epoch": 1.0658345821443969, "grad_norm": 0.2642583003677523, "learning_rate": 8.095685662036706e-06, "loss": 0.4837, "step": 6490 }, { "epoch": 1.0659988093527395, "grad_norm": 0.286132146880121, "learning_rate": 8.095403660689514e-06, "loss": 0.4827, "step": 6491 }, { "epoch": 1.0661630365610824, "grad_norm": 0.2745315651876201, "learning_rate": 8.0951216202927e-06, "loss": 0.482, "step": 6492 }, { "epoch": 1.066327263769425, "grad_norm": 0.34287529332732586, "learning_rate": 8.094839540849332e-06, "loss": 0.4928, "step": 6493 }, { "epoch": 1.0664914909777679, "grad_norm": 0.3333024161421935, "learning_rate": 8.094557422362467e-06, "loss": 0.5016, "step": 6494 }, { "epoch": 1.0666557181861105, "grad_norm": 0.35575934337835535, "learning_rate": 8.094275264835171e-06, "loss": 0.4946, "step": 6495 }, { "epoch": 1.0668199453944531, "grad_norm": 0.3056346750452761, "learning_rate": 8.09399306827051e-06, "loss": 0.4858, "step": 6496 }, { "epoch": 1.066984172602796, "grad_norm": 0.31099343207150504, "learning_rate": 8.093710832671548e-06, "loss": 0.5108, "step": 6497 }, { "epoch": 1.0671483998111386, "grad_norm": 0.322914818516821, "learning_rate": 8.09342855804135e-06, "loss": 0.4811, "step": 6498 }, { "epoch": 1.0673126270194815, "grad_norm": 0.300902999390282, "learning_rate": 8.093146244382983e-06, "loss": 0.4938, "step": 6499 }, { "epoch": 1.0674768542278241, "grad_norm": 0.31601742707388225, "learning_rate": 8.092863891699512e-06, "loss": 0.478, "step": 6500 }, { "epoch": 1.067641081436167, "grad_norm": 0.3046928653259927, "learning_rate": 8.092581499994007e-06, "loss": 0.4958, "step": 6501 }, { "epoch": 1.0678053086445096, "grad_norm": 0.3645549423936772, "learning_rate": 8.09229906926953e-06, "loss": 0.4993, "step": 6502 }, { "epoch": 1.0679695358528525, "grad_norm": 0.2768289330042299, "learning_rate": 8.092016599529151e-06, "loss": 0.4973, "step": 6503 }, { "epoch": 1.0681337630611951, "grad_norm": 0.36660395418493563, "learning_rate": 8.091734090775939e-06, "loss": 0.4891, "step": 6504 }, { "epoch": 1.068297990269538, "grad_norm": 0.3270702435861176, "learning_rate": 8.09145154301296e-06, "loss": 0.4957, "step": 6505 }, { "epoch": 1.0684622174778806, "grad_norm": 0.3498736633281296, "learning_rate": 8.091168956243282e-06, "loss": 0.4966, "step": 6506 }, { "epoch": 1.0686264446862235, "grad_norm": 0.2997356142625861, "learning_rate": 8.090886330469978e-06, "loss": 0.5231, "step": 6507 }, { "epoch": 1.0687906718945661, "grad_norm": 0.3697001884215529, "learning_rate": 8.090603665696114e-06, "loss": 0.4855, "step": 6508 }, { "epoch": 1.068954899102909, "grad_norm": 0.5614008437965243, "learning_rate": 8.090320961924763e-06, "loss": 0.4989, "step": 6509 }, { "epoch": 1.0691191263112516, "grad_norm": 0.3112432296277617, "learning_rate": 8.090038219158993e-06, "loss": 0.4942, "step": 6510 }, { "epoch": 1.0692833535195945, "grad_norm": 0.3863364983192195, "learning_rate": 8.089755437401877e-06, "loss": 0.4669, "step": 6511 }, { "epoch": 1.069447580727937, "grad_norm": 0.2590499894062906, "learning_rate": 8.089472616656484e-06, "loss": 0.5017, "step": 6512 }, { "epoch": 1.0696118079362797, "grad_norm": 0.3294195425723214, "learning_rate": 8.089189756925888e-06, "loss": 0.4761, "step": 6513 }, { "epoch": 1.0697760351446226, "grad_norm": 0.3477239348659754, "learning_rate": 8.088906858213158e-06, "loss": 0.5022, "step": 6514 }, { "epoch": 1.0699402623529652, "grad_norm": 0.2957800807082142, "learning_rate": 8.08862392052137e-06, "loss": 0.4896, "step": 6515 }, { "epoch": 1.070104489561308, "grad_norm": 0.277456686832057, "learning_rate": 8.088340943853595e-06, "loss": 0.4879, "step": 6516 }, { "epoch": 1.0702687167696507, "grad_norm": 0.2896224449064534, "learning_rate": 8.088057928212907e-06, "loss": 0.4825, "step": 6517 }, { "epoch": 1.0704329439779936, "grad_norm": 0.366817404524916, "learning_rate": 8.08777487360238e-06, "loss": 0.5051, "step": 6518 }, { "epoch": 1.0705971711863362, "grad_norm": 0.3243539755109171, "learning_rate": 8.087491780025088e-06, "loss": 0.4889, "step": 6519 }, { "epoch": 1.070761398394679, "grad_norm": 0.28142740942527517, "learning_rate": 8.087208647484104e-06, "loss": 0.4679, "step": 6520 }, { "epoch": 1.0709256256030217, "grad_norm": 0.29184571231550355, "learning_rate": 8.086925475982506e-06, "loss": 0.4633, "step": 6521 }, { "epoch": 1.0710898528113646, "grad_norm": 0.3149114520591637, "learning_rate": 8.08664226552337e-06, "loss": 0.4862, "step": 6522 }, { "epoch": 1.0712540800197072, "grad_norm": 0.3523906143591502, "learning_rate": 8.086359016109768e-06, "loss": 0.4926, "step": 6523 }, { "epoch": 1.07141830722805, "grad_norm": 0.2883075445690294, "learning_rate": 8.08607572774478e-06, "loss": 0.4749, "step": 6524 }, { "epoch": 1.0715825344363927, "grad_norm": 0.31977848898120104, "learning_rate": 8.08579240043148e-06, "loss": 0.4897, "step": 6525 }, { "epoch": 1.0717467616447356, "grad_norm": 0.27525095872824124, "learning_rate": 8.085509034172947e-06, "loss": 0.4904, "step": 6526 }, { "epoch": 1.0719109888530782, "grad_norm": 0.31146255558943825, "learning_rate": 8.085225628972259e-06, "loss": 0.5011, "step": 6527 }, { "epoch": 1.072075216061421, "grad_norm": 0.3464338330797061, "learning_rate": 8.084942184832492e-06, "loss": 0.4926, "step": 6528 }, { "epoch": 1.0722394432697637, "grad_norm": 0.27288841993252994, "learning_rate": 8.084658701756726e-06, "loss": 0.4704, "step": 6529 }, { "epoch": 1.0724036704781064, "grad_norm": 0.4372370882014702, "learning_rate": 8.08437517974804e-06, "loss": 0.4896, "step": 6530 }, { "epoch": 1.0725678976864492, "grad_norm": 0.30634972416888445, "learning_rate": 8.084091618809513e-06, "loss": 0.4975, "step": 6531 }, { "epoch": 1.0727321248947919, "grad_norm": 0.3221699368499719, "learning_rate": 8.083808018944226e-06, "loss": 0.4745, "step": 6532 }, { "epoch": 1.0728963521031347, "grad_norm": 0.31599052239465103, "learning_rate": 8.083524380155257e-06, "loss": 0.4781, "step": 6533 }, { "epoch": 1.0730605793114774, "grad_norm": 0.302902407696806, "learning_rate": 8.083240702445687e-06, "loss": 0.4798, "step": 6534 }, { "epoch": 1.0732248065198202, "grad_norm": 0.29767747935360217, "learning_rate": 8.082956985818598e-06, "loss": 0.4879, "step": 6535 }, { "epoch": 1.0733890337281629, "grad_norm": 0.30394027053429523, "learning_rate": 8.08267323027707e-06, "loss": 0.5003, "step": 6536 }, { "epoch": 1.0735532609365057, "grad_norm": 0.2854278337518416, "learning_rate": 8.082389435824187e-06, "loss": 0.4773, "step": 6537 }, { "epoch": 1.0737174881448484, "grad_norm": 0.3401948261966552, "learning_rate": 8.08210560246303e-06, "loss": 0.5256, "step": 6538 }, { "epoch": 1.0738817153531912, "grad_norm": 0.287518456779464, "learning_rate": 8.081821730196682e-06, "loss": 0.4981, "step": 6539 }, { "epoch": 1.0740459425615339, "grad_norm": 0.32190628608598426, "learning_rate": 8.081537819028225e-06, "loss": 0.4962, "step": 6540 }, { "epoch": 1.0742101697698767, "grad_norm": 0.37003432367879086, "learning_rate": 8.081253868960745e-06, "loss": 0.4993, "step": 6541 }, { "epoch": 1.0743743969782193, "grad_norm": 0.2904353068128155, "learning_rate": 8.080969879997323e-06, "loss": 0.4684, "step": 6542 }, { "epoch": 1.0745386241865622, "grad_norm": 0.29501265903261076, "learning_rate": 8.080685852141045e-06, "loss": 0.4512, "step": 6543 }, { "epoch": 1.0747028513949048, "grad_norm": 0.2885464571816661, "learning_rate": 8.080401785394997e-06, "loss": 0.5054, "step": 6544 }, { "epoch": 1.0748670786032477, "grad_norm": 0.282323525694132, "learning_rate": 8.08011767976226e-06, "loss": 0.4795, "step": 6545 }, { "epoch": 1.0750313058115903, "grad_norm": 0.31958791943190906, "learning_rate": 8.079833535245927e-06, "loss": 0.5158, "step": 6546 }, { "epoch": 1.075195533019933, "grad_norm": 0.2982945132663225, "learning_rate": 8.079549351849077e-06, "loss": 0.4961, "step": 6547 }, { "epoch": 1.0753597602282758, "grad_norm": 0.3665811507990705, "learning_rate": 8.0792651295748e-06, "loss": 0.488, "step": 6548 }, { "epoch": 1.0755239874366185, "grad_norm": 0.324428934023983, "learning_rate": 8.078980868426183e-06, "loss": 0.5029, "step": 6549 }, { "epoch": 1.0756882146449613, "grad_norm": 0.30815951965964145, "learning_rate": 8.078696568406311e-06, "loss": 0.4726, "step": 6550 }, { "epoch": 1.075852441853304, "grad_norm": 0.6351629022687446, "learning_rate": 8.078412229518273e-06, "loss": 0.4868, "step": 6551 }, { "epoch": 1.0760166690616468, "grad_norm": 0.27095936706763635, "learning_rate": 8.07812785176516e-06, "loss": 0.4846, "step": 6552 }, { "epoch": 1.0761808962699895, "grad_norm": 0.3373418483243227, "learning_rate": 8.077843435150056e-06, "loss": 0.5021, "step": 6553 }, { "epoch": 1.0763451234783323, "grad_norm": 0.2769493764982112, "learning_rate": 8.077558979676052e-06, "loss": 0.4995, "step": 6554 }, { "epoch": 1.076509350686675, "grad_norm": 0.3391246782727593, "learning_rate": 8.077274485346239e-06, "loss": 0.4947, "step": 6555 }, { "epoch": 1.0766735778950178, "grad_norm": 0.3194796445907453, "learning_rate": 8.076989952163704e-06, "loss": 0.501, "step": 6556 }, { "epoch": 1.0768378051033605, "grad_norm": 0.2884927330411459, "learning_rate": 8.07670538013154e-06, "loss": 0.4765, "step": 6557 }, { "epoch": 1.0770020323117033, "grad_norm": 0.27582021063538154, "learning_rate": 8.076420769252837e-06, "loss": 0.4917, "step": 6558 }, { "epoch": 1.077166259520046, "grad_norm": 0.32878546008855475, "learning_rate": 8.076136119530685e-06, "loss": 0.4925, "step": 6559 }, { "epoch": 1.0773304867283888, "grad_norm": 0.3719181139981429, "learning_rate": 8.075851430968176e-06, "loss": 0.4834, "step": 6560 }, { "epoch": 1.0774947139367315, "grad_norm": 0.299817050063877, "learning_rate": 8.075566703568402e-06, "loss": 0.4817, "step": 6561 }, { "epoch": 1.0776589411450743, "grad_norm": 0.27926753984629715, "learning_rate": 8.075281937334456e-06, "loss": 0.4684, "step": 6562 }, { "epoch": 1.077823168353417, "grad_norm": 0.3141612633195604, "learning_rate": 8.074997132269431e-06, "loss": 0.4943, "step": 6563 }, { "epoch": 1.0779873955617596, "grad_norm": 0.3227802156237344, "learning_rate": 8.07471228837642e-06, "loss": 0.4909, "step": 6564 }, { "epoch": 1.0781516227701025, "grad_norm": 0.27393818022295974, "learning_rate": 8.074427405658516e-06, "loss": 0.479, "step": 6565 }, { "epoch": 1.078315849978445, "grad_norm": 0.34096578885764833, "learning_rate": 8.074142484118814e-06, "loss": 0.4819, "step": 6566 }, { "epoch": 1.078480077186788, "grad_norm": 0.3628942414376617, "learning_rate": 8.073857523760407e-06, "loss": 0.4822, "step": 6567 }, { "epoch": 1.0786443043951306, "grad_norm": 0.3121047978071521, "learning_rate": 8.073572524586392e-06, "loss": 0.499, "step": 6568 }, { "epoch": 1.0788085316034735, "grad_norm": 0.3710136396120407, "learning_rate": 8.073287486599864e-06, "loss": 0.5031, "step": 6569 }, { "epoch": 1.078972758811816, "grad_norm": 0.27173145739016036, "learning_rate": 8.073002409803917e-06, "loss": 0.4897, "step": 6570 }, { "epoch": 1.079136986020159, "grad_norm": 0.3607583744416979, "learning_rate": 8.072717294201649e-06, "loss": 0.498, "step": 6571 }, { "epoch": 1.0793012132285016, "grad_norm": 0.33623588695032847, "learning_rate": 8.072432139796157e-06, "loss": 0.4989, "step": 6572 }, { "epoch": 1.0794654404368444, "grad_norm": 0.3315744739698578, "learning_rate": 8.072146946590536e-06, "loss": 0.4796, "step": 6573 }, { "epoch": 1.079629667645187, "grad_norm": 0.3222025399523138, "learning_rate": 8.071861714587885e-06, "loss": 0.4918, "step": 6574 }, { "epoch": 1.07979389485353, "grad_norm": 0.31798138184786123, "learning_rate": 8.071576443791302e-06, "loss": 0.496, "step": 6575 }, { "epoch": 1.0799581220618726, "grad_norm": 0.29852066241948144, "learning_rate": 8.071291134203885e-06, "loss": 0.4937, "step": 6576 }, { "epoch": 1.0801223492702154, "grad_norm": 0.26278834220161956, "learning_rate": 8.071005785828732e-06, "loss": 0.4872, "step": 6577 }, { "epoch": 1.080286576478558, "grad_norm": 0.2891753603047115, "learning_rate": 8.070720398668944e-06, "loss": 0.4777, "step": 6578 }, { "epoch": 1.080450803686901, "grad_norm": 0.3858364653964431, "learning_rate": 8.070434972727617e-06, "loss": 0.5132, "step": 6579 }, { "epoch": 1.0806150308952436, "grad_norm": 0.3764177313317822, "learning_rate": 8.070149508007854e-06, "loss": 0.5014, "step": 6580 }, { "epoch": 1.0807792581035862, "grad_norm": 0.37878161708248925, "learning_rate": 8.069864004512756e-06, "loss": 0.4936, "step": 6581 }, { "epoch": 1.080943485311929, "grad_norm": 0.278614098273255, "learning_rate": 8.069578462245422e-06, "loss": 0.4762, "step": 6582 }, { "epoch": 1.0811077125202717, "grad_norm": 0.32093940639927887, "learning_rate": 8.069292881208955e-06, "loss": 0.4862, "step": 6583 }, { "epoch": 1.0812719397286146, "grad_norm": 0.29447297248866366, "learning_rate": 8.069007261406454e-06, "loss": 0.4892, "step": 6584 }, { "epoch": 1.0814361669369572, "grad_norm": 0.36869470719217107, "learning_rate": 8.068721602841023e-06, "loss": 0.5079, "step": 6585 }, { "epoch": 1.0816003941453, "grad_norm": 0.28216039771339985, "learning_rate": 8.068435905515764e-06, "loss": 0.4853, "step": 6586 }, { "epoch": 1.0817646213536427, "grad_norm": 0.3304883936698149, "learning_rate": 8.068150169433781e-06, "loss": 0.5068, "step": 6587 }, { "epoch": 1.0819288485619856, "grad_norm": 0.33187478705588386, "learning_rate": 8.067864394598177e-06, "loss": 0.4933, "step": 6588 }, { "epoch": 1.0820930757703282, "grad_norm": 0.3068115166683626, "learning_rate": 8.067578581012054e-06, "loss": 0.5052, "step": 6589 }, { "epoch": 1.082257302978671, "grad_norm": 0.26991613220821475, "learning_rate": 8.067292728678519e-06, "loss": 0.5059, "step": 6590 }, { "epoch": 1.0824215301870137, "grad_norm": 0.3177761633438965, "learning_rate": 8.067006837600674e-06, "loss": 0.4982, "step": 6591 }, { "epoch": 1.0825857573953566, "grad_norm": 0.27743314130195196, "learning_rate": 8.066720907781625e-06, "loss": 0.473, "step": 6592 }, { "epoch": 1.0827499846036992, "grad_norm": 0.27281498017793654, "learning_rate": 8.066434939224478e-06, "loss": 0.4999, "step": 6593 }, { "epoch": 1.082914211812042, "grad_norm": 0.29010521029271646, "learning_rate": 8.06614893193234e-06, "loss": 0.4622, "step": 6594 }, { "epoch": 1.0830784390203847, "grad_norm": 0.3875811075929337, "learning_rate": 8.065862885908317e-06, "loss": 0.5063, "step": 6595 }, { "epoch": 1.0832426662287276, "grad_norm": 0.30672239793347644, "learning_rate": 8.065576801155512e-06, "loss": 0.4781, "step": 6596 }, { "epoch": 1.0834068934370702, "grad_norm": 0.2790991854199382, "learning_rate": 8.065290677677036e-06, "loss": 0.46, "step": 6597 }, { "epoch": 1.0835711206454128, "grad_norm": 0.37332831621147183, "learning_rate": 8.065004515475994e-06, "loss": 0.508, "step": 6598 }, { "epoch": 1.0837353478537557, "grad_norm": 0.3434760999904214, "learning_rate": 8.064718314555497e-06, "loss": 0.4793, "step": 6599 }, { "epoch": 1.0838995750620983, "grad_norm": 0.6233700972166917, "learning_rate": 8.06443207491865e-06, "loss": 0.4577, "step": 6600 }, { "epoch": 1.0840638022704412, "grad_norm": 0.3384347060463355, "learning_rate": 8.064145796568567e-06, "loss": 0.4932, "step": 6601 }, { "epoch": 1.0842280294787838, "grad_norm": 0.289153448823887, "learning_rate": 8.063859479508352e-06, "loss": 0.4912, "step": 6602 }, { "epoch": 1.0843922566871267, "grad_norm": 0.26405473877925406, "learning_rate": 8.063573123741117e-06, "loss": 0.5015, "step": 6603 }, { "epoch": 1.0845564838954693, "grad_norm": 0.31409076950725495, "learning_rate": 8.063286729269971e-06, "loss": 0.4959, "step": 6604 }, { "epoch": 1.0847207111038122, "grad_norm": 0.30705620044601656, "learning_rate": 8.063000296098026e-06, "loss": 0.4956, "step": 6605 }, { "epoch": 1.0848849383121548, "grad_norm": 0.31862613078857577, "learning_rate": 8.062713824228393e-06, "loss": 0.5036, "step": 6606 }, { "epoch": 1.0850491655204977, "grad_norm": 0.7253417981589776, "learning_rate": 8.062427313664183e-06, "loss": 0.4844, "step": 6607 }, { "epoch": 1.0852133927288403, "grad_norm": 0.7457709244399511, "learning_rate": 8.062140764408505e-06, "loss": 0.4859, "step": 6608 }, { "epoch": 1.0853776199371832, "grad_norm": 0.300353254880477, "learning_rate": 8.061854176464477e-06, "loss": 0.4849, "step": 6609 }, { "epoch": 1.0855418471455258, "grad_norm": 0.3253936123596342, "learning_rate": 8.061567549835206e-06, "loss": 0.4553, "step": 6610 }, { "epoch": 1.0857060743538687, "grad_norm": 0.3120425180000209, "learning_rate": 8.061280884523808e-06, "loss": 0.4681, "step": 6611 }, { "epoch": 1.0858703015622113, "grad_norm": 0.34523817729463896, "learning_rate": 8.060994180533395e-06, "loss": 0.4912, "step": 6612 }, { "epoch": 1.0860345287705542, "grad_norm": 0.3159024449975521, "learning_rate": 8.060707437867082e-06, "loss": 0.5238, "step": 6613 }, { "epoch": 1.0861987559788968, "grad_norm": 0.2708163187055613, "learning_rate": 8.060420656527983e-06, "loss": 0.4802, "step": 6614 }, { "epoch": 1.0863629831872395, "grad_norm": 0.2717693408294249, "learning_rate": 8.060133836519213e-06, "loss": 0.5011, "step": 6615 }, { "epoch": 1.0865272103955823, "grad_norm": 0.3046084952629126, "learning_rate": 8.059846977843885e-06, "loss": 0.4808, "step": 6616 }, { "epoch": 1.086691437603925, "grad_norm": 0.3484788088356775, "learning_rate": 8.059560080505119e-06, "loss": 0.4709, "step": 6617 }, { "epoch": 1.0868556648122678, "grad_norm": 0.4789898019185958, "learning_rate": 8.059273144506029e-06, "loss": 0.4782, "step": 6618 }, { "epoch": 1.0870198920206104, "grad_norm": 0.2721149994086529, "learning_rate": 8.058986169849727e-06, "loss": 0.5058, "step": 6619 }, { "epoch": 1.0871841192289533, "grad_norm": 0.5085546027446511, "learning_rate": 8.058699156539336e-06, "loss": 0.4936, "step": 6620 }, { "epoch": 1.087348346437296, "grad_norm": 0.28291266607791543, "learning_rate": 8.058412104577971e-06, "loss": 0.4956, "step": 6621 }, { "epoch": 1.0875125736456388, "grad_norm": 0.24750759560662303, "learning_rate": 8.058125013968749e-06, "loss": 0.4757, "step": 6622 }, { "epoch": 1.0876768008539814, "grad_norm": 0.2834097176537607, "learning_rate": 8.057837884714789e-06, "loss": 0.4825, "step": 6623 }, { "epoch": 1.0878410280623243, "grad_norm": 0.6745488072249444, "learning_rate": 8.05755071681921e-06, "loss": 0.5016, "step": 6624 }, { "epoch": 1.088005255270667, "grad_norm": 0.26972117677597274, "learning_rate": 8.057263510285128e-06, "loss": 0.4905, "step": 6625 }, { "epoch": 1.0881694824790098, "grad_norm": 0.38028687216798407, "learning_rate": 8.056976265115665e-06, "loss": 0.5123, "step": 6626 }, { "epoch": 1.0883337096873524, "grad_norm": 0.32171686241552383, "learning_rate": 8.056688981313942e-06, "loss": 0.5023, "step": 6627 }, { "epoch": 1.0884979368956953, "grad_norm": 0.35366407163708424, "learning_rate": 8.056401658883075e-06, "loss": 0.4843, "step": 6628 }, { "epoch": 1.088662164104038, "grad_norm": 0.31785783871908235, "learning_rate": 8.056114297826187e-06, "loss": 0.464, "step": 6629 }, { "epoch": 1.0888263913123808, "grad_norm": 0.37713493307433466, "learning_rate": 8.055826898146401e-06, "loss": 0.5046, "step": 6630 }, { "epoch": 1.0889906185207234, "grad_norm": 0.30751278180262237, "learning_rate": 8.055539459846836e-06, "loss": 0.4861, "step": 6631 }, { "epoch": 1.089154845729066, "grad_norm": 0.2679614159390897, "learning_rate": 8.055251982930612e-06, "loss": 0.5073, "step": 6632 }, { "epoch": 1.089319072937409, "grad_norm": 0.3583802438166624, "learning_rate": 8.054964467400856e-06, "loss": 0.4893, "step": 6633 }, { "epoch": 1.0894833001457516, "grad_norm": 0.281128336748366, "learning_rate": 8.054676913260687e-06, "loss": 0.4896, "step": 6634 }, { "epoch": 1.0896475273540944, "grad_norm": 0.31164083615547156, "learning_rate": 8.054389320513229e-06, "loss": 0.4937, "step": 6635 }, { "epoch": 1.089811754562437, "grad_norm": 0.2842462734493817, "learning_rate": 8.054101689161607e-06, "loss": 0.5003, "step": 6636 }, { "epoch": 1.08997598177078, "grad_norm": 0.41851163082840925, "learning_rate": 8.053814019208944e-06, "loss": 0.4766, "step": 6637 }, { "epoch": 1.0901402089791226, "grad_norm": 0.32067594574199665, "learning_rate": 8.053526310658364e-06, "loss": 0.4981, "step": 6638 }, { "epoch": 1.0903044361874654, "grad_norm": 0.39601430427739676, "learning_rate": 8.053238563512993e-06, "loss": 0.4858, "step": 6639 }, { "epoch": 1.090468663395808, "grad_norm": 0.31151104434543075, "learning_rate": 8.052950777775953e-06, "loss": 0.4751, "step": 6640 }, { "epoch": 1.090632890604151, "grad_norm": 0.35616453883131216, "learning_rate": 8.052662953450373e-06, "loss": 0.4833, "step": 6641 }, { "epoch": 1.0907971178124936, "grad_norm": 0.32908409486875606, "learning_rate": 8.05237509053938e-06, "loss": 0.5086, "step": 6642 }, { "epoch": 1.0909613450208364, "grad_norm": 0.34134888377075057, "learning_rate": 8.052087189046095e-06, "loss": 0.5087, "step": 6643 }, { "epoch": 1.091125572229179, "grad_norm": 0.47345364219699215, "learning_rate": 8.05179924897365e-06, "loss": 0.5156, "step": 6644 }, { "epoch": 1.091289799437522, "grad_norm": 0.3445889661685117, "learning_rate": 8.05151127032517e-06, "loss": 0.4996, "step": 6645 }, { "epoch": 1.0914540266458646, "grad_norm": 0.29551209314615595, "learning_rate": 8.051223253103785e-06, "loss": 0.4818, "step": 6646 }, { "epoch": 1.0916182538542074, "grad_norm": 0.2947551790268186, "learning_rate": 8.05093519731262e-06, "loss": 0.4825, "step": 6647 }, { "epoch": 1.09178248106255, "grad_norm": 0.3834351018766136, "learning_rate": 8.050647102954806e-06, "loss": 0.4972, "step": 6648 }, { "epoch": 1.0919467082708927, "grad_norm": 0.3396270899894368, "learning_rate": 8.050358970033471e-06, "loss": 0.4774, "step": 6649 }, { "epoch": 1.0921109354792355, "grad_norm": 0.2892429889400422, "learning_rate": 8.050070798551745e-06, "loss": 0.5017, "step": 6650 }, { "epoch": 1.0922751626875782, "grad_norm": 0.2861359821069163, "learning_rate": 8.049782588512757e-06, "loss": 0.4789, "step": 6651 }, { "epoch": 1.092439389895921, "grad_norm": 0.297574352808594, "learning_rate": 8.049494339919636e-06, "loss": 0.483, "step": 6652 }, { "epoch": 1.0926036171042637, "grad_norm": 0.295904675266878, "learning_rate": 8.049206052775515e-06, "loss": 0.4759, "step": 6653 }, { "epoch": 1.0927678443126065, "grad_norm": 0.27957006578379917, "learning_rate": 8.048917727083526e-06, "loss": 0.4742, "step": 6654 }, { "epoch": 1.0929320715209492, "grad_norm": 0.33890350213174564, "learning_rate": 8.048629362846796e-06, "loss": 0.4757, "step": 6655 }, { "epoch": 1.093096298729292, "grad_norm": 0.3047428157307131, "learning_rate": 8.04834096006846e-06, "loss": 0.4861, "step": 6656 }, { "epoch": 1.0932605259376347, "grad_norm": 0.5228184394903771, "learning_rate": 8.048052518751653e-06, "loss": 0.4754, "step": 6657 }, { "epoch": 1.0934247531459775, "grad_norm": 0.3112575437387302, "learning_rate": 8.047764038899505e-06, "loss": 0.4831, "step": 6658 }, { "epoch": 1.0935889803543202, "grad_norm": 0.29681541702163144, "learning_rate": 8.047475520515147e-06, "loss": 0.4925, "step": 6659 }, { "epoch": 1.093753207562663, "grad_norm": 0.280871176322509, "learning_rate": 8.047186963601714e-06, "loss": 0.4898, "step": 6660 }, { "epoch": 1.0939174347710057, "grad_norm": 0.30043549760570193, "learning_rate": 8.04689836816234e-06, "loss": 0.4929, "step": 6661 }, { "epoch": 1.0940816619793485, "grad_norm": 0.31300225911045476, "learning_rate": 8.046609734200162e-06, "loss": 0.5052, "step": 6662 }, { "epoch": 1.0942458891876912, "grad_norm": 0.28856161920277196, "learning_rate": 8.046321061718312e-06, "loss": 0.4975, "step": 6663 }, { "epoch": 1.094410116396034, "grad_norm": 0.26293407990784057, "learning_rate": 8.046032350719928e-06, "loss": 0.4676, "step": 6664 }, { "epoch": 1.0945743436043767, "grad_norm": 0.4804798489350901, "learning_rate": 8.04574360120814e-06, "loss": 0.5155, "step": 6665 }, { "epoch": 1.0947385708127193, "grad_norm": 0.29817475251799247, "learning_rate": 8.045454813186092e-06, "loss": 0.4926, "step": 6666 }, { "epoch": 1.0949027980210622, "grad_norm": 0.2792147268849725, "learning_rate": 8.045165986656914e-06, "loss": 0.4694, "step": 6667 }, { "epoch": 1.0950670252294048, "grad_norm": 0.26740986430834013, "learning_rate": 8.044877121623747e-06, "loss": 0.47, "step": 6668 }, { "epoch": 1.0952312524377477, "grad_norm": 0.3175206189654057, "learning_rate": 8.044588218089726e-06, "loss": 0.5061, "step": 6669 }, { "epoch": 1.0953954796460903, "grad_norm": 0.3521071238807635, "learning_rate": 8.04429927605799e-06, "loss": 0.498, "step": 6670 }, { "epoch": 1.0955597068544332, "grad_norm": 0.30179167992615835, "learning_rate": 8.044010295531676e-06, "loss": 0.474, "step": 6671 }, { "epoch": 1.0957239340627758, "grad_norm": 0.41781233652809985, "learning_rate": 8.043721276513922e-06, "loss": 0.4931, "step": 6672 }, { "epoch": 1.0958881612711187, "grad_norm": 0.32796241643504886, "learning_rate": 8.043432219007872e-06, "loss": 0.4832, "step": 6673 }, { "epoch": 1.0960523884794613, "grad_norm": 0.2986490602919634, "learning_rate": 8.04314312301666e-06, "loss": 0.4907, "step": 6674 }, { "epoch": 1.0962166156878042, "grad_norm": 0.3823780932901472, "learning_rate": 8.042853988543427e-06, "loss": 0.4867, "step": 6675 }, { "epoch": 1.0963808428961468, "grad_norm": 0.3210929957900738, "learning_rate": 8.042564815591314e-06, "loss": 0.4768, "step": 6676 }, { "epoch": 1.0965450701044896, "grad_norm": 0.37904104957766954, "learning_rate": 8.042275604163462e-06, "loss": 0.4926, "step": 6677 }, { "epoch": 1.0967092973128323, "grad_norm": 0.3349723807012331, "learning_rate": 8.041986354263013e-06, "loss": 0.5118, "step": 6678 }, { "epoch": 1.0968735245211751, "grad_norm": 0.3571590062105453, "learning_rate": 8.041697065893105e-06, "loss": 0.4751, "step": 6679 }, { "epoch": 1.0970377517295178, "grad_norm": 0.2921285386665655, "learning_rate": 8.041407739056885e-06, "loss": 0.4739, "step": 6680 }, { "epoch": 1.0972019789378606, "grad_norm": 0.31244656526919706, "learning_rate": 8.04111837375749e-06, "loss": 0.4929, "step": 6681 }, { "epoch": 1.0973662061462033, "grad_norm": 0.47267268926626577, "learning_rate": 8.040828969998068e-06, "loss": 0.4898, "step": 6682 }, { "epoch": 1.097530433354546, "grad_norm": 0.2693517145732392, "learning_rate": 8.04053952778176e-06, "loss": 0.498, "step": 6683 }, { "epoch": 1.0976946605628888, "grad_norm": 0.28351936482992657, "learning_rate": 8.040250047111706e-06, "loss": 0.4876, "step": 6684 }, { "epoch": 1.0978588877712314, "grad_norm": 0.3594674569385834, "learning_rate": 8.039960527991055e-06, "loss": 0.4815, "step": 6685 }, { "epoch": 1.0980231149795743, "grad_norm": 0.30259751217946457, "learning_rate": 8.03967097042295e-06, "loss": 0.5022, "step": 6686 }, { "epoch": 1.098187342187917, "grad_norm": 0.38365337530932603, "learning_rate": 8.039381374410536e-06, "loss": 0.4949, "step": 6687 }, { "epoch": 1.0983515693962598, "grad_norm": 0.30124925300942795, "learning_rate": 8.039091739956959e-06, "loss": 0.5001, "step": 6688 }, { "epoch": 1.0985157966046024, "grad_norm": 0.2848395875961075, "learning_rate": 8.03880206706536e-06, "loss": 0.5062, "step": 6689 }, { "epoch": 1.0986800238129453, "grad_norm": 0.4221184803884185, "learning_rate": 8.038512355738892e-06, "loss": 0.4814, "step": 6690 }, { "epoch": 1.098844251021288, "grad_norm": 0.2897228472309949, "learning_rate": 8.038222605980698e-06, "loss": 0.4858, "step": 6691 }, { "epoch": 1.0990084782296308, "grad_norm": 0.2999559771720914, "learning_rate": 8.037932817793924e-06, "loss": 0.4975, "step": 6692 }, { "epoch": 1.0991727054379734, "grad_norm": 0.2796159071321173, "learning_rate": 8.037642991181721e-06, "loss": 0.4843, "step": 6693 }, { "epoch": 1.0993369326463163, "grad_norm": 0.3118067130960556, "learning_rate": 8.037353126147233e-06, "loss": 0.4975, "step": 6694 }, { "epoch": 1.099501159854659, "grad_norm": 0.3917793286041874, "learning_rate": 8.03706322269361e-06, "loss": 0.4984, "step": 6695 }, { "epoch": 1.0996653870630018, "grad_norm": 0.2701048707532968, "learning_rate": 8.036773280824e-06, "loss": 0.4592, "step": 6696 }, { "epoch": 1.0998296142713444, "grad_norm": 0.3657316253260058, "learning_rate": 8.036483300541554e-06, "loss": 0.4971, "step": 6697 }, { "epoch": 1.0999938414796873, "grad_norm": 0.3368853629369273, "learning_rate": 8.036193281849419e-06, "loss": 0.478, "step": 6698 }, { "epoch": 1.10015806868803, "grad_norm": 0.30968426218990286, "learning_rate": 8.035903224750745e-06, "loss": 0.5134, "step": 6699 }, { "epoch": 1.1003222958963725, "grad_norm": 0.31875699537562413, "learning_rate": 8.035613129248683e-06, "loss": 0.4717, "step": 6700 }, { "epoch": 1.1004865231047154, "grad_norm": 0.2941617351152997, "learning_rate": 8.035322995346386e-06, "loss": 0.4622, "step": 6701 }, { "epoch": 1.100650750313058, "grad_norm": 0.2992618349245907, "learning_rate": 8.035032823047001e-06, "loss": 0.4923, "step": 6702 }, { "epoch": 1.100814977521401, "grad_norm": 0.32832975541354775, "learning_rate": 8.034742612353681e-06, "loss": 0.4931, "step": 6703 }, { "epoch": 1.1009792047297435, "grad_norm": 0.36302102867175484, "learning_rate": 8.034452363269581e-06, "loss": 0.4783, "step": 6704 }, { "epoch": 1.1011434319380864, "grad_norm": 0.33724953597312485, "learning_rate": 8.034162075797849e-06, "loss": 0.4849, "step": 6705 }, { "epoch": 1.101307659146429, "grad_norm": 0.26336764360879583, "learning_rate": 8.033871749941642e-06, "loss": 0.4979, "step": 6706 }, { "epoch": 1.101471886354772, "grad_norm": 0.46284018414744904, "learning_rate": 8.033581385704108e-06, "loss": 0.4716, "step": 6707 }, { "epoch": 1.1016361135631145, "grad_norm": 0.26767536775305845, "learning_rate": 8.033290983088405e-06, "loss": 0.4923, "step": 6708 }, { "epoch": 1.1018003407714574, "grad_norm": 0.33545393017343467, "learning_rate": 8.033000542097685e-06, "loss": 0.4894, "step": 6709 }, { "epoch": 1.1019645679798, "grad_norm": 0.28161470373946695, "learning_rate": 8.032710062735103e-06, "loss": 0.4919, "step": 6710 }, { "epoch": 1.1021287951881429, "grad_norm": 0.29950638831768156, "learning_rate": 8.032419545003815e-06, "loss": 0.4772, "step": 6711 }, { "epoch": 1.1022930223964855, "grad_norm": 0.36968967122204577, "learning_rate": 8.032128988906975e-06, "loss": 0.5008, "step": 6712 }, { "epoch": 1.1024572496048284, "grad_norm": 0.3152702117734696, "learning_rate": 8.03183839444774e-06, "loss": 0.5156, "step": 6713 }, { "epoch": 1.102621476813171, "grad_norm": 0.27945506823563687, "learning_rate": 8.031547761629264e-06, "loss": 0.4931, "step": 6714 }, { "epoch": 1.1027857040215139, "grad_norm": 0.40158231798578, "learning_rate": 8.031257090454704e-06, "loss": 0.4854, "step": 6715 }, { "epoch": 1.1029499312298565, "grad_norm": 0.3677471976180123, "learning_rate": 8.030966380927218e-06, "loss": 0.4895, "step": 6716 }, { "epoch": 1.1031141584381992, "grad_norm": 0.32174666226612697, "learning_rate": 8.030675633049964e-06, "loss": 0.5074, "step": 6717 }, { "epoch": 1.103278385646542, "grad_norm": 0.28039648079111684, "learning_rate": 8.030384846826098e-06, "loss": 0.4949, "step": 6718 }, { "epoch": 1.1034426128548847, "grad_norm": 0.373231733104368, "learning_rate": 8.03009402225878e-06, "loss": 0.4826, "step": 6719 }, { "epoch": 1.1036068400632275, "grad_norm": 0.2952935970118553, "learning_rate": 8.029803159351167e-06, "loss": 0.477, "step": 6720 }, { "epoch": 1.1037710672715701, "grad_norm": 0.708722428069576, "learning_rate": 8.029512258106419e-06, "loss": 0.4978, "step": 6721 }, { "epoch": 1.103935294479913, "grad_norm": 0.5013498552378116, "learning_rate": 8.029221318527697e-06, "loss": 0.5029, "step": 6722 }, { "epoch": 1.1040995216882556, "grad_norm": 0.2879331351210175, "learning_rate": 8.028930340618158e-06, "loss": 0.4899, "step": 6723 }, { "epoch": 1.1042637488965985, "grad_norm": 0.30600835982671126, "learning_rate": 8.028639324380962e-06, "loss": 0.4869, "step": 6724 }, { "epoch": 1.1044279761049411, "grad_norm": 0.3288737516377353, "learning_rate": 8.028348269819273e-06, "loss": 0.4858, "step": 6725 }, { "epoch": 1.104592203313284, "grad_norm": 0.3064727566685573, "learning_rate": 8.02805717693625e-06, "loss": 0.4765, "step": 6726 }, { "epoch": 1.1047564305216266, "grad_norm": 0.3315491714374862, "learning_rate": 8.027766045735054e-06, "loss": 0.4951, "step": 6727 }, { "epoch": 1.1049206577299695, "grad_norm": 0.32381105894241796, "learning_rate": 8.02747487621885e-06, "loss": 0.4908, "step": 6728 }, { "epoch": 1.1050848849383121, "grad_norm": 0.29213412468763467, "learning_rate": 8.027183668390795e-06, "loss": 0.4844, "step": 6729 }, { "epoch": 1.105249112146655, "grad_norm": 0.3109379295275468, "learning_rate": 8.026892422254058e-06, "loss": 0.4955, "step": 6730 }, { "epoch": 1.1054133393549976, "grad_norm": 0.30931573698151227, "learning_rate": 8.0266011378118e-06, "loss": 0.4868, "step": 6731 }, { "epoch": 1.1055775665633405, "grad_norm": 0.33109195922803386, "learning_rate": 8.02630981506718e-06, "loss": 0.4952, "step": 6732 }, { "epoch": 1.1057417937716831, "grad_norm": 0.3782335185626781, "learning_rate": 8.026018454023368e-06, "loss": 0.4915, "step": 6733 }, { "epoch": 1.1059060209800258, "grad_norm": 0.2978339135303565, "learning_rate": 8.025727054683528e-06, "loss": 0.5001, "step": 6734 }, { "epoch": 1.1060702481883686, "grad_norm": 0.32795336859626906, "learning_rate": 8.02543561705082e-06, "loss": 0.488, "step": 6735 }, { "epoch": 1.1062344753967113, "grad_norm": 0.4264225502151214, "learning_rate": 8.025144141128416e-06, "loss": 0.4796, "step": 6736 }, { "epoch": 1.1063987026050541, "grad_norm": 0.35193668625045177, "learning_rate": 8.024852626919476e-06, "loss": 0.4659, "step": 6737 }, { "epoch": 1.1065629298133968, "grad_norm": 0.5959409766547124, "learning_rate": 8.02456107442717e-06, "loss": 0.5001, "step": 6738 }, { "epoch": 1.1067271570217396, "grad_norm": 0.28543948369974015, "learning_rate": 8.024269483654663e-06, "loss": 0.4819, "step": 6739 }, { "epoch": 1.1068913842300823, "grad_norm": 0.35121396805542204, "learning_rate": 8.02397785460512e-06, "loss": 0.5072, "step": 6740 }, { "epoch": 1.1070556114384251, "grad_norm": 0.34711974817765606, "learning_rate": 8.023686187281715e-06, "loss": 0.497, "step": 6741 }, { "epoch": 1.1072198386467678, "grad_norm": 0.2914907052694788, "learning_rate": 8.023394481687607e-06, "loss": 0.4943, "step": 6742 }, { "epoch": 1.1073840658551106, "grad_norm": 0.24210791352085734, "learning_rate": 8.023102737825968e-06, "loss": 0.4686, "step": 6743 }, { "epoch": 1.1075482930634533, "grad_norm": 0.30686925835224993, "learning_rate": 8.022810955699969e-06, "loss": 0.4998, "step": 6744 }, { "epoch": 1.1077125202717961, "grad_norm": 0.364074147936255, "learning_rate": 8.022519135312778e-06, "loss": 0.4706, "step": 6745 }, { "epoch": 1.1078767474801388, "grad_norm": 0.3451141324499041, "learning_rate": 8.02222727666756e-06, "loss": 0.4977, "step": 6746 }, { "epoch": 1.1080409746884816, "grad_norm": 0.4224844755920305, "learning_rate": 8.02193537976749e-06, "loss": 0.4832, "step": 6747 }, { "epoch": 1.1082052018968243, "grad_norm": 0.3336599123558074, "learning_rate": 8.021643444615738e-06, "loss": 0.4962, "step": 6748 }, { "epoch": 1.1083694291051671, "grad_norm": 0.29675735868901576, "learning_rate": 8.021351471215474e-06, "loss": 0.4864, "step": 6749 }, { "epoch": 1.1085336563135098, "grad_norm": 0.37380530835569564, "learning_rate": 8.021059459569865e-06, "loss": 0.5131, "step": 6750 }, { "epoch": 1.1086978835218524, "grad_norm": 0.3060505137161988, "learning_rate": 8.020767409682087e-06, "loss": 0.4857, "step": 6751 }, { "epoch": 1.1088621107301952, "grad_norm": 0.3374431312187076, "learning_rate": 8.020475321555313e-06, "loss": 0.497, "step": 6752 }, { "epoch": 1.1090263379385379, "grad_norm": 0.2895837020492894, "learning_rate": 8.020183195192712e-06, "loss": 0.4897, "step": 6753 }, { "epoch": 1.1091905651468807, "grad_norm": 0.30210173515026933, "learning_rate": 8.019891030597459e-06, "loss": 0.4866, "step": 6754 }, { "epoch": 1.1093547923552234, "grad_norm": 0.30843038965702, "learning_rate": 8.019598827772726e-06, "loss": 0.4946, "step": 6755 }, { "epoch": 1.1095190195635662, "grad_norm": 0.35010649192439486, "learning_rate": 8.019306586721687e-06, "loss": 0.4874, "step": 6756 }, { "epoch": 1.1096832467719089, "grad_norm": 0.33598334774441196, "learning_rate": 8.019014307447516e-06, "loss": 0.4906, "step": 6757 }, { "epoch": 1.1098474739802517, "grad_norm": 0.3193771491741254, "learning_rate": 8.018721989953385e-06, "loss": 0.4726, "step": 6758 }, { "epoch": 1.1100117011885944, "grad_norm": 0.30846186162965583, "learning_rate": 8.018429634242472e-06, "loss": 0.4822, "step": 6759 }, { "epoch": 1.1101759283969372, "grad_norm": 0.2720305115460639, "learning_rate": 8.018137240317953e-06, "loss": 0.4925, "step": 6760 }, { "epoch": 1.1103401556052799, "grad_norm": 0.3475860658133177, "learning_rate": 8.017844808183002e-06, "loss": 0.4853, "step": 6761 }, { "epoch": 1.1105043828136227, "grad_norm": 0.2984241485105639, "learning_rate": 8.017552337840797e-06, "loss": 0.503, "step": 6762 }, { "epoch": 1.1106686100219654, "grad_norm": 0.2736700880333033, "learning_rate": 8.017259829294508e-06, "loss": 0.4958, "step": 6763 }, { "epoch": 1.1108328372303082, "grad_norm": 0.2947106459552869, "learning_rate": 8.01696728254732e-06, "loss": 0.4935, "step": 6764 }, { "epoch": 1.1109970644386509, "grad_norm": 0.4868928828665441, "learning_rate": 8.016674697602408e-06, "loss": 0.4979, "step": 6765 }, { "epoch": 1.1111612916469937, "grad_norm": 0.35004835932239026, "learning_rate": 8.016382074462947e-06, "loss": 0.4935, "step": 6766 }, { "epoch": 1.1113255188553364, "grad_norm": 0.31130301478663125, "learning_rate": 8.016089413132118e-06, "loss": 0.5001, "step": 6767 }, { "epoch": 1.111489746063679, "grad_norm": 0.3159981965485992, "learning_rate": 8.0157967136131e-06, "loss": 0.4817, "step": 6768 }, { "epoch": 1.1116539732720219, "grad_norm": 0.3877824930328974, "learning_rate": 8.015503975909066e-06, "loss": 0.4923, "step": 6769 }, { "epoch": 1.1118182004803645, "grad_norm": 0.29734466757560113, "learning_rate": 8.015211200023204e-06, "loss": 0.4786, "step": 6770 }, { "epoch": 1.1119824276887074, "grad_norm": 0.5139772002706117, "learning_rate": 8.014918385958688e-06, "loss": 0.4958, "step": 6771 }, { "epoch": 1.11214665489705, "grad_norm": 0.3794058659356544, "learning_rate": 8.0146255337187e-06, "loss": 0.4877, "step": 6772 }, { "epoch": 1.1123108821053929, "grad_norm": 0.5224552555310031, "learning_rate": 8.014332643306422e-06, "loss": 0.4793, "step": 6773 }, { "epoch": 1.1124751093137355, "grad_norm": 0.33283451191101004, "learning_rate": 8.014039714725034e-06, "loss": 0.4912, "step": 6774 }, { "epoch": 1.1126393365220784, "grad_norm": 0.29502653757934966, "learning_rate": 8.013746747977716e-06, "loss": 0.4949, "step": 6775 }, { "epoch": 1.112803563730421, "grad_norm": 0.3077537731044598, "learning_rate": 8.013453743067653e-06, "loss": 0.4841, "step": 6776 }, { "epoch": 1.1129677909387639, "grad_norm": 0.3273063129273603, "learning_rate": 8.013160699998025e-06, "loss": 0.4913, "step": 6777 }, { "epoch": 1.1131320181471065, "grad_norm": 0.3087636468252292, "learning_rate": 8.012867618772014e-06, "loss": 0.4965, "step": 6778 }, { "epoch": 1.1132962453554494, "grad_norm": 0.39746359055685737, "learning_rate": 8.012574499392805e-06, "loss": 0.489, "step": 6779 }, { "epoch": 1.113460472563792, "grad_norm": 0.37044284746318834, "learning_rate": 8.012281341863583e-06, "loss": 0.5052, "step": 6780 }, { "epoch": 1.1136246997721349, "grad_norm": 0.3366605150154806, "learning_rate": 8.011988146187527e-06, "loss": 0.4752, "step": 6781 }, { "epoch": 1.1137889269804775, "grad_norm": 0.25809968388187504, "learning_rate": 8.011694912367826e-06, "loss": 0.4877, "step": 6782 }, { "epoch": 1.1139531541888203, "grad_norm": 0.326670691401543, "learning_rate": 8.011401640407663e-06, "loss": 0.4749, "step": 6783 }, { "epoch": 1.114117381397163, "grad_norm": 0.28171208952437016, "learning_rate": 8.011108330310224e-06, "loss": 0.4985, "step": 6784 }, { "epoch": 1.1142816086055056, "grad_norm": 0.301397371835579, "learning_rate": 8.010814982078693e-06, "loss": 0.4937, "step": 6785 }, { "epoch": 1.1144458358138485, "grad_norm": 0.31271833861298604, "learning_rate": 8.010521595716257e-06, "loss": 0.5043, "step": 6786 }, { "epoch": 1.1146100630221911, "grad_norm": 0.3061642359290164, "learning_rate": 8.010228171226104e-06, "loss": 0.4773, "step": 6787 }, { "epoch": 1.114774290230534, "grad_norm": 0.34226686963469144, "learning_rate": 8.009934708611418e-06, "loss": 0.4814, "step": 6788 }, { "epoch": 1.1149385174388766, "grad_norm": 0.2925460215378301, "learning_rate": 8.00964120787539e-06, "loss": 0.5067, "step": 6789 }, { "epoch": 1.1151027446472195, "grad_norm": 0.5386012244349162, "learning_rate": 8.009347669021203e-06, "loss": 0.5001, "step": 6790 }, { "epoch": 1.1152669718555621, "grad_norm": 0.3130942002159212, "learning_rate": 8.009054092052048e-06, "loss": 0.4763, "step": 6791 }, { "epoch": 1.115431199063905, "grad_norm": 0.31529862231909345, "learning_rate": 8.008760476971114e-06, "loss": 0.5011, "step": 6792 }, { "epoch": 1.1155954262722476, "grad_norm": 0.2964668287424008, "learning_rate": 8.00846682378159e-06, "loss": 0.482, "step": 6793 }, { "epoch": 1.1157596534805905, "grad_norm": 0.47499097388877043, "learning_rate": 8.008173132486663e-06, "loss": 0.4886, "step": 6794 }, { "epoch": 1.1159238806889331, "grad_norm": 0.2928459229882892, "learning_rate": 8.007879403089523e-06, "loss": 0.4676, "step": 6795 }, { "epoch": 1.116088107897276, "grad_norm": 0.3433109964688705, "learning_rate": 8.007585635593364e-06, "loss": 0.4909, "step": 6796 }, { "epoch": 1.1162523351056186, "grad_norm": 0.4728391511187168, "learning_rate": 8.007291830001372e-06, "loss": 0.4819, "step": 6797 }, { "epoch": 1.1164165623139615, "grad_norm": 0.4378303059005657, "learning_rate": 8.006997986316741e-06, "loss": 0.5186, "step": 6798 }, { "epoch": 1.116580789522304, "grad_norm": 0.31289617638300654, "learning_rate": 8.006704104542661e-06, "loss": 0.4973, "step": 6799 }, { "epoch": 1.116745016730647, "grad_norm": 0.34202747263702443, "learning_rate": 8.006410184682325e-06, "loss": 0.47, "step": 6800 }, { "epoch": 1.1169092439389896, "grad_norm": 0.2870020047280274, "learning_rate": 8.006116226738924e-06, "loss": 0.4959, "step": 6801 }, { "epoch": 1.1170734711473322, "grad_norm": 0.35538818945126666, "learning_rate": 8.00582223071565e-06, "loss": 0.4819, "step": 6802 }, { "epoch": 1.117237698355675, "grad_norm": 0.39746931662385276, "learning_rate": 8.005528196615698e-06, "loss": 0.4882, "step": 6803 }, { "epoch": 1.1174019255640177, "grad_norm": 0.32267732981456915, "learning_rate": 8.005234124442263e-06, "loss": 0.4929, "step": 6804 }, { "epoch": 1.1175661527723606, "grad_norm": 0.4033719198932124, "learning_rate": 8.004940014198535e-06, "loss": 0.5093, "step": 6805 }, { "epoch": 1.1177303799807032, "grad_norm": 0.29773637858266944, "learning_rate": 8.00464586588771e-06, "loss": 0.4692, "step": 6806 }, { "epoch": 1.117894607189046, "grad_norm": 0.34154499889118795, "learning_rate": 8.004351679512983e-06, "loss": 0.4915, "step": 6807 }, { "epoch": 1.1180588343973887, "grad_norm": 0.3385191940718238, "learning_rate": 8.004057455077549e-06, "loss": 0.4972, "step": 6808 }, { "epoch": 1.1182230616057316, "grad_norm": 0.31447094702417544, "learning_rate": 8.003763192584602e-06, "loss": 0.4847, "step": 6809 }, { "epoch": 1.1183872888140742, "grad_norm": 0.3199578937694982, "learning_rate": 8.003468892037342e-06, "loss": 0.4921, "step": 6810 }, { "epoch": 1.118551516022417, "grad_norm": 0.28372764438025105, "learning_rate": 8.003174553438961e-06, "loss": 0.4905, "step": 6811 }, { "epoch": 1.1187157432307597, "grad_norm": 0.2801623722482946, "learning_rate": 8.002880176792659e-06, "loss": 0.4684, "step": 6812 }, { "epoch": 1.1188799704391026, "grad_norm": 0.30831350090955995, "learning_rate": 8.002585762101632e-06, "loss": 0.4729, "step": 6813 }, { "epoch": 1.1190441976474452, "grad_norm": 0.3297361277092859, "learning_rate": 8.002291309369075e-06, "loss": 0.4947, "step": 6814 }, { "epoch": 1.119208424855788, "grad_norm": 0.28407434308522084, "learning_rate": 8.001996818598192e-06, "loss": 0.4891, "step": 6815 }, { "epoch": 1.1193726520641307, "grad_norm": 0.3316328143968119, "learning_rate": 8.001702289792178e-06, "loss": 0.4746, "step": 6816 }, { "epoch": 1.1195368792724736, "grad_norm": 0.30843174241476523, "learning_rate": 8.001407722954228e-06, "loss": 0.4867, "step": 6817 }, { "epoch": 1.1197011064808162, "grad_norm": 0.30407152262582604, "learning_rate": 8.00111311808755e-06, "loss": 0.4799, "step": 6818 }, { "epoch": 1.1198653336891589, "grad_norm": 0.30124797465704084, "learning_rate": 8.000818475195335e-06, "loss": 0.5165, "step": 6819 }, { "epoch": 1.1200295608975017, "grad_norm": 0.27888439867394527, "learning_rate": 8.00052379428079e-06, "loss": 0.4949, "step": 6820 }, { "epoch": 1.1201937881058444, "grad_norm": 0.2996848725896366, "learning_rate": 8.00022907534711e-06, "loss": 0.4691, "step": 6821 }, { "epoch": 1.1203580153141872, "grad_norm": 0.49092113883894145, "learning_rate": 7.999934318397499e-06, "loss": 0.4864, "step": 6822 }, { "epoch": 1.1205222425225299, "grad_norm": 0.32199904567550197, "learning_rate": 7.99963952343516e-06, "loss": 0.5065, "step": 6823 }, { "epoch": 1.1206864697308727, "grad_norm": 0.3454881500764911, "learning_rate": 7.99934469046329e-06, "loss": 0.5026, "step": 6824 }, { "epoch": 1.1208506969392154, "grad_norm": 0.34670731138934535, "learning_rate": 7.999049819485094e-06, "loss": 0.4836, "step": 6825 }, { "epoch": 1.1210149241475582, "grad_norm": 0.40482327295583864, "learning_rate": 7.998754910503777e-06, "loss": 0.4726, "step": 6826 }, { "epoch": 1.1211791513559008, "grad_norm": 0.2845574511224747, "learning_rate": 7.998459963522537e-06, "loss": 0.4893, "step": 6827 }, { "epoch": 1.1213433785642437, "grad_norm": 0.2887523915174508, "learning_rate": 7.998164978544581e-06, "loss": 0.4788, "step": 6828 }, { "epoch": 1.1215076057725863, "grad_norm": 0.28850627113746496, "learning_rate": 7.99786995557311e-06, "loss": 0.5, "step": 6829 }, { "epoch": 1.1216718329809292, "grad_norm": 0.42010910356929393, "learning_rate": 7.997574894611332e-06, "loss": 0.4881, "step": 6830 }, { "epoch": 1.1218360601892718, "grad_norm": 0.3032247075642661, "learning_rate": 7.997279795662447e-06, "loss": 0.4677, "step": 6831 }, { "epoch": 1.1220002873976145, "grad_norm": 0.3286475855194463, "learning_rate": 7.996984658729664e-06, "loss": 0.5162, "step": 6832 }, { "epoch": 1.1221645146059573, "grad_norm": 0.28999341860845684, "learning_rate": 7.996689483816187e-06, "loss": 0.4928, "step": 6833 }, { "epoch": 1.1223287418143002, "grad_norm": 0.42759062227189226, "learning_rate": 7.996394270925222e-06, "loss": 0.486, "step": 6834 }, { "epoch": 1.1224929690226428, "grad_norm": 0.3450545136947932, "learning_rate": 7.996099020059975e-06, "loss": 0.4977, "step": 6835 }, { "epoch": 1.1226571962309855, "grad_norm": 0.3517990654176804, "learning_rate": 7.995803731223652e-06, "loss": 0.4919, "step": 6836 }, { "epoch": 1.1228214234393283, "grad_norm": 0.30347220564861077, "learning_rate": 7.995508404419462e-06, "loss": 0.4698, "step": 6837 }, { "epoch": 1.122985650647671, "grad_norm": 0.4887298383039076, "learning_rate": 7.995213039650613e-06, "loss": 0.469, "step": 6838 }, { "epoch": 1.1231498778560138, "grad_norm": 0.3297509541396508, "learning_rate": 7.994917636920311e-06, "loss": 0.4922, "step": 6839 }, { "epoch": 1.1233141050643565, "grad_norm": 0.36093345558286805, "learning_rate": 7.994622196231764e-06, "loss": 0.5015, "step": 6840 }, { "epoch": 1.1234783322726993, "grad_norm": 0.2868360244813704, "learning_rate": 7.994326717588181e-06, "loss": 0.4784, "step": 6841 }, { "epoch": 1.123642559481042, "grad_norm": 0.3860834145580953, "learning_rate": 7.994031200992771e-06, "loss": 0.4697, "step": 6842 }, { "epoch": 1.1238067866893848, "grad_norm": 0.30524245277727996, "learning_rate": 7.993735646448747e-06, "loss": 0.4782, "step": 6843 }, { "epoch": 1.1239710138977275, "grad_norm": 0.33538156594853824, "learning_rate": 7.993440053959317e-06, "loss": 0.4734, "step": 6844 }, { "epoch": 1.1241352411060703, "grad_norm": 0.29676905678033594, "learning_rate": 7.993144423527688e-06, "loss": 0.4693, "step": 6845 }, { "epoch": 1.124299468314413, "grad_norm": 0.36949417708823395, "learning_rate": 7.992848755157078e-06, "loss": 0.4808, "step": 6846 }, { "epoch": 1.1244636955227558, "grad_norm": 0.28107760405872567, "learning_rate": 7.99255304885069e-06, "loss": 0.4771, "step": 6847 }, { "epoch": 1.1246279227310985, "grad_norm": 0.3159370048129486, "learning_rate": 7.992257304611742e-06, "loss": 0.5189, "step": 6848 }, { "epoch": 1.124792149939441, "grad_norm": 0.3667710699910642, "learning_rate": 7.991961522443443e-06, "loss": 0.4874, "step": 6849 }, { "epoch": 1.124956377147784, "grad_norm": 0.4336645803076832, "learning_rate": 7.991665702349006e-06, "loss": 0.4809, "step": 6850 }, { "epoch": 1.1251206043561268, "grad_norm": 0.3175651323732905, "learning_rate": 7.991369844331644e-06, "loss": 0.4693, "step": 6851 }, { "epoch": 1.1252848315644695, "grad_norm": 0.31051693646801093, "learning_rate": 7.991073948394571e-06, "loss": 0.4779, "step": 6852 }, { "epoch": 1.125449058772812, "grad_norm": 0.3380347830518644, "learning_rate": 7.990778014541e-06, "loss": 0.5012, "step": 6853 }, { "epoch": 1.125613285981155, "grad_norm": 0.3200429128632671, "learning_rate": 7.990482042774146e-06, "loss": 0.4813, "step": 6854 }, { "epoch": 1.1257775131894976, "grad_norm": 0.6531875458342193, "learning_rate": 7.990186033097221e-06, "loss": 0.4828, "step": 6855 }, { "epoch": 1.1259417403978405, "grad_norm": 0.29262763987154605, "learning_rate": 7.989889985513443e-06, "loss": 0.4919, "step": 6856 }, { "epoch": 1.126105967606183, "grad_norm": 0.34676785422500556, "learning_rate": 7.989593900026025e-06, "loss": 0.4872, "step": 6857 }, { "epoch": 1.126270194814526, "grad_norm": 0.3232893839101977, "learning_rate": 7.989297776638185e-06, "loss": 0.4767, "step": 6858 }, { "epoch": 1.1264344220228686, "grad_norm": 0.4125187177592716, "learning_rate": 7.98900161535314e-06, "loss": 0.4851, "step": 6859 }, { "epoch": 1.1265986492312114, "grad_norm": 0.4946867385562897, "learning_rate": 7.988705416174103e-06, "loss": 0.5002, "step": 6860 }, { "epoch": 1.126762876439554, "grad_norm": 0.3315793843980202, "learning_rate": 7.988409179104291e-06, "loss": 0.4835, "step": 6861 }, { "epoch": 1.126927103647897, "grad_norm": 0.306718722386014, "learning_rate": 7.988112904146926e-06, "loss": 0.4587, "step": 6862 }, { "epoch": 1.1270913308562396, "grad_norm": 0.31903874679389793, "learning_rate": 7.987816591305222e-06, "loss": 0.4702, "step": 6863 }, { "epoch": 1.1272555580645824, "grad_norm": 0.2940358413813076, "learning_rate": 7.987520240582398e-06, "loss": 0.4732, "step": 6864 }, { "epoch": 1.127419785272925, "grad_norm": 0.2965111002119381, "learning_rate": 7.987223851981673e-06, "loss": 0.4842, "step": 6865 }, { "epoch": 1.1275840124812677, "grad_norm": 0.29814281040642143, "learning_rate": 7.986927425506266e-06, "loss": 0.5065, "step": 6866 }, { "epoch": 1.1277482396896106, "grad_norm": 0.44997584875021623, "learning_rate": 7.986630961159396e-06, "loss": 0.4937, "step": 6867 }, { "epoch": 1.1279124668979534, "grad_norm": 0.3079144456977793, "learning_rate": 7.986334458944284e-06, "loss": 0.4688, "step": 6868 }, { "epoch": 1.128076694106296, "grad_norm": 0.28911938864641884, "learning_rate": 7.986037918864149e-06, "loss": 0.4743, "step": 6869 }, { "epoch": 1.1282409213146387, "grad_norm": 0.3061321697174208, "learning_rate": 7.985741340922214e-06, "loss": 0.4876, "step": 6870 }, { "epoch": 1.1284051485229816, "grad_norm": 0.31625410917552904, "learning_rate": 7.985444725121698e-06, "loss": 0.5166, "step": 6871 }, { "epoch": 1.1285693757313242, "grad_norm": 0.36309716087794164, "learning_rate": 7.985148071465822e-06, "loss": 0.4754, "step": 6872 }, { "epoch": 1.128733602939667, "grad_norm": 0.6592469202298972, "learning_rate": 7.984851379957809e-06, "loss": 0.4874, "step": 6873 }, { "epoch": 1.1288978301480097, "grad_norm": 0.31436127933999225, "learning_rate": 7.984554650600883e-06, "loss": 0.4789, "step": 6874 }, { "epoch": 1.1290620573563526, "grad_norm": 0.2779929040169858, "learning_rate": 7.984257883398264e-06, "loss": 0.4827, "step": 6875 }, { "epoch": 1.1292262845646952, "grad_norm": 0.2747578546096707, "learning_rate": 7.983961078353175e-06, "loss": 0.5012, "step": 6876 }, { "epoch": 1.129390511773038, "grad_norm": 0.31846168785030443, "learning_rate": 7.983664235468845e-06, "loss": 0.5102, "step": 6877 }, { "epoch": 1.1295547389813807, "grad_norm": 0.771861999176361, "learning_rate": 7.98336735474849e-06, "loss": 0.4946, "step": 6878 }, { "epoch": 1.1297189661897236, "grad_norm": 0.502259440692531, "learning_rate": 7.98307043619534e-06, "loss": 0.4755, "step": 6879 }, { "epoch": 1.1298831933980662, "grad_norm": 0.35067185395810174, "learning_rate": 7.982773479812616e-06, "loss": 0.4899, "step": 6880 }, { "epoch": 1.130047420606409, "grad_norm": 0.27701882831760466, "learning_rate": 7.98247648560355e-06, "loss": 0.5046, "step": 6881 }, { "epoch": 1.1302116478147517, "grad_norm": 0.3261854870605588, "learning_rate": 7.98217945357136e-06, "loss": 0.4823, "step": 6882 }, { "epoch": 1.1303758750230943, "grad_norm": 0.2753413581996859, "learning_rate": 7.981882383719276e-06, "loss": 0.4917, "step": 6883 }, { "epoch": 1.1305401022314372, "grad_norm": 0.2713442153753001, "learning_rate": 7.981585276050522e-06, "loss": 0.4697, "step": 6884 }, { "epoch": 1.13070432943978, "grad_norm": 0.3051670893385003, "learning_rate": 7.981288130568328e-06, "loss": 0.4753, "step": 6885 }, { "epoch": 1.1308685566481227, "grad_norm": 0.3512406823319829, "learning_rate": 7.980990947275918e-06, "loss": 0.4971, "step": 6886 }, { "epoch": 1.1310327838564653, "grad_norm": 0.48853751705585835, "learning_rate": 7.980693726176525e-06, "loss": 0.491, "step": 6887 }, { "epoch": 1.1311970110648082, "grad_norm": 0.42627311188328976, "learning_rate": 7.98039646727337e-06, "loss": 0.4857, "step": 6888 }, { "epoch": 1.1313612382731508, "grad_norm": 0.2639648626523676, "learning_rate": 7.980099170569687e-06, "loss": 0.4999, "step": 6889 }, { "epoch": 1.1315254654814937, "grad_norm": 0.339761116269173, "learning_rate": 7.979801836068703e-06, "loss": 0.5077, "step": 6890 }, { "epoch": 1.1316896926898363, "grad_norm": 0.3125885329528476, "learning_rate": 7.979504463773647e-06, "loss": 0.5013, "step": 6891 }, { "epoch": 1.1318539198981792, "grad_norm": 0.35306176008613577, "learning_rate": 7.979207053687749e-06, "loss": 0.4881, "step": 6892 }, { "epoch": 1.1320181471065218, "grad_norm": 0.29872445785240126, "learning_rate": 7.97890960581424e-06, "loss": 0.475, "step": 6893 }, { "epoch": 1.1321823743148647, "grad_norm": 0.44540460265682047, "learning_rate": 7.97861212015635e-06, "loss": 0.4885, "step": 6894 }, { "epoch": 1.1323466015232073, "grad_norm": 0.292544060291412, "learning_rate": 7.978314596717308e-06, "loss": 0.4914, "step": 6895 }, { "epoch": 1.1325108287315502, "grad_norm": 0.3707873447225147, "learning_rate": 7.97801703550035e-06, "loss": 0.4705, "step": 6896 }, { "epoch": 1.1326750559398928, "grad_norm": 0.31368514148862653, "learning_rate": 7.977719436508702e-06, "loss": 0.4815, "step": 6897 }, { "epoch": 1.1328392831482357, "grad_norm": 0.3268045977778402, "learning_rate": 7.977421799745602e-06, "loss": 0.4857, "step": 6898 }, { "epoch": 1.1330035103565783, "grad_norm": 0.328387273053832, "learning_rate": 7.977124125214278e-06, "loss": 0.4928, "step": 6899 }, { "epoch": 1.133167737564921, "grad_norm": 0.8777662985504897, "learning_rate": 7.976826412917966e-06, "loss": 0.4823, "step": 6900 }, { "epoch": 1.1333319647732638, "grad_norm": 0.30699456552514476, "learning_rate": 7.9765286628599e-06, "loss": 0.4806, "step": 6901 }, { "epoch": 1.1334961919816067, "grad_norm": 0.3127291424152957, "learning_rate": 7.976230875043309e-06, "loss": 0.4816, "step": 6902 }, { "epoch": 1.1336604191899493, "grad_norm": 0.3543717904664054, "learning_rate": 7.975933049471433e-06, "loss": 0.4662, "step": 6903 }, { "epoch": 1.133824646398292, "grad_norm": 0.3399489359198452, "learning_rate": 7.975635186147504e-06, "loss": 0.5052, "step": 6904 }, { "epoch": 1.1339888736066348, "grad_norm": 0.30131910741574397, "learning_rate": 7.975337285074755e-06, "loss": 0.5135, "step": 6905 }, { "epoch": 1.1341531008149774, "grad_norm": 0.31272070176723527, "learning_rate": 7.975039346256427e-06, "loss": 0.5042, "step": 6906 }, { "epoch": 1.1343173280233203, "grad_norm": 0.29675626380205483, "learning_rate": 7.974741369695752e-06, "loss": 0.4861, "step": 6907 }, { "epoch": 1.134481555231663, "grad_norm": 0.37560474384127424, "learning_rate": 7.974443355395965e-06, "loss": 0.4789, "step": 6908 }, { "epoch": 1.1346457824400058, "grad_norm": 0.28532039885356286, "learning_rate": 7.974145303360305e-06, "loss": 0.5013, "step": 6909 }, { "epoch": 1.1348100096483484, "grad_norm": 0.9029060408738501, "learning_rate": 7.97384721359201e-06, "loss": 0.4825, "step": 6910 }, { "epoch": 1.1349742368566913, "grad_norm": 0.3563699828853312, "learning_rate": 7.973549086094317e-06, "loss": 0.5041, "step": 6911 }, { "epoch": 1.135138464065034, "grad_norm": 0.28934448513751987, "learning_rate": 7.973250920870463e-06, "loss": 0.4934, "step": 6912 }, { "epoch": 1.1353026912733768, "grad_norm": 0.30062125993335753, "learning_rate": 7.972952717923686e-06, "loss": 0.464, "step": 6913 }, { "epoch": 1.1354669184817194, "grad_norm": 0.319334843105809, "learning_rate": 7.972654477257226e-06, "loss": 0.4851, "step": 6914 }, { "epoch": 1.1356311456900623, "grad_norm": 0.29289754121365025, "learning_rate": 7.972356198874322e-06, "loss": 0.4783, "step": 6915 }, { "epoch": 1.135795372898405, "grad_norm": 0.5508218005907385, "learning_rate": 7.972057882778214e-06, "loss": 0.4843, "step": 6916 }, { "epoch": 1.1359596001067476, "grad_norm": 0.3015173952359254, "learning_rate": 7.97175952897214e-06, "loss": 0.4723, "step": 6917 }, { "epoch": 1.1361238273150904, "grad_norm": 0.29478196668659074, "learning_rate": 7.971461137459344e-06, "loss": 0.4849, "step": 6918 }, { "epoch": 1.1362880545234333, "grad_norm": 0.29208606846186064, "learning_rate": 7.971162708243062e-06, "loss": 0.5051, "step": 6919 }, { "epoch": 1.136452281731776, "grad_norm": 0.3201941594667582, "learning_rate": 7.97086424132654e-06, "loss": 0.4686, "step": 6920 }, { "epoch": 1.1366165089401186, "grad_norm": 0.320605390702946, "learning_rate": 7.970565736713015e-06, "loss": 0.4832, "step": 6921 }, { "epoch": 1.1367807361484614, "grad_norm": 0.4047041761384959, "learning_rate": 7.970267194405732e-06, "loss": 0.4722, "step": 6922 }, { "epoch": 1.136944963356804, "grad_norm": 0.30560656069904657, "learning_rate": 7.969968614407934e-06, "loss": 0.5096, "step": 6923 }, { "epoch": 1.137109190565147, "grad_norm": 0.2658716031775788, "learning_rate": 7.969669996722862e-06, "loss": 0.4676, "step": 6924 }, { "epoch": 1.1372734177734896, "grad_norm": 0.3873747529118186, "learning_rate": 7.96937134135376e-06, "loss": 0.4907, "step": 6925 }, { "epoch": 1.1374376449818324, "grad_norm": 0.31960115771207515, "learning_rate": 7.969072648303874e-06, "loss": 0.4802, "step": 6926 }, { "epoch": 1.137601872190175, "grad_norm": 0.3590451481218717, "learning_rate": 7.968773917576445e-06, "loss": 0.4909, "step": 6927 }, { "epoch": 1.137766099398518, "grad_norm": 0.35887748907508493, "learning_rate": 7.968475149174718e-06, "loss": 0.4828, "step": 6928 }, { "epoch": 1.1379303266068606, "grad_norm": 0.343101086073062, "learning_rate": 7.96817634310194e-06, "loss": 0.4925, "step": 6929 }, { "epoch": 1.1380945538152034, "grad_norm": 0.30393988234287916, "learning_rate": 7.967877499361351e-06, "loss": 0.4939, "step": 6930 }, { "epoch": 1.138258781023546, "grad_norm": 0.5099165088420305, "learning_rate": 7.967578617956203e-06, "loss": 0.471, "step": 6931 }, { "epoch": 1.138423008231889, "grad_norm": 0.40570362767769275, "learning_rate": 7.96727969888974e-06, "loss": 0.4982, "step": 6932 }, { "epoch": 1.1385872354402315, "grad_norm": 0.2764636379439328, "learning_rate": 7.966980742165207e-06, "loss": 0.4871, "step": 6933 }, { "epoch": 1.1387514626485742, "grad_norm": 0.31228463120141703, "learning_rate": 7.966681747785852e-06, "loss": 0.4833, "step": 6934 }, { "epoch": 1.138915689856917, "grad_norm": 0.28009158703806936, "learning_rate": 7.966382715754922e-06, "loss": 0.5005, "step": 6935 }, { "epoch": 1.13907991706526, "grad_norm": 0.30035288065758636, "learning_rate": 7.966083646075666e-06, "loss": 0.4989, "step": 6936 }, { "epoch": 1.1392441442736025, "grad_norm": 0.32496233991867585, "learning_rate": 7.96578453875133e-06, "loss": 0.4964, "step": 6937 }, { "epoch": 1.1394083714819452, "grad_norm": 0.27863619283479796, "learning_rate": 7.965485393785167e-06, "loss": 0.4853, "step": 6938 }, { "epoch": 1.139572598690288, "grad_norm": 0.3023354281991656, "learning_rate": 7.965186211180421e-06, "loss": 0.477, "step": 6939 }, { "epoch": 1.1397368258986307, "grad_norm": 0.3838169698056991, "learning_rate": 7.964886990940344e-06, "loss": 0.4988, "step": 6940 }, { "epoch": 1.1399010531069735, "grad_norm": 0.3102097812889304, "learning_rate": 7.964587733068186e-06, "loss": 0.4733, "step": 6941 }, { "epoch": 1.1400652803153162, "grad_norm": 0.38695000979047367, "learning_rate": 7.964288437567195e-06, "loss": 0.4877, "step": 6942 }, { "epoch": 1.140229507523659, "grad_norm": 0.30817199886482777, "learning_rate": 7.963989104440625e-06, "loss": 0.4835, "step": 6943 }, { "epoch": 1.1403937347320017, "grad_norm": 0.30619596819987116, "learning_rate": 7.963689733691724e-06, "loss": 0.4842, "step": 6944 }, { "epoch": 1.1405579619403445, "grad_norm": 0.2729947507683516, "learning_rate": 7.963390325323744e-06, "loss": 0.4712, "step": 6945 }, { "epoch": 1.1407221891486872, "grad_norm": 0.2844335128707053, "learning_rate": 7.963090879339939e-06, "loss": 0.5104, "step": 6946 }, { "epoch": 1.14088641635703, "grad_norm": 0.35143865483515885, "learning_rate": 7.962791395743559e-06, "loss": 0.5016, "step": 6947 }, { "epoch": 1.1410506435653727, "grad_norm": 0.29935758721236755, "learning_rate": 7.962491874537856e-06, "loss": 0.4927, "step": 6948 }, { "epoch": 1.1412148707737155, "grad_norm": 0.38742203083071736, "learning_rate": 7.962192315726086e-06, "loss": 0.4872, "step": 6949 }, { "epoch": 1.1413790979820582, "grad_norm": 0.26125003171023947, "learning_rate": 7.961892719311504e-06, "loss": 0.4861, "step": 6950 }, { "epoch": 1.1415433251904008, "grad_norm": 0.2788446059911499, "learning_rate": 7.961593085297357e-06, "loss": 0.4806, "step": 6951 }, { "epoch": 1.1417075523987437, "grad_norm": 0.37282944348130787, "learning_rate": 7.961293413686906e-06, "loss": 0.514, "step": 6952 }, { "epoch": 1.1418717796070865, "grad_norm": 0.3248997890565748, "learning_rate": 7.960993704483402e-06, "loss": 0.4991, "step": 6953 }, { "epoch": 1.1420360068154292, "grad_norm": 0.36815448389317, "learning_rate": 7.960693957690101e-06, "loss": 0.4934, "step": 6954 }, { "epoch": 1.1422002340237718, "grad_norm": 0.43626717191432984, "learning_rate": 7.960394173310259e-06, "loss": 0.4954, "step": 6955 }, { "epoch": 1.1423644612321147, "grad_norm": 0.34564209422226533, "learning_rate": 7.96009435134713e-06, "loss": 0.5072, "step": 6956 }, { "epoch": 1.1425286884404573, "grad_norm": 0.3229667721598629, "learning_rate": 7.959794491803975e-06, "loss": 0.484, "step": 6957 }, { "epoch": 1.1426929156488002, "grad_norm": 0.5355285268543933, "learning_rate": 7.959494594684047e-06, "loss": 0.5168, "step": 6958 }, { "epoch": 1.1428571428571428, "grad_norm": 0.3110418435011149, "learning_rate": 7.959194659990602e-06, "loss": 0.4927, "step": 6959 }, { "epoch": 1.1430213700654857, "grad_norm": 0.28513444305183067, "learning_rate": 7.958894687726902e-06, "loss": 0.4817, "step": 6960 }, { "epoch": 1.1431855972738283, "grad_norm": 0.27728566842644153, "learning_rate": 7.958594677896201e-06, "loss": 0.4884, "step": 6961 }, { "epoch": 1.1433498244821712, "grad_norm": 0.3150109157076501, "learning_rate": 7.958294630501761e-06, "loss": 0.5025, "step": 6962 }, { "epoch": 1.1435140516905138, "grad_norm": 0.37821826587535073, "learning_rate": 7.957994545546838e-06, "loss": 0.4677, "step": 6963 }, { "epoch": 1.1436782788988566, "grad_norm": 0.272360968366249, "learning_rate": 7.95769442303469e-06, "loss": 0.4825, "step": 6964 }, { "epoch": 1.1438425061071993, "grad_norm": 0.29025116102764614, "learning_rate": 7.957394262968581e-06, "loss": 0.4693, "step": 6965 }, { "epoch": 1.1440067333155421, "grad_norm": 0.3988120980456517, "learning_rate": 7.957094065351767e-06, "loss": 0.4878, "step": 6966 }, { "epoch": 1.1441709605238848, "grad_norm": 0.32215514818082297, "learning_rate": 7.956793830187512e-06, "loss": 0.5049, "step": 6967 }, { "epoch": 1.1443351877322274, "grad_norm": 0.3141652271654795, "learning_rate": 7.956493557479074e-06, "loss": 0.4791, "step": 6968 }, { "epoch": 1.1444994149405703, "grad_norm": 0.29312116929724086, "learning_rate": 7.956193247229714e-06, "loss": 0.4669, "step": 6969 }, { "epoch": 1.1446636421489131, "grad_norm": 0.3198331201930757, "learning_rate": 7.955892899442697e-06, "loss": 0.4892, "step": 6970 }, { "epoch": 1.1448278693572558, "grad_norm": 0.8979760840640667, "learning_rate": 7.955592514121281e-06, "loss": 0.4767, "step": 6971 }, { "epoch": 1.1449920965655984, "grad_norm": 0.2990306712439826, "learning_rate": 7.955292091268733e-06, "loss": 0.5102, "step": 6972 }, { "epoch": 1.1451563237739413, "grad_norm": 0.3053581095580193, "learning_rate": 7.954991630888311e-06, "loss": 0.4723, "step": 6973 }, { "epoch": 1.145320550982284, "grad_norm": 0.3150822655293769, "learning_rate": 7.954691132983282e-06, "loss": 0.4704, "step": 6974 }, { "epoch": 1.1454847781906268, "grad_norm": 0.2882659024696099, "learning_rate": 7.954390597556908e-06, "loss": 0.4725, "step": 6975 }, { "epoch": 1.1456490053989694, "grad_norm": 0.2797251128466923, "learning_rate": 7.954090024612453e-06, "loss": 0.4743, "step": 6976 }, { "epoch": 1.1458132326073123, "grad_norm": 0.3642369480396053, "learning_rate": 7.953789414153183e-06, "loss": 0.5029, "step": 6977 }, { "epoch": 1.145977459815655, "grad_norm": 0.29543854541561576, "learning_rate": 7.953488766182361e-06, "loss": 0.4801, "step": 6978 }, { "epoch": 1.1461416870239978, "grad_norm": 0.3695440138181109, "learning_rate": 7.953188080703254e-06, "loss": 0.4773, "step": 6979 }, { "epoch": 1.1463059142323404, "grad_norm": 0.35804494112721486, "learning_rate": 7.952887357719125e-06, "loss": 0.4892, "step": 6980 }, { "epoch": 1.1464701414406833, "grad_norm": 0.32574825202652413, "learning_rate": 7.952586597233245e-06, "loss": 0.5225, "step": 6981 }, { "epoch": 1.146634368649026, "grad_norm": 0.2549369661307935, "learning_rate": 7.952285799248875e-06, "loss": 0.4872, "step": 6982 }, { "epoch": 1.1467985958573688, "grad_norm": 0.2836474018881919, "learning_rate": 7.951984963769287e-06, "loss": 0.4807, "step": 6983 }, { "epoch": 1.1469628230657114, "grad_norm": 0.3010588829949219, "learning_rate": 7.951684090797744e-06, "loss": 0.4766, "step": 6984 }, { "epoch": 1.147127050274054, "grad_norm": 0.2774477777434259, "learning_rate": 7.951383180337516e-06, "loss": 0.4914, "step": 6985 }, { "epoch": 1.147291277482397, "grad_norm": 0.43537709750406217, "learning_rate": 7.951082232391873e-06, "loss": 0.4822, "step": 6986 }, { "epoch": 1.1474555046907398, "grad_norm": 0.29585741210346284, "learning_rate": 7.950781246964079e-06, "loss": 0.47, "step": 6987 }, { "epoch": 1.1476197318990824, "grad_norm": 0.3355195438180755, "learning_rate": 7.950480224057406e-06, "loss": 0.4849, "step": 6988 }, { "epoch": 1.147783959107425, "grad_norm": 0.32705446324079807, "learning_rate": 7.950179163675124e-06, "loss": 0.4889, "step": 6989 }, { "epoch": 1.147948186315768, "grad_norm": 0.2670710136847381, "learning_rate": 7.9498780658205e-06, "loss": 0.4794, "step": 6990 }, { "epoch": 1.1481124135241105, "grad_norm": 0.30118225566962875, "learning_rate": 7.949576930496808e-06, "loss": 0.4883, "step": 6991 }, { "epoch": 1.1482766407324534, "grad_norm": 0.36070352710146236, "learning_rate": 7.949275757707316e-06, "loss": 0.477, "step": 6992 }, { "epoch": 1.148440867940796, "grad_norm": 0.3061406159550247, "learning_rate": 7.948974547455297e-06, "loss": 0.4931, "step": 6993 }, { "epoch": 1.1486050951491389, "grad_norm": 0.3545081273612038, "learning_rate": 7.94867329974402e-06, "loss": 0.4847, "step": 6994 }, { "epoch": 1.1487693223574815, "grad_norm": 0.3311782995765307, "learning_rate": 7.948372014576756e-06, "loss": 0.4959, "step": 6995 }, { "epoch": 1.1489335495658244, "grad_norm": 0.3077919240460935, "learning_rate": 7.94807069195678e-06, "loss": 0.4757, "step": 6996 }, { "epoch": 1.149097776774167, "grad_norm": 0.2870260528487584, "learning_rate": 7.947769331887365e-06, "loss": 0.4615, "step": 6997 }, { "epoch": 1.1492620039825099, "grad_norm": 0.29569179787697997, "learning_rate": 7.947467934371783e-06, "loss": 0.4936, "step": 6998 }, { "epoch": 1.1494262311908525, "grad_norm": 0.35220813871941586, "learning_rate": 7.947166499413307e-06, "loss": 0.4593, "step": 6999 }, { "epoch": 1.1495904583991954, "grad_norm": 0.32263584184992183, "learning_rate": 7.946865027015212e-06, "loss": 0.4904, "step": 7000 }, { "epoch": 1.149754685607538, "grad_norm": 0.5166828266471525, "learning_rate": 7.94656351718077e-06, "loss": 0.4904, "step": 7001 }, { "epoch": 1.1499189128158807, "grad_norm": 0.2712023974328414, "learning_rate": 7.946261969913257e-06, "loss": 0.4818, "step": 7002 }, { "epoch": 1.1500831400242235, "grad_norm": 0.26915717210756196, "learning_rate": 7.94596038521595e-06, "loss": 0.4819, "step": 7003 }, { "epoch": 1.1502473672325664, "grad_norm": 0.30042354024540735, "learning_rate": 7.945658763092124e-06, "loss": 0.4732, "step": 7004 }, { "epoch": 1.150411594440909, "grad_norm": 0.32655935806435304, "learning_rate": 7.94535710354505e-06, "loss": 0.4978, "step": 7005 }, { "epoch": 1.1505758216492517, "grad_norm": 0.3199955481342031, "learning_rate": 7.94505540657801e-06, "loss": 0.4744, "step": 7006 }, { "epoch": 1.1507400488575945, "grad_norm": 0.35223648349278697, "learning_rate": 7.94475367219428e-06, "loss": 0.4725, "step": 7007 }, { "epoch": 1.1509042760659371, "grad_norm": 0.34834147750497035, "learning_rate": 7.944451900397133e-06, "loss": 0.4773, "step": 7008 }, { "epoch": 1.15106850327428, "grad_norm": 0.3568296672581254, "learning_rate": 7.944150091189854e-06, "loss": 0.495, "step": 7009 }, { "epoch": 1.1512327304826226, "grad_norm": 0.3000946461945242, "learning_rate": 7.943848244575712e-06, "loss": 0.4742, "step": 7010 }, { "epoch": 1.1513969576909655, "grad_norm": 0.2721934419714511, "learning_rate": 7.943546360557992e-06, "loss": 0.496, "step": 7011 }, { "epoch": 1.1515611848993081, "grad_norm": 0.31830130435810267, "learning_rate": 7.94324443913997e-06, "loss": 0.508, "step": 7012 }, { "epoch": 1.151725412107651, "grad_norm": 0.3229175169530436, "learning_rate": 7.942942480324925e-06, "loss": 0.4816, "step": 7013 }, { "epoch": 1.1518896393159936, "grad_norm": 0.4111817485404657, "learning_rate": 7.942640484116138e-06, "loss": 0.4684, "step": 7014 }, { "epoch": 1.1520538665243365, "grad_norm": 0.2656930494883279, "learning_rate": 7.942338450516888e-06, "loss": 0.458, "step": 7015 }, { "epoch": 1.1522180937326791, "grad_norm": 0.31851059556159345, "learning_rate": 7.942036379530456e-06, "loss": 0.4754, "step": 7016 }, { "epoch": 1.152382320941022, "grad_norm": 0.27602728318038483, "learning_rate": 7.941734271160122e-06, "loss": 0.4817, "step": 7017 }, { "epoch": 1.1525465481493646, "grad_norm": 0.29468735688655845, "learning_rate": 7.941432125409168e-06, "loss": 0.4742, "step": 7018 }, { "epoch": 1.1527107753577073, "grad_norm": 0.5489676416342217, "learning_rate": 7.941129942280876e-06, "loss": 0.4855, "step": 7019 }, { "epoch": 1.1528750025660501, "grad_norm": 0.3221574182624256, "learning_rate": 7.940827721778525e-06, "loss": 0.4695, "step": 7020 }, { "epoch": 1.153039229774393, "grad_norm": 0.314407501924177, "learning_rate": 7.940525463905401e-06, "loss": 0.4909, "step": 7021 }, { "epoch": 1.1532034569827356, "grad_norm": 0.3598478943499774, "learning_rate": 7.940223168664785e-06, "loss": 0.5016, "step": 7022 }, { "epoch": 1.1533676841910783, "grad_norm": 0.39422712873691973, "learning_rate": 7.93992083605996e-06, "loss": 0.4817, "step": 7023 }, { "epoch": 1.1535319113994211, "grad_norm": 0.36165863495626405, "learning_rate": 7.939618466094213e-06, "loss": 0.4803, "step": 7024 }, { "epoch": 1.1536961386077638, "grad_norm": 0.31260829963726494, "learning_rate": 7.939316058770823e-06, "loss": 0.486, "step": 7025 }, { "epoch": 1.1538603658161066, "grad_norm": 0.34725257054626907, "learning_rate": 7.939013614093078e-06, "loss": 0.4775, "step": 7026 }, { "epoch": 1.1540245930244493, "grad_norm": 0.2669254990323276, "learning_rate": 7.93871113206426e-06, "loss": 0.4889, "step": 7027 }, { "epoch": 1.1541888202327921, "grad_norm": 0.4093792540774185, "learning_rate": 7.938408612687657e-06, "loss": 0.491, "step": 7028 }, { "epoch": 1.1543530474411348, "grad_norm": 0.3290675100373528, "learning_rate": 7.938106055966554e-06, "loss": 0.4954, "step": 7029 }, { "epoch": 1.1545172746494776, "grad_norm": 0.3225318096562668, "learning_rate": 7.937803461904236e-06, "loss": 0.4846, "step": 7030 }, { "epoch": 1.1546815018578203, "grad_norm": 0.3516853816464303, "learning_rate": 7.93750083050399e-06, "loss": 0.4874, "step": 7031 }, { "epoch": 1.1548457290661631, "grad_norm": 0.32631152573393535, "learning_rate": 7.937198161769102e-06, "loss": 0.4735, "step": 7032 }, { "epoch": 1.1550099562745058, "grad_norm": 0.31907307198476026, "learning_rate": 7.936895455702861e-06, "loss": 0.49, "step": 7033 }, { "epoch": 1.1551741834828486, "grad_norm": 0.27136595054747875, "learning_rate": 7.936592712308557e-06, "loss": 0.4598, "step": 7034 }, { "epoch": 1.1553384106911913, "grad_norm": 0.3763844729573229, "learning_rate": 7.93628993158947e-06, "loss": 0.4689, "step": 7035 }, { "epoch": 1.155502637899534, "grad_norm": 0.3195562295488287, "learning_rate": 7.935987113548896e-06, "loss": 0.4878, "step": 7036 }, { "epoch": 1.1556668651078768, "grad_norm": 0.2761377095445116, "learning_rate": 7.93568425819012e-06, "loss": 0.4706, "step": 7037 }, { "epoch": 1.1558310923162196, "grad_norm": 0.5413992090888955, "learning_rate": 7.935381365516435e-06, "loss": 0.5024, "step": 7038 }, { "epoch": 1.1559953195245622, "grad_norm": 0.5469978575561415, "learning_rate": 7.935078435531127e-06, "loss": 0.4571, "step": 7039 }, { "epoch": 1.1561595467329049, "grad_norm": 0.2823534067099165, "learning_rate": 7.934775468237486e-06, "loss": 0.4698, "step": 7040 }, { "epoch": 1.1563237739412477, "grad_norm": 0.44633643276642965, "learning_rate": 7.934472463638807e-06, "loss": 0.4956, "step": 7041 }, { "epoch": 1.1564880011495904, "grad_norm": 0.376102625780165, "learning_rate": 7.934169421738377e-06, "loss": 0.4824, "step": 7042 }, { "epoch": 1.1566522283579332, "grad_norm": 0.27588776388549247, "learning_rate": 7.933866342539488e-06, "loss": 0.5206, "step": 7043 }, { "epoch": 1.1568164555662759, "grad_norm": 0.2878482112541905, "learning_rate": 7.933563226045431e-06, "loss": 0.4852, "step": 7044 }, { "epoch": 1.1569806827746187, "grad_norm": 0.2697563816295159, "learning_rate": 7.933260072259501e-06, "loss": 0.499, "step": 7045 }, { "epoch": 1.1571449099829614, "grad_norm": 0.28236168172776016, "learning_rate": 7.932956881184988e-06, "loss": 0.5005, "step": 7046 }, { "epoch": 1.1573091371913042, "grad_norm": 0.3338941095186326, "learning_rate": 7.932653652825185e-06, "loss": 0.4796, "step": 7047 }, { "epoch": 1.1574733643996469, "grad_norm": 0.3625356119489219, "learning_rate": 7.932350387183387e-06, "loss": 0.5081, "step": 7048 }, { "epoch": 1.1576375916079897, "grad_norm": 0.27136370495062984, "learning_rate": 7.932047084262887e-06, "loss": 0.4879, "step": 7049 }, { "epoch": 1.1578018188163324, "grad_norm": 0.3366519673880185, "learning_rate": 7.931743744066978e-06, "loss": 0.491, "step": 7050 }, { "epoch": 1.1579660460246752, "grad_norm": 0.30178495916921333, "learning_rate": 7.931440366598956e-06, "loss": 0.4831, "step": 7051 }, { "epoch": 1.1581302732330179, "grad_norm": 0.32500677131083183, "learning_rate": 7.931136951862117e-06, "loss": 0.4977, "step": 7052 }, { "epoch": 1.1582945004413605, "grad_norm": 0.2751069112378041, "learning_rate": 7.930833499859752e-06, "loss": 0.4689, "step": 7053 }, { "epoch": 1.1584587276497034, "grad_norm": 0.3078889240750525, "learning_rate": 7.930530010595161e-06, "loss": 0.4922, "step": 7054 }, { "epoch": 1.1586229548580462, "grad_norm": 0.2956108438153107, "learning_rate": 7.93022648407164e-06, "loss": 0.4897, "step": 7055 }, { "epoch": 1.1587871820663889, "grad_norm": 0.2927169524393216, "learning_rate": 7.929922920292483e-06, "loss": 0.4975, "step": 7056 }, { "epoch": 1.1589514092747315, "grad_norm": 0.33526725775792277, "learning_rate": 7.929619319260988e-06, "loss": 0.4927, "step": 7057 }, { "epoch": 1.1591156364830744, "grad_norm": 0.4759390949788766, "learning_rate": 7.929315680980456e-06, "loss": 0.4908, "step": 7058 }, { "epoch": 1.159279863691417, "grad_norm": 0.32076151526244373, "learning_rate": 7.929012005454178e-06, "loss": 0.4795, "step": 7059 }, { "epoch": 1.1594440908997599, "grad_norm": 0.31178290972430867, "learning_rate": 7.928708292685458e-06, "loss": 0.5054, "step": 7060 }, { "epoch": 1.1596083181081025, "grad_norm": 0.3028328443170019, "learning_rate": 7.928404542677592e-06, "loss": 0.4939, "step": 7061 }, { "epoch": 1.1597725453164454, "grad_norm": 0.8078948086337088, "learning_rate": 7.92810075543388e-06, "loss": 0.4848, "step": 7062 }, { "epoch": 1.159936772524788, "grad_norm": 0.2892938136222857, "learning_rate": 7.927796930957622e-06, "loss": 0.4998, "step": 7063 }, { "epoch": 1.1601009997331309, "grad_norm": 0.3019318848201967, "learning_rate": 7.927493069252115e-06, "loss": 0.4862, "step": 7064 }, { "epoch": 1.1602652269414735, "grad_norm": 0.2908551244366857, "learning_rate": 7.927189170320663e-06, "loss": 0.4978, "step": 7065 }, { "epoch": 1.1604294541498164, "grad_norm": 0.29280416653917135, "learning_rate": 7.926885234166562e-06, "loss": 0.4755, "step": 7066 }, { "epoch": 1.160593681358159, "grad_norm": 0.269957864981838, "learning_rate": 7.926581260793119e-06, "loss": 0.4701, "step": 7067 }, { "epoch": 1.1607579085665019, "grad_norm": 0.30097708194571493, "learning_rate": 7.926277250203629e-06, "loss": 0.4644, "step": 7068 }, { "epoch": 1.1609221357748445, "grad_norm": 0.32757186478730316, "learning_rate": 7.925973202401399e-06, "loss": 0.4749, "step": 7069 }, { "epoch": 1.1610863629831871, "grad_norm": 0.40123882700762675, "learning_rate": 7.925669117389728e-06, "loss": 0.4788, "step": 7070 }, { "epoch": 1.16125059019153, "grad_norm": 0.3603074875810292, "learning_rate": 7.92536499517192e-06, "loss": 0.4812, "step": 7071 }, { "epoch": 1.1614148173998728, "grad_norm": 0.3698279608261931, "learning_rate": 7.92506083575128e-06, "loss": 0.4665, "step": 7072 }, { "epoch": 1.1615790446082155, "grad_norm": 0.4518716080031763, "learning_rate": 7.924756639131108e-06, "loss": 0.4997, "step": 7073 }, { "epoch": 1.1617432718165581, "grad_norm": 0.33189232317990797, "learning_rate": 7.92445240531471e-06, "loss": 0.4534, "step": 7074 }, { "epoch": 1.161907499024901, "grad_norm": 0.3044263355830535, "learning_rate": 7.924148134305389e-06, "loss": 0.5074, "step": 7075 }, { "epoch": 1.1620717262332436, "grad_norm": 0.34185010495544116, "learning_rate": 7.92384382610645e-06, "loss": 0.4768, "step": 7076 }, { "epoch": 1.1622359534415865, "grad_norm": 0.3453996765799976, "learning_rate": 7.9235394807212e-06, "loss": 0.4863, "step": 7077 }, { "epoch": 1.1624001806499291, "grad_norm": 0.3128815977202802, "learning_rate": 7.923235098152943e-06, "loss": 0.5233, "step": 7078 }, { "epoch": 1.162564407858272, "grad_norm": 0.5247087085994546, "learning_rate": 7.922930678404983e-06, "loss": 0.4742, "step": 7079 }, { "epoch": 1.1627286350666146, "grad_norm": 0.39150724559453093, "learning_rate": 7.922626221480629e-06, "loss": 0.4781, "step": 7080 }, { "epoch": 1.1628928622749575, "grad_norm": 0.33804533874168374, "learning_rate": 7.922321727383187e-06, "loss": 0.4862, "step": 7081 }, { "epoch": 1.1630570894833, "grad_norm": 0.43511358014238194, "learning_rate": 7.922017196115964e-06, "loss": 0.4706, "step": 7082 }, { "epoch": 1.163221316691643, "grad_norm": 0.32505034890033285, "learning_rate": 7.921712627682266e-06, "loss": 0.483, "step": 7083 }, { "epoch": 1.1633855438999856, "grad_norm": 0.29173852633659864, "learning_rate": 7.921408022085404e-06, "loss": 0.4763, "step": 7084 }, { "epoch": 1.1635497711083285, "grad_norm": 0.32413350522873113, "learning_rate": 7.921103379328685e-06, "loss": 0.5107, "step": 7085 }, { "epoch": 1.163713998316671, "grad_norm": 0.29827489864002776, "learning_rate": 7.920798699415416e-06, "loss": 0.4904, "step": 7086 }, { "epoch": 1.1638782255250137, "grad_norm": 0.32881461136004475, "learning_rate": 7.92049398234891e-06, "loss": 0.4878, "step": 7087 }, { "epoch": 1.1640424527333566, "grad_norm": 0.26966455996545646, "learning_rate": 7.92018922813247e-06, "loss": 0.4843, "step": 7088 }, { "epoch": 1.1642066799416995, "grad_norm": 0.3524190577600752, "learning_rate": 7.919884436769413e-06, "loss": 0.4807, "step": 7089 }, { "epoch": 1.164370907150042, "grad_norm": 0.2873202615000851, "learning_rate": 7.919579608263045e-06, "loss": 0.4889, "step": 7090 }, { "epoch": 1.1645351343583847, "grad_norm": 0.32072446570760915, "learning_rate": 7.91927474261668e-06, "loss": 0.4881, "step": 7091 }, { "epoch": 1.1646993615667276, "grad_norm": 0.35024441319253874, "learning_rate": 7.918969839833625e-06, "loss": 0.4794, "step": 7092 }, { "epoch": 1.1648635887750702, "grad_norm": 0.3299835820205014, "learning_rate": 7.918664899917194e-06, "loss": 0.4884, "step": 7093 }, { "epoch": 1.165027815983413, "grad_norm": 0.29717281189054834, "learning_rate": 7.9183599228707e-06, "loss": 0.4886, "step": 7094 }, { "epoch": 1.1651920431917557, "grad_norm": 0.3284713919170598, "learning_rate": 7.918054908697453e-06, "loss": 0.4818, "step": 7095 }, { "epoch": 1.1653562704000986, "grad_norm": 0.2925217287720715, "learning_rate": 7.917749857400766e-06, "loss": 0.4869, "step": 7096 }, { "epoch": 1.1655204976084412, "grad_norm": 0.2765372710402451, "learning_rate": 7.917444768983954e-06, "loss": 0.4661, "step": 7097 }, { "epoch": 1.165684724816784, "grad_norm": 0.3101792926600381, "learning_rate": 7.917139643450331e-06, "loss": 0.4848, "step": 7098 }, { "epoch": 1.1658489520251267, "grad_norm": 0.2848413701567469, "learning_rate": 7.916834480803207e-06, "loss": 0.4973, "step": 7099 }, { "epoch": 1.1660131792334696, "grad_norm": 0.28047689897386036, "learning_rate": 7.9165292810459e-06, "loss": 0.4548, "step": 7100 }, { "epoch": 1.1661774064418122, "grad_norm": 0.35063738100165215, "learning_rate": 7.916224044181723e-06, "loss": 0.4869, "step": 7101 }, { "epoch": 1.166341633650155, "grad_norm": 0.2823049704902083, "learning_rate": 7.915918770213992e-06, "loss": 0.4783, "step": 7102 }, { "epoch": 1.1665058608584977, "grad_norm": 0.29208609576357075, "learning_rate": 7.915613459146022e-06, "loss": 0.4933, "step": 7103 }, { "epoch": 1.1666700880668404, "grad_norm": 0.2908603084533477, "learning_rate": 7.915308110981129e-06, "loss": 0.4762, "step": 7104 }, { "epoch": 1.1668343152751832, "grad_norm": 0.3142549539312152, "learning_rate": 7.915002725722632e-06, "loss": 0.467, "step": 7105 }, { "epoch": 1.166998542483526, "grad_norm": 0.297802378353576, "learning_rate": 7.914697303373843e-06, "loss": 0.4712, "step": 7106 }, { "epoch": 1.1671627696918687, "grad_norm": 0.30827567036324066, "learning_rate": 7.914391843938082e-06, "loss": 0.4782, "step": 7107 }, { "epoch": 1.1673269969002114, "grad_norm": 0.27376585280710897, "learning_rate": 7.914086347418667e-06, "loss": 0.4667, "step": 7108 }, { "epoch": 1.1674912241085542, "grad_norm": 0.2912467864119177, "learning_rate": 7.913780813818914e-06, "loss": 0.4891, "step": 7109 }, { "epoch": 1.1676554513168969, "grad_norm": 0.3755970122826635, "learning_rate": 7.913475243142145e-06, "loss": 0.4698, "step": 7110 }, { "epoch": 1.1678196785252397, "grad_norm": 0.3126792391825246, "learning_rate": 7.913169635391675e-06, "loss": 0.4962, "step": 7111 }, { "epoch": 1.1679839057335824, "grad_norm": 0.33128634368155074, "learning_rate": 7.912863990570826e-06, "loss": 0.4906, "step": 7112 }, { "epoch": 1.1681481329419252, "grad_norm": 0.276069135824882, "learning_rate": 7.912558308682914e-06, "loss": 0.5039, "step": 7113 }, { "epoch": 1.1683123601502678, "grad_norm": 0.30298873479148086, "learning_rate": 7.912252589731262e-06, "loss": 0.4697, "step": 7114 }, { "epoch": 1.1684765873586107, "grad_norm": 0.34034916001374615, "learning_rate": 7.91194683371919e-06, "loss": 0.4949, "step": 7115 }, { "epoch": 1.1686408145669533, "grad_norm": 0.30127604093427846, "learning_rate": 7.911641040650019e-06, "loss": 0.4656, "step": 7116 }, { "epoch": 1.1688050417752962, "grad_norm": 0.30448083371606166, "learning_rate": 7.911335210527068e-06, "loss": 0.504, "step": 7117 }, { "epoch": 1.1689692689836388, "grad_norm": 0.3490091808051166, "learning_rate": 7.911029343353664e-06, "loss": 0.4747, "step": 7118 }, { "epoch": 1.1691334961919817, "grad_norm": 0.28283313823611345, "learning_rate": 7.910723439133123e-06, "loss": 0.4985, "step": 7119 }, { "epoch": 1.1692977234003243, "grad_norm": 1.1831427012153144, "learning_rate": 7.910417497868768e-06, "loss": 0.5003, "step": 7120 }, { "epoch": 1.169461950608667, "grad_norm": 0.37970954472659757, "learning_rate": 7.910111519563926e-06, "loss": 0.4743, "step": 7121 }, { "epoch": 1.1696261778170098, "grad_norm": 0.3060323198171699, "learning_rate": 7.909805504221917e-06, "loss": 0.4727, "step": 7122 }, { "epoch": 1.1697904050253527, "grad_norm": 0.3090375579006394, "learning_rate": 7.909499451846065e-06, "loss": 0.4854, "step": 7123 }, { "epoch": 1.1699546322336953, "grad_norm": 0.35422086742882625, "learning_rate": 7.909193362439696e-06, "loss": 0.481, "step": 7124 }, { "epoch": 1.170118859442038, "grad_norm": 0.2833349932119131, "learning_rate": 7.908887236006131e-06, "loss": 0.4609, "step": 7125 }, { "epoch": 1.1702830866503808, "grad_norm": 0.3438027843268845, "learning_rate": 7.908581072548698e-06, "loss": 0.4978, "step": 7126 }, { "epoch": 1.1704473138587235, "grad_norm": 0.3084865886495486, "learning_rate": 7.908274872070718e-06, "loss": 0.4879, "step": 7127 }, { "epoch": 1.1706115410670663, "grad_norm": 0.2968696229849295, "learning_rate": 7.907968634575524e-06, "loss": 0.4669, "step": 7128 }, { "epoch": 1.170775768275409, "grad_norm": 0.30218235608708677, "learning_rate": 7.907662360066435e-06, "loss": 0.4836, "step": 7129 }, { "epoch": 1.1709399954837518, "grad_norm": 0.29267877493515143, "learning_rate": 7.907356048546781e-06, "loss": 0.4828, "step": 7130 }, { "epoch": 1.1711042226920945, "grad_norm": 0.30254596069123213, "learning_rate": 7.907049700019888e-06, "loss": 0.4937, "step": 7131 }, { "epoch": 1.1712684499004373, "grad_norm": 0.3029131007987612, "learning_rate": 7.906743314489084e-06, "loss": 0.4709, "step": 7132 }, { "epoch": 1.17143267710878, "grad_norm": 0.38195767468163316, "learning_rate": 7.906436891957694e-06, "loss": 0.4835, "step": 7133 }, { "epoch": 1.1715969043171228, "grad_norm": 0.32624775519999805, "learning_rate": 7.906130432429048e-06, "loss": 0.5137, "step": 7134 }, { "epoch": 1.1717611315254655, "grad_norm": 0.2754028052145745, "learning_rate": 7.905823935906474e-06, "loss": 0.4959, "step": 7135 }, { "epoch": 1.1719253587338083, "grad_norm": 0.4444728587958453, "learning_rate": 7.905517402393304e-06, "loss": 0.514, "step": 7136 }, { "epoch": 1.172089585942151, "grad_norm": 0.2726245516199059, "learning_rate": 7.905210831892863e-06, "loss": 0.4567, "step": 7137 }, { "epoch": 1.1722538131504936, "grad_norm": 0.28681242761135123, "learning_rate": 7.904904224408481e-06, "loss": 0.4986, "step": 7138 }, { "epoch": 1.1724180403588365, "grad_norm": 0.2756319660402748, "learning_rate": 7.904597579943488e-06, "loss": 0.4842, "step": 7139 }, { "epoch": 1.1725822675671793, "grad_norm": 0.33331624186474573, "learning_rate": 7.904290898501218e-06, "loss": 0.4678, "step": 7140 }, { "epoch": 1.172746494775522, "grad_norm": 0.3325946939274747, "learning_rate": 7.903984180084999e-06, "loss": 0.4677, "step": 7141 }, { "epoch": 1.1729107219838646, "grad_norm": 0.2699322093677336, "learning_rate": 7.903677424698163e-06, "loss": 0.473, "step": 7142 }, { "epoch": 1.1730749491922075, "grad_norm": 0.28956151873217795, "learning_rate": 7.903370632344042e-06, "loss": 0.4828, "step": 7143 }, { "epoch": 1.17323917640055, "grad_norm": 0.5323259173407964, "learning_rate": 7.903063803025965e-06, "loss": 0.4677, "step": 7144 }, { "epoch": 1.173403403608893, "grad_norm": 0.38857627895908703, "learning_rate": 7.902756936747268e-06, "loss": 0.4758, "step": 7145 }, { "epoch": 1.1735676308172356, "grad_norm": 0.3227428663071013, "learning_rate": 7.902450033511284e-06, "loss": 0.4831, "step": 7146 }, { "epoch": 1.1737318580255784, "grad_norm": 0.3086496978455175, "learning_rate": 7.902143093321344e-06, "loss": 0.4857, "step": 7147 }, { "epoch": 1.173896085233921, "grad_norm": 0.32069342974778725, "learning_rate": 7.901836116180784e-06, "loss": 0.4907, "step": 7148 }, { "epoch": 1.174060312442264, "grad_norm": 0.4404613405259411, "learning_rate": 7.901529102092935e-06, "loss": 0.4866, "step": 7149 }, { "epoch": 1.1742245396506066, "grad_norm": 0.28651128563078515, "learning_rate": 7.901222051061133e-06, "loss": 0.4649, "step": 7150 }, { "epoch": 1.1743887668589494, "grad_norm": 2.9411089773533248, "learning_rate": 7.900914963088717e-06, "loss": 0.4908, "step": 7151 }, { "epoch": 1.174552994067292, "grad_norm": 0.3004882084769234, "learning_rate": 7.900607838179015e-06, "loss": 0.4819, "step": 7152 }, { "epoch": 1.174717221275635, "grad_norm": 0.2929426082232998, "learning_rate": 7.900300676335366e-06, "loss": 0.4785, "step": 7153 }, { "epoch": 1.1748814484839776, "grad_norm": 0.3017982722752946, "learning_rate": 7.899993477561107e-06, "loss": 0.4783, "step": 7154 }, { "epoch": 1.1750456756923202, "grad_norm": 0.37263447468744393, "learning_rate": 7.899686241859574e-06, "loss": 0.4787, "step": 7155 }, { "epoch": 1.175209902900663, "grad_norm": 0.2718529502001876, "learning_rate": 7.899378969234103e-06, "loss": 0.4677, "step": 7156 }, { "epoch": 1.175374130109006, "grad_norm": 0.32113420311098073, "learning_rate": 7.899071659688032e-06, "loss": 0.4871, "step": 7157 }, { "epoch": 1.1755383573173486, "grad_norm": 0.30453037794151244, "learning_rate": 7.8987643132247e-06, "loss": 0.4866, "step": 7158 }, { "epoch": 1.1757025845256912, "grad_norm": 0.37467717111887067, "learning_rate": 7.898456929847442e-06, "loss": 0.4978, "step": 7159 }, { "epoch": 1.175866811734034, "grad_norm": 0.3034542609392963, "learning_rate": 7.898149509559599e-06, "loss": 0.4899, "step": 7160 }, { "epoch": 1.1760310389423767, "grad_norm": 0.40653052696984354, "learning_rate": 7.897842052364508e-06, "loss": 0.4876, "step": 7161 }, { "epoch": 1.1761952661507196, "grad_norm": 0.4270247739034037, "learning_rate": 7.89753455826551e-06, "loss": 0.4674, "step": 7162 }, { "epoch": 1.1763594933590622, "grad_norm": 0.3277953459600014, "learning_rate": 7.897227027265943e-06, "loss": 0.4772, "step": 7163 }, { "epoch": 1.176523720567405, "grad_norm": 0.4031180802456469, "learning_rate": 7.89691945936915e-06, "loss": 0.5218, "step": 7164 }, { "epoch": 1.1766879477757477, "grad_norm": 0.3228656700450739, "learning_rate": 7.89661185457847e-06, "loss": 0.4886, "step": 7165 }, { "epoch": 1.1768521749840906, "grad_norm": 0.30127464136306215, "learning_rate": 7.89630421289724e-06, "loss": 0.4974, "step": 7166 }, { "epoch": 1.1770164021924332, "grad_norm": 0.3407442389191083, "learning_rate": 7.895996534328806e-06, "loss": 0.4672, "step": 7167 }, { "epoch": 1.177180629400776, "grad_norm": 0.3611630703926426, "learning_rate": 7.895688818876508e-06, "loss": 0.5189, "step": 7168 }, { "epoch": 1.1773448566091187, "grad_norm": 0.3194743450347194, "learning_rate": 7.895381066543691e-06, "loss": 0.5102, "step": 7169 }, { "epoch": 1.1775090838174616, "grad_norm": 0.33776411509241444, "learning_rate": 7.895073277333694e-06, "loss": 0.4825, "step": 7170 }, { "epoch": 1.1776733110258042, "grad_norm": 0.33471683930689616, "learning_rate": 7.89476545124986e-06, "loss": 0.4675, "step": 7171 }, { "epoch": 1.1778375382341468, "grad_norm": 0.3248330468687593, "learning_rate": 7.894457588295533e-06, "loss": 0.4876, "step": 7172 }, { "epoch": 1.1780017654424897, "grad_norm": 0.3094418395243431, "learning_rate": 7.894149688474058e-06, "loss": 0.4779, "step": 7173 }, { "epoch": 1.1781659926508325, "grad_norm": 0.4121252840483226, "learning_rate": 7.893841751788777e-06, "loss": 0.5082, "step": 7174 }, { "epoch": 1.1783302198591752, "grad_norm": 0.306690976540399, "learning_rate": 7.893533778243037e-06, "loss": 0.4679, "step": 7175 }, { "epoch": 1.1784944470675178, "grad_norm": 0.3098485120429855, "learning_rate": 7.89322576784018e-06, "loss": 0.4656, "step": 7176 }, { "epoch": 1.1786586742758607, "grad_norm": 0.3139158217304466, "learning_rate": 7.892917720583553e-06, "loss": 0.4817, "step": 7177 }, { "epoch": 1.1788229014842033, "grad_norm": 0.27150948840561384, "learning_rate": 7.892609636476502e-06, "loss": 0.4734, "step": 7178 }, { "epoch": 1.1789871286925462, "grad_norm": 0.3792047308148616, "learning_rate": 7.892301515522371e-06, "loss": 0.4892, "step": 7179 }, { "epoch": 1.1791513559008888, "grad_norm": 0.2695161583725051, "learning_rate": 7.89199335772451e-06, "loss": 0.4469, "step": 7180 }, { "epoch": 1.1793155831092317, "grad_norm": 0.29931825739146534, "learning_rate": 7.891685163086262e-06, "loss": 0.4822, "step": 7181 }, { "epoch": 1.1794798103175743, "grad_norm": 0.2696831618893344, "learning_rate": 7.891376931610977e-06, "loss": 0.4963, "step": 7182 }, { "epoch": 1.1796440375259172, "grad_norm": 0.3602896096956024, "learning_rate": 7.891068663302003e-06, "loss": 0.4941, "step": 7183 }, { "epoch": 1.1798082647342598, "grad_norm": 0.3380357807441332, "learning_rate": 7.890760358162686e-06, "loss": 0.4828, "step": 7184 }, { "epoch": 1.1799724919426027, "grad_norm": 0.3158752432413669, "learning_rate": 7.890452016196373e-06, "loss": 0.4987, "step": 7185 }, { "epoch": 1.1801367191509453, "grad_norm": 0.54489675202446, "learning_rate": 7.89014363740642e-06, "loss": 0.4751, "step": 7186 }, { "epoch": 1.1803009463592882, "grad_norm": 0.2946086684484438, "learning_rate": 7.889835221796168e-06, "loss": 0.4927, "step": 7187 }, { "epoch": 1.1804651735676308, "grad_norm": 0.29319451335645197, "learning_rate": 7.889526769368971e-06, "loss": 0.4791, "step": 7188 }, { "epoch": 1.1806294007759734, "grad_norm": 0.3168463859588959, "learning_rate": 7.88921828012818e-06, "loss": 0.4695, "step": 7189 }, { "epoch": 1.1807936279843163, "grad_norm": 0.3594514406581799, "learning_rate": 7.888909754077142e-06, "loss": 0.4798, "step": 7190 }, { "epoch": 1.1809578551926592, "grad_norm": 0.2990964083755579, "learning_rate": 7.888601191219211e-06, "loss": 0.4916, "step": 7191 }, { "epoch": 1.1811220824010018, "grad_norm": 0.5104588170168158, "learning_rate": 7.888292591557738e-06, "loss": 0.4768, "step": 7192 }, { "epoch": 1.1812863096093444, "grad_norm": 0.31064065418444264, "learning_rate": 7.887983955096072e-06, "loss": 0.4828, "step": 7193 }, { "epoch": 1.1814505368176873, "grad_norm": 0.31641511809024947, "learning_rate": 7.887675281837568e-06, "loss": 0.4864, "step": 7194 }, { "epoch": 1.18161476402603, "grad_norm": 0.3876844228637263, "learning_rate": 7.887366571785577e-06, "loss": 0.4788, "step": 7195 }, { "epoch": 1.1817789912343728, "grad_norm": 0.28781277770735475, "learning_rate": 7.887057824943451e-06, "loss": 0.4813, "step": 7196 }, { "epoch": 1.1819432184427154, "grad_norm": 0.2780428348147511, "learning_rate": 7.886749041314546e-06, "loss": 0.4975, "step": 7197 }, { "epoch": 1.1821074456510583, "grad_norm": 0.3092106232254678, "learning_rate": 7.886440220902214e-06, "loss": 0.4926, "step": 7198 }, { "epoch": 1.182271672859401, "grad_norm": 0.2840760200057259, "learning_rate": 7.88613136370981e-06, "loss": 0.4898, "step": 7199 }, { "epoch": 1.1824359000677438, "grad_norm": 0.27574696423953743, "learning_rate": 7.885822469740688e-06, "loss": 0.4891, "step": 7200 }, { "epoch": 1.1826001272760864, "grad_norm": 0.35965149805749896, "learning_rate": 7.885513538998203e-06, "loss": 0.4588, "step": 7201 }, { "epoch": 1.1827643544844293, "grad_norm": 0.33303040766200065, "learning_rate": 7.885204571485709e-06, "loss": 0.4753, "step": 7202 }, { "epoch": 1.182928581692772, "grad_norm": 0.2593780964338368, "learning_rate": 7.884895567206563e-06, "loss": 0.4835, "step": 7203 }, { "epoch": 1.1830928089011148, "grad_norm": 0.35610378246540597, "learning_rate": 7.884586526164121e-06, "loss": 0.4746, "step": 7204 }, { "epoch": 1.1832570361094574, "grad_norm": 0.28443270166004814, "learning_rate": 7.88427744836174e-06, "loss": 0.4791, "step": 7205 }, { "epoch": 1.1834212633178, "grad_norm": 0.2831758991670496, "learning_rate": 7.883968333802774e-06, "loss": 0.4684, "step": 7206 }, { "epoch": 1.183585490526143, "grad_norm": 0.35471014312117216, "learning_rate": 7.883659182490585e-06, "loss": 0.4668, "step": 7207 }, { "epoch": 1.1837497177344858, "grad_norm": 0.29537181388674116, "learning_rate": 7.883349994428527e-06, "loss": 0.4802, "step": 7208 }, { "epoch": 1.1839139449428284, "grad_norm": 0.3368787078344288, "learning_rate": 7.88304076961996e-06, "loss": 0.4802, "step": 7209 }, { "epoch": 1.184078172151171, "grad_norm": 0.36423298240411317, "learning_rate": 7.88273150806824e-06, "loss": 0.4846, "step": 7210 }, { "epoch": 1.184242399359514, "grad_norm": 0.45191874292943746, "learning_rate": 7.88242220977673e-06, "loss": 0.4971, "step": 7211 }, { "epoch": 1.1844066265678566, "grad_norm": 0.31730056357201625, "learning_rate": 7.882112874748787e-06, "loss": 0.5007, "step": 7212 }, { "epoch": 1.1845708537761994, "grad_norm": 0.276740020652526, "learning_rate": 7.881803502987769e-06, "loss": 0.4603, "step": 7213 }, { "epoch": 1.184735080984542, "grad_norm": 0.3226551874841209, "learning_rate": 7.881494094497038e-06, "loss": 0.4973, "step": 7214 }, { "epoch": 1.184899308192885, "grad_norm": 0.2698997195963249, "learning_rate": 7.881184649279956e-06, "loss": 0.4571, "step": 7215 }, { "epoch": 1.1850635354012276, "grad_norm": 0.3116107257091327, "learning_rate": 7.88087516733988e-06, "loss": 0.4939, "step": 7216 }, { "epoch": 1.1852277626095704, "grad_norm": 0.26601114390957037, "learning_rate": 7.880565648680174e-06, "loss": 0.4781, "step": 7217 }, { "epoch": 1.185391989817913, "grad_norm": 0.2960317652813218, "learning_rate": 7.880256093304199e-06, "loss": 0.498, "step": 7218 }, { "epoch": 1.185556217026256, "grad_norm": 0.2897931619508775, "learning_rate": 7.879946501215317e-06, "loss": 0.4928, "step": 7219 }, { "epoch": 1.1857204442345985, "grad_norm": 0.3444469864217827, "learning_rate": 7.87963687241689e-06, "loss": 0.4992, "step": 7220 }, { "epoch": 1.1858846714429414, "grad_norm": 0.32963841574885944, "learning_rate": 7.879327206912283e-06, "loss": 0.4832, "step": 7221 }, { "epoch": 1.186048898651284, "grad_norm": 0.27874786931557877, "learning_rate": 7.879017504704856e-06, "loss": 0.4807, "step": 7222 }, { "epoch": 1.1862131258596267, "grad_norm": 0.2954496205585582, "learning_rate": 7.878707765797975e-06, "loss": 0.4638, "step": 7223 }, { "epoch": 1.1863773530679695, "grad_norm": 0.2997197947778376, "learning_rate": 7.878397990195004e-06, "loss": 0.4811, "step": 7224 }, { "epoch": 1.1865415802763124, "grad_norm": 0.3608949284326675, "learning_rate": 7.878088177899307e-06, "loss": 0.4879, "step": 7225 }, { "epoch": 1.186705807484655, "grad_norm": 0.31512186735330405, "learning_rate": 7.877778328914248e-06, "loss": 0.4778, "step": 7226 }, { "epoch": 1.1868700346929977, "grad_norm": 0.5780317877360018, "learning_rate": 7.877468443243195e-06, "loss": 0.4951, "step": 7227 }, { "epoch": 1.1870342619013405, "grad_norm": 0.30742943688087454, "learning_rate": 7.877158520889509e-06, "loss": 0.4756, "step": 7228 }, { "epoch": 1.1871984891096832, "grad_norm": 0.30278431149830287, "learning_rate": 7.87684856185656e-06, "loss": 0.4714, "step": 7229 }, { "epoch": 1.187362716318026, "grad_norm": 0.3118677206225516, "learning_rate": 7.876538566147713e-06, "loss": 0.4795, "step": 7230 }, { "epoch": 1.1875269435263687, "grad_norm": 0.3327275128495535, "learning_rate": 7.876228533766335e-06, "loss": 0.485, "step": 7231 }, { "epoch": 1.1876911707347115, "grad_norm": 0.280947840200177, "learning_rate": 7.875918464715795e-06, "loss": 0.4733, "step": 7232 }, { "epoch": 1.1878553979430542, "grad_norm": 0.27631423496822494, "learning_rate": 7.875608358999456e-06, "loss": 0.4769, "step": 7233 }, { "epoch": 1.188019625151397, "grad_norm": 0.3424215714956797, "learning_rate": 7.875298216620692e-06, "loss": 0.4745, "step": 7234 }, { "epoch": 1.1881838523597397, "grad_norm": 0.3392612659281871, "learning_rate": 7.874988037582868e-06, "loss": 0.4821, "step": 7235 }, { "epoch": 1.1883480795680825, "grad_norm": 0.30768246487911133, "learning_rate": 7.874677821889352e-06, "loss": 0.492, "step": 7236 }, { "epoch": 1.1885123067764252, "grad_norm": 0.4768362934562472, "learning_rate": 7.874367569543516e-06, "loss": 0.4709, "step": 7237 }, { "epoch": 1.188676533984768, "grad_norm": 0.39163590605246273, "learning_rate": 7.874057280548727e-06, "loss": 0.4987, "step": 7238 }, { "epoch": 1.1888407611931107, "grad_norm": 0.48949358688016253, "learning_rate": 7.873746954908358e-06, "loss": 0.473, "step": 7239 }, { "epoch": 1.1890049884014533, "grad_norm": 0.34669067573595047, "learning_rate": 7.873436592625775e-06, "loss": 0.4871, "step": 7240 }, { "epoch": 1.1891692156097962, "grad_norm": 0.2881379597778867, "learning_rate": 7.873126193704353e-06, "loss": 0.4835, "step": 7241 }, { "epoch": 1.189333442818139, "grad_norm": 0.28928702283893437, "learning_rate": 7.872815758147463e-06, "loss": 0.5041, "step": 7242 }, { "epoch": 1.1894976700264817, "grad_norm": 0.2779046323729139, "learning_rate": 7.872505285958475e-06, "loss": 0.4861, "step": 7243 }, { "epoch": 1.1896618972348243, "grad_norm": 0.32171186966240967, "learning_rate": 7.872194777140761e-06, "loss": 0.4708, "step": 7244 }, { "epoch": 1.1898261244431672, "grad_norm": 0.33224800587019176, "learning_rate": 7.871884231697693e-06, "loss": 0.4752, "step": 7245 }, { "epoch": 1.1899903516515098, "grad_norm": 0.39098922706360334, "learning_rate": 7.871573649632646e-06, "loss": 0.4722, "step": 7246 }, { "epoch": 1.1901545788598527, "grad_norm": 0.260711638664843, "learning_rate": 7.871263030948992e-06, "loss": 0.4923, "step": 7247 }, { "epoch": 1.1903188060681953, "grad_norm": 0.3056696630001395, "learning_rate": 7.870952375650105e-06, "loss": 0.4753, "step": 7248 }, { "epoch": 1.1904830332765381, "grad_norm": 0.33626309268971577, "learning_rate": 7.870641683739358e-06, "loss": 0.4845, "step": 7249 }, { "epoch": 1.1906472604848808, "grad_norm": 0.40524575271035757, "learning_rate": 7.870330955220124e-06, "loss": 0.476, "step": 7250 }, { "epoch": 1.1908114876932236, "grad_norm": 0.30297205462394605, "learning_rate": 7.870020190095783e-06, "loss": 0.4827, "step": 7251 }, { "epoch": 1.1909757149015663, "grad_norm": 0.2851543936766989, "learning_rate": 7.869709388369705e-06, "loss": 0.4808, "step": 7252 }, { "epoch": 1.1911399421099091, "grad_norm": 0.27115976850848544, "learning_rate": 7.869398550045268e-06, "loss": 0.4858, "step": 7253 }, { "epoch": 1.1913041693182518, "grad_norm": 0.30476809755351303, "learning_rate": 7.86908767512585e-06, "loss": 0.4876, "step": 7254 }, { "epoch": 1.1914683965265946, "grad_norm": 0.3758066090495282, "learning_rate": 7.868776763614824e-06, "loss": 0.4615, "step": 7255 }, { "epoch": 1.1916326237349373, "grad_norm": 0.2936697469025561, "learning_rate": 7.868465815515568e-06, "loss": 0.4939, "step": 7256 }, { "epoch": 1.19179685094328, "grad_norm": 0.2704607304163568, "learning_rate": 7.868154830831458e-06, "loss": 0.4794, "step": 7257 }, { "epoch": 1.1919610781516228, "grad_norm": 0.28176731888861567, "learning_rate": 7.867843809565873e-06, "loss": 0.4723, "step": 7258 }, { "epoch": 1.1921253053599656, "grad_norm": 0.2760536908218363, "learning_rate": 7.86753275172219e-06, "loss": 0.4917, "step": 7259 }, { "epoch": 1.1922895325683083, "grad_norm": 0.25631381946862203, "learning_rate": 7.86722165730379e-06, "loss": 0.4652, "step": 7260 }, { "epoch": 1.192453759776651, "grad_norm": 0.27318642320256536, "learning_rate": 7.866910526314049e-06, "loss": 0.4479, "step": 7261 }, { "epoch": 1.1926179869849938, "grad_norm": 0.28956412253272856, "learning_rate": 7.866599358756347e-06, "loss": 0.4779, "step": 7262 }, { "epoch": 1.1927822141933364, "grad_norm": 0.30001643869690897, "learning_rate": 7.866288154634064e-06, "loss": 0.4873, "step": 7263 }, { "epoch": 1.1929464414016793, "grad_norm": 0.2924444418420769, "learning_rate": 7.86597691395058e-06, "loss": 0.4865, "step": 7264 }, { "epoch": 1.193110668610022, "grad_norm": 0.25822309694150847, "learning_rate": 7.865665636709275e-06, "loss": 0.4937, "step": 7265 }, { "epoch": 1.1932748958183648, "grad_norm": 0.2582323654371515, "learning_rate": 7.865354322913529e-06, "loss": 0.4789, "step": 7266 }, { "epoch": 1.1934391230267074, "grad_norm": 0.2560281240282641, "learning_rate": 7.865042972566723e-06, "loss": 0.4958, "step": 7267 }, { "epoch": 1.1936033502350503, "grad_norm": 0.283105334712507, "learning_rate": 7.86473158567224e-06, "loss": 0.4857, "step": 7268 }, { "epoch": 1.193767577443393, "grad_norm": 0.3074559301853208, "learning_rate": 7.864420162233464e-06, "loss": 0.4822, "step": 7269 }, { "epoch": 1.1939318046517358, "grad_norm": 0.272089654585132, "learning_rate": 7.864108702253773e-06, "loss": 0.4826, "step": 7270 }, { "epoch": 1.1940960318600784, "grad_norm": 0.34574826869825065, "learning_rate": 7.863797205736552e-06, "loss": 0.4898, "step": 7271 }, { "epoch": 1.1942602590684213, "grad_norm": 0.3400107585169252, "learning_rate": 7.863485672685183e-06, "loss": 0.4679, "step": 7272 }, { "epoch": 1.194424486276764, "grad_norm": 0.2794512133883541, "learning_rate": 7.863174103103053e-06, "loss": 0.4935, "step": 7273 }, { "epoch": 1.1945887134851065, "grad_norm": 0.26107892204001254, "learning_rate": 7.862862496993541e-06, "loss": 0.4756, "step": 7274 }, { "epoch": 1.1947529406934494, "grad_norm": 0.3010930525172073, "learning_rate": 7.862550854360033e-06, "loss": 0.4778, "step": 7275 }, { "epoch": 1.1949171679017923, "grad_norm": 0.31706413409182155, "learning_rate": 7.862239175205915e-06, "loss": 0.4893, "step": 7276 }, { "epoch": 1.195081395110135, "grad_norm": 0.6518163187078756, "learning_rate": 7.861927459534572e-06, "loss": 0.4701, "step": 7277 }, { "epoch": 1.1952456223184775, "grad_norm": 0.2829791035958546, "learning_rate": 7.86161570734939e-06, "loss": 0.4871, "step": 7278 }, { "epoch": 1.1954098495268204, "grad_norm": 0.2739474278242147, "learning_rate": 7.861303918653752e-06, "loss": 0.4755, "step": 7279 }, { "epoch": 1.195574076735163, "grad_norm": 0.3436202620943284, "learning_rate": 7.860992093451049e-06, "loss": 0.4733, "step": 7280 }, { "epoch": 1.1957383039435059, "grad_norm": 0.30182634928669677, "learning_rate": 7.860680231744663e-06, "loss": 0.5101, "step": 7281 }, { "epoch": 1.1959025311518485, "grad_norm": 0.2987955559095261, "learning_rate": 7.860368333537984e-06, "loss": 0.5102, "step": 7282 }, { "epoch": 1.1960667583601914, "grad_norm": 0.2696173051533773, "learning_rate": 7.860056398834399e-06, "loss": 0.4914, "step": 7283 }, { "epoch": 1.196230985568534, "grad_norm": 0.3588548931620607, "learning_rate": 7.859744427637295e-06, "loss": 0.4714, "step": 7284 }, { "epoch": 1.1963952127768769, "grad_norm": 0.2960177348685412, "learning_rate": 7.85943241995006e-06, "loss": 0.487, "step": 7285 }, { "epoch": 1.1965594399852195, "grad_norm": 0.3321192088445762, "learning_rate": 7.859120375776086e-06, "loss": 0.4574, "step": 7286 }, { "epoch": 1.1967236671935624, "grad_norm": 0.3047274250456088, "learning_rate": 7.85880829511876e-06, "loss": 0.4767, "step": 7287 }, { "epoch": 1.196887894401905, "grad_norm": 0.2940277359499893, "learning_rate": 7.85849617798147e-06, "loss": 0.4727, "step": 7288 }, { "epoch": 1.1970521216102479, "grad_norm": 0.31805435075245675, "learning_rate": 7.858184024367606e-06, "loss": 0.5016, "step": 7289 }, { "epoch": 1.1972163488185905, "grad_norm": 0.2893430047178316, "learning_rate": 7.857871834280562e-06, "loss": 0.473, "step": 7290 }, { "epoch": 1.1973805760269332, "grad_norm": 0.3214483934935793, "learning_rate": 7.857559607723724e-06, "loss": 0.4891, "step": 7291 }, { "epoch": 1.197544803235276, "grad_norm": 0.2779040042258734, "learning_rate": 7.857247344700485e-06, "loss": 0.4857, "step": 7292 }, { "epoch": 1.1977090304436186, "grad_norm": 0.29268673694745784, "learning_rate": 7.85693504521424e-06, "loss": 0.5038, "step": 7293 }, { "epoch": 1.1978732576519615, "grad_norm": 0.2583632166117344, "learning_rate": 7.856622709268375e-06, "loss": 0.4702, "step": 7294 }, { "epoch": 1.1980374848603041, "grad_norm": 0.3039569672751797, "learning_rate": 7.856310336866284e-06, "loss": 0.4916, "step": 7295 }, { "epoch": 1.198201712068647, "grad_norm": 0.37623591673858825, "learning_rate": 7.855997928011363e-06, "loss": 0.4649, "step": 7296 }, { "epoch": 1.1983659392769896, "grad_norm": 0.32792556540288026, "learning_rate": 7.855685482707001e-06, "loss": 0.5132, "step": 7297 }, { "epoch": 1.1985301664853325, "grad_norm": 0.33159113984877747, "learning_rate": 7.855373000956595e-06, "loss": 0.4862, "step": 7298 }, { "epoch": 1.1986943936936751, "grad_norm": 0.46686228289895976, "learning_rate": 7.855060482763534e-06, "loss": 0.5074, "step": 7299 }, { "epoch": 1.198858620902018, "grad_norm": 0.29765525293464823, "learning_rate": 7.854747928131219e-06, "loss": 0.4779, "step": 7300 }, { "epoch": 1.1990228481103606, "grad_norm": 0.31934509278801926, "learning_rate": 7.854435337063037e-06, "loss": 0.4852, "step": 7301 }, { "epoch": 1.1991870753187035, "grad_norm": 0.424679981759697, "learning_rate": 7.85412270956239e-06, "loss": 0.4786, "step": 7302 }, { "epoch": 1.1993513025270461, "grad_norm": 0.28126109328181276, "learning_rate": 7.853810045632668e-06, "loss": 0.4889, "step": 7303 }, { "epoch": 1.199515529735389, "grad_norm": 0.27650129426773495, "learning_rate": 7.853497345277272e-06, "loss": 0.4622, "step": 7304 }, { "epoch": 1.1996797569437316, "grad_norm": 0.3398188905283428, "learning_rate": 7.853184608499593e-06, "loss": 0.4689, "step": 7305 }, { "epoch": 1.1998439841520745, "grad_norm": 0.298516041385809, "learning_rate": 7.852871835303031e-06, "loss": 0.4656, "step": 7306 }, { "epoch": 1.2000082113604171, "grad_norm": 0.24499209339877884, "learning_rate": 7.852559025690981e-06, "loss": 0.4924, "step": 7307 }, { "epoch": 1.2001724385687598, "grad_norm": 0.27863042979543257, "learning_rate": 7.852246179666844e-06, "loss": 0.4795, "step": 7308 }, { "epoch": 1.2003366657771026, "grad_norm": 0.2756647160445179, "learning_rate": 7.851933297234012e-06, "loss": 0.4754, "step": 7309 }, { "epoch": 1.2005008929854453, "grad_norm": 0.2831834434790002, "learning_rate": 7.85162037839589e-06, "loss": 0.4737, "step": 7310 }, { "epoch": 1.2006651201937881, "grad_norm": 0.29162631607674616, "learning_rate": 7.851307423155871e-06, "loss": 0.4983, "step": 7311 }, { "epoch": 1.2008293474021308, "grad_norm": 0.27672574832406416, "learning_rate": 7.850994431517356e-06, "loss": 0.4777, "step": 7312 }, { "epoch": 1.2009935746104736, "grad_norm": 0.27869712581342154, "learning_rate": 7.850681403483745e-06, "loss": 0.506, "step": 7313 }, { "epoch": 1.2011578018188163, "grad_norm": 0.2923731692650365, "learning_rate": 7.850368339058438e-06, "loss": 0.503, "step": 7314 }, { "epoch": 1.2013220290271591, "grad_norm": 0.42420781724762624, "learning_rate": 7.850055238244835e-06, "loss": 0.472, "step": 7315 }, { "epoch": 1.2014862562355018, "grad_norm": 0.2648715107534324, "learning_rate": 7.849742101046333e-06, "loss": 0.4963, "step": 7316 }, { "epoch": 1.2016504834438446, "grad_norm": 0.2982734322965936, "learning_rate": 7.849428927466338e-06, "loss": 0.4765, "step": 7317 }, { "epoch": 1.2018147106521873, "grad_norm": 0.34924463673787454, "learning_rate": 7.849115717508252e-06, "loss": 0.4945, "step": 7318 }, { "epoch": 1.2019789378605301, "grad_norm": 0.3021561206760538, "learning_rate": 7.84880247117547e-06, "loss": 0.4746, "step": 7319 }, { "epoch": 1.2021431650688728, "grad_norm": 0.4022430865737298, "learning_rate": 7.848489188471401e-06, "loss": 0.4897, "step": 7320 }, { "epoch": 1.2023073922772156, "grad_norm": 0.28673004576717576, "learning_rate": 7.848175869399444e-06, "loss": 0.4809, "step": 7321 }, { "epoch": 1.2024716194855583, "grad_norm": 0.2604773400575898, "learning_rate": 7.847862513963003e-06, "loss": 0.4712, "step": 7322 }, { "epoch": 1.2026358466939011, "grad_norm": 0.44028520566728796, "learning_rate": 7.847549122165481e-06, "loss": 0.4975, "step": 7323 }, { "epoch": 1.2028000739022437, "grad_norm": 0.30359081977857466, "learning_rate": 7.847235694010283e-06, "loss": 0.4754, "step": 7324 }, { "epoch": 1.2029643011105864, "grad_norm": 0.2688194720858126, "learning_rate": 7.846922229500812e-06, "loss": 0.4809, "step": 7325 }, { "epoch": 1.2031285283189292, "grad_norm": 0.27441461039552933, "learning_rate": 7.846608728640471e-06, "loss": 0.4881, "step": 7326 }, { "epoch": 1.2032927555272719, "grad_norm": 0.36663350552704727, "learning_rate": 7.846295191432668e-06, "loss": 0.4883, "step": 7327 }, { "epoch": 1.2034569827356147, "grad_norm": 0.30093440887610545, "learning_rate": 7.845981617880808e-06, "loss": 0.4791, "step": 7328 }, { "epoch": 1.2036212099439574, "grad_norm": 0.30653428630193286, "learning_rate": 7.845668007988292e-06, "loss": 0.47, "step": 7329 }, { "epoch": 1.2037854371523002, "grad_norm": 0.2898402343328568, "learning_rate": 7.845354361758533e-06, "loss": 0.4795, "step": 7330 }, { "epoch": 1.2039496643606429, "grad_norm": 0.2890752297414382, "learning_rate": 7.845040679194934e-06, "loss": 0.4865, "step": 7331 }, { "epoch": 1.2041138915689857, "grad_norm": 0.3624219698154435, "learning_rate": 7.8447269603009e-06, "loss": 0.475, "step": 7332 }, { "epoch": 1.2042781187773284, "grad_norm": 0.296336578656, "learning_rate": 7.844413205079842e-06, "loss": 0.4954, "step": 7333 }, { "epoch": 1.2044423459856712, "grad_norm": 0.35399910890682923, "learning_rate": 7.844099413535167e-06, "loss": 0.4947, "step": 7334 }, { "epoch": 1.2046065731940139, "grad_norm": 0.33519400632511764, "learning_rate": 7.843785585670279e-06, "loss": 0.4739, "step": 7335 }, { "epoch": 1.2047708004023567, "grad_norm": 0.28240279450470623, "learning_rate": 7.843471721488593e-06, "loss": 0.5025, "step": 7336 }, { "epoch": 1.2049350276106994, "grad_norm": 0.25326684831156665, "learning_rate": 7.843157820993515e-06, "loss": 0.4706, "step": 7337 }, { "epoch": 1.2050992548190422, "grad_norm": 0.29493194722350735, "learning_rate": 7.84284388418845e-06, "loss": 0.4717, "step": 7338 }, { "epoch": 1.2052634820273849, "grad_norm": 0.298157102180501, "learning_rate": 7.842529911076815e-06, "loss": 0.5083, "step": 7339 }, { "epoch": 1.2054277092357277, "grad_norm": 0.4151324217642813, "learning_rate": 7.842215901662014e-06, "loss": 0.4794, "step": 7340 }, { "epoch": 1.2055919364440704, "grad_norm": 0.2826353275894072, "learning_rate": 7.841901855947462e-06, "loss": 0.4877, "step": 7341 }, { "epoch": 1.205756163652413, "grad_norm": 0.28516782000663105, "learning_rate": 7.841587773936568e-06, "loss": 0.4773, "step": 7342 }, { "epoch": 1.2059203908607559, "grad_norm": 0.3039940380114767, "learning_rate": 7.841273655632741e-06, "loss": 0.4945, "step": 7343 }, { "epoch": 1.2060846180690985, "grad_norm": 0.40356339790866297, "learning_rate": 7.840959501039397e-06, "loss": 0.4859, "step": 7344 }, { "epoch": 1.2062488452774414, "grad_norm": 0.5442637135737383, "learning_rate": 7.840645310159945e-06, "loss": 0.474, "step": 7345 }, { "epoch": 1.206413072485784, "grad_norm": 0.3455945345487415, "learning_rate": 7.840331082997799e-06, "loss": 0.5056, "step": 7346 }, { "epoch": 1.2065772996941269, "grad_norm": 0.3203479224628378, "learning_rate": 7.840016819556369e-06, "loss": 0.491, "step": 7347 }, { "epoch": 1.2067415269024695, "grad_norm": 0.43912639657814323, "learning_rate": 7.839702519839071e-06, "loss": 0.4847, "step": 7348 }, { "epoch": 1.2069057541108124, "grad_norm": 0.2913019474705546, "learning_rate": 7.839388183849318e-06, "loss": 0.4542, "step": 7349 }, { "epoch": 1.207069981319155, "grad_norm": 0.2997673311165085, "learning_rate": 7.839073811590524e-06, "loss": 0.451, "step": 7350 }, { "epoch": 1.2072342085274979, "grad_norm": 0.32497023477104164, "learning_rate": 7.838759403066103e-06, "loss": 0.4607, "step": 7351 }, { "epoch": 1.2073984357358405, "grad_norm": 0.30168217147672366, "learning_rate": 7.83844495827947e-06, "loss": 0.4608, "step": 7352 }, { "epoch": 1.2075626629441834, "grad_norm": 0.3171359606091761, "learning_rate": 7.83813047723404e-06, "loss": 0.4863, "step": 7353 }, { "epoch": 1.207726890152526, "grad_norm": 0.3601274627022798, "learning_rate": 7.83781595993323e-06, "loss": 0.4696, "step": 7354 }, { "epoch": 1.2078911173608688, "grad_norm": 0.3203665019807861, "learning_rate": 7.837501406380452e-06, "loss": 0.4717, "step": 7355 }, { "epoch": 1.2080553445692115, "grad_norm": 0.29351737271594, "learning_rate": 7.837186816579128e-06, "loss": 0.4904, "step": 7356 }, { "epoch": 1.2082195717775543, "grad_norm": 0.33220095890415596, "learning_rate": 7.83687219053267e-06, "loss": 0.4878, "step": 7357 }, { "epoch": 1.208383798985897, "grad_norm": 0.2975976385334508, "learning_rate": 7.836557528244497e-06, "loss": 0.4774, "step": 7358 }, { "epoch": 1.2085480261942396, "grad_norm": 0.277639522422605, "learning_rate": 7.836242829718028e-06, "loss": 0.4827, "step": 7359 }, { "epoch": 1.2087122534025825, "grad_norm": 0.28230791338424405, "learning_rate": 7.835928094956677e-06, "loss": 0.4858, "step": 7360 }, { "epoch": 1.2088764806109251, "grad_norm": 0.3070274198241027, "learning_rate": 7.835613323963867e-06, "loss": 0.4732, "step": 7361 }, { "epoch": 1.209040707819268, "grad_norm": 0.3168714949165464, "learning_rate": 7.835298516743014e-06, "loss": 0.4867, "step": 7362 }, { "epoch": 1.2092049350276106, "grad_norm": 0.3453218427128881, "learning_rate": 7.834983673297537e-06, "loss": 0.4556, "step": 7363 }, { "epoch": 1.2093691622359535, "grad_norm": 0.3528769701781255, "learning_rate": 7.834668793630856e-06, "loss": 0.4985, "step": 7364 }, { "epoch": 1.2095333894442961, "grad_norm": 0.2638158286045824, "learning_rate": 7.834353877746391e-06, "loss": 0.4675, "step": 7365 }, { "epoch": 1.209697616652639, "grad_norm": 0.2788721286113114, "learning_rate": 7.834038925647563e-06, "loss": 0.4796, "step": 7366 }, { "epoch": 1.2098618438609816, "grad_norm": 0.31074874505065736, "learning_rate": 7.833723937337792e-06, "loss": 0.4873, "step": 7367 }, { "epoch": 1.2100260710693245, "grad_norm": 0.2616105603314999, "learning_rate": 7.8334089128205e-06, "loss": 0.4853, "step": 7368 }, { "epoch": 1.210190298277667, "grad_norm": 0.25894275590866295, "learning_rate": 7.833093852099104e-06, "loss": 0.4763, "step": 7369 }, { "epoch": 1.21035452548601, "grad_norm": 0.28972598700545293, "learning_rate": 7.832778755177034e-06, "loss": 0.4718, "step": 7370 }, { "epoch": 1.2105187526943526, "grad_norm": 0.53511817324183, "learning_rate": 7.832463622057705e-06, "loss": 0.478, "step": 7371 }, { "epoch": 1.2106829799026955, "grad_norm": 0.2908338880957059, "learning_rate": 7.832148452744544e-06, "loss": 0.4872, "step": 7372 }, { "epoch": 1.210847207111038, "grad_norm": 0.31807390385130946, "learning_rate": 7.83183324724097e-06, "loss": 0.492, "step": 7373 }, { "epoch": 1.211011434319381, "grad_norm": 0.296720479252511, "learning_rate": 7.831518005550412e-06, "loss": 0.4774, "step": 7374 }, { "epoch": 1.2111756615277236, "grad_norm": 0.2966182688407565, "learning_rate": 7.831202727676287e-06, "loss": 0.4725, "step": 7375 }, { "epoch": 1.2113398887360662, "grad_norm": 0.2888443450249788, "learning_rate": 7.830887413622027e-06, "loss": 0.4726, "step": 7376 }, { "epoch": 1.211504115944409, "grad_norm": 0.27658756184306366, "learning_rate": 7.830572063391049e-06, "loss": 0.4877, "step": 7377 }, { "epoch": 1.2116683431527517, "grad_norm": 0.3201453442232231, "learning_rate": 7.830256676986785e-06, "loss": 0.4801, "step": 7378 }, { "epoch": 1.2118325703610946, "grad_norm": 0.30096927890993763, "learning_rate": 7.829941254412654e-06, "loss": 0.4828, "step": 7379 }, { "epoch": 1.2119967975694372, "grad_norm": 0.3084680100763514, "learning_rate": 7.829625795672085e-06, "loss": 0.4981, "step": 7380 }, { "epoch": 1.21216102477778, "grad_norm": 0.2678693496989439, "learning_rate": 7.829310300768505e-06, "loss": 0.4669, "step": 7381 }, { "epoch": 1.2123252519861227, "grad_norm": 0.2697735354847363, "learning_rate": 7.828994769705339e-06, "loss": 0.4929, "step": 7382 }, { "epoch": 1.2124894791944656, "grad_norm": 0.2560319566439462, "learning_rate": 7.828679202486015e-06, "loss": 0.4841, "step": 7383 }, { "epoch": 1.2126537064028082, "grad_norm": 0.2836939551834787, "learning_rate": 7.828363599113959e-06, "loss": 0.4697, "step": 7384 }, { "epoch": 1.212817933611151, "grad_norm": 0.31007946274580067, "learning_rate": 7.828047959592601e-06, "loss": 0.4882, "step": 7385 }, { "epoch": 1.2129821608194937, "grad_norm": 0.29459524995287784, "learning_rate": 7.827732283925366e-06, "loss": 0.482, "step": 7386 }, { "epoch": 1.2131463880278366, "grad_norm": 0.3793586584509073, "learning_rate": 7.827416572115686e-06, "loss": 0.4817, "step": 7387 }, { "epoch": 1.2133106152361792, "grad_norm": 0.2644897980491223, "learning_rate": 7.827100824166988e-06, "loss": 0.4794, "step": 7388 }, { "epoch": 1.213474842444522, "grad_norm": 0.3017168977924796, "learning_rate": 7.826785040082702e-06, "loss": 0.4818, "step": 7389 }, { "epoch": 1.2136390696528647, "grad_norm": 0.305708816789366, "learning_rate": 7.826469219866257e-06, "loss": 0.4802, "step": 7390 }, { "epoch": 1.2138032968612076, "grad_norm": 0.3059340241183472, "learning_rate": 7.826153363521082e-06, "loss": 0.5047, "step": 7391 }, { "epoch": 1.2139675240695502, "grad_norm": 0.40313192821803495, "learning_rate": 7.82583747105061e-06, "loss": 0.497, "step": 7392 }, { "epoch": 1.2141317512778929, "grad_norm": 0.3177682689036567, "learning_rate": 7.82552154245827e-06, "loss": 0.4912, "step": 7393 }, { "epoch": 1.2142959784862357, "grad_norm": 0.33379735245100434, "learning_rate": 7.825205577747495e-06, "loss": 0.4787, "step": 7394 }, { "epoch": 1.2144602056945784, "grad_norm": 0.32610810464507667, "learning_rate": 7.824889576921718e-06, "loss": 0.4641, "step": 7395 }, { "epoch": 1.2146244329029212, "grad_norm": 0.2576373012534115, "learning_rate": 7.824573539984367e-06, "loss": 0.48, "step": 7396 }, { "epoch": 1.2147886601112639, "grad_norm": 0.2962684067235093, "learning_rate": 7.824257466938875e-06, "loss": 0.4809, "step": 7397 }, { "epoch": 1.2149528873196067, "grad_norm": 0.3373698620542774, "learning_rate": 7.823941357788679e-06, "loss": 0.4949, "step": 7398 }, { "epoch": 1.2151171145279493, "grad_norm": 0.2714003069990936, "learning_rate": 7.823625212537206e-06, "loss": 0.4791, "step": 7399 }, { "epoch": 1.2152813417362922, "grad_norm": 0.27004087404298466, "learning_rate": 7.823309031187897e-06, "loss": 0.4778, "step": 7400 }, { "epoch": 1.2154455689446348, "grad_norm": 0.27836482022600734, "learning_rate": 7.82299281374418e-06, "loss": 0.4814, "step": 7401 }, { "epoch": 1.2156097961529777, "grad_norm": 0.5709071029748667, "learning_rate": 7.822676560209493e-06, "loss": 0.4845, "step": 7402 }, { "epoch": 1.2157740233613203, "grad_norm": 0.28200824916519507, "learning_rate": 7.822360270587269e-06, "loss": 0.4959, "step": 7403 }, { "epoch": 1.2159382505696632, "grad_norm": 0.36164400269974106, "learning_rate": 7.822043944880943e-06, "loss": 0.4751, "step": 7404 }, { "epoch": 1.2161024777780058, "grad_norm": 0.2920172636322637, "learning_rate": 7.821727583093951e-06, "loss": 0.4898, "step": 7405 }, { "epoch": 1.2162667049863487, "grad_norm": 0.27259579242872134, "learning_rate": 7.821411185229732e-06, "loss": 0.4832, "step": 7406 }, { "epoch": 1.2164309321946913, "grad_norm": 2.1964313211670277, "learning_rate": 7.821094751291716e-06, "loss": 0.4811, "step": 7407 }, { "epoch": 1.2165951594030342, "grad_norm": 0.3163824618854596, "learning_rate": 7.820778281283346e-06, "loss": 0.4965, "step": 7408 }, { "epoch": 1.2167593866113768, "grad_norm": 0.29345503086766606, "learning_rate": 7.820461775208056e-06, "loss": 0.4871, "step": 7409 }, { "epoch": 1.2169236138197195, "grad_norm": 0.30196703868592945, "learning_rate": 7.820145233069284e-06, "loss": 0.4605, "step": 7410 }, { "epoch": 1.2170878410280623, "grad_norm": 0.28311895504982654, "learning_rate": 7.81982865487047e-06, "loss": 0.4912, "step": 7411 }, { "epoch": 1.217252068236405, "grad_norm": 0.30339328791642767, "learning_rate": 7.819512040615047e-06, "loss": 0.4609, "step": 7412 }, { "epoch": 1.2174162954447478, "grad_norm": 0.2596969919404455, "learning_rate": 7.819195390306459e-06, "loss": 0.4753, "step": 7413 }, { "epoch": 1.2175805226530905, "grad_norm": 0.322969273895278, "learning_rate": 7.818878703948144e-06, "loss": 0.4794, "step": 7414 }, { "epoch": 1.2177447498614333, "grad_norm": 0.3519640745006145, "learning_rate": 7.818561981543541e-06, "loss": 0.4773, "step": 7415 }, { "epoch": 1.217908977069776, "grad_norm": 0.34107543982912136, "learning_rate": 7.81824522309609e-06, "loss": 0.492, "step": 7416 }, { "epoch": 1.2180732042781188, "grad_norm": 0.3281282771747284, "learning_rate": 7.817928428609229e-06, "loss": 0.4803, "step": 7417 }, { "epoch": 1.2182374314864615, "grad_norm": 0.5308962228651787, "learning_rate": 7.817611598086403e-06, "loss": 0.4642, "step": 7418 }, { "epoch": 1.2184016586948043, "grad_norm": 0.26900387954254784, "learning_rate": 7.81729473153105e-06, "loss": 0.4797, "step": 7419 }, { "epoch": 1.218565885903147, "grad_norm": 0.28024744617753733, "learning_rate": 7.816977828946612e-06, "loss": 0.4846, "step": 7420 }, { "epoch": 1.2187301131114898, "grad_norm": 0.2647335194613172, "learning_rate": 7.816660890336532e-06, "loss": 0.4755, "step": 7421 }, { "epoch": 1.2188943403198325, "grad_norm": 0.2945285536442789, "learning_rate": 7.816343915704252e-06, "loss": 0.4746, "step": 7422 }, { "epoch": 1.2190585675281753, "grad_norm": 0.2835965254364738, "learning_rate": 7.816026905053214e-06, "loss": 0.4782, "step": 7423 }, { "epoch": 1.219222794736518, "grad_norm": 0.4352423901004788, "learning_rate": 7.815709858386861e-06, "loss": 0.481, "step": 7424 }, { "epoch": 1.2193870219448608, "grad_norm": 0.2687199290132344, "learning_rate": 7.815392775708639e-06, "loss": 0.4907, "step": 7425 }, { "epoch": 1.2195512491532035, "grad_norm": 0.26676255690653605, "learning_rate": 7.815075657021986e-06, "loss": 0.4787, "step": 7426 }, { "epoch": 1.219715476361546, "grad_norm": 0.32625127264289355, "learning_rate": 7.814758502330352e-06, "loss": 0.4822, "step": 7427 }, { "epoch": 1.219879703569889, "grad_norm": 0.2957996126267077, "learning_rate": 7.814441311637179e-06, "loss": 0.4793, "step": 7428 }, { "epoch": 1.2200439307782316, "grad_norm": 0.3095382988258092, "learning_rate": 7.814124084945911e-06, "loss": 0.4866, "step": 7429 }, { "epoch": 1.2202081579865744, "grad_norm": 0.2886374847171023, "learning_rate": 7.813806822259996e-06, "loss": 0.4791, "step": 7430 }, { "epoch": 1.220372385194917, "grad_norm": 0.3268027407819441, "learning_rate": 7.81348952358288e-06, "loss": 0.4972, "step": 7431 }, { "epoch": 1.22053661240326, "grad_norm": 0.3406074181446285, "learning_rate": 7.813172188918005e-06, "loss": 0.5041, "step": 7432 }, { "epoch": 1.2207008396116026, "grad_norm": 0.4363014651036817, "learning_rate": 7.81285481826882e-06, "loss": 0.466, "step": 7433 }, { "epoch": 1.2208650668199454, "grad_norm": 0.27869275838433155, "learning_rate": 7.812537411638776e-06, "loss": 0.4781, "step": 7434 }, { "epoch": 1.221029294028288, "grad_norm": 0.28443886078659325, "learning_rate": 7.812219969031313e-06, "loss": 0.4733, "step": 7435 }, { "epoch": 1.221193521236631, "grad_norm": 0.2876297976501121, "learning_rate": 7.811902490449884e-06, "loss": 0.4638, "step": 7436 }, { "epoch": 1.2213577484449736, "grad_norm": 0.27526069765232986, "learning_rate": 7.811584975897936e-06, "loss": 0.4671, "step": 7437 }, { "epoch": 1.2215219756533164, "grad_norm": 0.2595326036832628, "learning_rate": 7.811267425378915e-06, "loss": 0.4738, "step": 7438 }, { "epoch": 1.221686202861659, "grad_norm": 0.47780546288542847, "learning_rate": 7.810949838896273e-06, "loss": 0.4714, "step": 7439 }, { "epoch": 1.221850430070002, "grad_norm": 0.3353387377929998, "learning_rate": 7.81063221645346e-06, "loss": 0.4911, "step": 7440 }, { "epoch": 1.2220146572783446, "grad_norm": 0.2867479853754556, "learning_rate": 7.81031455805392e-06, "loss": 0.4739, "step": 7441 }, { "epoch": 1.2221788844866874, "grad_norm": 0.2873199750403933, "learning_rate": 7.80999686370111e-06, "loss": 0.4777, "step": 7442 }, { "epoch": 1.22234311169503, "grad_norm": 0.2786622828477527, "learning_rate": 7.809679133398477e-06, "loss": 0.4834, "step": 7443 }, { "epoch": 1.2225073389033727, "grad_norm": 0.26644504322703355, "learning_rate": 7.809361367149472e-06, "loss": 0.4781, "step": 7444 }, { "epoch": 1.2226715661117156, "grad_norm": 0.32504077445550017, "learning_rate": 7.809043564957546e-06, "loss": 0.4964, "step": 7445 }, { "epoch": 1.2228357933200582, "grad_norm": 0.3443886570485489, "learning_rate": 7.808725726826152e-06, "loss": 0.4831, "step": 7446 }, { "epoch": 1.223000020528401, "grad_norm": 0.3078921394902557, "learning_rate": 7.808407852758741e-06, "loss": 0.486, "step": 7447 }, { "epoch": 1.2231642477367437, "grad_norm": 0.3137312338134038, "learning_rate": 7.808089942758765e-06, "loss": 0.4817, "step": 7448 }, { "epoch": 1.2233284749450866, "grad_norm": 0.29602008603252766, "learning_rate": 7.80777199682968e-06, "loss": 0.4811, "step": 7449 }, { "epoch": 1.2234927021534292, "grad_norm": 0.33542775379730577, "learning_rate": 7.807454014974935e-06, "loss": 0.4842, "step": 7450 }, { "epoch": 1.223656929361772, "grad_norm": 0.28587329682265583, "learning_rate": 7.807135997197983e-06, "loss": 0.4821, "step": 7451 }, { "epoch": 1.2238211565701147, "grad_norm": 0.31582625696235567, "learning_rate": 7.806817943502283e-06, "loss": 0.4764, "step": 7452 }, { "epoch": 1.2239853837784576, "grad_norm": 0.4508737429529132, "learning_rate": 7.806499853891286e-06, "loss": 0.4682, "step": 7453 }, { "epoch": 1.2241496109868002, "grad_norm": 0.34199337453924816, "learning_rate": 7.806181728368447e-06, "loss": 0.4876, "step": 7454 }, { "epoch": 1.224313838195143, "grad_norm": 0.29162050236465775, "learning_rate": 7.805863566937222e-06, "loss": 0.4816, "step": 7455 }, { "epoch": 1.2244780654034857, "grad_norm": 0.25706162141912203, "learning_rate": 7.805545369601068e-06, "loss": 0.4749, "step": 7456 }, { "epoch": 1.2246422926118286, "grad_norm": 0.30467782555406403, "learning_rate": 7.805227136363438e-06, "loss": 0.482, "step": 7457 }, { "epoch": 1.2248065198201712, "grad_norm": 0.4620126635135206, "learning_rate": 7.804908867227787e-06, "loss": 0.4814, "step": 7458 }, { "epoch": 1.224970747028514, "grad_norm": 0.6847698761546367, "learning_rate": 7.804590562197577e-06, "loss": 0.4663, "step": 7459 }, { "epoch": 1.2251349742368567, "grad_norm": 0.3919829953997119, "learning_rate": 7.80427222127626e-06, "loss": 0.46, "step": 7460 }, { "epoch": 1.2252992014451993, "grad_norm": 0.9675079986361146, "learning_rate": 7.803953844467296e-06, "loss": 0.4948, "step": 7461 }, { "epoch": 1.2254634286535422, "grad_norm": 0.4048339109198711, "learning_rate": 7.803635431774145e-06, "loss": 0.4751, "step": 7462 }, { "epoch": 1.2256276558618848, "grad_norm": 0.33359033850303504, "learning_rate": 7.80331698320026e-06, "loss": 0.4903, "step": 7463 }, { "epoch": 1.2257918830702277, "grad_norm": 0.4551179514390609, "learning_rate": 7.802998498749104e-06, "loss": 0.4831, "step": 7464 }, { "epoch": 1.2259561102785703, "grad_norm": 0.2962278157330623, "learning_rate": 7.802679978424136e-06, "loss": 0.4894, "step": 7465 }, { "epoch": 1.2261203374869132, "grad_norm": 0.3567481952133428, "learning_rate": 7.802361422228812e-06, "loss": 0.4846, "step": 7466 }, { "epoch": 1.2262845646952558, "grad_norm": 0.2693500702419807, "learning_rate": 7.802042830166594e-06, "loss": 0.4673, "step": 7467 }, { "epoch": 1.2264487919035987, "grad_norm": 0.2679369801617365, "learning_rate": 7.801724202240943e-06, "loss": 0.4873, "step": 7468 }, { "epoch": 1.2266130191119413, "grad_norm": 0.28647992777890036, "learning_rate": 7.801405538455317e-06, "loss": 0.4603, "step": 7469 }, { "epoch": 1.2267772463202842, "grad_norm": 0.2850138121930835, "learning_rate": 7.801086838813181e-06, "loss": 0.4809, "step": 7470 }, { "epoch": 1.2269414735286268, "grad_norm": 0.3343754442632032, "learning_rate": 7.800768103317991e-06, "loss": 0.4717, "step": 7471 }, { "epoch": 1.2271057007369697, "grad_norm": 0.3380996961344888, "learning_rate": 7.800449331973215e-06, "loss": 0.4688, "step": 7472 }, { "epoch": 1.2272699279453123, "grad_norm": 0.2828645657673943, "learning_rate": 7.80013052478231e-06, "loss": 0.4826, "step": 7473 }, { "epoch": 1.2274341551536552, "grad_norm": 0.34488922487887297, "learning_rate": 7.799811681748743e-06, "loss": 0.4963, "step": 7474 }, { "epoch": 1.2275983823619978, "grad_norm": 0.36977738285825107, "learning_rate": 7.799492802875973e-06, "loss": 0.4799, "step": 7475 }, { "epoch": 1.2277626095703407, "grad_norm": 0.4107523883100698, "learning_rate": 7.799173888167465e-06, "loss": 0.4791, "step": 7476 }, { "epoch": 1.2279268367786833, "grad_norm": 0.3065610046874601, "learning_rate": 7.798854937626682e-06, "loss": 0.4837, "step": 7477 }, { "epoch": 1.228091063987026, "grad_norm": 0.3125906559459154, "learning_rate": 7.79853595125709e-06, "loss": 0.4933, "step": 7478 }, { "epoch": 1.2282552911953688, "grad_norm": 0.2626573945619913, "learning_rate": 7.79821692906215e-06, "loss": 0.4714, "step": 7479 }, { "epoch": 1.2284195184037114, "grad_norm": 0.3571791073570016, "learning_rate": 7.797897871045332e-06, "loss": 0.4705, "step": 7480 }, { "epoch": 1.2285837456120543, "grad_norm": 0.34360557571526473, "learning_rate": 7.797578777210096e-06, "loss": 0.4663, "step": 7481 }, { "epoch": 1.228747972820397, "grad_norm": 0.2963914639094682, "learning_rate": 7.797259647559912e-06, "loss": 0.4723, "step": 7482 }, { "epoch": 1.2289122000287398, "grad_norm": 0.27699861933825076, "learning_rate": 7.796940482098244e-06, "loss": 0.4731, "step": 7483 }, { "epoch": 1.2290764272370824, "grad_norm": 0.2969103632510052, "learning_rate": 7.796621280828558e-06, "loss": 0.4761, "step": 7484 }, { "epoch": 1.2292406544454253, "grad_norm": 0.3065597940547537, "learning_rate": 7.796302043754321e-06, "loss": 0.4505, "step": 7485 }, { "epoch": 1.229404881653768, "grad_norm": 0.26417503495718664, "learning_rate": 7.795982770879e-06, "loss": 0.4748, "step": 7486 }, { "epoch": 1.2295691088621108, "grad_norm": 0.3398290499434709, "learning_rate": 7.795663462206067e-06, "loss": 0.4669, "step": 7487 }, { "epoch": 1.2297333360704534, "grad_norm": 0.34742823434001574, "learning_rate": 7.795344117738982e-06, "loss": 0.4781, "step": 7488 }, { "epoch": 1.2298975632787963, "grad_norm": 0.2945219892926102, "learning_rate": 7.795024737481219e-06, "loss": 0.4687, "step": 7489 }, { "epoch": 1.230061790487139, "grad_norm": 0.2770494663562303, "learning_rate": 7.794705321436248e-06, "loss": 0.4587, "step": 7490 }, { "epoch": 1.2302260176954818, "grad_norm": 0.2968149118488759, "learning_rate": 7.794385869607532e-06, "loss": 0.511, "step": 7491 }, { "epoch": 1.2303902449038244, "grad_norm": 0.2915104293412883, "learning_rate": 7.794066381998546e-06, "loss": 0.4689, "step": 7492 }, { "epoch": 1.2305544721121673, "grad_norm": 0.35989630798640004, "learning_rate": 7.793746858612759e-06, "loss": 0.4944, "step": 7493 }, { "epoch": 1.23071869932051, "grad_norm": 0.3141717355163613, "learning_rate": 7.79342729945364e-06, "loss": 0.4933, "step": 7494 }, { "epoch": 1.2308829265288526, "grad_norm": 0.3191568033697543, "learning_rate": 7.793107704524659e-06, "loss": 0.4767, "step": 7495 }, { "epoch": 1.2310471537371954, "grad_norm": 0.3239132079874789, "learning_rate": 7.792788073829289e-06, "loss": 0.4672, "step": 7496 }, { "epoch": 1.231211380945538, "grad_norm": 0.2635865983108721, "learning_rate": 7.792468407371e-06, "loss": 0.4478, "step": 7497 }, { "epoch": 1.231375608153881, "grad_norm": 0.38292422598333736, "learning_rate": 7.792148705153266e-06, "loss": 0.4812, "step": 7498 }, { "epoch": 1.2315398353622236, "grad_norm": 0.37145374460876307, "learning_rate": 7.791828967179559e-06, "loss": 0.4876, "step": 7499 }, { "epoch": 1.2317040625705664, "grad_norm": 0.28915250611709403, "learning_rate": 7.791509193453348e-06, "loss": 0.4792, "step": 7500 }, { "epoch": 1.231868289778909, "grad_norm": 0.2889110278668834, "learning_rate": 7.79118938397811e-06, "loss": 0.4905, "step": 7501 }, { "epoch": 1.232032516987252, "grad_norm": 0.33366376283958143, "learning_rate": 7.790869538757317e-06, "loss": 0.5038, "step": 7502 }, { "epoch": 1.2321967441955946, "grad_norm": 0.44289224941063765, "learning_rate": 7.790549657794443e-06, "loss": 0.4876, "step": 7503 }, { "epoch": 1.2323609714039374, "grad_norm": 0.3136464858676508, "learning_rate": 7.790229741092962e-06, "loss": 0.4955, "step": 7504 }, { "epoch": 1.23252519861228, "grad_norm": 0.2531177671121733, "learning_rate": 7.78990978865635e-06, "loss": 0.4962, "step": 7505 }, { "epoch": 1.232689425820623, "grad_norm": 0.2978682970598931, "learning_rate": 7.789589800488081e-06, "loss": 0.5, "step": 7506 }, { "epoch": 1.2328536530289655, "grad_norm": 0.2534835272168865, "learning_rate": 7.789269776591631e-06, "loss": 0.4681, "step": 7507 }, { "epoch": 1.2330178802373084, "grad_norm": 0.35502693503125016, "learning_rate": 7.788949716970472e-06, "loss": 0.4868, "step": 7508 }, { "epoch": 1.233182107445651, "grad_norm": 0.35494605281913755, "learning_rate": 7.788629621628084e-06, "loss": 0.4862, "step": 7509 }, { "epoch": 1.233346334653994, "grad_norm": 0.2796519161526337, "learning_rate": 7.788309490567945e-06, "loss": 0.4809, "step": 7510 }, { "epoch": 1.2335105618623365, "grad_norm": 0.35202315535595735, "learning_rate": 7.787989323793527e-06, "loss": 0.4927, "step": 7511 }, { "epoch": 1.2336747890706792, "grad_norm": 0.27572529786791367, "learning_rate": 7.787669121308312e-06, "loss": 0.477, "step": 7512 }, { "epoch": 1.233839016279022, "grad_norm": 0.29646983151833184, "learning_rate": 7.787348883115774e-06, "loss": 0.4731, "step": 7513 }, { "epoch": 1.2340032434873647, "grad_norm": 0.32889863862531893, "learning_rate": 7.787028609219394e-06, "loss": 0.4951, "step": 7514 }, { "epoch": 1.2341674706957075, "grad_norm": 0.2655153867934256, "learning_rate": 7.78670829962265e-06, "loss": 0.4506, "step": 7515 }, { "epoch": 1.2343316979040502, "grad_norm": 0.2663148975867709, "learning_rate": 7.786387954329018e-06, "loss": 0.4742, "step": 7516 }, { "epoch": 1.234495925112393, "grad_norm": 0.42508777196790876, "learning_rate": 7.786067573341982e-06, "loss": 0.4826, "step": 7517 }, { "epoch": 1.2346601523207357, "grad_norm": 0.3041477860219649, "learning_rate": 7.785747156665018e-06, "loss": 0.4699, "step": 7518 }, { "epoch": 1.2348243795290785, "grad_norm": 0.2897416257015565, "learning_rate": 7.785426704301607e-06, "loss": 0.4883, "step": 7519 }, { "epoch": 1.2349886067374212, "grad_norm": 0.2720161031746429, "learning_rate": 7.785106216255229e-06, "loss": 0.4819, "step": 7520 }, { "epoch": 1.235152833945764, "grad_norm": 0.3006375841709048, "learning_rate": 7.784785692529365e-06, "loss": 0.4817, "step": 7521 }, { "epoch": 1.2353170611541067, "grad_norm": 0.31301572775303516, "learning_rate": 7.784465133127498e-06, "loss": 0.4642, "step": 7522 }, { "epoch": 1.2354812883624495, "grad_norm": 0.3852031934863473, "learning_rate": 7.784144538053108e-06, "loss": 0.4686, "step": 7523 }, { "epoch": 1.2356455155707922, "grad_norm": 0.2872787747457896, "learning_rate": 7.783823907309676e-06, "loss": 0.4778, "step": 7524 }, { "epoch": 1.235809742779135, "grad_norm": 0.2864937598952176, "learning_rate": 7.783503240900686e-06, "loss": 0.4869, "step": 7525 }, { "epoch": 1.2359739699874777, "grad_norm": 0.3873222568675411, "learning_rate": 7.78318253882962e-06, "loss": 0.4727, "step": 7526 }, { "epoch": 1.2361381971958205, "grad_norm": 0.32706793356824965, "learning_rate": 7.782861801099963e-06, "loss": 0.4931, "step": 7527 }, { "epoch": 1.2363024244041632, "grad_norm": 0.24977174505164065, "learning_rate": 7.782541027715195e-06, "loss": 0.4436, "step": 7528 }, { "epoch": 1.2364666516125058, "grad_norm": 0.31267245459003645, "learning_rate": 7.782220218678804e-06, "loss": 0.4731, "step": 7529 }, { "epoch": 1.2366308788208487, "grad_norm": 0.33351244334836483, "learning_rate": 7.78189937399427e-06, "loss": 0.4922, "step": 7530 }, { "epoch": 1.2367951060291913, "grad_norm": 0.3155971525806248, "learning_rate": 7.781578493665083e-06, "loss": 0.5006, "step": 7531 }, { "epoch": 1.2369593332375342, "grad_norm": 0.3519882716741187, "learning_rate": 7.78125757769472e-06, "loss": 0.4795, "step": 7532 }, { "epoch": 1.2371235604458768, "grad_norm": 0.2979601040823767, "learning_rate": 7.780936626086675e-06, "loss": 0.4766, "step": 7533 }, { "epoch": 1.2372877876542197, "grad_norm": 0.3063945709411937, "learning_rate": 7.78061563884443e-06, "loss": 0.4789, "step": 7534 }, { "epoch": 1.2374520148625623, "grad_norm": 0.26722072179018536, "learning_rate": 7.780294615971471e-06, "loss": 0.4838, "step": 7535 }, { "epoch": 1.2376162420709051, "grad_norm": 0.292009730274553, "learning_rate": 7.779973557471285e-06, "loss": 0.4987, "step": 7536 }, { "epoch": 1.2377804692792478, "grad_norm": 0.2807020434939495, "learning_rate": 7.77965246334736e-06, "loss": 0.4715, "step": 7537 }, { "epoch": 1.2379446964875906, "grad_norm": 0.3453670092832338, "learning_rate": 7.77933133360318e-06, "loss": 0.4902, "step": 7538 }, { "epoch": 1.2381089236959333, "grad_norm": 0.31388854164530383, "learning_rate": 7.779010168242236e-06, "loss": 0.4856, "step": 7539 }, { "epoch": 1.2382731509042761, "grad_norm": 0.2867020260405545, "learning_rate": 7.778688967268017e-06, "loss": 0.4847, "step": 7540 }, { "epoch": 1.2384373781126188, "grad_norm": 0.3438963236599197, "learning_rate": 7.77836773068401e-06, "loss": 0.5006, "step": 7541 }, { "epoch": 1.2386016053209616, "grad_norm": 0.4297254179891695, "learning_rate": 7.778046458493703e-06, "loss": 0.4747, "step": 7542 }, { "epoch": 1.2387658325293043, "grad_norm": 0.3582137581158092, "learning_rate": 7.777725150700587e-06, "loss": 0.4832, "step": 7543 }, { "epoch": 1.2389300597376471, "grad_norm": 0.29072289657454703, "learning_rate": 7.777403807308148e-06, "loss": 0.4966, "step": 7544 }, { "epoch": 1.2390942869459898, "grad_norm": 0.3252671784530234, "learning_rate": 7.777082428319884e-06, "loss": 0.4866, "step": 7545 }, { "epoch": 1.2392585141543324, "grad_norm": 0.2908401782457425, "learning_rate": 7.776761013739277e-06, "loss": 0.4755, "step": 7546 }, { "epoch": 1.2394227413626753, "grad_norm": 0.28684276655151986, "learning_rate": 7.776439563569825e-06, "loss": 0.4761, "step": 7547 }, { "epoch": 1.239586968571018, "grad_norm": 0.30531377495350137, "learning_rate": 7.776118077815012e-06, "loss": 0.4831, "step": 7548 }, { "epoch": 1.2397511957793608, "grad_norm": 0.29723211564359786, "learning_rate": 7.775796556478336e-06, "loss": 0.4801, "step": 7549 }, { "epoch": 1.2399154229877034, "grad_norm": 0.2936682730249869, "learning_rate": 7.775474999563285e-06, "loss": 0.4832, "step": 7550 }, { "epoch": 1.2400796501960463, "grad_norm": 0.3018141932268422, "learning_rate": 7.775153407073353e-06, "loss": 0.4906, "step": 7551 }, { "epoch": 1.240243877404389, "grad_norm": 0.2787629798347041, "learning_rate": 7.774831779012033e-06, "loss": 0.4896, "step": 7552 }, { "epoch": 1.2404081046127318, "grad_norm": 0.34481809463993834, "learning_rate": 7.774510115382818e-06, "loss": 0.4659, "step": 7553 }, { "epoch": 1.2405723318210744, "grad_norm": 0.2779282824687869, "learning_rate": 7.774188416189201e-06, "loss": 0.4546, "step": 7554 }, { "epoch": 1.2407365590294173, "grad_norm": 0.2786407520838788, "learning_rate": 7.773866681434676e-06, "loss": 0.4798, "step": 7555 }, { "epoch": 1.24090078623776, "grad_norm": 0.3285343236852004, "learning_rate": 7.77354491112274e-06, "loss": 0.4797, "step": 7556 }, { "epoch": 1.2410650134461028, "grad_norm": 0.37506765944836495, "learning_rate": 7.773223105256883e-06, "loss": 0.4845, "step": 7557 }, { "epoch": 1.2412292406544454, "grad_norm": 0.35685647641502605, "learning_rate": 7.772901263840605e-06, "loss": 0.4877, "step": 7558 }, { "epoch": 1.2413934678627883, "grad_norm": 0.3358896496566247, "learning_rate": 7.772579386877396e-06, "loss": 0.49, "step": 7559 }, { "epoch": 1.241557695071131, "grad_norm": 0.3514289943534612, "learning_rate": 7.772257474370757e-06, "loss": 0.4744, "step": 7560 }, { "epoch": 1.2417219222794738, "grad_norm": 0.2937933570234482, "learning_rate": 7.771935526324183e-06, "loss": 0.4843, "step": 7561 }, { "epoch": 1.2418861494878164, "grad_norm": 0.3393020982420418, "learning_rate": 7.77161354274117e-06, "loss": 0.4573, "step": 7562 }, { "epoch": 1.242050376696159, "grad_norm": 0.3228901072188093, "learning_rate": 7.771291523625214e-06, "loss": 0.5017, "step": 7563 }, { "epoch": 1.242214603904502, "grad_norm": 0.3065450686112153, "learning_rate": 7.770969468979814e-06, "loss": 0.4758, "step": 7564 }, { "epoch": 1.2423788311128445, "grad_norm": 0.3106520824315073, "learning_rate": 7.770647378808469e-06, "loss": 0.4759, "step": 7565 }, { "epoch": 1.2425430583211874, "grad_norm": 0.2937174607831136, "learning_rate": 7.770325253114674e-06, "loss": 0.4622, "step": 7566 }, { "epoch": 1.24270728552953, "grad_norm": 0.3386310405929065, "learning_rate": 7.770003091901928e-06, "loss": 0.4851, "step": 7567 }, { "epoch": 1.2428715127378729, "grad_norm": 0.34444528064525604, "learning_rate": 7.769680895173733e-06, "loss": 0.4878, "step": 7568 }, { "epoch": 1.2430357399462155, "grad_norm": 0.27978430576428204, "learning_rate": 7.769358662933584e-06, "loss": 0.4847, "step": 7569 }, { "epoch": 1.2431999671545584, "grad_norm": 0.31389488030986795, "learning_rate": 7.769036395184987e-06, "loss": 0.4745, "step": 7570 }, { "epoch": 1.243364194362901, "grad_norm": 0.27170554485549553, "learning_rate": 7.768714091931436e-06, "loss": 0.4794, "step": 7571 }, { "epoch": 1.2435284215712439, "grad_norm": 0.33136051639556463, "learning_rate": 7.768391753176434e-06, "loss": 0.4644, "step": 7572 }, { "epoch": 1.2436926487795865, "grad_norm": 0.3093556203425737, "learning_rate": 7.768069378923483e-06, "loss": 0.4646, "step": 7573 }, { "epoch": 1.2438568759879294, "grad_norm": 0.46973993506400613, "learning_rate": 7.767746969176082e-06, "loss": 0.4632, "step": 7574 }, { "epoch": 1.244021103196272, "grad_norm": 0.26908039743071843, "learning_rate": 7.767424523937735e-06, "loss": 0.4705, "step": 7575 }, { "epoch": 1.2441853304046149, "grad_norm": 0.37290345238525935, "learning_rate": 7.767102043211942e-06, "loss": 0.4944, "step": 7576 }, { "epoch": 1.2443495576129575, "grad_norm": 0.43611265222170165, "learning_rate": 7.766779527002208e-06, "loss": 0.4909, "step": 7577 }, { "epoch": 1.2445137848213004, "grad_norm": 0.3328354020728991, "learning_rate": 7.766456975312032e-06, "loss": 0.4696, "step": 7578 }, { "epoch": 1.244678012029643, "grad_norm": 0.25915177451173665, "learning_rate": 7.766134388144921e-06, "loss": 0.4875, "step": 7579 }, { "epoch": 1.2448422392379856, "grad_norm": 0.25832700528645586, "learning_rate": 7.765811765504377e-06, "loss": 0.4837, "step": 7580 }, { "epoch": 1.2450064664463285, "grad_norm": 0.30829434869947336, "learning_rate": 7.765489107393903e-06, "loss": 0.4748, "step": 7581 }, { "epoch": 1.2451706936546711, "grad_norm": 0.30836619497139556, "learning_rate": 7.765166413817006e-06, "loss": 0.486, "step": 7582 }, { "epoch": 1.245334920863014, "grad_norm": 0.6948548750504677, "learning_rate": 7.76484368477719e-06, "loss": 0.4691, "step": 7583 }, { "epoch": 1.2454991480713566, "grad_norm": 0.2700607921883031, "learning_rate": 7.764520920277958e-06, "loss": 0.473, "step": 7584 }, { "epoch": 1.2456633752796995, "grad_norm": 0.3288894558422325, "learning_rate": 7.764198120322816e-06, "loss": 0.4831, "step": 7585 }, { "epoch": 1.2458276024880421, "grad_norm": 0.3254518062997147, "learning_rate": 7.763875284915272e-06, "loss": 0.4723, "step": 7586 }, { "epoch": 1.245991829696385, "grad_norm": 0.3169491475419649, "learning_rate": 7.76355241405883e-06, "loss": 0.5002, "step": 7587 }, { "epoch": 1.2461560569047276, "grad_norm": 0.2896778965883232, "learning_rate": 7.763229507757e-06, "loss": 0.4784, "step": 7588 }, { "epoch": 1.2463202841130705, "grad_norm": 0.6643183902372932, "learning_rate": 7.762906566013287e-06, "loss": 0.4641, "step": 7589 }, { "epoch": 1.2464845113214131, "grad_norm": 0.4694379861367071, "learning_rate": 7.762583588831197e-06, "loss": 0.4951, "step": 7590 }, { "epoch": 1.246648738529756, "grad_norm": 0.2714033681351259, "learning_rate": 7.76226057621424e-06, "loss": 0.4512, "step": 7591 }, { "epoch": 1.2468129657380986, "grad_norm": 0.33738219660603047, "learning_rate": 7.761937528165923e-06, "loss": 0.4711, "step": 7592 }, { "epoch": 1.2469771929464415, "grad_norm": 0.31785803759257947, "learning_rate": 7.761614444689755e-06, "loss": 0.4799, "step": 7593 }, { "epoch": 1.2471414201547841, "grad_norm": 0.2944630094956895, "learning_rate": 7.761291325789244e-06, "loss": 0.4857, "step": 7594 }, { "epoch": 1.247305647363127, "grad_norm": 0.2539993541834688, "learning_rate": 7.760968171467903e-06, "loss": 0.4545, "step": 7595 }, { "epoch": 1.2474698745714696, "grad_norm": 0.31121532100643007, "learning_rate": 7.760644981729238e-06, "loss": 0.4742, "step": 7596 }, { "epoch": 1.2476341017798123, "grad_norm": 0.41726789416464144, "learning_rate": 7.76032175657676e-06, "loss": 0.4569, "step": 7597 }, { "epoch": 1.2477983289881551, "grad_norm": 0.2858322392854143, "learning_rate": 7.759998496013981e-06, "loss": 0.4628, "step": 7598 }, { "epoch": 1.2479625561964978, "grad_norm": 0.29840578768448217, "learning_rate": 7.759675200044411e-06, "loss": 0.4865, "step": 7599 }, { "epoch": 1.2481267834048406, "grad_norm": 0.429100208558146, "learning_rate": 7.75935186867156e-06, "loss": 0.4707, "step": 7600 }, { "epoch": 1.2482910106131833, "grad_norm": 0.2838003969767729, "learning_rate": 7.759028501898942e-06, "loss": 0.4709, "step": 7601 }, { "epoch": 1.2484552378215261, "grad_norm": 0.3557099389351078, "learning_rate": 7.758705099730069e-06, "loss": 0.4723, "step": 7602 }, { "epoch": 1.2486194650298688, "grad_norm": 0.2870847693915563, "learning_rate": 7.75838166216845e-06, "loss": 0.4866, "step": 7603 }, { "epoch": 1.2487836922382116, "grad_norm": 0.3188591937844555, "learning_rate": 7.758058189217604e-06, "loss": 0.4486, "step": 7604 }, { "epoch": 1.2489479194465543, "grad_norm": 0.37056779120212635, "learning_rate": 7.757734680881036e-06, "loss": 0.4855, "step": 7605 }, { "epoch": 1.2491121466548971, "grad_norm": 0.3639198034440745, "learning_rate": 7.757411137162267e-06, "loss": 0.4747, "step": 7606 }, { "epoch": 1.2492763738632398, "grad_norm": 0.2787655583803941, "learning_rate": 7.757087558064806e-06, "loss": 0.4783, "step": 7607 }, { "epoch": 1.2494406010715826, "grad_norm": 0.30779539880108575, "learning_rate": 7.756763943592173e-06, "loss": 0.4965, "step": 7608 }, { "epoch": 1.2496048282799253, "grad_norm": 0.31669536490305483, "learning_rate": 7.756440293747877e-06, "loss": 0.4912, "step": 7609 }, { "epoch": 1.249769055488268, "grad_norm": 0.308687391665335, "learning_rate": 7.756116608535436e-06, "loss": 0.4889, "step": 7610 }, { "epoch": 1.2499332826966107, "grad_norm": 0.38901222664933416, "learning_rate": 7.755792887958365e-06, "loss": 0.4977, "step": 7611 }, { "epoch": 1.2500975099049536, "grad_norm": 0.28324939107336244, "learning_rate": 7.75546913202018e-06, "loss": 0.4828, "step": 7612 }, { "epoch": 1.2502617371132962, "grad_norm": 0.4111706793850341, "learning_rate": 7.755145340724396e-06, "loss": 0.4756, "step": 7613 }, { "epoch": 1.2504259643216389, "grad_norm": 0.3162486727419526, "learning_rate": 7.754821514074534e-06, "loss": 0.4869, "step": 7614 }, { "epoch": 1.2505901915299817, "grad_norm": 0.30369645916751725, "learning_rate": 7.754497652074106e-06, "loss": 0.498, "step": 7615 }, { "epoch": 1.2507544187383246, "grad_norm": 0.2973675199028055, "learning_rate": 7.754173754726631e-06, "loss": 0.4724, "step": 7616 }, { "epoch": 1.2509186459466672, "grad_norm": 0.3635026371907883, "learning_rate": 7.75384982203563e-06, "loss": 0.4735, "step": 7617 }, { "epoch": 1.2510828731550099, "grad_norm": 0.3335327209309586, "learning_rate": 7.753525854004618e-06, "loss": 0.4845, "step": 7618 }, { "epoch": 1.2512471003633527, "grad_norm": 0.27222796938938104, "learning_rate": 7.753201850637111e-06, "loss": 0.4745, "step": 7619 }, { "epoch": 1.2514113275716954, "grad_norm": 0.25174104281048826, "learning_rate": 7.752877811936634e-06, "loss": 0.4695, "step": 7620 }, { "epoch": 1.2515755547800382, "grad_norm": 0.37642279840391096, "learning_rate": 7.752553737906702e-06, "loss": 0.4846, "step": 7621 }, { "epoch": 1.2517397819883809, "grad_norm": 0.28660532539995426, "learning_rate": 7.752229628550837e-06, "loss": 0.4891, "step": 7622 }, { "epoch": 1.2519040091967237, "grad_norm": 0.5222307981778282, "learning_rate": 7.75190548387256e-06, "loss": 0.4841, "step": 7623 }, { "epoch": 1.2520682364050664, "grad_norm": 0.2689431118979005, "learning_rate": 7.751581303875387e-06, "loss": 0.4654, "step": 7624 }, { "epoch": 1.2522324636134092, "grad_norm": 0.26136425839250754, "learning_rate": 7.751257088562843e-06, "loss": 0.4892, "step": 7625 }, { "epoch": 1.2523966908217519, "grad_norm": 0.3392486862232706, "learning_rate": 7.75093283793845e-06, "loss": 0.4785, "step": 7626 }, { "epoch": 1.2525609180300945, "grad_norm": 0.28026937023228876, "learning_rate": 7.750608552005726e-06, "loss": 0.4753, "step": 7627 }, { "epoch": 1.2527251452384374, "grad_norm": 0.3107954217489059, "learning_rate": 7.750284230768194e-06, "loss": 0.4883, "step": 7628 }, { "epoch": 1.2528893724467802, "grad_norm": 0.3049446665147886, "learning_rate": 7.749959874229381e-06, "loss": 0.4796, "step": 7629 }, { "epoch": 1.2530535996551229, "grad_norm": 0.3168276384106462, "learning_rate": 7.749635482392802e-06, "loss": 0.4955, "step": 7630 }, { "epoch": 1.2532178268634655, "grad_norm": 0.3235340299163566, "learning_rate": 7.749311055261989e-06, "loss": 0.481, "step": 7631 }, { "epoch": 1.2533820540718084, "grad_norm": 0.39371662529450346, "learning_rate": 7.748986592840457e-06, "loss": 0.4861, "step": 7632 }, { "epoch": 1.2535462812801512, "grad_norm": 0.285427242350543, "learning_rate": 7.748662095131736e-06, "loss": 0.498, "step": 7633 }, { "epoch": 1.2537105084884939, "grad_norm": 0.33650249587902675, "learning_rate": 7.748337562139348e-06, "loss": 0.4865, "step": 7634 }, { "epoch": 1.2538747356968365, "grad_norm": 0.3027523754521188, "learning_rate": 7.748012993866817e-06, "loss": 0.4752, "step": 7635 }, { "epoch": 1.2540389629051794, "grad_norm": 0.2806626536948512, "learning_rate": 7.747688390317672e-06, "loss": 0.4701, "step": 7636 }, { "epoch": 1.254203190113522, "grad_norm": 0.2646422086313468, "learning_rate": 7.747363751495434e-06, "loss": 0.4891, "step": 7637 }, { "epoch": 1.2543674173218649, "grad_norm": 0.273441689961552, "learning_rate": 7.747039077403631e-06, "loss": 0.4606, "step": 7638 }, { "epoch": 1.2545316445302075, "grad_norm": 0.39997955334967183, "learning_rate": 7.746714368045788e-06, "loss": 0.4665, "step": 7639 }, { "epoch": 1.2546958717385504, "grad_norm": 0.3156118656661959, "learning_rate": 7.746389623425435e-06, "loss": 0.4848, "step": 7640 }, { "epoch": 1.254860098946893, "grad_norm": 0.2993686116712013, "learning_rate": 7.746064843546096e-06, "loss": 0.4766, "step": 7641 }, { "epoch": 1.2550243261552358, "grad_norm": 0.35049350059175216, "learning_rate": 7.745740028411296e-06, "loss": 0.4811, "step": 7642 }, { "epoch": 1.2551885533635785, "grad_norm": 0.33841545284685837, "learning_rate": 7.74541517802457e-06, "loss": 0.4875, "step": 7643 }, { "epoch": 1.2553527805719211, "grad_norm": 0.29653348815931907, "learning_rate": 7.745090292389438e-06, "loss": 0.4834, "step": 7644 }, { "epoch": 1.255517007780264, "grad_norm": 1.0619524130211613, "learning_rate": 7.744765371509437e-06, "loss": 0.4715, "step": 7645 }, { "epoch": 1.2556812349886068, "grad_norm": 0.33283296033041115, "learning_rate": 7.744440415388089e-06, "loss": 0.4661, "step": 7646 }, { "epoch": 1.2558454621969495, "grad_norm": 0.3195752649112358, "learning_rate": 7.744115424028925e-06, "loss": 0.4981, "step": 7647 }, { "epoch": 1.2560096894052921, "grad_norm": 0.2546867337134892, "learning_rate": 7.74379039743548e-06, "loss": 0.4771, "step": 7648 }, { "epoch": 1.256173916613635, "grad_norm": 0.30738186122930333, "learning_rate": 7.743465335611276e-06, "loss": 0.4666, "step": 7649 }, { "epoch": 1.2563381438219778, "grad_norm": 0.27859639311481776, "learning_rate": 7.74314023855985e-06, "loss": 0.4712, "step": 7650 }, { "epoch": 1.2565023710303205, "grad_norm": 0.3330781418753308, "learning_rate": 7.742815106284728e-06, "loss": 0.4659, "step": 7651 }, { "epoch": 1.2566665982386631, "grad_norm": 0.32030910397101686, "learning_rate": 7.742489938789444e-06, "loss": 0.4489, "step": 7652 }, { "epoch": 1.256830825447006, "grad_norm": 0.358633123705577, "learning_rate": 7.74216473607753e-06, "loss": 0.455, "step": 7653 }, { "epoch": 1.2569950526553486, "grad_norm": 0.28410974154650376, "learning_rate": 7.741839498152515e-06, "loss": 0.4801, "step": 7654 }, { "epoch": 1.2571592798636915, "grad_norm": 0.34427076281168334, "learning_rate": 7.741514225017935e-06, "loss": 0.487, "step": 7655 }, { "epoch": 1.257323507072034, "grad_norm": 0.31974128073632185, "learning_rate": 7.741188916677321e-06, "loss": 0.4781, "step": 7656 }, { "epoch": 1.257487734280377, "grad_norm": 0.2841276269841283, "learning_rate": 7.740863573134208e-06, "loss": 0.4656, "step": 7657 }, { "epoch": 1.2576519614887196, "grad_norm": 0.29801297035583985, "learning_rate": 7.740538194392126e-06, "loss": 0.492, "step": 7658 }, { "epoch": 1.2578161886970625, "grad_norm": 0.40026325549229824, "learning_rate": 7.740212780454611e-06, "loss": 0.4843, "step": 7659 }, { "epoch": 1.257980415905405, "grad_norm": 0.3383340220937869, "learning_rate": 7.739887331325199e-06, "loss": 0.4584, "step": 7660 }, { "epoch": 1.2581446431137477, "grad_norm": 0.3879046565559537, "learning_rate": 7.73956184700742e-06, "loss": 0.4712, "step": 7661 }, { "epoch": 1.2583088703220906, "grad_norm": 0.3046193115336133, "learning_rate": 7.739236327504814e-06, "loss": 0.4757, "step": 7662 }, { "epoch": 1.2584730975304335, "grad_norm": 0.27583639779518276, "learning_rate": 7.738910772820915e-06, "loss": 0.4625, "step": 7663 }, { "epoch": 1.258637324738776, "grad_norm": 0.2743611344103936, "learning_rate": 7.738585182959257e-06, "loss": 0.4819, "step": 7664 }, { "epoch": 1.2588015519471187, "grad_norm": 0.26396529926757517, "learning_rate": 7.73825955792338e-06, "loss": 0.4645, "step": 7665 }, { "epoch": 1.2589657791554616, "grad_norm": 0.33484568225629724, "learning_rate": 7.737933897716815e-06, "loss": 0.483, "step": 7666 }, { "epoch": 1.2591300063638045, "grad_norm": 0.2855404254518291, "learning_rate": 7.737608202343104e-06, "loss": 0.4797, "step": 7667 }, { "epoch": 1.259294233572147, "grad_norm": 0.2678897122032849, "learning_rate": 7.737282471805782e-06, "loss": 0.4677, "step": 7668 }, { "epoch": 1.2594584607804897, "grad_norm": 0.33378647809677103, "learning_rate": 7.736956706108388e-06, "loss": 0.4698, "step": 7669 }, { "epoch": 1.2596226879888326, "grad_norm": 0.33762949882200577, "learning_rate": 7.736630905254458e-06, "loss": 0.4698, "step": 7670 }, { "epoch": 1.2597869151971752, "grad_norm": 0.28816229123616627, "learning_rate": 7.736305069247535e-06, "loss": 0.4851, "step": 7671 }, { "epoch": 1.259951142405518, "grad_norm": 0.3474911045704335, "learning_rate": 7.735979198091151e-06, "loss": 0.4729, "step": 7672 }, { "epoch": 1.2601153696138607, "grad_norm": 0.31218960292473513, "learning_rate": 7.735653291788851e-06, "loss": 0.4663, "step": 7673 }, { "epoch": 1.2602795968222036, "grad_norm": 0.2741703738720939, "learning_rate": 7.735327350344173e-06, "loss": 0.4788, "step": 7674 }, { "epoch": 1.2604438240305462, "grad_norm": 0.3220346800939815, "learning_rate": 7.735001373760658e-06, "loss": 0.4785, "step": 7675 }, { "epoch": 1.260608051238889, "grad_norm": 0.2687168544207819, "learning_rate": 7.734675362041843e-06, "loss": 0.4707, "step": 7676 }, { "epoch": 1.2607722784472317, "grad_norm": 0.2882314916411854, "learning_rate": 7.734349315191272e-06, "loss": 0.4771, "step": 7677 }, { "epoch": 1.2609365056555744, "grad_norm": 0.27978276123566304, "learning_rate": 7.734023233212484e-06, "loss": 0.4883, "step": 7678 }, { "epoch": 1.2611007328639172, "grad_norm": 0.4947967595175015, "learning_rate": 7.733697116109024e-06, "loss": 0.4965, "step": 7679 }, { "epoch": 1.26126496007226, "grad_norm": 0.3388665890482255, "learning_rate": 7.73337096388443e-06, "loss": 0.4687, "step": 7680 }, { "epoch": 1.2614291872806027, "grad_norm": 0.34797616819323746, "learning_rate": 7.733044776542248e-06, "loss": 0.4794, "step": 7681 }, { "epoch": 1.2615934144889454, "grad_norm": 0.3238567132030185, "learning_rate": 7.732718554086017e-06, "loss": 0.4868, "step": 7682 }, { "epoch": 1.2617576416972882, "grad_norm": 0.2742114952715846, "learning_rate": 7.732392296519283e-06, "loss": 0.4742, "step": 7683 }, { "epoch": 1.261921868905631, "grad_norm": 0.29131539497818637, "learning_rate": 7.732066003845588e-06, "loss": 0.4896, "step": 7684 }, { "epoch": 1.2620860961139737, "grad_norm": 0.2622760343595224, "learning_rate": 7.731739676068477e-06, "loss": 0.4703, "step": 7685 }, { "epoch": 1.2622503233223163, "grad_norm": 0.2930264769742631, "learning_rate": 7.731413313191492e-06, "loss": 0.4515, "step": 7686 }, { "epoch": 1.2624145505306592, "grad_norm": 0.34566046733120337, "learning_rate": 7.731086915218181e-06, "loss": 0.4722, "step": 7687 }, { "epoch": 1.2625787777390018, "grad_norm": 0.39056124524545094, "learning_rate": 7.730760482152085e-06, "loss": 0.4809, "step": 7688 }, { "epoch": 1.2627430049473447, "grad_norm": 0.3505071210062579, "learning_rate": 7.730434013996753e-06, "loss": 0.4887, "step": 7689 }, { "epoch": 1.2629072321556873, "grad_norm": 0.34205645309234606, "learning_rate": 7.730107510755729e-06, "loss": 0.4932, "step": 7690 }, { "epoch": 1.2630714593640302, "grad_norm": 0.29161143835530734, "learning_rate": 7.729780972432559e-06, "loss": 0.4854, "step": 7691 }, { "epoch": 1.2632356865723728, "grad_norm": 0.45514430334380673, "learning_rate": 7.729454399030791e-06, "loss": 0.4945, "step": 7692 }, { "epoch": 1.2633999137807157, "grad_norm": 0.3141566231439699, "learning_rate": 7.72912779055397e-06, "loss": 0.4761, "step": 7693 }, { "epoch": 1.2635641409890583, "grad_norm": 0.3559790524001432, "learning_rate": 7.728801147005643e-06, "loss": 0.4822, "step": 7694 }, { "epoch": 1.263728368197401, "grad_norm": 0.3237951014226304, "learning_rate": 7.728474468389361e-06, "loss": 0.5001, "step": 7695 }, { "epoch": 1.2638925954057438, "grad_norm": 0.3288019447855348, "learning_rate": 7.72814775470867e-06, "loss": 0.4917, "step": 7696 }, { "epoch": 1.2640568226140867, "grad_norm": 0.431996411415779, "learning_rate": 7.727821005967117e-06, "loss": 0.4715, "step": 7697 }, { "epoch": 1.2642210498224293, "grad_norm": 0.2891597431196658, "learning_rate": 7.727494222168252e-06, "loss": 0.4691, "step": 7698 }, { "epoch": 1.264385277030772, "grad_norm": 0.37428064068458555, "learning_rate": 7.727167403315625e-06, "loss": 0.4835, "step": 7699 }, { "epoch": 1.2645495042391148, "grad_norm": 0.3053769528132182, "learning_rate": 7.726840549412784e-06, "loss": 0.4808, "step": 7700 }, { "epoch": 1.2647137314474577, "grad_norm": 0.4850739518874018, "learning_rate": 7.726513660463282e-06, "loss": 0.4627, "step": 7701 }, { "epoch": 1.2648779586558003, "grad_norm": 0.2920598155904834, "learning_rate": 7.726186736470666e-06, "loss": 0.4685, "step": 7702 }, { "epoch": 1.265042185864143, "grad_norm": 0.338510521081132, "learning_rate": 7.725859777438487e-06, "loss": 0.4743, "step": 7703 }, { "epoch": 1.2652064130724858, "grad_norm": 0.4296571563713848, "learning_rate": 7.725532783370298e-06, "loss": 0.4923, "step": 7704 }, { "epoch": 1.2653706402808285, "grad_norm": 0.30765920212094816, "learning_rate": 7.725205754269648e-06, "loss": 0.5024, "step": 7705 }, { "epoch": 1.2655348674891713, "grad_norm": 0.2709899542946473, "learning_rate": 7.724878690140093e-06, "loss": 0.4717, "step": 7706 }, { "epoch": 1.265699094697514, "grad_norm": 0.29725395484723255, "learning_rate": 7.724551590985182e-06, "loss": 0.4627, "step": 7707 }, { "epoch": 1.2658633219058568, "grad_norm": 0.4053905691221062, "learning_rate": 7.724224456808465e-06, "loss": 0.4852, "step": 7708 }, { "epoch": 1.2660275491141995, "grad_norm": 0.3116877740483153, "learning_rate": 7.723897287613502e-06, "loss": 0.4735, "step": 7709 }, { "epoch": 1.2661917763225423, "grad_norm": 0.35449971632410593, "learning_rate": 7.72357008340384e-06, "loss": 0.4981, "step": 7710 }, { "epoch": 1.266356003530885, "grad_norm": 0.3482399264073418, "learning_rate": 7.723242844183038e-06, "loss": 0.4552, "step": 7711 }, { "epoch": 1.2665202307392276, "grad_norm": 0.437774132734577, "learning_rate": 7.722915569954646e-06, "loss": 0.4631, "step": 7712 }, { "epoch": 1.2666844579475705, "grad_norm": 0.26839427210078487, "learning_rate": 7.72258826072222e-06, "loss": 0.495, "step": 7713 }, { "epoch": 1.2668486851559133, "grad_norm": 0.31217056993105924, "learning_rate": 7.722260916489313e-06, "loss": 0.4844, "step": 7714 }, { "epoch": 1.267012912364256, "grad_norm": 0.4914939353825111, "learning_rate": 7.721933537259483e-06, "loss": 0.4797, "step": 7715 }, { "epoch": 1.2671771395725986, "grad_norm": 0.29182947550725524, "learning_rate": 7.721606123036288e-06, "loss": 0.4833, "step": 7716 }, { "epoch": 1.2673413667809414, "grad_norm": 0.37793151937577985, "learning_rate": 7.721278673823278e-06, "loss": 0.4669, "step": 7717 }, { "epoch": 1.2675055939892843, "grad_norm": 0.26048313020401365, "learning_rate": 7.720951189624013e-06, "loss": 0.4815, "step": 7718 }, { "epoch": 1.267669821197627, "grad_norm": 0.3111343689668257, "learning_rate": 7.720623670442048e-06, "loss": 0.471, "step": 7719 }, { "epoch": 1.2678340484059696, "grad_norm": 0.3135420875235267, "learning_rate": 7.720296116280944e-06, "loss": 0.4746, "step": 7720 }, { "epoch": 1.2679982756143124, "grad_norm": 0.2976984908795594, "learning_rate": 7.719968527144253e-06, "loss": 0.4635, "step": 7721 }, { "epoch": 1.268162502822655, "grad_norm": 0.2756385116643583, "learning_rate": 7.719640903035538e-06, "loss": 0.4821, "step": 7722 }, { "epoch": 1.268326730030998, "grad_norm": 0.3310928364944098, "learning_rate": 7.719313243958353e-06, "loss": 0.5135, "step": 7723 }, { "epoch": 1.2684909572393406, "grad_norm": 0.2898540745776658, "learning_rate": 7.71898554991626e-06, "loss": 0.4747, "step": 7724 }, { "epoch": 1.2686551844476834, "grad_norm": 0.29846089427282313, "learning_rate": 7.718657820912816e-06, "loss": 0.4645, "step": 7725 }, { "epoch": 1.268819411656026, "grad_norm": 0.4277166211567597, "learning_rate": 7.718330056951582e-06, "loss": 0.4626, "step": 7726 }, { "epoch": 1.268983638864369, "grad_norm": 0.3386584451128976, "learning_rate": 7.718002258036117e-06, "loss": 0.45, "step": 7727 }, { "epoch": 1.2691478660727116, "grad_norm": 0.31044845044724056, "learning_rate": 7.717674424169983e-06, "loss": 0.493, "step": 7728 }, { "epoch": 1.2693120932810542, "grad_norm": 0.28929110658363827, "learning_rate": 7.717346555356737e-06, "loss": 0.4995, "step": 7729 }, { "epoch": 1.269476320489397, "grad_norm": 0.2750277317962798, "learning_rate": 7.717018651599942e-06, "loss": 0.4523, "step": 7730 }, { "epoch": 1.26964054769774, "grad_norm": 0.2646663022346015, "learning_rate": 7.71669071290316e-06, "loss": 0.4684, "step": 7731 }, { "epoch": 1.2698047749060826, "grad_norm": 0.2722408985365008, "learning_rate": 7.716362739269952e-06, "loss": 0.4867, "step": 7732 }, { "epoch": 1.2699690021144252, "grad_norm": 0.2813196075417855, "learning_rate": 7.71603473070388e-06, "loss": 0.5011, "step": 7733 }, { "epoch": 1.270133229322768, "grad_norm": 0.2811195028154342, "learning_rate": 7.715706687208507e-06, "loss": 0.4883, "step": 7734 }, { "epoch": 1.270297456531111, "grad_norm": 0.3121290061085945, "learning_rate": 7.715378608787394e-06, "loss": 0.5022, "step": 7735 }, { "epoch": 1.2704616837394536, "grad_norm": 0.29219405088342026, "learning_rate": 7.715050495444108e-06, "loss": 0.4765, "step": 7736 }, { "epoch": 1.2706259109477962, "grad_norm": 0.29988768120792714, "learning_rate": 7.71472234718221e-06, "loss": 0.4849, "step": 7737 }, { "epoch": 1.270790138156139, "grad_norm": 0.30856320601214104, "learning_rate": 7.714394164005264e-06, "loss": 0.4753, "step": 7738 }, { "epoch": 1.2709543653644817, "grad_norm": 0.29738267786753775, "learning_rate": 7.714065945916834e-06, "loss": 0.4698, "step": 7739 }, { "epoch": 1.2711185925728246, "grad_norm": 0.3323745523516455, "learning_rate": 7.713737692920488e-06, "loss": 0.4704, "step": 7740 }, { "epoch": 1.2712828197811672, "grad_norm": 0.2512031804860789, "learning_rate": 7.713409405019786e-06, "loss": 0.4737, "step": 7741 }, { "epoch": 1.27144704698951, "grad_norm": 0.28820433890993685, "learning_rate": 7.713081082218297e-06, "loss": 0.4625, "step": 7742 }, { "epoch": 1.2716112741978527, "grad_norm": 0.3977011594936401, "learning_rate": 7.712752724519588e-06, "loss": 0.4781, "step": 7743 }, { "epoch": 1.2717755014061956, "grad_norm": 0.26281093424723, "learning_rate": 7.712424331927221e-06, "loss": 0.4773, "step": 7744 }, { "epoch": 1.2719397286145382, "grad_norm": 0.3895924508235183, "learning_rate": 7.712095904444767e-06, "loss": 0.4865, "step": 7745 }, { "epoch": 1.2721039558228808, "grad_norm": 0.6117971589272038, "learning_rate": 7.71176744207579e-06, "loss": 0.4621, "step": 7746 }, { "epoch": 1.2722681830312237, "grad_norm": 0.2916185512611372, "learning_rate": 7.71143894482386e-06, "loss": 0.4791, "step": 7747 }, { "epoch": 1.2724324102395665, "grad_norm": 0.3215675610815016, "learning_rate": 7.711110412692543e-06, "loss": 0.4743, "step": 7748 }, { "epoch": 1.2725966374479092, "grad_norm": 0.3985246567089694, "learning_rate": 7.710781845685406e-06, "loss": 0.4826, "step": 7749 }, { "epoch": 1.2727608646562518, "grad_norm": 0.3815067560152665, "learning_rate": 7.710453243806021e-06, "loss": 0.4847, "step": 7750 }, { "epoch": 1.2729250918645947, "grad_norm": 0.2545415166282443, "learning_rate": 7.710124607057954e-06, "loss": 0.452, "step": 7751 }, { "epoch": 1.2730893190729375, "grad_norm": 0.3512974958871999, "learning_rate": 7.709795935444777e-06, "loss": 0.4545, "step": 7752 }, { "epoch": 1.2732535462812802, "grad_norm": 0.33763955275343127, "learning_rate": 7.709467228970056e-06, "loss": 0.4819, "step": 7753 }, { "epoch": 1.2734177734896228, "grad_norm": 0.47180854525995025, "learning_rate": 7.709138487637365e-06, "loss": 0.4773, "step": 7754 }, { "epoch": 1.2735820006979657, "grad_norm": 0.2883546400469497, "learning_rate": 7.708809711450272e-06, "loss": 0.4654, "step": 7755 }, { "epoch": 1.2737462279063083, "grad_norm": 0.27563610891719326, "learning_rate": 7.708480900412348e-06, "loss": 0.4801, "step": 7756 }, { "epoch": 1.2739104551146512, "grad_norm": 0.5716890214745179, "learning_rate": 7.708152054527165e-06, "loss": 0.4688, "step": 7757 }, { "epoch": 1.2740746823229938, "grad_norm": 0.42195057385213214, "learning_rate": 7.707823173798295e-06, "loss": 0.4796, "step": 7758 }, { "epoch": 1.2742389095313367, "grad_norm": 0.30176871840975483, "learning_rate": 7.707494258229308e-06, "loss": 0.4812, "step": 7759 }, { "epoch": 1.2744031367396793, "grad_norm": 0.3565361831398607, "learning_rate": 7.707165307823778e-06, "loss": 0.4662, "step": 7760 }, { "epoch": 1.2745673639480222, "grad_norm": 0.29370991851580663, "learning_rate": 7.706836322585278e-06, "loss": 0.485, "step": 7761 }, { "epoch": 1.2747315911563648, "grad_norm": 0.3047484919897347, "learning_rate": 7.70650730251738e-06, "loss": 0.4779, "step": 7762 }, { "epoch": 1.2748958183647074, "grad_norm": 0.28808269074598614, "learning_rate": 7.706178247623659e-06, "loss": 0.4763, "step": 7763 }, { "epoch": 1.2750600455730503, "grad_norm": 0.27915183570391694, "learning_rate": 7.705849157907686e-06, "loss": 0.4792, "step": 7764 }, { "epoch": 1.2752242727813932, "grad_norm": 0.32806606132369737, "learning_rate": 7.705520033373038e-06, "loss": 0.4698, "step": 7765 }, { "epoch": 1.2753884999897358, "grad_norm": 0.3089376020988689, "learning_rate": 7.70519087402329e-06, "loss": 0.5008, "step": 7766 }, { "epoch": 1.2755527271980784, "grad_norm": 0.2725056465017377, "learning_rate": 7.704861679862013e-06, "loss": 0.4721, "step": 7767 }, { "epoch": 1.2757169544064213, "grad_norm": 0.28418412299316054, "learning_rate": 7.704532450892785e-06, "loss": 0.4714, "step": 7768 }, { "epoch": 1.2758811816147642, "grad_norm": 0.3165971984097728, "learning_rate": 7.704203187119183e-06, "loss": 0.4929, "step": 7769 }, { "epoch": 1.2760454088231068, "grad_norm": 0.36671004266675833, "learning_rate": 7.703873888544782e-06, "loss": 0.4695, "step": 7770 }, { "epoch": 1.2762096360314494, "grad_norm": 0.2693544975307786, "learning_rate": 7.703544555173158e-06, "loss": 0.476, "step": 7771 }, { "epoch": 1.2763738632397923, "grad_norm": 0.3594750493142773, "learning_rate": 7.703215187007889e-06, "loss": 0.4489, "step": 7772 }, { "epoch": 1.276538090448135, "grad_norm": 0.3247458570973557, "learning_rate": 7.70288578405255e-06, "loss": 0.4897, "step": 7773 }, { "epoch": 1.2767023176564778, "grad_norm": 0.31715952464757263, "learning_rate": 7.702556346310721e-06, "loss": 0.4879, "step": 7774 }, { "epoch": 1.2768665448648204, "grad_norm": 0.3040453912621078, "learning_rate": 7.70222687378598e-06, "loss": 0.4747, "step": 7775 }, { "epoch": 1.2770307720731633, "grad_norm": 0.35248094697874277, "learning_rate": 7.701897366481903e-06, "loss": 0.4936, "step": 7776 }, { "epoch": 1.277194999281506, "grad_norm": 0.28411064669334896, "learning_rate": 7.70156782440207e-06, "loss": 0.4849, "step": 7777 }, { "epoch": 1.2773592264898488, "grad_norm": 0.30851415055717885, "learning_rate": 7.701238247550064e-06, "loss": 0.4883, "step": 7778 }, { "epoch": 1.2775234536981914, "grad_norm": 0.30100077430210254, "learning_rate": 7.700908635929458e-06, "loss": 0.4926, "step": 7779 }, { "epoch": 1.277687680906534, "grad_norm": 0.3256247158390222, "learning_rate": 7.700578989543835e-06, "loss": 0.4769, "step": 7780 }, { "epoch": 1.277851908114877, "grad_norm": 0.3831007490176088, "learning_rate": 7.700249308396775e-06, "loss": 0.481, "step": 7781 }, { "epoch": 1.2780161353232198, "grad_norm": 0.2900015324255877, "learning_rate": 7.699919592491862e-06, "loss": 0.4581, "step": 7782 }, { "epoch": 1.2781803625315624, "grad_norm": 0.39968965012855234, "learning_rate": 7.699589841832671e-06, "loss": 0.475, "step": 7783 }, { "epoch": 1.278344589739905, "grad_norm": 0.2987916540815722, "learning_rate": 7.699260056422787e-06, "loss": 0.4799, "step": 7784 }, { "epoch": 1.278508816948248, "grad_norm": 0.3031895511173132, "learning_rate": 7.69893023626579e-06, "loss": 0.4982, "step": 7785 }, { "epoch": 1.2786730441565908, "grad_norm": 0.3036242316048294, "learning_rate": 7.698600381365264e-06, "loss": 0.4725, "step": 7786 }, { "epoch": 1.2788372713649334, "grad_norm": 0.29859693726042974, "learning_rate": 7.698270491724793e-06, "loss": 0.4765, "step": 7787 }, { "epoch": 1.279001498573276, "grad_norm": 0.2845879852561358, "learning_rate": 7.697940567347956e-06, "loss": 0.4864, "step": 7788 }, { "epoch": 1.279165725781619, "grad_norm": 0.28163700973601685, "learning_rate": 7.697610608238338e-06, "loss": 0.4703, "step": 7789 }, { "epoch": 1.2793299529899615, "grad_norm": 0.30036853676830083, "learning_rate": 7.697280614399523e-06, "loss": 0.4867, "step": 7790 }, { "epoch": 1.2794941801983044, "grad_norm": 0.28760448854099513, "learning_rate": 7.696950585835094e-06, "loss": 0.4448, "step": 7791 }, { "epoch": 1.279658407406647, "grad_norm": 0.33460690891254224, "learning_rate": 7.696620522548638e-06, "loss": 0.4686, "step": 7792 }, { "epoch": 1.27982263461499, "grad_norm": 0.27290421235975976, "learning_rate": 7.696290424543737e-06, "loss": 0.4735, "step": 7793 }, { "epoch": 1.2799868618233325, "grad_norm": 0.2896345427090911, "learning_rate": 7.695960291823978e-06, "loss": 0.4585, "step": 7794 }, { "epoch": 1.2801510890316754, "grad_norm": 0.2884151664673828, "learning_rate": 7.695630124392945e-06, "loss": 0.459, "step": 7795 }, { "epoch": 1.280315316240018, "grad_norm": 0.29439858268755925, "learning_rate": 7.695299922254224e-06, "loss": 0.4635, "step": 7796 }, { "epoch": 1.2804795434483607, "grad_norm": 0.27699445691313457, "learning_rate": 7.694969685411404e-06, "loss": 0.484, "step": 7797 }, { "epoch": 1.2806437706567035, "grad_norm": 0.24163729471658402, "learning_rate": 7.694639413868068e-06, "loss": 0.4605, "step": 7798 }, { "epoch": 1.2808079978650464, "grad_norm": 0.2896967081115697, "learning_rate": 7.694309107627806e-06, "loss": 0.4856, "step": 7799 }, { "epoch": 1.280972225073389, "grad_norm": 0.299584030087436, "learning_rate": 7.693978766694204e-06, "loss": 0.452, "step": 7800 }, { "epoch": 1.2811364522817317, "grad_norm": 0.3385113952854699, "learning_rate": 7.693648391070851e-06, "loss": 0.4516, "step": 7801 }, { "epoch": 1.2813006794900745, "grad_norm": 0.2772793930227556, "learning_rate": 7.693317980761334e-06, "loss": 0.4786, "step": 7802 }, { "epoch": 1.2814649066984174, "grad_norm": 0.3340403620740822, "learning_rate": 7.69298753576924e-06, "loss": 0.485, "step": 7803 }, { "epoch": 1.28162913390676, "grad_norm": 0.3315139362421689, "learning_rate": 7.692657056098163e-06, "loss": 0.4865, "step": 7804 }, { "epoch": 1.2817933611151027, "grad_norm": 0.36606393979166196, "learning_rate": 7.692326541751687e-06, "loss": 0.4633, "step": 7805 }, { "epoch": 1.2819575883234455, "grad_norm": 0.2958031648909456, "learning_rate": 7.691995992733404e-06, "loss": 0.4859, "step": 7806 }, { "epoch": 1.2821218155317882, "grad_norm": 0.29278420815146317, "learning_rate": 7.691665409046905e-06, "loss": 0.4689, "step": 7807 }, { "epoch": 1.282286042740131, "grad_norm": 0.26629433728377444, "learning_rate": 7.69133479069578e-06, "loss": 0.4652, "step": 7808 }, { "epoch": 1.2824502699484737, "grad_norm": 0.3085504664475122, "learning_rate": 7.691004137683617e-06, "loss": 0.4806, "step": 7809 }, { "epoch": 1.2826144971568165, "grad_norm": 0.31694594797826553, "learning_rate": 7.69067345001401e-06, "loss": 0.4751, "step": 7810 }, { "epoch": 1.2827787243651592, "grad_norm": 0.31604221751382056, "learning_rate": 7.690342727690553e-06, "loss": 0.4662, "step": 7811 }, { "epoch": 1.282942951573502, "grad_norm": 0.3576539317488584, "learning_rate": 7.690011970716833e-06, "loss": 0.483, "step": 7812 }, { "epoch": 1.2831071787818447, "grad_norm": 0.26640826289893155, "learning_rate": 7.689681179096443e-06, "loss": 0.4585, "step": 7813 }, { "epoch": 1.2832714059901873, "grad_norm": 0.24764426877232484, "learning_rate": 7.68935035283298e-06, "loss": 0.4626, "step": 7814 }, { "epoch": 1.2834356331985302, "grad_norm": 0.31402171978360294, "learning_rate": 7.689019491930033e-06, "loss": 0.4765, "step": 7815 }, { "epoch": 1.283599860406873, "grad_norm": 0.33472276789419364, "learning_rate": 7.688688596391197e-06, "loss": 0.4848, "step": 7816 }, { "epoch": 1.2837640876152157, "grad_norm": 0.31467631849619093, "learning_rate": 7.688357666220065e-06, "loss": 0.4696, "step": 7817 }, { "epoch": 1.2839283148235583, "grad_norm": 0.5620271778383376, "learning_rate": 7.688026701420233e-06, "loss": 0.4873, "step": 7818 }, { "epoch": 1.2840925420319012, "grad_norm": 0.30644020342381995, "learning_rate": 7.687695701995295e-06, "loss": 0.5001, "step": 7819 }, { "epoch": 1.284256769240244, "grad_norm": 0.31653718755727756, "learning_rate": 7.687364667948842e-06, "loss": 0.4556, "step": 7820 }, { "epoch": 1.2844209964485866, "grad_norm": 0.35087942099619734, "learning_rate": 7.687033599284475e-06, "loss": 0.4497, "step": 7821 }, { "epoch": 1.2845852236569293, "grad_norm": 0.28672157890856287, "learning_rate": 7.686702496005788e-06, "loss": 0.4981, "step": 7822 }, { "epoch": 1.2847494508652721, "grad_norm": 0.3079510574279276, "learning_rate": 7.686371358116374e-06, "loss": 0.4902, "step": 7823 }, { "epoch": 1.2849136780736148, "grad_norm": 0.44588293878026053, "learning_rate": 7.686040185619835e-06, "loss": 0.46, "step": 7824 }, { "epoch": 1.2850779052819576, "grad_norm": 0.872447022198342, "learning_rate": 7.685708978519764e-06, "loss": 0.459, "step": 7825 }, { "epoch": 1.2852421324903003, "grad_norm": 0.3303561048803215, "learning_rate": 7.68537773681976e-06, "loss": 0.4975, "step": 7826 }, { "epoch": 1.2854063596986431, "grad_norm": 0.26456162856250737, "learning_rate": 7.685046460523419e-06, "loss": 0.4888, "step": 7827 }, { "epoch": 1.2855705869069858, "grad_norm": 0.2816527615338236, "learning_rate": 7.684715149634339e-06, "loss": 0.4782, "step": 7828 }, { "epoch": 1.2857348141153286, "grad_norm": 0.3147258571690476, "learning_rate": 7.68438380415612e-06, "loss": 0.4844, "step": 7829 }, { "epoch": 1.2858990413236713, "grad_norm": 0.27745651039854247, "learning_rate": 7.68405242409236e-06, "loss": 0.4556, "step": 7830 }, { "epoch": 1.286063268532014, "grad_norm": 0.30628636649669544, "learning_rate": 7.683721009446657e-06, "loss": 0.4868, "step": 7831 }, { "epoch": 1.2862274957403568, "grad_norm": 0.42664379936326835, "learning_rate": 7.683389560222612e-06, "loss": 0.4784, "step": 7832 }, { "epoch": 1.2863917229486996, "grad_norm": 0.2680221112365354, "learning_rate": 7.683058076423825e-06, "loss": 0.4547, "step": 7833 }, { "epoch": 1.2865559501570423, "grad_norm": 0.28850192993663787, "learning_rate": 7.682726558053896e-06, "loss": 0.4816, "step": 7834 }, { "epoch": 1.286720177365385, "grad_norm": 0.3826116769850467, "learning_rate": 7.682395005116424e-06, "loss": 0.4604, "step": 7835 }, { "epoch": 1.2868844045737278, "grad_norm": 0.3269271472240874, "learning_rate": 7.682063417615011e-06, "loss": 0.4856, "step": 7836 }, { "epoch": 1.2870486317820706, "grad_norm": 0.2903246693555103, "learning_rate": 7.681731795553259e-06, "loss": 0.4717, "step": 7837 }, { "epoch": 1.2872128589904133, "grad_norm": 0.40635716934363103, "learning_rate": 7.681400138934768e-06, "loss": 0.4641, "step": 7838 }, { "epoch": 1.287377086198756, "grad_norm": 2.2006250297078624, "learning_rate": 7.681068447763143e-06, "loss": 0.4772, "step": 7839 }, { "epoch": 1.2875413134070988, "grad_norm": 0.25645959340248886, "learning_rate": 7.680736722041985e-06, "loss": 0.4555, "step": 7840 }, { "epoch": 1.2877055406154414, "grad_norm": 0.29428794743469366, "learning_rate": 7.680404961774898e-06, "loss": 0.4554, "step": 7841 }, { "epoch": 1.2878697678237843, "grad_norm": 0.504873026867374, "learning_rate": 7.680073166965482e-06, "loss": 0.4737, "step": 7842 }, { "epoch": 1.288033995032127, "grad_norm": 0.3206722968962623, "learning_rate": 7.679741337617344e-06, "loss": 0.4555, "step": 7843 }, { "epoch": 1.2881982222404698, "grad_norm": 0.31375416139386475, "learning_rate": 7.679409473734085e-06, "loss": 0.4731, "step": 7844 }, { "epoch": 1.2883624494488124, "grad_norm": 0.32465719686723155, "learning_rate": 7.67907757531931e-06, "loss": 0.4608, "step": 7845 }, { "epoch": 1.2885266766571553, "grad_norm": 0.2873096643272258, "learning_rate": 7.678745642376627e-06, "loss": 0.4706, "step": 7846 }, { "epoch": 1.288690903865498, "grad_norm": 0.28816537852812213, "learning_rate": 7.67841367490964e-06, "loss": 0.4673, "step": 7847 }, { "epoch": 1.2888551310738405, "grad_norm": 0.27675067172937057, "learning_rate": 7.67808167292195e-06, "loss": 0.4654, "step": 7848 }, { "epoch": 1.2890193582821834, "grad_norm": 0.3456711175077695, "learning_rate": 7.67774963641717e-06, "loss": 0.4813, "step": 7849 }, { "epoch": 1.2891835854905263, "grad_norm": 0.3354036100274437, "learning_rate": 7.677417565398899e-06, "loss": 0.4839, "step": 7850 }, { "epoch": 1.289347812698869, "grad_norm": 0.6324881168177896, "learning_rate": 7.677085459870749e-06, "loss": 0.4587, "step": 7851 }, { "epoch": 1.2895120399072115, "grad_norm": 0.3504917226050069, "learning_rate": 7.676753319836324e-06, "loss": 0.4733, "step": 7852 }, { "epoch": 1.2896762671155544, "grad_norm": 0.44980333945469475, "learning_rate": 7.676421145299233e-06, "loss": 0.4676, "step": 7853 }, { "epoch": 1.2898404943238972, "grad_norm": 0.5372650657765722, "learning_rate": 7.676088936263084e-06, "loss": 0.4819, "step": 7854 }, { "epoch": 1.2900047215322399, "grad_norm": 0.3343313623537713, "learning_rate": 7.675756692731483e-06, "loss": 0.4794, "step": 7855 }, { "epoch": 1.2901689487405825, "grad_norm": 0.3946859710241651, "learning_rate": 7.67542441470804e-06, "loss": 0.476, "step": 7856 }, { "epoch": 1.2903331759489254, "grad_norm": 0.2813570879176587, "learning_rate": 7.675092102196365e-06, "loss": 0.4871, "step": 7857 }, { "epoch": 1.290497403157268, "grad_norm": 0.3379571926965503, "learning_rate": 7.674759755200064e-06, "loss": 0.4802, "step": 7858 }, { "epoch": 1.2906616303656109, "grad_norm": 0.44337435809416115, "learning_rate": 7.67442737372275e-06, "loss": 0.5086, "step": 7859 }, { "epoch": 1.2908258575739535, "grad_norm": 0.2620657068472747, "learning_rate": 7.674094957768031e-06, "loss": 0.4507, "step": 7860 }, { "epoch": 1.2909900847822964, "grad_norm": 0.39448810420551583, "learning_rate": 7.673762507339517e-06, "loss": 0.4757, "step": 7861 }, { "epoch": 1.291154311990639, "grad_norm": 0.27602612398995907, "learning_rate": 7.67343002244082e-06, "loss": 0.4791, "step": 7862 }, { "epoch": 1.2913185391989819, "grad_norm": 0.30145053047947384, "learning_rate": 7.67309750307555e-06, "loss": 0.4637, "step": 7863 }, { "epoch": 1.2914827664073245, "grad_norm": 0.28617067481182934, "learning_rate": 7.672764949247322e-06, "loss": 0.4892, "step": 7864 }, { "epoch": 1.2916469936156671, "grad_norm": 0.30999809485905655, "learning_rate": 7.672432360959743e-06, "loss": 0.5012, "step": 7865 }, { "epoch": 1.29181122082401, "grad_norm": 0.32598640649592947, "learning_rate": 7.672099738216427e-06, "loss": 0.4727, "step": 7866 }, { "epoch": 1.2919754480323529, "grad_norm": 0.4536033430129751, "learning_rate": 7.671767081020988e-06, "loss": 0.4617, "step": 7867 }, { "epoch": 1.2921396752406955, "grad_norm": 0.2727844305112922, "learning_rate": 7.671434389377038e-06, "loss": 0.4555, "step": 7868 }, { "epoch": 1.2923039024490381, "grad_norm": 0.3128387369899632, "learning_rate": 7.67110166328819e-06, "loss": 0.4667, "step": 7869 }, { "epoch": 1.292468129657381, "grad_norm": 0.314249956724488, "learning_rate": 7.670768902758058e-06, "loss": 0.4856, "step": 7870 }, { "epoch": 1.2926323568657239, "grad_norm": 0.4280519629179726, "learning_rate": 7.670436107790254e-06, "loss": 0.4689, "step": 7871 }, { "epoch": 1.2927965840740665, "grad_norm": 0.289677597791496, "learning_rate": 7.670103278388398e-06, "loss": 0.4686, "step": 7872 }, { "epoch": 1.2929608112824091, "grad_norm": 0.2946877863022075, "learning_rate": 7.6697704145561e-06, "loss": 0.5096, "step": 7873 }, { "epoch": 1.293125038490752, "grad_norm": 0.2894735076390649, "learning_rate": 7.669437516296976e-06, "loss": 0.4614, "step": 7874 }, { "epoch": 1.2932892656990946, "grad_norm": 0.3028986464007853, "learning_rate": 7.669104583614642e-06, "loss": 0.4541, "step": 7875 }, { "epoch": 1.2934534929074375, "grad_norm": 0.25582922737852953, "learning_rate": 7.668771616512716e-06, "loss": 0.4725, "step": 7876 }, { "epoch": 1.2936177201157801, "grad_norm": 0.33755488887573115, "learning_rate": 7.668438614994812e-06, "loss": 0.4787, "step": 7877 }, { "epoch": 1.293781947324123, "grad_norm": 0.32217811133152874, "learning_rate": 7.668105579064546e-06, "loss": 0.4938, "step": 7878 }, { "epoch": 1.2939461745324656, "grad_norm": 0.30142798654133546, "learning_rate": 7.667772508725538e-06, "loss": 0.4816, "step": 7879 }, { "epoch": 1.2941104017408085, "grad_norm": 0.26134890830428037, "learning_rate": 7.667439403981402e-06, "loss": 0.4805, "step": 7880 }, { "epoch": 1.2942746289491511, "grad_norm": 0.2859171992401748, "learning_rate": 7.66710626483576e-06, "loss": 0.4585, "step": 7881 }, { "epoch": 1.2944388561574938, "grad_norm": 0.27490980788645614, "learning_rate": 7.666773091292227e-06, "loss": 0.4662, "step": 7882 }, { "epoch": 1.2946030833658366, "grad_norm": 0.29612051837616454, "learning_rate": 7.666439883354421e-06, "loss": 0.4681, "step": 7883 }, { "epoch": 1.2947673105741795, "grad_norm": 0.3558706635350218, "learning_rate": 7.666106641025965e-06, "loss": 0.4668, "step": 7884 }, { "epoch": 1.2949315377825221, "grad_norm": 0.29081743807681926, "learning_rate": 7.665773364310476e-06, "loss": 0.4673, "step": 7885 }, { "epoch": 1.2950957649908648, "grad_norm": 0.36587349094750693, "learning_rate": 7.665440053211571e-06, "loss": 0.472, "step": 7886 }, { "epoch": 1.2952599921992076, "grad_norm": 0.3846805506898003, "learning_rate": 7.665106707732875e-06, "loss": 0.478, "step": 7887 }, { "epoch": 1.2954242194075505, "grad_norm": 0.4763302799103845, "learning_rate": 7.664773327878005e-06, "loss": 0.4822, "step": 7888 }, { "epoch": 1.2955884466158931, "grad_norm": 0.2659489183122764, "learning_rate": 7.664439913650583e-06, "loss": 0.4482, "step": 7889 }, { "epoch": 1.2957526738242358, "grad_norm": 0.3211381900041531, "learning_rate": 7.66410646505423e-06, "loss": 0.4634, "step": 7890 }, { "epoch": 1.2959169010325786, "grad_norm": 0.3289264312374959, "learning_rate": 7.663772982092569e-06, "loss": 0.4912, "step": 7891 }, { "epoch": 1.2960811282409213, "grad_norm": 0.2874286485731477, "learning_rate": 7.66343946476922e-06, "loss": 0.4758, "step": 7892 }, { "epoch": 1.2962453554492641, "grad_norm": 0.29145863615207435, "learning_rate": 7.663105913087804e-06, "loss": 0.4948, "step": 7893 }, { "epoch": 1.2964095826576068, "grad_norm": 0.3151056218253248, "learning_rate": 7.662772327051947e-06, "loss": 0.4864, "step": 7894 }, { "epoch": 1.2965738098659496, "grad_norm": 0.5383538039811437, "learning_rate": 7.662438706665272e-06, "loss": 0.4827, "step": 7895 }, { "epoch": 1.2967380370742922, "grad_norm": 0.2904499074975075, "learning_rate": 7.662105051931401e-06, "loss": 0.4577, "step": 7896 }, { "epoch": 1.296902264282635, "grad_norm": 0.3404388178106021, "learning_rate": 7.661771362853958e-06, "loss": 0.4973, "step": 7897 }, { "epoch": 1.2970664914909777, "grad_norm": 0.28420348043237087, "learning_rate": 7.661437639436565e-06, "loss": 0.4746, "step": 7898 }, { "epoch": 1.2972307186993204, "grad_norm": 0.31300648105957757, "learning_rate": 7.661103881682851e-06, "loss": 0.4718, "step": 7899 }, { "epoch": 1.2973949459076632, "grad_norm": 0.2617389825483887, "learning_rate": 7.660770089596437e-06, "loss": 0.5036, "step": 7900 }, { "epoch": 1.297559173116006, "grad_norm": 0.3558329374409586, "learning_rate": 7.660436263180954e-06, "loss": 0.4704, "step": 7901 }, { "epoch": 1.2977234003243487, "grad_norm": 0.3296553098157969, "learning_rate": 7.66010240244002e-06, "loss": 0.4836, "step": 7902 }, { "epoch": 1.2978876275326914, "grad_norm": 0.40087534912194606, "learning_rate": 7.659768507377265e-06, "loss": 0.4807, "step": 7903 }, { "epoch": 1.2980518547410342, "grad_norm": 0.29088669339411083, "learning_rate": 7.659434577996318e-06, "loss": 0.4739, "step": 7904 }, { "epoch": 1.298216081949377, "grad_norm": 0.2990246246306228, "learning_rate": 7.659100614300798e-06, "loss": 0.4529, "step": 7905 }, { "epoch": 1.2983803091577197, "grad_norm": 0.29901740330002713, "learning_rate": 7.658766616294343e-06, "loss": 0.4725, "step": 7906 }, { "epoch": 1.2985445363660624, "grad_norm": 0.30059464854419815, "learning_rate": 7.65843258398057e-06, "loss": 0.4856, "step": 7907 }, { "epoch": 1.2987087635744052, "grad_norm": 0.6435372599150829, "learning_rate": 7.658098517363115e-06, "loss": 0.4717, "step": 7908 }, { "epoch": 1.2988729907827479, "grad_norm": 0.3214157341766446, "learning_rate": 7.657764416445601e-06, "loss": 0.4679, "step": 7909 }, { "epoch": 1.2990372179910907, "grad_norm": 0.324097294636231, "learning_rate": 7.65743028123166e-06, "loss": 0.458, "step": 7910 }, { "epoch": 1.2992014451994334, "grad_norm": 0.3110100490491375, "learning_rate": 7.657096111724917e-06, "loss": 0.46, "step": 7911 }, { "epoch": 1.2993656724077762, "grad_norm": 0.3124904806207106, "learning_rate": 7.656761907929006e-06, "loss": 0.4666, "step": 7912 }, { "epoch": 1.2995298996161189, "grad_norm": 0.29953706182151885, "learning_rate": 7.656427669847557e-06, "loss": 0.5017, "step": 7913 }, { "epoch": 1.2996941268244617, "grad_norm": 0.3084623975455468, "learning_rate": 7.656093397484195e-06, "loss": 0.4756, "step": 7914 }, { "epoch": 1.2998583540328044, "grad_norm": 0.2995223537477973, "learning_rate": 7.655759090842554e-06, "loss": 0.4865, "step": 7915 }, { "epoch": 1.300022581241147, "grad_norm": 0.34171160928676775, "learning_rate": 7.655424749926265e-06, "loss": 0.4924, "step": 7916 }, { "epoch": 1.3001868084494899, "grad_norm": 0.37593951569139283, "learning_rate": 7.655090374738958e-06, "loss": 0.4758, "step": 7917 }, { "epoch": 1.3003510356578327, "grad_norm": 0.27259654106749204, "learning_rate": 7.654755965284266e-06, "loss": 0.4855, "step": 7918 }, { "epoch": 1.3005152628661754, "grad_norm": 0.26982803154783935, "learning_rate": 7.65442152156582e-06, "loss": 0.4638, "step": 7919 }, { "epoch": 1.300679490074518, "grad_norm": 0.3358575968821194, "learning_rate": 7.654087043587253e-06, "loss": 0.4822, "step": 7920 }, { "epoch": 1.3008437172828609, "grad_norm": 0.3025242716671943, "learning_rate": 7.653752531352197e-06, "loss": 0.4854, "step": 7921 }, { "epoch": 1.3010079444912037, "grad_norm": 0.31977526421106844, "learning_rate": 7.653417984864286e-06, "loss": 0.4781, "step": 7922 }, { "epoch": 1.3011721716995464, "grad_norm": 0.4255861102576953, "learning_rate": 7.653083404127154e-06, "loss": 0.4695, "step": 7923 }, { "epoch": 1.301336398907889, "grad_norm": 0.32376807035451516, "learning_rate": 7.652748789144432e-06, "loss": 0.4724, "step": 7924 }, { "epoch": 1.3015006261162319, "grad_norm": 0.2621931897315843, "learning_rate": 7.652414139919758e-06, "loss": 0.483, "step": 7925 }, { "epoch": 1.3016648533245745, "grad_norm": 0.32369868128194706, "learning_rate": 7.652079456456765e-06, "loss": 0.4788, "step": 7926 }, { "epoch": 1.3018290805329173, "grad_norm": 0.29915004458420025, "learning_rate": 7.651744738759086e-06, "loss": 0.4656, "step": 7927 }, { "epoch": 1.30199330774126, "grad_norm": 0.3552626648407864, "learning_rate": 7.65140998683036e-06, "loss": 0.4942, "step": 7928 }, { "epoch": 1.3021575349496028, "grad_norm": 0.28752600760195246, "learning_rate": 7.65107520067422e-06, "loss": 0.4736, "step": 7929 }, { "epoch": 1.3023217621579455, "grad_norm": 0.3278934170298695, "learning_rate": 7.650740380294304e-06, "loss": 0.4824, "step": 7930 }, { "epoch": 1.3024859893662883, "grad_norm": 0.29068672152221015, "learning_rate": 7.650405525694247e-06, "loss": 0.4952, "step": 7931 }, { "epoch": 1.302650216574631, "grad_norm": 0.36659481274056716, "learning_rate": 7.650070636877686e-06, "loss": 0.4905, "step": 7932 }, { "epoch": 1.3028144437829736, "grad_norm": 0.318759886416517, "learning_rate": 7.64973571384826e-06, "loss": 0.4993, "step": 7933 }, { "epoch": 1.3029786709913165, "grad_norm": 0.4421733290213734, "learning_rate": 7.649400756609603e-06, "loss": 0.4704, "step": 7934 }, { "epoch": 1.3031428981996593, "grad_norm": 0.4854735090770185, "learning_rate": 7.649065765165356e-06, "loss": 0.4819, "step": 7935 }, { "epoch": 1.303307125408002, "grad_norm": 0.3538652860607975, "learning_rate": 7.648730739519159e-06, "loss": 0.4599, "step": 7936 }, { "epoch": 1.3034713526163446, "grad_norm": 0.2711419157816641, "learning_rate": 7.648395679674645e-06, "loss": 0.4653, "step": 7937 }, { "epoch": 1.3036355798246875, "grad_norm": 0.28791299089951694, "learning_rate": 7.648060585635457e-06, "loss": 0.467, "step": 7938 }, { "epoch": 1.3037998070330303, "grad_norm": 0.3147023306380566, "learning_rate": 7.647725457405235e-06, "loss": 0.4785, "step": 7939 }, { "epoch": 1.303964034241373, "grad_norm": 0.33694405249481874, "learning_rate": 7.647390294987618e-06, "loss": 0.4759, "step": 7940 }, { "epoch": 1.3041282614497156, "grad_norm": 0.3704706312364689, "learning_rate": 7.647055098386243e-06, "loss": 0.4731, "step": 7941 }, { "epoch": 1.3042924886580585, "grad_norm": 0.2981416710176911, "learning_rate": 7.646719867604756e-06, "loss": 0.4727, "step": 7942 }, { "epoch": 1.304456715866401, "grad_norm": 0.4236048668542745, "learning_rate": 7.646384602646794e-06, "loss": 0.491, "step": 7943 }, { "epoch": 1.304620943074744, "grad_norm": 0.25920675415941064, "learning_rate": 7.646049303516001e-06, "loss": 0.4647, "step": 7944 }, { "epoch": 1.3047851702830866, "grad_norm": 0.37288961353740263, "learning_rate": 7.645713970216015e-06, "loss": 0.4729, "step": 7945 }, { "epoch": 1.3049493974914295, "grad_norm": 0.30280530467800054, "learning_rate": 7.645378602750481e-06, "loss": 0.4951, "step": 7946 }, { "epoch": 1.305113624699772, "grad_norm": 0.32637097512312524, "learning_rate": 7.645043201123042e-06, "loss": 0.4863, "step": 7947 }, { "epoch": 1.305277851908115, "grad_norm": 0.46033399936430586, "learning_rate": 7.64470776533734e-06, "loss": 0.4814, "step": 7948 }, { "epoch": 1.3054420791164576, "grad_norm": 0.31590207279548094, "learning_rate": 7.644372295397015e-06, "loss": 0.4821, "step": 7949 }, { "epoch": 1.3056063063248002, "grad_norm": 0.31853698788592905, "learning_rate": 7.644036791305715e-06, "loss": 0.4544, "step": 7950 }, { "epoch": 1.305770533533143, "grad_norm": 0.28684806650613004, "learning_rate": 7.643701253067082e-06, "loss": 0.4912, "step": 7951 }, { "epoch": 1.305934760741486, "grad_norm": 0.43154634307997536, "learning_rate": 7.64336568068476e-06, "loss": 0.4867, "step": 7952 }, { "epoch": 1.3060989879498286, "grad_norm": 0.3614722309268757, "learning_rate": 7.643030074162395e-06, "loss": 0.4644, "step": 7953 }, { "epoch": 1.3062632151581712, "grad_norm": 0.2847346531183879, "learning_rate": 7.64269443350363e-06, "loss": 0.4463, "step": 7954 }, { "epoch": 1.306427442366514, "grad_norm": 0.33721951652964166, "learning_rate": 7.642358758712112e-06, "loss": 0.4786, "step": 7955 }, { "epoch": 1.306591669574857, "grad_norm": 0.29914397820153493, "learning_rate": 7.642023049791485e-06, "loss": 0.4622, "step": 7956 }, { "epoch": 1.3067558967831996, "grad_norm": 0.2687785996804088, "learning_rate": 7.641687306745399e-06, "loss": 0.4709, "step": 7957 }, { "epoch": 1.3069201239915422, "grad_norm": 0.31963062378427437, "learning_rate": 7.641351529577494e-06, "loss": 0.468, "step": 7958 }, { "epoch": 1.307084351199885, "grad_norm": 0.545023022599308, "learning_rate": 7.641015718291425e-06, "loss": 0.4806, "step": 7959 }, { "epoch": 1.3072485784082277, "grad_norm": 0.395362345706082, "learning_rate": 7.640679872890832e-06, "loss": 0.4628, "step": 7960 }, { "epoch": 1.3074128056165706, "grad_norm": 0.466981966839366, "learning_rate": 7.640343993379368e-06, "loss": 0.4682, "step": 7961 }, { "epoch": 1.3075770328249132, "grad_norm": 0.28939268969208437, "learning_rate": 7.640008079760676e-06, "loss": 0.4592, "step": 7962 }, { "epoch": 1.307741260033256, "grad_norm": 0.33538113413961285, "learning_rate": 7.639672132038407e-06, "loss": 0.4806, "step": 7963 }, { "epoch": 1.3079054872415987, "grad_norm": 0.32099795596270486, "learning_rate": 7.639336150216211e-06, "loss": 0.482, "step": 7964 }, { "epoch": 1.3080697144499416, "grad_norm": 0.553802750895834, "learning_rate": 7.639000134297735e-06, "loss": 0.4876, "step": 7965 }, { "epoch": 1.3082339416582842, "grad_norm": 0.34601641793654353, "learning_rate": 7.638664084286629e-06, "loss": 0.4836, "step": 7966 }, { "epoch": 1.3083981688666269, "grad_norm": 0.2989365043791791, "learning_rate": 7.638328000186545e-06, "loss": 0.4683, "step": 7967 }, { "epoch": 1.3085623960749697, "grad_norm": 0.3029078532588946, "learning_rate": 7.63799188200113e-06, "loss": 0.4701, "step": 7968 }, { "epoch": 1.3087266232833126, "grad_norm": 0.4104382751629648, "learning_rate": 7.637655729734036e-06, "loss": 0.4682, "step": 7969 }, { "epoch": 1.3088908504916552, "grad_norm": 0.3401069351664016, "learning_rate": 7.637319543388913e-06, "loss": 0.4816, "step": 7970 }, { "epoch": 1.3090550776999978, "grad_norm": 0.36051234077198613, "learning_rate": 7.63698332296941e-06, "loss": 0.4704, "step": 7971 }, { "epoch": 1.3092193049083407, "grad_norm": 0.2680635234976543, "learning_rate": 7.636647068479188e-06, "loss": 0.4862, "step": 7972 }, { "epoch": 1.3093835321166836, "grad_norm": 0.35280649807851294, "learning_rate": 7.636310779921889e-06, "loss": 0.4844, "step": 7973 }, { "epoch": 1.3095477593250262, "grad_norm": 0.3241893700421079, "learning_rate": 7.63597445730117e-06, "loss": 0.482, "step": 7974 }, { "epoch": 1.3097119865333688, "grad_norm": 0.25658079785257215, "learning_rate": 7.635638100620683e-06, "loss": 0.4656, "step": 7975 }, { "epoch": 1.3098762137417117, "grad_norm": 0.340790886688055, "learning_rate": 7.63530170988408e-06, "loss": 0.4806, "step": 7976 }, { "epoch": 1.3100404409500543, "grad_norm": 0.29263625469920773, "learning_rate": 7.634965285095018e-06, "loss": 0.4735, "step": 7977 }, { "epoch": 1.3102046681583972, "grad_norm": 0.31652479596424954, "learning_rate": 7.634628826257148e-06, "loss": 0.4759, "step": 7978 }, { "epoch": 1.3103688953667398, "grad_norm": 0.3299859845201978, "learning_rate": 7.634292333374123e-06, "loss": 0.4708, "step": 7979 }, { "epoch": 1.3105331225750827, "grad_norm": 0.272066769077068, "learning_rate": 7.633955806449603e-06, "loss": 0.4941, "step": 7980 }, { "epoch": 1.3106973497834253, "grad_norm": 0.31916471197677193, "learning_rate": 7.633619245487237e-06, "loss": 0.4492, "step": 7981 }, { "epoch": 1.3108615769917682, "grad_norm": 0.41008229525586465, "learning_rate": 7.633282650490684e-06, "loss": 0.4817, "step": 7982 }, { "epoch": 1.3110258042001108, "grad_norm": 0.46510580333747115, "learning_rate": 7.632946021463597e-06, "loss": 0.4693, "step": 7983 }, { "epoch": 1.3111900314084535, "grad_norm": 0.26202627385773686, "learning_rate": 7.632609358409637e-06, "loss": 0.4582, "step": 7984 }, { "epoch": 1.3113542586167963, "grad_norm": 0.5539807098934775, "learning_rate": 7.632272661332454e-06, "loss": 0.4755, "step": 7985 }, { "epoch": 1.3115184858251392, "grad_norm": 0.28471144410474203, "learning_rate": 7.63193593023571e-06, "loss": 0.4628, "step": 7986 }, { "epoch": 1.3116827130334818, "grad_norm": 0.2806760601317828, "learning_rate": 7.631599165123058e-06, "loss": 0.4716, "step": 7987 }, { "epoch": 1.3118469402418245, "grad_norm": 0.3691693460679819, "learning_rate": 7.631262365998161e-06, "loss": 0.4798, "step": 7988 }, { "epoch": 1.3120111674501673, "grad_norm": 0.2821735353713559, "learning_rate": 7.630925532864672e-06, "loss": 0.4818, "step": 7989 }, { "epoch": 1.3121753946585102, "grad_norm": 0.34444638448337844, "learning_rate": 7.630588665726253e-06, "loss": 0.4925, "step": 7990 }, { "epoch": 1.3123396218668528, "grad_norm": 0.33577779366945215, "learning_rate": 7.63025176458656e-06, "loss": 0.4806, "step": 7991 }, { "epoch": 1.3125038490751955, "grad_norm": 0.30897078274720075, "learning_rate": 7.629914829449253e-06, "loss": 0.4806, "step": 7992 }, { "epoch": 1.3126680762835383, "grad_norm": 0.390373369768928, "learning_rate": 7.629577860317991e-06, "loss": 0.5017, "step": 7993 }, { "epoch": 1.312832303491881, "grad_norm": 0.3872526842133121, "learning_rate": 7.6292408571964354e-06, "loss": 0.4827, "step": 7994 }, { "epoch": 1.3129965307002238, "grad_norm": 0.32955819048590035, "learning_rate": 7.6289038200882445e-06, "loss": 0.4687, "step": 7995 }, { "epoch": 1.3131607579085665, "grad_norm": 0.31306415443858815, "learning_rate": 7.628566748997081e-06, "loss": 0.4652, "step": 7996 }, { "epoch": 1.3133249851169093, "grad_norm": 0.34561030406592913, "learning_rate": 7.628229643926603e-06, "loss": 0.475, "step": 7997 }, { "epoch": 1.313489212325252, "grad_norm": 0.35034799088508667, "learning_rate": 7.627892504880474e-06, "loss": 0.4701, "step": 7998 }, { "epoch": 1.3136534395335948, "grad_norm": 0.2711573129803233, "learning_rate": 7.627555331862355e-06, "loss": 0.4778, "step": 7999 }, { "epoch": 1.3138176667419375, "grad_norm": 0.376753307536093, "learning_rate": 7.627218124875908e-06, "loss": 0.4861, "step": 8000 }, { "epoch": 1.31398189395028, "grad_norm": 0.3478626411436554, "learning_rate": 7.626880883924795e-06, "loss": 0.4648, "step": 8001 }, { "epoch": 1.314146121158623, "grad_norm": 0.29665293395553316, "learning_rate": 7.62654360901268e-06, "loss": 0.484, "step": 8002 }, { "epoch": 1.3143103483669658, "grad_norm": 0.3763583038100201, "learning_rate": 7.626206300143224e-06, "loss": 0.4835, "step": 8003 }, { "epoch": 1.3144745755753084, "grad_norm": 0.33950295810942993, "learning_rate": 7.625868957320092e-06, "loss": 0.4712, "step": 8004 }, { "epoch": 1.314638802783651, "grad_norm": 0.2764303498324206, "learning_rate": 7.62553158054695e-06, "loss": 0.4672, "step": 8005 }, { "epoch": 1.314803029991994, "grad_norm": 0.2943201514727324, "learning_rate": 7.625194169827458e-06, "loss": 0.488, "step": 8006 }, { "epoch": 1.3149672572003368, "grad_norm": 0.3697002180213209, "learning_rate": 7.6248567251652825e-06, "loss": 0.4727, "step": 8007 }, { "epoch": 1.3151314844086794, "grad_norm": 0.3757351595733627, "learning_rate": 7.6245192465640885e-06, "loss": 0.4865, "step": 8008 }, { "epoch": 1.315295711617022, "grad_norm": 0.3738711600535506, "learning_rate": 7.624181734027541e-06, "loss": 0.4717, "step": 8009 }, { "epoch": 1.315459938825365, "grad_norm": 0.28050608814768224, "learning_rate": 7.623844187559308e-06, "loss": 0.5125, "step": 8010 }, { "epoch": 1.3156241660337076, "grad_norm": 0.2919003234877574, "learning_rate": 7.623506607163052e-06, "loss": 0.4539, "step": 8011 }, { "epoch": 1.3157883932420504, "grad_norm": 0.3622981290018002, "learning_rate": 7.6231689928424415e-06, "loss": 0.4625, "step": 8012 }, { "epoch": 1.315952620450393, "grad_norm": 0.3516883118340059, "learning_rate": 7.622831344601143e-06, "loss": 0.4845, "step": 8013 }, { "epoch": 1.316116847658736, "grad_norm": 0.29660929220781496, "learning_rate": 7.622493662442823e-06, "loss": 0.4642, "step": 8014 }, { "epoch": 1.3162810748670786, "grad_norm": 1.2368299556551743, "learning_rate": 7.622155946371151e-06, "loss": 0.4798, "step": 8015 }, { "epoch": 1.3164453020754214, "grad_norm": 0.29717759444454805, "learning_rate": 7.621818196389793e-06, "loss": 0.4756, "step": 8016 }, { "epoch": 1.316609529283764, "grad_norm": 0.3918627797621546, "learning_rate": 7.621480412502418e-06, "loss": 0.4788, "step": 8017 }, { "epoch": 1.3167737564921067, "grad_norm": 0.29543561155814907, "learning_rate": 7.621142594712694e-06, "loss": 0.4981, "step": 8018 }, { "epoch": 1.3169379837004496, "grad_norm": 0.37159328406059056, "learning_rate": 7.620804743024291e-06, "loss": 0.4948, "step": 8019 }, { "epoch": 1.3171022109087924, "grad_norm": 0.2783048589916877, "learning_rate": 7.620466857440879e-06, "loss": 0.4717, "step": 8020 }, { "epoch": 1.317266438117135, "grad_norm": 0.306567811369454, "learning_rate": 7.620128937966125e-06, "loss": 0.4724, "step": 8021 }, { "epoch": 1.3174306653254777, "grad_norm": 0.29327978868608195, "learning_rate": 7.619790984603702e-06, "loss": 0.4633, "step": 8022 }, { "epoch": 1.3175948925338206, "grad_norm": 0.3917006265643016, "learning_rate": 7.61945299735728e-06, "loss": 0.4604, "step": 8023 }, { "epoch": 1.3177591197421634, "grad_norm": 0.2667837817112557, "learning_rate": 7.619114976230528e-06, "loss": 0.4541, "step": 8024 }, { "epoch": 1.317923346950506, "grad_norm": 0.35010204950872126, "learning_rate": 7.6187769212271194e-06, "loss": 0.4935, "step": 8025 }, { "epoch": 1.3180875741588487, "grad_norm": 0.29046284897377767, "learning_rate": 7.618438832350725e-06, "loss": 0.4803, "step": 8026 }, { "epoch": 1.3182518013671916, "grad_norm": 0.33231245117304176, "learning_rate": 7.618100709605017e-06, "loss": 0.469, "step": 8027 }, { "epoch": 1.3184160285755342, "grad_norm": 0.29343771355212916, "learning_rate": 7.617762552993667e-06, "loss": 0.4845, "step": 8028 }, { "epoch": 1.318580255783877, "grad_norm": 0.3313127989483271, "learning_rate": 7.617424362520349e-06, "loss": 0.4435, "step": 8029 }, { "epoch": 1.3187444829922197, "grad_norm": 0.28290016169336146, "learning_rate": 7.617086138188733e-06, "loss": 0.4678, "step": 8030 }, { "epoch": 1.3189087102005626, "grad_norm": 0.38666467517347725, "learning_rate": 7.616747880002497e-06, "loss": 0.4674, "step": 8031 }, { "epoch": 1.3190729374089052, "grad_norm": 0.356353790147301, "learning_rate": 7.616409587965312e-06, "loss": 0.4725, "step": 8032 }, { "epoch": 1.319237164617248, "grad_norm": 0.40378474599862085, "learning_rate": 7.616071262080853e-06, "loss": 0.4639, "step": 8033 }, { "epoch": 1.3194013918255907, "grad_norm": 0.3306098327327851, "learning_rate": 7.6157329023527925e-06, "loss": 0.4802, "step": 8034 }, { "epoch": 1.3195656190339333, "grad_norm": 0.35127743283140145, "learning_rate": 7.61539450878481e-06, "loss": 0.4667, "step": 8035 }, { "epoch": 1.3197298462422762, "grad_norm": 0.4309638643922434, "learning_rate": 7.615056081380577e-06, "loss": 0.4838, "step": 8036 }, { "epoch": 1.319894073450619, "grad_norm": 0.3355719844565293, "learning_rate": 7.6147176201437695e-06, "loss": 0.4957, "step": 8037 }, { "epoch": 1.3200583006589617, "grad_norm": 0.5136141703832557, "learning_rate": 7.614379125078063e-06, "loss": 0.4786, "step": 8038 }, { "epoch": 1.3202225278673043, "grad_norm": 0.4938624681910936, "learning_rate": 7.614040596187138e-06, "loss": 0.4649, "step": 8039 }, { "epoch": 1.3203867550756472, "grad_norm": 0.3426636485394977, "learning_rate": 7.613702033474667e-06, "loss": 0.4791, "step": 8040 }, { "epoch": 1.32055098228399, "grad_norm": 0.29924421972371945, "learning_rate": 7.613363436944328e-06, "loss": 0.4634, "step": 8041 }, { "epoch": 1.3207152094923327, "grad_norm": 0.37481981284715726, "learning_rate": 7.613024806599799e-06, "loss": 0.4718, "step": 8042 }, { "epoch": 1.3208794367006753, "grad_norm": 0.2831538659040023, "learning_rate": 7.612686142444757e-06, "loss": 0.4685, "step": 8043 }, { "epoch": 1.3210436639090182, "grad_norm": 0.29528741651177665, "learning_rate": 7.612347444482883e-06, "loss": 0.4519, "step": 8044 }, { "epoch": 1.3212078911173608, "grad_norm": 0.34103817691768396, "learning_rate": 7.612008712717853e-06, "loss": 0.4884, "step": 8045 }, { "epoch": 1.3213721183257037, "grad_norm": 0.32873295054672647, "learning_rate": 7.611669947153346e-06, "loss": 0.4951, "step": 8046 }, { "epoch": 1.3215363455340463, "grad_norm": 0.3487948323682672, "learning_rate": 7.611331147793042e-06, "loss": 0.4863, "step": 8047 }, { "epoch": 1.3217005727423892, "grad_norm": 0.3518588634886226, "learning_rate": 7.610992314640621e-06, "loss": 0.4878, "step": 8048 }, { "epoch": 1.3218647999507318, "grad_norm": 0.3851542782176887, "learning_rate": 7.610653447699763e-06, "loss": 0.4799, "step": 8049 }, { "epoch": 1.3220290271590747, "grad_norm": 0.30015345022801126, "learning_rate": 7.610314546974146e-06, "loss": 0.4742, "step": 8050 }, { "epoch": 1.3221932543674173, "grad_norm": 0.3396771981317586, "learning_rate": 7.6099756124674555e-06, "loss": 0.4692, "step": 8051 }, { "epoch": 1.32235748157576, "grad_norm": 0.37353944059171346, "learning_rate": 7.6096366441833686e-06, "loss": 0.4923, "step": 8052 }, { "epoch": 1.3225217087841028, "grad_norm": 0.3611586443903467, "learning_rate": 7.609297642125568e-06, "loss": 0.4695, "step": 8053 }, { "epoch": 1.3226859359924457, "grad_norm": 0.3391887708192555, "learning_rate": 7.6089586062977375e-06, "loss": 0.4752, "step": 8054 }, { "epoch": 1.3228501632007883, "grad_norm": 0.3614387838925698, "learning_rate": 7.608619536703557e-06, "loss": 0.4739, "step": 8055 }, { "epoch": 1.323014390409131, "grad_norm": 0.3365756410105476, "learning_rate": 7.608280433346709e-06, "loss": 0.4798, "step": 8056 }, { "epoch": 1.3231786176174738, "grad_norm": 1.238861339205873, "learning_rate": 7.607941296230878e-06, "loss": 0.4753, "step": 8057 }, { "epoch": 1.3233428448258167, "grad_norm": 0.3307128371330611, "learning_rate": 7.6076021253597465e-06, "loss": 0.4914, "step": 8058 }, { "epoch": 1.3235070720341593, "grad_norm": 0.4128137716597536, "learning_rate": 7.607262920736999e-06, "loss": 0.4686, "step": 8059 }, { "epoch": 1.323671299242502, "grad_norm": 0.32301645197658924, "learning_rate": 7.606923682366318e-06, "loss": 0.4168, "step": 8060 }, { "epoch": 1.3238355264508448, "grad_norm": 0.2996180664900655, "learning_rate": 7.60658441025139e-06, "loss": 0.4777, "step": 8061 }, { "epoch": 1.3239997536591874, "grad_norm": 0.3167191179377588, "learning_rate": 7.606245104395898e-06, "loss": 0.4795, "step": 8062 }, { "epoch": 1.3241639808675303, "grad_norm": 0.32137763185269175, "learning_rate": 7.605905764803528e-06, "loss": 0.4715, "step": 8063 }, { "epoch": 1.324328208075873, "grad_norm": 0.47425814179048575, "learning_rate": 7.6055663914779665e-06, "loss": 0.477, "step": 8064 }, { "epoch": 1.3244924352842158, "grad_norm": 0.38687364650229183, "learning_rate": 7.605226984422899e-06, "loss": 0.47, "step": 8065 }, { "epoch": 1.3246566624925584, "grad_norm": 0.29405401978587575, "learning_rate": 7.60488754364201e-06, "loss": 0.4682, "step": 8066 }, { "epoch": 1.3248208897009013, "grad_norm": 0.3612032326513613, "learning_rate": 7.604548069138988e-06, "loss": 0.4877, "step": 8067 }, { "epoch": 1.324985116909244, "grad_norm": 0.3739762267080125, "learning_rate": 7.604208560917519e-06, "loss": 0.4728, "step": 8068 }, { "epoch": 1.3251493441175866, "grad_norm": 0.33058668071605135, "learning_rate": 7.603869018981292e-06, "loss": 0.4707, "step": 8069 }, { "epoch": 1.3253135713259294, "grad_norm": 0.5793876431825817, "learning_rate": 7.603529443333993e-06, "loss": 0.491, "step": 8070 }, { "epoch": 1.3254777985342723, "grad_norm": 0.2881885125717379, "learning_rate": 7.603189833979311e-06, "loss": 0.4674, "step": 8071 }, { "epoch": 1.325642025742615, "grad_norm": 0.2923464384400088, "learning_rate": 7.602850190920933e-06, "loss": 0.4742, "step": 8072 }, { "epoch": 1.3258062529509576, "grad_norm": 0.32004321913986356, "learning_rate": 7.602510514162551e-06, "loss": 0.4909, "step": 8073 }, { "epoch": 1.3259704801593004, "grad_norm": 0.3242085527775499, "learning_rate": 7.602170803707852e-06, "loss": 0.4895, "step": 8074 }, { "epoch": 1.3261347073676433, "grad_norm": 0.31932451799372075, "learning_rate": 7.601831059560525e-06, "loss": 0.4472, "step": 8075 }, { "epoch": 1.326298934575986, "grad_norm": 0.43480385093250357, "learning_rate": 7.6014912817242615e-06, "loss": 0.5229, "step": 8076 }, { "epoch": 1.3264631617843285, "grad_norm": 0.28575102479694664, "learning_rate": 7.601151470202752e-06, "loss": 0.4712, "step": 8077 }, { "epoch": 1.3266273889926714, "grad_norm": 0.3413516866373773, "learning_rate": 7.600811624999685e-06, "loss": 0.4893, "step": 8078 }, { "epoch": 1.326791616201014, "grad_norm": 0.4762227758293843, "learning_rate": 7.600471746118754e-06, "loss": 0.4593, "step": 8079 }, { "epoch": 1.326955843409357, "grad_norm": 0.36642531096793896, "learning_rate": 7.600131833563648e-06, "loss": 0.4828, "step": 8080 }, { "epoch": 1.3271200706176995, "grad_norm": 0.24844728932647275, "learning_rate": 7.599791887338061e-06, "loss": 0.4486, "step": 8081 }, { "epoch": 1.3272842978260424, "grad_norm": 0.2761053375616054, "learning_rate": 7.599451907445685e-06, "loss": 0.4666, "step": 8082 }, { "epoch": 1.327448525034385, "grad_norm": 0.35201886560818063, "learning_rate": 7.599111893890211e-06, "loss": 0.4635, "step": 8083 }, { "epoch": 1.327612752242728, "grad_norm": 0.33488597740115933, "learning_rate": 7.598771846675333e-06, "loss": 0.4694, "step": 8084 }, { "epoch": 1.3277769794510705, "grad_norm": 2.0433814286588077, "learning_rate": 7.598431765804745e-06, "loss": 0.472, "step": 8085 }, { "epoch": 1.3279412066594132, "grad_norm": 0.28476359738830287, "learning_rate": 7.598091651282138e-06, "loss": 0.4753, "step": 8086 }, { "epoch": 1.328105433867756, "grad_norm": 0.3091484184216086, "learning_rate": 7.597751503111208e-06, "loss": 0.4704, "step": 8087 }, { "epoch": 1.328269661076099, "grad_norm": 0.2939492958850838, "learning_rate": 7.597411321295649e-06, "loss": 0.4881, "step": 8088 }, { "epoch": 1.3284338882844415, "grad_norm": 0.2938888929723579, "learning_rate": 7.597071105839155e-06, "loss": 0.4922, "step": 8089 }, { "epoch": 1.3285981154927842, "grad_norm": 0.30286546203247877, "learning_rate": 7.596730856745423e-06, "loss": 0.4762, "step": 8090 }, { "epoch": 1.328762342701127, "grad_norm": 0.30359089033544046, "learning_rate": 7.5963905740181465e-06, "loss": 0.4806, "step": 8091 }, { "epoch": 1.3289265699094697, "grad_norm": 0.3143376216387629, "learning_rate": 7.5960502576610206e-06, "loss": 0.4909, "step": 8092 }, { "epoch": 1.3290907971178125, "grad_norm": 0.38876383498655837, "learning_rate": 7.5957099076777445e-06, "loss": 0.4568, "step": 8093 }, { "epoch": 1.3292550243261552, "grad_norm": 0.36562505267623785, "learning_rate": 7.595369524072013e-06, "loss": 0.4806, "step": 8094 }, { "epoch": 1.329419251534498, "grad_norm": 0.34457728082214156, "learning_rate": 7.595029106847523e-06, "loss": 0.4746, "step": 8095 }, { "epoch": 1.3295834787428407, "grad_norm": 0.3324520354871399, "learning_rate": 7.59468865600797e-06, "loss": 0.4903, "step": 8096 }, { "epoch": 1.3297477059511835, "grad_norm": 0.4155866763230899, "learning_rate": 7.594348171557055e-06, "loss": 0.4703, "step": 8097 }, { "epoch": 1.3299119331595262, "grad_norm": 0.2969915001045424, "learning_rate": 7.594007653498475e-06, "loss": 0.4858, "step": 8098 }, { "epoch": 1.330076160367869, "grad_norm": 0.30978801552869756, "learning_rate": 7.593667101835927e-06, "loss": 0.4635, "step": 8099 }, { "epoch": 1.3302403875762117, "grad_norm": 0.34181601930545924, "learning_rate": 7.593326516573111e-06, "loss": 0.4701, "step": 8100 }, { "epoch": 1.3304046147845545, "grad_norm": 0.42274058666425973, "learning_rate": 7.592985897713724e-06, "loss": 0.4826, "step": 8101 }, { "epoch": 1.3305688419928972, "grad_norm": 0.492798755390736, "learning_rate": 7.592645245261468e-06, "loss": 0.4745, "step": 8102 }, { "epoch": 1.3307330692012398, "grad_norm": 0.2959789659905488, "learning_rate": 7.5923045592200425e-06, "loss": 0.4788, "step": 8103 }, { "epoch": 1.3308972964095827, "grad_norm": 0.2900045872919727, "learning_rate": 7.591963839593147e-06, "loss": 0.4897, "step": 8104 }, { "epoch": 1.3310615236179255, "grad_norm": 0.28604574921354087, "learning_rate": 7.591623086384482e-06, "loss": 0.4801, "step": 8105 }, { "epoch": 1.3312257508262682, "grad_norm": 0.2883666824407346, "learning_rate": 7.5912822995977485e-06, "loss": 0.4641, "step": 8106 }, { "epoch": 1.3313899780346108, "grad_norm": 0.3693527994487577, "learning_rate": 7.590941479236647e-06, "loss": 0.497, "step": 8107 }, { "epoch": 1.3315542052429536, "grad_norm": 0.26417888929256916, "learning_rate": 7.590600625304882e-06, "loss": 0.4961, "step": 8108 }, { "epoch": 1.3317184324512963, "grad_norm": 0.37961262463328665, "learning_rate": 7.590259737806151e-06, "loss": 0.4758, "step": 8109 }, { "epoch": 1.3318826596596391, "grad_norm": 0.32640878987499683, "learning_rate": 7.5899188167441596e-06, "loss": 0.4956, "step": 8110 }, { "epoch": 1.3320468868679818, "grad_norm": 0.29588985784763766, "learning_rate": 7.589577862122611e-06, "loss": 0.4828, "step": 8111 }, { "epoch": 1.3322111140763246, "grad_norm": 0.37197198970391443, "learning_rate": 7.589236873945205e-06, "loss": 0.4528, "step": 8112 }, { "epoch": 1.3323753412846673, "grad_norm": 0.370013610959104, "learning_rate": 7.588895852215649e-06, "loss": 0.4599, "step": 8113 }, { "epoch": 1.3325395684930101, "grad_norm": 0.37106657556119116, "learning_rate": 7.588554796937643e-06, "loss": 0.4644, "step": 8114 }, { "epoch": 1.3327037957013528, "grad_norm": 0.3269685481961678, "learning_rate": 7.588213708114895e-06, "loss": 0.4845, "step": 8115 }, { "epoch": 1.3328680229096956, "grad_norm": 0.2555741764001781, "learning_rate": 7.587872585751108e-06, "loss": 0.4721, "step": 8116 }, { "epoch": 1.3330322501180383, "grad_norm": 0.3073360793567854, "learning_rate": 7.587531429849986e-06, "loss": 0.4881, "step": 8117 }, { "epoch": 1.3331964773263811, "grad_norm": 0.48663839387542973, "learning_rate": 7.587190240415235e-06, "loss": 0.4736, "step": 8118 }, { "epoch": 1.3333607045347238, "grad_norm": 0.37921822868911753, "learning_rate": 7.58684901745056e-06, "loss": 0.487, "step": 8119 }, { "epoch": 1.3335249317430664, "grad_norm": 0.2710430476175914, "learning_rate": 7.586507760959668e-06, "loss": 0.4553, "step": 8120 }, { "epoch": 1.3336891589514093, "grad_norm": 0.2823588509201183, "learning_rate": 7.586166470946265e-06, "loss": 0.4723, "step": 8121 }, { "epoch": 1.3338533861597521, "grad_norm": 0.3588907283766576, "learning_rate": 7.585825147414058e-06, "loss": 0.4588, "step": 8122 }, { "epoch": 1.3340176133680948, "grad_norm": 0.26534977430795154, "learning_rate": 7.585483790366755e-06, "loss": 0.4488, "step": 8123 }, { "epoch": 1.3341818405764374, "grad_norm": 0.36041667654909365, "learning_rate": 7.58514239980806e-06, "loss": 0.4778, "step": 8124 }, { "epoch": 1.3343460677847803, "grad_norm": 0.3025737628651881, "learning_rate": 7.584800975741684e-06, "loss": 0.486, "step": 8125 }, { "epoch": 1.334510294993123, "grad_norm": 0.2634336667104377, "learning_rate": 7.584459518171334e-06, "loss": 0.4659, "step": 8126 }, { "epoch": 1.3346745222014658, "grad_norm": 0.44720072697100516, "learning_rate": 7.58411802710072e-06, "loss": 0.4965, "step": 8127 }, { "epoch": 1.3348387494098084, "grad_norm": 0.33297381374331897, "learning_rate": 7.58377650253355e-06, "loss": 0.5003, "step": 8128 }, { "epoch": 1.3350029766181513, "grad_norm": 0.2732531330453581, "learning_rate": 7.583434944473531e-06, "loss": 0.4742, "step": 8129 }, { "epoch": 1.335167203826494, "grad_norm": 0.314812564000472, "learning_rate": 7.583093352924377e-06, "loss": 0.4602, "step": 8130 }, { "epoch": 1.3353314310348368, "grad_norm": 0.3506246269150991, "learning_rate": 7.582751727889795e-06, "loss": 0.4655, "step": 8131 }, { "epoch": 1.3354956582431794, "grad_norm": 0.31631151263064144, "learning_rate": 7.582410069373497e-06, "loss": 0.4862, "step": 8132 }, { "epoch": 1.3356598854515223, "grad_norm": 0.3378754290915035, "learning_rate": 7.582068377379192e-06, "loss": 0.4874, "step": 8133 }, { "epoch": 1.335824112659865, "grad_norm": 0.2950314734983047, "learning_rate": 7.581726651910592e-06, "loss": 0.4602, "step": 8134 }, { "epoch": 1.3359883398682078, "grad_norm": 0.37993331588181634, "learning_rate": 7.58138489297141e-06, "loss": 0.485, "step": 8135 }, { "epoch": 1.3361525670765504, "grad_norm": 0.47312803234745243, "learning_rate": 7.5810431005653555e-06, "loss": 0.4602, "step": 8136 }, { "epoch": 1.336316794284893, "grad_norm": 0.38400139770922437, "learning_rate": 7.580701274696141e-06, "loss": 0.473, "step": 8137 }, { "epoch": 1.3364810214932359, "grad_norm": 0.2713798872467751, "learning_rate": 7.58035941536748e-06, "loss": 0.4735, "step": 8138 }, { "epoch": 1.3366452487015787, "grad_norm": 0.28391273813649176, "learning_rate": 7.580017522583085e-06, "loss": 0.4917, "step": 8139 }, { "epoch": 1.3368094759099214, "grad_norm": 0.29927465149936233, "learning_rate": 7.57967559634667e-06, "loss": 0.4704, "step": 8140 }, { "epoch": 1.336973703118264, "grad_norm": 0.36460727628287165, "learning_rate": 7.579333636661947e-06, "loss": 0.4906, "step": 8141 }, { "epoch": 1.3371379303266069, "grad_norm": 0.33819147075200523, "learning_rate": 7.578991643532631e-06, "loss": 0.4861, "step": 8142 }, { "epoch": 1.3373021575349495, "grad_norm": 0.29856704467356415, "learning_rate": 7.578649616962437e-06, "loss": 0.4814, "step": 8143 }, { "epoch": 1.3374663847432924, "grad_norm": 0.3243878064563001, "learning_rate": 7.57830755695508e-06, "loss": 0.474, "step": 8144 }, { "epoch": 1.337630611951635, "grad_norm": 0.3260830025390945, "learning_rate": 7.577965463514273e-06, "loss": 0.4439, "step": 8145 }, { "epoch": 1.3377948391599779, "grad_norm": 0.3561533434330576, "learning_rate": 7.577623336643734e-06, "loss": 0.4819, "step": 8146 }, { "epoch": 1.3379590663683205, "grad_norm": 0.40282604646571896, "learning_rate": 7.5772811763471765e-06, "loss": 0.4731, "step": 8147 }, { "epoch": 1.3381232935766634, "grad_norm": 0.4354568605464644, "learning_rate": 7.576938982628319e-06, "loss": 0.4791, "step": 8148 }, { "epoch": 1.338287520785006, "grad_norm": 0.27024614355661286, "learning_rate": 7.5765967554908766e-06, "loss": 0.4534, "step": 8149 }, { "epoch": 1.3384517479933489, "grad_norm": 0.2800449426831548, "learning_rate": 7.576254494938565e-06, "loss": 0.4638, "step": 8150 }, { "epoch": 1.3386159752016915, "grad_norm": 0.31469804284271313, "learning_rate": 7.5759122009751034e-06, "loss": 0.4672, "step": 8151 }, { "epoch": 1.3387802024100344, "grad_norm": 0.31981087249106815, "learning_rate": 7.575569873604211e-06, "loss": 0.4754, "step": 8152 }, { "epoch": 1.338944429618377, "grad_norm": 0.32041798503279073, "learning_rate": 7.575227512829601e-06, "loss": 0.4542, "step": 8153 }, { "epoch": 1.3391086568267196, "grad_norm": 0.30671216747447894, "learning_rate": 7.574885118654997e-06, "loss": 0.4687, "step": 8154 }, { "epoch": 1.3392728840350625, "grad_norm": 0.2954637648278154, "learning_rate": 7.574542691084114e-06, "loss": 0.4976, "step": 8155 }, { "epoch": 1.3394371112434054, "grad_norm": 0.3081145232331669, "learning_rate": 7.574200230120672e-06, "loss": 0.4592, "step": 8156 }, { "epoch": 1.339601338451748, "grad_norm": 0.320559291908532, "learning_rate": 7.573857735768392e-06, "loss": 0.4742, "step": 8157 }, { "epoch": 1.3397655656600906, "grad_norm": 0.3365256596562173, "learning_rate": 7.573515208030992e-06, "loss": 0.4914, "step": 8158 }, { "epoch": 1.3399297928684335, "grad_norm": 0.2751284881552089, "learning_rate": 7.5731726469121925e-06, "loss": 0.489, "step": 8159 }, { "epoch": 1.3400940200767761, "grad_norm": 0.37688645099946766, "learning_rate": 7.572830052415716e-06, "loss": 0.4933, "step": 8160 }, { "epoch": 1.340258247285119, "grad_norm": 0.30999225372323935, "learning_rate": 7.572487424545282e-06, "loss": 0.4993, "step": 8161 }, { "epoch": 1.3404224744934616, "grad_norm": 0.33954222243627585, "learning_rate": 7.572144763304609e-06, "loss": 0.4623, "step": 8162 }, { "epoch": 1.3405867017018045, "grad_norm": 0.35287127368966964, "learning_rate": 7.571802068697424e-06, "loss": 0.4674, "step": 8163 }, { "epoch": 1.3407509289101471, "grad_norm": 0.26149503644123145, "learning_rate": 7.571459340727444e-06, "loss": 0.4895, "step": 8164 }, { "epoch": 1.34091515611849, "grad_norm": 0.340594769554577, "learning_rate": 7.5711165793983955e-06, "loss": 0.4668, "step": 8165 }, { "epoch": 1.3410793833268326, "grad_norm": 0.35235304957633606, "learning_rate": 7.570773784714e-06, "loss": 0.4808, "step": 8166 }, { "epoch": 1.3412436105351755, "grad_norm": 0.2674302744225442, "learning_rate": 7.570430956677978e-06, "loss": 0.4555, "step": 8167 }, { "epoch": 1.3414078377435181, "grad_norm": 0.3802926912537419, "learning_rate": 7.570088095294056e-06, "loss": 0.4788, "step": 8168 }, { "epoch": 1.341572064951861, "grad_norm": 0.3044697674369472, "learning_rate": 7.569745200565956e-06, "loss": 0.479, "step": 8169 }, { "epoch": 1.3417362921602036, "grad_norm": 0.5955916581848941, "learning_rate": 7.569402272497403e-06, "loss": 0.4623, "step": 8170 }, { "epoch": 1.3419005193685463, "grad_norm": 0.31407739496613557, "learning_rate": 7.569059311092121e-06, "loss": 0.4711, "step": 8171 }, { "epoch": 1.3420647465768891, "grad_norm": 0.34767475507542017, "learning_rate": 7.568716316353837e-06, "loss": 0.5016, "step": 8172 }, { "epoch": 1.342228973785232, "grad_norm": 0.38120882129971084, "learning_rate": 7.568373288286274e-06, "loss": 0.4619, "step": 8173 }, { "epoch": 1.3423932009935746, "grad_norm": 0.33282327887772417, "learning_rate": 7.568030226893158e-06, "loss": 0.4911, "step": 8174 }, { "epoch": 1.3425574282019173, "grad_norm": 0.2965917498704802, "learning_rate": 7.567687132178216e-06, "loss": 0.4884, "step": 8175 }, { "epoch": 1.3427216554102601, "grad_norm": 0.3396648386159076, "learning_rate": 7.567344004145172e-06, "loss": 0.483, "step": 8176 }, { "epoch": 1.3428858826186028, "grad_norm": 0.30333232393757004, "learning_rate": 7.567000842797754e-06, "loss": 0.4851, "step": 8177 }, { "epoch": 1.3430501098269456, "grad_norm": 0.34618357769248426, "learning_rate": 7.56665764813969e-06, "loss": 0.4906, "step": 8178 }, { "epoch": 1.3432143370352883, "grad_norm": 0.3789647075081895, "learning_rate": 7.566314420174707e-06, "loss": 0.4798, "step": 8179 }, { "epoch": 1.3433785642436311, "grad_norm": 0.8248885820482107, "learning_rate": 7.565971158906533e-06, "loss": 0.4696, "step": 8180 }, { "epoch": 1.3435427914519738, "grad_norm": 0.34738798654195413, "learning_rate": 7.565627864338896e-06, "loss": 0.4731, "step": 8181 }, { "epoch": 1.3437070186603166, "grad_norm": 0.3001257042915786, "learning_rate": 7.565284536475523e-06, "loss": 0.4761, "step": 8182 }, { "epoch": 1.3438712458686592, "grad_norm": 0.29395541996703284, "learning_rate": 7.564941175320145e-06, "loss": 0.4704, "step": 8183 }, { "epoch": 1.344035473077002, "grad_norm": 0.3490565668842923, "learning_rate": 7.564597780876489e-06, "loss": 0.484, "step": 8184 }, { "epoch": 1.3441997002853447, "grad_norm": 0.33784447165967335, "learning_rate": 7.564254353148286e-06, "loss": 0.4551, "step": 8185 }, { "epoch": 1.3443639274936876, "grad_norm": 0.31079920003562245, "learning_rate": 7.563910892139268e-06, "loss": 0.4629, "step": 8186 }, { "epoch": 1.3445281547020302, "grad_norm": 0.339855134296473, "learning_rate": 7.563567397853162e-06, "loss": 0.4723, "step": 8187 }, { "epoch": 1.3446923819103729, "grad_norm": 0.31361048502666455, "learning_rate": 7.5632238702937e-06, "loss": 0.4626, "step": 8188 }, { "epoch": 1.3448566091187157, "grad_norm": 0.30916236843543365, "learning_rate": 7.562880309464612e-06, "loss": 0.4885, "step": 8189 }, { "epoch": 1.3450208363270586, "grad_norm": 0.42762086725224197, "learning_rate": 7.562536715369632e-06, "loss": 0.4798, "step": 8190 }, { "epoch": 1.3451850635354012, "grad_norm": 0.29582310475373164, "learning_rate": 7.562193088012489e-06, "loss": 0.4757, "step": 8191 }, { "epoch": 1.3453492907437439, "grad_norm": 0.2885250979215616, "learning_rate": 7.561849427396916e-06, "loss": 0.4677, "step": 8192 }, { "epoch": 1.3455135179520867, "grad_norm": 0.3149369224702306, "learning_rate": 7.561505733526646e-06, "loss": 0.4801, "step": 8193 }, { "epoch": 1.3456777451604294, "grad_norm": 0.3230766779512786, "learning_rate": 7.561162006405413e-06, "loss": 0.4705, "step": 8194 }, { "epoch": 1.3458419723687722, "grad_norm": 0.2664636359519095, "learning_rate": 7.560818246036948e-06, "loss": 0.4632, "step": 8195 }, { "epoch": 1.3460061995771149, "grad_norm": 0.33555822669646035, "learning_rate": 7.560474452424984e-06, "loss": 0.4661, "step": 8196 }, { "epoch": 1.3461704267854577, "grad_norm": 0.6934805707048038, "learning_rate": 7.560130625573259e-06, "loss": 0.4862, "step": 8197 }, { "epoch": 1.3463346539938004, "grad_norm": 0.2607196059610446, "learning_rate": 7.559786765485503e-06, "loss": 0.481, "step": 8198 }, { "epoch": 1.3464988812021432, "grad_norm": 0.32406956402984705, "learning_rate": 7.559442872165452e-06, "loss": 0.4648, "step": 8199 }, { "epoch": 1.3466631084104859, "grad_norm": 0.23869746457513058, "learning_rate": 7.5590989456168425e-06, "loss": 0.4656, "step": 8200 }, { "epoch": 1.3468273356188287, "grad_norm": 0.3208825001499969, "learning_rate": 7.558754985843408e-06, "loss": 0.4713, "step": 8201 }, { "epoch": 1.3469915628271714, "grad_norm": 0.2841932547090098, "learning_rate": 7.558410992848886e-06, "loss": 0.4806, "step": 8202 }, { "epoch": 1.3471557900355142, "grad_norm": 0.3941615072914976, "learning_rate": 7.55806696663701e-06, "loss": 0.462, "step": 8203 }, { "epoch": 1.3473200172438569, "grad_norm": 0.30906597354873233, "learning_rate": 7.557722907211518e-06, "loss": 0.4597, "step": 8204 }, { "epoch": 1.3474842444521995, "grad_norm": 0.3261144107035512, "learning_rate": 7.557378814576148e-06, "loss": 0.4767, "step": 8205 }, { "epoch": 1.3476484716605424, "grad_norm": 0.3231943091793371, "learning_rate": 7.557034688734636e-06, "loss": 0.4903, "step": 8206 }, { "epoch": 1.3478126988688852, "grad_norm": 0.31891508433673865, "learning_rate": 7.556690529690719e-06, "loss": 0.4661, "step": 8207 }, { "epoch": 1.3479769260772279, "grad_norm": 0.3561392861742652, "learning_rate": 7.556346337448135e-06, "loss": 0.468, "step": 8208 }, { "epoch": 1.3481411532855705, "grad_norm": 0.29269560637612674, "learning_rate": 7.556002112010623e-06, "loss": 0.489, "step": 8209 }, { "epoch": 1.3483053804939134, "grad_norm": 0.29961111960033693, "learning_rate": 7.555657853381921e-06, "loss": 0.4822, "step": 8210 }, { "epoch": 1.348469607702256, "grad_norm": 0.37927679526581437, "learning_rate": 7.55531356156577e-06, "loss": 0.475, "step": 8211 }, { "epoch": 1.3486338349105988, "grad_norm": 0.2841023304201933, "learning_rate": 7.554969236565906e-06, "loss": 0.4762, "step": 8212 }, { "epoch": 1.3487980621189415, "grad_norm": 0.28326627175304997, "learning_rate": 7.554624878386071e-06, "loss": 0.4959, "step": 8213 }, { "epoch": 1.3489622893272843, "grad_norm": 0.41227391057862817, "learning_rate": 7.5542804870300035e-06, "loss": 0.4711, "step": 8214 }, { "epoch": 1.349126516535627, "grad_norm": 0.3915072018838373, "learning_rate": 7.553936062501448e-06, "loss": 0.481, "step": 8215 }, { "epoch": 1.3492907437439698, "grad_norm": 0.3381486335554688, "learning_rate": 7.55359160480414e-06, "loss": 0.4789, "step": 8216 }, { "epoch": 1.3494549709523125, "grad_norm": 0.27882630443105566, "learning_rate": 7.553247113941822e-06, "loss": 0.4697, "step": 8217 }, { "epoch": 1.3496191981606553, "grad_norm": 0.31148174098022713, "learning_rate": 7.552902589918237e-06, "loss": 0.4677, "step": 8218 }, { "epoch": 1.349783425368998, "grad_norm": 0.32014703747449885, "learning_rate": 7.552558032737128e-06, "loss": 0.4803, "step": 8219 }, { "epoch": 1.3499476525773408, "grad_norm": 0.3466674862984574, "learning_rate": 7.552213442402233e-06, "loss": 0.4739, "step": 8220 }, { "epoch": 1.3501118797856835, "grad_norm": 0.33280537484605616, "learning_rate": 7.551868818917298e-06, "loss": 0.4627, "step": 8221 }, { "epoch": 1.3502761069940261, "grad_norm": 0.2603930956385385, "learning_rate": 7.551524162286065e-06, "loss": 0.4662, "step": 8222 }, { "epoch": 1.350440334202369, "grad_norm": 0.2930172645612939, "learning_rate": 7.551179472512278e-06, "loss": 0.5013, "step": 8223 }, { "epoch": 1.3506045614107118, "grad_norm": 0.4860934666600133, "learning_rate": 7.5508347495996785e-06, "loss": 0.4903, "step": 8224 }, { "epoch": 1.3507687886190545, "grad_norm": 0.29072814081065385, "learning_rate": 7.5504899935520135e-06, "loss": 0.4827, "step": 8225 }, { "epoch": 1.350933015827397, "grad_norm": 0.28925745501679684, "learning_rate": 7.550145204373025e-06, "loss": 0.467, "step": 8226 }, { "epoch": 1.35109724303574, "grad_norm": 0.3215258109778846, "learning_rate": 7.549800382066458e-06, "loss": 0.4546, "step": 8227 }, { "epoch": 1.3512614702440826, "grad_norm": 0.3339575673451271, "learning_rate": 7.549455526636061e-06, "loss": 0.4796, "step": 8228 }, { "epoch": 1.3514256974524255, "grad_norm": 0.2885210154394203, "learning_rate": 7.549110638085574e-06, "loss": 0.449, "step": 8229 }, { "epoch": 1.351589924660768, "grad_norm": 0.2797488004637664, "learning_rate": 7.548765716418745e-06, "loss": 0.4589, "step": 8230 }, { "epoch": 1.351754151869111, "grad_norm": 0.42563485332320505, "learning_rate": 7.5484207616393225e-06, "loss": 0.4715, "step": 8231 }, { "epoch": 1.3519183790774536, "grad_norm": 0.32789103541201964, "learning_rate": 7.548075773751052e-06, "loss": 0.4707, "step": 8232 }, { "epoch": 1.3520826062857965, "grad_norm": 0.3307854208254755, "learning_rate": 7.547730752757679e-06, "loss": 0.4846, "step": 8233 }, { "epoch": 1.352246833494139, "grad_norm": 0.33558511584102185, "learning_rate": 7.547385698662949e-06, "loss": 0.4713, "step": 8234 }, { "epoch": 1.352411060702482, "grad_norm": 0.5575831635879408, "learning_rate": 7.547040611470615e-06, "loss": 0.4816, "step": 8235 }, { "epoch": 1.3525752879108246, "grad_norm": 0.29420650994646813, "learning_rate": 7.546695491184422e-06, "loss": 0.4757, "step": 8236 }, { "epoch": 1.3527395151191675, "grad_norm": 0.43134384746264715, "learning_rate": 7.546350337808117e-06, "loss": 0.458, "step": 8237 }, { "epoch": 1.35290374232751, "grad_norm": 0.28010941089448865, "learning_rate": 7.546005151345451e-06, "loss": 0.461, "step": 8238 }, { "epoch": 1.3530679695358527, "grad_norm": 0.4136198715585237, "learning_rate": 7.545659931800171e-06, "loss": 0.474, "step": 8239 }, { "epoch": 1.3532321967441956, "grad_norm": 0.2773429745089072, "learning_rate": 7.5453146791760295e-06, "loss": 0.4636, "step": 8240 }, { "epoch": 1.3533964239525385, "grad_norm": 0.4222897509697245, "learning_rate": 7.544969393476774e-06, "loss": 0.4792, "step": 8241 }, { "epoch": 1.353560651160881, "grad_norm": 0.30400034421879774, "learning_rate": 7.544624074706155e-06, "loss": 0.482, "step": 8242 }, { "epoch": 1.3537248783692237, "grad_norm": 0.3404611704418366, "learning_rate": 7.544278722867922e-06, "loss": 0.4801, "step": 8243 }, { "epoch": 1.3538891055775666, "grad_norm": 0.3795342780211706, "learning_rate": 7.543933337965828e-06, "loss": 0.4357, "step": 8244 }, { "epoch": 1.3540533327859092, "grad_norm": 0.2967089858210644, "learning_rate": 7.543587920003622e-06, "loss": 0.4621, "step": 8245 }, { "epoch": 1.354217559994252, "grad_norm": 0.27784864211749893, "learning_rate": 7.543242468985057e-06, "loss": 0.4604, "step": 8246 }, { "epoch": 1.3543817872025947, "grad_norm": 0.47344567937835635, "learning_rate": 7.542896984913885e-06, "loss": 0.4714, "step": 8247 }, { "epoch": 1.3545460144109376, "grad_norm": 0.3088897586133829, "learning_rate": 7.542551467793858e-06, "loss": 0.4707, "step": 8248 }, { "epoch": 1.3547102416192802, "grad_norm": 0.3436137560670549, "learning_rate": 7.542205917628729e-06, "loss": 0.4804, "step": 8249 }, { "epoch": 1.354874468827623, "grad_norm": 0.35880366032473915, "learning_rate": 7.54186033442225e-06, "loss": 0.4797, "step": 8250 }, { "epoch": 1.3550386960359657, "grad_norm": 0.4984973501764961, "learning_rate": 7.541514718178174e-06, "loss": 0.4688, "step": 8251 }, { "epoch": 1.3552029232443086, "grad_norm": 0.33572784628150065, "learning_rate": 7.541169068900258e-06, "loss": 0.4816, "step": 8252 }, { "epoch": 1.3553671504526512, "grad_norm": 0.2685346799680185, "learning_rate": 7.540823386592252e-06, "loss": 0.4689, "step": 8253 }, { "epoch": 1.355531377660994, "grad_norm": 0.32616794767271073, "learning_rate": 7.540477671257913e-06, "loss": 0.4785, "step": 8254 }, { "epoch": 1.3556956048693367, "grad_norm": 0.297867131998836, "learning_rate": 7.540131922900995e-06, "loss": 0.4597, "step": 8255 }, { "epoch": 1.3558598320776793, "grad_norm": 0.4019680636042429, "learning_rate": 7.5397861415252526e-06, "loss": 0.4575, "step": 8256 }, { "epoch": 1.3560240592860222, "grad_norm": 0.3480970659977098, "learning_rate": 7.539440327134442e-06, "loss": 0.4863, "step": 8257 }, { "epoch": 1.356188286494365, "grad_norm": 0.2997521494595294, "learning_rate": 7.53909447973232e-06, "loss": 0.4875, "step": 8258 }, { "epoch": 1.3563525137027077, "grad_norm": 0.3412320676442106, "learning_rate": 7.538748599322642e-06, "loss": 0.4846, "step": 8259 }, { "epoch": 1.3565167409110503, "grad_norm": 0.30124717220604347, "learning_rate": 7.538402685909164e-06, "loss": 0.4913, "step": 8260 }, { "epoch": 1.3566809681193932, "grad_norm": 0.5869707815625042, "learning_rate": 7.538056739495643e-06, "loss": 0.4619, "step": 8261 }, { "epoch": 1.3568451953277358, "grad_norm": 0.41886149665065575, "learning_rate": 7.537710760085837e-06, "loss": 0.4819, "step": 8262 }, { "epoch": 1.3570094225360787, "grad_norm": 0.3076873203590548, "learning_rate": 7.537364747683502e-06, "loss": 0.461, "step": 8263 }, { "epoch": 1.3571736497444213, "grad_norm": 0.339881383453682, "learning_rate": 7.537018702292401e-06, "loss": 0.4584, "step": 8264 }, { "epoch": 1.3573378769527642, "grad_norm": 0.4871044368615955, "learning_rate": 7.536672623916286e-06, "loss": 0.4749, "step": 8265 }, { "epoch": 1.3575021041611068, "grad_norm": 0.2778485736129083, "learning_rate": 7.5363265125589195e-06, "loss": 0.4515, "step": 8266 }, { "epoch": 1.3576663313694497, "grad_norm": 0.29364065488596336, "learning_rate": 7.535980368224061e-06, "loss": 0.4816, "step": 8267 }, { "epoch": 1.3578305585777923, "grad_norm": 0.2838595820478707, "learning_rate": 7.5356341909154665e-06, "loss": 0.4566, "step": 8268 }, { "epoch": 1.3579947857861352, "grad_norm": 0.3392614946332992, "learning_rate": 7.5352879806369e-06, "loss": 0.4608, "step": 8269 }, { "epoch": 1.3581590129944778, "grad_norm": 0.3391061577152166, "learning_rate": 7.5349417373921175e-06, "loss": 0.4797, "step": 8270 }, { "epoch": 1.3583232402028207, "grad_norm": 0.3055348595500309, "learning_rate": 7.534595461184884e-06, "loss": 0.4872, "step": 8271 }, { "epoch": 1.3584874674111633, "grad_norm": 0.3142799258083078, "learning_rate": 7.534249152018957e-06, "loss": 0.4582, "step": 8272 }, { "epoch": 1.358651694619506, "grad_norm": 0.314329556767126, "learning_rate": 7.533902809898098e-06, "loss": 0.4672, "step": 8273 }, { "epoch": 1.3588159218278488, "grad_norm": 0.2967056278298423, "learning_rate": 7.533556434826072e-06, "loss": 0.476, "step": 8274 }, { "epoch": 1.3589801490361917, "grad_norm": 0.2837665629837183, "learning_rate": 7.533210026806636e-06, "loss": 0.4681, "step": 8275 }, { "epoch": 1.3591443762445343, "grad_norm": 0.32916697819824303, "learning_rate": 7.532863585843556e-06, "loss": 0.4799, "step": 8276 }, { "epoch": 1.359308603452877, "grad_norm": 0.3801296291612379, "learning_rate": 7.532517111940593e-06, "loss": 0.482, "step": 8277 }, { "epoch": 1.3594728306612198, "grad_norm": 0.30518418701039557, "learning_rate": 7.5321706051015115e-06, "loss": 0.4902, "step": 8278 }, { "epoch": 1.3596370578695625, "grad_norm": 0.31494213222449563, "learning_rate": 7.531824065330073e-06, "loss": 0.4801, "step": 8279 }, { "epoch": 1.3598012850779053, "grad_norm": 0.27913060649487614, "learning_rate": 7.5314774926300425e-06, "loss": 0.4674, "step": 8280 }, { "epoch": 1.359965512286248, "grad_norm": 0.4272270596215226, "learning_rate": 7.531130887005185e-06, "loss": 0.4564, "step": 8281 }, { "epoch": 1.3601297394945908, "grad_norm": 0.33419238881683055, "learning_rate": 7.5307842484592625e-06, "loss": 0.4829, "step": 8282 }, { "epoch": 1.3602939667029335, "grad_norm": 0.3286978349267236, "learning_rate": 7.530437576996042e-06, "loss": 0.4811, "step": 8283 }, { "epoch": 1.3604581939112763, "grad_norm": 0.29069776505360395, "learning_rate": 7.530090872619287e-06, "loss": 0.4869, "step": 8284 }, { "epoch": 1.360622421119619, "grad_norm": 0.3464972039264677, "learning_rate": 7.529744135332765e-06, "loss": 0.4772, "step": 8285 }, { "epoch": 1.3607866483279618, "grad_norm": 0.2661431197402841, "learning_rate": 7.52939736514024e-06, "loss": 0.4928, "step": 8286 }, { "epoch": 1.3609508755363044, "grad_norm": 0.3143496844872588, "learning_rate": 7.5290505620454785e-06, "loss": 0.4915, "step": 8287 }, { "epoch": 1.3611151027446473, "grad_norm": 0.379624731415056, "learning_rate": 7.528703726052248e-06, "loss": 0.4606, "step": 8288 }, { "epoch": 1.36127932995299, "grad_norm": 0.3309339659074846, "learning_rate": 7.528356857164315e-06, "loss": 0.4516, "step": 8289 }, { "epoch": 1.3614435571613326, "grad_norm": 0.30095414860162006, "learning_rate": 7.5280099553854495e-06, "loss": 0.453, "step": 8290 }, { "epoch": 1.3616077843696754, "grad_norm": 0.5214225092991446, "learning_rate": 7.527663020719415e-06, "loss": 0.4589, "step": 8291 }, { "epoch": 1.3617720115780183, "grad_norm": 0.29060618148335327, "learning_rate": 7.52731605316998e-06, "loss": 0.4746, "step": 8292 }, { "epoch": 1.361936238786361, "grad_norm": 0.3803658344262675, "learning_rate": 7.526969052740916e-06, "loss": 0.4697, "step": 8293 }, { "epoch": 1.3621004659947036, "grad_norm": 0.34410484988053686, "learning_rate": 7.52662201943599e-06, "loss": 0.4707, "step": 8294 }, { "epoch": 1.3622646932030464, "grad_norm": 0.32125725777151576, "learning_rate": 7.52627495325897e-06, "loss": 0.472, "step": 8295 }, { "epoch": 1.362428920411389, "grad_norm": 0.3261931445987548, "learning_rate": 7.525927854213627e-06, "loss": 0.4928, "step": 8296 }, { "epoch": 1.362593147619732, "grad_norm": 0.4204108817507413, "learning_rate": 7.52558072230373e-06, "loss": 0.4595, "step": 8297 }, { "epoch": 1.3627573748280746, "grad_norm": 0.32134642277397796, "learning_rate": 7.5252335575330514e-06, "loss": 0.4782, "step": 8298 }, { "epoch": 1.3629216020364174, "grad_norm": 0.3515706624302091, "learning_rate": 7.524886359905357e-06, "loss": 0.4944, "step": 8299 }, { "epoch": 1.36308582924476, "grad_norm": 0.3573167400693006, "learning_rate": 7.5245391294244225e-06, "loss": 0.4852, "step": 8300 }, { "epoch": 1.363250056453103, "grad_norm": 0.3120269607498423, "learning_rate": 7.524191866094016e-06, "loss": 0.4793, "step": 8301 }, { "epoch": 1.3634142836614456, "grad_norm": 0.30100585842420313, "learning_rate": 7.523844569917912e-06, "loss": 0.4567, "step": 8302 }, { "epoch": 1.3635785108697884, "grad_norm": 0.33931025334201326, "learning_rate": 7.523497240899881e-06, "loss": 0.495, "step": 8303 }, { "epoch": 1.363742738078131, "grad_norm": 0.30310450315920817, "learning_rate": 7.523149879043694e-06, "loss": 0.4844, "step": 8304 }, { "epoch": 1.363906965286474, "grad_norm": 0.36035296781002274, "learning_rate": 7.522802484353125e-06, "loss": 0.4844, "step": 8305 }, { "epoch": 1.3640711924948166, "grad_norm": 0.32849349322708093, "learning_rate": 7.522455056831948e-06, "loss": 0.4785, "step": 8306 }, { "epoch": 1.3642354197031592, "grad_norm": 0.3416519194854029, "learning_rate": 7.522107596483934e-06, "loss": 0.4575, "step": 8307 }, { "epoch": 1.364399646911502, "grad_norm": 0.29685825846548575, "learning_rate": 7.5217601033128604e-06, "loss": 0.4852, "step": 8308 }, { "epoch": 1.364563874119845, "grad_norm": 0.31485021328089774, "learning_rate": 7.5214125773224975e-06, "loss": 0.4671, "step": 8309 }, { "epoch": 1.3647281013281876, "grad_norm": 0.28896520824254684, "learning_rate": 7.5210650185166205e-06, "loss": 0.4626, "step": 8310 }, { "epoch": 1.3648923285365302, "grad_norm": 0.3173799606341833, "learning_rate": 7.520717426899007e-06, "loss": 0.4505, "step": 8311 }, { "epoch": 1.365056555744873, "grad_norm": 0.45661343907823004, "learning_rate": 7.520369802473429e-06, "loss": 0.46, "step": 8312 }, { "epoch": 1.3652207829532157, "grad_norm": 0.31662459661797954, "learning_rate": 7.520022145243664e-06, "loss": 0.4689, "step": 8313 }, { "epoch": 1.3653850101615586, "grad_norm": 0.3206366426822329, "learning_rate": 7.5196744552134866e-06, "loss": 0.4767, "step": 8314 }, { "epoch": 1.3655492373699012, "grad_norm": 0.31121042162784335, "learning_rate": 7.519326732386674e-06, "loss": 0.4463, "step": 8315 }, { "epoch": 1.365713464578244, "grad_norm": 0.3287820896136919, "learning_rate": 7.518978976767001e-06, "loss": 0.4833, "step": 8316 }, { "epoch": 1.3658776917865867, "grad_norm": 0.44335887820682335, "learning_rate": 7.518631188358249e-06, "loss": 0.4892, "step": 8317 }, { "epoch": 1.3660419189949295, "grad_norm": 0.2763335722270975, "learning_rate": 7.51828336716419e-06, "loss": 0.4862, "step": 8318 }, { "epoch": 1.3662061462032722, "grad_norm": 0.3209361127665086, "learning_rate": 7.517935513188605e-06, "loss": 0.4563, "step": 8319 }, { "epoch": 1.366370373411615, "grad_norm": 0.35302259974899775, "learning_rate": 7.517587626435271e-06, "loss": 0.4781, "step": 8320 }, { "epoch": 1.3665346006199577, "grad_norm": 0.3165049719005419, "learning_rate": 7.5172397069079656e-06, "loss": 0.4508, "step": 8321 }, { "epoch": 1.3666988278283005, "grad_norm": 0.292617607086799, "learning_rate": 7.516891754610469e-06, "loss": 0.4726, "step": 8322 }, { "epoch": 1.3668630550366432, "grad_norm": 0.3238974592916656, "learning_rate": 7.5165437695465605e-06, "loss": 0.4742, "step": 8323 }, { "epoch": 1.3670272822449858, "grad_norm": 0.32845712957705675, "learning_rate": 7.516195751720018e-06, "loss": 0.4692, "step": 8324 }, { "epoch": 1.3671915094533287, "grad_norm": 0.3973395741657114, "learning_rate": 7.515847701134623e-06, "loss": 0.4554, "step": 8325 }, { "epoch": 1.3673557366616715, "grad_norm": 0.3248357261446677, "learning_rate": 7.5154996177941544e-06, "loss": 0.4755, "step": 8326 }, { "epoch": 1.3675199638700142, "grad_norm": 0.29548108080992236, "learning_rate": 7.515151501702392e-06, "loss": 0.5038, "step": 8327 }, { "epoch": 1.3676841910783568, "grad_norm": 0.3793355108500688, "learning_rate": 7.514803352863119e-06, "loss": 0.4573, "step": 8328 }, { "epoch": 1.3678484182866997, "grad_norm": 0.28719236328501363, "learning_rate": 7.5144551712801146e-06, "loss": 0.4873, "step": 8329 }, { "epoch": 1.3680126454950423, "grad_norm": 0.31485206904199836, "learning_rate": 7.514106956957162e-06, "loss": 0.4553, "step": 8330 }, { "epoch": 1.3681768727033852, "grad_norm": 0.4207883658274008, "learning_rate": 7.513758709898041e-06, "loss": 0.4754, "step": 8331 }, { "epoch": 1.3683410999117278, "grad_norm": 0.3500059465239506, "learning_rate": 7.513410430106538e-06, "loss": 0.4904, "step": 8332 }, { "epoch": 1.3685053271200707, "grad_norm": 0.29361003110067885, "learning_rate": 7.5130621175864295e-06, "loss": 0.4701, "step": 8333 }, { "epoch": 1.3686695543284133, "grad_norm": 0.36962502570741995, "learning_rate": 7.512713772341504e-06, "loss": 0.4786, "step": 8334 }, { "epoch": 1.3688337815367562, "grad_norm": 0.3727744016142195, "learning_rate": 7.512365394375543e-06, "loss": 0.4797, "step": 8335 }, { "epoch": 1.3689980087450988, "grad_norm": 0.2792130647553743, "learning_rate": 7.512016983692329e-06, "loss": 0.4687, "step": 8336 }, { "epoch": 1.3691622359534417, "grad_norm": 0.29179763332648856, "learning_rate": 7.511668540295648e-06, "loss": 0.481, "step": 8337 }, { "epoch": 1.3693264631617843, "grad_norm": 0.3281601698226462, "learning_rate": 7.5113200641892826e-06, "loss": 0.4811, "step": 8338 }, { "epoch": 1.3694906903701272, "grad_norm": 0.32319827408143587, "learning_rate": 7.510971555377019e-06, "loss": 0.4744, "step": 8339 }, { "epoch": 1.3696549175784698, "grad_norm": 0.48402519535000715, "learning_rate": 7.510623013862643e-06, "loss": 0.4692, "step": 8340 }, { "epoch": 1.3698191447868124, "grad_norm": 0.36689549011407746, "learning_rate": 7.510274439649938e-06, "loss": 0.472, "step": 8341 }, { "epoch": 1.3699833719951553, "grad_norm": 0.2931911030026804, "learning_rate": 7.509925832742691e-06, "loss": 0.4815, "step": 8342 }, { "epoch": 1.3701475992034982, "grad_norm": 0.38794231413739944, "learning_rate": 7.5095771931446874e-06, "loss": 0.4806, "step": 8343 }, { "epoch": 1.3703118264118408, "grad_norm": 0.32982445067061356, "learning_rate": 7.509228520859716e-06, "loss": 0.4655, "step": 8344 }, { "epoch": 1.3704760536201834, "grad_norm": 0.29391263723942956, "learning_rate": 7.508879815891561e-06, "loss": 0.4733, "step": 8345 }, { "epoch": 1.3706402808285263, "grad_norm": 0.35197760791067445, "learning_rate": 7.50853107824401e-06, "loss": 0.4762, "step": 8346 }, { "epoch": 1.370804508036869, "grad_norm": 0.3392993683378087, "learning_rate": 7.508182307920853e-06, "loss": 0.4749, "step": 8347 }, { "epoch": 1.3709687352452118, "grad_norm": 0.4550315133155526, "learning_rate": 7.507833504925876e-06, "loss": 0.461, "step": 8348 }, { "epoch": 1.3711329624535544, "grad_norm": 0.31311924905014327, "learning_rate": 7.507484669262869e-06, "loss": 0.4714, "step": 8349 }, { "epoch": 1.3712971896618973, "grad_norm": 0.39457589400869614, "learning_rate": 7.507135800935618e-06, "loss": 0.4842, "step": 8350 }, { "epoch": 1.37146141687024, "grad_norm": 0.32116871340010233, "learning_rate": 7.506786899947914e-06, "loss": 0.4808, "step": 8351 }, { "epoch": 1.3716256440785828, "grad_norm": 0.28265646058257143, "learning_rate": 7.506437966303546e-06, "loss": 0.4592, "step": 8352 }, { "epoch": 1.3717898712869254, "grad_norm": 0.2988692411952469, "learning_rate": 7.5060890000063035e-06, "loss": 0.4847, "step": 8353 }, { "epoch": 1.3719540984952683, "grad_norm": 0.3962595768036921, "learning_rate": 7.505740001059977e-06, "loss": 0.4694, "step": 8354 }, { "epoch": 1.372118325703611, "grad_norm": 0.27664766973560395, "learning_rate": 7.5053909694683575e-06, "loss": 0.4652, "step": 8355 }, { "epoch": 1.3722825529119538, "grad_norm": 0.34306614080647385, "learning_rate": 7.505041905235234e-06, "loss": 0.4609, "step": 8356 }, { "epoch": 1.3724467801202964, "grad_norm": 0.3243406451551074, "learning_rate": 7.5046928083644e-06, "loss": 0.4756, "step": 8357 }, { "epoch": 1.372611007328639, "grad_norm": 0.32581858539649494, "learning_rate": 7.504343678859645e-06, "loss": 0.4865, "step": 8358 }, { "epoch": 1.372775234536982, "grad_norm": 0.3316606832659808, "learning_rate": 7.5039945167247625e-06, "loss": 0.4613, "step": 8359 }, { "epoch": 1.3729394617453248, "grad_norm": 0.3043342240235991, "learning_rate": 7.503645321963543e-06, "loss": 0.4445, "step": 8360 }, { "epoch": 1.3731036889536674, "grad_norm": 0.36325843544559216, "learning_rate": 7.503296094579782e-06, "loss": 0.4766, "step": 8361 }, { "epoch": 1.37326791616201, "grad_norm": 0.3295513937739328, "learning_rate": 7.502946834577269e-06, "loss": 0.4764, "step": 8362 }, { "epoch": 1.373432143370353, "grad_norm": 0.29301402397809956, "learning_rate": 7.5025975419597995e-06, "loss": 0.453, "step": 8363 }, { "epoch": 1.3735963705786955, "grad_norm": 0.3265871905901499, "learning_rate": 7.502248216731166e-06, "loss": 0.485, "step": 8364 }, { "epoch": 1.3737605977870384, "grad_norm": 0.3307848184863391, "learning_rate": 7.501898858895163e-06, "loss": 0.482, "step": 8365 }, { "epoch": 1.373924824995381, "grad_norm": 0.318539651593937, "learning_rate": 7.501549468455586e-06, "loss": 0.4635, "step": 8366 }, { "epoch": 1.374089052203724, "grad_norm": 0.4299176845852357, "learning_rate": 7.501200045416228e-06, "loss": 0.4746, "step": 8367 }, { "epoch": 1.3742532794120665, "grad_norm": 0.33954283941162516, "learning_rate": 7.500850589780885e-06, "loss": 0.4581, "step": 8368 }, { "epoch": 1.3744175066204094, "grad_norm": 0.4102003323616825, "learning_rate": 7.5005011015533515e-06, "loss": 0.4652, "step": 8369 }, { "epoch": 1.374581733828752, "grad_norm": 0.37286895421142485, "learning_rate": 7.500151580737423e-06, "loss": 0.4749, "step": 8370 }, { "epoch": 1.374745961037095, "grad_norm": 0.4328474063000734, "learning_rate": 7.4998020273368985e-06, "loss": 0.4734, "step": 8371 }, { "epoch": 1.3749101882454375, "grad_norm": 0.3229313804670453, "learning_rate": 7.499452441355571e-06, "loss": 0.4854, "step": 8372 }, { "epoch": 1.3750744154537804, "grad_norm": 0.338741035436834, "learning_rate": 7.49910282279724e-06, "loss": 0.4485, "step": 8373 }, { "epoch": 1.375238642662123, "grad_norm": 0.3835064638344377, "learning_rate": 7.498753171665702e-06, "loss": 0.4763, "step": 8374 }, { "epoch": 1.3754028698704657, "grad_norm": 0.3824516415335608, "learning_rate": 7.498403487964754e-06, "loss": 0.4885, "step": 8375 }, { "epoch": 1.3755670970788085, "grad_norm": 0.3196719968516, "learning_rate": 7.4980537716981935e-06, "loss": 0.4901, "step": 8376 }, { "epoch": 1.3757313242871514, "grad_norm": 0.3553565763368463, "learning_rate": 7.49770402286982e-06, "loss": 0.4437, "step": 8377 }, { "epoch": 1.375895551495494, "grad_norm": 0.4544423987932916, "learning_rate": 7.49735424148343e-06, "loss": 0.4518, "step": 8378 }, { "epoch": 1.3760597787038367, "grad_norm": 0.3659010510622629, "learning_rate": 7.497004427542827e-06, "loss": 0.5014, "step": 8379 }, { "epoch": 1.3762240059121795, "grad_norm": 0.30706994581649516, "learning_rate": 7.4966545810518046e-06, "loss": 0.4702, "step": 8380 }, { "epoch": 1.3763882331205222, "grad_norm": 0.3120020596631399, "learning_rate": 7.496304702014165e-06, "loss": 0.4603, "step": 8381 }, { "epoch": 1.376552460328865, "grad_norm": 0.3428900780388786, "learning_rate": 7.49595479043371e-06, "loss": 0.4851, "step": 8382 }, { "epoch": 1.3767166875372077, "grad_norm": 0.28232317726051354, "learning_rate": 7.495604846314236e-06, "loss": 0.473, "step": 8383 }, { "epoch": 1.3768809147455505, "grad_norm": 0.40601124361359603, "learning_rate": 7.495254869659548e-06, "loss": 0.4825, "step": 8384 }, { "epoch": 1.3770451419538932, "grad_norm": 0.30385377720199874, "learning_rate": 7.494904860473446e-06, "loss": 0.4827, "step": 8385 }, { "epoch": 1.377209369162236, "grad_norm": 0.3585977445148269, "learning_rate": 7.494554818759729e-06, "loss": 0.4686, "step": 8386 }, { "epoch": 1.3773735963705787, "grad_norm": 0.296032640267518, "learning_rate": 7.4942047445222005e-06, "loss": 0.4749, "step": 8387 }, { "epoch": 1.3775378235789215, "grad_norm": 0.3929412698877934, "learning_rate": 7.493854637764663e-06, "loss": 0.471, "step": 8388 }, { "epoch": 1.3777020507872642, "grad_norm": 0.3584205071049299, "learning_rate": 7.493504498490919e-06, "loss": 0.4703, "step": 8389 }, { "epoch": 1.377866277995607, "grad_norm": 0.26009475523382186, "learning_rate": 7.49315432670477e-06, "loss": 0.4767, "step": 8390 }, { "epoch": 1.3780305052039497, "grad_norm": 0.5168875923714398, "learning_rate": 7.492804122410021e-06, "loss": 0.467, "step": 8391 }, { "epoch": 1.3781947324122923, "grad_norm": 0.2963401806601922, "learning_rate": 7.492453885610474e-06, "loss": 0.4768, "step": 8392 }, { "epoch": 1.3783589596206351, "grad_norm": 0.31659003611238296, "learning_rate": 7.492103616309933e-06, "loss": 0.4711, "step": 8393 }, { "epoch": 1.378523186828978, "grad_norm": 0.33403184376342293, "learning_rate": 7.491753314512205e-06, "loss": 0.4788, "step": 8394 }, { "epoch": 1.3786874140373206, "grad_norm": 0.6322963767384263, "learning_rate": 7.491402980221091e-06, "loss": 0.484, "step": 8395 }, { "epoch": 1.3788516412456633, "grad_norm": 0.3105545863957514, "learning_rate": 7.491052613440398e-06, "loss": 0.4804, "step": 8396 }, { "epoch": 1.3790158684540061, "grad_norm": 0.37750790683795843, "learning_rate": 7.4907022141739305e-06, "loss": 0.4725, "step": 8397 }, { "epoch": 1.3791800956623488, "grad_norm": 4.6596300526714955, "learning_rate": 7.490351782425494e-06, "loss": 0.4773, "step": 8398 }, { "epoch": 1.3793443228706916, "grad_norm": 0.35738475045237333, "learning_rate": 7.490001318198896e-06, "loss": 0.4715, "step": 8399 }, { "epoch": 1.3795085500790343, "grad_norm": 0.2786126847265779, "learning_rate": 7.489650821497942e-06, "loss": 0.4735, "step": 8400 }, { "epoch": 1.3796727772873771, "grad_norm": 0.33428832912866097, "learning_rate": 7.489300292326438e-06, "loss": 0.4639, "step": 8401 }, { "epoch": 1.3798370044957198, "grad_norm": 0.30964514582994457, "learning_rate": 7.4889497306881924e-06, "loss": 0.466, "step": 8402 }, { "epoch": 1.3800012317040626, "grad_norm": 0.2889957503949191, "learning_rate": 7.488599136587012e-06, "loss": 0.4659, "step": 8403 }, { "epoch": 1.3801654589124053, "grad_norm": 0.3925535751253334, "learning_rate": 7.488248510026704e-06, "loss": 0.4676, "step": 8404 }, { "epoch": 1.3803296861207481, "grad_norm": 0.4829764742485803, "learning_rate": 7.487897851011077e-06, "loss": 0.4857, "step": 8405 }, { "epoch": 1.3804939133290908, "grad_norm": 0.5775087390936786, "learning_rate": 7.4875471595439395e-06, "loss": 0.4627, "step": 8406 }, { "epoch": 1.3806581405374336, "grad_norm": 0.41355222116583273, "learning_rate": 7.4871964356291015e-06, "loss": 0.5126, "step": 8407 }, { "epoch": 1.3808223677457763, "grad_norm": 0.43117946888282094, "learning_rate": 7.4868456792703715e-06, "loss": 0.4567, "step": 8408 }, { "epoch": 1.380986594954119, "grad_norm": 0.5106408016749285, "learning_rate": 7.486494890471557e-06, "loss": 0.4632, "step": 8409 }, { "epoch": 1.3811508221624618, "grad_norm": 0.4436767147179429, "learning_rate": 7.48614406923647e-06, "loss": 0.4679, "step": 8410 }, { "epoch": 1.3813150493708046, "grad_norm": 0.4034600956616641, "learning_rate": 7.4857932155689216e-06, "loss": 0.4875, "step": 8411 }, { "epoch": 1.3814792765791473, "grad_norm": 0.33297679444132877, "learning_rate": 7.485442329472721e-06, "loss": 0.4634, "step": 8412 }, { "epoch": 1.38164350378749, "grad_norm": 0.3543346865632646, "learning_rate": 7.485091410951679e-06, "loss": 0.4761, "step": 8413 }, { "epoch": 1.3818077309958328, "grad_norm": 0.34842494937452245, "learning_rate": 7.484740460009608e-06, "loss": 0.4822, "step": 8414 }, { "epoch": 1.3819719582041754, "grad_norm": 0.36443048186557747, "learning_rate": 7.484389476650317e-06, "loss": 0.4781, "step": 8415 }, { "epoch": 1.3821361854125183, "grad_norm": 0.343766357021871, "learning_rate": 7.484038460877623e-06, "loss": 0.4706, "step": 8416 }, { "epoch": 1.382300412620861, "grad_norm": 0.32675746001653816, "learning_rate": 7.483687412695334e-06, "loss": 0.4476, "step": 8417 }, { "epoch": 1.3824646398292038, "grad_norm": 0.3221113117267582, "learning_rate": 7.483336332107262e-06, "loss": 0.4647, "step": 8418 }, { "epoch": 1.3826288670375464, "grad_norm": 0.3215797449648108, "learning_rate": 7.482985219117225e-06, "loss": 0.4721, "step": 8419 }, { "epoch": 1.3827930942458893, "grad_norm": 0.2928507581174783, "learning_rate": 7.482634073729034e-06, "loss": 0.4581, "step": 8420 }, { "epoch": 1.382957321454232, "grad_norm": 0.35576242260440516, "learning_rate": 7.482282895946501e-06, "loss": 0.4617, "step": 8421 }, { "epoch": 1.3831215486625748, "grad_norm": 0.3366737168411468, "learning_rate": 7.481931685773442e-06, "loss": 0.4536, "step": 8422 }, { "epoch": 1.3832857758709174, "grad_norm": 0.46285205063668183, "learning_rate": 7.481580443213671e-06, "loss": 0.4628, "step": 8423 }, { "epoch": 1.3834500030792602, "grad_norm": 0.2907414212608209, "learning_rate": 7.481229168271003e-06, "loss": 0.4778, "step": 8424 }, { "epoch": 1.3836142302876029, "grad_norm": 0.3830255011938799, "learning_rate": 7.480877860949253e-06, "loss": 0.483, "step": 8425 }, { "epoch": 1.3837784574959455, "grad_norm": 0.3698854430170395, "learning_rate": 7.480526521252237e-06, "loss": 0.4766, "step": 8426 }, { "epoch": 1.3839426847042884, "grad_norm": 0.41376734309397795, "learning_rate": 7.480175149183771e-06, "loss": 0.491, "step": 8427 }, { "epoch": 1.3841069119126312, "grad_norm": 0.39701920731416607, "learning_rate": 7.479823744747669e-06, "loss": 0.4913, "step": 8428 }, { "epoch": 1.3842711391209739, "grad_norm": 0.2992212234035658, "learning_rate": 7.479472307947752e-06, "loss": 0.4996, "step": 8429 }, { "epoch": 1.3844353663293165, "grad_norm": 0.3945568889015645, "learning_rate": 7.479120838787832e-06, "loss": 0.4843, "step": 8430 }, { "epoch": 1.3845995935376594, "grad_norm": 0.32718184784481347, "learning_rate": 7.478769337271729e-06, "loss": 0.4688, "step": 8431 }, { "epoch": 1.384763820746002, "grad_norm": 0.40322396401045213, "learning_rate": 7.478417803403262e-06, "loss": 0.4774, "step": 8432 }, { "epoch": 1.3849280479543449, "grad_norm": 0.31278115808345475, "learning_rate": 7.4780662371862454e-06, "loss": 0.4887, "step": 8433 }, { "epoch": 1.3850922751626875, "grad_norm": 0.2880177748586995, "learning_rate": 7.4777146386245e-06, "loss": 0.4622, "step": 8434 }, { "epoch": 1.3852565023710304, "grad_norm": 0.2922145141022478, "learning_rate": 7.477363007721842e-06, "loss": 0.472, "step": 8435 }, { "epoch": 1.385420729579373, "grad_norm": 0.2595927763167588, "learning_rate": 7.477011344482097e-06, "loss": 0.4598, "step": 8436 }, { "epoch": 1.3855849567877159, "grad_norm": 0.3827827123321603, "learning_rate": 7.4766596489090765e-06, "loss": 0.4768, "step": 8437 }, { "epoch": 1.3857491839960585, "grad_norm": 0.3046199138275548, "learning_rate": 7.476307921006603e-06, "loss": 0.4708, "step": 8438 }, { "epoch": 1.3859134112044014, "grad_norm": 0.36047761509934007, "learning_rate": 7.475956160778499e-06, "loss": 0.4779, "step": 8439 }, { "epoch": 1.386077638412744, "grad_norm": 0.2876281404368738, "learning_rate": 7.475604368228583e-06, "loss": 0.4676, "step": 8440 }, { "epoch": 1.3862418656210869, "grad_norm": 0.3415868267007575, "learning_rate": 7.475252543360676e-06, "loss": 0.4821, "step": 8441 }, { "epoch": 1.3864060928294295, "grad_norm": 0.3071306454877626, "learning_rate": 7.474900686178598e-06, "loss": 0.4661, "step": 8442 }, { "epoch": 1.3865703200377721, "grad_norm": 0.3816098854453923, "learning_rate": 7.474548796686172e-06, "loss": 0.4777, "step": 8443 }, { "epoch": 1.386734547246115, "grad_norm": 0.26175587149765556, "learning_rate": 7.474196874887219e-06, "loss": 0.4637, "step": 8444 }, { "epoch": 1.3868987744544579, "grad_norm": 0.3957223210428203, "learning_rate": 7.473844920785564e-06, "loss": 0.4536, "step": 8445 }, { "epoch": 1.3870630016628005, "grad_norm": 0.42887048967429414, "learning_rate": 7.473492934385025e-06, "loss": 0.4608, "step": 8446 }, { "epoch": 1.3872272288711431, "grad_norm": 0.3039193509641038, "learning_rate": 7.473140915689428e-06, "loss": 0.487, "step": 8447 }, { "epoch": 1.387391456079486, "grad_norm": 0.2924723132481886, "learning_rate": 7.472788864702596e-06, "loss": 0.4658, "step": 8448 }, { "epoch": 1.3875556832878286, "grad_norm": 0.3104130699213828, "learning_rate": 7.4724367814283515e-06, "loss": 0.4661, "step": 8449 }, { "epoch": 1.3877199104961715, "grad_norm": 0.2516987078850876, "learning_rate": 7.472084665870519e-06, "loss": 0.459, "step": 8450 }, { "epoch": 1.3878841377045141, "grad_norm": 0.3246890939691412, "learning_rate": 7.4717325180329246e-06, "loss": 0.481, "step": 8451 }, { "epoch": 1.388048364912857, "grad_norm": 0.3079833527004406, "learning_rate": 7.4713803379193885e-06, "loss": 0.4644, "step": 8452 }, { "epoch": 1.3882125921211996, "grad_norm": 0.29180556133406804, "learning_rate": 7.47102812553374e-06, "loss": 0.4614, "step": 8453 }, { "epoch": 1.3883768193295425, "grad_norm": 0.2886969888932696, "learning_rate": 7.470675880879802e-06, "loss": 0.4736, "step": 8454 }, { "epoch": 1.3885410465378851, "grad_norm": 0.32198851042761334, "learning_rate": 7.470323603961402e-06, "loss": 0.4488, "step": 8455 }, { "epoch": 1.388705273746228, "grad_norm": 0.27850170833038657, "learning_rate": 7.469971294782366e-06, "loss": 0.4719, "step": 8456 }, { "epoch": 1.3888695009545706, "grad_norm": 0.3167504981040049, "learning_rate": 7.469618953346519e-06, "loss": 0.4733, "step": 8457 }, { "epoch": 1.3890337281629135, "grad_norm": 0.2952002433702235, "learning_rate": 7.469266579657688e-06, "loss": 0.4689, "step": 8458 }, { "epoch": 1.3891979553712561, "grad_norm": 0.3653309348288008, "learning_rate": 7.468914173719701e-06, "loss": 0.4991, "step": 8459 }, { "epoch": 1.3893621825795988, "grad_norm": 0.33786108288862315, "learning_rate": 7.468561735536384e-06, "loss": 0.4691, "step": 8460 }, { "epoch": 1.3895264097879416, "grad_norm": 0.38459079965390597, "learning_rate": 7.468209265111568e-06, "loss": 0.4428, "step": 8461 }, { "epoch": 1.3896906369962845, "grad_norm": 0.26526099944477444, "learning_rate": 7.467856762449077e-06, "loss": 0.4388, "step": 8462 }, { "epoch": 1.3898548642046271, "grad_norm": 0.29889257287695886, "learning_rate": 7.467504227552743e-06, "loss": 0.4768, "step": 8463 }, { "epoch": 1.3900190914129698, "grad_norm": 0.4112551605074209, "learning_rate": 7.467151660426393e-06, "loss": 0.473, "step": 8464 }, { "epoch": 1.3901833186213126, "grad_norm": 0.41186499581502484, "learning_rate": 7.466799061073857e-06, "loss": 0.475, "step": 8465 }, { "epoch": 1.3903475458296553, "grad_norm": 0.5636465016362382, "learning_rate": 7.466446429498963e-06, "loss": 0.469, "step": 8466 }, { "epoch": 1.3905117730379981, "grad_norm": 0.32784216972789626, "learning_rate": 7.4660937657055426e-06, "loss": 0.4753, "step": 8467 }, { "epoch": 1.3906760002463407, "grad_norm": 0.357908475606493, "learning_rate": 7.465741069697428e-06, "loss": 0.488, "step": 8468 }, { "epoch": 1.3908402274546836, "grad_norm": 0.33925619079683195, "learning_rate": 7.4653883414784445e-06, "loss": 0.472, "step": 8469 }, { "epoch": 1.3910044546630262, "grad_norm": 0.29056325498193813, "learning_rate": 7.465035581052428e-06, "loss": 0.4641, "step": 8470 }, { "epoch": 1.391168681871369, "grad_norm": 0.3130794677226531, "learning_rate": 7.464682788423206e-06, "loss": 0.4607, "step": 8471 }, { "epoch": 1.3913329090797117, "grad_norm": 0.2999242321369178, "learning_rate": 7.464329963594613e-06, "loss": 0.4615, "step": 8472 }, { "epoch": 1.3914971362880546, "grad_norm": 0.31033618826953613, "learning_rate": 7.463977106570481e-06, "loss": 0.4693, "step": 8473 }, { "epoch": 1.3916613634963972, "grad_norm": 0.40690685776686436, "learning_rate": 7.463624217354641e-06, "loss": 0.4811, "step": 8474 }, { "epoch": 1.39182559070474, "grad_norm": 0.5071406863141119, "learning_rate": 7.463271295950926e-06, "loss": 0.4661, "step": 8475 }, { "epoch": 1.3919898179130827, "grad_norm": 0.33631701642143286, "learning_rate": 7.462918342363169e-06, "loss": 0.4771, "step": 8476 }, { "epoch": 1.3921540451214254, "grad_norm": 0.30037934836463953, "learning_rate": 7.462565356595202e-06, "loss": 0.4802, "step": 8477 }, { "epoch": 1.3923182723297682, "grad_norm": 0.3193277779391227, "learning_rate": 7.4622123386508635e-06, "loss": 0.4876, "step": 8478 }, { "epoch": 1.392482499538111, "grad_norm": 0.270676534414637, "learning_rate": 7.461859288533983e-06, "loss": 0.4594, "step": 8479 }, { "epoch": 1.3926467267464537, "grad_norm": 0.4127141520008906, "learning_rate": 7.461506206248397e-06, "loss": 0.4653, "step": 8480 }, { "epoch": 1.3928109539547964, "grad_norm": 0.4991721815266884, "learning_rate": 7.46115309179794e-06, "loss": 0.4844, "step": 8481 }, { "epoch": 1.3929751811631392, "grad_norm": 0.29698984397598005, "learning_rate": 7.460799945186447e-06, "loss": 0.4613, "step": 8482 }, { "epoch": 1.3931394083714819, "grad_norm": 0.3248795253734255, "learning_rate": 7.4604467664177535e-06, "loss": 0.4638, "step": 8483 }, { "epoch": 1.3933036355798247, "grad_norm": 0.37999833355929574, "learning_rate": 7.460093555495695e-06, "loss": 0.483, "step": 8484 }, { "epoch": 1.3934678627881674, "grad_norm": 0.3038401995582087, "learning_rate": 7.459740312424108e-06, "loss": 0.4846, "step": 8485 }, { "epoch": 1.3936320899965102, "grad_norm": 0.33297402648887964, "learning_rate": 7.459387037206831e-06, "loss": 0.4907, "step": 8486 }, { "epoch": 1.3937963172048529, "grad_norm": 0.5686034853236254, "learning_rate": 7.459033729847697e-06, "loss": 0.4726, "step": 8487 }, { "epoch": 1.3939605444131957, "grad_norm": 0.3948521265247895, "learning_rate": 7.458680390350547e-06, "loss": 0.4641, "step": 8488 }, { "epoch": 1.3941247716215384, "grad_norm": 0.45439200880210123, "learning_rate": 7.458327018719214e-06, "loss": 0.4778, "step": 8489 }, { "epoch": 1.3942889988298812, "grad_norm": 0.3042339933458967, "learning_rate": 7.457973614957541e-06, "loss": 0.4634, "step": 8490 }, { "epoch": 1.3944532260382239, "grad_norm": 0.3305137784668754, "learning_rate": 7.457620179069366e-06, "loss": 0.4842, "step": 8491 }, { "epoch": 1.3946174532465667, "grad_norm": 0.27760176510723344, "learning_rate": 7.457266711058524e-06, "loss": 0.4752, "step": 8492 }, { "epoch": 1.3947816804549094, "grad_norm": 0.4102659742863658, "learning_rate": 7.4569132109288555e-06, "loss": 0.4685, "step": 8493 }, { "epoch": 1.394945907663252, "grad_norm": 0.31841425627424486, "learning_rate": 7.456559678684201e-06, "loss": 0.469, "step": 8494 }, { "epoch": 1.3951101348715949, "grad_norm": 0.2770736991011022, "learning_rate": 7.456206114328402e-06, "loss": 0.4522, "step": 8495 }, { "epoch": 1.3952743620799377, "grad_norm": 0.36425396742082483, "learning_rate": 7.455852517865292e-06, "loss": 0.4661, "step": 8496 }, { "epoch": 1.3954385892882804, "grad_norm": 0.29309115832848304, "learning_rate": 7.4554988892987186e-06, "loss": 0.4566, "step": 8497 }, { "epoch": 1.395602816496623, "grad_norm": 0.3467605734878834, "learning_rate": 7.455145228632518e-06, "loss": 0.4851, "step": 8498 }, { "epoch": 1.3957670437049658, "grad_norm": 0.2803211107231899, "learning_rate": 7.454791535870533e-06, "loss": 0.4698, "step": 8499 }, { "epoch": 1.3959312709133085, "grad_norm": 0.30707032322686445, "learning_rate": 7.454437811016605e-06, "loss": 0.4637, "step": 8500 }, { "epoch": 1.3960954981216513, "grad_norm": 0.292408272146633, "learning_rate": 7.454084054074575e-06, "loss": 0.4647, "step": 8501 }, { "epoch": 1.396259725329994, "grad_norm": 0.3176060833917574, "learning_rate": 7.453730265048285e-06, "loss": 0.4789, "step": 8502 }, { "epoch": 1.3964239525383368, "grad_norm": 0.2712677509040151, "learning_rate": 7.45337644394158e-06, "loss": 0.4637, "step": 8503 }, { "epoch": 1.3965881797466795, "grad_norm": 0.2992868914528555, "learning_rate": 7.453022590758301e-06, "loss": 0.4662, "step": 8504 }, { "epoch": 1.3967524069550223, "grad_norm": 0.2862272436304518, "learning_rate": 7.45266870550229e-06, "loss": 0.4684, "step": 8505 }, { "epoch": 1.396916634163365, "grad_norm": 0.34703558397614, "learning_rate": 7.452314788177391e-06, "loss": 0.4745, "step": 8506 }, { "epoch": 1.3970808613717078, "grad_norm": 0.3593107459557686, "learning_rate": 7.451960838787452e-06, "loss": 0.4701, "step": 8507 }, { "epoch": 1.3972450885800505, "grad_norm": 0.27753636142750543, "learning_rate": 7.451606857336312e-06, "loss": 0.454, "step": 8508 }, { "epoch": 1.3974093157883933, "grad_norm": 0.32519945938169814, "learning_rate": 7.4512528438278174e-06, "loss": 0.4653, "step": 8509 }, { "epoch": 1.397573542996736, "grad_norm": 0.37636601508996753, "learning_rate": 7.4508987982658135e-06, "loss": 0.4723, "step": 8510 }, { "epoch": 1.3977377702050786, "grad_norm": 0.3242174929140608, "learning_rate": 7.450544720654145e-06, "loss": 0.4673, "step": 8511 }, { "epoch": 1.3979019974134215, "grad_norm": 0.3152017954856842, "learning_rate": 7.4501906109966595e-06, "loss": 0.4684, "step": 8512 }, { "epoch": 1.3980662246217643, "grad_norm": 0.273731924885685, "learning_rate": 7.4498364692971996e-06, "loss": 0.4686, "step": 8513 }, { "epoch": 1.398230451830107, "grad_norm": 0.4412982637682514, "learning_rate": 7.449482295559614e-06, "loss": 0.4689, "step": 8514 }, { "epoch": 1.3983946790384496, "grad_norm": 0.6275413982705256, "learning_rate": 7.4491280897877475e-06, "loss": 0.4428, "step": 8515 }, { "epoch": 1.3985589062467925, "grad_norm": 0.3754192144297303, "learning_rate": 7.44877385198545e-06, "loss": 0.4798, "step": 8516 }, { "epoch": 1.398723133455135, "grad_norm": 1.2042144009390472, "learning_rate": 7.448419582156568e-06, "loss": 0.4711, "step": 8517 }, { "epoch": 1.398887360663478, "grad_norm": 0.32201331477470474, "learning_rate": 7.448065280304946e-06, "loss": 0.4691, "step": 8518 }, { "epoch": 1.3990515878718206, "grad_norm": 0.36748107501569693, "learning_rate": 7.447710946434438e-06, "loss": 0.4736, "step": 8519 }, { "epoch": 1.3992158150801635, "grad_norm": 0.30196407746713133, "learning_rate": 7.447356580548886e-06, "loss": 0.461, "step": 8520 }, { "epoch": 1.399380042288506, "grad_norm": 0.40934242162584306, "learning_rate": 7.447002182652143e-06, "loss": 0.4674, "step": 8521 }, { "epoch": 1.399544269496849, "grad_norm": 0.2764712262536976, "learning_rate": 7.446647752748056e-06, "loss": 0.442, "step": 8522 }, { "epoch": 1.3997084967051916, "grad_norm": 0.2964002281736302, "learning_rate": 7.446293290840475e-06, "loss": 0.466, "step": 8523 }, { "epoch": 1.3998727239135345, "grad_norm": 0.31240139575336234, "learning_rate": 7.4459387969332514e-06, "loss": 0.4669, "step": 8524 }, { "epoch": 1.400036951121877, "grad_norm": 0.5236529764809225, "learning_rate": 7.4455842710302346e-06, "loss": 0.4811, "step": 8525 }, { "epoch": 1.40020117833022, "grad_norm": 0.2690862497073942, "learning_rate": 7.445229713135273e-06, "loss": 0.4989, "step": 8526 }, { "epoch": 1.4003654055385626, "grad_norm": 0.31760863779848236, "learning_rate": 7.444875123252219e-06, "loss": 0.4686, "step": 8527 }, { "epoch": 1.4005296327469052, "grad_norm": 0.3106443897577975, "learning_rate": 7.444520501384925e-06, "loss": 0.4796, "step": 8528 }, { "epoch": 1.400693859955248, "grad_norm": 0.45939652043207874, "learning_rate": 7.44416584753724e-06, "loss": 0.465, "step": 8529 }, { "epoch": 1.400858087163591, "grad_norm": 0.3001121880109803, "learning_rate": 7.443811161713018e-06, "loss": 0.4606, "step": 8530 }, { "epoch": 1.4010223143719336, "grad_norm": 0.32939005836550994, "learning_rate": 7.443456443916111e-06, "loss": 0.4665, "step": 8531 }, { "epoch": 1.4011865415802762, "grad_norm": 0.31886006819648094, "learning_rate": 7.443101694150371e-06, "loss": 0.4913, "step": 8532 }, { "epoch": 1.401350768788619, "grad_norm": 0.2781872322411299, "learning_rate": 7.442746912419649e-06, "loss": 0.4668, "step": 8533 }, { "epoch": 1.4015149959969617, "grad_norm": 0.9270944124758137, "learning_rate": 7.442392098727801e-06, "loss": 0.4763, "step": 8534 }, { "epoch": 1.4016792232053046, "grad_norm": 0.35113450993008694, "learning_rate": 7.442037253078681e-06, "loss": 0.465, "step": 8535 }, { "epoch": 1.4018434504136472, "grad_norm": 0.4559463003574399, "learning_rate": 7.441682375476141e-06, "loss": 0.4837, "step": 8536 }, { "epoch": 1.40200767762199, "grad_norm": 0.4216442006644967, "learning_rate": 7.441327465924038e-06, "loss": 0.4726, "step": 8537 }, { "epoch": 1.4021719048303327, "grad_norm": 0.2786121891687586, "learning_rate": 7.440972524426222e-06, "loss": 0.4744, "step": 8538 }, { "epoch": 1.4023361320386756, "grad_norm": 0.38727303750505243, "learning_rate": 7.440617550986552e-06, "loss": 0.4792, "step": 8539 }, { "epoch": 1.4025003592470182, "grad_norm": 0.30560195767242304, "learning_rate": 7.4402625456088826e-06, "loss": 0.4695, "step": 8540 }, { "epoch": 1.402664586455361, "grad_norm": 0.28880668327933817, "learning_rate": 7.43990750829707e-06, "loss": 0.4721, "step": 8541 }, { "epoch": 1.4028288136637037, "grad_norm": 0.4172695597566841, "learning_rate": 7.439552439054967e-06, "loss": 0.4622, "step": 8542 }, { "epoch": 1.4029930408720466, "grad_norm": 0.30842138351354353, "learning_rate": 7.439197337886435e-06, "loss": 0.4837, "step": 8543 }, { "epoch": 1.4031572680803892, "grad_norm": 0.29297139265934846, "learning_rate": 7.438842204795327e-06, "loss": 0.4716, "step": 8544 }, { "epoch": 1.4033214952887318, "grad_norm": 0.59448376485058, "learning_rate": 7.4384870397855e-06, "loss": 0.4695, "step": 8545 }, { "epoch": 1.4034857224970747, "grad_norm": 0.27269194109427797, "learning_rate": 7.438131842860813e-06, "loss": 0.4929, "step": 8546 }, { "epoch": 1.4036499497054176, "grad_norm": 0.341391040294893, "learning_rate": 7.437776614025125e-06, "loss": 0.4908, "step": 8547 }, { "epoch": 1.4038141769137602, "grad_norm": 0.32200140091490614, "learning_rate": 7.4374213532822915e-06, "loss": 0.4745, "step": 8548 }, { "epoch": 1.4039784041221028, "grad_norm": 0.2661207928603546, "learning_rate": 7.437066060636174e-06, "loss": 0.4899, "step": 8549 }, { "epoch": 1.4041426313304457, "grad_norm": 0.29696741496995765, "learning_rate": 7.436710736090627e-06, "loss": 0.4584, "step": 8550 }, { "epoch": 1.4043068585387883, "grad_norm": 0.29306385562883797, "learning_rate": 7.436355379649513e-06, "loss": 0.4588, "step": 8551 }, { "epoch": 1.4044710857471312, "grad_norm": 0.3970937075104934, "learning_rate": 7.43599999131669e-06, "loss": 0.4891, "step": 8552 }, { "epoch": 1.4046353129554738, "grad_norm": 0.34058582880145505, "learning_rate": 7.435644571096019e-06, "loss": 0.4523, "step": 8553 }, { "epoch": 1.4047995401638167, "grad_norm": 0.45701364527626853, "learning_rate": 7.435289118991359e-06, "loss": 0.4677, "step": 8554 }, { "epoch": 1.4049637673721593, "grad_norm": 0.31318959131377144, "learning_rate": 7.434933635006573e-06, "loss": 0.4598, "step": 8555 }, { "epoch": 1.4051279945805022, "grad_norm": 0.3489953705021745, "learning_rate": 7.4345781191455184e-06, "loss": 0.4679, "step": 8556 }, { "epoch": 1.4052922217888448, "grad_norm": 0.5725145140493672, "learning_rate": 7.434222571412059e-06, "loss": 0.4708, "step": 8557 }, { "epoch": 1.4054564489971877, "grad_norm": 0.2986176145115553, "learning_rate": 7.433866991810055e-06, "loss": 0.4637, "step": 8558 }, { "epoch": 1.4056206762055303, "grad_norm": 0.3467896030354416, "learning_rate": 7.433511380343369e-06, "loss": 0.4869, "step": 8559 }, { "epoch": 1.4057849034138732, "grad_norm": 0.5115851153170545, "learning_rate": 7.433155737015863e-06, "loss": 0.4801, "step": 8560 }, { "epoch": 1.4059491306222158, "grad_norm": 0.3898500557859638, "learning_rate": 7.432800061831401e-06, "loss": 0.4636, "step": 8561 }, { "epoch": 1.4061133578305585, "grad_norm": 0.3072252086568954, "learning_rate": 7.432444354793844e-06, "loss": 0.4574, "step": 8562 }, { "epoch": 1.4062775850389013, "grad_norm": 0.38470436228122484, "learning_rate": 7.432088615907057e-06, "loss": 0.4824, "step": 8563 }, { "epoch": 1.4064418122472442, "grad_norm": 0.35917326620407675, "learning_rate": 7.431732845174901e-06, "loss": 0.4609, "step": 8564 }, { "epoch": 1.4066060394555868, "grad_norm": 0.3190169868757679, "learning_rate": 7.4313770426012435e-06, "loss": 0.4755, "step": 8565 }, { "epoch": 1.4067702666639295, "grad_norm": 0.30937207655898136, "learning_rate": 7.4310212081899475e-06, "loss": 0.4747, "step": 8566 }, { "epoch": 1.4069344938722723, "grad_norm": 0.30277533292576975, "learning_rate": 7.430665341944877e-06, "loss": 0.4801, "step": 8567 }, { "epoch": 1.407098721080615, "grad_norm": 0.3104919922663232, "learning_rate": 7.430309443869896e-06, "loss": 0.4733, "step": 8568 }, { "epoch": 1.4072629482889578, "grad_norm": 0.4631342370065941, "learning_rate": 7.429953513968873e-06, "loss": 0.4611, "step": 8569 }, { "epoch": 1.4074271754973005, "grad_norm": 0.2910622862956142, "learning_rate": 7.429597552245673e-06, "loss": 0.4707, "step": 8570 }, { "epoch": 1.4075914027056433, "grad_norm": 0.3070539773881218, "learning_rate": 7.429241558704159e-06, "loss": 0.4674, "step": 8571 }, { "epoch": 1.407755629913986, "grad_norm": 0.3795506728011231, "learning_rate": 7.428885533348201e-06, "loss": 0.4866, "step": 8572 }, { "epoch": 1.4079198571223288, "grad_norm": 0.3507655176583296, "learning_rate": 7.428529476181664e-06, "loss": 0.4611, "step": 8573 }, { "epoch": 1.4080840843306714, "grad_norm": 0.34017407358390006, "learning_rate": 7.428173387208416e-06, "loss": 0.4712, "step": 8574 }, { "epoch": 1.4082483115390143, "grad_norm": 0.30794027444638433, "learning_rate": 7.427817266432324e-06, "loss": 0.4707, "step": 8575 }, { "epoch": 1.408412538747357, "grad_norm": 0.3143973834229577, "learning_rate": 7.427461113857256e-06, "loss": 0.451, "step": 8576 }, { "epoch": 1.4085767659556998, "grad_norm": 0.5979945971913595, "learning_rate": 7.42710492948708e-06, "loss": 0.4639, "step": 8577 }, { "epoch": 1.4087409931640424, "grad_norm": 0.2887234277853099, "learning_rate": 7.426748713325664e-06, "loss": 0.47, "step": 8578 }, { "epoch": 1.408905220372385, "grad_norm": 0.33756145494788314, "learning_rate": 7.426392465376879e-06, "loss": 0.4775, "step": 8579 }, { "epoch": 1.409069447580728, "grad_norm": 0.2800702567687854, "learning_rate": 7.426036185644591e-06, "loss": 0.4517, "step": 8580 }, { "epoch": 1.4092336747890708, "grad_norm": 0.46678552703550924, "learning_rate": 7.425679874132672e-06, "loss": 0.4797, "step": 8581 }, { "epoch": 1.4093979019974134, "grad_norm": 0.3356274085717647, "learning_rate": 7.42532353084499e-06, "loss": 0.4575, "step": 8582 }, { "epoch": 1.409562129205756, "grad_norm": 0.3116451615310341, "learning_rate": 7.424967155785418e-06, "loss": 0.4616, "step": 8583 }, { "epoch": 1.409726356414099, "grad_norm": 0.2696206283265771, "learning_rate": 7.424610748957823e-06, "loss": 0.4636, "step": 8584 }, { "epoch": 1.4098905836224416, "grad_norm": 0.33772238707792296, "learning_rate": 7.424254310366079e-06, "loss": 0.4704, "step": 8585 }, { "epoch": 1.4100548108307844, "grad_norm": 0.3055990131999076, "learning_rate": 7.4238978400140565e-06, "loss": 0.476, "step": 8586 }, { "epoch": 1.410219038039127, "grad_norm": 0.3551794459285267, "learning_rate": 7.423541337905626e-06, "loss": 0.4573, "step": 8587 }, { "epoch": 1.41038326524747, "grad_norm": 0.3186811436613112, "learning_rate": 7.4231848040446605e-06, "loss": 0.4709, "step": 8588 }, { "epoch": 1.4105474924558126, "grad_norm": 0.49816413275093707, "learning_rate": 7.4228282384350315e-06, "loss": 0.4923, "step": 8589 }, { "epoch": 1.4107117196641554, "grad_norm": 0.35030694382788935, "learning_rate": 7.4224716410806126e-06, "loss": 0.4781, "step": 8590 }, { "epoch": 1.410875946872498, "grad_norm": 0.30048156452761454, "learning_rate": 7.422115011985278e-06, "loss": 0.4796, "step": 8591 }, { "epoch": 1.411040174080841, "grad_norm": 0.3196508862449292, "learning_rate": 7.421758351152898e-06, "loss": 0.4611, "step": 8592 }, { "epoch": 1.4112044012891836, "grad_norm": 0.4013596427282612, "learning_rate": 7.421401658587347e-06, "loss": 0.4694, "step": 8593 }, { "epoch": 1.4113686284975264, "grad_norm": 0.2652885562504778, "learning_rate": 7.4210449342924995e-06, "loss": 0.4387, "step": 8594 }, { "epoch": 1.411532855705869, "grad_norm": 0.30374163694020784, "learning_rate": 7.4206881782722305e-06, "loss": 0.4537, "step": 8595 }, { "epoch": 1.4116970829142117, "grad_norm": 0.4612477117901834, "learning_rate": 7.420331390530415e-06, "loss": 0.4827, "step": 8596 }, { "epoch": 1.4118613101225546, "grad_norm": 0.2844872627001613, "learning_rate": 7.419974571070927e-06, "loss": 0.4617, "step": 8597 }, { "epoch": 1.4120255373308974, "grad_norm": 0.27905217514415914, "learning_rate": 7.419617719897642e-06, "loss": 0.4682, "step": 8598 }, { "epoch": 1.41218976453924, "grad_norm": 0.27318352991817796, "learning_rate": 7.4192608370144355e-06, "loss": 0.4905, "step": 8599 }, { "epoch": 1.4123539917475827, "grad_norm": 0.3544091023529306, "learning_rate": 7.418903922425187e-06, "loss": 0.4396, "step": 8600 }, { "epoch": 1.4125182189559256, "grad_norm": 0.32549410255272687, "learning_rate": 7.418546976133766e-06, "loss": 0.457, "step": 8601 }, { "epoch": 1.4126824461642682, "grad_norm": 0.33155795928650694, "learning_rate": 7.418189998144056e-06, "loss": 0.4621, "step": 8602 }, { "epoch": 1.412846673372611, "grad_norm": 0.2963843090235932, "learning_rate": 7.417832988459932e-06, "loss": 0.4527, "step": 8603 }, { "epoch": 1.4130109005809537, "grad_norm": 0.3009253973532018, "learning_rate": 7.417475947085269e-06, "loss": 0.4651, "step": 8604 }, { "epoch": 1.4131751277892965, "grad_norm": 0.31122937905474224, "learning_rate": 7.4171188740239475e-06, "loss": 0.4778, "step": 8605 }, { "epoch": 1.4133393549976392, "grad_norm": 0.35058399361945486, "learning_rate": 7.416761769279846e-06, "loss": 0.4452, "step": 8606 }, { "epoch": 1.413503582205982, "grad_norm": 0.32076975303698535, "learning_rate": 7.4164046328568404e-06, "loss": 0.441, "step": 8607 }, { "epoch": 1.4136678094143247, "grad_norm": 0.5070947218880237, "learning_rate": 7.416047464758812e-06, "loss": 0.4604, "step": 8608 }, { "epoch": 1.4138320366226675, "grad_norm": 0.37100148779464004, "learning_rate": 7.415690264989639e-06, "loss": 0.4645, "step": 8609 }, { "epoch": 1.4139962638310102, "grad_norm": 0.3308140782663967, "learning_rate": 7.415333033553201e-06, "loss": 0.47, "step": 8610 }, { "epoch": 1.414160491039353, "grad_norm": 0.36507773865455445, "learning_rate": 7.414975770453378e-06, "loss": 0.4506, "step": 8611 }, { "epoch": 1.4143247182476957, "grad_norm": 0.3049033070114644, "learning_rate": 7.414618475694051e-06, "loss": 0.4712, "step": 8612 }, { "epoch": 1.4144889454560383, "grad_norm": 0.3199448512706157, "learning_rate": 7.414261149279099e-06, "loss": 0.4779, "step": 8613 }, { "epoch": 1.4146531726643812, "grad_norm": 0.4204919239325658, "learning_rate": 7.413903791212403e-06, "loss": 0.4661, "step": 8614 }, { "epoch": 1.414817399872724, "grad_norm": 0.31521651354486385, "learning_rate": 7.413546401497846e-06, "loss": 0.4816, "step": 8615 }, { "epoch": 1.4149816270810667, "grad_norm": 0.35333273072663923, "learning_rate": 7.413188980139309e-06, "loss": 0.4628, "step": 8616 }, { "epoch": 1.4151458542894093, "grad_norm": 0.289281221874642, "learning_rate": 7.412831527140672e-06, "loss": 0.4563, "step": 8617 }, { "epoch": 1.4153100814977522, "grad_norm": 0.32144669061901465, "learning_rate": 7.412474042505819e-06, "loss": 0.4588, "step": 8618 }, { "epoch": 1.4154743087060948, "grad_norm": 0.46956177346878347, "learning_rate": 7.412116526238633e-06, "loss": 0.4575, "step": 8619 }, { "epoch": 1.4156385359144377, "grad_norm": 0.38669633047785396, "learning_rate": 7.411758978342996e-06, "loss": 0.4939, "step": 8620 }, { "epoch": 1.4158027631227803, "grad_norm": 0.3337701977281101, "learning_rate": 7.411401398822792e-06, "loss": 0.455, "step": 8621 }, { "epoch": 1.4159669903311232, "grad_norm": 0.27205267115254267, "learning_rate": 7.411043787681904e-06, "loss": 0.4595, "step": 8622 }, { "epoch": 1.4161312175394658, "grad_norm": 0.31901257150921336, "learning_rate": 7.410686144924216e-06, "loss": 0.4744, "step": 8623 }, { "epoch": 1.4162954447478087, "grad_norm": 0.29990230673339774, "learning_rate": 7.410328470553614e-06, "loss": 0.4677, "step": 8624 }, { "epoch": 1.4164596719561513, "grad_norm": 0.432034413195062, "learning_rate": 7.409970764573981e-06, "loss": 0.4718, "step": 8625 }, { "epoch": 1.4166238991644942, "grad_norm": 0.45117493263050745, "learning_rate": 7.409613026989202e-06, "loss": 0.4503, "step": 8626 }, { "epoch": 1.4167881263728368, "grad_norm": 0.288399028366705, "learning_rate": 7.409255257803164e-06, "loss": 0.477, "step": 8627 }, { "epoch": 1.4169523535811797, "grad_norm": 0.2742665130026796, "learning_rate": 7.40889745701975e-06, "loss": 0.4793, "step": 8628 }, { "epoch": 1.4171165807895223, "grad_norm": 0.28388631767832506, "learning_rate": 7.408539624642849e-06, "loss": 0.4707, "step": 8629 }, { "epoch": 1.417280807997865, "grad_norm": 0.3762696631629608, "learning_rate": 7.408181760676345e-06, "loss": 0.4742, "step": 8630 }, { "epoch": 1.4174450352062078, "grad_norm": 0.30604710663862517, "learning_rate": 7.407823865124126e-06, "loss": 0.4893, "step": 8631 }, { "epoch": 1.4176092624145507, "grad_norm": 0.30730097863583844, "learning_rate": 7.407465937990079e-06, "loss": 0.4583, "step": 8632 }, { "epoch": 1.4177734896228933, "grad_norm": 0.28177094476792225, "learning_rate": 7.407107979278093e-06, "loss": 0.4893, "step": 8633 }, { "epoch": 1.417937716831236, "grad_norm": 0.3339766813808621, "learning_rate": 7.406749988992052e-06, "loss": 0.4782, "step": 8634 }, { "epoch": 1.4181019440395788, "grad_norm": 0.3550560311986116, "learning_rate": 7.4063919671358456e-06, "loss": 0.4521, "step": 8635 }, { "epoch": 1.4182661712479214, "grad_norm": 0.31422102493092624, "learning_rate": 7.406033913713365e-06, "loss": 0.4574, "step": 8636 }, { "epoch": 1.4184303984562643, "grad_norm": 0.36862943811813514, "learning_rate": 7.405675828728497e-06, "loss": 0.4612, "step": 8637 }, { "epoch": 1.418594625664607, "grad_norm": 0.3823455427636954, "learning_rate": 7.405317712185129e-06, "loss": 0.4642, "step": 8638 }, { "epoch": 1.4187588528729498, "grad_norm": 0.6697363617447964, "learning_rate": 7.4049595640871534e-06, "loss": 0.4611, "step": 8639 }, { "epoch": 1.4189230800812924, "grad_norm": 0.3082877536536946, "learning_rate": 7.404601384438458e-06, "loss": 0.4553, "step": 8640 }, { "epoch": 1.4190873072896353, "grad_norm": 0.3866697717486414, "learning_rate": 7.404243173242936e-06, "loss": 0.4717, "step": 8641 }, { "epoch": 1.419251534497978, "grad_norm": 0.3095639973516765, "learning_rate": 7.403884930504474e-06, "loss": 0.4899, "step": 8642 }, { "epoch": 1.4194157617063208, "grad_norm": 0.3281301085749949, "learning_rate": 7.403526656226965e-06, "loss": 0.4433, "step": 8643 }, { "epoch": 1.4195799889146634, "grad_norm": 0.3093984334312015, "learning_rate": 7.4031683504142985e-06, "loss": 0.4617, "step": 8644 }, { "epoch": 1.4197442161230063, "grad_norm": 0.3132767270598841, "learning_rate": 7.402810013070369e-06, "loss": 0.4648, "step": 8645 }, { "epoch": 1.419908443331349, "grad_norm": 0.35555840733129107, "learning_rate": 7.4024516441990665e-06, "loss": 0.4825, "step": 8646 }, { "epoch": 1.4200726705396916, "grad_norm": 0.3605843369817561, "learning_rate": 7.402093243804283e-06, "loss": 0.5036, "step": 8647 }, { "epoch": 1.4202368977480344, "grad_norm": 0.31417513230023925, "learning_rate": 7.401734811889911e-06, "loss": 0.4756, "step": 8648 }, { "epoch": 1.4204011249563773, "grad_norm": 0.3143056077749063, "learning_rate": 7.401376348459846e-06, "loss": 0.463, "step": 8649 }, { "epoch": 1.42056535216472, "grad_norm": 0.33212448977759, "learning_rate": 7.401017853517978e-06, "loss": 0.4869, "step": 8650 }, { "epoch": 1.4207295793730625, "grad_norm": 0.4137208235695577, "learning_rate": 7.400659327068202e-06, "loss": 0.4866, "step": 8651 }, { "epoch": 1.4208938065814054, "grad_norm": 0.3671152000569904, "learning_rate": 7.400300769114411e-06, "loss": 0.4672, "step": 8652 }, { "epoch": 1.421058033789748, "grad_norm": 0.36134698599484183, "learning_rate": 7.399942179660502e-06, "loss": 0.4803, "step": 8653 }, { "epoch": 1.421222260998091, "grad_norm": 0.3226969808681514, "learning_rate": 7.399583558710367e-06, "loss": 0.4564, "step": 8654 }, { "epoch": 1.4213864882064335, "grad_norm": 0.3360087789001294, "learning_rate": 7.399224906267901e-06, "loss": 0.4695, "step": 8655 }, { "epoch": 1.4215507154147764, "grad_norm": 0.3999377209987789, "learning_rate": 7.398866222337e-06, "loss": 0.4524, "step": 8656 }, { "epoch": 1.421714942623119, "grad_norm": 0.2619494941802104, "learning_rate": 7.39850750692156e-06, "loss": 0.4738, "step": 8657 }, { "epoch": 1.421879169831462, "grad_norm": 0.3377812599960666, "learning_rate": 7.398148760025479e-06, "loss": 0.4783, "step": 8658 }, { "epoch": 1.4220433970398045, "grad_norm": 0.5649331309834287, "learning_rate": 7.397789981652648e-06, "loss": 0.4686, "step": 8659 }, { "epoch": 1.4222076242481472, "grad_norm": 0.266427925002802, "learning_rate": 7.3974311718069685e-06, "loss": 0.4613, "step": 8660 }, { "epoch": 1.42237185145649, "grad_norm": 0.3444625103911929, "learning_rate": 7.397072330492334e-06, "loss": 0.4537, "step": 8661 }, { "epoch": 1.422536078664833, "grad_norm": 0.34059735816769793, "learning_rate": 7.396713457712646e-06, "loss": 0.4899, "step": 8662 }, { "epoch": 1.4227003058731755, "grad_norm": 0.3790439525703914, "learning_rate": 7.396354553471799e-06, "loss": 0.4625, "step": 8663 }, { "epoch": 1.4228645330815182, "grad_norm": 0.39460946921143725, "learning_rate": 7.3959956177736906e-06, "loss": 0.4553, "step": 8664 }, { "epoch": 1.423028760289861, "grad_norm": 0.2817981735232756, "learning_rate": 7.3956366506222225e-06, "loss": 0.4691, "step": 8665 }, { "epoch": 1.4231929874982039, "grad_norm": 0.4378902762621768, "learning_rate": 7.39527765202129e-06, "loss": 0.4843, "step": 8666 }, { "epoch": 1.4233572147065465, "grad_norm": 0.975584940866872, "learning_rate": 7.394918621974795e-06, "loss": 0.4811, "step": 8667 }, { "epoch": 1.4235214419148892, "grad_norm": 0.7960565831153253, "learning_rate": 7.394559560486634e-06, "loss": 0.4538, "step": 8668 }, { "epoch": 1.423685669123232, "grad_norm": 0.30691411758843296, "learning_rate": 7.394200467560708e-06, "loss": 0.4838, "step": 8669 }, { "epoch": 1.4238498963315747, "grad_norm": 0.2963936182755594, "learning_rate": 7.39384134320092e-06, "loss": 0.4711, "step": 8670 }, { "epoch": 1.4240141235399175, "grad_norm": 0.4976685910588599, "learning_rate": 7.393482187411165e-06, "loss": 0.4713, "step": 8671 }, { "epoch": 1.4241783507482602, "grad_norm": 0.3144718758400609, "learning_rate": 7.393123000195349e-06, "loss": 0.4841, "step": 8672 }, { "epoch": 1.424342577956603, "grad_norm": 0.30284813504731734, "learning_rate": 7.392763781557369e-06, "loss": 0.4538, "step": 8673 }, { "epoch": 1.4245068051649457, "grad_norm": 0.34444613471749935, "learning_rate": 7.3924045315011294e-06, "loss": 0.4684, "step": 8674 }, { "epoch": 1.4246710323732885, "grad_norm": 0.30198678685548713, "learning_rate": 7.39204525003053e-06, "loss": 0.4796, "step": 8675 }, { "epoch": 1.4248352595816312, "grad_norm": 0.29695760309471625, "learning_rate": 7.391685937149474e-06, "loss": 0.4972, "step": 8676 }, { "epoch": 1.4249994867899738, "grad_norm": 0.3142088847097196, "learning_rate": 7.391326592861863e-06, "loss": 0.4796, "step": 8677 }, { "epoch": 1.4251637139983167, "grad_norm": 0.3184273247226719, "learning_rate": 7.390967217171602e-06, "loss": 0.4991, "step": 8678 }, { "epoch": 1.4253279412066595, "grad_norm": 0.3155069256631096, "learning_rate": 7.390607810082593e-06, "loss": 0.4629, "step": 8679 }, { "epoch": 1.4254921684150021, "grad_norm": 0.39120624749395266, "learning_rate": 7.390248371598738e-06, "loss": 0.4489, "step": 8680 }, { "epoch": 1.4256563956233448, "grad_norm": 0.3245465147182549, "learning_rate": 7.389888901723942e-06, "loss": 0.4579, "step": 8681 }, { "epoch": 1.4258206228316876, "grad_norm": 0.3394055494225815, "learning_rate": 7.38952940046211e-06, "loss": 0.4757, "step": 8682 }, { "epoch": 1.4259848500400305, "grad_norm": 0.32787591201901345, "learning_rate": 7.389169867817145e-06, "loss": 0.4814, "step": 8683 }, { "epoch": 1.4261490772483731, "grad_norm": 0.49850515920788757, "learning_rate": 7.388810303792953e-06, "loss": 0.4779, "step": 8684 }, { "epoch": 1.4263133044567158, "grad_norm": 0.4000878439452594, "learning_rate": 7.388450708393439e-06, "loss": 0.4735, "step": 8685 }, { "epoch": 1.4264775316650586, "grad_norm": 0.3884268909927122, "learning_rate": 7.388091081622508e-06, "loss": 0.4615, "step": 8686 }, { "epoch": 1.4266417588734013, "grad_norm": 0.47546552185240776, "learning_rate": 7.387731423484068e-06, "loss": 0.4673, "step": 8687 }, { "epoch": 1.4268059860817441, "grad_norm": 0.3451691022903009, "learning_rate": 7.387371733982022e-06, "loss": 0.4527, "step": 8688 }, { "epoch": 1.4269702132900868, "grad_norm": 0.3351213061571729, "learning_rate": 7.387012013120278e-06, "loss": 0.4505, "step": 8689 }, { "epoch": 1.4271344404984296, "grad_norm": 0.33697957511100923, "learning_rate": 7.386652260902743e-06, "loss": 0.4526, "step": 8690 }, { "epoch": 1.4272986677067723, "grad_norm": 0.3288480616443145, "learning_rate": 7.3862924773333266e-06, "loss": 0.4605, "step": 8691 }, { "epoch": 1.4274628949151151, "grad_norm": 0.2741337484391843, "learning_rate": 7.385932662415932e-06, "loss": 0.4733, "step": 8692 }, { "epoch": 1.4276271221234578, "grad_norm": 0.35755160262639774, "learning_rate": 7.38557281615447e-06, "loss": 0.4764, "step": 8693 }, { "epoch": 1.4277913493318004, "grad_norm": 0.42377404035966615, "learning_rate": 7.3852129385528476e-06, "loss": 0.4806, "step": 8694 }, { "epoch": 1.4279555765401433, "grad_norm": 0.3601017434270535, "learning_rate": 7.3848530296149756e-06, "loss": 0.4878, "step": 8695 }, { "epoch": 1.4281198037484861, "grad_norm": 0.2642369260132183, "learning_rate": 7.38449308934476e-06, "loss": 0.46, "step": 8696 }, { "epoch": 1.4282840309568288, "grad_norm": 0.3231395826179842, "learning_rate": 7.3841331177461114e-06, "loss": 0.4513, "step": 8697 }, { "epoch": 1.4284482581651714, "grad_norm": 0.5167362775726638, "learning_rate": 7.38377311482294e-06, "loss": 0.4731, "step": 8698 }, { "epoch": 1.4286124853735143, "grad_norm": 0.31768177825252264, "learning_rate": 7.383413080579156e-06, "loss": 0.4808, "step": 8699 }, { "epoch": 1.4287767125818571, "grad_norm": 0.2670760327678991, "learning_rate": 7.383053015018668e-06, "loss": 0.4463, "step": 8700 }, { "epoch": 1.4289409397901998, "grad_norm": 0.7385714108339203, "learning_rate": 7.382692918145388e-06, "loss": 0.4714, "step": 8701 }, { "epoch": 1.4291051669985424, "grad_norm": 0.3665760851986582, "learning_rate": 7.382332789963226e-06, "loss": 0.45, "step": 8702 }, { "epoch": 1.4292693942068853, "grad_norm": 0.37412406306082946, "learning_rate": 7.381972630476095e-06, "loss": 0.4606, "step": 8703 }, { "epoch": 1.429433621415228, "grad_norm": 0.282501205455874, "learning_rate": 7.381612439687906e-06, "loss": 0.4713, "step": 8704 }, { "epoch": 1.4295978486235708, "grad_norm": 0.3175716080706612, "learning_rate": 7.3812522176025705e-06, "loss": 0.4505, "step": 8705 }, { "epoch": 1.4297620758319134, "grad_norm": 0.30021046264890927, "learning_rate": 7.380891964224001e-06, "loss": 0.468, "step": 8706 }, { "epoch": 1.4299263030402563, "grad_norm": 0.36843733896681896, "learning_rate": 7.38053167955611e-06, "loss": 0.4797, "step": 8707 }, { "epoch": 1.430090530248599, "grad_norm": 0.4091002966536038, "learning_rate": 7.380171363602812e-06, "loss": 0.4429, "step": 8708 }, { "epoch": 1.4302547574569417, "grad_norm": 0.34868960892958695, "learning_rate": 7.379811016368018e-06, "loss": 0.4822, "step": 8709 }, { "epoch": 1.4304189846652844, "grad_norm": 0.30935125392275414, "learning_rate": 7.379450637855644e-06, "loss": 0.4717, "step": 8710 }, { "epoch": 1.430583211873627, "grad_norm": 0.39039071388522256, "learning_rate": 7.379090228069602e-06, "loss": 0.4512, "step": 8711 }, { "epoch": 1.4307474390819699, "grad_norm": 0.34733265965538085, "learning_rate": 7.378729787013809e-06, "loss": 0.4544, "step": 8712 }, { "epoch": 1.4309116662903127, "grad_norm": 0.30168326488643704, "learning_rate": 7.3783693146921765e-06, "loss": 0.4871, "step": 8713 }, { "epoch": 1.4310758934986554, "grad_norm": 0.30953253420001814, "learning_rate": 7.378008811108622e-06, "loss": 0.4725, "step": 8714 }, { "epoch": 1.431240120706998, "grad_norm": 0.38832020065310613, "learning_rate": 7.377648276267061e-06, "loss": 0.4744, "step": 8715 }, { "epoch": 1.4314043479153409, "grad_norm": 0.3305266235225147, "learning_rate": 7.377287710171408e-06, "loss": 0.4819, "step": 8716 }, { "epoch": 1.4315685751236837, "grad_norm": 0.34863300149843024, "learning_rate": 7.376927112825579e-06, "loss": 0.4465, "step": 8717 }, { "epoch": 1.4317328023320264, "grad_norm": 0.3006649152307014, "learning_rate": 7.376566484233492e-06, "loss": 0.4851, "step": 8718 }, { "epoch": 1.431897029540369, "grad_norm": 0.4097936421003854, "learning_rate": 7.3762058243990615e-06, "loss": 0.4564, "step": 8719 }, { "epoch": 1.4320612567487119, "grad_norm": 0.29230226483539684, "learning_rate": 7.3758451333262075e-06, "loss": 0.4433, "step": 8720 }, { "epoch": 1.4322254839570545, "grad_norm": 0.48553587471592025, "learning_rate": 7.375484411018845e-06, "loss": 0.4547, "step": 8721 }, { "epoch": 1.4323897111653974, "grad_norm": 0.322278646726219, "learning_rate": 7.375123657480893e-06, "loss": 0.4635, "step": 8722 }, { "epoch": 1.43255393837374, "grad_norm": 0.3545662737503202, "learning_rate": 7.374762872716269e-06, "loss": 0.4687, "step": 8723 }, { "epoch": 1.4327181655820829, "grad_norm": 0.3099671112478105, "learning_rate": 7.374402056728893e-06, "loss": 0.4638, "step": 8724 }, { "epoch": 1.4328823927904255, "grad_norm": 0.3716005362691296, "learning_rate": 7.374041209522682e-06, "loss": 0.4784, "step": 8725 }, { "epoch": 1.4330466199987684, "grad_norm": 0.27171238151285476, "learning_rate": 7.373680331101554e-06, "loss": 0.4728, "step": 8726 }, { "epoch": 1.433210847207111, "grad_norm": 0.29662911135939474, "learning_rate": 7.373319421469432e-06, "loss": 0.4717, "step": 8727 }, { "epoch": 1.4333750744154536, "grad_norm": 0.2888872805223644, "learning_rate": 7.3729584806302346e-06, "loss": 0.4556, "step": 8728 }, { "epoch": 1.4335393016237965, "grad_norm": 0.8980499551263118, "learning_rate": 7.372597508587881e-06, "loss": 0.4434, "step": 8729 }, { "epoch": 1.4337035288321394, "grad_norm": 0.31920864527645193, "learning_rate": 7.372236505346292e-06, "loss": 0.4707, "step": 8730 }, { "epoch": 1.433867756040482, "grad_norm": 0.31494192651957037, "learning_rate": 7.371875470909388e-06, "loss": 0.4751, "step": 8731 }, { "epoch": 1.4340319832488246, "grad_norm": 0.3646059146697081, "learning_rate": 7.371514405281091e-06, "loss": 0.4542, "step": 8732 }, { "epoch": 1.4341962104571675, "grad_norm": 0.29678542187972806, "learning_rate": 7.371153308465324e-06, "loss": 0.4688, "step": 8733 }, { "epoch": 1.4343604376655104, "grad_norm": 0.30307859935590387, "learning_rate": 7.370792180466006e-06, "loss": 0.4811, "step": 8734 }, { "epoch": 1.434524664873853, "grad_norm": 0.35944445084086313, "learning_rate": 7.370431021287059e-06, "loss": 0.4892, "step": 8735 }, { "epoch": 1.4346888920821956, "grad_norm": 0.3054816830042426, "learning_rate": 7.37006983093241e-06, "loss": 0.4489, "step": 8736 }, { "epoch": 1.4348531192905385, "grad_norm": 0.3480155627029634, "learning_rate": 7.369708609405977e-06, "loss": 0.4521, "step": 8737 }, { "epoch": 1.4350173464988811, "grad_norm": 0.33385036419258024, "learning_rate": 7.369347356711686e-06, "loss": 0.4694, "step": 8738 }, { "epoch": 1.435181573707224, "grad_norm": 0.2680938800862615, "learning_rate": 7.368986072853459e-06, "loss": 0.4641, "step": 8739 }, { "epoch": 1.4353458009155666, "grad_norm": 0.45577800534453167, "learning_rate": 7.36862475783522e-06, "loss": 0.4672, "step": 8740 }, { "epoch": 1.4355100281239095, "grad_norm": 0.3124628033703303, "learning_rate": 7.3682634116608955e-06, "loss": 0.4613, "step": 8741 }, { "epoch": 1.4356742553322521, "grad_norm": 0.29964203647181226, "learning_rate": 7.367902034334407e-06, "loss": 0.4702, "step": 8742 }, { "epoch": 1.435838482540595, "grad_norm": 0.3592963394992377, "learning_rate": 7.367540625859681e-06, "loss": 0.4741, "step": 8743 }, { "epoch": 1.4360027097489376, "grad_norm": 0.3646717741074932, "learning_rate": 7.367179186240642e-06, "loss": 0.459, "step": 8744 }, { "epoch": 1.4361669369572803, "grad_norm": 0.3194017770756867, "learning_rate": 7.366817715481216e-06, "loss": 0.4661, "step": 8745 }, { "epoch": 1.4363311641656231, "grad_norm": 0.8041761901096787, "learning_rate": 7.36645621358533e-06, "loss": 0.4723, "step": 8746 }, { "epoch": 1.436495391373966, "grad_norm": 0.3466083547248436, "learning_rate": 7.36609468055691e-06, "loss": 0.4528, "step": 8747 }, { "epoch": 1.4366596185823086, "grad_norm": 0.43236098119183647, "learning_rate": 7.36573311639988e-06, "loss": 0.4912, "step": 8748 }, { "epoch": 1.4368238457906513, "grad_norm": 0.48201449975080524, "learning_rate": 7.365371521118171e-06, "loss": 0.4662, "step": 8749 }, { "epoch": 1.4369880729989941, "grad_norm": 0.2897789566298985, "learning_rate": 7.365009894715706e-06, "loss": 0.4697, "step": 8750 }, { "epoch": 1.437152300207337, "grad_norm": 0.4285770698212275, "learning_rate": 7.364648237196416e-06, "loss": 0.4645, "step": 8751 }, { "epoch": 1.4373165274156796, "grad_norm": 0.30895575283505117, "learning_rate": 7.364286548564226e-06, "loss": 0.4635, "step": 8752 }, { "epoch": 1.4374807546240222, "grad_norm": 0.3878587785336352, "learning_rate": 7.3639248288230685e-06, "loss": 0.4717, "step": 8753 }, { "epoch": 1.437644981832365, "grad_norm": 0.31587243157295675, "learning_rate": 7.3635630779768694e-06, "loss": 0.4807, "step": 8754 }, { "epoch": 1.4378092090407077, "grad_norm": 0.29345313599614753, "learning_rate": 7.363201296029556e-06, "loss": 0.4743, "step": 8755 }, { "epoch": 1.4379734362490506, "grad_norm": 0.29824132861417796, "learning_rate": 7.362839482985061e-06, "loss": 0.4708, "step": 8756 }, { "epoch": 1.4381376634573932, "grad_norm": 0.3150943702529251, "learning_rate": 7.362477638847312e-06, "loss": 0.4803, "step": 8757 }, { "epoch": 1.438301890665736, "grad_norm": 0.3035150249486316, "learning_rate": 7.362115763620241e-06, "loss": 0.4642, "step": 8758 }, { "epoch": 1.4384661178740787, "grad_norm": 0.49228834262052035, "learning_rate": 7.361753857307775e-06, "loss": 0.4644, "step": 8759 }, { "epoch": 1.4386303450824216, "grad_norm": 0.425149859034616, "learning_rate": 7.3613919199138464e-06, "loss": 0.4573, "step": 8760 }, { "epoch": 1.4387945722907642, "grad_norm": 0.38725281483202917, "learning_rate": 7.361029951442388e-06, "loss": 0.4717, "step": 8761 }, { "epoch": 1.4389587994991069, "grad_norm": 0.304654791341761, "learning_rate": 7.360667951897329e-06, "loss": 0.475, "step": 8762 }, { "epoch": 1.4391230267074497, "grad_norm": 0.29352327279914797, "learning_rate": 7.3603059212826e-06, "loss": 0.4814, "step": 8763 }, { "epoch": 1.4392872539157926, "grad_norm": 0.39246611782479257, "learning_rate": 7.359943859602135e-06, "loss": 0.4847, "step": 8764 }, { "epoch": 1.4394514811241352, "grad_norm": 0.3311896456985306, "learning_rate": 7.359581766859867e-06, "loss": 0.4603, "step": 8765 }, { "epoch": 1.4396157083324779, "grad_norm": 0.359044957795356, "learning_rate": 7.359219643059727e-06, "loss": 0.4447, "step": 8766 }, { "epoch": 1.4397799355408207, "grad_norm": 0.26938397873450154, "learning_rate": 7.3588574882056485e-06, "loss": 0.4674, "step": 8767 }, { "epoch": 1.4399441627491636, "grad_norm": 0.4728328295191253, "learning_rate": 7.358495302301566e-06, "loss": 0.4584, "step": 8768 }, { "epoch": 1.4401083899575062, "grad_norm": 0.28764939085722807, "learning_rate": 7.35813308535141e-06, "loss": 0.5044, "step": 8769 }, { "epoch": 1.4402726171658489, "grad_norm": 0.64332414790041, "learning_rate": 7.357770837359119e-06, "loss": 0.4699, "step": 8770 }, { "epoch": 1.4404368443741917, "grad_norm": 0.26700818401796017, "learning_rate": 7.357408558328623e-06, "loss": 0.4667, "step": 8771 }, { "epoch": 1.4406010715825344, "grad_norm": 0.3857951942674286, "learning_rate": 7.35704624826386e-06, "loss": 0.4705, "step": 8772 }, { "epoch": 1.4407652987908772, "grad_norm": 0.2941631079784591, "learning_rate": 7.356683907168762e-06, "loss": 0.4678, "step": 8773 }, { "epoch": 1.4409295259992199, "grad_norm": 0.35313285672785943, "learning_rate": 7.356321535047269e-06, "loss": 0.4609, "step": 8774 }, { "epoch": 1.4410937532075627, "grad_norm": 0.30558355477754984, "learning_rate": 7.355959131903313e-06, "loss": 0.491, "step": 8775 }, { "epoch": 1.4412579804159054, "grad_norm": 0.30028824124065273, "learning_rate": 7.355596697740828e-06, "loss": 0.4782, "step": 8776 }, { "epoch": 1.4414222076242482, "grad_norm": 0.3974043321955806, "learning_rate": 7.355234232563758e-06, "loss": 0.466, "step": 8777 }, { "epoch": 1.4415864348325909, "grad_norm": 0.32176678659026303, "learning_rate": 7.354871736376031e-06, "loss": 0.4519, "step": 8778 }, { "epoch": 1.4417506620409335, "grad_norm": 0.32792249317240885, "learning_rate": 7.354509209181591e-06, "loss": 0.4738, "step": 8779 }, { "epoch": 1.4419148892492764, "grad_norm": 0.30658790737339253, "learning_rate": 7.354146650984372e-06, "loss": 0.462, "step": 8780 }, { "epoch": 1.4420791164576192, "grad_norm": 0.4592510866299841, "learning_rate": 7.35378406178831e-06, "loss": 0.4529, "step": 8781 }, { "epoch": 1.4422433436659619, "grad_norm": 0.2998740701505465, "learning_rate": 7.353421441597348e-06, "loss": 0.4682, "step": 8782 }, { "epoch": 1.4424075708743045, "grad_norm": 0.355911208435906, "learning_rate": 7.353058790415422e-06, "loss": 0.4655, "step": 8783 }, { "epoch": 1.4425717980826473, "grad_norm": 0.32644863293812215, "learning_rate": 7.352696108246469e-06, "loss": 0.4618, "step": 8784 }, { "epoch": 1.4427360252909902, "grad_norm": 0.2730444038859821, "learning_rate": 7.352333395094429e-06, "loss": 0.4931, "step": 8785 }, { "epoch": 1.4429002524993328, "grad_norm": 0.32896780601935, "learning_rate": 7.351970650963243e-06, "loss": 0.4527, "step": 8786 }, { "epoch": 1.4430644797076755, "grad_norm": 0.3141642761360534, "learning_rate": 7.35160787585685e-06, "loss": 0.4731, "step": 8787 }, { "epoch": 1.4432287069160183, "grad_norm": 0.3259242243345612, "learning_rate": 7.35124506977919e-06, "loss": 0.472, "step": 8788 }, { "epoch": 1.443392934124361, "grad_norm": 0.28421921594410116, "learning_rate": 7.350882232734202e-06, "loss": 0.4423, "step": 8789 }, { "epoch": 1.4435571613327038, "grad_norm": 0.3161737021968298, "learning_rate": 7.350519364725829e-06, "loss": 0.4716, "step": 8790 }, { "epoch": 1.4437213885410465, "grad_norm": 0.2932958787210009, "learning_rate": 7.350156465758012e-06, "loss": 0.4774, "step": 8791 }, { "epoch": 1.4438856157493893, "grad_norm": 0.6697209643610852, "learning_rate": 7.3497935358346894e-06, "loss": 0.4357, "step": 8792 }, { "epoch": 1.444049842957732, "grad_norm": 0.3264213977418509, "learning_rate": 7.349430574959807e-06, "loss": 0.4725, "step": 8793 }, { "epoch": 1.4442140701660748, "grad_norm": 0.32557575314880266, "learning_rate": 7.349067583137305e-06, "loss": 0.4615, "step": 8794 }, { "epoch": 1.4443782973744175, "grad_norm": 0.31900811247252975, "learning_rate": 7.348704560371126e-06, "loss": 0.4705, "step": 8795 }, { "epoch": 1.4445425245827601, "grad_norm": 0.35273304255974214, "learning_rate": 7.348341506665211e-06, "loss": 0.4836, "step": 8796 }, { "epoch": 1.444706751791103, "grad_norm": 0.34808125964253456, "learning_rate": 7.347978422023507e-06, "loss": 0.4652, "step": 8797 }, { "epoch": 1.4448709789994458, "grad_norm": 0.4687086087072129, "learning_rate": 7.347615306449954e-06, "loss": 0.473, "step": 8798 }, { "epoch": 1.4450352062077885, "grad_norm": 0.3125715957895599, "learning_rate": 7.347252159948498e-06, "loss": 0.4606, "step": 8799 }, { "epoch": 1.445199433416131, "grad_norm": 0.2823950209487057, "learning_rate": 7.346888982523081e-06, "loss": 0.4761, "step": 8800 }, { "epoch": 1.445363660624474, "grad_norm": 0.27396154510210274, "learning_rate": 7.3465257741776495e-06, "loss": 0.4841, "step": 8801 }, { "epoch": 1.4455278878328168, "grad_norm": 0.38488801653092736, "learning_rate": 7.346162534916148e-06, "loss": 0.4717, "step": 8802 }, { "epoch": 1.4456921150411595, "grad_norm": 0.3296881924583521, "learning_rate": 7.34579926474252e-06, "loss": 0.4636, "step": 8803 }, { "epoch": 1.445856342249502, "grad_norm": 0.4285832646198502, "learning_rate": 7.345435963660713e-06, "loss": 0.4575, "step": 8804 }, { "epoch": 1.446020569457845, "grad_norm": 0.34978061084114653, "learning_rate": 7.345072631674672e-06, "loss": 0.4777, "step": 8805 }, { "epoch": 1.4461847966661876, "grad_norm": 0.308759693755405, "learning_rate": 7.344709268788342e-06, "loss": 0.4674, "step": 8806 }, { "epoch": 1.4463490238745305, "grad_norm": 0.37776335856045, "learning_rate": 7.344345875005671e-06, "loss": 0.4734, "step": 8807 }, { "epoch": 1.446513251082873, "grad_norm": 0.30213707597068723, "learning_rate": 7.343982450330605e-06, "loss": 0.4677, "step": 8808 }, { "epoch": 1.446677478291216, "grad_norm": 0.2863664625153726, "learning_rate": 7.343618994767093e-06, "loss": 0.4757, "step": 8809 }, { "epoch": 1.4468417054995586, "grad_norm": 0.4567100127507095, "learning_rate": 7.343255508319079e-06, "loss": 0.4826, "step": 8810 }, { "epoch": 1.4470059327079015, "grad_norm": 0.3235014519664871, "learning_rate": 7.342891990990514e-06, "loss": 0.4877, "step": 8811 }, { "epoch": 1.447170159916244, "grad_norm": 0.48932063009730403, "learning_rate": 7.342528442785346e-06, "loss": 0.4597, "step": 8812 }, { "epoch": 1.4473343871245867, "grad_norm": 0.29500404335881325, "learning_rate": 7.34216486370752e-06, "loss": 0.4617, "step": 8813 }, { "epoch": 1.4474986143329296, "grad_norm": 0.32921988580938477, "learning_rate": 7.341801253760988e-06, "loss": 0.4409, "step": 8814 }, { "epoch": 1.4476628415412724, "grad_norm": 0.32024711284584884, "learning_rate": 7.341437612949699e-06, "loss": 0.459, "step": 8815 }, { "epoch": 1.447827068749615, "grad_norm": 0.2953490698527201, "learning_rate": 7.341073941277602e-06, "loss": 0.4596, "step": 8816 }, { "epoch": 1.4479912959579577, "grad_norm": 0.3474854112500576, "learning_rate": 7.340710238748646e-06, "loss": 0.4713, "step": 8817 }, { "epoch": 1.4481555231663006, "grad_norm": 0.6411897652901434, "learning_rate": 7.340346505366782e-06, "loss": 0.4792, "step": 8818 }, { "epoch": 1.4483197503746434, "grad_norm": 0.3431135250449065, "learning_rate": 7.3399827411359615e-06, "loss": 0.4734, "step": 8819 }, { "epoch": 1.448483977582986, "grad_norm": 0.3095387941404658, "learning_rate": 7.3396189460601325e-06, "loss": 0.4672, "step": 8820 }, { "epoch": 1.4486482047913287, "grad_norm": 0.29983920611558473, "learning_rate": 7.339255120143251e-06, "loss": 0.4751, "step": 8821 }, { "epoch": 1.4488124319996716, "grad_norm": 0.29211550519830737, "learning_rate": 7.338891263389263e-06, "loss": 0.4871, "step": 8822 }, { "epoch": 1.4489766592080142, "grad_norm": 0.3023119216930271, "learning_rate": 7.338527375802123e-06, "loss": 0.4751, "step": 8823 }, { "epoch": 1.449140886416357, "grad_norm": 0.2741409984560532, "learning_rate": 7.338163457385783e-06, "loss": 0.4576, "step": 8824 }, { "epoch": 1.4493051136246997, "grad_norm": 0.7815452059668843, "learning_rate": 7.337799508144196e-06, "loss": 0.477, "step": 8825 }, { "epoch": 1.4494693408330426, "grad_norm": 0.28544871860888393, "learning_rate": 7.337435528081315e-06, "loss": 0.4815, "step": 8826 }, { "epoch": 1.4496335680413852, "grad_norm": 0.3701675073844943, "learning_rate": 7.33707151720109e-06, "loss": 0.478, "step": 8827 }, { "epoch": 1.449797795249728, "grad_norm": 0.32291784357585807, "learning_rate": 7.336707475507479e-06, "loss": 0.4671, "step": 8828 }, { "epoch": 1.4499620224580707, "grad_norm": 0.2915111220168, "learning_rate": 7.336343403004434e-06, "loss": 0.4804, "step": 8829 }, { "epoch": 1.4501262496664133, "grad_norm": 0.3140881121761215, "learning_rate": 7.335979299695907e-06, "loss": 0.4604, "step": 8830 }, { "epoch": 1.4502904768747562, "grad_norm": 0.2802099567639804, "learning_rate": 7.335615165585857e-06, "loss": 0.4703, "step": 8831 }, { "epoch": 1.450454704083099, "grad_norm": 0.3050059252111744, "learning_rate": 7.335251000678235e-06, "loss": 0.4572, "step": 8832 }, { "epoch": 1.4506189312914417, "grad_norm": 0.3919127908954277, "learning_rate": 7.334886804976999e-06, "loss": 0.4758, "step": 8833 }, { "epoch": 1.4507831584997843, "grad_norm": 0.31520124295877416, "learning_rate": 7.334522578486102e-06, "loss": 0.4934, "step": 8834 }, { "epoch": 1.4509473857081272, "grad_norm": 0.302542810820108, "learning_rate": 7.334158321209502e-06, "loss": 0.4637, "step": 8835 }, { "epoch": 1.45111161291647, "grad_norm": 0.2983970505298541, "learning_rate": 7.333794033151153e-06, "loss": 0.456, "step": 8836 }, { "epoch": 1.4512758401248127, "grad_norm": 0.2992793685686754, "learning_rate": 7.333429714315014e-06, "loss": 0.4765, "step": 8837 }, { "epoch": 1.4514400673331553, "grad_norm": 0.30608183883212225, "learning_rate": 7.333065364705039e-06, "loss": 0.4544, "step": 8838 }, { "epoch": 1.4516042945414982, "grad_norm": 0.33612070244669445, "learning_rate": 7.332700984325188e-06, "loss": 0.4473, "step": 8839 }, { "epoch": 1.4517685217498408, "grad_norm": 0.36256371284524325, "learning_rate": 7.332336573179417e-06, "loss": 0.4773, "step": 8840 }, { "epoch": 1.4519327489581837, "grad_norm": 0.5163192135177029, "learning_rate": 7.331972131271683e-06, "loss": 0.4803, "step": 8841 }, { "epoch": 1.4520969761665263, "grad_norm": 0.291303436118226, "learning_rate": 7.331607658605947e-06, "loss": 0.4666, "step": 8842 }, { "epoch": 1.4522612033748692, "grad_norm": 0.30054038001077393, "learning_rate": 7.331243155186165e-06, "loss": 0.4596, "step": 8843 }, { "epoch": 1.4524254305832118, "grad_norm": 0.32552628814577383, "learning_rate": 7.330878621016298e-06, "loss": 0.4655, "step": 8844 }, { "epoch": 1.4525896577915547, "grad_norm": 0.3255490210170379, "learning_rate": 7.330514056100302e-06, "loss": 0.4728, "step": 8845 }, { "epoch": 1.4527538849998973, "grad_norm": 0.3732452570361322, "learning_rate": 7.330149460442139e-06, "loss": 0.4678, "step": 8846 }, { "epoch": 1.45291811220824, "grad_norm": 0.3403884811461902, "learning_rate": 7.329784834045769e-06, "loss": 0.473, "step": 8847 }, { "epoch": 1.4530823394165828, "grad_norm": 0.35037990887786574, "learning_rate": 7.329420176915151e-06, "loss": 0.4578, "step": 8848 }, { "epoch": 1.4532465666249257, "grad_norm": 0.33906292449742215, "learning_rate": 7.329055489054248e-06, "loss": 0.473, "step": 8849 }, { "epoch": 1.4534107938332683, "grad_norm": 0.3476995567742656, "learning_rate": 7.3286907704670175e-06, "loss": 0.489, "step": 8850 }, { "epoch": 1.453575021041611, "grad_norm": 0.3307868667516689, "learning_rate": 7.328326021157423e-06, "loss": 0.4775, "step": 8851 }, { "epoch": 1.4537392482499538, "grad_norm": 0.47335856697556894, "learning_rate": 7.327961241129423e-06, "loss": 0.471, "step": 8852 }, { "epoch": 1.4539034754582967, "grad_norm": 0.27687802887216756, "learning_rate": 7.327596430386984e-06, "loss": 0.4493, "step": 8853 }, { "epoch": 1.4540677026666393, "grad_norm": 0.3294214659206888, "learning_rate": 7.327231588934065e-06, "loss": 0.4644, "step": 8854 }, { "epoch": 1.454231929874982, "grad_norm": 0.321441907890736, "learning_rate": 7.326866716774629e-06, "loss": 0.4733, "step": 8855 }, { "epoch": 1.4543961570833248, "grad_norm": 0.3409296447226374, "learning_rate": 7.326501813912639e-06, "loss": 0.4459, "step": 8856 }, { "epoch": 1.4545603842916675, "grad_norm": 0.40546821658975873, "learning_rate": 7.326136880352058e-06, "loss": 0.4736, "step": 8857 }, { "epoch": 1.4547246115000103, "grad_norm": 0.3157113879724182, "learning_rate": 7.325771916096853e-06, "loss": 0.4581, "step": 8858 }, { "epoch": 1.454888838708353, "grad_norm": 0.43633886623309787, "learning_rate": 7.325406921150981e-06, "loss": 0.4638, "step": 8859 }, { "epoch": 1.4550530659166958, "grad_norm": 0.6933423819272897, "learning_rate": 7.3250418955184115e-06, "loss": 0.4936, "step": 8860 }, { "epoch": 1.4552172931250384, "grad_norm": 0.30856150710693425, "learning_rate": 7.324676839203108e-06, "loss": 0.4739, "step": 8861 }, { "epoch": 1.4553815203333813, "grad_norm": 0.37647065666549806, "learning_rate": 7.324311752209035e-06, "loss": 0.4774, "step": 8862 }, { "epoch": 1.455545747541724, "grad_norm": 0.31474858543251255, "learning_rate": 7.323946634540156e-06, "loss": 0.4775, "step": 8863 }, { "epoch": 1.4557099747500666, "grad_norm": 0.35550058465976125, "learning_rate": 7.32358148620044e-06, "loss": 0.475, "step": 8864 }, { "epoch": 1.4558742019584094, "grad_norm": 0.3180664131895398, "learning_rate": 7.32321630719385e-06, "loss": 0.4534, "step": 8865 }, { "epoch": 1.4560384291667523, "grad_norm": 0.4056724611982341, "learning_rate": 7.322851097524354e-06, "loss": 0.4915, "step": 8866 }, { "epoch": 1.456202656375095, "grad_norm": 0.3267146528014822, "learning_rate": 7.322485857195916e-06, "loss": 0.4957, "step": 8867 }, { "epoch": 1.4563668835834376, "grad_norm": 0.429087209244443, "learning_rate": 7.322120586212507e-06, "loss": 0.4619, "step": 8868 }, { "epoch": 1.4565311107917804, "grad_norm": 0.2921345299614119, "learning_rate": 7.32175528457809e-06, "loss": 0.4468, "step": 8869 }, { "epoch": 1.4566953380001233, "grad_norm": 0.326412519394861, "learning_rate": 7.321389952296635e-06, "loss": 0.4696, "step": 8870 }, { "epoch": 1.456859565208466, "grad_norm": 0.30064282080456756, "learning_rate": 7.32102458937211e-06, "loss": 0.4659, "step": 8871 }, { "epoch": 1.4570237924168086, "grad_norm": 0.3096831053057281, "learning_rate": 7.320659195808482e-06, "loss": 0.4621, "step": 8872 }, { "epoch": 1.4571880196251514, "grad_norm": 0.29535434369055585, "learning_rate": 7.32029377160972e-06, "loss": 0.4546, "step": 8873 }, { "epoch": 1.457352246833494, "grad_norm": 0.32787363679249254, "learning_rate": 7.319928316779792e-06, "loss": 0.4522, "step": 8874 }, { "epoch": 1.457516474041837, "grad_norm": 0.35674569817713586, "learning_rate": 7.3195628313226685e-06, "loss": 0.4668, "step": 8875 }, { "epoch": 1.4576807012501796, "grad_norm": 0.3534886154900476, "learning_rate": 7.31919731524232e-06, "loss": 0.463, "step": 8876 }, { "epoch": 1.4578449284585224, "grad_norm": 0.37215114639067504, "learning_rate": 7.318831768542713e-06, "loss": 0.4606, "step": 8877 }, { "epoch": 1.458009155666865, "grad_norm": 0.412766233405986, "learning_rate": 7.31846619122782e-06, "loss": 0.4659, "step": 8878 }, { "epoch": 1.458173382875208, "grad_norm": 0.322259459292066, "learning_rate": 7.318100583301612e-06, "loss": 0.4712, "step": 8879 }, { "epoch": 1.4583376100835506, "grad_norm": 0.38762871022432, "learning_rate": 7.31773494476806e-06, "loss": 0.4848, "step": 8880 }, { "epoch": 1.4585018372918932, "grad_norm": 0.33110964123463305, "learning_rate": 7.317369275631131e-06, "loss": 0.4534, "step": 8881 }, { "epoch": 1.458666064500236, "grad_norm": 0.29537116751045295, "learning_rate": 7.317003575894802e-06, "loss": 0.4618, "step": 8882 }, { "epoch": 1.458830291708579, "grad_norm": 0.3093608228609226, "learning_rate": 7.316637845563043e-06, "loss": 0.4649, "step": 8883 }, { "epoch": 1.4589945189169216, "grad_norm": 0.3557175296933889, "learning_rate": 7.316272084639824e-06, "loss": 0.4524, "step": 8884 }, { "epoch": 1.4591587461252642, "grad_norm": 0.3408766965625624, "learning_rate": 7.31590629312912e-06, "loss": 0.4615, "step": 8885 }, { "epoch": 1.459322973333607, "grad_norm": 0.31371414979257384, "learning_rate": 7.315540471034903e-06, "loss": 0.4605, "step": 8886 }, { "epoch": 1.45948720054195, "grad_norm": 0.3204753006979815, "learning_rate": 7.3151746183611485e-06, "loss": 0.4685, "step": 8887 }, { "epoch": 1.4596514277502926, "grad_norm": 0.2763367425927962, "learning_rate": 7.314808735111825e-06, "loss": 0.4723, "step": 8888 }, { "epoch": 1.4598156549586352, "grad_norm": 0.30501998510386186, "learning_rate": 7.314442821290911e-06, "loss": 0.4755, "step": 8889 }, { "epoch": 1.459979882166978, "grad_norm": 0.31957072046857943, "learning_rate": 7.314076876902378e-06, "loss": 0.4508, "step": 8890 }, { "epoch": 1.4601441093753207, "grad_norm": 0.4569959723681631, "learning_rate": 7.313710901950203e-06, "loss": 0.4711, "step": 8891 }, { "epoch": 1.4603083365836635, "grad_norm": 0.3275277934527127, "learning_rate": 7.3133448964383575e-06, "loss": 0.4531, "step": 8892 }, { "epoch": 1.4604725637920062, "grad_norm": 0.31450641388610223, "learning_rate": 7.312978860370818e-06, "loss": 0.4699, "step": 8893 }, { "epoch": 1.460636791000349, "grad_norm": 0.35690484276864787, "learning_rate": 7.312612793751563e-06, "loss": 0.4571, "step": 8894 }, { "epoch": 1.4608010182086917, "grad_norm": 0.32055550173392267, "learning_rate": 7.312246696584564e-06, "loss": 0.4687, "step": 8895 }, { "epoch": 1.4609652454170345, "grad_norm": 0.31626984369788685, "learning_rate": 7.311880568873799e-06, "loss": 0.4789, "step": 8896 }, { "epoch": 1.4611294726253772, "grad_norm": 0.27444977756984074, "learning_rate": 7.311514410623244e-06, "loss": 0.4762, "step": 8897 }, { "epoch": 1.4612936998337198, "grad_norm": 0.2779313804007929, "learning_rate": 7.311148221836878e-06, "loss": 0.4485, "step": 8898 }, { "epoch": 1.4614579270420627, "grad_norm": 0.32831057129001473, "learning_rate": 7.310782002518676e-06, "loss": 0.4724, "step": 8899 }, { "epoch": 1.4616221542504055, "grad_norm": 0.36700689821516946, "learning_rate": 7.310415752672616e-06, "loss": 0.4913, "step": 8900 }, { "epoch": 1.4617863814587482, "grad_norm": 0.24873549957452207, "learning_rate": 7.310049472302676e-06, "loss": 0.4701, "step": 8901 }, { "epoch": 1.4619506086670908, "grad_norm": 0.271400064217122, "learning_rate": 7.3096831614128336e-06, "loss": 0.4849, "step": 8902 }, { "epoch": 1.4621148358754337, "grad_norm": 0.30503298928358774, "learning_rate": 7.309316820007066e-06, "loss": 0.4793, "step": 8903 }, { "epoch": 1.4622790630837765, "grad_norm": 0.32951277679543467, "learning_rate": 7.3089504480893565e-06, "loss": 0.4634, "step": 8904 }, { "epoch": 1.4624432902921192, "grad_norm": 0.3415409673036727, "learning_rate": 7.30858404566368e-06, "loss": 0.456, "step": 8905 }, { "epoch": 1.4626075175004618, "grad_norm": 0.2969992645784687, "learning_rate": 7.308217612734019e-06, "loss": 0.4706, "step": 8906 }, { "epoch": 1.4627717447088047, "grad_norm": 0.28013964963023535, "learning_rate": 7.30785114930435e-06, "loss": 0.4544, "step": 8907 }, { "epoch": 1.4629359719171473, "grad_norm": 0.32904919821824175, "learning_rate": 7.307484655378656e-06, "loss": 0.4442, "step": 8908 }, { "epoch": 1.4631001991254902, "grad_norm": 0.4378861663903591, "learning_rate": 7.307118130960917e-06, "loss": 0.4411, "step": 8909 }, { "epoch": 1.4632644263338328, "grad_norm": 0.2887836195173391, "learning_rate": 7.306751576055111e-06, "loss": 0.4565, "step": 8910 }, { "epoch": 1.4634286535421757, "grad_norm": 0.3921748038982994, "learning_rate": 7.306384990665223e-06, "loss": 0.4707, "step": 8911 }, { "epoch": 1.4635928807505183, "grad_norm": 0.33692717526667476, "learning_rate": 7.306018374795234e-06, "loss": 0.4521, "step": 8912 }, { "epoch": 1.4637571079588612, "grad_norm": 0.3331839944553744, "learning_rate": 7.305651728449123e-06, "loss": 0.4676, "step": 8913 }, { "epoch": 1.4639213351672038, "grad_norm": 0.4512463545321785, "learning_rate": 7.305285051630875e-06, "loss": 0.4694, "step": 8914 }, { "epoch": 1.4640855623755464, "grad_norm": 0.3429881607623171, "learning_rate": 7.3049183443444694e-06, "loss": 0.4444, "step": 8915 }, { "epoch": 1.4642497895838893, "grad_norm": 0.2916652689698767, "learning_rate": 7.304551606593892e-06, "loss": 0.4534, "step": 8916 }, { "epoch": 1.4644140167922322, "grad_norm": 0.3177553803478125, "learning_rate": 7.3041848383831254e-06, "loss": 0.4787, "step": 8917 }, { "epoch": 1.4645782440005748, "grad_norm": 0.2936657930249479, "learning_rate": 7.303818039716152e-06, "loss": 0.4576, "step": 8918 }, { "epoch": 1.4647424712089174, "grad_norm": 0.3535099457184482, "learning_rate": 7.303451210596957e-06, "loss": 0.4742, "step": 8919 }, { "epoch": 1.4649066984172603, "grad_norm": 0.36301749310474385, "learning_rate": 7.303084351029522e-06, "loss": 0.4771, "step": 8920 }, { "epoch": 1.4650709256256031, "grad_norm": 0.27236820017666563, "learning_rate": 7.302717461017834e-06, "loss": 0.476, "step": 8921 }, { "epoch": 1.4652351528339458, "grad_norm": 0.31864032094301475, "learning_rate": 7.302350540565877e-06, "loss": 0.4626, "step": 8922 }, { "epoch": 1.4653993800422884, "grad_norm": 0.44159276047496465, "learning_rate": 7.301983589677637e-06, "loss": 0.4661, "step": 8923 }, { "epoch": 1.4655636072506313, "grad_norm": 0.6163261811363203, "learning_rate": 7.301616608357096e-06, "loss": 0.4687, "step": 8924 }, { "epoch": 1.465727834458974, "grad_norm": 0.6132487217057926, "learning_rate": 7.301249596608244e-06, "loss": 0.4746, "step": 8925 }, { "epoch": 1.4658920616673168, "grad_norm": 0.30355394160206417, "learning_rate": 7.300882554435065e-06, "loss": 0.4899, "step": 8926 }, { "epoch": 1.4660562888756594, "grad_norm": 0.3028776881337748, "learning_rate": 7.3005154818415446e-06, "loss": 0.4624, "step": 8927 }, { "epoch": 1.4662205160840023, "grad_norm": 0.3490294656355712, "learning_rate": 7.300148378831672e-06, "loss": 0.4762, "step": 8928 }, { "epoch": 1.466384743292345, "grad_norm": 0.38672288617898776, "learning_rate": 7.2997812454094325e-06, "loss": 0.4563, "step": 8929 }, { "epoch": 1.4665489705006878, "grad_norm": 0.3460975897825717, "learning_rate": 7.2994140815788146e-06, "loss": 0.4558, "step": 8930 }, { "epoch": 1.4667131977090304, "grad_norm": 0.3817812381648602, "learning_rate": 7.299046887343805e-06, "loss": 0.4676, "step": 8931 }, { "epoch": 1.466877424917373, "grad_norm": 0.29091502397352326, "learning_rate": 7.298679662708392e-06, "loss": 0.4535, "step": 8932 }, { "epoch": 1.467041652125716, "grad_norm": 0.3720886033235574, "learning_rate": 7.298312407676565e-06, "loss": 0.4719, "step": 8933 }, { "epoch": 1.4672058793340588, "grad_norm": 0.2982734402976886, "learning_rate": 7.297945122252312e-06, "loss": 0.4642, "step": 8934 }, { "epoch": 1.4673701065424014, "grad_norm": 0.2767453925146361, "learning_rate": 7.297577806439622e-06, "loss": 0.4618, "step": 8935 }, { "epoch": 1.467534333750744, "grad_norm": 0.36345402905506835, "learning_rate": 7.297210460242484e-06, "loss": 0.4714, "step": 8936 }, { "epoch": 1.467698560959087, "grad_norm": 0.32274055597623746, "learning_rate": 7.2968430836648885e-06, "loss": 0.4605, "step": 8937 }, { "epoch": 1.4678627881674298, "grad_norm": 0.2545913876862383, "learning_rate": 7.2964756767108265e-06, "loss": 0.4743, "step": 8938 }, { "epoch": 1.4680270153757724, "grad_norm": 0.3158754146163357, "learning_rate": 7.296108239384287e-06, "loss": 0.4785, "step": 8939 }, { "epoch": 1.468191242584115, "grad_norm": 0.2791997204542906, "learning_rate": 7.2957407716892604e-06, "loss": 0.4799, "step": 8940 }, { "epoch": 1.468355469792458, "grad_norm": 0.26346926896439843, "learning_rate": 7.295373273629739e-06, "loss": 0.481, "step": 8941 }, { "epoch": 1.4685196970008005, "grad_norm": 0.27357834356219624, "learning_rate": 7.295005745209713e-06, "loss": 0.446, "step": 8942 }, { "epoch": 1.4686839242091434, "grad_norm": 0.2852558176896654, "learning_rate": 7.294638186433175e-06, "loss": 0.4663, "step": 8943 }, { "epoch": 1.468848151417486, "grad_norm": 0.3705262899315953, "learning_rate": 7.294270597304117e-06, "loss": 0.4699, "step": 8944 }, { "epoch": 1.469012378625829, "grad_norm": 0.31750281046154766, "learning_rate": 7.2939029778265295e-06, "loss": 0.4582, "step": 8945 }, { "epoch": 1.4691766058341715, "grad_norm": 0.3038648764501812, "learning_rate": 7.29353532800441e-06, "loss": 0.4509, "step": 8946 }, { "epoch": 1.4693408330425144, "grad_norm": 0.29723681969502824, "learning_rate": 7.293167647841745e-06, "loss": 0.472, "step": 8947 }, { "epoch": 1.469505060250857, "grad_norm": 0.3876434334700834, "learning_rate": 7.292799937342534e-06, "loss": 0.4829, "step": 8948 }, { "epoch": 1.4696692874591997, "grad_norm": 0.2695956978075607, "learning_rate": 7.292432196510766e-06, "loss": 0.449, "step": 8949 }, { "epoch": 1.4698335146675425, "grad_norm": 0.2974485527980798, "learning_rate": 7.292064425350438e-06, "loss": 0.4747, "step": 8950 }, { "epoch": 1.4699977418758854, "grad_norm": 0.2859895877854272, "learning_rate": 7.2916966238655424e-06, "loss": 0.448, "step": 8951 }, { "epoch": 1.470161969084228, "grad_norm": 0.31309097836143424, "learning_rate": 7.291328792060075e-06, "loss": 0.4844, "step": 8952 }, { "epoch": 1.4703261962925707, "grad_norm": 0.31950330577085617, "learning_rate": 7.290960929938032e-06, "loss": 0.4677, "step": 8953 }, { "epoch": 1.4704904235009135, "grad_norm": 0.356031776293735, "learning_rate": 7.290593037503405e-06, "loss": 0.4631, "step": 8954 }, { "epoch": 1.4706546507092564, "grad_norm": 0.3892179659188917, "learning_rate": 7.290225114760195e-06, "loss": 0.4788, "step": 8955 }, { "epoch": 1.470818877917599, "grad_norm": 0.3088060110020469, "learning_rate": 7.289857161712393e-06, "loss": 0.4773, "step": 8956 }, { "epoch": 1.4709831051259417, "grad_norm": 0.34766766878995486, "learning_rate": 7.289489178363997e-06, "loss": 0.4641, "step": 8957 }, { "epoch": 1.4711473323342845, "grad_norm": 0.987689661044155, "learning_rate": 7.289121164719006e-06, "loss": 0.4616, "step": 8958 }, { "epoch": 1.4713115595426272, "grad_norm": 0.37323779225069614, "learning_rate": 7.288753120781414e-06, "loss": 0.4725, "step": 8959 }, { "epoch": 1.47147578675097, "grad_norm": 0.2999592106766716, "learning_rate": 7.288385046555218e-06, "loss": 0.4874, "step": 8960 }, { "epoch": 1.4716400139593127, "grad_norm": 0.4071471939682784, "learning_rate": 7.288016942044418e-06, "loss": 0.476, "step": 8961 }, { "epoch": 1.4718042411676555, "grad_norm": 0.4291504258791555, "learning_rate": 7.287648807253012e-06, "loss": 0.4571, "step": 8962 }, { "epoch": 1.4719684683759982, "grad_norm": 0.3502198595250842, "learning_rate": 7.287280642184996e-06, "loss": 0.4828, "step": 8963 }, { "epoch": 1.472132695584341, "grad_norm": 0.375414310412938, "learning_rate": 7.286912446844369e-06, "loss": 0.4879, "step": 8964 }, { "epoch": 1.4722969227926836, "grad_norm": 0.26277910260284426, "learning_rate": 7.286544221235134e-06, "loss": 0.4556, "step": 8965 }, { "epoch": 1.4724611500010263, "grad_norm": 0.31490149171126364, "learning_rate": 7.286175965361285e-06, "loss": 0.4638, "step": 8966 }, { "epoch": 1.4726253772093691, "grad_norm": 0.2892208531501333, "learning_rate": 7.285807679226825e-06, "loss": 0.4601, "step": 8967 }, { "epoch": 1.472789604417712, "grad_norm": 0.2918932647093718, "learning_rate": 7.285439362835751e-06, "loss": 0.4796, "step": 8968 }, { "epoch": 1.4729538316260546, "grad_norm": 0.2945099295955126, "learning_rate": 7.285071016192067e-06, "loss": 0.4636, "step": 8969 }, { "epoch": 1.4731180588343973, "grad_norm": 0.28597820917530037, "learning_rate": 7.28470263929977e-06, "loss": 0.4905, "step": 8970 }, { "epoch": 1.4732822860427401, "grad_norm": 0.4109484557655828, "learning_rate": 7.284334232162864e-06, "loss": 0.4654, "step": 8971 }, { "epoch": 1.473446513251083, "grad_norm": 0.2621363643613177, "learning_rate": 7.283965794785346e-06, "loss": 0.4665, "step": 8972 }, { "epoch": 1.4736107404594256, "grad_norm": 0.3483351477212068, "learning_rate": 7.283597327171223e-06, "loss": 0.4772, "step": 8973 }, { "epoch": 1.4737749676677683, "grad_norm": 0.3508989330249181, "learning_rate": 7.2832288293244935e-06, "loss": 0.4476, "step": 8974 }, { "epoch": 1.4739391948761111, "grad_norm": 0.44165630426238167, "learning_rate": 7.282860301249162e-06, "loss": 0.4698, "step": 8975 }, { "epoch": 1.4741034220844538, "grad_norm": 0.3086075564657962, "learning_rate": 7.2824917429492275e-06, "loss": 0.4756, "step": 8976 }, { "epoch": 1.4742676492927966, "grad_norm": 0.5470619925050496, "learning_rate": 7.282123154428696e-06, "loss": 0.4881, "step": 8977 }, { "epoch": 1.4744318765011393, "grad_norm": 0.28503437164757617, "learning_rate": 7.28175453569157e-06, "loss": 0.4909, "step": 8978 }, { "epoch": 1.4745961037094821, "grad_norm": 0.3183139987359893, "learning_rate": 7.281385886741852e-06, "loss": 0.4508, "step": 8979 }, { "epoch": 1.4747603309178248, "grad_norm": 0.30594851299796155, "learning_rate": 7.281017207583548e-06, "loss": 0.4369, "step": 8980 }, { "epoch": 1.4749245581261676, "grad_norm": 0.32208508578505773, "learning_rate": 7.28064849822066e-06, "loss": 0.471, "step": 8981 }, { "epoch": 1.4750887853345103, "grad_norm": 0.35597240221197657, "learning_rate": 7.280279758657194e-06, "loss": 0.4673, "step": 8982 }, { "epoch": 1.475253012542853, "grad_norm": 0.30726013389705537, "learning_rate": 7.2799109888971544e-06, "loss": 0.4376, "step": 8983 }, { "epoch": 1.4754172397511958, "grad_norm": 0.3061948703740364, "learning_rate": 7.279542188944548e-06, "loss": 0.4453, "step": 8984 }, { "epoch": 1.4755814669595386, "grad_norm": 0.3801963399207698, "learning_rate": 7.279173358803376e-06, "loss": 0.4601, "step": 8985 }, { "epoch": 1.4757456941678813, "grad_norm": 0.30531711719679955, "learning_rate": 7.2788044984776475e-06, "loss": 0.4602, "step": 8986 }, { "epoch": 1.475909921376224, "grad_norm": 0.26931105089669216, "learning_rate": 7.2784356079713695e-06, "loss": 0.458, "step": 8987 }, { "epoch": 1.4760741485845668, "grad_norm": 0.3842988883695888, "learning_rate": 7.278066687288547e-06, "loss": 0.4869, "step": 8988 }, { "epoch": 1.4762383757929096, "grad_norm": 0.28218722853074585, "learning_rate": 7.277697736433186e-06, "loss": 0.4642, "step": 8989 }, { "epoch": 1.4764026030012523, "grad_norm": 0.2897705573643494, "learning_rate": 7.277328755409295e-06, "loss": 0.4625, "step": 8990 }, { "epoch": 1.476566830209595, "grad_norm": 0.27586866292250783, "learning_rate": 7.276959744220881e-06, "loss": 0.4664, "step": 8991 }, { "epoch": 1.4767310574179378, "grad_norm": 0.2827774853066872, "learning_rate": 7.276590702871954e-06, "loss": 0.4744, "step": 8992 }, { "epoch": 1.4768952846262804, "grad_norm": 0.32702311421279057, "learning_rate": 7.276221631366516e-06, "loss": 0.475, "step": 8993 }, { "epoch": 1.4770595118346233, "grad_norm": 0.2617829134294582, "learning_rate": 7.275852529708582e-06, "loss": 0.4705, "step": 8994 }, { "epoch": 1.4772237390429659, "grad_norm": 0.47054078391175314, "learning_rate": 7.275483397902159e-06, "loss": 0.4546, "step": 8995 }, { "epoch": 1.4773879662513087, "grad_norm": 0.28021517153771475, "learning_rate": 7.275114235951256e-06, "loss": 0.4753, "step": 8996 }, { "epoch": 1.4775521934596514, "grad_norm": 0.26960945938330666, "learning_rate": 7.274745043859882e-06, "loss": 0.4722, "step": 8997 }, { "epoch": 1.4777164206679942, "grad_norm": 0.34790989377611464, "learning_rate": 7.274375821632045e-06, "loss": 0.4666, "step": 8998 }, { "epoch": 1.4778806478763369, "grad_norm": 0.2692479263324211, "learning_rate": 7.274006569271758e-06, "loss": 0.4628, "step": 8999 }, { "epoch": 1.4780448750846795, "grad_norm": 0.429200616981824, "learning_rate": 7.273637286783031e-06, "loss": 0.4727, "step": 9000 }, { "epoch": 1.4782091022930224, "grad_norm": 0.27891585980617317, "learning_rate": 7.273267974169874e-06, "loss": 0.4539, "step": 9001 }, { "epoch": 1.4783733295013652, "grad_norm": 0.3124890295725687, "learning_rate": 7.272898631436298e-06, "loss": 0.4667, "step": 9002 }, { "epoch": 1.4785375567097079, "grad_norm": 0.3246369076421457, "learning_rate": 7.272529258586314e-06, "loss": 0.4608, "step": 9003 }, { "epoch": 1.4787017839180505, "grad_norm": 0.4909357585608995, "learning_rate": 7.272159855623936e-06, "loss": 0.4267, "step": 9004 }, { "epoch": 1.4788660111263934, "grad_norm": 0.2962438853516577, "learning_rate": 7.271790422553172e-06, "loss": 0.4808, "step": 9005 }, { "epoch": 1.4790302383347362, "grad_norm": 0.3504770230621263, "learning_rate": 7.27142095937804e-06, "loss": 0.4711, "step": 9006 }, { "epoch": 1.4791944655430789, "grad_norm": 0.2911014714288107, "learning_rate": 7.271051466102547e-06, "loss": 0.4596, "step": 9007 }, { "epoch": 1.4793586927514215, "grad_norm": 0.31010532252304307, "learning_rate": 7.27068194273071e-06, "loss": 0.4781, "step": 9008 }, { "epoch": 1.4795229199597644, "grad_norm": 0.286613279240997, "learning_rate": 7.270312389266542e-06, "loss": 0.4617, "step": 9009 }, { "epoch": 1.479687147168107, "grad_norm": 0.3897309711832461, "learning_rate": 7.2699428057140545e-06, "loss": 0.4781, "step": 9010 }, { "epoch": 1.4798513743764499, "grad_norm": 0.37326616468971, "learning_rate": 7.269573192077263e-06, "loss": 0.4661, "step": 9011 }, { "epoch": 1.4800156015847925, "grad_norm": 0.2772497866687941, "learning_rate": 7.269203548360182e-06, "loss": 0.4527, "step": 9012 }, { "epoch": 1.4801798287931354, "grad_norm": 0.2775398257236335, "learning_rate": 7.2688338745668264e-06, "loss": 0.4574, "step": 9013 }, { "epoch": 1.480344056001478, "grad_norm": 0.365805191968904, "learning_rate": 7.26846417070121e-06, "loss": 0.4674, "step": 9014 }, { "epoch": 1.4805082832098209, "grad_norm": 0.2995131322182461, "learning_rate": 7.26809443676735e-06, "loss": 0.4625, "step": 9015 }, { "epoch": 1.4806725104181635, "grad_norm": 0.2920748910867749, "learning_rate": 7.2677246727692605e-06, "loss": 0.449, "step": 9016 }, { "epoch": 1.4808367376265061, "grad_norm": 0.3134085998958177, "learning_rate": 7.267354878710957e-06, "loss": 0.4752, "step": 9017 }, { "epoch": 1.481000964834849, "grad_norm": 0.2910762798061273, "learning_rate": 7.266985054596457e-06, "loss": 0.4668, "step": 9018 }, { "epoch": 1.4811651920431919, "grad_norm": 0.3620826935634441, "learning_rate": 7.266615200429778e-06, "loss": 0.4694, "step": 9019 }, { "epoch": 1.4813294192515345, "grad_norm": 0.3239612400513165, "learning_rate": 7.266245316214935e-06, "loss": 0.4685, "step": 9020 }, { "epoch": 1.4814936464598771, "grad_norm": 0.512868777403178, "learning_rate": 7.265875401955947e-06, "loss": 0.4471, "step": 9021 }, { "epoch": 1.48165787366822, "grad_norm": 0.35965132682584766, "learning_rate": 7.265505457656831e-06, "loss": 0.475, "step": 9022 }, { "epoch": 1.4818221008765629, "grad_norm": 0.3268419705604959, "learning_rate": 7.265135483321604e-06, "loss": 0.4804, "step": 9023 }, { "epoch": 1.4819863280849055, "grad_norm": 0.3992553047998647, "learning_rate": 7.264765478954286e-06, "loss": 0.449, "step": 9024 }, { "epoch": 1.4821505552932481, "grad_norm": 0.2918082085432597, "learning_rate": 7.264395444558895e-06, "loss": 0.4584, "step": 9025 }, { "epoch": 1.482314782501591, "grad_norm": 0.270882114076403, "learning_rate": 7.264025380139448e-06, "loss": 0.4664, "step": 9026 }, { "epoch": 1.4824790097099336, "grad_norm": 0.3532222289683168, "learning_rate": 7.263655285699966e-06, "loss": 0.4719, "step": 9027 }, { "epoch": 1.4826432369182765, "grad_norm": 0.2849963487764551, "learning_rate": 7.263285161244469e-06, "loss": 0.4576, "step": 9028 }, { "epoch": 1.4828074641266191, "grad_norm": 0.3139206838920788, "learning_rate": 7.262915006776978e-06, "loss": 0.4721, "step": 9029 }, { "epoch": 1.482971691334962, "grad_norm": 0.48344010886321853, "learning_rate": 7.26254482230151e-06, "loss": 0.4539, "step": 9030 }, { "epoch": 1.4831359185433046, "grad_norm": 0.3287835530250805, "learning_rate": 7.262174607822088e-06, "loss": 0.4511, "step": 9031 }, { "epoch": 1.4833001457516475, "grad_norm": 0.2750891894079266, "learning_rate": 7.261804363342731e-06, "loss": 0.4718, "step": 9032 }, { "epoch": 1.4834643729599901, "grad_norm": 0.3425506478423419, "learning_rate": 7.261434088867463e-06, "loss": 0.4768, "step": 9033 }, { "epoch": 1.4836286001683328, "grad_norm": 0.298876986879865, "learning_rate": 7.261063784400304e-06, "loss": 0.4703, "step": 9034 }, { "epoch": 1.4837928273766756, "grad_norm": 0.34455142421650525, "learning_rate": 7.260693449945274e-06, "loss": 0.478, "step": 9035 }, { "epoch": 1.4839570545850185, "grad_norm": 0.34054511742199567, "learning_rate": 7.260323085506398e-06, "loss": 0.4653, "step": 9036 }, { "epoch": 1.4841212817933611, "grad_norm": 0.2849754511923055, "learning_rate": 7.259952691087697e-06, "loss": 0.4581, "step": 9037 }, { "epoch": 1.4842855090017038, "grad_norm": 0.31908669245109694, "learning_rate": 7.259582266693196e-06, "loss": 0.4709, "step": 9038 }, { "epoch": 1.4844497362100466, "grad_norm": 0.261760821048014, "learning_rate": 7.259211812326916e-06, "loss": 0.4595, "step": 9039 }, { "epoch": 1.4846139634183895, "grad_norm": 0.5699588523422938, "learning_rate": 7.258841327992879e-06, "loss": 0.4435, "step": 9040 }, { "epoch": 1.484778190626732, "grad_norm": 0.3436625253191333, "learning_rate": 7.258470813695112e-06, "loss": 0.49, "step": 9041 }, { "epoch": 1.4849424178350747, "grad_norm": 0.29229791987285086, "learning_rate": 7.258100269437637e-06, "loss": 0.448, "step": 9042 }, { "epoch": 1.4851066450434176, "grad_norm": 0.2696646005456477, "learning_rate": 7.257729695224482e-06, "loss": 0.4741, "step": 9043 }, { "epoch": 1.4852708722517602, "grad_norm": 0.5564414897893172, "learning_rate": 7.257359091059668e-06, "loss": 0.4651, "step": 9044 }, { "epoch": 1.485435099460103, "grad_norm": 0.37273520474470473, "learning_rate": 7.256988456947221e-06, "loss": 0.4658, "step": 9045 }, { "epoch": 1.4855993266684457, "grad_norm": 0.26984808619858086, "learning_rate": 7.256617792891168e-06, "loss": 0.4763, "step": 9046 }, { "epoch": 1.4857635538767886, "grad_norm": 0.30726132225802555, "learning_rate": 7.256247098895533e-06, "loss": 0.4564, "step": 9047 }, { "epoch": 1.4859277810851312, "grad_norm": 0.2964204921364908, "learning_rate": 7.255876374964341e-06, "loss": 0.449, "step": 9048 }, { "epoch": 1.486092008293474, "grad_norm": 0.34428474234477524, "learning_rate": 7.255505621101623e-06, "loss": 0.4738, "step": 9049 }, { "epoch": 1.4862562355018167, "grad_norm": 0.3100524445547254, "learning_rate": 7.255134837311402e-06, "loss": 0.4513, "step": 9050 }, { "epoch": 1.4864204627101594, "grad_norm": 0.2968065640191466, "learning_rate": 7.254764023597705e-06, "loss": 0.4488, "step": 9051 }, { "epoch": 1.4865846899185022, "grad_norm": 0.42991865968143095, "learning_rate": 7.254393179964561e-06, "loss": 0.4656, "step": 9052 }, { "epoch": 1.486748917126845, "grad_norm": 0.2840509670038126, "learning_rate": 7.254022306415996e-06, "loss": 0.4576, "step": 9053 }, { "epoch": 1.4869131443351877, "grad_norm": 0.27885545284201946, "learning_rate": 7.25365140295604e-06, "loss": 0.4566, "step": 9054 }, { "epoch": 1.4870773715435304, "grad_norm": 0.2880230279300889, "learning_rate": 7.253280469588722e-06, "loss": 0.467, "step": 9055 }, { "epoch": 1.4872415987518732, "grad_norm": 0.2713198311101982, "learning_rate": 7.252909506318067e-06, "loss": 0.4737, "step": 9056 }, { "epoch": 1.487405825960216, "grad_norm": 0.29087205139831046, "learning_rate": 7.252538513148108e-06, "loss": 0.457, "step": 9057 }, { "epoch": 1.4875700531685587, "grad_norm": 0.3218582414140392, "learning_rate": 7.2521674900828705e-06, "loss": 0.4679, "step": 9058 }, { "epoch": 1.4877342803769014, "grad_norm": 0.3022988004594291, "learning_rate": 7.251796437126388e-06, "loss": 0.4762, "step": 9059 }, { "epoch": 1.4878985075852442, "grad_norm": 0.3237420410141972, "learning_rate": 7.251425354282689e-06, "loss": 0.4806, "step": 9060 }, { "epoch": 1.4880627347935869, "grad_norm": 0.40148076135141486, "learning_rate": 7.251054241555803e-06, "loss": 0.4479, "step": 9061 }, { "epoch": 1.4882269620019297, "grad_norm": 0.3021622550741741, "learning_rate": 7.250683098949761e-06, "loss": 0.4489, "step": 9062 }, { "epoch": 1.4883911892102724, "grad_norm": 0.2737397256477465, "learning_rate": 7.250311926468595e-06, "loss": 0.4622, "step": 9063 }, { "epoch": 1.4885554164186152, "grad_norm": 0.2838829341107318, "learning_rate": 7.249940724116335e-06, "loss": 0.4788, "step": 9064 }, { "epoch": 1.4887196436269579, "grad_norm": 0.2900387237367032, "learning_rate": 7.2495694918970125e-06, "loss": 0.5016, "step": 9065 }, { "epoch": 1.4888838708353007, "grad_norm": 0.32723225864852523, "learning_rate": 7.249198229814661e-06, "loss": 0.4635, "step": 9066 }, { "epoch": 1.4890480980436434, "grad_norm": 0.2929153755364235, "learning_rate": 7.248826937873313e-06, "loss": 0.4627, "step": 9067 }, { "epoch": 1.489212325251986, "grad_norm": 0.29032930548652564, "learning_rate": 7.248455616076998e-06, "loss": 0.4736, "step": 9068 }, { "epoch": 1.4893765524603289, "grad_norm": 0.35280273211851365, "learning_rate": 7.248084264429751e-06, "loss": 0.473, "step": 9069 }, { "epoch": 1.4895407796686717, "grad_norm": 0.46850827088761615, "learning_rate": 7.2477128829356055e-06, "loss": 0.4495, "step": 9070 }, { "epoch": 1.4897050068770143, "grad_norm": 0.3028158132164672, "learning_rate": 7.247341471598596e-06, "loss": 0.4797, "step": 9071 }, { "epoch": 1.489869234085357, "grad_norm": 0.3222872181362865, "learning_rate": 7.2469700304227535e-06, "loss": 0.4687, "step": 9072 }, { "epoch": 1.4900334612936998, "grad_norm": 0.3353242450473158, "learning_rate": 7.246598559412115e-06, "loss": 0.4442, "step": 9073 }, { "epoch": 1.4901976885020427, "grad_norm": 0.3147282673915163, "learning_rate": 7.246227058570714e-06, "loss": 0.4727, "step": 9074 }, { "epoch": 1.4903619157103853, "grad_norm": 0.26213956343160233, "learning_rate": 7.2458555279025836e-06, "loss": 0.4803, "step": 9075 }, { "epoch": 1.490526142918728, "grad_norm": 0.43566408018354824, "learning_rate": 7.245483967411762e-06, "loss": 0.4805, "step": 9076 }, { "epoch": 1.4906903701270708, "grad_norm": 0.49105090744394064, "learning_rate": 7.2451123771022816e-06, "loss": 0.4697, "step": 9077 }, { "epoch": 1.4908545973354135, "grad_norm": 0.5978965467093454, "learning_rate": 7.244740756978181e-06, "loss": 0.4664, "step": 9078 }, { "epoch": 1.4910188245437563, "grad_norm": 0.34767763702904925, "learning_rate": 7.2443691070434955e-06, "loss": 0.4618, "step": 9079 }, { "epoch": 1.491183051752099, "grad_norm": 0.2835559414850179, "learning_rate": 7.2439974273022625e-06, "loss": 0.4952, "step": 9080 }, { "epoch": 1.4913472789604418, "grad_norm": 0.36899619802931427, "learning_rate": 7.243625717758516e-06, "loss": 0.4588, "step": 9081 }, { "epoch": 1.4915115061687845, "grad_norm": 0.31211856962328216, "learning_rate": 7.243253978416294e-06, "loss": 0.4719, "step": 9082 }, { "epoch": 1.4916757333771273, "grad_norm": 0.2795658714787976, "learning_rate": 7.242882209279637e-06, "loss": 0.4635, "step": 9083 }, { "epoch": 1.49183996058547, "grad_norm": 0.25916023604519495, "learning_rate": 7.242510410352581e-06, "loss": 0.4682, "step": 9084 }, { "epoch": 1.4920041877938126, "grad_norm": 0.3129340495290174, "learning_rate": 7.242138581639162e-06, "loss": 0.4615, "step": 9085 }, { "epoch": 1.4921684150021555, "grad_norm": 0.36526210508797624, "learning_rate": 7.24176672314342e-06, "loss": 0.4804, "step": 9086 }, { "epoch": 1.4923326422104983, "grad_norm": 0.37940069172741353, "learning_rate": 7.241394834869395e-06, "loss": 0.444, "step": 9087 }, { "epoch": 1.492496869418841, "grad_norm": 0.36743810559285073, "learning_rate": 7.241022916821124e-06, "loss": 0.4617, "step": 9088 }, { "epoch": 1.4926610966271836, "grad_norm": 0.3399224851236104, "learning_rate": 7.240650969002647e-06, "loss": 0.4767, "step": 9089 }, { "epoch": 1.4928253238355265, "grad_norm": 0.3568026906661504, "learning_rate": 7.2402789914180045e-06, "loss": 0.4583, "step": 9090 }, { "epoch": 1.4929895510438693, "grad_norm": 0.47928165555028224, "learning_rate": 7.239906984071238e-06, "loss": 0.4717, "step": 9091 }, { "epoch": 1.493153778252212, "grad_norm": 0.3438429100691715, "learning_rate": 7.239534946966384e-06, "loss": 0.4975, "step": 9092 }, { "epoch": 1.4933180054605546, "grad_norm": 0.37410444420126543, "learning_rate": 7.239162880107485e-06, "loss": 0.4614, "step": 9093 }, { "epoch": 1.4934822326688975, "grad_norm": 0.3130268740405012, "learning_rate": 7.238790783498583e-06, "loss": 0.4601, "step": 9094 }, { "epoch": 1.49364645987724, "grad_norm": 0.34707548690935847, "learning_rate": 7.238418657143716e-06, "loss": 0.4751, "step": 9095 }, { "epoch": 1.493810687085583, "grad_norm": 0.2975015764973547, "learning_rate": 7.2380465010469316e-06, "loss": 0.4566, "step": 9096 }, { "epoch": 1.4939749142939256, "grad_norm": 0.2923288441893135, "learning_rate": 7.237674315212267e-06, "loss": 0.4669, "step": 9097 }, { "epoch": 1.4941391415022685, "grad_norm": 0.31449192041777363, "learning_rate": 7.237302099643766e-06, "loss": 0.4665, "step": 9098 }, { "epoch": 1.494303368710611, "grad_norm": 0.4705678262595334, "learning_rate": 7.23692985434547e-06, "loss": 0.4765, "step": 9099 }, { "epoch": 1.494467595918954, "grad_norm": 0.3617709294721401, "learning_rate": 7.236557579321424e-06, "loss": 0.4689, "step": 9100 }, { "epoch": 1.4946318231272966, "grad_norm": 0.3301324253026518, "learning_rate": 7.23618527457567e-06, "loss": 0.442, "step": 9101 }, { "epoch": 1.4947960503356392, "grad_norm": 0.658918156487996, "learning_rate": 7.235812940112252e-06, "loss": 0.4592, "step": 9102 }, { "epoch": 1.494960277543982, "grad_norm": 1.0723152036270371, "learning_rate": 7.235440575935215e-06, "loss": 0.472, "step": 9103 }, { "epoch": 1.495124504752325, "grad_norm": 0.32671311937006464, "learning_rate": 7.235068182048599e-06, "loss": 0.4698, "step": 9104 }, { "epoch": 1.4952887319606676, "grad_norm": 0.28952442856594846, "learning_rate": 7.234695758456454e-06, "loss": 0.4607, "step": 9105 }, { "epoch": 1.4954529591690102, "grad_norm": 0.5227314578459401, "learning_rate": 7.234323305162822e-06, "loss": 0.4777, "step": 9106 }, { "epoch": 1.495617186377353, "grad_norm": 0.2734681071879501, "learning_rate": 7.233950822171748e-06, "loss": 0.4682, "step": 9107 }, { "epoch": 1.495781413585696, "grad_norm": 0.3778217861495209, "learning_rate": 7.233578309487279e-06, "loss": 0.46, "step": 9108 }, { "epoch": 1.4959456407940386, "grad_norm": 0.3616470314992457, "learning_rate": 7.23320576711346e-06, "loss": 0.4457, "step": 9109 }, { "epoch": 1.4961098680023812, "grad_norm": 0.31428696869504247, "learning_rate": 7.232833195054337e-06, "loss": 0.4759, "step": 9110 }, { "epoch": 1.496274095210724, "grad_norm": 0.3320566406078576, "learning_rate": 7.232460593313957e-06, "loss": 0.4645, "step": 9111 }, { "epoch": 1.4964383224190667, "grad_norm": 0.5345790329651504, "learning_rate": 7.232087961896366e-06, "loss": 0.4563, "step": 9112 }, { "epoch": 1.4966025496274096, "grad_norm": 0.41574619219644277, "learning_rate": 7.231715300805613e-06, "loss": 0.473, "step": 9113 }, { "epoch": 1.4967667768357522, "grad_norm": 0.2652127144284407, "learning_rate": 7.231342610045744e-06, "loss": 0.4606, "step": 9114 }, { "epoch": 1.496931004044095, "grad_norm": 0.2908442547923296, "learning_rate": 7.230969889620806e-06, "loss": 0.4607, "step": 9115 }, { "epoch": 1.4970952312524377, "grad_norm": 0.3965280190532817, "learning_rate": 7.230597139534848e-06, "loss": 0.465, "step": 9116 }, { "epoch": 1.4972594584607806, "grad_norm": 0.37558384455595123, "learning_rate": 7.230224359791918e-06, "loss": 0.4845, "step": 9117 }, { "epoch": 1.4974236856691232, "grad_norm": 0.2828617605706177, "learning_rate": 7.2298515503960665e-06, "loss": 0.4719, "step": 9118 }, { "epoch": 1.4975879128774658, "grad_norm": 0.28998875121414835, "learning_rate": 7.229478711351341e-06, "loss": 0.4722, "step": 9119 }, { "epoch": 1.4977521400858087, "grad_norm": 0.3699351941513517, "learning_rate": 7.229105842661792e-06, "loss": 0.4716, "step": 9120 }, { "epoch": 1.4979163672941516, "grad_norm": 0.4265125664411172, "learning_rate": 7.228732944331468e-06, "loss": 0.4693, "step": 9121 }, { "epoch": 1.4980805945024942, "grad_norm": 0.4204329568659064, "learning_rate": 7.228360016364418e-06, "loss": 0.4519, "step": 9122 }, { "epoch": 1.4982448217108368, "grad_norm": 0.5794156378337116, "learning_rate": 7.227987058764696e-06, "loss": 0.4771, "step": 9123 }, { "epoch": 1.4984090489191797, "grad_norm": 0.2791439561851294, "learning_rate": 7.22761407153635e-06, "loss": 0.4829, "step": 9124 }, { "epoch": 1.4985732761275226, "grad_norm": 0.6543863320758504, "learning_rate": 7.227241054683431e-06, "loss": 0.466, "step": 9125 }, { "epoch": 1.4987375033358652, "grad_norm": 0.4130277000795869, "learning_rate": 7.2268680082099915e-06, "loss": 0.4615, "step": 9126 }, { "epoch": 1.4989017305442078, "grad_norm": 0.29982715625006073, "learning_rate": 7.226494932120081e-06, "loss": 0.4564, "step": 9127 }, { "epoch": 1.4990659577525507, "grad_norm": 0.35911683308681835, "learning_rate": 7.226121826417755e-06, "loss": 0.4566, "step": 9128 }, { "epoch": 1.4992301849608933, "grad_norm": 0.3561038189163182, "learning_rate": 7.225748691107063e-06, "loss": 0.4506, "step": 9129 }, { "epoch": 1.4993944121692362, "grad_norm": 0.31919930536086555, "learning_rate": 7.225375526192059e-06, "loss": 0.4602, "step": 9130 }, { "epoch": 1.4995586393775788, "grad_norm": 0.37857621222149634, "learning_rate": 7.225002331676795e-06, "loss": 0.4588, "step": 9131 }, { "epoch": 1.4997228665859217, "grad_norm": 0.29728581761591794, "learning_rate": 7.224629107565324e-06, "loss": 0.4722, "step": 9132 }, { "epoch": 1.4998870937942643, "grad_norm": 0.4309870688263262, "learning_rate": 7.224255853861701e-06, "loss": 0.4506, "step": 9133 }, { "epoch": 1.5000513210026072, "grad_norm": 0.4385984762113458, "learning_rate": 7.22388257056998e-06, "loss": 0.4754, "step": 9134 }, { "epoch": 1.5002155482109498, "grad_norm": 0.37723360627303476, "learning_rate": 7.2235092576942125e-06, "loss": 0.4835, "step": 9135 }, { "epoch": 1.5003797754192925, "grad_norm": 0.2969754460478771, "learning_rate": 7.223135915238455e-06, "loss": 0.4753, "step": 9136 }, { "epoch": 1.5005440026276353, "grad_norm": 0.3254557370821571, "learning_rate": 7.222762543206763e-06, "loss": 0.4764, "step": 9137 }, { "epoch": 1.5007082298359782, "grad_norm": 0.3534731655018746, "learning_rate": 7.222389141603192e-06, "loss": 0.4517, "step": 9138 }, { "epoch": 1.5008724570443208, "grad_norm": 1.0771222470195647, "learning_rate": 7.222015710431795e-06, "loss": 0.4836, "step": 9139 }, { "epoch": 1.5010366842526635, "grad_norm": 0.4629425359743406, "learning_rate": 7.221642249696629e-06, "loss": 0.4474, "step": 9140 }, { "epoch": 1.5012009114610063, "grad_norm": 0.3712616941989587, "learning_rate": 7.221268759401751e-06, "loss": 0.4755, "step": 9141 }, { "epoch": 1.5013651386693492, "grad_norm": 0.334863403671556, "learning_rate": 7.220895239551218e-06, "loss": 0.4748, "step": 9142 }, { "epoch": 1.5015293658776918, "grad_norm": 0.3404372632720664, "learning_rate": 7.220521690149084e-06, "loss": 0.4634, "step": 9143 }, { "epoch": 1.5016935930860345, "grad_norm": 0.3129244116574231, "learning_rate": 7.220148111199409e-06, "loss": 0.4562, "step": 9144 }, { "epoch": 1.5018578202943773, "grad_norm": 0.3654382271943735, "learning_rate": 7.219774502706248e-06, "loss": 0.4505, "step": 9145 }, { "epoch": 1.5020220475027202, "grad_norm": 0.35624390931397154, "learning_rate": 7.21940086467366e-06, "loss": 0.4513, "step": 9146 }, { "epoch": 1.5021862747110628, "grad_norm": 0.3228160726866767, "learning_rate": 7.219027197105705e-06, "loss": 0.4384, "step": 9147 }, { "epoch": 1.5023505019194054, "grad_norm": 0.3362682026085022, "learning_rate": 7.218653500006438e-06, "loss": 0.4613, "step": 9148 }, { "epoch": 1.502514729127748, "grad_norm": 0.3517407041807582, "learning_rate": 7.21827977337992e-06, "loss": 0.4515, "step": 9149 }, { "epoch": 1.502678956336091, "grad_norm": 0.3059612901310846, "learning_rate": 7.217906017230208e-06, "loss": 0.4642, "step": 9150 }, { "epoch": 1.5028431835444338, "grad_norm": 0.3005958427302342, "learning_rate": 7.217532231561363e-06, "loss": 0.4691, "step": 9151 }, { "epoch": 1.5030074107527764, "grad_norm": 0.5129411936571094, "learning_rate": 7.217158416377445e-06, "loss": 0.465, "step": 9152 }, { "epoch": 1.503171637961119, "grad_norm": 0.3666288166046643, "learning_rate": 7.216784571682513e-06, "loss": 0.4509, "step": 9153 }, { "epoch": 1.503335865169462, "grad_norm": 0.294422504507227, "learning_rate": 7.216410697480627e-06, "loss": 0.4554, "step": 9154 }, { "epoch": 1.5035000923778048, "grad_norm": 0.3113601579543826, "learning_rate": 7.21603679377585e-06, "loss": 0.4828, "step": 9155 }, { "epoch": 1.5036643195861474, "grad_norm": 0.30263614060555033, "learning_rate": 7.215662860572238e-06, "loss": 0.4694, "step": 9156 }, { "epoch": 1.50382854679449, "grad_norm": 0.4734181151005649, "learning_rate": 7.2152888978738585e-06, "loss": 0.4762, "step": 9157 }, { "epoch": 1.503992774002833, "grad_norm": 0.3287225146005883, "learning_rate": 7.214914905684769e-06, "loss": 0.4666, "step": 9158 }, { "epoch": 1.5041570012111758, "grad_norm": 0.305886165071671, "learning_rate": 7.214540884009032e-06, "loss": 0.4496, "step": 9159 }, { "epoch": 1.5043212284195184, "grad_norm": 0.3011882096853734, "learning_rate": 7.214166832850711e-06, "loss": 0.4582, "step": 9160 }, { "epoch": 1.504485455627861, "grad_norm": 0.41488963999633277, "learning_rate": 7.213792752213867e-06, "loss": 0.4648, "step": 9161 }, { "epoch": 1.504649682836204, "grad_norm": 0.31544674799165684, "learning_rate": 7.213418642102564e-06, "loss": 0.4844, "step": 9162 }, { "epoch": 1.5048139100445468, "grad_norm": 0.28949199485026034, "learning_rate": 7.213044502520866e-06, "loss": 0.4671, "step": 9163 }, { "epoch": 1.5049781372528894, "grad_norm": 0.2939590327333749, "learning_rate": 7.212670333472835e-06, "loss": 0.4771, "step": 9164 }, { "epoch": 1.505142364461232, "grad_norm": 0.3670101412740805, "learning_rate": 7.212296134962533e-06, "loss": 0.4688, "step": 9165 }, { "epoch": 1.5053065916695747, "grad_norm": 0.28723258461262186, "learning_rate": 7.2119219069940296e-06, "loss": 0.4862, "step": 9166 }, { "epoch": 1.5054708188779176, "grad_norm": 0.32542175887326896, "learning_rate": 7.2115476495713846e-06, "loss": 0.4553, "step": 9167 }, { "epoch": 1.5056350460862604, "grad_norm": 0.3007208342378376, "learning_rate": 7.211173362698664e-06, "loss": 0.4659, "step": 9168 }, { "epoch": 1.505799273294603, "grad_norm": 0.38737030393235344, "learning_rate": 7.210799046379935e-06, "loss": 0.4686, "step": 9169 }, { "epoch": 1.5059635005029457, "grad_norm": 0.26963051081694284, "learning_rate": 7.210424700619259e-06, "loss": 0.4454, "step": 9170 }, { "epoch": 1.5061277277112886, "grad_norm": 0.34262459734970785, "learning_rate": 7.210050325420705e-06, "loss": 0.4565, "step": 9171 }, { "epoch": 1.5062919549196314, "grad_norm": 0.5174600911956027, "learning_rate": 7.209675920788338e-06, "loss": 0.4668, "step": 9172 }, { "epoch": 1.506456182127974, "grad_norm": 0.29129980320508153, "learning_rate": 7.209301486726226e-06, "loss": 0.4603, "step": 9173 }, { "epoch": 1.5066204093363167, "grad_norm": 0.2999281854094833, "learning_rate": 7.208927023238432e-06, "loss": 0.4459, "step": 9174 }, { "epoch": 1.5067846365446596, "grad_norm": 0.34509789834469307, "learning_rate": 7.208552530329026e-06, "loss": 0.4825, "step": 9175 }, { "epoch": 1.5069488637530024, "grad_norm": 0.33035112975237557, "learning_rate": 7.208178008002076e-06, "loss": 0.4466, "step": 9176 }, { "epoch": 1.507113090961345, "grad_norm": 0.33090831360597694, "learning_rate": 7.2078034562616465e-06, "loss": 0.4671, "step": 9177 }, { "epoch": 1.5072773181696877, "grad_norm": 0.2959356681925396, "learning_rate": 7.207428875111809e-06, "loss": 0.4513, "step": 9178 }, { "epoch": 1.5074415453780305, "grad_norm": 0.27555625602430894, "learning_rate": 7.207054264556629e-06, "loss": 0.4824, "step": 9179 }, { "epoch": 1.5076057725863734, "grad_norm": 0.3831379474711666, "learning_rate": 7.206679624600177e-06, "loss": 0.4436, "step": 9180 }, { "epoch": 1.507769999794716, "grad_norm": 0.3182477305511219, "learning_rate": 7.206304955246521e-06, "loss": 0.4693, "step": 9181 }, { "epoch": 1.5079342270030587, "grad_norm": 0.3604807686672664, "learning_rate": 7.2059302564997295e-06, "loss": 0.4604, "step": 9182 }, { "epoch": 1.5080984542114013, "grad_norm": 0.5300038312700972, "learning_rate": 7.205555528363875e-06, "loss": 0.4699, "step": 9183 }, { "epoch": 1.5082626814197442, "grad_norm": 0.2701038850534607, "learning_rate": 7.205180770843024e-06, "loss": 0.4949, "step": 9184 }, { "epoch": 1.508426908628087, "grad_norm": 0.7164451544357832, "learning_rate": 7.204805983941249e-06, "loss": 0.4636, "step": 9185 }, { "epoch": 1.5085911358364297, "grad_norm": 0.45159203892050004, "learning_rate": 7.20443116766262e-06, "loss": 0.4948, "step": 9186 }, { "epoch": 1.5087553630447723, "grad_norm": 0.28798609716907286, "learning_rate": 7.204056322011208e-06, "loss": 0.4756, "step": 9187 }, { "epoch": 1.5089195902531152, "grad_norm": 0.2880361608967198, "learning_rate": 7.203681446991084e-06, "loss": 0.4714, "step": 9188 }, { "epoch": 1.509083817461458, "grad_norm": 0.5247905124145498, "learning_rate": 7.2033065426063176e-06, "loss": 0.4727, "step": 9189 }, { "epoch": 1.5092480446698007, "grad_norm": 0.6860853909871801, "learning_rate": 7.202931608860984e-06, "loss": 0.4934, "step": 9190 }, { "epoch": 1.5094122718781433, "grad_norm": 0.28275800142480045, "learning_rate": 7.202556645759153e-06, "loss": 0.4777, "step": 9191 }, { "epoch": 1.5095764990864862, "grad_norm": 0.31027777276533725, "learning_rate": 7.2021816533049e-06, "loss": 0.4692, "step": 9192 }, { "epoch": 1.509740726294829, "grad_norm": 0.3172137811682593, "learning_rate": 7.2018066315022925e-06, "loss": 0.4735, "step": 9193 }, { "epoch": 1.5099049535031717, "grad_norm": 0.3372622416767625, "learning_rate": 7.201431580355408e-06, "loss": 0.4537, "step": 9194 }, { "epoch": 1.5100691807115143, "grad_norm": 0.31133923346230863, "learning_rate": 7.201056499868319e-06, "loss": 0.4743, "step": 9195 }, { "epoch": 1.5102334079198572, "grad_norm": 0.3781581429663542, "learning_rate": 7.2006813900451e-06, "loss": 0.4559, "step": 9196 }, { "epoch": 1.5103976351282, "grad_norm": 0.30980865474592634, "learning_rate": 7.2003062508898224e-06, "loss": 0.471, "step": 9197 }, { "epoch": 1.5105618623365427, "grad_norm": 0.3164800874921807, "learning_rate": 7.1999310824065624e-06, "loss": 0.4684, "step": 9198 }, { "epoch": 1.5107260895448853, "grad_norm": 0.33008071514670867, "learning_rate": 7.199555884599395e-06, "loss": 0.4589, "step": 9199 }, { "epoch": 1.510890316753228, "grad_norm": 0.3651681256005668, "learning_rate": 7.199180657472395e-06, "loss": 0.4586, "step": 9200 }, { "epoch": 1.5110545439615708, "grad_norm": 0.3513853825676322, "learning_rate": 7.198805401029636e-06, "loss": 0.4498, "step": 9201 }, { "epoch": 1.5112187711699137, "grad_norm": 0.3528677624533436, "learning_rate": 7.1984301152751956e-06, "loss": 0.4606, "step": 9202 }, { "epoch": 1.5113829983782563, "grad_norm": 0.4279584123728494, "learning_rate": 7.198054800213151e-06, "loss": 0.452, "step": 9203 }, { "epoch": 1.511547225586599, "grad_norm": 0.2921779445976517, "learning_rate": 7.1976794558475745e-06, "loss": 0.4739, "step": 9204 }, { "epoch": 1.5117114527949418, "grad_norm": 0.2706453446276023, "learning_rate": 7.1973040821825465e-06, "loss": 0.4623, "step": 9205 }, { "epoch": 1.5118756800032846, "grad_norm": 0.2520182801619176, "learning_rate": 7.196928679222141e-06, "loss": 0.4442, "step": 9206 }, { "epoch": 1.5120399072116273, "grad_norm": 0.29407078062875935, "learning_rate": 7.196553246970438e-06, "loss": 0.4711, "step": 9207 }, { "epoch": 1.51220413441997, "grad_norm": 0.3291641852823142, "learning_rate": 7.196177785431513e-06, "loss": 0.4488, "step": 9208 }, { "epoch": 1.5123683616283128, "grad_norm": 0.314166634713391, "learning_rate": 7.195802294609444e-06, "loss": 0.458, "step": 9209 }, { "epoch": 1.5125325888366556, "grad_norm": 0.30014099078366124, "learning_rate": 7.195426774508311e-06, "loss": 0.4725, "step": 9210 }, { "epoch": 1.5126968160449983, "grad_norm": 0.28804167027539607, "learning_rate": 7.19505122513219e-06, "loss": 0.4813, "step": 9211 }, { "epoch": 1.512861043253341, "grad_norm": 0.27774905359300783, "learning_rate": 7.194675646485161e-06, "loss": 0.4516, "step": 9212 }, { "epoch": 1.5130252704616838, "grad_norm": 0.3376729483173663, "learning_rate": 7.194300038571305e-06, "loss": 0.4664, "step": 9213 }, { "epoch": 1.5131894976700266, "grad_norm": 0.2990700174753199, "learning_rate": 7.193924401394699e-06, "loss": 0.4613, "step": 9214 }, { "epoch": 1.5133537248783693, "grad_norm": 0.2570384432016577, "learning_rate": 7.193548734959423e-06, "loss": 0.4556, "step": 9215 }, { "epoch": 1.513517952086712, "grad_norm": 0.3070126716863017, "learning_rate": 7.193173039269558e-06, "loss": 0.464, "step": 9216 }, { "epoch": 1.5136821792950546, "grad_norm": 0.29290536910157305, "learning_rate": 7.1927973143291835e-06, "loss": 0.4396, "step": 9217 }, { "epoch": 1.5138464065033974, "grad_norm": 0.3581538018351269, "learning_rate": 7.192421560142382e-06, "loss": 0.4615, "step": 9218 }, { "epoch": 1.5140106337117403, "grad_norm": 0.36202199207173685, "learning_rate": 7.192045776713232e-06, "loss": 0.4596, "step": 9219 }, { "epoch": 1.514174860920083, "grad_norm": 0.29895826521978897, "learning_rate": 7.191669964045818e-06, "loss": 0.466, "step": 9220 }, { "epoch": 1.5143390881284255, "grad_norm": 0.3183924651940098, "learning_rate": 7.191294122144217e-06, "loss": 0.48, "step": 9221 }, { "epoch": 1.5145033153367684, "grad_norm": 0.28198083034908455, "learning_rate": 7.190918251012517e-06, "loss": 0.4663, "step": 9222 }, { "epoch": 1.5146675425451113, "grad_norm": 0.3303474365929383, "learning_rate": 7.190542350654796e-06, "loss": 0.4923, "step": 9223 }, { "epoch": 1.514831769753454, "grad_norm": 0.4024297610145994, "learning_rate": 7.190166421075138e-06, "loss": 0.4718, "step": 9224 }, { "epoch": 1.5149959969617965, "grad_norm": 0.25837992364309836, "learning_rate": 7.189790462277626e-06, "loss": 0.4564, "step": 9225 }, { "epoch": 1.5151602241701394, "grad_norm": 0.39812967678782274, "learning_rate": 7.1894144742663435e-06, "loss": 0.4519, "step": 9226 }, { "epoch": 1.5153244513784823, "grad_norm": 0.3450635706066045, "learning_rate": 7.189038457045372e-06, "loss": 0.453, "step": 9227 }, { "epoch": 1.515488678586825, "grad_norm": 0.2783889625191982, "learning_rate": 7.1886624106188e-06, "loss": 0.475, "step": 9228 }, { "epoch": 1.5156529057951675, "grad_norm": 0.3376557375783973, "learning_rate": 7.1882863349907076e-06, "loss": 0.4526, "step": 9229 }, { "epoch": 1.5158171330035104, "grad_norm": 0.2896971589896287, "learning_rate": 7.187910230165181e-06, "loss": 0.4553, "step": 9230 }, { "epoch": 1.5159813602118533, "grad_norm": 0.3157032612136202, "learning_rate": 7.187534096146304e-06, "loss": 0.472, "step": 9231 }, { "epoch": 1.516145587420196, "grad_norm": 0.30994446974518053, "learning_rate": 7.1871579329381625e-06, "loss": 0.4639, "step": 9232 }, { "epoch": 1.5163098146285385, "grad_norm": 0.33537568493394626, "learning_rate": 7.186781740544842e-06, "loss": 0.4527, "step": 9233 }, { "epoch": 1.5164740418368812, "grad_norm": 0.3319121755581918, "learning_rate": 7.18640551897043e-06, "loss": 0.4741, "step": 9234 }, { "epoch": 1.516638269045224, "grad_norm": 0.29375157989929307, "learning_rate": 7.18602926821901e-06, "loss": 0.4805, "step": 9235 }, { "epoch": 1.516802496253567, "grad_norm": 0.278719060087101, "learning_rate": 7.185652988294668e-06, "loss": 0.459, "step": 9236 }, { "epoch": 1.5169667234619095, "grad_norm": 0.28645821773642316, "learning_rate": 7.185276679201494e-06, "loss": 0.4561, "step": 9237 }, { "epoch": 1.5171309506702522, "grad_norm": 0.3963936354834165, "learning_rate": 7.184900340943574e-06, "loss": 0.4494, "step": 9238 }, { "epoch": 1.517295177878595, "grad_norm": 0.26055817683213023, "learning_rate": 7.184523973524993e-06, "loss": 0.4488, "step": 9239 }, { "epoch": 1.5174594050869379, "grad_norm": 0.31956318401914047, "learning_rate": 7.184147576949841e-06, "loss": 0.4846, "step": 9240 }, { "epoch": 1.5176236322952805, "grad_norm": 0.6295143635584098, "learning_rate": 7.183771151222205e-06, "loss": 0.4712, "step": 9241 }, { "epoch": 1.5177878595036232, "grad_norm": 0.35754560129622853, "learning_rate": 7.183394696346175e-06, "loss": 0.4528, "step": 9242 }, { "epoch": 1.517952086711966, "grad_norm": 0.44717398192916935, "learning_rate": 7.1830182123258376e-06, "loss": 0.4685, "step": 9243 }, { "epoch": 1.5181163139203089, "grad_norm": 0.44012743422950795, "learning_rate": 7.1826416991652835e-06, "loss": 0.4705, "step": 9244 }, { "epoch": 1.5182805411286515, "grad_norm": 0.26859917683990014, "learning_rate": 7.182265156868599e-06, "loss": 0.4438, "step": 9245 }, { "epoch": 1.5184447683369942, "grad_norm": 0.28892403464626515, "learning_rate": 7.181888585439879e-06, "loss": 0.4714, "step": 9246 }, { "epoch": 1.518608995545337, "grad_norm": 0.38304813283172645, "learning_rate": 7.181511984883208e-06, "loss": 0.4867, "step": 9247 }, { "epoch": 1.5187732227536799, "grad_norm": 0.30305384307379923, "learning_rate": 7.181135355202679e-06, "loss": 0.4791, "step": 9248 }, { "epoch": 1.5189374499620225, "grad_norm": 0.3155490544939365, "learning_rate": 7.180758696402382e-06, "loss": 0.4766, "step": 9249 }, { "epoch": 1.5191016771703652, "grad_norm": 0.2906595932861458, "learning_rate": 7.180382008486409e-06, "loss": 0.4532, "step": 9250 }, { "epoch": 1.5192659043787078, "grad_norm": 0.4474126938917336, "learning_rate": 7.18000529145885e-06, "loss": 0.4571, "step": 9251 }, { "epoch": 1.5194301315870506, "grad_norm": 0.31794116167934067, "learning_rate": 7.179628545323797e-06, "loss": 0.4786, "step": 9252 }, { "epoch": 1.5195943587953935, "grad_norm": 0.3263806739083232, "learning_rate": 7.1792517700853405e-06, "loss": 0.4735, "step": 9253 }, { "epoch": 1.5197585860037361, "grad_norm": 0.3827263780224837, "learning_rate": 7.178874965747575e-06, "loss": 0.4625, "step": 9254 }, { "epoch": 1.5199228132120788, "grad_norm": 0.3192602339087632, "learning_rate": 7.178498132314591e-06, "loss": 0.4676, "step": 9255 }, { "epoch": 1.5200870404204216, "grad_norm": 0.39440388028059575, "learning_rate": 7.1781212697904815e-06, "loss": 0.4611, "step": 9256 }, { "epoch": 1.5202512676287645, "grad_norm": 0.32399627276062054, "learning_rate": 7.177744378179342e-06, "loss": 0.4473, "step": 9257 }, { "epoch": 1.5204154948371071, "grad_norm": 0.2790344377044542, "learning_rate": 7.177367457485262e-06, "loss": 0.4805, "step": 9258 }, { "epoch": 1.5205797220454498, "grad_norm": 0.3458950067863732, "learning_rate": 7.176990507712338e-06, "loss": 0.4459, "step": 9259 }, { "epoch": 1.5207439492537926, "grad_norm": 0.3416751416590733, "learning_rate": 7.176613528864664e-06, "loss": 0.4549, "step": 9260 }, { "epoch": 1.5209081764621355, "grad_norm": 0.3240434085579008, "learning_rate": 7.176236520946333e-06, "loss": 0.4541, "step": 9261 }, { "epoch": 1.5210724036704781, "grad_norm": 0.28624661378771976, "learning_rate": 7.175859483961441e-06, "loss": 0.4687, "step": 9262 }, { "epoch": 1.5212366308788208, "grad_norm": 0.31156116458197464, "learning_rate": 7.175482417914081e-06, "loss": 0.4775, "step": 9263 }, { "epoch": 1.5214008580871636, "grad_norm": 0.3201213181079633, "learning_rate": 7.1751053228083495e-06, "loss": 0.4542, "step": 9264 }, { "epoch": 1.5215650852955065, "grad_norm": 0.2896366480561736, "learning_rate": 7.174728198648343e-06, "loss": 0.4613, "step": 9265 }, { "epoch": 1.5217293125038491, "grad_norm": 1.5454545712227965, "learning_rate": 7.174351045438156e-06, "loss": 0.4528, "step": 9266 }, { "epoch": 1.5218935397121918, "grad_norm": 0.4130809329340727, "learning_rate": 7.173973863181886e-06, "loss": 0.4612, "step": 9267 }, { "epoch": 1.5220577669205344, "grad_norm": 0.5392643599819068, "learning_rate": 7.173596651883629e-06, "loss": 0.4734, "step": 9268 }, { "epoch": 1.5222219941288773, "grad_norm": 0.29554384020455526, "learning_rate": 7.173219411547483e-06, "loss": 0.4572, "step": 9269 }, { "epoch": 1.5223862213372201, "grad_norm": 0.37929201827686776, "learning_rate": 7.172842142177543e-06, "loss": 0.4761, "step": 9270 }, { "epoch": 1.5225504485455628, "grad_norm": 0.27942306578844844, "learning_rate": 7.172464843777907e-06, "loss": 0.4546, "step": 9271 }, { "epoch": 1.5227146757539054, "grad_norm": 0.31283795812998494, "learning_rate": 7.172087516352674e-06, "loss": 0.4632, "step": 9272 }, { "epoch": 1.5228789029622483, "grad_norm": 0.28612405394145773, "learning_rate": 7.171710159905943e-06, "loss": 0.4581, "step": 9273 }, { "epoch": 1.5230431301705911, "grad_norm": 0.4814978265268251, "learning_rate": 7.171332774441809e-06, "loss": 0.4654, "step": 9274 }, { "epoch": 1.5232073573789338, "grad_norm": 0.3974772117244747, "learning_rate": 7.170955359964373e-06, "loss": 0.4625, "step": 9275 }, { "epoch": 1.5233715845872764, "grad_norm": 0.3978478495668505, "learning_rate": 7.170577916477736e-06, "loss": 0.4508, "step": 9276 }, { "epoch": 1.5235358117956193, "grad_norm": 0.47194095448274265, "learning_rate": 7.170200443985993e-06, "loss": 0.4553, "step": 9277 }, { "epoch": 1.5237000390039621, "grad_norm": 0.37774460441156743, "learning_rate": 7.1698229424932476e-06, "loss": 0.4612, "step": 9278 }, { "epoch": 1.5238642662123048, "grad_norm": 0.36763814623347385, "learning_rate": 7.169445412003598e-06, "loss": 0.4604, "step": 9279 }, { "epoch": 1.5240284934206474, "grad_norm": 0.5037516746821502, "learning_rate": 7.169067852521144e-06, "loss": 0.478, "step": 9280 }, { "epoch": 1.5241927206289902, "grad_norm": 0.4212263940251124, "learning_rate": 7.1686902640499876e-06, "loss": 0.4634, "step": 9281 }, { "epoch": 1.524356947837333, "grad_norm": 0.639577271055163, "learning_rate": 7.168312646594228e-06, "loss": 0.4659, "step": 9282 }, { "epoch": 1.5245211750456757, "grad_norm": 0.31425965396324984, "learning_rate": 7.16793500015797e-06, "loss": 0.458, "step": 9283 }, { "epoch": 1.5246854022540184, "grad_norm": 0.3549092300339072, "learning_rate": 7.167557324745312e-06, "loss": 0.4847, "step": 9284 }, { "epoch": 1.524849629462361, "grad_norm": 0.464940002037064, "learning_rate": 7.167179620360357e-06, "loss": 0.4851, "step": 9285 }, { "epoch": 1.5250138566707039, "grad_norm": 0.7404536442112363, "learning_rate": 7.166801887007208e-06, "loss": 0.4645, "step": 9286 }, { "epoch": 1.5251780838790467, "grad_norm": 0.3797169402295166, "learning_rate": 7.166424124689965e-06, "loss": 0.4729, "step": 9287 }, { "epoch": 1.5253423110873894, "grad_norm": 0.4800510956035743, "learning_rate": 7.1660463334127345e-06, "loss": 0.473, "step": 9288 }, { "epoch": 1.525506538295732, "grad_norm": 0.29060595534750716, "learning_rate": 7.165668513179617e-06, "loss": 0.4534, "step": 9289 }, { "epoch": 1.5256707655040749, "grad_norm": 0.30087575863625227, "learning_rate": 7.165290663994715e-06, "loss": 0.4956, "step": 9290 }, { "epoch": 1.5258349927124177, "grad_norm": 0.339029574512295, "learning_rate": 7.1649127858621354e-06, "loss": 0.4774, "step": 9291 }, { "epoch": 1.5259992199207604, "grad_norm": 0.2916719047991023, "learning_rate": 7.164534878785982e-06, "loss": 0.4867, "step": 9292 }, { "epoch": 1.526163447129103, "grad_norm": 0.30316356818056006, "learning_rate": 7.1641569427703585e-06, "loss": 0.4491, "step": 9293 }, { "epoch": 1.5263276743374459, "grad_norm": 0.34009506124918254, "learning_rate": 7.163778977819368e-06, "loss": 0.4506, "step": 9294 }, { "epoch": 1.5264919015457887, "grad_norm": 0.3158249532017147, "learning_rate": 7.163400983937117e-06, "loss": 0.4728, "step": 9295 }, { "epoch": 1.5266561287541314, "grad_norm": 0.31123665592368355, "learning_rate": 7.163022961127711e-06, "loss": 0.4595, "step": 9296 }, { "epoch": 1.526820355962474, "grad_norm": 0.3281051407634187, "learning_rate": 7.162644909395256e-06, "loss": 0.4652, "step": 9297 }, { "epoch": 1.5269845831708169, "grad_norm": 0.29028559973315327, "learning_rate": 7.162266828743857e-06, "loss": 0.469, "step": 9298 }, { "epoch": 1.5271488103791597, "grad_norm": 0.26500384107962766, "learning_rate": 7.161888719177622e-06, "loss": 0.4717, "step": 9299 }, { "epoch": 1.5273130375875024, "grad_norm": 0.3146412183956378, "learning_rate": 7.161510580700656e-06, "loss": 0.4669, "step": 9300 }, { "epoch": 1.527477264795845, "grad_norm": 0.30019999592266705, "learning_rate": 7.161132413317068e-06, "loss": 0.4657, "step": 9301 }, { "epoch": 1.5276414920041876, "grad_norm": 0.28169330642062956, "learning_rate": 7.160754217030962e-06, "loss": 0.4525, "step": 9302 }, { "epoch": 1.5278057192125305, "grad_norm": 0.5323783758611011, "learning_rate": 7.1603759918464476e-06, "loss": 0.4877, "step": 9303 }, { "epoch": 1.5279699464208734, "grad_norm": 0.2690038165753098, "learning_rate": 7.159997737767632e-06, "loss": 0.4695, "step": 9304 }, { "epoch": 1.528134173629216, "grad_norm": 0.709221517163938, "learning_rate": 7.159619454798625e-06, "loss": 0.4505, "step": 9305 }, { "epoch": 1.5282984008375586, "grad_norm": 0.26711462113418744, "learning_rate": 7.159241142943533e-06, "loss": 0.4658, "step": 9306 }, { "epoch": 1.5284626280459015, "grad_norm": 0.3039516845191958, "learning_rate": 7.158862802206466e-06, "loss": 0.4671, "step": 9307 }, { "epoch": 1.5286268552542444, "grad_norm": 0.2631380237679185, "learning_rate": 7.158484432591534e-06, "loss": 0.457, "step": 9308 }, { "epoch": 1.528791082462587, "grad_norm": 0.34880055704945134, "learning_rate": 7.158106034102846e-06, "loss": 0.459, "step": 9309 }, { "epoch": 1.5289553096709296, "grad_norm": 0.4332345447097457, "learning_rate": 7.1577276067445094e-06, "loss": 0.481, "step": 9310 }, { "epoch": 1.5291195368792725, "grad_norm": 0.2701428254345847, "learning_rate": 7.157349150520636e-06, "loss": 0.464, "step": 9311 }, { "epoch": 1.5292837640876153, "grad_norm": 0.3191308013759308, "learning_rate": 7.156970665435338e-06, "loss": 0.461, "step": 9312 }, { "epoch": 1.529447991295958, "grad_norm": 0.3030079025076875, "learning_rate": 7.156592151492722e-06, "loss": 0.4735, "step": 9313 }, { "epoch": 1.5296122185043006, "grad_norm": 0.33155877053915367, "learning_rate": 7.156213608696904e-06, "loss": 0.5012, "step": 9314 }, { "epoch": 1.5297764457126435, "grad_norm": 0.3130320047585034, "learning_rate": 7.155835037051993e-06, "loss": 0.4917, "step": 9315 }, { "epoch": 1.5299406729209863, "grad_norm": 0.36060204593966044, "learning_rate": 7.155456436562098e-06, "loss": 0.4803, "step": 9316 }, { "epoch": 1.530104900129329, "grad_norm": 0.32398573562386035, "learning_rate": 7.155077807231336e-06, "loss": 0.4495, "step": 9317 }, { "epoch": 1.5302691273376716, "grad_norm": 0.27751479721468064, "learning_rate": 7.154699149063816e-06, "loss": 0.4826, "step": 9318 }, { "epoch": 1.5304333545460143, "grad_norm": 0.42071383808180096, "learning_rate": 7.1543204620636505e-06, "loss": 0.4529, "step": 9319 }, { "epoch": 1.5305975817543571, "grad_norm": 0.44110763680049825, "learning_rate": 7.153941746234953e-06, "loss": 0.4987, "step": 9320 }, { "epoch": 1.5307618089627, "grad_norm": 0.289458468189357, "learning_rate": 7.153563001581838e-06, "loss": 0.4733, "step": 9321 }, { "epoch": 1.5309260361710426, "grad_norm": 0.2512213588331616, "learning_rate": 7.153184228108419e-06, "loss": 0.4549, "step": 9322 }, { "epoch": 1.5310902633793853, "grad_norm": 0.3004273617854931, "learning_rate": 7.152805425818807e-06, "loss": 0.454, "step": 9323 }, { "epoch": 1.5312544905877281, "grad_norm": 0.32267924554730515, "learning_rate": 7.152426594717119e-06, "loss": 0.4796, "step": 9324 }, { "epoch": 1.531418717796071, "grad_norm": 0.2957664116913034, "learning_rate": 7.152047734807469e-06, "loss": 0.4688, "step": 9325 }, { "epoch": 1.5315829450044136, "grad_norm": 0.29607329025830087, "learning_rate": 7.151668846093971e-06, "loss": 0.4735, "step": 9326 }, { "epoch": 1.5317471722127562, "grad_norm": 0.34564464756084806, "learning_rate": 7.15128992858074e-06, "loss": 0.4746, "step": 9327 }, { "epoch": 1.531911399421099, "grad_norm": 0.3632349137268046, "learning_rate": 7.1509109822718915e-06, "loss": 0.4638, "step": 9328 }, { "epoch": 1.532075626629442, "grad_norm": 0.3528582463422724, "learning_rate": 7.150532007171542e-06, "loss": 0.4739, "step": 9329 }, { "epoch": 1.5322398538377846, "grad_norm": 0.2851865961310054, "learning_rate": 7.150153003283807e-06, "loss": 0.4625, "step": 9330 }, { "epoch": 1.5324040810461272, "grad_norm": 0.2644495220903839, "learning_rate": 7.149773970612804e-06, "loss": 0.4744, "step": 9331 }, { "epoch": 1.53256830825447, "grad_norm": 0.26070085543493976, "learning_rate": 7.149394909162648e-06, "loss": 0.4608, "step": 9332 }, { "epoch": 1.532732535462813, "grad_norm": 0.29613010697864284, "learning_rate": 7.149015818937456e-06, "loss": 0.4521, "step": 9333 }, { "epoch": 1.5328967626711556, "grad_norm": 0.31117686961305685, "learning_rate": 7.148636699941347e-06, "loss": 0.4644, "step": 9334 }, { "epoch": 1.5330609898794982, "grad_norm": 0.29211819423313984, "learning_rate": 7.148257552178438e-06, "loss": 0.4821, "step": 9335 }, { "epoch": 1.5332252170878409, "grad_norm": 0.2703181498755695, "learning_rate": 7.147878375652844e-06, "loss": 0.4721, "step": 9336 }, { "epoch": 1.5333894442961837, "grad_norm": 0.33278351582547366, "learning_rate": 7.147499170368688e-06, "loss": 0.4536, "step": 9337 }, { "epoch": 1.5335536715045266, "grad_norm": 0.28749069490029466, "learning_rate": 7.1471199363300845e-06, "loss": 0.4548, "step": 9338 }, { "epoch": 1.5337178987128692, "grad_norm": 0.29073060662806555, "learning_rate": 7.146740673541155e-06, "loss": 0.4615, "step": 9339 }, { "epoch": 1.5338821259212119, "grad_norm": 0.4272828402384313, "learning_rate": 7.146361382006019e-06, "loss": 0.4344, "step": 9340 }, { "epoch": 1.5340463531295547, "grad_norm": 0.28147195710433215, "learning_rate": 7.145982061728792e-06, "loss": 0.4752, "step": 9341 }, { "epoch": 1.5342105803378976, "grad_norm": 0.3439436486971339, "learning_rate": 7.145602712713598e-06, "loss": 0.4666, "step": 9342 }, { "epoch": 1.5343748075462402, "grad_norm": 0.3488722878311697, "learning_rate": 7.145223334964556e-06, "loss": 0.4615, "step": 9343 }, { "epoch": 1.5345390347545829, "grad_norm": 0.30250466922681474, "learning_rate": 7.144843928485786e-06, "loss": 0.4663, "step": 9344 }, { "epoch": 1.5347032619629257, "grad_norm": 0.2913076112847636, "learning_rate": 7.144464493281407e-06, "loss": 0.493, "step": 9345 }, { "epoch": 1.5348674891712686, "grad_norm": 0.27574286461367503, "learning_rate": 7.144085029355544e-06, "loss": 0.4568, "step": 9346 }, { "epoch": 1.5350317163796112, "grad_norm": 0.3085587250021239, "learning_rate": 7.143705536712316e-06, "loss": 0.459, "step": 9347 }, { "epoch": 1.5351959435879539, "grad_norm": 0.42885403672499844, "learning_rate": 7.143326015355844e-06, "loss": 0.4581, "step": 9348 }, { "epoch": 1.5353601707962967, "grad_norm": 0.2740916414682097, "learning_rate": 7.14294646529025e-06, "loss": 0.4518, "step": 9349 }, { "epoch": 1.5355243980046396, "grad_norm": 0.3450236813009234, "learning_rate": 7.1425668865196585e-06, "loss": 0.4523, "step": 9350 }, { "epoch": 1.5356886252129822, "grad_norm": 0.38502119714992583, "learning_rate": 7.14218727904819e-06, "loss": 0.4633, "step": 9351 }, { "epoch": 1.5358528524213249, "grad_norm": 0.4006288598907935, "learning_rate": 7.1418076428799685e-06, "loss": 0.4618, "step": 9352 }, { "epoch": 1.5360170796296675, "grad_norm": 0.5293557699101507, "learning_rate": 7.141427978019116e-06, "loss": 0.4661, "step": 9353 }, { "epoch": 1.5361813068380104, "grad_norm": 0.3574612680794133, "learning_rate": 7.141048284469758e-06, "loss": 0.4801, "step": 9354 }, { "epoch": 1.5363455340463532, "grad_norm": 0.3274060576534963, "learning_rate": 7.1406685622360174e-06, "loss": 0.4723, "step": 9355 }, { "epoch": 1.5365097612546958, "grad_norm": 0.3126087621576872, "learning_rate": 7.140288811322017e-06, "loss": 0.4491, "step": 9356 }, { "epoch": 1.5366739884630385, "grad_norm": 0.43435643215596625, "learning_rate": 7.139909031731883e-06, "loss": 0.476, "step": 9357 }, { "epoch": 1.5368382156713813, "grad_norm": 0.2922725077472286, "learning_rate": 7.139529223469738e-06, "loss": 0.4591, "step": 9358 }, { "epoch": 1.5370024428797242, "grad_norm": 0.33144720651032605, "learning_rate": 7.139149386539711e-06, "loss": 0.4807, "step": 9359 }, { "epoch": 1.5371666700880668, "grad_norm": 0.2945145824570666, "learning_rate": 7.138769520945925e-06, "loss": 0.4484, "step": 9360 }, { "epoch": 1.5373308972964095, "grad_norm": 0.2699345270357702, "learning_rate": 7.138389626692504e-06, "loss": 0.4663, "step": 9361 }, { "epoch": 1.5374951245047523, "grad_norm": 0.37423556894532223, "learning_rate": 7.138009703783577e-06, "loss": 0.4629, "step": 9362 }, { "epoch": 1.5376593517130952, "grad_norm": 0.28643072132489733, "learning_rate": 7.137629752223268e-06, "loss": 0.4626, "step": 9363 }, { "epoch": 1.5378235789214378, "grad_norm": 0.4011805145508984, "learning_rate": 7.137249772015707e-06, "loss": 0.4598, "step": 9364 }, { "epoch": 1.5379878061297805, "grad_norm": 0.28589490123057115, "learning_rate": 7.136869763165017e-06, "loss": 0.4401, "step": 9365 }, { "epoch": 1.5381520333381233, "grad_norm": 0.33402197609435524, "learning_rate": 7.136489725675328e-06, "loss": 0.4718, "step": 9366 }, { "epoch": 1.5383162605464662, "grad_norm": 0.3515709813075105, "learning_rate": 7.136109659550765e-06, "loss": 0.4692, "step": 9367 }, { "epoch": 1.5384804877548088, "grad_norm": 0.727042993615268, "learning_rate": 7.13572956479546e-06, "loss": 0.4434, "step": 9368 }, { "epoch": 1.5386447149631515, "grad_norm": 0.2803236782587894, "learning_rate": 7.135349441413538e-06, "loss": 0.4727, "step": 9369 }, { "epoch": 1.538808942171494, "grad_norm": 0.3226815325060115, "learning_rate": 7.134969289409126e-06, "loss": 0.4747, "step": 9370 }, { "epoch": 1.538973169379837, "grad_norm": 0.36235437219598443, "learning_rate": 7.134589108786357e-06, "loss": 0.4468, "step": 9371 }, { "epoch": 1.5391373965881798, "grad_norm": 0.35625948487457887, "learning_rate": 7.134208899549359e-06, "loss": 0.4486, "step": 9372 }, { "epoch": 1.5393016237965225, "grad_norm": 0.34735670003053626, "learning_rate": 7.133828661702259e-06, "loss": 0.4335, "step": 9373 }, { "epoch": 1.539465851004865, "grad_norm": 0.34199356720688007, "learning_rate": 7.133448395249189e-06, "loss": 0.4774, "step": 9374 }, { "epoch": 1.539630078213208, "grad_norm": 0.3075669052642566, "learning_rate": 7.133068100194278e-06, "loss": 0.466, "step": 9375 }, { "epoch": 1.5397943054215508, "grad_norm": 0.30933420404936507, "learning_rate": 7.132687776541658e-06, "loss": 0.4629, "step": 9376 }, { "epoch": 1.5399585326298935, "grad_norm": 0.3167400643057456, "learning_rate": 7.132307424295457e-06, "loss": 0.4694, "step": 9377 }, { "epoch": 1.540122759838236, "grad_norm": 0.3330410083602585, "learning_rate": 7.1319270434598095e-06, "loss": 0.4541, "step": 9378 }, { "epoch": 1.540286987046579, "grad_norm": 0.2608004017915481, "learning_rate": 7.131546634038843e-06, "loss": 0.4609, "step": 9379 }, { "epoch": 1.5404512142549218, "grad_norm": 0.32756145155673744, "learning_rate": 7.131166196036692e-06, "loss": 0.4679, "step": 9380 }, { "epoch": 1.5406154414632645, "grad_norm": 0.3215737601475467, "learning_rate": 7.130785729457487e-06, "loss": 0.4712, "step": 9381 }, { "epoch": 1.540779668671607, "grad_norm": 0.34381508752868223, "learning_rate": 7.13040523430536e-06, "loss": 0.4399, "step": 9382 }, { "epoch": 1.54094389587995, "grad_norm": 0.3004485729292794, "learning_rate": 7.1300247105844455e-06, "loss": 0.4567, "step": 9383 }, { "epoch": 1.5411081230882928, "grad_norm": 0.37942435111803424, "learning_rate": 7.1296441582988745e-06, "loss": 0.4677, "step": 9384 }, { "epoch": 1.5412723502966355, "grad_norm": 0.4357473073673098, "learning_rate": 7.129263577452781e-06, "loss": 0.4518, "step": 9385 }, { "epoch": 1.541436577504978, "grad_norm": 0.3087949525062071, "learning_rate": 7.128882968050298e-06, "loss": 0.4724, "step": 9386 }, { "epoch": 1.5416008047133207, "grad_norm": 0.35014647578381863, "learning_rate": 7.128502330095558e-06, "loss": 0.46, "step": 9387 }, { "epoch": 1.5417650319216636, "grad_norm": 0.3146885120825772, "learning_rate": 7.1281216635926985e-06, "loss": 0.4583, "step": 9388 }, { "epoch": 1.5419292591300064, "grad_norm": 0.3136789279242204, "learning_rate": 7.127740968545852e-06, "loss": 0.4843, "step": 9389 }, { "epoch": 1.542093486338349, "grad_norm": 0.5298025260222496, "learning_rate": 7.127360244959151e-06, "loss": 0.4812, "step": 9390 }, { "epoch": 1.5422577135466917, "grad_norm": 0.3844808012012636, "learning_rate": 7.126979492836736e-06, "loss": 0.4544, "step": 9391 }, { "epoch": 1.5424219407550346, "grad_norm": 0.39924791256605835, "learning_rate": 7.126598712182736e-06, "loss": 0.479, "step": 9392 }, { "epoch": 1.5425861679633774, "grad_norm": 0.3602246778936246, "learning_rate": 7.1262179030012925e-06, "loss": 0.4783, "step": 9393 }, { "epoch": 1.54275039517172, "grad_norm": 0.2732202036898396, "learning_rate": 7.1258370652965375e-06, "loss": 0.4785, "step": 9394 }, { "epoch": 1.5429146223800627, "grad_norm": 0.3263140799564156, "learning_rate": 7.125456199072609e-06, "loss": 0.4563, "step": 9395 }, { "epoch": 1.5430788495884056, "grad_norm": 0.3548333323535569, "learning_rate": 7.125075304333642e-06, "loss": 0.4517, "step": 9396 }, { "epoch": 1.5432430767967484, "grad_norm": 0.8067372128015823, "learning_rate": 7.1246943810837745e-06, "loss": 0.453, "step": 9397 }, { "epoch": 1.543407304005091, "grad_norm": 0.26504405806007353, "learning_rate": 7.1243134293271445e-06, "loss": 0.4349, "step": 9398 }, { "epoch": 1.5435715312134337, "grad_norm": 0.45675103704189807, "learning_rate": 7.123932449067888e-06, "loss": 0.4476, "step": 9399 }, { "epoch": 1.5437357584217766, "grad_norm": 0.32637847065326686, "learning_rate": 7.123551440310144e-06, "loss": 0.4862, "step": 9400 }, { "epoch": 1.5438999856301194, "grad_norm": 0.33195374509870645, "learning_rate": 7.1231704030580516e-06, "loss": 0.4596, "step": 9401 }, { "epoch": 1.544064212838462, "grad_norm": 0.255934132190599, "learning_rate": 7.122789337315745e-06, "loss": 0.4678, "step": 9402 }, { "epoch": 1.5442284400468047, "grad_norm": 0.33693985909373964, "learning_rate": 7.122408243087367e-06, "loss": 0.4686, "step": 9403 }, { "epoch": 1.5443926672551473, "grad_norm": 0.2996963732838922, "learning_rate": 7.122027120377055e-06, "loss": 0.4839, "step": 9404 }, { "epoch": 1.5445568944634902, "grad_norm": 0.2773729203223255, "learning_rate": 7.12164596918895e-06, "loss": 0.4862, "step": 9405 }, { "epoch": 1.544721121671833, "grad_norm": 0.25544315255098893, "learning_rate": 7.121264789527189e-06, "loss": 0.4691, "step": 9406 }, { "epoch": 1.5448853488801757, "grad_norm": 0.3592750822464238, "learning_rate": 7.120883581395914e-06, "loss": 0.4467, "step": 9407 }, { "epoch": 1.5450495760885183, "grad_norm": 0.28607035669715075, "learning_rate": 7.120502344799264e-06, "loss": 0.4544, "step": 9408 }, { "epoch": 1.5452138032968612, "grad_norm": 0.25531993325233804, "learning_rate": 7.120121079741381e-06, "loss": 0.4449, "step": 9409 }, { "epoch": 1.545378030505204, "grad_norm": 0.5343782428139733, "learning_rate": 7.119739786226406e-06, "loss": 0.473, "step": 9410 }, { "epoch": 1.5455422577135467, "grad_norm": 0.2864075940659775, "learning_rate": 7.119358464258478e-06, "loss": 0.4694, "step": 9411 }, { "epoch": 1.5457064849218893, "grad_norm": 0.2715392085229134, "learning_rate": 7.118977113841741e-06, "loss": 0.4486, "step": 9412 }, { "epoch": 1.5458707121302322, "grad_norm": 0.3593589381329291, "learning_rate": 7.1185957349803355e-06, "loss": 0.4578, "step": 9413 }, { "epoch": 1.546034939338575, "grad_norm": 0.3399272636588334, "learning_rate": 7.118214327678404e-06, "loss": 0.4822, "step": 9414 }, { "epoch": 1.5461991665469177, "grad_norm": 0.3378526292093369, "learning_rate": 7.1178328919400895e-06, "loss": 0.459, "step": 9415 }, { "epoch": 1.5463633937552603, "grad_norm": 0.29955207988397575, "learning_rate": 7.117451427769532e-06, "loss": 0.4605, "step": 9416 }, { "epoch": 1.5465276209636032, "grad_norm": 0.31826406899690846, "learning_rate": 7.117069935170879e-06, "loss": 0.4502, "step": 9417 }, { "epoch": 1.546691848171946, "grad_norm": 0.3419910247342894, "learning_rate": 7.11668841414827e-06, "loss": 0.481, "step": 9418 }, { "epoch": 1.5468560753802887, "grad_norm": 0.3465516444944875, "learning_rate": 7.1163068647058515e-06, "loss": 0.4589, "step": 9419 }, { "epoch": 1.5470203025886313, "grad_norm": 0.3299902597621926, "learning_rate": 7.115925286847767e-06, "loss": 0.4424, "step": 9420 }, { "epoch": 1.547184529796974, "grad_norm": 0.30635057158537654, "learning_rate": 7.115543680578159e-06, "loss": 0.4842, "step": 9421 }, { "epoch": 1.5473487570053168, "grad_norm": 0.34957654661682797, "learning_rate": 7.115162045901173e-06, "loss": 0.4825, "step": 9422 }, { "epoch": 1.5475129842136597, "grad_norm": 0.2953423418282763, "learning_rate": 7.114780382820955e-06, "loss": 0.4719, "step": 9423 }, { "epoch": 1.5476772114220023, "grad_norm": 0.3780817633359112, "learning_rate": 7.1143986913416495e-06, "loss": 0.4778, "step": 9424 }, { "epoch": 1.547841438630345, "grad_norm": 0.34843838102503016, "learning_rate": 7.114016971467401e-06, "loss": 0.4526, "step": 9425 }, { "epoch": 1.5480056658386878, "grad_norm": 0.355449539427888, "learning_rate": 7.113635223202358e-06, "loss": 0.4825, "step": 9426 }, { "epoch": 1.5481698930470307, "grad_norm": 0.3234611083942094, "learning_rate": 7.113253446550665e-06, "loss": 0.4869, "step": 9427 }, { "epoch": 1.5483341202553733, "grad_norm": 0.307306483199121, "learning_rate": 7.112871641516466e-06, "loss": 0.4694, "step": 9428 }, { "epoch": 1.548498347463716, "grad_norm": 0.30794715743718676, "learning_rate": 7.112489808103912e-06, "loss": 0.4873, "step": 9429 }, { "epoch": 1.5486625746720588, "grad_norm": 0.47467914042047893, "learning_rate": 7.1121079463171485e-06, "loss": 0.475, "step": 9430 }, { "epoch": 1.5488268018804017, "grad_norm": 0.29763731599021, "learning_rate": 7.111726056160322e-06, "loss": 0.4722, "step": 9431 }, { "epoch": 1.5489910290887443, "grad_norm": 0.5091962443448468, "learning_rate": 7.1113441376375815e-06, "loss": 0.4707, "step": 9432 }, { "epoch": 1.549155256297087, "grad_norm": 0.37582684669527816, "learning_rate": 7.110962190753074e-06, "loss": 0.4578, "step": 9433 }, { "epoch": 1.5493194835054298, "grad_norm": 0.28575068074639237, "learning_rate": 7.110580215510948e-06, "loss": 0.4823, "step": 9434 }, { "epoch": 1.5494837107137727, "grad_norm": 0.2629592017210659, "learning_rate": 7.1101982119153535e-06, "loss": 0.4744, "step": 9435 }, { "epoch": 1.5496479379221153, "grad_norm": 0.30478574127256014, "learning_rate": 7.109816179970438e-06, "loss": 0.4651, "step": 9436 }, { "epoch": 1.549812165130458, "grad_norm": 0.30396636516802095, "learning_rate": 7.10943411968035e-06, "loss": 0.4578, "step": 9437 }, { "epoch": 1.5499763923388006, "grad_norm": 0.34806655997770647, "learning_rate": 7.109052031049241e-06, "loss": 0.451, "step": 9438 }, { "epoch": 1.5501406195471434, "grad_norm": 0.3268077745593211, "learning_rate": 7.108669914081259e-06, "loss": 0.4668, "step": 9439 }, { "epoch": 1.5503048467554863, "grad_norm": 0.34825895403338536, "learning_rate": 7.108287768780558e-06, "loss": 0.4656, "step": 9440 }, { "epoch": 1.550469073963829, "grad_norm": 0.34054761958330737, "learning_rate": 7.107905595151283e-06, "loss": 0.4579, "step": 9441 }, { "epoch": 1.5506333011721716, "grad_norm": 0.32724562169367966, "learning_rate": 7.107523393197588e-06, "loss": 0.451, "step": 9442 }, { "epoch": 1.5507975283805144, "grad_norm": 0.30315162837296666, "learning_rate": 7.107141162923624e-06, "loss": 0.4703, "step": 9443 }, { "epoch": 1.5509617555888573, "grad_norm": 0.3294099327301469, "learning_rate": 7.1067589043335415e-06, "loss": 0.4831, "step": 9444 }, { "epoch": 1.5511259827972, "grad_norm": 0.3131121838015957, "learning_rate": 7.106376617431493e-06, "loss": 0.4784, "step": 9445 }, { "epoch": 1.5512902100055426, "grad_norm": 0.4850003427166011, "learning_rate": 7.105994302221629e-06, "loss": 0.4584, "step": 9446 }, { "epoch": 1.5514544372138854, "grad_norm": 0.2824536374442186, "learning_rate": 7.105611958708103e-06, "loss": 0.4684, "step": 9447 }, { "epoch": 1.5516186644222283, "grad_norm": 0.26487668424818395, "learning_rate": 7.105229586895069e-06, "loss": 0.4706, "step": 9448 }, { "epoch": 1.551782891630571, "grad_norm": 0.30186016930036735, "learning_rate": 7.104847186786678e-06, "loss": 0.4763, "step": 9449 }, { "epoch": 1.5519471188389136, "grad_norm": 0.2944963864288857, "learning_rate": 7.104464758387083e-06, "loss": 0.4645, "step": 9450 }, { "epoch": 1.5521113460472564, "grad_norm": 0.34167036610371465, "learning_rate": 7.104082301700439e-06, "loss": 0.4659, "step": 9451 }, { "epoch": 1.5522755732555993, "grad_norm": 0.29168994835833856, "learning_rate": 7.103699816730898e-06, "loss": 0.4683, "step": 9452 }, { "epoch": 1.552439800463942, "grad_norm": 0.31947429820977025, "learning_rate": 7.1033173034826165e-06, "loss": 0.4653, "step": 9453 }, { "epoch": 1.5526040276722846, "grad_norm": 0.3599259614381269, "learning_rate": 7.102934761959746e-06, "loss": 0.4491, "step": 9454 }, { "epoch": 1.5527682548806272, "grad_norm": 0.3837674128150548, "learning_rate": 7.102552192166445e-06, "loss": 0.4579, "step": 9455 }, { "epoch": 1.55293248208897, "grad_norm": 0.5835016569566674, "learning_rate": 7.102169594106867e-06, "loss": 0.4753, "step": 9456 }, { "epoch": 1.553096709297313, "grad_norm": 0.35557963697118555, "learning_rate": 7.101786967785166e-06, "loss": 0.4752, "step": 9457 }, { "epoch": 1.5532609365056556, "grad_norm": 0.36659569428207733, "learning_rate": 7.101404313205499e-06, "loss": 0.4628, "step": 9458 }, { "epoch": 1.5534251637139982, "grad_norm": 0.3163237320621313, "learning_rate": 7.101021630372021e-06, "loss": 0.4731, "step": 9459 }, { "epoch": 1.553589390922341, "grad_norm": 0.2771310527959539, "learning_rate": 7.1006389192888896e-06, "loss": 0.4713, "step": 9460 }, { "epoch": 1.553753618130684, "grad_norm": 0.305105596101068, "learning_rate": 7.100256179960261e-06, "loss": 0.4741, "step": 9461 }, { "epoch": 1.5539178453390265, "grad_norm": 0.31147161003315604, "learning_rate": 7.099873412390292e-06, "loss": 0.4736, "step": 9462 }, { "epoch": 1.5540820725473692, "grad_norm": 0.2704854712401249, "learning_rate": 7.099490616583138e-06, "loss": 0.4605, "step": 9463 }, { "epoch": 1.554246299755712, "grad_norm": 0.468442175046904, "learning_rate": 7.099107792542962e-06, "loss": 0.4638, "step": 9464 }, { "epoch": 1.554410526964055, "grad_norm": 0.2761597581553079, "learning_rate": 7.098724940273914e-06, "loss": 0.4456, "step": 9465 }, { "epoch": 1.5545747541723975, "grad_norm": 0.3244344994528494, "learning_rate": 7.098342059780159e-06, "loss": 0.4405, "step": 9466 }, { "epoch": 1.5547389813807402, "grad_norm": 0.4858686845931506, "learning_rate": 7.097959151065853e-06, "loss": 0.4572, "step": 9467 }, { "epoch": 1.554903208589083, "grad_norm": 0.24869488822776611, "learning_rate": 7.097576214135153e-06, "loss": 0.457, "step": 9468 }, { "epoch": 1.555067435797426, "grad_norm": 0.3540991970589224, "learning_rate": 7.097193248992222e-06, "loss": 0.4747, "step": 9469 }, { "epoch": 1.5552316630057685, "grad_norm": 0.3304816779100368, "learning_rate": 7.096810255641214e-06, "loss": 0.449, "step": 9470 }, { "epoch": 1.5553958902141112, "grad_norm": 0.38004786342989794, "learning_rate": 7.096427234086294e-06, "loss": 0.4592, "step": 9471 }, { "epoch": 1.5555601174224538, "grad_norm": 0.27383675654332823, "learning_rate": 7.096044184331619e-06, "loss": 0.4678, "step": 9472 }, { "epoch": 1.5557243446307967, "grad_norm": 0.3790453414043705, "learning_rate": 7.09566110638135e-06, "loss": 0.4699, "step": 9473 }, { "epoch": 1.5558885718391395, "grad_norm": 0.38324765092279667, "learning_rate": 7.095278000239646e-06, "loss": 0.4563, "step": 9474 }, { "epoch": 1.5560527990474822, "grad_norm": 0.33095846652110594, "learning_rate": 7.094894865910672e-06, "loss": 0.4408, "step": 9475 }, { "epoch": 1.5562170262558248, "grad_norm": 0.3003482283168898, "learning_rate": 7.094511703398586e-06, "loss": 0.448, "step": 9476 }, { "epoch": 1.5563812534641677, "grad_norm": 0.2829919076745514, "learning_rate": 7.094128512707549e-06, "loss": 0.4416, "step": 9477 }, { "epoch": 1.5565454806725105, "grad_norm": 0.4384164662797856, "learning_rate": 7.093745293841726e-06, "loss": 0.4616, "step": 9478 }, { "epoch": 1.5567097078808532, "grad_norm": 0.31266180818887007, "learning_rate": 7.093362046805277e-06, "loss": 0.46, "step": 9479 }, { "epoch": 1.5568739350891958, "grad_norm": 0.27640585641465, "learning_rate": 7.092978771602363e-06, "loss": 0.4869, "step": 9480 }, { "epoch": 1.5570381622975387, "grad_norm": 0.3111860240242668, "learning_rate": 7.0925954682371495e-06, "loss": 0.4859, "step": 9481 }, { "epoch": 1.5572023895058815, "grad_norm": 0.26418192548126807, "learning_rate": 7.092212136713798e-06, "loss": 0.4423, "step": 9482 }, { "epoch": 1.5573666167142242, "grad_norm": 0.32839296991893036, "learning_rate": 7.0918287770364725e-06, "loss": 0.4608, "step": 9483 }, { "epoch": 1.5575308439225668, "grad_norm": 0.29188899721436623, "learning_rate": 7.091445389209336e-06, "loss": 0.4668, "step": 9484 }, { "epoch": 1.5576950711309097, "grad_norm": 0.7384461938939119, "learning_rate": 7.091061973236555e-06, "loss": 0.4934, "step": 9485 }, { "epoch": 1.5578592983392525, "grad_norm": 0.325973263250149, "learning_rate": 7.0906785291222905e-06, "loss": 0.4679, "step": 9486 }, { "epoch": 1.5580235255475952, "grad_norm": 0.4237636097354834, "learning_rate": 7.090295056870706e-06, "loss": 0.447, "step": 9487 }, { "epoch": 1.5581877527559378, "grad_norm": 0.27640337167974977, "learning_rate": 7.089911556485971e-06, "loss": 0.4606, "step": 9488 }, { "epoch": 1.5583519799642804, "grad_norm": 0.2826604533459565, "learning_rate": 7.08952802797225e-06, "loss": 0.4604, "step": 9489 }, { "epoch": 1.5585162071726233, "grad_norm": 0.33442396415131775, "learning_rate": 7.089144471333703e-06, "loss": 0.4526, "step": 9490 }, { "epoch": 1.5586804343809662, "grad_norm": 0.3254632769404679, "learning_rate": 7.088760886574502e-06, "loss": 0.4542, "step": 9491 }, { "epoch": 1.5588446615893088, "grad_norm": 0.3473371450325595, "learning_rate": 7.08837727369881e-06, "loss": 0.4719, "step": 9492 }, { "epoch": 1.5590088887976514, "grad_norm": 0.3354920157988492, "learning_rate": 7.087993632710796e-06, "loss": 0.4702, "step": 9493 }, { "epoch": 1.5591731160059943, "grad_norm": 0.30499776435563025, "learning_rate": 7.087609963614622e-06, "loss": 0.4514, "step": 9494 }, { "epoch": 1.5593373432143371, "grad_norm": 0.3328412721849873, "learning_rate": 7.08722626641446e-06, "loss": 0.4576, "step": 9495 }, { "epoch": 1.5595015704226798, "grad_norm": 0.31693393803170694, "learning_rate": 7.086842541114474e-06, "loss": 0.4571, "step": 9496 }, { "epoch": 1.5596657976310224, "grad_norm": 0.35129703393382067, "learning_rate": 7.086458787718834e-06, "loss": 0.4543, "step": 9497 }, { "epoch": 1.5598300248393653, "grad_norm": 0.294075165507447, "learning_rate": 7.086075006231707e-06, "loss": 0.4705, "step": 9498 }, { "epoch": 1.5599942520477081, "grad_norm": 0.3011262860718532, "learning_rate": 7.085691196657259e-06, "loss": 0.464, "step": 9499 }, { "epoch": 1.5601584792560508, "grad_norm": 0.2934159176282551, "learning_rate": 7.085307358999662e-06, "loss": 0.4623, "step": 9500 }, { "epoch": 1.5603227064643934, "grad_norm": 0.31840048492371464, "learning_rate": 7.084923493263085e-06, "loss": 0.435, "step": 9501 }, { "epoch": 1.5604869336727363, "grad_norm": 0.38778892493126904, "learning_rate": 7.084539599451693e-06, "loss": 0.4533, "step": 9502 }, { "epoch": 1.5606511608810791, "grad_norm": 0.3121985866882979, "learning_rate": 7.084155677569659e-06, "loss": 0.476, "step": 9503 }, { "epoch": 1.5608153880894218, "grad_norm": 0.4007484244591844, "learning_rate": 7.0837717276211524e-06, "loss": 0.4822, "step": 9504 }, { "epoch": 1.5609796152977644, "grad_norm": 0.3096911825413162, "learning_rate": 7.0833877496103425e-06, "loss": 0.4582, "step": 9505 }, { "epoch": 1.561143842506107, "grad_norm": 0.3039014707731322, "learning_rate": 7.0830037435414e-06, "loss": 0.4361, "step": 9506 }, { "epoch": 1.56130806971445, "grad_norm": 0.3185728949963143, "learning_rate": 7.082619709418496e-06, "loss": 0.4464, "step": 9507 }, { "epoch": 1.5614722969227928, "grad_norm": 0.29381394262617466, "learning_rate": 7.0822356472458e-06, "loss": 0.4765, "step": 9508 }, { "epoch": 1.5616365241311354, "grad_norm": 0.30664391728474316, "learning_rate": 7.081851557027485e-06, "loss": 0.4621, "step": 9509 }, { "epoch": 1.561800751339478, "grad_norm": 0.5913865040452077, "learning_rate": 7.0814674387677216e-06, "loss": 0.4729, "step": 9510 }, { "epoch": 1.561964978547821, "grad_norm": 0.31667754417883187, "learning_rate": 7.081083292470681e-06, "loss": 0.4783, "step": 9511 }, { "epoch": 1.5621292057561638, "grad_norm": 0.3476930914201371, "learning_rate": 7.080699118140538e-06, "loss": 0.4527, "step": 9512 }, { "epoch": 1.5622934329645064, "grad_norm": 0.31491112655895354, "learning_rate": 7.080314915781463e-06, "loss": 0.462, "step": 9513 }, { "epoch": 1.562457660172849, "grad_norm": 0.3392259237310825, "learning_rate": 7.07993068539763e-06, "loss": 0.477, "step": 9514 }, { "epoch": 1.562621887381192, "grad_norm": 0.34770909353808194, "learning_rate": 7.079546426993211e-06, "loss": 0.4507, "step": 9515 }, { "epoch": 1.5627861145895348, "grad_norm": 0.31253636621680053, "learning_rate": 7.0791621405723785e-06, "loss": 0.4686, "step": 9516 }, { "epoch": 1.5629503417978774, "grad_norm": 0.319327369627498, "learning_rate": 7.078777826139309e-06, "loss": 0.4569, "step": 9517 }, { "epoch": 1.56311456900622, "grad_norm": 0.30529715870836194, "learning_rate": 7.078393483698175e-06, "loss": 0.453, "step": 9518 }, { "epoch": 1.563278796214563, "grad_norm": 0.265733050448355, "learning_rate": 7.078009113253151e-06, "loss": 0.4722, "step": 9519 }, { "epoch": 1.5634430234229058, "grad_norm": 0.31408912922552107, "learning_rate": 7.077624714808411e-06, "loss": 0.4641, "step": 9520 }, { "epoch": 1.5636072506312484, "grad_norm": 0.3132960285785013, "learning_rate": 7.077240288368131e-06, "loss": 0.454, "step": 9521 }, { "epoch": 1.563771477839591, "grad_norm": 0.3063188137151771, "learning_rate": 7.076855833936486e-06, "loss": 0.4662, "step": 9522 }, { "epoch": 1.5639357050479337, "grad_norm": 0.2720545580356203, "learning_rate": 7.0764713515176505e-06, "loss": 0.4629, "step": 9523 }, { "epoch": 1.5640999322562765, "grad_norm": 0.2956064257938657, "learning_rate": 7.076086841115802e-06, "loss": 0.4465, "step": 9524 }, { "epoch": 1.5642641594646194, "grad_norm": 0.2800826541100162, "learning_rate": 7.075702302735116e-06, "loss": 0.4575, "step": 9525 }, { "epoch": 1.564428386672962, "grad_norm": 0.41341271140566976, "learning_rate": 7.075317736379768e-06, "loss": 0.4594, "step": 9526 }, { "epoch": 1.5645926138813047, "grad_norm": 0.34150965202806, "learning_rate": 7.0749331420539365e-06, "loss": 0.4561, "step": 9527 }, { "epoch": 1.5647568410896475, "grad_norm": 0.24492469743953443, "learning_rate": 7.074548519761797e-06, "loss": 0.4475, "step": 9528 }, { "epoch": 1.5649210682979904, "grad_norm": 0.34562330583944856, "learning_rate": 7.074163869507528e-06, "loss": 0.472, "step": 9529 }, { "epoch": 1.565085295506333, "grad_norm": 0.292325354044597, "learning_rate": 7.073779191295306e-06, "loss": 0.4573, "step": 9530 }, { "epoch": 1.5652495227146757, "grad_norm": 0.4229715578432917, "learning_rate": 7.0733944851293116e-06, "loss": 0.4577, "step": 9531 }, { "epoch": 1.5654137499230185, "grad_norm": 0.32096121679499595, "learning_rate": 7.0730097510137204e-06, "loss": 0.4503, "step": 9532 }, { "epoch": 1.5655779771313614, "grad_norm": 0.27339951728229317, "learning_rate": 7.072624988952711e-06, "loss": 0.4557, "step": 9533 }, { "epoch": 1.565742204339704, "grad_norm": 0.30943985914503824, "learning_rate": 7.0722401989504634e-06, "loss": 0.4557, "step": 9534 }, { "epoch": 1.5659064315480467, "grad_norm": 0.33863811933806, "learning_rate": 7.071855381011157e-06, "loss": 0.4532, "step": 9535 }, { "epoch": 1.5660706587563895, "grad_norm": 0.32722389646224836, "learning_rate": 7.071470535138969e-06, "loss": 0.4534, "step": 9536 }, { "epoch": 1.5662348859647324, "grad_norm": 0.3616589600678771, "learning_rate": 7.071085661338083e-06, "loss": 0.4596, "step": 9537 }, { "epoch": 1.566399113173075, "grad_norm": 0.290065563681788, "learning_rate": 7.070700759612676e-06, "loss": 0.4419, "step": 9538 }, { "epoch": 1.5665633403814176, "grad_norm": 0.3681681484784924, "learning_rate": 7.070315829966932e-06, "loss": 0.4674, "step": 9539 }, { "epoch": 1.5667275675897603, "grad_norm": 0.29433431026892365, "learning_rate": 7.069930872405026e-06, "loss": 0.4681, "step": 9540 }, { "epoch": 1.5668917947981031, "grad_norm": 0.28165471409438464, "learning_rate": 7.069545886931144e-06, "loss": 0.4381, "step": 9541 }, { "epoch": 1.567056022006446, "grad_norm": 0.2646752230437082, "learning_rate": 7.069160873549464e-06, "loss": 0.4711, "step": 9542 }, { "epoch": 1.5672202492147886, "grad_norm": 0.29628517431852486, "learning_rate": 7.068775832264172e-06, "loss": 0.4607, "step": 9543 }, { "epoch": 1.5673844764231313, "grad_norm": 0.44146222206987495, "learning_rate": 7.068390763079445e-06, "loss": 0.4721, "step": 9544 }, { "epoch": 1.5675487036314741, "grad_norm": 0.3452702849381547, "learning_rate": 7.068005665999467e-06, "loss": 0.4667, "step": 9545 }, { "epoch": 1.567712930839817, "grad_norm": 0.32130074805891345, "learning_rate": 7.067620541028422e-06, "loss": 0.4643, "step": 9546 }, { "epoch": 1.5678771580481596, "grad_norm": 0.2856839609238298, "learning_rate": 7.0672353881704915e-06, "loss": 0.45, "step": 9547 }, { "epoch": 1.5680413852565023, "grad_norm": 0.41361729342399817, "learning_rate": 7.066850207429859e-06, "loss": 0.457, "step": 9548 }, { "epoch": 1.5682056124648451, "grad_norm": 0.4783024490657136, "learning_rate": 7.066464998810707e-06, "loss": 0.4673, "step": 9549 }, { "epoch": 1.568369839673188, "grad_norm": 0.26854722532828895, "learning_rate": 7.066079762317221e-06, "loss": 0.4533, "step": 9550 }, { "epoch": 1.5685340668815306, "grad_norm": 0.3243123197398592, "learning_rate": 7.0656944979535836e-06, "loss": 0.4703, "step": 9551 }, { "epoch": 1.5686982940898733, "grad_norm": 0.31882320116029533, "learning_rate": 7.06530920572398e-06, "loss": 0.4334, "step": 9552 }, { "epoch": 1.5688625212982161, "grad_norm": 0.3153813981363007, "learning_rate": 7.064923885632595e-06, "loss": 0.4629, "step": 9553 }, { "epoch": 1.569026748506559, "grad_norm": 0.32022797273174436, "learning_rate": 7.064538537683612e-06, "loss": 0.4655, "step": 9554 }, { "epoch": 1.5691909757149016, "grad_norm": 0.3000384583865678, "learning_rate": 7.0641531618812165e-06, "loss": 0.4616, "step": 9555 }, { "epoch": 1.5693552029232443, "grad_norm": 0.3007245952874813, "learning_rate": 7.063767758229597e-06, "loss": 0.458, "step": 9556 }, { "epoch": 1.569519430131587, "grad_norm": 0.3370373862995186, "learning_rate": 7.063382326732936e-06, "loss": 0.4564, "step": 9557 }, { "epoch": 1.5696836573399298, "grad_norm": 0.2668854475743722, "learning_rate": 7.062996867395421e-06, "loss": 0.4637, "step": 9558 }, { "epoch": 1.5698478845482726, "grad_norm": 0.32082269614047243, "learning_rate": 7.062611380221239e-06, "loss": 0.4557, "step": 9559 }, { "epoch": 1.5700121117566153, "grad_norm": 0.2642372498332476, "learning_rate": 7.0622258652145755e-06, "loss": 0.459, "step": 9560 }, { "epoch": 1.570176338964958, "grad_norm": 0.32119739559651794, "learning_rate": 7.061840322379618e-06, "loss": 0.4826, "step": 9561 }, { "epoch": 1.5703405661733008, "grad_norm": 0.33573520572974747, "learning_rate": 7.061454751720556e-06, "loss": 0.4773, "step": 9562 }, { "epoch": 1.5705047933816436, "grad_norm": 0.3727906802239041, "learning_rate": 7.061069153241572e-06, "loss": 0.467, "step": 9563 }, { "epoch": 1.5706690205899863, "grad_norm": 0.25635954624939483, "learning_rate": 7.06068352694686e-06, "loss": 0.4725, "step": 9564 }, { "epoch": 1.570833247798329, "grad_norm": 0.3569861822095909, "learning_rate": 7.060297872840604e-06, "loss": 0.4674, "step": 9565 }, { "epoch": 1.5709974750066718, "grad_norm": 0.4167333878551979, "learning_rate": 7.059912190926995e-06, "loss": 0.464, "step": 9566 }, { "epoch": 1.5711617022150146, "grad_norm": 0.2988223076469179, "learning_rate": 7.05952648121022e-06, "loss": 0.4883, "step": 9567 }, { "epoch": 1.5713259294233572, "grad_norm": 0.2830256329297102, "learning_rate": 7.059140743694471e-06, "loss": 0.4476, "step": 9568 }, { "epoch": 1.5714901566316999, "grad_norm": 0.372258375978891, "learning_rate": 7.058754978383934e-06, "loss": 0.4893, "step": 9569 }, { "epoch": 1.5716543838400427, "grad_norm": 0.2654171969672735, "learning_rate": 7.0583691852828e-06, "loss": 0.4525, "step": 9570 }, { "epoch": 1.5718186110483856, "grad_norm": 0.280300815255278, "learning_rate": 7.0579833643952605e-06, "loss": 0.455, "step": 9571 }, { "epoch": 1.5719828382567282, "grad_norm": 0.5008140822399365, "learning_rate": 7.0575975157255044e-06, "loss": 0.4672, "step": 9572 }, { "epoch": 1.5721470654650709, "grad_norm": 0.31955287284252826, "learning_rate": 7.057211639277725e-06, "loss": 0.477, "step": 9573 }, { "epoch": 1.5723112926734135, "grad_norm": 0.3489108249379957, "learning_rate": 7.05682573505611e-06, "loss": 0.4773, "step": 9574 }, { "epoch": 1.5724755198817564, "grad_norm": 0.35593903885934475, "learning_rate": 7.056439803064851e-06, "loss": 0.4744, "step": 9575 }, { "epoch": 1.5726397470900992, "grad_norm": 0.28457851992077404, "learning_rate": 7.056053843308141e-06, "loss": 0.4511, "step": 9576 }, { "epoch": 1.5728039742984419, "grad_norm": 0.27240800898902373, "learning_rate": 7.0556678557901725e-06, "loss": 0.4603, "step": 9577 }, { "epoch": 1.5729682015067845, "grad_norm": 0.2727708545101182, "learning_rate": 7.055281840515136e-06, "loss": 0.4814, "step": 9578 }, { "epoch": 1.5731324287151274, "grad_norm": 0.31236355231569163, "learning_rate": 7.054895797487223e-06, "loss": 0.4415, "step": 9579 }, { "epoch": 1.5732966559234702, "grad_norm": 0.27149555085526944, "learning_rate": 7.054509726710629e-06, "loss": 0.4602, "step": 9580 }, { "epoch": 1.5734608831318129, "grad_norm": 0.26192212609758236, "learning_rate": 7.054123628189548e-06, "loss": 0.473, "step": 9581 }, { "epoch": 1.5736251103401555, "grad_norm": 0.2604095204435766, "learning_rate": 7.0537375019281705e-06, "loss": 0.4786, "step": 9582 }, { "epoch": 1.5737893375484984, "grad_norm": 0.3173000671655432, "learning_rate": 7.05335134793069e-06, "loss": 0.4431, "step": 9583 }, { "epoch": 1.5739535647568412, "grad_norm": 0.4249155458956311, "learning_rate": 7.052965166201301e-06, "loss": 0.4422, "step": 9584 }, { "epoch": 1.5741177919651839, "grad_norm": 0.326714175509031, "learning_rate": 7.052578956744201e-06, "loss": 0.4533, "step": 9585 }, { "epoch": 1.5742820191735265, "grad_norm": 0.30076342807026163, "learning_rate": 7.052192719563582e-06, "loss": 0.4646, "step": 9586 }, { "epoch": 1.5744462463818694, "grad_norm": 0.5574470434140718, "learning_rate": 7.0518064546636375e-06, "loss": 0.4785, "step": 9587 }, { "epoch": 1.5746104735902122, "grad_norm": 0.3899021496848957, "learning_rate": 7.051420162048565e-06, "loss": 0.4645, "step": 9588 }, { "epoch": 1.5747747007985549, "grad_norm": 0.5009365869861512, "learning_rate": 7.051033841722559e-06, "loss": 0.4616, "step": 9589 }, { "epoch": 1.5749389280068975, "grad_norm": 0.3026411124363173, "learning_rate": 7.050647493689816e-06, "loss": 0.4617, "step": 9590 }, { "epoch": 1.5751031552152401, "grad_norm": 0.29061469419822633, "learning_rate": 7.050261117954531e-06, "loss": 0.4668, "step": 9591 }, { "epoch": 1.575267382423583, "grad_norm": 0.29963184439358226, "learning_rate": 7.049874714520902e-06, "loss": 0.4807, "step": 9592 }, { "epoch": 1.5754316096319259, "grad_norm": 0.32685838124755867, "learning_rate": 7.049488283393124e-06, "loss": 0.4554, "step": 9593 }, { "epoch": 1.5755958368402685, "grad_norm": 0.3098084135045518, "learning_rate": 7.049101824575395e-06, "loss": 0.4631, "step": 9594 }, { "epoch": 1.5757600640486111, "grad_norm": 0.5182353120987377, "learning_rate": 7.048715338071913e-06, "loss": 0.4556, "step": 9595 }, { "epoch": 1.575924291256954, "grad_norm": 0.33384098807841417, "learning_rate": 7.048328823886873e-06, "loss": 0.4752, "step": 9596 }, { "epoch": 1.5760885184652969, "grad_norm": 0.5106129343711809, "learning_rate": 7.047942282024477e-06, "loss": 0.4458, "step": 9597 }, { "epoch": 1.5762527456736395, "grad_norm": 0.2779558813719126, "learning_rate": 7.047555712488921e-06, "loss": 0.4627, "step": 9598 }, { "epoch": 1.5764169728819821, "grad_norm": 0.3036165546910705, "learning_rate": 7.047169115284401e-06, "loss": 0.4542, "step": 9599 }, { "epoch": 1.576581200090325, "grad_norm": 0.2813306295945631, "learning_rate": 7.0467824904151205e-06, "loss": 0.4574, "step": 9600 }, { "epoch": 1.5767454272986678, "grad_norm": 0.3756631418000488, "learning_rate": 7.046395837885276e-06, "loss": 0.4665, "step": 9601 }, { "epoch": 1.5769096545070105, "grad_norm": 0.3014834902402672, "learning_rate": 7.046009157699068e-06, "loss": 0.4686, "step": 9602 }, { "epoch": 1.5770738817153531, "grad_norm": 0.37031689284883956, "learning_rate": 7.045622449860695e-06, "loss": 0.438, "step": 9603 }, { "epoch": 1.577238108923696, "grad_norm": 0.5091529417201783, "learning_rate": 7.045235714374358e-06, "loss": 0.4596, "step": 9604 }, { "epoch": 1.5774023361320388, "grad_norm": 0.316980806826266, "learning_rate": 7.044848951244255e-06, "loss": 0.4717, "step": 9605 }, { "epoch": 1.5775665633403815, "grad_norm": 0.27563114539777, "learning_rate": 7.04446216047459e-06, "loss": 0.4652, "step": 9606 }, { "epoch": 1.5777307905487241, "grad_norm": 0.3254061465093312, "learning_rate": 7.044075342069563e-06, "loss": 0.4721, "step": 9607 }, { "epoch": 1.5778950177570668, "grad_norm": 0.3070070243971381, "learning_rate": 7.0436884960333746e-06, "loss": 0.4451, "step": 9608 }, { "epoch": 1.5780592449654096, "grad_norm": 0.3046083263392374, "learning_rate": 7.0433016223702255e-06, "loss": 0.4646, "step": 9609 }, { "epoch": 1.5782234721737525, "grad_norm": 0.3356082246504402, "learning_rate": 7.04291472108432e-06, "loss": 0.4773, "step": 9610 }, { "epoch": 1.578387699382095, "grad_norm": 0.3154556445315861, "learning_rate": 7.042527792179857e-06, "loss": 0.4668, "step": 9611 }, { "epoch": 1.5785519265904377, "grad_norm": 0.34093873199245545, "learning_rate": 7.042140835661042e-06, "loss": 0.4478, "step": 9612 }, { "epoch": 1.5787161537987806, "grad_norm": 0.4638435804483628, "learning_rate": 7.041753851532076e-06, "loss": 0.4545, "step": 9613 }, { "epoch": 1.5788803810071235, "grad_norm": 0.3080869937789505, "learning_rate": 7.041366839797163e-06, "loss": 0.4676, "step": 9614 }, { "epoch": 1.579044608215466, "grad_norm": 0.3208085604236869, "learning_rate": 7.040979800460505e-06, "loss": 0.4618, "step": 9615 }, { "epoch": 1.5792088354238087, "grad_norm": 0.37304515332678506, "learning_rate": 7.040592733526307e-06, "loss": 0.47, "step": 9616 }, { "epoch": 1.5793730626321516, "grad_norm": 0.3489077569804723, "learning_rate": 7.0402056389987716e-06, "loss": 0.4719, "step": 9617 }, { "epoch": 1.5795372898404945, "grad_norm": 0.3204051769035944, "learning_rate": 7.039818516882105e-06, "loss": 0.4643, "step": 9618 }, { "epoch": 1.579701517048837, "grad_norm": 0.33360715457799445, "learning_rate": 7.039431367180509e-06, "loss": 0.4711, "step": 9619 }, { "epoch": 1.5798657442571797, "grad_norm": 0.3188657125688107, "learning_rate": 7.03904418989819e-06, "loss": 0.478, "step": 9620 }, { "epoch": 1.5800299714655226, "grad_norm": 0.3406891273173139, "learning_rate": 7.038656985039353e-06, "loss": 0.464, "step": 9621 }, { "epoch": 1.5801941986738655, "grad_norm": 0.3344841323401284, "learning_rate": 7.038269752608205e-06, "loss": 0.4628, "step": 9622 }, { "epoch": 1.580358425882208, "grad_norm": 0.2962372762879127, "learning_rate": 7.0378824926089485e-06, "loss": 0.4557, "step": 9623 }, { "epoch": 1.5805226530905507, "grad_norm": 0.30877525105222614, "learning_rate": 7.0374952050457925e-06, "loss": 0.4545, "step": 9624 }, { "epoch": 1.5806868802988934, "grad_norm": 0.5755131888295433, "learning_rate": 7.037107889922941e-06, "loss": 0.4472, "step": 9625 }, { "epoch": 1.5808511075072362, "grad_norm": 0.25748170687714833, "learning_rate": 7.036720547244602e-06, "loss": 0.4641, "step": 9626 }, { "epoch": 1.581015334715579, "grad_norm": 0.36057283649245553, "learning_rate": 7.0363331770149826e-06, "loss": 0.468, "step": 9627 }, { "epoch": 1.5811795619239217, "grad_norm": 0.327370587050576, "learning_rate": 7.035945779238288e-06, "loss": 0.4446, "step": 9628 }, { "epoch": 1.5813437891322644, "grad_norm": 0.32141525897584117, "learning_rate": 7.035558353918728e-06, "loss": 0.4408, "step": 9629 }, { "epoch": 1.5815080163406072, "grad_norm": 0.2675636946309375, "learning_rate": 7.035170901060509e-06, "loss": 0.461, "step": 9630 }, { "epoch": 1.58167224354895, "grad_norm": 0.3485598277383404, "learning_rate": 7.034783420667841e-06, "loss": 0.4599, "step": 9631 }, { "epoch": 1.5818364707572927, "grad_norm": 0.40961186549980955, "learning_rate": 7.03439591274493e-06, "loss": 0.4485, "step": 9632 }, { "epoch": 1.5820006979656354, "grad_norm": 0.3150450672652272, "learning_rate": 7.034008377295987e-06, "loss": 0.4458, "step": 9633 }, { "epoch": 1.5821649251739782, "grad_norm": 0.2925616187190679, "learning_rate": 7.033620814325219e-06, "loss": 0.4864, "step": 9634 }, { "epoch": 1.582329152382321, "grad_norm": 0.33606955689744944, "learning_rate": 7.033233223836837e-06, "loss": 0.466, "step": 9635 }, { "epoch": 1.5824933795906637, "grad_norm": 0.4824169072771863, "learning_rate": 7.03284560583505e-06, "loss": 0.4528, "step": 9636 }, { "epoch": 1.5826576067990064, "grad_norm": 0.2906977714611012, "learning_rate": 7.032457960324066e-06, "loss": 0.4537, "step": 9637 }, { "epoch": 1.5828218340073492, "grad_norm": 0.279391411557559, "learning_rate": 7.032070287308099e-06, "loss": 0.4664, "step": 9638 }, { "epoch": 1.582986061215692, "grad_norm": 0.321052045706845, "learning_rate": 7.031682586791356e-06, "loss": 0.4575, "step": 9639 }, { "epoch": 1.5831502884240347, "grad_norm": 0.3399100775023264, "learning_rate": 7.03129485877805e-06, "loss": 0.4729, "step": 9640 }, { "epoch": 1.5833145156323774, "grad_norm": 0.32977031429239967, "learning_rate": 7.030907103272391e-06, "loss": 0.4716, "step": 9641 }, { "epoch": 1.58347874284072, "grad_norm": 0.31242153963448743, "learning_rate": 7.0305193202785905e-06, "loss": 0.473, "step": 9642 }, { "epoch": 1.5836429700490628, "grad_norm": 0.3019615032920058, "learning_rate": 7.030131509800861e-06, "loss": 0.4414, "step": 9643 }, { "epoch": 1.5838071972574057, "grad_norm": 0.3483822784558959, "learning_rate": 7.029743671843415e-06, "loss": 0.4646, "step": 9644 }, { "epoch": 1.5839714244657483, "grad_norm": 0.2991796623781081, "learning_rate": 7.029355806410462e-06, "loss": 0.4656, "step": 9645 }, { "epoch": 1.584135651674091, "grad_norm": 0.36838639847658394, "learning_rate": 7.028967913506217e-06, "loss": 0.4784, "step": 9646 }, { "epoch": 1.5842998788824338, "grad_norm": 0.30208778163975347, "learning_rate": 7.028579993134892e-06, "loss": 0.4827, "step": 9647 }, { "epoch": 1.5844641060907767, "grad_norm": 0.27407402707941875, "learning_rate": 7.028192045300701e-06, "loss": 0.4668, "step": 9648 }, { "epoch": 1.5846283332991193, "grad_norm": 0.2618196125378437, "learning_rate": 7.027804070007858e-06, "loss": 0.4807, "step": 9649 }, { "epoch": 1.584792560507462, "grad_norm": 0.3152951828088361, "learning_rate": 7.027416067260574e-06, "loss": 0.4632, "step": 9650 }, { "epoch": 1.5849567877158048, "grad_norm": 0.3463447699192336, "learning_rate": 7.027028037063066e-06, "loss": 0.45, "step": 9651 }, { "epoch": 1.5851210149241477, "grad_norm": 0.2943307574825071, "learning_rate": 7.0266399794195465e-06, "loss": 0.4598, "step": 9652 }, { "epoch": 1.5852852421324903, "grad_norm": 0.3006986761693188, "learning_rate": 7.026251894334231e-06, "loss": 0.4703, "step": 9653 }, { "epoch": 1.585449469340833, "grad_norm": 0.3068826226953967, "learning_rate": 7.025863781811335e-06, "loss": 0.4569, "step": 9654 }, { "epoch": 1.5856136965491758, "grad_norm": 0.2975266953823899, "learning_rate": 7.025475641855074e-06, "loss": 0.4541, "step": 9655 }, { "epoch": 1.5857779237575187, "grad_norm": 0.3719118805987487, "learning_rate": 7.025087474469661e-06, "loss": 0.4693, "step": 9656 }, { "epoch": 1.5859421509658613, "grad_norm": 0.316004408532983, "learning_rate": 7.024699279659314e-06, "loss": 0.4846, "step": 9657 }, { "epoch": 1.586106378174204, "grad_norm": 0.2750964430209116, "learning_rate": 7.024311057428249e-06, "loss": 0.45, "step": 9658 }, { "epoch": 1.5862706053825466, "grad_norm": 0.25877806352237737, "learning_rate": 7.023922807780682e-06, "loss": 0.4712, "step": 9659 }, { "epoch": 1.5864348325908895, "grad_norm": 0.30726318388169743, "learning_rate": 7.023534530720832e-06, "loss": 0.4505, "step": 9660 }, { "epoch": 1.5865990597992323, "grad_norm": 0.277175420733913, "learning_rate": 7.0231462262529125e-06, "loss": 0.4462, "step": 9661 }, { "epoch": 1.586763287007575, "grad_norm": 0.2480687810862842, "learning_rate": 7.022757894381143e-06, "loss": 0.4578, "step": 9662 }, { "epoch": 1.5869275142159176, "grad_norm": 0.310062585639788, "learning_rate": 7.02236953510974e-06, "loss": 0.4529, "step": 9663 }, { "epoch": 1.5870917414242605, "grad_norm": 0.29278447216754283, "learning_rate": 7.021981148442923e-06, "loss": 0.4479, "step": 9664 }, { "epoch": 1.5872559686326033, "grad_norm": 0.34205414183900085, "learning_rate": 7.0215927343849095e-06, "loss": 0.4567, "step": 9665 }, { "epoch": 1.587420195840946, "grad_norm": 0.30235224417403145, "learning_rate": 7.021204292939917e-06, "loss": 0.4504, "step": 9666 }, { "epoch": 1.5875844230492886, "grad_norm": 0.3204273273921009, "learning_rate": 7.020815824112166e-06, "loss": 0.4555, "step": 9667 }, { "epoch": 1.5877486502576315, "grad_norm": 0.29059833973456584, "learning_rate": 7.020427327905875e-06, "loss": 0.4524, "step": 9668 }, { "epoch": 1.5879128774659743, "grad_norm": 0.312532284678278, "learning_rate": 7.0200388043252635e-06, "loss": 0.4531, "step": 9669 }, { "epoch": 1.588077104674317, "grad_norm": 0.4001305657269944, "learning_rate": 7.019650253374552e-06, "loss": 0.4649, "step": 9670 }, { "epoch": 1.5882413318826596, "grad_norm": 0.29994775751366703, "learning_rate": 7.019261675057958e-06, "loss": 0.454, "step": 9671 }, { "epoch": 1.5884055590910025, "grad_norm": 0.3719043680773999, "learning_rate": 7.018873069379705e-06, "loss": 0.4631, "step": 9672 }, { "epoch": 1.5885697862993453, "grad_norm": 0.2949768760769632, "learning_rate": 7.018484436344012e-06, "loss": 0.4696, "step": 9673 }, { "epoch": 1.588734013507688, "grad_norm": 0.28422629123361914, "learning_rate": 7.018095775955099e-06, "loss": 0.4618, "step": 9674 }, { "epoch": 1.5888982407160306, "grad_norm": 0.27145460449253184, "learning_rate": 7.0177070882171895e-06, "loss": 0.4839, "step": 9675 }, { "epoch": 1.5890624679243732, "grad_norm": 0.3286664513768836, "learning_rate": 7.017318373134504e-06, "loss": 0.4594, "step": 9676 }, { "epoch": 1.589226695132716, "grad_norm": 0.3619168102990762, "learning_rate": 7.0169296307112635e-06, "loss": 0.4444, "step": 9677 }, { "epoch": 1.589390922341059, "grad_norm": 0.28446907903214214, "learning_rate": 7.0165408609516916e-06, "loss": 0.4572, "step": 9678 }, { "epoch": 1.5895551495494016, "grad_norm": 0.352917143530713, "learning_rate": 7.0161520638600105e-06, "loss": 0.4514, "step": 9679 }, { "epoch": 1.5897193767577442, "grad_norm": 0.35002679253958296, "learning_rate": 7.01576323944044e-06, "loss": 0.4588, "step": 9680 }, { "epoch": 1.589883603966087, "grad_norm": 0.32197653074915533, "learning_rate": 7.015374387697208e-06, "loss": 0.4674, "step": 9681 }, { "epoch": 1.59004783117443, "grad_norm": 0.29434146515200116, "learning_rate": 7.014985508634535e-06, "loss": 0.4843, "step": 9682 }, { "epoch": 1.5902120583827726, "grad_norm": 0.32987470643692046, "learning_rate": 7.014596602256644e-06, "loss": 0.4927, "step": 9683 }, { "epoch": 1.5903762855911152, "grad_norm": 0.43707017640769624, "learning_rate": 7.014207668567761e-06, "loss": 0.4806, "step": 9684 }, { "epoch": 1.590540512799458, "grad_norm": 0.37174042084246073, "learning_rate": 7.013818707572109e-06, "loss": 0.483, "step": 9685 }, { "epoch": 1.590704740007801, "grad_norm": 0.29865649575691555, "learning_rate": 7.013429719273912e-06, "loss": 0.4724, "step": 9686 }, { "epoch": 1.5908689672161436, "grad_norm": 0.28389824575492095, "learning_rate": 7.0130407036773955e-06, "loss": 0.4617, "step": 9687 }, { "epoch": 1.5910331944244862, "grad_norm": 0.4856752735113594, "learning_rate": 7.0126516607867845e-06, "loss": 0.4588, "step": 9688 }, { "epoch": 1.591197421632829, "grad_norm": 0.3280719264639045, "learning_rate": 7.012262590606304e-06, "loss": 0.4744, "step": 9689 }, { "epoch": 1.591361648841172, "grad_norm": 0.3017016566131072, "learning_rate": 7.011873493140181e-06, "loss": 0.4561, "step": 9690 }, { "epoch": 1.5915258760495146, "grad_norm": 0.27235999254119797, "learning_rate": 7.011484368392639e-06, "loss": 0.4589, "step": 9691 }, { "epoch": 1.5916901032578572, "grad_norm": 0.293826082990073, "learning_rate": 7.011095216367906e-06, "loss": 0.4538, "step": 9692 }, { "epoch": 1.5918543304661998, "grad_norm": 0.30234293241414956, "learning_rate": 7.010706037070209e-06, "loss": 0.461, "step": 9693 }, { "epoch": 1.5920185576745427, "grad_norm": 0.2807863448579779, "learning_rate": 7.010316830503775e-06, "loss": 0.4579, "step": 9694 }, { "epoch": 1.5921827848828856, "grad_norm": 0.25374035826197255, "learning_rate": 7.009927596672829e-06, "loss": 0.4783, "step": 9695 }, { "epoch": 1.5923470120912282, "grad_norm": 0.28295051020587353, "learning_rate": 7.009538335581601e-06, "loss": 0.4487, "step": 9696 }, { "epoch": 1.5925112392995708, "grad_norm": 0.9402708462138283, "learning_rate": 7.0091490472343165e-06, "loss": 0.4652, "step": 9697 }, { "epoch": 1.5926754665079137, "grad_norm": 0.3089325539254181, "learning_rate": 7.008759731635206e-06, "loss": 0.4736, "step": 9698 }, { "epoch": 1.5928396937162566, "grad_norm": 0.28210806545171774, "learning_rate": 7.008370388788496e-06, "loss": 0.4424, "step": 9699 }, { "epoch": 1.5930039209245992, "grad_norm": 0.33788709073616296, "learning_rate": 7.007981018698415e-06, "loss": 0.4705, "step": 9700 }, { "epoch": 1.5931681481329418, "grad_norm": 0.2810052632243497, "learning_rate": 7.007591621369193e-06, "loss": 0.4618, "step": 9701 }, { "epoch": 1.5933323753412847, "grad_norm": 0.3861673845526481, "learning_rate": 7.00720219680506e-06, "loss": 0.453, "step": 9702 }, { "epoch": 1.5934966025496275, "grad_norm": 0.29160915803694276, "learning_rate": 7.006812745010243e-06, "loss": 0.4733, "step": 9703 }, { "epoch": 1.5936608297579702, "grad_norm": 0.33914569659292165, "learning_rate": 7.006423265988972e-06, "loss": 0.4515, "step": 9704 }, { "epoch": 1.5938250569663128, "grad_norm": 0.2550988195503919, "learning_rate": 7.006033759745481e-06, "loss": 0.4619, "step": 9705 }, { "epoch": 1.5939892841746557, "grad_norm": 0.29945375984337574, "learning_rate": 7.005644226283997e-06, "loss": 0.4713, "step": 9706 }, { "epoch": 1.5941535113829983, "grad_norm": 0.25626086854928126, "learning_rate": 7.005254665608751e-06, "loss": 0.4694, "step": 9707 }, { "epoch": 1.5943177385913412, "grad_norm": 0.2478955917014123, "learning_rate": 7.004865077723974e-06, "loss": 0.4469, "step": 9708 }, { "epoch": 1.5944819657996838, "grad_norm": 0.261073698927775, "learning_rate": 7.0044754626339e-06, "loss": 0.4424, "step": 9709 }, { "epoch": 1.5946461930080265, "grad_norm": 0.29944897321085306, "learning_rate": 7.0040858203427555e-06, "loss": 0.4598, "step": 9710 }, { "epoch": 1.5948104202163693, "grad_norm": 0.3029852969342873, "learning_rate": 7.003696150854777e-06, "loss": 0.4568, "step": 9711 }, { "epoch": 1.5949746474247122, "grad_norm": 0.3545481979767949, "learning_rate": 7.003306454174195e-06, "loss": 0.4739, "step": 9712 }, { "epoch": 1.5951388746330548, "grad_norm": 0.2915600645900402, "learning_rate": 7.002916730305242e-06, "loss": 0.4731, "step": 9713 }, { "epoch": 1.5953031018413975, "grad_norm": 0.30375434938571066, "learning_rate": 7.00252697925215e-06, "loss": 0.4594, "step": 9714 }, { "epoch": 1.5954673290497403, "grad_norm": 0.3858113833425676, "learning_rate": 7.002137201019153e-06, "loss": 0.4711, "step": 9715 }, { "epoch": 1.5956315562580832, "grad_norm": 0.306509544782491, "learning_rate": 7.001747395610485e-06, "loss": 0.4478, "step": 9716 }, { "epoch": 1.5957957834664258, "grad_norm": 0.3747588996266856, "learning_rate": 7.001357563030378e-06, "loss": 0.4668, "step": 9717 }, { "epoch": 1.5959600106747684, "grad_norm": 0.24590682956768997, "learning_rate": 7.000967703283067e-06, "loss": 0.4767, "step": 9718 }, { "epoch": 1.5961242378831113, "grad_norm": 0.389786665880499, "learning_rate": 7.000577816372787e-06, "loss": 0.4499, "step": 9719 }, { "epoch": 1.5962884650914542, "grad_norm": 0.3980182584891584, "learning_rate": 7.0001879023037704e-06, "loss": 0.4691, "step": 9720 }, { "epoch": 1.5964526922997968, "grad_norm": 1.5256900334767118, "learning_rate": 6.999797961080255e-06, "loss": 0.4411, "step": 9721 }, { "epoch": 1.5966169195081394, "grad_norm": 0.341679700793873, "learning_rate": 6.999407992706472e-06, "loss": 0.4636, "step": 9722 }, { "epoch": 1.5967811467164823, "grad_norm": 0.3189785275812659, "learning_rate": 6.999017997186662e-06, "loss": 0.4502, "step": 9723 }, { "epoch": 1.596945373924825, "grad_norm": 0.3096324528487003, "learning_rate": 6.998627974525056e-06, "loss": 0.4658, "step": 9724 }, { "epoch": 1.5971096011331678, "grad_norm": 0.32705409411953407, "learning_rate": 6.998237924725891e-06, "loss": 0.473, "step": 9725 }, { "epoch": 1.5972738283415104, "grad_norm": 0.2877085709462597, "learning_rate": 6.997847847793406e-06, "loss": 0.4643, "step": 9726 }, { "epoch": 1.597438055549853, "grad_norm": 0.3923418706541653, "learning_rate": 6.997457743731836e-06, "loss": 0.4555, "step": 9727 }, { "epoch": 1.597602282758196, "grad_norm": 0.3107603017489276, "learning_rate": 6.997067612545416e-06, "loss": 0.4443, "step": 9728 }, { "epoch": 1.5977665099665388, "grad_norm": 0.32297011284910904, "learning_rate": 6.996677454238386e-06, "loss": 0.4674, "step": 9729 }, { "epoch": 1.5979307371748814, "grad_norm": 0.31297648005174855, "learning_rate": 6.996287268814981e-06, "loss": 0.4502, "step": 9730 }, { "epoch": 1.598094964383224, "grad_norm": 0.33511246862645244, "learning_rate": 6.9958970562794435e-06, "loss": 0.4729, "step": 9731 }, { "epoch": 1.598259191591567, "grad_norm": 0.34563244047173136, "learning_rate": 6.995506816636005e-06, "loss": 0.4593, "step": 9732 }, { "epoch": 1.5984234187999098, "grad_norm": 0.2745624451603022, "learning_rate": 6.9951165498889085e-06, "loss": 0.4536, "step": 9733 }, { "epoch": 1.5985876460082524, "grad_norm": 0.3344724710839866, "learning_rate": 6.994726256042392e-06, "loss": 0.4574, "step": 9734 }, { "epoch": 1.598751873216595, "grad_norm": 0.9641068009989658, "learning_rate": 6.994335935100693e-06, "loss": 0.4694, "step": 9735 }, { "epoch": 1.598916100424938, "grad_norm": 0.32579840476566196, "learning_rate": 6.993945587068053e-06, "loss": 0.4849, "step": 9736 }, { "epoch": 1.5990803276332808, "grad_norm": 0.33205921777574166, "learning_rate": 6.993555211948709e-06, "loss": 0.4692, "step": 9737 }, { "epoch": 1.5992445548416234, "grad_norm": 0.2770176558361628, "learning_rate": 6.993164809746901e-06, "loss": 0.4531, "step": 9738 }, { "epoch": 1.599408782049966, "grad_norm": 0.2972289466677687, "learning_rate": 6.992774380466872e-06, "loss": 0.4647, "step": 9739 }, { "epoch": 1.599573009258309, "grad_norm": 0.5161134035043563, "learning_rate": 6.99238392411286e-06, "loss": 0.4444, "step": 9740 }, { "epoch": 1.5997372364666516, "grad_norm": 0.3632176478786719, "learning_rate": 6.991993440689107e-06, "loss": 0.4536, "step": 9741 }, { "epoch": 1.5999014636749944, "grad_norm": 0.3972599771589535, "learning_rate": 6.991602930199853e-06, "loss": 0.4565, "step": 9742 }, { "epoch": 1.600065690883337, "grad_norm": 0.34852573709915075, "learning_rate": 6.991212392649341e-06, "loss": 0.4395, "step": 9743 }, { "epoch": 1.6002299180916797, "grad_norm": 0.378242100179647, "learning_rate": 6.990821828041809e-06, "loss": 0.4815, "step": 9744 }, { "epoch": 1.6003941453000226, "grad_norm": 0.2997246411725691, "learning_rate": 6.990431236381503e-06, "loss": 0.4667, "step": 9745 }, { "epoch": 1.6005583725083654, "grad_norm": 0.2928757734583082, "learning_rate": 6.990040617672663e-06, "loss": 0.464, "step": 9746 }, { "epoch": 1.600722599716708, "grad_norm": 0.2671624667674582, "learning_rate": 6.989649971919531e-06, "loss": 0.4643, "step": 9747 }, { "epoch": 1.6008868269250507, "grad_norm": 0.2615783823407928, "learning_rate": 6.989259299126353e-06, "loss": 0.45, "step": 9748 }, { "epoch": 1.6010510541333935, "grad_norm": 0.31506464269600354, "learning_rate": 6.988868599297368e-06, "loss": 0.456, "step": 9749 }, { "epoch": 1.6012152813417364, "grad_norm": 0.2857207363724411, "learning_rate": 6.988477872436822e-06, "loss": 0.4596, "step": 9750 }, { "epoch": 1.601379508550079, "grad_norm": 0.3117589736533758, "learning_rate": 6.988087118548959e-06, "loss": 0.4498, "step": 9751 }, { "epoch": 1.6015437357584217, "grad_norm": 0.26792546533856765, "learning_rate": 6.9876963376380206e-06, "loss": 0.4418, "step": 9752 }, { "epoch": 1.6017079629667645, "grad_norm": 0.3510529580662092, "learning_rate": 6.987305529708253e-06, "loss": 0.4754, "step": 9753 }, { "epoch": 1.6018721901751074, "grad_norm": 0.34665871073162313, "learning_rate": 6.986914694763899e-06, "loss": 0.4495, "step": 9754 }, { "epoch": 1.60203641738345, "grad_norm": 0.2704560117494492, "learning_rate": 6.986523832809207e-06, "loss": 0.4563, "step": 9755 }, { "epoch": 1.6022006445917927, "grad_norm": 0.39103153382891753, "learning_rate": 6.986132943848418e-06, "loss": 0.4657, "step": 9756 }, { "epoch": 1.6023648718001355, "grad_norm": 0.27479927593805215, "learning_rate": 6.985742027885779e-06, "loss": 0.4703, "step": 9757 }, { "epoch": 1.6025290990084782, "grad_norm": 0.34785620793652167, "learning_rate": 6.985351084925537e-06, "loss": 0.4459, "step": 9758 }, { "epoch": 1.602693326216821, "grad_norm": 0.3060687588327717, "learning_rate": 6.984960114971936e-06, "loss": 0.4774, "step": 9759 }, { "epoch": 1.6028575534251637, "grad_norm": 0.37563535546956034, "learning_rate": 6.9845691180292235e-06, "loss": 0.4805, "step": 9760 }, { "epoch": 1.6030217806335063, "grad_norm": 0.24377779296879026, "learning_rate": 6.984178094101647e-06, "loss": 0.4809, "step": 9761 }, { "epoch": 1.6031860078418492, "grad_norm": 0.2571146440045776, "learning_rate": 6.983787043193452e-06, "loss": 0.4634, "step": 9762 }, { "epoch": 1.603350235050192, "grad_norm": 0.3778507318993349, "learning_rate": 6.983395965308885e-06, "loss": 0.4464, "step": 9763 }, { "epoch": 1.6035144622585347, "grad_norm": 0.30104596005073675, "learning_rate": 6.983004860452195e-06, "loss": 0.463, "step": 9764 }, { "epoch": 1.6036786894668773, "grad_norm": 0.29668588814681535, "learning_rate": 6.982613728627629e-06, "loss": 0.4557, "step": 9765 }, { "epoch": 1.6038429166752202, "grad_norm": 0.4248514538815655, "learning_rate": 6.982222569839436e-06, "loss": 0.4238, "step": 9766 }, { "epoch": 1.604007143883563, "grad_norm": 0.36176526973408535, "learning_rate": 6.981831384091863e-06, "loss": 0.4639, "step": 9767 }, { "epoch": 1.6041713710919057, "grad_norm": 0.3367393011027692, "learning_rate": 6.981440171389158e-06, "loss": 0.4729, "step": 9768 }, { "epoch": 1.6043355983002483, "grad_norm": 0.29669673074478037, "learning_rate": 6.981048931735574e-06, "loss": 0.4804, "step": 9769 }, { "epoch": 1.6044998255085912, "grad_norm": 0.2980190900348066, "learning_rate": 6.980657665135357e-06, "loss": 0.4661, "step": 9770 }, { "epoch": 1.604664052716934, "grad_norm": 0.332201811865359, "learning_rate": 6.980266371592756e-06, "loss": 0.4418, "step": 9771 }, { "epoch": 1.6048282799252767, "grad_norm": 0.40769861897148146, "learning_rate": 6.979875051112023e-06, "loss": 0.4509, "step": 9772 }, { "epoch": 1.6049925071336193, "grad_norm": 0.28501944575324484, "learning_rate": 6.979483703697408e-06, "loss": 0.4693, "step": 9773 }, { "epoch": 1.6051567343419622, "grad_norm": 0.2760017459121368, "learning_rate": 6.979092329353159e-06, "loss": 0.4627, "step": 9774 }, { "epoch": 1.6053209615503048, "grad_norm": 0.3752488518830528, "learning_rate": 6.978700928083527e-06, "loss": 0.4763, "step": 9775 }, { "epoch": 1.6054851887586477, "grad_norm": 0.3251274051588068, "learning_rate": 6.9783094998927655e-06, "loss": 0.4671, "step": 9776 }, { "epoch": 1.6056494159669903, "grad_norm": 0.36151954149272725, "learning_rate": 6.977918044785125e-06, "loss": 0.453, "step": 9777 }, { "epoch": 1.605813643175333, "grad_norm": 0.37377513986589384, "learning_rate": 6.9775265627648565e-06, "loss": 0.4644, "step": 9778 }, { "epoch": 1.6059778703836758, "grad_norm": 0.28806517291802397, "learning_rate": 6.977135053836211e-06, "loss": 0.4641, "step": 9779 }, { "epoch": 1.6061420975920186, "grad_norm": 0.25543690612044667, "learning_rate": 6.976743518003443e-06, "loss": 0.4595, "step": 9780 }, { "epoch": 1.6063063248003613, "grad_norm": 0.2927473928733643, "learning_rate": 6.976351955270803e-06, "loss": 0.4852, "step": 9781 }, { "epoch": 1.606470552008704, "grad_norm": 0.32663703524442445, "learning_rate": 6.975960365642544e-06, "loss": 0.4828, "step": 9782 }, { "epoch": 1.6066347792170468, "grad_norm": 0.33126405619204863, "learning_rate": 6.9755687491229195e-06, "loss": 0.4574, "step": 9783 }, { "epoch": 1.6067990064253896, "grad_norm": 0.27115302052887735, "learning_rate": 6.975177105716184e-06, "loss": 0.4554, "step": 9784 }, { "epoch": 1.6069632336337323, "grad_norm": 0.5938177654376247, "learning_rate": 6.974785435426588e-06, "loss": 0.4748, "step": 9785 }, { "epoch": 1.607127460842075, "grad_norm": 0.3709570909137645, "learning_rate": 6.974393738258388e-06, "loss": 0.4737, "step": 9786 }, { "epoch": 1.6072916880504178, "grad_norm": 0.29217409026482316, "learning_rate": 6.974002014215839e-06, "loss": 0.4594, "step": 9787 }, { "epoch": 1.6074559152587606, "grad_norm": 0.5522762635217223, "learning_rate": 6.973610263303191e-06, "loss": 0.4405, "step": 9788 }, { "epoch": 1.6076201424671033, "grad_norm": 0.2700062333742084, "learning_rate": 6.973218485524704e-06, "loss": 0.4527, "step": 9789 }, { "epoch": 1.607784369675446, "grad_norm": 0.2752977343062548, "learning_rate": 6.972826680884631e-06, "loss": 0.4579, "step": 9790 }, { "epoch": 1.6079485968837888, "grad_norm": 0.351632135170519, "learning_rate": 6.972434849387226e-06, "loss": 0.4488, "step": 9791 }, { "epoch": 1.6081128240921314, "grad_norm": 0.30396405938296145, "learning_rate": 6.972042991036747e-06, "loss": 0.4552, "step": 9792 }, { "epoch": 1.6082770513004743, "grad_norm": 0.2923401297274902, "learning_rate": 6.971651105837449e-06, "loss": 0.4671, "step": 9793 }, { "epoch": 1.608441278508817, "grad_norm": 0.36307972078813605, "learning_rate": 6.971259193793588e-06, "loss": 0.4382, "step": 9794 }, { "epoch": 1.6086055057171595, "grad_norm": 0.31702386181625597, "learning_rate": 6.970867254909422e-06, "loss": 0.4675, "step": 9795 }, { "epoch": 1.6087697329255024, "grad_norm": 0.3142731818838145, "learning_rate": 6.970475289189204e-06, "loss": 0.459, "step": 9796 }, { "epoch": 1.6089339601338453, "grad_norm": 0.36241745736101644, "learning_rate": 6.970083296637196e-06, "loss": 0.4748, "step": 9797 }, { "epoch": 1.609098187342188, "grad_norm": 0.3470417056667347, "learning_rate": 6.969691277257652e-06, "loss": 0.469, "step": 9798 }, { "epoch": 1.6092624145505305, "grad_norm": 0.41937451840307666, "learning_rate": 6.969299231054831e-06, "loss": 0.4564, "step": 9799 }, { "epoch": 1.6094266417588734, "grad_norm": 0.3262573052858504, "learning_rate": 6.9689071580329905e-06, "loss": 0.4587, "step": 9800 }, { "epoch": 1.6095908689672163, "grad_norm": 0.2947421917980309, "learning_rate": 6.96851505819639e-06, "loss": 0.4533, "step": 9801 }, { "epoch": 1.609755096175559, "grad_norm": 0.2521016894403766, "learning_rate": 6.968122931549288e-06, "loss": 0.4593, "step": 9802 }, { "epoch": 1.6099193233839015, "grad_norm": 0.2955673416323675, "learning_rate": 6.9677307780959416e-06, "loss": 0.4666, "step": 9803 }, { "epoch": 1.6100835505922444, "grad_norm": 0.27659905950201386, "learning_rate": 6.96733859784061e-06, "loss": 0.4489, "step": 9804 }, { "epoch": 1.6102477778005873, "grad_norm": 0.3404542615909959, "learning_rate": 6.966946390787554e-06, "loss": 0.4602, "step": 9805 }, { "epoch": 1.61041200500893, "grad_norm": 0.2845051029226587, "learning_rate": 6.9665541569410355e-06, "loss": 0.4692, "step": 9806 }, { "epoch": 1.6105762322172725, "grad_norm": 0.38260420051370825, "learning_rate": 6.96616189630531e-06, "loss": 0.4759, "step": 9807 }, { "epoch": 1.6107404594256154, "grad_norm": 0.26683465973181375, "learning_rate": 6.96576960888464e-06, "loss": 0.4739, "step": 9808 }, { "epoch": 1.610904686633958, "grad_norm": 0.384929446621508, "learning_rate": 6.965377294683286e-06, "loss": 0.438, "step": 9809 }, { "epoch": 1.6110689138423009, "grad_norm": 0.38374467343084134, "learning_rate": 6.964984953705509e-06, "loss": 0.4729, "step": 9810 }, { "epoch": 1.6112331410506435, "grad_norm": 0.2694526018583905, "learning_rate": 6.964592585955571e-06, "loss": 0.4557, "step": 9811 }, { "epoch": 1.6113973682589862, "grad_norm": 0.3115303986468197, "learning_rate": 6.964200191437732e-06, "loss": 0.4506, "step": 9812 }, { "epoch": 1.611561595467329, "grad_norm": 0.3035293723273839, "learning_rate": 6.963807770156254e-06, "loss": 0.4641, "step": 9813 }, { "epoch": 1.6117258226756719, "grad_norm": 0.2992017924804873, "learning_rate": 6.963415322115402e-06, "loss": 0.4536, "step": 9814 }, { "epoch": 1.6118900498840145, "grad_norm": 0.31667569525527806, "learning_rate": 6.963022847319434e-06, "loss": 0.4551, "step": 9815 }, { "epoch": 1.6120542770923572, "grad_norm": 0.2540364674293299, "learning_rate": 6.962630345772615e-06, "loss": 0.4708, "step": 9816 }, { "epoch": 1.6122185043007, "grad_norm": 0.4442076282948899, "learning_rate": 6.962237817479207e-06, "loss": 0.4743, "step": 9817 }, { "epoch": 1.6123827315090429, "grad_norm": 0.2903014811736007, "learning_rate": 6.961845262443474e-06, "loss": 0.4645, "step": 9818 }, { "epoch": 1.6125469587173855, "grad_norm": 0.36913889456903465, "learning_rate": 6.96145268066968e-06, "loss": 0.4619, "step": 9819 }, { "epoch": 1.6127111859257282, "grad_norm": 0.4006840087480322, "learning_rate": 6.961060072162087e-06, "loss": 0.4707, "step": 9820 }, { "epoch": 1.612875413134071, "grad_norm": 0.3353180038710587, "learning_rate": 6.960667436924961e-06, "loss": 0.4773, "step": 9821 }, { "epoch": 1.6130396403424139, "grad_norm": 0.28696136203207506, "learning_rate": 6.960274774962565e-06, "loss": 0.4412, "step": 9822 }, { "epoch": 1.6132038675507565, "grad_norm": 0.2864030214755703, "learning_rate": 6.959882086279166e-06, "loss": 0.4622, "step": 9823 }, { "epoch": 1.6133680947590991, "grad_norm": 0.2985379366016787, "learning_rate": 6.959489370879026e-06, "loss": 0.4611, "step": 9824 }, { "epoch": 1.613532321967442, "grad_norm": 0.28314934992789903, "learning_rate": 6.959096628766411e-06, "loss": 0.4729, "step": 9825 }, { "epoch": 1.6136965491757846, "grad_norm": 0.38930289972293297, "learning_rate": 6.9587038599455874e-06, "loss": 0.4515, "step": 9826 }, { "epoch": 1.6138607763841275, "grad_norm": 0.787582127701228, "learning_rate": 6.958311064420822e-06, "loss": 0.4569, "step": 9827 }, { "epoch": 1.6140250035924701, "grad_norm": 0.27450397102919527, "learning_rate": 6.957918242196379e-06, "loss": 0.4619, "step": 9828 }, { "epoch": 1.6141892308008128, "grad_norm": 0.4634130614046554, "learning_rate": 6.957525393276526e-06, "loss": 0.4574, "step": 9829 }, { "epoch": 1.6143534580091556, "grad_norm": 0.32687976750827596, "learning_rate": 6.957132517665529e-06, "loss": 0.4478, "step": 9830 }, { "epoch": 1.6145176852174985, "grad_norm": 0.2749625418731786, "learning_rate": 6.9567396153676556e-06, "loss": 0.4787, "step": 9831 }, { "epoch": 1.6146819124258411, "grad_norm": 0.2686148625927595, "learning_rate": 6.956346686387174e-06, "loss": 0.4641, "step": 9832 }, { "epoch": 1.6148461396341838, "grad_norm": 0.27749373643361147, "learning_rate": 6.955953730728349e-06, "loss": 0.4478, "step": 9833 }, { "epoch": 1.6150103668425266, "grad_norm": 0.33035669372922283, "learning_rate": 6.95556074839545e-06, "loss": 0.4574, "step": 9834 }, { "epoch": 1.6151745940508695, "grad_norm": 0.319743000157342, "learning_rate": 6.955167739392747e-06, "loss": 0.4471, "step": 9835 }, { "epoch": 1.6153388212592121, "grad_norm": 0.26117931070035566, "learning_rate": 6.954774703724506e-06, "loss": 0.4639, "step": 9836 }, { "epoch": 1.6155030484675548, "grad_norm": 0.27760241909976285, "learning_rate": 6.9543816413949965e-06, "loss": 0.4572, "step": 9837 }, { "epoch": 1.6156672756758976, "grad_norm": 0.2998866914995418, "learning_rate": 6.953988552408487e-06, "loss": 0.4551, "step": 9838 }, { "epoch": 1.6158315028842405, "grad_norm": 0.30021599310248603, "learning_rate": 6.953595436769248e-06, "loss": 0.4615, "step": 9839 }, { "epoch": 1.6159957300925831, "grad_norm": 0.31256899332836824, "learning_rate": 6.953202294481548e-06, "loss": 0.4541, "step": 9840 }, { "epoch": 1.6161599573009258, "grad_norm": 0.3899196834494022, "learning_rate": 6.952809125549658e-06, "loss": 0.4771, "step": 9841 }, { "epoch": 1.6163241845092686, "grad_norm": 0.3010082901322875, "learning_rate": 6.952415929977848e-06, "loss": 0.4719, "step": 9842 }, { "epoch": 1.6164884117176113, "grad_norm": 0.3576168449635639, "learning_rate": 6.952022707770387e-06, "loss": 0.4404, "step": 9843 }, { "epoch": 1.6166526389259541, "grad_norm": 0.2760702244129268, "learning_rate": 6.951629458931548e-06, "loss": 0.4578, "step": 9844 }, { "epoch": 1.6168168661342968, "grad_norm": 0.2749007165277947, "learning_rate": 6.951236183465601e-06, "loss": 0.45, "step": 9845 }, { "epoch": 1.6169810933426394, "grad_norm": 0.31390505673084157, "learning_rate": 6.950842881376816e-06, "loss": 0.4495, "step": 9846 }, { "epoch": 1.6171453205509823, "grad_norm": 0.2868629433154881, "learning_rate": 6.9504495526694675e-06, "loss": 0.4534, "step": 9847 }, { "epoch": 1.6173095477593251, "grad_norm": 0.312588550829176, "learning_rate": 6.9500561973478264e-06, "loss": 0.4478, "step": 9848 }, { "epoch": 1.6174737749676678, "grad_norm": 0.2958477322250829, "learning_rate": 6.949662815416163e-06, "loss": 0.464, "step": 9849 }, { "epoch": 1.6176380021760104, "grad_norm": 0.29871300762114433, "learning_rate": 6.949269406878752e-06, "loss": 0.4636, "step": 9850 }, { "epoch": 1.6178022293843533, "grad_norm": 0.40407588926968097, "learning_rate": 6.9488759717398645e-06, "loss": 0.4673, "step": 9851 }, { "epoch": 1.6179664565926961, "grad_norm": 0.27940144990316745, "learning_rate": 6.948482510003776e-06, "loss": 0.4671, "step": 9852 }, { "epoch": 1.6181306838010387, "grad_norm": 0.30202586554252114, "learning_rate": 6.948089021674758e-06, "loss": 0.4433, "step": 9853 }, { "epoch": 1.6182949110093814, "grad_norm": 0.285583505265655, "learning_rate": 6.947695506757084e-06, "loss": 0.4786, "step": 9854 }, { "epoch": 1.6184591382177242, "grad_norm": 0.39905382824632013, "learning_rate": 6.947301965255029e-06, "loss": 0.4724, "step": 9855 }, { "epoch": 1.618623365426067, "grad_norm": 0.307863136962308, "learning_rate": 6.946908397172866e-06, "loss": 0.4232, "step": 9856 }, { "epoch": 1.6187875926344097, "grad_norm": 0.43057042653410355, "learning_rate": 6.946514802514872e-06, "loss": 0.451, "step": 9857 }, { "epoch": 1.6189518198427524, "grad_norm": 0.3171082589247781, "learning_rate": 6.94612118128532e-06, "loss": 0.4396, "step": 9858 }, { "epoch": 1.6191160470510952, "grad_norm": 0.29330682282221376, "learning_rate": 6.945727533488483e-06, "loss": 0.4546, "step": 9859 }, { "epoch": 1.6192802742594379, "grad_norm": 0.2870797937588179, "learning_rate": 6.945333859128642e-06, "loss": 0.4634, "step": 9860 }, { "epoch": 1.6194445014677807, "grad_norm": 0.3051040740644912, "learning_rate": 6.944940158210067e-06, "loss": 0.4536, "step": 9861 }, { "epoch": 1.6196087286761234, "grad_norm": 0.41361444535474173, "learning_rate": 6.9445464307370375e-06, "loss": 0.458, "step": 9862 }, { "epoch": 1.619772955884466, "grad_norm": 0.25243414567729117, "learning_rate": 6.944152676713828e-06, "loss": 0.4663, "step": 9863 }, { "epoch": 1.6199371830928089, "grad_norm": 0.3815051393474824, "learning_rate": 6.943758896144715e-06, "loss": 0.444, "step": 9864 }, { "epoch": 1.6201014103011517, "grad_norm": 0.38218490759074814, "learning_rate": 6.943365089033979e-06, "loss": 0.4714, "step": 9865 }, { "epoch": 1.6202656375094944, "grad_norm": 0.28814253453168126, "learning_rate": 6.942971255385893e-06, "loss": 0.4753, "step": 9866 }, { "epoch": 1.620429864717837, "grad_norm": 0.337629238841208, "learning_rate": 6.9425773952047335e-06, "loss": 0.4506, "step": 9867 }, { "epoch": 1.6205940919261799, "grad_norm": 0.25032929018512107, "learning_rate": 6.942183508494782e-06, "loss": 0.4624, "step": 9868 }, { "epoch": 1.6207583191345227, "grad_norm": 0.28775404020259854, "learning_rate": 6.941789595260315e-06, "loss": 0.4854, "step": 9869 }, { "epoch": 1.6209225463428654, "grad_norm": 0.28379877031843614, "learning_rate": 6.941395655505611e-06, "loss": 0.4747, "step": 9870 }, { "epoch": 1.621086773551208, "grad_norm": 0.8202653712403721, "learning_rate": 6.941001689234946e-06, "loss": 0.4387, "step": 9871 }, { "epoch": 1.6212510007595509, "grad_norm": 0.42955934255178224, "learning_rate": 6.940607696452603e-06, "loss": 0.4522, "step": 9872 }, { "epoch": 1.6214152279678937, "grad_norm": 0.3259530932672982, "learning_rate": 6.940213677162859e-06, "loss": 0.4552, "step": 9873 }, { "epoch": 1.6215794551762364, "grad_norm": 0.38494136138535234, "learning_rate": 6.939819631369992e-06, "loss": 0.4636, "step": 9874 }, { "epoch": 1.621743682384579, "grad_norm": 0.2880001166580629, "learning_rate": 6.939425559078285e-06, "loss": 0.4682, "step": 9875 }, { "epoch": 1.6219079095929219, "grad_norm": 0.2530128315191015, "learning_rate": 6.939031460292016e-06, "loss": 0.4465, "step": 9876 }, { "epoch": 1.6220721368012645, "grad_norm": 0.29707957324021506, "learning_rate": 6.938637335015466e-06, "loss": 0.4585, "step": 9877 }, { "epoch": 1.6222363640096074, "grad_norm": 0.2835923151500123, "learning_rate": 6.938243183252914e-06, "loss": 0.4799, "step": 9878 }, { "epoch": 1.62240059121795, "grad_norm": 0.3185739450507327, "learning_rate": 6.937849005008643e-06, "loss": 0.4754, "step": 9879 }, { "epoch": 1.6225648184262926, "grad_norm": 0.2816869463340398, "learning_rate": 6.937454800286933e-06, "loss": 0.4432, "step": 9880 }, { "epoch": 1.6227290456346355, "grad_norm": 0.2856348714662334, "learning_rate": 6.937060569092066e-06, "loss": 0.4584, "step": 9881 }, { "epoch": 1.6228932728429784, "grad_norm": 0.39852623208599847, "learning_rate": 6.936666311428324e-06, "loss": 0.4808, "step": 9882 }, { "epoch": 1.623057500051321, "grad_norm": 0.2881186208127396, "learning_rate": 6.936272027299987e-06, "loss": 0.4663, "step": 9883 }, { "epoch": 1.6232217272596636, "grad_norm": 0.28622926544540417, "learning_rate": 6.935877716711341e-06, "loss": 0.4604, "step": 9884 }, { "epoch": 1.6233859544680065, "grad_norm": 0.25634085668901097, "learning_rate": 6.935483379666665e-06, "loss": 0.4489, "step": 9885 }, { "epoch": 1.6235501816763493, "grad_norm": 0.35586491846358337, "learning_rate": 6.935089016170243e-06, "loss": 0.4747, "step": 9886 }, { "epoch": 1.623714408884692, "grad_norm": 0.45630619008435724, "learning_rate": 6.934694626226359e-06, "loss": 0.4803, "step": 9887 }, { "epoch": 1.6238786360930346, "grad_norm": 0.414835910725295, "learning_rate": 6.934300209839296e-06, "loss": 0.4619, "step": 9888 }, { "epoch": 1.6240428633013775, "grad_norm": 0.39488106594808875, "learning_rate": 6.933905767013337e-06, "loss": 0.4804, "step": 9889 }, { "epoch": 1.6242070905097203, "grad_norm": 0.25434236302766866, "learning_rate": 6.933511297752769e-06, "loss": 0.4511, "step": 9890 }, { "epoch": 1.624371317718063, "grad_norm": 0.2852061649295925, "learning_rate": 6.93311680206187e-06, "loss": 0.4452, "step": 9891 }, { "epoch": 1.6245355449264056, "grad_norm": 0.26183435681063855, "learning_rate": 6.932722279944933e-06, "loss": 0.443, "step": 9892 }, { "epoch": 1.6246997721347485, "grad_norm": 0.3371400824807553, "learning_rate": 6.932327731406235e-06, "loss": 0.4562, "step": 9893 }, { "epoch": 1.6248639993430911, "grad_norm": 0.259702383047695, "learning_rate": 6.931933156450068e-06, "loss": 0.453, "step": 9894 }, { "epoch": 1.625028226551434, "grad_norm": 0.2845006703877606, "learning_rate": 6.931538555080712e-06, "loss": 0.4577, "step": 9895 }, { "epoch": 1.6251924537597766, "grad_norm": 0.2900661570644532, "learning_rate": 6.931143927302455e-06, "loss": 0.4455, "step": 9896 }, { "epoch": 1.6253566809681192, "grad_norm": 0.2464969379598356, "learning_rate": 6.930749273119583e-06, "loss": 0.4546, "step": 9897 }, { "epoch": 1.625520908176462, "grad_norm": 0.2571761908465857, "learning_rate": 6.930354592536384e-06, "loss": 0.4719, "step": 9898 }, { "epoch": 1.625685135384805, "grad_norm": 0.3131139124840701, "learning_rate": 6.929959885557142e-06, "loss": 0.4357, "step": 9899 }, { "epoch": 1.6258493625931476, "grad_norm": 0.2794163509230121, "learning_rate": 6.929565152186145e-06, "loss": 0.4645, "step": 9900 }, { "epoch": 1.6260135898014902, "grad_norm": 0.3279839951388422, "learning_rate": 6.9291703924276795e-06, "loss": 0.4668, "step": 9901 }, { "epoch": 1.626177817009833, "grad_norm": 0.28360673429548217, "learning_rate": 6.928775606286034e-06, "loss": 0.4366, "step": 9902 }, { "epoch": 1.626342044218176, "grad_norm": 0.26088450799046986, "learning_rate": 6.928380793765495e-06, "loss": 0.4621, "step": 9903 }, { "epoch": 1.6265062714265186, "grad_norm": 0.31179713297315687, "learning_rate": 6.927985954870352e-06, "loss": 0.4639, "step": 9904 }, { "epoch": 1.6266704986348612, "grad_norm": 0.31748956718889826, "learning_rate": 6.927591089604894e-06, "loss": 0.4539, "step": 9905 }, { "epoch": 1.626834725843204, "grad_norm": 0.2803655727982917, "learning_rate": 6.927196197973406e-06, "loss": 0.4618, "step": 9906 }, { "epoch": 1.626998953051547, "grad_norm": 0.27920456966557644, "learning_rate": 6.926801279980181e-06, "loss": 0.4754, "step": 9907 }, { "epoch": 1.6271631802598896, "grad_norm": 0.35933877215535787, "learning_rate": 6.926406335629506e-06, "loss": 0.4694, "step": 9908 }, { "epoch": 1.6273274074682322, "grad_norm": 0.32910657102073315, "learning_rate": 6.926011364925671e-06, "loss": 0.4563, "step": 9909 }, { "epoch": 1.627491634676575, "grad_norm": 0.28716458893408375, "learning_rate": 6.925616367872966e-06, "loss": 0.4486, "step": 9910 }, { "epoch": 1.6276558618849177, "grad_norm": 0.3223186286773691, "learning_rate": 6.92522134447568e-06, "loss": 0.4487, "step": 9911 }, { "epoch": 1.6278200890932606, "grad_norm": 0.28144093144592025, "learning_rate": 6.924826294738104e-06, "loss": 0.4602, "step": 9912 }, { "epoch": 1.6279843163016032, "grad_norm": 0.30356735564352616, "learning_rate": 6.924431218664529e-06, "loss": 0.4542, "step": 9913 }, { "epoch": 1.6281485435099459, "grad_norm": 0.27955424056774203, "learning_rate": 6.9240361162592466e-06, "loss": 0.4532, "step": 9914 }, { "epoch": 1.6283127707182887, "grad_norm": 0.27703469310058093, "learning_rate": 6.923640987526547e-06, "loss": 0.4641, "step": 9915 }, { "epoch": 1.6284769979266316, "grad_norm": 0.2741045415309227, "learning_rate": 6.92324583247072e-06, "loss": 0.4624, "step": 9916 }, { "epoch": 1.6286412251349742, "grad_norm": 0.566318151594307, "learning_rate": 6.9228506510960595e-06, "loss": 0.4463, "step": 9917 }, { "epoch": 1.6288054523433169, "grad_norm": 0.4132857679172401, "learning_rate": 6.922455443406858e-06, "loss": 0.4639, "step": 9918 }, { "epoch": 1.6289696795516597, "grad_norm": 0.3322732173475824, "learning_rate": 6.922060209407407e-06, "loss": 0.4523, "step": 9919 }, { "epoch": 1.6291339067600026, "grad_norm": 0.26147560000120657, "learning_rate": 6.921664949102e-06, "loss": 0.432, "step": 9920 }, { "epoch": 1.6292981339683452, "grad_norm": 0.26057303179752594, "learning_rate": 6.921269662494927e-06, "loss": 0.4612, "step": 9921 }, { "epoch": 1.6294623611766879, "grad_norm": 0.40536918485757667, "learning_rate": 6.9208743495904846e-06, "loss": 0.4676, "step": 9922 }, { "epoch": 1.6296265883850307, "grad_norm": 0.2811103795445873, "learning_rate": 6.920479010392964e-06, "loss": 0.4848, "step": 9923 }, { "epoch": 1.6297908155933736, "grad_norm": 0.28369942619293775, "learning_rate": 6.920083644906659e-06, "loss": 0.4575, "step": 9924 }, { "epoch": 1.6299550428017162, "grad_norm": 0.9300496984621315, "learning_rate": 6.919688253135867e-06, "loss": 0.4596, "step": 9925 }, { "epoch": 1.6301192700100589, "grad_norm": 0.629522730790281, "learning_rate": 6.919292835084879e-06, "loss": 0.4461, "step": 9926 }, { "epoch": 1.6302834972184017, "grad_norm": 0.2516198118246472, "learning_rate": 6.91889739075799e-06, "loss": 0.4709, "step": 9927 }, { "epoch": 1.6304477244267443, "grad_norm": 0.3026698120551612, "learning_rate": 6.918501920159496e-06, "loss": 0.4531, "step": 9928 }, { "epoch": 1.6306119516350872, "grad_norm": 0.26043558936113975, "learning_rate": 6.9181064232936926e-06, "loss": 0.454, "step": 9929 }, { "epoch": 1.6307761788434298, "grad_norm": 0.2549099028558008, "learning_rate": 6.917710900164873e-06, "loss": 0.4673, "step": 9930 }, { "epoch": 1.6309404060517725, "grad_norm": 0.3021568552153403, "learning_rate": 6.917315350777335e-06, "loss": 0.4532, "step": 9931 }, { "epoch": 1.6311046332601153, "grad_norm": 0.3000004873374244, "learning_rate": 6.916919775135374e-06, "loss": 0.4772, "step": 9932 }, { "epoch": 1.6312688604684582, "grad_norm": 0.32110553303472866, "learning_rate": 6.916524173243285e-06, "loss": 0.4524, "step": 9933 }, { "epoch": 1.6314330876768008, "grad_norm": 0.31698803998851016, "learning_rate": 6.916128545105368e-06, "loss": 0.4602, "step": 9934 }, { "epoch": 1.6315973148851435, "grad_norm": 0.28149338690697373, "learning_rate": 6.915732890725915e-06, "loss": 0.4535, "step": 9935 }, { "epoch": 1.6317615420934863, "grad_norm": 0.3373716032595957, "learning_rate": 6.91533721010923e-06, "loss": 0.4603, "step": 9936 }, { "epoch": 1.6319257693018292, "grad_norm": 0.2547340712685884, "learning_rate": 6.914941503259604e-06, "loss": 0.4513, "step": 9937 }, { "epoch": 1.6320899965101718, "grad_norm": 0.2737540154463338, "learning_rate": 6.9145457701813365e-06, "loss": 0.4644, "step": 9938 }, { "epoch": 1.6322542237185145, "grad_norm": 0.3191457901931209, "learning_rate": 6.9141500108787284e-06, "loss": 0.4609, "step": 9939 }, { "epoch": 1.6324184509268573, "grad_norm": 0.30132448751628016, "learning_rate": 6.913754225356075e-06, "loss": 0.4529, "step": 9940 }, { "epoch": 1.6325826781352002, "grad_norm": 0.3397790738337766, "learning_rate": 6.9133584136176755e-06, "loss": 0.4414, "step": 9941 }, { "epoch": 1.6327469053435428, "grad_norm": 0.294003400100843, "learning_rate": 6.91296257566783e-06, "loss": 0.4663, "step": 9942 }, { "epoch": 1.6329111325518855, "grad_norm": 0.3010856698778934, "learning_rate": 6.912566711510836e-06, "loss": 0.4773, "step": 9943 }, { "epoch": 1.6330753597602283, "grad_norm": 0.4470639674793449, "learning_rate": 6.912170821150994e-06, "loss": 0.4485, "step": 9944 }, { "epoch": 1.633239586968571, "grad_norm": 0.3158933653529828, "learning_rate": 6.911774904592605e-06, "loss": 0.4678, "step": 9945 }, { "epoch": 1.6334038141769138, "grad_norm": 0.35320107780642485, "learning_rate": 6.911378961839966e-06, "loss": 0.4766, "step": 9946 }, { "epoch": 1.6335680413852565, "grad_norm": 0.32146417670514194, "learning_rate": 6.9109829928973794e-06, "loss": 0.4507, "step": 9947 }, { "epoch": 1.633732268593599, "grad_norm": 0.39318528095594146, "learning_rate": 6.910586997769147e-06, "loss": 0.4354, "step": 9948 }, { "epoch": 1.633896495801942, "grad_norm": 0.31839866812565076, "learning_rate": 6.910190976459568e-06, "loss": 0.4575, "step": 9949 }, { "epoch": 1.6340607230102848, "grad_norm": 0.4040545593174407, "learning_rate": 6.9097949289729416e-06, "loss": 0.4536, "step": 9950 }, { "epoch": 1.6342249502186275, "grad_norm": 0.3204970703934227, "learning_rate": 6.909398855313572e-06, "loss": 0.4437, "step": 9951 }, { "epoch": 1.63438917742697, "grad_norm": 0.35954942581069016, "learning_rate": 6.90900275548576e-06, "loss": 0.4657, "step": 9952 }, { "epoch": 1.634553404635313, "grad_norm": 0.3009896626505442, "learning_rate": 6.908606629493809e-06, "loss": 0.4508, "step": 9953 }, { "epoch": 1.6347176318436558, "grad_norm": 0.32257264785454315, "learning_rate": 6.908210477342019e-06, "loss": 0.4328, "step": 9954 }, { "epoch": 1.6348818590519985, "grad_norm": 0.4503379019831764, "learning_rate": 6.907814299034695e-06, "loss": 0.4706, "step": 9955 }, { "epoch": 1.635046086260341, "grad_norm": 0.3434774458899971, "learning_rate": 6.907418094576138e-06, "loss": 0.4926, "step": 9956 }, { "epoch": 1.635210313468684, "grad_norm": 0.4039715002404461, "learning_rate": 6.9070218639706535e-06, "loss": 0.4568, "step": 9957 }, { "epoch": 1.6353745406770268, "grad_norm": 0.32002629237152624, "learning_rate": 6.906625607222541e-06, "loss": 0.4694, "step": 9958 }, { "epoch": 1.6355387678853694, "grad_norm": 0.28880005235928163, "learning_rate": 6.906229324336109e-06, "loss": 0.4752, "step": 9959 }, { "epoch": 1.635702995093712, "grad_norm": 0.300776456819465, "learning_rate": 6.905833015315657e-06, "loss": 0.469, "step": 9960 }, { "epoch": 1.635867222302055, "grad_norm": 0.3689131755069731, "learning_rate": 6.905436680165493e-06, "loss": 0.4483, "step": 9961 }, { "epoch": 1.6360314495103976, "grad_norm": 0.34492706183414207, "learning_rate": 6.905040318889919e-06, "loss": 0.4574, "step": 9962 }, { "epoch": 1.6361956767187404, "grad_norm": 0.3119084180868803, "learning_rate": 6.904643931493241e-06, "loss": 0.4612, "step": 9963 }, { "epoch": 1.636359903927083, "grad_norm": 0.25489213577479997, "learning_rate": 6.904247517979764e-06, "loss": 0.4543, "step": 9964 }, { "epoch": 1.6365241311354257, "grad_norm": 0.29549691545907814, "learning_rate": 6.903851078353795e-06, "loss": 0.4733, "step": 9965 }, { "epoch": 1.6366883583437686, "grad_norm": 0.28335212002696497, "learning_rate": 6.903454612619636e-06, "loss": 0.4608, "step": 9966 }, { "epoch": 1.6368525855521114, "grad_norm": 0.2993104367804374, "learning_rate": 6.903058120781597e-06, "loss": 0.4672, "step": 9967 }, { "epoch": 1.637016812760454, "grad_norm": 0.28332822889276027, "learning_rate": 6.902661602843981e-06, "loss": 0.4487, "step": 9968 }, { "epoch": 1.6371810399687967, "grad_norm": 0.25729560828720943, "learning_rate": 6.9022650588110985e-06, "loss": 0.4184, "step": 9969 }, { "epoch": 1.6373452671771396, "grad_norm": 0.30264712506892016, "learning_rate": 6.901868488687251e-06, "loss": 0.4596, "step": 9970 }, { "epoch": 1.6375094943854824, "grad_norm": 0.2672502724121429, "learning_rate": 6.901471892476751e-06, "loss": 0.4603, "step": 9971 }, { "epoch": 1.637673721593825, "grad_norm": 0.28382573152561147, "learning_rate": 6.901075270183901e-06, "loss": 0.4805, "step": 9972 }, { "epoch": 1.6378379488021677, "grad_norm": 0.29678729782940755, "learning_rate": 6.900678621813014e-06, "loss": 0.4696, "step": 9973 }, { "epoch": 1.6380021760105106, "grad_norm": 0.2438202408021163, "learning_rate": 6.900281947368394e-06, "loss": 0.4582, "step": 9974 }, { "epoch": 1.6381664032188534, "grad_norm": 0.3363567609882665, "learning_rate": 6.8998852468543495e-06, "loss": 0.4349, "step": 9975 }, { "epoch": 1.638330630427196, "grad_norm": 0.2897556757604139, "learning_rate": 6.8994885202751905e-06, "loss": 0.4581, "step": 9976 }, { "epoch": 1.6384948576355387, "grad_norm": 0.3406024616786714, "learning_rate": 6.899091767635226e-06, "loss": 0.4669, "step": 9977 }, { "epoch": 1.6386590848438816, "grad_norm": 0.3571028862398934, "learning_rate": 6.8986949889387655e-06, "loss": 0.4622, "step": 9978 }, { "epoch": 1.6388233120522242, "grad_norm": 0.28486072032707144, "learning_rate": 6.898298184190116e-06, "loss": 0.4584, "step": 9979 }, { "epoch": 1.638987539260567, "grad_norm": 0.6487043597141194, "learning_rate": 6.897901353393588e-06, "loss": 0.474, "step": 9980 }, { "epoch": 1.6391517664689097, "grad_norm": 0.2955606209398538, "learning_rate": 6.897504496553493e-06, "loss": 0.4613, "step": 9981 }, { "epoch": 1.6393159936772523, "grad_norm": 0.3960711855428685, "learning_rate": 6.89710761367414e-06, "loss": 0.4384, "step": 9982 }, { "epoch": 1.6394802208855952, "grad_norm": 0.32856200591871165, "learning_rate": 6.8967107047598405e-06, "loss": 0.4834, "step": 9983 }, { "epoch": 1.639644448093938, "grad_norm": 0.2726115586771324, "learning_rate": 6.896313769814905e-06, "loss": 0.4681, "step": 9984 }, { "epoch": 1.6398086753022807, "grad_norm": 0.2689421461804761, "learning_rate": 6.895916808843643e-06, "loss": 0.4533, "step": 9985 }, { "epoch": 1.6399729025106233, "grad_norm": 0.2627988789746831, "learning_rate": 6.895519821850368e-06, "loss": 0.4709, "step": 9986 }, { "epoch": 1.6401371297189662, "grad_norm": 0.32357716022869204, "learning_rate": 6.895122808839391e-06, "loss": 0.4752, "step": 9987 }, { "epoch": 1.640301356927309, "grad_norm": 0.35351722543048564, "learning_rate": 6.894725769815023e-06, "loss": 0.4596, "step": 9988 }, { "epoch": 1.6404655841356517, "grad_norm": 0.30283147058524224, "learning_rate": 6.894328704781578e-06, "loss": 0.4602, "step": 9989 }, { "epoch": 1.6406298113439943, "grad_norm": 0.32871065213422807, "learning_rate": 6.893931613743367e-06, "loss": 0.4398, "step": 9990 }, { "epoch": 1.6407940385523372, "grad_norm": 0.2909440710280135, "learning_rate": 6.893534496704704e-06, "loss": 0.4562, "step": 9991 }, { "epoch": 1.64095826576068, "grad_norm": 0.26730344436772585, "learning_rate": 6.893137353669899e-06, "loss": 0.4363, "step": 9992 }, { "epoch": 1.6411224929690227, "grad_norm": 0.3766542819585991, "learning_rate": 6.892740184643271e-06, "loss": 0.4577, "step": 9993 }, { "epoch": 1.6412867201773653, "grad_norm": 0.2845863213745585, "learning_rate": 6.8923429896291295e-06, "loss": 0.4547, "step": 9994 }, { "epoch": 1.6414509473857082, "grad_norm": 0.28734066949876436, "learning_rate": 6.891945768631789e-06, "loss": 0.47, "step": 9995 }, { "epoch": 1.6416151745940508, "grad_norm": 0.4035762230136988, "learning_rate": 6.891548521655563e-06, "loss": 0.4615, "step": 9996 }, { "epoch": 1.6417794018023937, "grad_norm": 0.27005177748526993, "learning_rate": 6.891151248704769e-06, "loss": 0.4527, "step": 9997 }, { "epoch": 1.6419436290107363, "grad_norm": 1.0453460937427654, "learning_rate": 6.890753949783719e-06, "loss": 0.4578, "step": 9998 }, { "epoch": 1.642107856219079, "grad_norm": 0.38637735669787354, "learning_rate": 6.8903566248967296e-06, "loss": 0.4528, "step": 9999 }, { "epoch": 1.6422720834274218, "grad_norm": 0.26584240115120206, "learning_rate": 6.889959274048115e-06, "loss": 0.4463, "step": 10000 }, { "epoch": 1.6424363106357647, "grad_norm": 0.390070342324435, "learning_rate": 6.889561897242191e-06, "loss": 0.4599, "step": 10001 }, { "epoch": 1.6426005378441073, "grad_norm": 0.24050547042083945, "learning_rate": 6.889164494483274e-06, "loss": 0.4414, "step": 10002 }, { "epoch": 1.64276476505245, "grad_norm": 0.2653471004908911, "learning_rate": 6.888767065775681e-06, "loss": 0.4624, "step": 10003 }, { "epoch": 1.6429289922607928, "grad_norm": 0.2749182908648262, "learning_rate": 6.888369611123726e-06, "loss": 0.4613, "step": 10004 }, { "epoch": 1.6430932194691357, "grad_norm": 0.3815791887017438, "learning_rate": 6.887972130531728e-06, "loss": 0.445, "step": 10005 }, { "epoch": 1.6432574466774783, "grad_norm": 0.3873361429992468, "learning_rate": 6.887574624004002e-06, "loss": 0.4693, "step": 10006 }, { "epoch": 1.643421673885821, "grad_norm": 0.40267621321284247, "learning_rate": 6.8871770915448666e-06, "loss": 0.4621, "step": 10007 }, { "epoch": 1.6435859010941638, "grad_norm": 0.2863259481455292, "learning_rate": 6.886779533158642e-06, "loss": 0.4698, "step": 10008 }, { "epoch": 1.6437501283025067, "grad_norm": 0.3518634637082607, "learning_rate": 6.88638194884964e-06, "loss": 0.4403, "step": 10009 }, { "epoch": 1.6439143555108493, "grad_norm": 0.2604027741918767, "learning_rate": 6.885984338622183e-06, "loss": 0.4512, "step": 10010 }, { "epoch": 1.644078582719192, "grad_norm": 0.28233015945831985, "learning_rate": 6.8855867024805904e-06, "loss": 0.4521, "step": 10011 }, { "epoch": 1.6442428099275348, "grad_norm": 0.3275936381929935, "learning_rate": 6.885189040429179e-06, "loss": 0.4734, "step": 10012 }, { "epoch": 1.6444070371358774, "grad_norm": 0.2791456316545117, "learning_rate": 6.884791352472266e-06, "loss": 0.4579, "step": 10013 }, { "epoch": 1.6445712643442203, "grad_norm": 0.32228759885779773, "learning_rate": 6.884393638614173e-06, "loss": 0.4712, "step": 10014 }, { "epoch": 1.644735491552563, "grad_norm": 0.43417519786081893, "learning_rate": 6.88399589885922e-06, "loss": 0.4557, "step": 10015 }, { "epoch": 1.6448997187609056, "grad_norm": 0.36224933578030566, "learning_rate": 6.8835981332117256e-06, "loss": 0.4663, "step": 10016 }, { "epoch": 1.6450639459692484, "grad_norm": 0.4044672096369628, "learning_rate": 6.883200341676011e-06, "loss": 0.4646, "step": 10017 }, { "epoch": 1.6452281731775913, "grad_norm": 0.5066739693604935, "learning_rate": 6.882802524256395e-06, "loss": 0.4484, "step": 10018 }, { "epoch": 1.645392400385934, "grad_norm": 0.31496716290507054, "learning_rate": 6.8824046809571985e-06, "loss": 0.4647, "step": 10019 }, { "epoch": 1.6455566275942766, "grad_norm": 0.28390866682913946, "learning_rate": 6.882006811782745e-06, "loss": 0.4529, "step": 10020 }, { "epoch": 1.6457208548026194, "grad_norm": 0.3395815099915771, "learning_rate": 6.881608916737352e-06, "loss": 0.4805, "step": 10021 }, { "epoch": 1.6458850820109623, "grad_norm": 0.4099494282068759, "learning_rate": 6.881210995825344e-06, "loss": 0.4478, "step": 10022 }, { "epoch": 1.646049309219305, "grad_norm": 0.30184948710719806, "learning_rate": 6.880813049051043e-06, "loss": 0.45, "step": 10023 }, { "epoch": 1.6462135364276476, "grad_norm": 0.38529826858875593, "learning_rate": 6.880415076418768e-06, "loss": 0.4433, "step": 10024 }, { "epoch": 1.6463777636359904, "grad_norm": 0.3318275138640135, "learning_rate": 6.880017077932844e-06, "loss": 0.4388, "step": 10025 }, { "epoch": 1.6465419908443333, "grad_norm": 0.5649606434374844, "learning_rate": 6.879619053597593e-06, "loss": 0.4452, "step": 10026 }, { "epoch": 1.646706218052676, "grad_norm": 0.30558762969639924, "learning_rate": 6.879221003417338e-06, "loss": 0.4533, "step": 10027 }, { "epoch": 1.6468704452610186, "grad_norm": 0.3017474883745442, "learning_rate": 6.878822927396402e-06, "loss": 0.4363, "step": 10028 }, { "epoch": 1.6470346724693614, "grad_norm": 0.3065943760906928, "learning_rate": 6.878424825539108e-06, "loss": 0.4582, "step": 10029 }, { "epoch": 1.647198899677704, "grad_norm": 0.33933242129544633, "learning_rate": 6.8780266978497805e-06, "loss": 0.4664, "step": 10030 }, { "epoch": 1.647363126886047, "grad_norm": 0.6614223853975514, "learning_rate": 6.877628544332744e-06, "loss": 0.4433, "step": 10031 }, { "epoch": 1.6475273540943896, "grad_norm": 0.3048403727802867, "learning_rate": 6.877230364992322e-06, "loss": 0.4547, "step": 10032 }, { "epoch": 1.6476915813027322, "grad_norm": 0.29375069237212753, "learning_rate": 6.87683215983284e-06, "loss": 0.4735, "step": 10033 }, { "epoch": 1.647855808511075, "grad_norm": 0.2877086753406038, "learning_rate": 6.876433928858621e-06, "loss": 0.4635, "step": 10034 }, { "epoch": 1.648020035719418, "grad_norm": 0.3292038999675611, "learning_rate": 6.8760356720739906e-06, "loss": 0.4748, "step": 10035 }, { "epoch": 1.6481842629277605, "grad_norm": 0.28454457736434846, "learning_rate": 6.875637389483278e-06, "loss": 0.4575, "step": 10036 }, { "epoch": 1.6483484901361032, "grad_norm": 0.46952622537098326, "learning_rate": 6.875239081090805e-06, "loss": 0.4685, "step": 10037 }, { "epoch": 1.648512717344446, "grad_norm": 0.48947938405660096, "learning_rate": 6.8748407469008975e-06, "loss": 0.4523, "step": 10038 }, { "epoch": 1.648676944552789, "grad_norm": 0.296064157512238, "learning_rate": 6.874442386917883e-06, "loss": 0.4504, "step": 10039 }, { "epoch": 1.6488411717611315, "grad_norm": 0.29222940294924704, "learning_rate": 6.87404400114609e-06, "loss": 0.463, "step": 10040 }, { "epoch": 1.6490053989694742, "grad_norm": 0.3095877404451654, "learning_rate": 6.873645589589842e-06, "loss": 0.4656, "step": 10041 }, { "epoch": 1.649169626177817, "grad_norm": 0.6592237090493654, "learning_rate": 6.8732471522534675e-06, "loss": 0.4323, "step": 10042 }, { "epoch": 1.64933385338616, "grad_norm": 0.36761926537069284, "learning_rate": 6.872848689141294e-06, "loss": 0.4479, "step": 10043 }, { "epoch": 1.6494980805945025, "grad_norm": 0.4189296399366135, "learning_rate": 6.872450200257648e-06, "loss": 0.4572, "step": 10044 }, { "epoch": 1.6496623078028452, "grad_norm": 0.3915907380874519, "learning_rate": 6.872051685606861e-06, "loss": 0.4438, "step": 10045 }, { "epoch": 1.649826535011188, "grad_norm": 0.7110489512948105, "learning_rate": 6.871653145193258e-06, "loss": 0.4649, "step": 10046 }, { "epoch": 1.6499907622195307, "grad_norm": 0.34437855980356424, "learning_rate": 6.871254579021168e-06, "loss": 0.4823, "step": 10047 }, { "epoch": 1.6501549894278735, "grad_norm": 0.30378428528681245, "learning_rate": 6.8708559870949205e-06, "loss": 0.4661, "step": 10048 }, { "epoch": 1.6503192166362162, "grad_norm": 0.3439466847782822, "learning_rate": 6.8704573694188455e-06, "loss": 0.4542, "step": 10049 }, { "epoch": 1.6504834438445588, "grad_norm": 0.27429610500978663, "learning_rate": 6.870058725997269e-06, "loss": 0.4437, "step": 10050 }, { "epoch": 1.6506476710529017, "grad_norm": 0.3041048998211635, "learning_rate": 6.8696600568345235e-06, "loss": 0.4444, "step": 10051 }, { "epoch": 1.6508118982612445, "grad_norm": 0.3819352975581534, "learning_rate": 6.869261361934939e-06, "loss": 0.4556, "step": 10052 }, { "epoch": 1.6509761254695872, "grad_norm": 0.3133404616783929, "learning_rate": 6.8688626413028455e-06, "loss": 0.4369, "step": 10053 }, { "epoch": 1.6511403526779298, "grad_norm": 0.30859949689052946, "learning_rate": 6.868463894942572e-06, "loss": 0.4505, "step": 10054 }, { "epoch": 1.6513045798862727, "grad_norm": 0.2569151609312324, "learning_rate": 6.868065122858452e-06, "loss": 0.4494, "step": 10055 }, { "epoch": 1.6514688070946155, "grad_norm": 0.29726416618644635, "learning_rate": 6.867666325054813e-06, "loss": 0.4643, "step": 10056 }, { "epoch": 1.6516330343029582, "grad_norm": 0.30981990269827364, "learning_rate": 6.86726750153599e-06, "loss": 0.4526, "step": 10057 }, { "epoch": 1.6517972615113008, "grad_norm": 0.3135006032323148, "learning_rate": 6.866868652306312e-06, "loss": 0.4547, "step": 10058 }, { "epoch": 1.6519614887196437, "grad_norm": 0.31218503934827935, "learning_rate": 6.866469777370111e-06, "loss": 0.4615, "step": 10059 }, { "epoch": 1.6521257159279865, "grad_norm": 0.28950250972669206, "learning_rate": 6.8660708767317204e-06, "loss": 0.4606, "step": 10060 }, { "epoch": 1.6522899431363292, "grad_norm": 0.32905922986746405, "learning_rate": 6.865671950395474e-06, "loss": 0.4595, "step": 10061 }, { "epoch": 1.6524541703446718, "grad_norm": 0.28969266444342057, "learning_rate": 6.8652729983656995e-06, "loss": 0.4603, "step": 10062 }, { "epoch": 1.6526183975530147, "grad_norm": 0.39683467383680576, "learning_rate": 6.8648740206467345e-06, "loss": 0.4676, "step": 10063 }, { "epoch": 1.6527826247613573, "grad_norm": 0.5331896140060073, "learning_rate": 6.864475017242911e-06, "loss": 0.4477, "step": 10064 }, { "epoch": 1.6529468519697001, "grad_norm": 0.2953487815232206, "learning_rate": 6.8640759881585635e-06, "loss": 0.4531, "step": 10065 }, { "epoch": 1.6531110791780428, "grad_norm": 0.3145480336655805, "learning_rate": 6.863676933398024e-06, "loss": 0.4483, "step": 10066 }, { "epoch": 1.6532753063863854, "grad_norm": 0.27112246157431824, "learning_rate": 6.863277852965627e-06, "loss": 0.4821, "step": 10067 }, { "epoch": 1.6534395335947283, "grad_norm": 0.33070118687121697, "learning_rate": 6.862878746865708e-06, "loss": 0.47, "step": 10068 }, { "epoch": 1.6536037608030711, "grad_norm": 0.3125265983720962, "learning_rate": 6.8624796151026e-06, "loss": 0.4612, "step": 10069 }, { "epoch": 1.6537679880114138, "grad_norm": 0.3305711511682732, "learning_rate": 6.862080457680641e-06, "loss": 0.4693, "step": 10070 }, { "epoch": 1.6539322152197564, "grad_norm": 0.29359255539804346, "learning_rate": 6.861681274604163e-06, "loss": 0.4603, "step": 10071 }, { "epoch": 1.6540964424280993, "grad_norm": 0.3449893242439603, "learning_rate": 6.861282065877503e-06, "loss": 0.4705, "step": 10072 }, { "epoch": 1.6542606696364421, "grad_norm": 0.44332745359425263, "learning_rate": 6.860882831504996e-06, "loss": 0.4621, "step": 10073 }, { "epoch": 1.6544248968447848, "grad_norm": 0.3169582268764672, "learning_rate": 6.86048357149098e-06, "loss": 0.4606, "step": 10074 }, { "epoch": 1.6545891240531274, "grad_norm": 0.2812684006764611, "learning_rate": 6.860084285839787e-06, "loss": 0.4624, "step": 10075 }, { "epoch": 1.6547533512614703, "grad_norm": 0.3030134510583051, "learning_rate": 6.85968497455576e-06, "loss": 0.4628, "step": 10076 }, { "epoch": 1.6549175784698131, "grad_norm": 0.5621866248997615, "learning_rate": 6.859285637643231e-06, "loss": 0.4475, "step": 10077 }, { "epoch": 1.6550818056781558, "grad_norm": 0.2846808009420695, "learning_rate": 6.85888627510654e-06, "loss": 0.4467, "step": 10078 }, { "epoch": 1.6552460328864984, "grad_norm": 0.5413683398262233, "learning_rate": 6.858486886950022e-06, "loss": 0.4525, "step": 10079 }, { "epoch": 1.6554102600948413, "grad_norm": 0.26694224344114137, "learning_rate": 6.858087473178015e-06, "loss": 0.4613, "step": 10080 }, { "epoch": 1.655574487303184, "grad_norm": 0.31430274465533675, "learning_rate": 6.85768803379486e-06, "loss": 0.4487, "step": 10081 }, { "epoch": 1.6557387145115268, "grad_norm": 0.3048388379627296, "learning_rate": 6.8572885688048935e-06, "loss": 0.4686, "step": 10082 }, { "epoch": 1.6559029417198694, "grad_norm": 0.4029978714207957, "learning_rate": 6.856889078212452e-06, "loss": 0.4556, "step": 10083 }, { "epoch": 1.656067168928212, "grad_norm": 0.2713347612783681, "learning_rate": 6.856489562021877e-06, "loss": 0.4522, "step": 10084 }, { "epoch": 1.656231396136555, "grad_norm": 0.3227466412547536, "learning_rate": 6.856090020237507e-06, "loss": 0.4526, "step": 10085 }, { "epoch": 1.6563956233448978, "grad_norm": 0.30791311562056967, "learning_rate": 6.855690452863681e-06, "loss": 0.4426, "step": 10086 }, { "epoch": 1.6565598505532404, "grad_norm": 0.28536385334132947, "learning_rate": 6.85529085990474e-06, "loss": 0.4503, "step": 10087 }, { "epoch": 1.656724077761583, "grad_norm": 0.3003422483466558, "learning_rate": 6.854891241365023e-06, "loss": 0.484, "step": 10088 }, { "epoch": 1.656888304969926, "grad_norm": 0.3026878855756908, "learning_rate": 6.8544915972488685e-06, "loss": 0.4474, "step": 10089 }, { "epoch": 1.6570525321782688, "grad_norm": 0.2629547019528613, "learning_rate": 6.85409192756062e-06, "loss": 0.4556, "step": 10090 }, { "epoch": 1.6572167593866114, "grad_norm": 0.5892721961165369, "learning_rate": 6.8536922323046175e-06, "loss": 0.4635, "step": 10091 }, { "epoch": 1.657380986594954, "grad_norm": 0.31233894549114827, "learning_rate": 6.8532925114852005e-06, "loss": 0.4504, "step": 10092 }, { "epoch": 1.657545213803297, "grad_norm": 0.9927882559085216, "learning_rate": 6.852892765106712e-06, "loss": 0.4446, "step": 10093 }, { "epoch": 1.6577094410116398, "grad_norm": 0.27389626760484753, "learning_rate": 6.852492993173493e-06, "loss": 0.4438, "step": 10094 }, { "epoch": 1.6578736682199824, "grad_norm": 0.28999814533998897, "learning_rate": 6.852093195689886e-06, "loss": 0.4749, "step": 10095 }, { "epoch": 1.658037895428325, "grad_norm": 0.28344610002066106, "learning_rate": 6.851693372660232e-06, "loss": 0.4621, "step": 10096 }, { "epoch": 1.6582021226366677, "grad_norm": 0.29129088168434936, "learning_rate": 6.851293524088875e-06, "loss": 0.4675, "step": 10097 }, { "epoch": 1.6583663498450105, "grad_norm": 0.26928559045198974, "learning_rate": 6.850893649980156e-06, "loss": 0.4559, "step": 10098 }, { "epoch": 1.6585305770533534, "grad_norm": 0.3608111549504681, "learning_rate": 6.85049375033842e-06, "loss": 0.4795, "step": 10099 }, { "epoch": 1.658694804261696, "grad_norm": 0.31003894818762734, "learning_rate": 6.850093825168009e-06, "loss": 0.4503, "step": 10100 }, { "epoch": 1.6588590314700387, "grad_norm": 0.2982802514934513, "learning_rate": 6.849693874473266e-06, "loss": 0.4603, "step": 10101 }, { "epoch": 1.6590232586783815, "grad_norm": 0.36819864548273795, "learning_rate": 6.849293898258537e-06, "loss": 0.4573, "step": 10102 }, { "epoch": 1.6591874858867244, "grad_norm": 0.3051585703764991, "learning_rate": 6.8488938965281645e-06, "loss": 0.4663, "step": 10103 }, { "epoch": 1.659351713095067, "grad_norm": 0.33465803831927327, "learning_rate": 6.848493869286493e-06, "loss": 0.4502, "step": 10104 }, { "epoch": 1.6595159403034097, "grad_norm": 0.38292256300891603, "learning_rate": 6.848093816537868e-06, "loss": 0.455, "step": 10105 }, { "epoch": 1.6596801675117525, "grad_norm": 0.4288122393071528, "learning_rate": 6.847693738286633e-06, "loss": 0.4597, "step": 10106 }, { "epoch": 1.6598443947200954, "grad_norm": 0.24811801866347535, "learning_rate": 6.847293634537135e-06, "loss": 0.4505, "step": 10107 }, { "epoch": 1.660008621928438, "grad_norm": 0.3088836539894647, "learning_rate": 6.846893505293719e-06, "loss": 0.4509, "step": 10108 }, { "epoch": 1.6601728491367806, "grad_norm": 0.3003313846361625, "learning_rate": 6.846493350560729e-06, "loss": 0.4523, "step": 10109 }, { "epoch": 1.6603370763451235, "grad_norm": 0.5284141833491072, "learning_rate": 6.846093170342515e-06, "loss": 0.4548, "step": 10110 }, { "epoch": 1.6605013035534664, "grad_norm": 0.3918097946018369, "learning_rate": 6.84569296464342e-06, "loss": 0.4502, "step": 10111 }, { "epoch": 1.660665530761809, "grad_norm": 0.3162058034183177, "learning_rate": 6.84529273346779e-06, "loss": 0.4537, "step": 10112 }, { "epoch": 1.6608297579701516, "grad_norm": 0.3712791403292991, "learning_rate": 6.844892476819973e-06, "loss": 0.4753, "step": 10113 }, { "epoch": 1.6609939851784943, "grad_norm": 0.2919157998372654, "learning_rate": 6.84449219470432e-06, "loss": 0.4555, "step": 10114 }, { "epoch": 1.6611582123868371, "grad_norm": 0.3444696305060915, "learning_rate": 6.844091887125172e-06, "loss": 0.4596, "step": 10115 }, { "epoch": 1.66132243959518, "grad_norm": 0.3506674472551571, "learning_rate": 6.843691554086882e-06, "loss": 0.4696, "step": 10116 }, { "epoch": 1.6614866668035226, "grad_norm": 0.3444951783000774, "learning_rate": 6.8432911955937935e-06, "loss": 0.4573, "step": 10117 }, { "epoch": 1.6616508940118653, "grad_norm": 0.4158734419460173, "learning_rate": 6.8428908116502595e-06, "loss": 0.4593, "step": 10118 }, { "epoch": 1.6618151212202081, "grad_norm": 0.30382622051819197, "learning_rate": 6.842490402260625e-06, "loss": 0.4731, "step": 10119 }, { "epoch": 1.661979348428551, "grad_norm": 0.2730468701565896, "learning_rate": 6.8420899674292405e-06, "loss": 0.4655, "step": 10120 }, { "epoch": 1.6621435756368936, "grad_norm": 0.3113059653283631, "learning_rate": 6.8416895071604545e-06, "loss": 0.4715, "step": 10121 }, { "epoch": 1.6623078028452363, "grad_norm": 0.4526111377968724, "learning_rate": 6.841289021458617e-06, "loss": 0.4748, "step": 10122 }, { "epoch": 1.6624720300535791, "grad_norm": 0.3076876962554403, "learning_rate": 6.840888510328075e-06, "loss": 0.4424, "step": 10123 }, { "epoch": 1.662636257261922, "grad_norm": 0.4323855586209853, "learning_rate": 6.840487973773183e-06, "loss": 0.4513, "step": 10124 }, { "epoch": 1.6628004844702646, "grad_norm": 0.2948138042150791, "learning_rate": 6.840087411798289e-06, "loss": 0.4572, "step": 10125 }, { "epoch": 1.6629647116786073, "grad_norm": 0.3235230615740369, "learning_rate": 6.839686824407742e-06, "loss": 0.4566, "step": 10126 }, { "epoch": 1.6631289388869501, "grad_norm": 0.35818271322707407, "learning_rate": 6.8392862116058945e-06, "loss": 0.4557, "step": 10127 }, { "epoch": 1.663293166095293, "grad_norm": 0.28832099909946796, "learning_rate": 6.8388855733970975e-06, "loss": 0.464, "step": 10128 }, { "epoch": 1.6634573933036356, "grad_norm": 0.32200541250106973, "learning_rate": 6.838484909785702e-06, "loss": 0.4468, "step": 10129 }, { "epoch": 1.6636216205119783, "grad_norm": 0.2759151795686372, "learning_rate": 6.838084220776061e-06, "loss": 0.4549, "step": 10130 }, { "epoch": 1.663785847720321, "grad_norm": 0.38764451430397096, "learning_rate": 6.8376835063725216e-06, "loss": 0.4591, "step": 10131 }, { "epoch": 1.6639500749286638, "grad_norm": 0.3689015443052184, "learning_rate": 6.837282766579442e-06, "loss": 0.4512, "step": 10132 }, { "epoch": 1.6641143021370066, "grad_norm": 0.3136526314101458, "learning_rate": 6.836882001401171e-06, "loss": 0.4673, "step": 10133 }, { "epoch": 1.6642785293453493, "grad_norm": 0.29260575496034674, "learning_rate": 6.836481210842064e-06, "loss": 0.4675, "step": 10134 }, { "epoch": 1.664442756553692, "grad_norm": 0.3097745397723282, "learning_rate": 6.83608039490647e-06, "loss": 0.4534, "step": 10135 }, { "epoch": 1.6646069837620348, "grad_norm": 0.2974482169941179, "learning_rate": 6.835679553598746e-06, "loss": 0.4403, "step": 10136 }, { "epoch": 1.6647712109703776, "grad_norm": 0.5267663399483836, "learning_rate": 6.835278686923242e-06, "loss": 0.4649, "step": 10137 }, { "epoch": 1.6649354381787203, "grad_norm": 0.28156728840426715, "learning_rate": 6.834877794884314e-06, "loss": 0.4638, "step": 10138 }, { "epoch": 1.6650996653870629, "grad_norm": 0.44869677574882944, "learning_rate": 6.834476877486318e-06, "loss": 0.453, "step": 10139 }, { "epoch": 1.6652638925954057, "grad_norm": 0.30123810086379144, "learning_rate": 6.8340759347336056e-06, "loss": 0.4601, "step": 10140 }, { "epoch": 1.6654281198037486, "grad_norm": 0.5795294618527556, "learning_rate": 6.833674966630533e-06, "loss": 0.4615, "step": 10141 }, { "epoch": 1.6655923470120912, "grad_norm": 0.40046742999378, "learning_rate": 6.833273973181453e-06, "loss": 0.47, "step": 10142 }, { "epoch": 1.6657565742204339, "grad_norm": 0.2708138308319611, "learning_rate": 6.832872954390723e-06, "loss": 0.4649, "step": 10143 }, { "epoch": 1.6659208014287767, "grad_norm": 0.4570428523808035, "learning_rate": 6.832471910262696e-06, "loss": 0.4627, "step": 10144 }, { "epoch": 1.6660850286371196, "grad_norm": 0.4293640775402717, "learning_rate": 6.8320708408017305e-06, "loss": 0.4579, "step": 10145 }, { "epoch": 1.6662492558454622, "grad_norm": 0.2685026647291335, "learning_rate": 6.83166974601218e-06, "loss": 0.4509, "step": 10146 }, { "epoch": 1.6664134830538049, "grad_norm": 0.31228397957090265, "learning_rate": 6.831268625898402e-06, "loss": 0.479, "step": 10147 }, { "epoch": 1.6665777102621475, "grad_norm": 0.3137721705986118, "learning_rate": 6.830867480464754e-06, "loss": 0.4437, "step": 10148 }, { "epoch": 1.6667419374704904, "grad_norm": 0.5708636843935152, "learning_rate": 6.830466309715593e-06, "loss": 0.4512, "step": 10149 }, { "epoch": 1.6669061646788332, "grad_norm": 0.2653212103007819, "learning_rate": 6.830065113655272e-06, "loss": 0.4496, "step": 10150 }, { "epoch": 1.6670703918871759, "grad_norm": 0.7572840674435325, "learning_rate": 6.829663892288155e-06, "loss": 0.4401, "step": 10151 }, { "epoch": 1.6672346190955185, "grad_norm": 0.425478297583331, "learning_rate": 6.829262645618592e-06, "loss": 0.4681, "step": 10152 }, { "epoch": 1.6673988463038614, "grad_norm": 0.2559404822460795, "learning_rate": 6.8288613736509485e-06, "loss": 0.4491, "step": 10153 }, { "epoch": 1.6675630735122042, "grad_norm": 0.2704039864595664, "learning_rate": 6.828460076389577e-06, "loss": 0.4651, "step": 10154 }, { "epoch": 1.6677273007205469, "grad_norm": 0.29909003380134896, "learning_rate": 6.82805875383884e-06, "loss": 0.4391, "step": 10155 }, { "epoch": 1.6678915279288895, "grad_norm": 0.3071232266044963, "learning_rate": 6.827657406003092e-06, "loss": 0.4543, "step": 10156 }, { "epoch": 1.6680557551372324, "grad_norm": 0.33138094181705335, "learning_rate": 6.8272560328866965e-06, "loss": 0.4758, "step": 10157 }, { "epoch": 1.6682199823455752, "grad_norm": 0.7843415418694715, "learning_rate": 6.826854634494011e-06, "loss": 0.4437, "step": 10158 }, { "epoch": 1.6683842095539179, "grad_norm": 0.2925059010158654, "learning_rate": 6.8264532108293936e-06, "loss": 0.4652, "step": 10159 }, { "epoch": 1.6685484367622605, "grad_norm": 0.31203921925175077, "learning_rate": 6.826051761897205e-06, "loss": 0.4407, "step": 10160 }, { "epoch": 1.6687126639706034, "grad_norm": 0.3378418281840202, "learning_rate": 6.825650287701807e-06, "loss": 0.4396, "step": 10161 }, { "epoch": 1.6688768911789462, "grad_norm": 0.6728722464644683, "learning_rate": 6.825248788247557e-06, "loss": 0.4732, "step": 10162 }, { "epoch": 1.6690411183872889, "grad_norm": 0.4629013914353093, "learning_rate": 6.82484726353882e-06, "loss": 0.4691, "step": 10163 }, { "epoch": 1.6692053455956315, "grad_norm": 0.31808519111750916, "learning_rate": 6.824445713579954e-06, "loss": 0.4594, "step": 10164 }, { "epoch": 1.6693695728039741, "grad_norm": 0.2995549769221818, "learning_rate": 6.824044138375318e-06, "loss": 0.4584, "step": 10165 }, { "epoch": 1.669533800012317, "grad_norm": 1.8078725563334275, "learning_rate": 6.823642537929278e-06, "loss": 0.4445, "step": 10166 }, { "epoch": 1.6696980272206599, "grad_norm": 0.2907891043954342, "learning_rate": 6.823240912246193e-06, "loss": 0.4537, "step": 10167 }, { "epoch": 1.6698622544290025, "grad_norm": 0.5084326002415936, "learning_rate": 6.8228392613304285e-06, "loss": 0.4627, "step": 10168 }, { "epoch": 1.6700264816373451, "grad_norm": 0.29511217662791017, "learning_rate": 6.822437585186341e-06, "loss": 0.4779, "step": 10169 }, { "epoch": 1.670190708845688, "grad_norm": 0.2693223126141609, "learning_rate": 6.822035883818299e-06, "loss": 0.4466, "step": 10170 }, { "epoch": 1.6703549360540308, "grad_norm": 0.3933448320738575, "learning_rate": 6.821634157230661e-06, "loss": 0.4772, "step": 10171 }, { "epoch": 1.6705191632623735, "grad_norm": 0.2942491456962361, "learning_rate": 6.821232405427791e-06, "loss": 0.452, "step": 10172 }, { "epoch": 1.6706833904707161, "grad_norm": 0.29925065753639407, "learning_rate": 6.820830628414056e-06, "loss": 0.4626, "step": 10173 }, { "epoch": 1.670847617679059, "grad_norm": 0.3433489098652025, "learning_rate": 6.820428826193816e-06, "loss": 0.4565, "step": 10174 }, { "epoch": 1.6710118448874018, "grad_norm": 0.297289729417751, "learning_rate": 6.820026998771435e-06, "loss": 0.468, "step": 10175 }, { "epoch": 1.6711760720957445, "grad_norm": 0.2536699644456774, "learning_rate": 6.819625146151278e-06, "loss": 0.4534, "step": 10176 }, { "epoch": 1.6713402993040871, "grad_norm": 0.25733154085588766, "learning_rate": 6.81922326833771e-06, "loss": 0.4698, "step": 10177 }, { "epoch": 1.67150452651243, "grad_norm": 0.7738151356879273, "learning_rate": 6.818821365335097e-06, "loss": 0.4686, "step": 10178 }, { "epoch": 1.6716687537207728, "grad_norm": 0.2918725037566939, "learning_rate": 6.818419437147802e-06, "loss": 0.4822, "step": 10179 }, { "epoch": 1.6718329809291155, "grad_norm": 0.3340710811833185, "learning_rate": 6.81801748378019e-06, "loss": 0.4595, "step": 10180 }, { "epoch": 1.6719972081374581, "grad_norm": 0.30776634460261865, "learning_rate": 6.817615505236627e-06, "loss": 0.4676, "step": 10181 }, { "epoch": 1.6721614353458008, "grad_norm": 0.2942163724177741, "learning_rate": 6.81721350152148e-06, "loss": 0.4607, "step": 10182 }, { "epoch": 1.6723256625541436, "grad_norm": 0.6801375014109976, "learning_rate": 6.816811472639116e-06, "loss": 0.4689, "step": 10183 }, { "epoch": 1.6724898897624865, "grad_norm": 0.31395840415757875, "learning_rate": 6.8164094185938986e-06, "loss": 0.4637, "step": 10184 }, { "epoch": 1.672654116970829, "grad_norm": 0.3058483374593646, "learning_rate": 6.816007339390195e-06, "loss": 0.4504, "step": 10185 }, { "epoch": 1.6728183441791717, "grad_norm": 0.31385256374761467, "learning_rate": 6.815605235032374e-06, "loss": 0.4518, "step": 10186 }, { "epoch": 1.6729825713875146, "grad_norm": 0.29066102734867516, "learning_rate": 6.815203105524803e-06, "loss": 0.4303, "step": 10187 }, { "epoch": 1.6731467985958575, "grad_norm": 0.3231898610109705, "learning_rate": 6.814800950871848e-06, "loss": 0.4578, "step": 10188 }, { "epoch": 1.6733110258042, "grad_norm": 0.305140836240839, "learning_rate": 6.8143987710778764e-06, "loss": 0.4656, "step": 10189 }, { "epoch": 1.6734752530125427, "grad_norm": 0.301483387719651, "learning_rate": 6.813996566147257e-06, "loss": 0.4879, "step": 10190 }, { "epoch": 1.6736394802208856, "grad_norm": 1.2463700741489872, "learning_rate": 6.813594336084359e-06, "loss": 0.4543, "step": 10191 }, { "epoch": 1.6738037074292285, "grad_norm": 0.39223622977769473, "learning_rate": 6.81319208089355e-06, "loss": 0.4681, "step": 10192 }, { "epoch": 1.673967934637571, "grad_norm": 0.3176465043128236, "learning_rate": 6.812789800579198e-06, "loss": 0.4456, "step": 10193 }, { "epoch": 1.6741321618459137, "grad_norm": 0.2903606994722687, "learning_rate": 6.812387495145675e-06, "loss": 0.4687, "step": 10194 }, { "epoch": 1.6742963890542566, "grad_norm": 0.3094943548301401, "learning_rate": 6.811985164597348e-06, "loss": 0.4565, "step": 10195 }, { "epoch": 1.6744606162625995, "grad_norm": 0.4317000637297288, "learning_rate": 6.811582808938587e-06, "loss": 0.449, "step": 10196 }, { "epoch": 1.674624843470942, "grad_norm": 0.45232778899457615, "learning_rate": 6.8111804281737636e-06, "loss": 0.4724, "step": 10197 }, { "epoch": 1.6747890706792847, "grad_norm": 0.33289301304418345, "learning_rate": 6.810778022307245e-06, "loss": 0.4624, "step": 10198 }, { "epoch": 1.6749532978876274, "grad_norm": 0.8288740572151884, "learning_rate": 6.810375591343405e-06, "loss": 0.4414, "step": 10199 }, { "epoch": 1.6751175250959702, "grad_norm": 0.5574232589287477, "learning_rate": 6.809973135286613e-06, "loss": 0.4157, "step": 10200 }, { "epoch": 1.675281752304313, "grad_norm": 0.40580094718204485, "learning_rate": 6.809570654141239e-06, "loss": 0.4702, "step": 10201 }, { "epoch": 1.6754459795126557, "grad_norm": 0.337536087662044, "learning_rate": 6.809168147911656e-06, "loss": 0.4573, "step": 10202 }, { "epoch": 1.6756102067209984, "grad_norm": 0.3024412512663363, "learning_rate": 6.808765616602236e-06, "loss": 0.4379, "step": 10203 }, { "epoch": 1.6757744339293412, "grad_norm": 0.36910762147354104, "learning_rate": 6.808363060217348e-06, "loss": 0.4482, "step": 10204 }, { "epoch": 1.675938661137684, "grad_norm": 0.4531128443431689, "learning_rate": 6.8079604787613664e-06, "loss": 0.4615, "step": 10205 }, { "epoch": 1.6761028883460267, "grad_norm": 0.25406746010569026, "learning_rate": 6.8075578722386646e-06, "loss": 0.4625, "step": 10206 }, { "epoch": 1.6762671155543694, "grad_norm": 0.4489461763784889, "learning_rate": 6.807155240653614e-06, "loss": 0.4557, "step": 10207 }, { "epoch": 1.6764313427627122, "grad_norm": 0.32368261589902353, "learning_rate": 6.806752584010586e-06, "loss": 0.4512, "step": 10208 }, { "epoch": 1.676595569971055, "grad_norm": 0.30499813536039555, "learning_rate": 6.8063499023139565e-06, "loss": 0.4653, "step": 10209 }, { "epoch": 1.6767597971793977, "grad_norm": 0.28669103974464233, "learning_rate": 6.805947195568096e-06, "loss": 0.4656, "step": 10210 }, { "epoch": 1.6769240243877404, "grad_norm": 0.342212151906084, "learning_rate": 6.805544463777383e-06, "loss": 0.4629, "step": 10211 }, { "epoch": 1.6770882515960832, "grad_norm": 0.2878312678429544, "learning_rate": 6.805141706946188e-06, "loss": 0.4674, "step": 10212 }, { "epoch": 1.677252478804426, "grad_norm": 0.2932394604869272, "learning_rate": 6.804738925078885e-06, "loss": 0.4713, "step": 10213 }, { "epoch": 1.6774167060127687, "grad_norm": 0.29840250165017845, "learning_rate": 6.8043361181798515e-06, "loss": 0.4747, "step": 10214 }, { "epoch": 1.6775809332211113, "grad_norm": 0.3088400439236326, "learning_rate": 6.803933286253458e-06, "loss": 0.4733, "step": 10215 }, { "epoch": 1.677745160429454, "grad_norm": 0.3900409979120372, "learning_rate": 6.803530429304084e-06, "loss": 0.4675, "step": 10216 }, { "epoch": 1.6779093876377968, "grad_norm": 0.30596119386700804, "learning_rate": 6.803127547336104e-06, "loss": 0.4477, "step": 10217 }, { "epoch": 1.6780736148461397, "grad_norm": 0.3396698020804204, "learning_rate": 6.802724640353891e-06, "loss": 0.4533, "step": 10218 }, { "epoch": 1.6782378420544823, "grad_norm": 0.36186525495151045, "learning_rate": 6.802321708361823e-06, "loss": 0.4737, "step": 10219 }, { "epoch": 1.678402069262825, "grad_norm": 0.32655803334282024, "learning_rate": 6.8019187513642775e-06, "loss": 0.4307, "step": 10220 }, { "epoch": 1.6785662964711678, "grad_norm": 0.2625813564960309, "learning_rate": 6.801515769365629e-06, "loss": 0.4823, "step": 10221 }, { "epoch": 1.6787305236795107, "grad_norm": 0.29774752894997863, "learning_rate": 6.801112762370254e-06, "loss": 0.4524, "step": 10222 }, { "epoch": 1.6788947508878533, "grad_norm": 0.3127505478958254, "learning_rate": 6.800709730382531e-06, "loss": 0.4403, "step": 10223 }, { "epoch": 1.679058978096196, "grad_norm": 0.30376527080229004, "learning_rate": 6.8003066734068374e-06, "loss": 0.4531, "step": 10224 }, { "epoch": 1.6792232053045388, "grad_norm": 0.3136688315705886, "learning_rate": 6.799903591447548e-06, "loss": 0.4698, "step": 10225 }, { "epoch": 1.6793874325128817, "grad_norm": 0.28186818901257743, "learning_rate": 6.799500484509046e-06, "loss": 0.4312, "step": 10226 }, { "epoch": 1.6795516597212243, "grad_norm": 0.30242428453226355, "learning_rate": 6.799097352595704e-06, "loss": 0.4545, "step": 10227 }, { "epoch": 1.679715886929567, "grad_norm": 0.33302698471723835, "learning_rate": 6.798694195711903e-06, "loss": 0.4716, "step": 10228 }, { "epoch": 1.6798801141379098, "grad_norm": 0.34080784140890574, "learning_rate": 6.798291013862023e-06, "loss": 0.4677, "step": 10229 }, { "epoch": 1.6800443413462527, "grad_norm": 0.3374671449414891, "learning_rate": 6.797887807050439e-06, "loss": 0.4534, "step": 10230 }, { "epoch": 1.6802085685545953, "grad_norm": 0.3153387804540685, "learning_rate": 6.797484575281535e-06, "loss": 0.463, "step": 10231 }, { "epoch": 1.680372795762938, "grad_norm": 0.402795624677278, "learning_rate": 6.797081318559686e-06, "loss": 0.4644, "step": 10232 }, { "epoch": 1.6805370229712806, "grad_norm": 0.30953687154793585, "learning_rate": 6.796678036889275e-06, "loss": 0.4674, "step": 10233 }, { "epoch": 1.6807012501796235, "grad_norm": 0.3317048558219616, "learning_rate": 6.79627473027468e-06, "loss": 0.4857, "step": 10234 }, { "epoch": 1.6808654773879663, "grad_norm": 0.3802312298770638, "learning_rate": 6.795871398720282e-06, "loss": 0.4726, "step": 10235 }, { "epoch": 1.681029704596309, "grad_norm": 0.2997172652219126, "learning_rate": 6.795468042230464e-06, "loss": 0.4712, "step": 10236 }, { "epoch": 1.6811939318046516, "grad_norm": 0.3335225931142483, "learning_rate": 6.795064660809604e-06, "loss": 0.4387, "step": 10237 }, { "epoch": 1.6813581590129945, "grad_norm": 0.2824447347067514, "learning_rate": 6.7946612544620825e-06, "loss": 0.4538, "step": 10238 }, { "epoch": 1.6815223862213373, "grad_norm": 0.29554559306343914, "learning_rate": 6.794257823192282e-06, "loss": 0.4443, "step": 10239 }, { "epoch": 1.68168661342968, "grad_norm": 0.2963918421520866, "learning_rate": 6.793854367004585e-06, "loss": 0.4639, "step": 10240 }, { "epoch": 1.6818508406380226, "grad_norm": 0.36615082692498374, "learning_rate": 6.793450885903374e-06, "loss": 0.4534, "step": 10241 }, { "epoch": 1.6820150678463655, "grad_norm": 0.32269538559501254, "learning_rate": 6.793047379893027e-06, "loss": 0.4743, "step": 10242 }, { "epoch": 1.6821792950547083, "grad_norm": 0.35870366829155603, "learning_rate": 6.7926438489779315e-06, "loss": 0.4643, "step": 10243 }, { "epoch": 1.682343522263051, "grad_norm": 0.3287341790332416, "learning_rate": 6.792240293162467e-06, "loss": 0.4611, "step": 10244 }, { "epoch": 1.6825077494713936, "grad_norm": 0.36393328763510463, "learning_rate": 6.791836712451018e-06, "loss": 0.4727, "step": 10245 }, { "epoch": 1.6826719766797364, "grad_norm": 0.32174905680140364, "learning_rate": 6.791433106847968e-06, "loss": 0.4446, "step": 10246 }, { "epoch": 1.6828362038880793, "grad_norm": 0.341965634713859, "learning_rate": 6.7910294763577e-06, "loss": 0.4682, "step": 10247 }, { "epoch": 1.683000431096422, "grad_norm": 0.3729904454177416, "learning_rate": 6.790625820984597e-06, "loss": 0.4374, "step": 10248 }, { "epoch": 1.6831646583047646, "grad_norm": 0.3070748646257641, "learning_rate": 6.790222140733044e-06, "loss": 0.4516, "step": 10249 }, { "epoch": 1.6833288855131072, "grad_norm": 0.33665130707619334, "learning_rate": 6.789818435607426e-06, "loss": 0.4436, "step": 10250 }, { "epoch": 1.68349311272145, "grad_norm": 0.2835961697412146, "learning_rate": 6.789414705612128e-06, "loss": 0.4449, "step": 10251 }, { "epoch": 1.683657339929793, "grad_norm": 0.33480743026989657, "learning_rate": 6.7890109507515315e-06, "loss": 0.4691, "step": 10252 }, { "epoch": 1.6838215671381356, "grad_norm": 0.3456039394184217, "learning_rate": 6.788607171030025e-06, "loss": 0.4514, "step": 10253 }, { "epoch": 1.6839857943464782, "grad_norm": 0.27234082924457936, "learning_rate": 6.788203366451993e-06, "loss": 0.4447, "step": 10254 }, { "epoch": 1.684150021554821, "grad_norm": 0.34880013350903516, "learning_rate": 6.78779953702182e-06, "loss": 0.4709, "step": 10255 }, { "epoch": 1.684314248763164, "grad_norm": 0.33395070260387977, "learning_rate": 6.787395682743895e-06, "loss": 0.4554, "step": 10256 }, { "epoch": 1.6844784759715066, "grad_norm": 0.28524661530688, "learning_rate": 6.786991803622602e-06, "loss": 0.485, "step": 10257 }, { "epoch": 1.6846427031798492, "grad_norm": 0.3068358586063563, "learning_rate": 6.786587899662327e-06, "loss": 0.474, "step": 10258 }, { "epoch": 1.684806930388192, "grad_norm": 0.2848755321460969, "learning_rate": 6.786183970867458e-06, "loss": 0.4548, "step": 10259 }, { "epoch": 1.684971157596535, "grad_norm": 0.3643720044509586, "learning_rate": 6.785780017242382e-06, "loss": 0.4391, "step": 10260 }, { "epoch": 1.6851353848048776, "grad_norm": 0.2542353434490754, "learning_rate": 6.785376038791486e-06, "loss": 0.4491, "step": 10261 }, { "epoch": 1.6852996120132202, "grad_norm": 0.6157108993224596, "learning_rate": 6.784972035519159e-06, "loss": 0.4631, "step": 10262 }, { "epoch": 1.685463839221563, "grad_norm": 0.2588069234838315, "learning_rate": 6.784568007429786e-06, "loss": 0.4453, "step": 10263 }, { "epoch": 1.685628066429906, "grad_norm": 0.3417690274270287, "learning_rate": 6.784163954527755e-06, "loss": 0.4588, "step": 10264 }, { "epoch": 1.6857922936382486, "grad_norm": 0.41849951712013345, "learning_rate": 6.7837598768174595e-06, "loss": 0.4612, "step": 10265 }, { "epoch": 1.6859565208465912, "grad_norm": 0.7638550414307494, "learning_rate": 6.783355774303284e-06, "loss": 0.4342, "step": 10266 }, { "epoch": 1.6861207480549338, "grad_norm": 0.34278375489175383, "learning_rate": 6.782951646989617e-06, "loss": 0.4427, "step": 10267 }, { "epoch": 1.6862849752632767, "grad_norm": 0.36146825412263345, "learning_rate": 6.7825474948808495e-06, "loss": 0.4443, "step": 10268 }, { "epoch": 1.6864492024716196, "grad_norm": 0.29089142141868596, "learning_rate": 6.782143317981371e-06, "loss": 0.4551, "step": 10269 }, { "epoch": 1.6866134296799622, "grad_norm": 0.30031960919690487, "learning_rate": 6.78173911629557e-06, "loss": 0.4703, "step": 10270 }, { "epoch": 1.6867776568883048, "grad_norm": 0.3250331596103951, "learning_rate": 6.78133488982784e-06, "loss": 0.4714, "step": 10271 }, { "epoch": 1.6869418840966477, "grad_norm": 0.3971300159279936, "learning_rate": 6.780930638582566e-06, "loss": 0.4546, "step": 10272 }, { "epoch": 1.6871061113049906, "grad_norm": 0.29162352290664173, "learning_rate": 6.7805263625641394e-06, "loss": 0.4755, "step": 10273 }, { "epoch": 1.6872703385133332, "grad_norm": 0.26532718169105196, "learning_rate": 6.780122061776957e-06, "loss": 0.443, "step": 10274 }, { "epoch": 1.6874345657216758, "grad_norm": 0.2722156753431404, "learning_rate": 6.7797177362254035e-06, "loss": 0.4593, "step": 10275 }, { "epoch": 1.6875987929300187, "grad_norm": 0.26193853080166557, "learning_rate": 6.779313385913872e-06, "loss": 0.4471, "step": 10276 }, { "epoch": 1.6877630201383615, "grad_norm": 0.37316229355423847, "learning_rate": 6.778909010846754e-06, "loss": 0.4663, "step": 10277 }, { "epoch": 1.6879272473467042, "grad_norm": 0.3047724813156029, "learning_rate": 6.778504611028443e-06, "loss": 0.4729, "step": 10278 }, { "epoch": 1.6880914745550468, "grad_norm": 0.6649079988395612, "learning_rate": 6.778100186463331e-06, "loss": 0.4553, "step": 10279 }, { "epoch": 1.6882557017633897, "grad_norm": 0.29209916143149306, "learning_rate": 6.777695737155809e-06, "loss": 0.4535, "step": 10280 }, { "epoch": 1.6884199289717325, "grad_norm": 0.31165383824990045, "learning_rate": 6.77729126311027e-06, "loss": 0.4529, "step": 10281 }, { "epoch": 1.6885841561800752, "grad_norm": 0.30989188157862446, "learning_rate": 6.776886764331108e-06, "loss": 0.4538, "step": 10282 }, { "epoch": 1.6887483833884178, "grad_norm": 0.3244201798525414, "learning_rate": 6.776482240822715e-06, "loss": 0.4484, "step": 10283 }, { "epoch": 1.6889126105967605, "grad_norm": 0.29740012853549874, "learning_rate": 6.776077692589484e-06, "loss": 0.4392, "step": 10284 }, { "epoch": 1.6890768378051033, "grad_norm": 0.31728604602189964, "learning_rate": 6.775673119635812e-06, "loss": 0.4747, "step": 10285 }, { "epoch": 1.6892410650134462, "grad_norm": 0.25451478660174215, "learning_rate": 6.775268521966091e-06, "loss": 0.4455, "step": 10286 }, { "epoch": 1.6894052922217888, "grad_norm": 0.28516624095149246, "learning_rate": 6.774863899584714e-06, "loss": 0.4463, "step": 10287 }, { "epoch": 1.6895695194301315, "grad_norm": 0.38342411007047644, "learning_rate": 6.774459252496077e-06, "loss": 0.4654, "step": 10288 }, { "epoch": 1.6897337466384743, "grad_norm": 0.26453823657971653, "learning_rate": 6.774054580704576e-06, "loss": 0.4571, "step": 10289 }, { "epoch": 1.6898979738468172, "grad_norm": 0.2918895135048132, "learning_rate": 6.773649884214603e-06, "loss": 0.443, "step": 10290 }, { "epoch": 1.6900622010551598, "grad_norm": 0.3251405163584019, "learning_rate": 6.773245163030557e-06, "loss": 0.4578, "step": 10291 }, { "epoch": 1.6902264282635024, "grad_norm": 0.2912874050807492, "learning_rate": 6.772840417156831e-06, "loss": 0.4555, "step": 10292 }, { "epoch": 1.6903906554718453, "grad_norm": 0.30295294439343956, "learning_rate": 6.77243564659782e-06, "loss": 0.4802, "step": 10293 }, { "epoch": 1.6905548826801882, "grad_norm": 0.2914439106468315, "learning_rate": 6.7720308513579255e-06, "loss": 0.4621, "step": 10294 }, { "epoch": 1.6907191098885308, "grad_norm": 0.30084357916317045, "learning_rate": 6.77162603144154e-06, "loss": 0.4574, "step": 10295 }, { "epoch": 1.6908833370968734, "grad_norm": 0.31309506377140034, "learning_rate": 6.771221186853059e-06, "loss": 0.4383, "step": 10296 }, { "epoch": 1.6910475643052163, "grad_norm": 0.32088881385022494, "learning_rate": 6.770816317596882e-06, "loss": 0.4492, "step": 10297 }, { "epoch": 1.6912117915135592, "grad_norm": 0.2979143781946795, "learning_rate": 6.770411423677406e-06, "loss": 0.4547, "step": 10298 }, { "epoch": 1.6913760187219018, "grad_norm": 0.24331679942835663, "learning_rate": 6.770006505099029e-06, "loss": 0.4569, "step": 10299 }, { "epoch": 1.6915402459302444, "grad_norm": 0.29674736960822723, "learning_rate": 6.769601561866147e-06, "loss": 0.452, "step": 10300 }, { "epoch": 1.691704473138587, "grad_norm": 0.27199210828461945, "learning_rate": 6.76919659398316e-06, "loss": 0.4464, "step": 10301 }, { "epoch": 1.69186870034693, "grad_norm": 0.26438612536458783, "learning_rate": 6.7687916014544635e-06, "loss": 0.4382, "step": 10302 }, { "epoch": 1.6920329275552728, "grad_norm": 0.32917048248376596, "learning_rate": 6.768386584284458e-06, "loss": 0.4457, "step": 10303 }, { "epoch": 1.6921971547636154, "grad_norm": 0.33675980630175334, "learning_rate": 6.767981542477545e-06, "loss": 0.4518, "step": 10304 }, { "epoch": 1.692361381971958, "grad_norm": 0.32689547936397234, "learning_rate": 6.767576476038119e-06, "loss": 0.457, "step": 10305 }, { "epoch": 1.692525609180301, "grad_norm": 0.33346484391816406, "learning_rate": 6.767171384970583e-06, "loss": 0.4844, "step": 10306 }, { "epoch": 1.6926898363886438, "grad_norm": 0.308072789313566, "learning_rate": 6.766766269279333e-06, "loss": 0.4584, "step": 10307 }, { "epoch": 1.6928540635969864, "grad_norm": 0.287864748449835, "learning_rate": 6.7663611289687725e-06, "loss": 0.4439, "step": 10308 }, { "epoch": 1.693018290805329, "grad_norm": 0.3082917399631894, "learning_rate": 6.765955964043302e-06, "loss": 0.4588, "step": 10309 }, { "epoch": 1.693182518013672, "grad_norm": 0.29546090273406916, "learning_rate": 6.765550774507317e-06, "loss": 0.4353, "step": 10310 }, { "epoch": 1.6933467452220148, "grad_norm": 0.2858280741500684, "learning_rate": 6.765145560365224e-06, "loss": 0.4591, "step": 10311 }, { "epoch": 1.6935109724303574, "grad_norm": 0.32008558469929194, "learning_rate": 6.7647403216214205e-06, "loss": 0.4529, "step": 10312 }, { "epoch": 1.6936751996387, "grad_norm": 0.3146694953864376, "learning_rate": 6.764335058280309e-06, "loss": 0.4696, "step": 10313 }, { "epoch": 1.693839426847043, "grad_norm": 0.3219358380569886, "learning_rate": 6.7639297703462916e-06, "loss": 0.4394, "step": 10314 }, { "epoch": 1.6940036540553858, "grad_norm": 0.3004978132549195, "learning_rate": 6.763524457823768e-06, "loss": 0.4607, "step": 10315 }, { "epoch": 1.6941678812637284, "grad_norm": 0.2733719772784754, "learning_rate": 6.763119120717143e-06, "loss": 0.467, "step": 10316 }, { "epoch": 1.694332108472071, "grad_norm": 0.2706089474104434, "learning_rate": 6.762713759030817e-06, "loss": 0.449, "step": 10317 }, { "epoch": 1.6944963356804137, "grad_norm": 0.42130473705280985, "learning_rate": 6.762308372769194e-06, "loss": 0.4529, "step": 10318 }, { "epoch": 1.6946605628887565, "grad_norm": 0.36806728788529874, "learning_rate": 6.761902961936676e-06, "loss": 0.4709, "step": 10319 }, { "epoch": 1.6948247900970994, "grad_norm": 0.30950909755417966, "learning_rate": 6.761497526537668e-06, "loss": 0.4662, "step": 10320 }, { "epoch": 1.694989017305442, "grad_norm": 0.25509018884086976, "learning_rate": 6.761092066576569e-06, "loss": 0.4302, "step": 10321 }, { "epoch": 1.6951532445137847, "grad_norm": 0.3542012605063986, "learning_rate": 6.760686582057787e-06, "loss": 0.4785, "step": 10322 }, { "epoch": 1.6953174717221275, "grad_norm": 0.27189101345521893, "learning_rate": 6.760281072985725e-06, "loss": 0.471, "step": 10323 }, { "epoch": 1.6954816989304704, "grad_norm": 0.2690817881529284, "learning_rate": 6.7598755393647855e-06, "loss": 0.4621, "step": 10324 }, { "epoch": 1.695645926138813, "grad_norm": 0.2796687617198455, "learning_rate": 6.759469981199375e-06, "loss": 0.4409, "step": 10325 }, { "epoch": 1.6958101533471557, "grad_norm": 0.34430847635917744, "learning_rate": 6.7590643984938965e-06, "loss": 0.4662, "step": 10326 }, { "epoch": 1.6959743805554985, "grad_norm": 0.3545725970327559, "learning_rate": 6.7586587912527575e-06, "loss": 0.4882, "step": 10327 }, { "epoch": 1.6961386077638414, "grad_norm": 0.45759440356995434, "learning_rate": 6.758253159480362e-06, "loss": 0.4548, "step": 10328 }, { "epoch": 1.696302834972184, "grad_norm": 0.3371943712343659, "learning_rate": 6.757847503181114e-06, "loss": 0.4493, "step": 10329 }, { "epoch": 1.6964670621805267, "grad_norm": 0.38712254849700195, "learning_rate": 6.757441822359422e-06, "loss": 0.4553, "step": 10330 }, { "epoch": 1.6966312893888695, "grad_norm": 0.367661804842924, "learning_rate": 6.757036117019689e-06, "loss": 0.4605, "step": 10331 }, { "epoch": 1.6967955165972124, "grad_norm": 0.28797269409836235, "learning_rate": 6.756630387166324e-06, "loss": 0.4527, "step": 10332 }, { "epoch": 1.696959743805555, "grad_norm": 0.31382333583278105, "learning_rate": 6.756224632803734e-06, "loss": 0.4749, "step": 10333 }, { "epoch": 1.6971239710138977, "grad_norm": 0.3185347219714828, "learning_rate": 6.755818853936323e-06, "loss": 0.4722, "step": 10334 }, { "epoch": 1.6972881982222403, "grad_norm": 0.31584226638721585, "learning_rate": 6.755413050568501e-06, "loss": 0.4729, "step": 10335 }, { "epoch": 1.6974524254305832, "grad_norm": 0.3821555035764605, "learning_rate": 6.755007222704674e-06, "loss": 0.468, "step": 10336 }, { "epoch": 1.697616652638926, "grad_norm": 0.3381119959701247, "learning_rate": 6.754601370349249e-06, "loss": 0.4728, "step": 10337 }, { "epoch": 1.6977808798472687, "grad_norm": 0.26045264306014676, "learning_rate": 6.754195493506635e-06, "loss": 0.4314, "step": 10338 }, { "epoch": 1.6979451070556113, "grad_norm": 0.29197349217378316, "learning_rate": 6.753789592181241e-06, "loss": 0.4717, "step": 10339 }, { "epoch": 1.6981093342639542, "grad_norm": 0.341468042844591, "learning_rate": 6.753383666377474e-06, "loss": 0.4558, "step": 10340 }, { "epoch": 1.698273561472297, "grad_norm": 0.2817026922401525, "learning_rate": 6.752977716099744e-06, "loss": 0.4563, "step": 10341 }, { "epoch": 1.6984377886806397, "grad_norm": 0.29656847976847733, "learning_rate": 6.752571741352459e-06, "loss": 0.4578, "step": 10342 }, { "epoch": 1.6986020158889823, "grad_norm": 0.28247072661654443, "learning_rate": 6.752165742140029e-06, "loss": 0.439, "step": 10343 }, { "epoch": 1.6987662430973252, "grad_norm": 0.28792432296827525, "learning_rate": 6.751759718466862e-06, "loss": 0.454, "step": 10344 }, { "epoch": 1.698930470305668, "grad_norm": 0.3713145261232129, "learning_rate": 6.751353670337371e-06, "loss": 0.4646, "step": 10345 }, { "epoch": 1.6990946975140107, "grad_norm": 0.3210942286713483, "learning_rate": 6.7509475977559614e-06, "loss": 0.4533, "step": 10346 }, { "epoch": 1.6992589247223533, "grad_norm": 0.25200029858425016, "learning_rate": 6.750541500727048e-06, "loss": 0.4567, "step": 10347 }, { "epoch": 1.6994231519306962, "grad_norm": 0.2854102830910053, "learning_rate": 6.7501353792550404e-06, "loss": 0.4514, "step": 10348 }, { "epoch": 1.699587379139039, "grad_norm": 0.4609283687810497, "learning_rate": 6.749729233344347e-06, "loss": 0.462, "step": 10349 }, { "epoch": 1.6997516063473816, "grad_norm": 0.2920906286091975, "learning_rate": 6.749323062999382e-06, "loss": 0.4496, "step": 10350 }, { "epoch": 1.6999158335557243, "grad_norm": 0.34357740596563463, "learning_rate": 6.748916868224554e-06, "loss": 0.467, "step": 10351 }, { "epoch": 1.700080060764067, "grad_norm": 0.35815701259371335, "learning_rate": 6.748510649024277e-06, "loss": 0.4445, "step": 10352 }, { "epoch": 1.7002442879724098, "grad_norm": 0.2697156568134558, "learning_rate": 6.748104405402963e-06, "loss": 0.4506, "step": 10353 }, { "epoch": 1.7004085151807526, "grad_norm": 0.3074531144110258, "learning_rate": 6.747698137365023e-06, "loss": 0.4523, "step": 10354 }, { "epoch": 1.7005727423890953, "grad_norm": 0.33685069490300174, "learning_rate": 6.7472918449148695e-06, "loss": 0.4725, "step": 10355 }, { "epoch": 1.700736969597438, "grad_norm": 0.3240178685891051, "learning_rate": 6.746885528056915e-06, "loss": 0.4745, "step": 10356 }, { "epoch": 1.7009011968057808, "grad_norm": 0.30519208673786224, "learning_rate": 6.746479186795573e-06, "loss": 0.4722, "step": 10357 }, { "epoch": 1.7010654240141236, "grad_norm": 0.284689567099896, "learning_rate": 6.746072821135258e-06, "loss": 0.469, "step": 10358 }, { "epoch": 1.7012296512224663, "grad_norm": 0.3561436791068111, "learning_rate": 6.745666431080382e-06, "loss": 0.4613, "step": 10359 }, { "epoch": 1.701393878430809, "grad_norm": 0.2637723404608694, "learning_rate": 6.745260016635358e-06, "loss": 0.4644, "step": 10360 }, { "epoch": 1.7015581056391518, "grad_norm": 0.26636461148580687, "learning_rate": 6.744853577804601e-06, "loss": 0.4464, "step": 10361 }, { "epoch": 1.7017223328474946, "grad_norm": 1.0327093958429159, "learning_rate": 6.744447114592526e-06, "loss": 0.4665, "step": 10362 }, { "epoch": 1.7018865600558373, "grad_norm": 0.2762304703339504, "learning_rate": 6.744040627003549e-06, "loss": 0.473, "step": 10363 }, { "epoch": 1.70205078726418, "grad_norm": 0.41730054692606017, "learning_rate": 6.743634115042082e-06, "loss": 0.4709, "step": 10364 }, { "epoch": 1.7022150144725228, "grad_norm": 0.25904651355072494, "learning_rate": 6.743227578712539e-06, "loss": 0.444, "step": 10365 }, { "epoch": 1.7023792416808656, "grad_norm": 0.980301374420022, "learning_rate": 6.74282101801934e-06, "loss": 0.4497, "step": 10366 }, { "epoch": 1.7025434688892083, "grad_norm": 0.299626853323015, "learning_rate": 6.742414432966896e-06, "loss": 0.4636, "step": 10367 }, { "epoch": 1.702707696097551, "grad_norm": 0.3115433025902387, "learning_rate": 6.742007823559627e-06, "loss": 0.4485, "step": 10368 }, { "epoch": 1.7028719233058935, "grad_norm": 0.3507573006468152, "learning_rate": 6.741601189801946e-06, "loss": 0.456, "step": 10369 }, { "epoch": 1.7030361505142364, "grad_norm": 0.36607655888975515, "learning_rate": 6.741194531698271e-06, "loss": 0.4585, "step": 10370 }, { "epoch": 1.7032003777225793, "grad_norm": 0.283069528317907, "learning_rate": 6.740787849253018e-06, "loss": 0.4582, "step": 10371 }, { "epoch": 1.703364604930922, "grad_norm": 0.3830473238004654, "learning_rate": 6.740381142470605e-06, "loss": 0.4409, "step": 10372 }, { "epoch": 1.7035288321392645, "grad_norm": 0.2768537602884907, "learning_rate": 6.739974411355448e-06, "loss": 0.4537, "step": 10373 }, { "epoch": 1.7036930593476074, "grad_norm": 0.3490536084340397, "learning_rate": 6.739567655911965e-06, "loss": 0.453, "step": 10374 }, { "epoch": 1.7038572865559503, "grad_norm": 0.36771767087617424, "learning_rate": 6.739160876144575e-06, "loss": 0.4766, "step": 10375 }, { "epoch": 1.704021513764293, "grad_norm": 0.2878841697769496, "learning_rate": 6.738754072057693e-06, "loss": 0.4759, "step": 10376 }, { "epoch": 1.7041857409726355, "grad_norm": 0.3020927112421301, "learning_rate": 6.738347243655741e-06, "loss": 0.4874, "step": 10377 }, { "epoch": 1.7043499681809784, "grad_norm": 0.29368059878224284, "learning_rate": 6.737940390943134e-06, "loss": 0.4657, "step": 10378 }, { "epoch": 1.7045141953893213, "grad_norm": 0.339697488464221, "learning_rate": 6.7375335139242936e-06, "loss": 0.4602, "step": 10379 }, { "epoch": 1.704678422597664, "grad_norm": 0.4986765984982801, "learning_rate": 6.737126612603637e-06, "loss": 0.4584, "step": 10380 }, { "epoch": 1.7048426498060065, "grad_norm": 0.28694916511887175, "learning_rate": 6.7367196869855845e-06, "loss": 0.447, "step": 10381 }, { "epoch": 1.7050068770143494, "grad_norm": 0.37107709671599887, "learning_rate": 6.736312737074557e-06, "loss": 0.4554, "step": 10382 }, { "epoch": 1.7051711042226922, "grad_norm": 0.32138012549222017, "learning_rate": 6.735905762874972e-06, "loss": 0.4549, "step": 10383 }, { "epoch": 1.7053353314310349, "grad_norm": 0.29340690471890696, "learning_rate": 6.73549876439125e-06, "loss": 0.4647, "step": 10384 }, { "epoch": 1.7054995586393775, "grad_norm": 0.35984360098887486, "learning_rate": 6.735091741627811e-06, "loss": 0.4483, "step": 10385 }, { "epoch": 1.7056637858477202, "grad_norm": 0.2811151158720807, "learning_rate": 6.734684694589078e-06, "loss": 0.4787, "step": 10386 }, { "epoch": 1.705828013056063, "grad_norm": 0.3031459072365189, "learning_rate": 6.7342776232794725e-06, "loss": 0.4511, "step": 10387 }, { "epoch": 1.7059922402644059, "grad_norm": 0.2571523484193839, "learning_rate": 6.733870527703411e-06, "loss": 0.4524, "step": 10388 }, { "epoch": 1.7061564674727485, "grad_norm": 0.3361334913411509, "learning_rate": 6.733463407865319e-06, "loss": 0.4628, "step": 10389 }, { "epoch": 1.7063206946810912, "grad_norm": 0.27863555359391223, "learning_rate": 6.733056263769616e-06, "loss": 0.4745, "step": 10390 }, { "epoch": 1.706484921889434, "grad_norm": 0.31072955149297077, "learning_rate": 6.732649095420726e-06, "loss": 0.457, "step": 10391 }, { "epoch": 1.7066491490977769, "grad_norm": 0.388313133889824, "learning_rate": 6.73224190282307e-06, "loss": 0.4623, "step": 10392 }, { "epoch": 1.7068133763061195, "grad_norm": 1.0394211962031206, "learning_rate": 6.73183468598107e-06, "loss": 0.4495, "step": 10393 }, { "epoch": 1.7069776035144621, "grad_norm": 0.40572430108763996, "learning_rate": 6.73142744489915e-06, "loss": 0.4471, "step": 10394 }, { "epoch": 1.707141830722805, "grad_norm": 0.242324759201867, "learning_rate": 6.731020179581732e-06, "loss": 0.4317, "step": 10395 }, { "epoch": 1.7073060579311479, "grad_norm": 0.31949901700226574, "learning_rate": 6.7306128900332405e-06, "loss": 0.4409, "step": 10396 }, { "epoch": 1.7074702851394905, "grad_norm": 0.2662188504303161, "learning_rate": 6.730205576258099e-06, "loss": 0.4576, "step": 10397 }, { "epoch": 1.7076345123478331, "grad_norm": 0.34371474136704083, "learning_rate": 6.7297982382607295e-06, "loss": 0.4603, "step": 10398 }, { "epoch": 1.707798739556176, "grad_norm": 0.5706657024675674, "learning_rate": 6.7293908760455575e-06, "loss": 0.4607, "step": 10399 }, { "epoch": 1.7079629667645189, "grad_norm": 0.4611294102411975, "learning_rate": 6.728983489617008e-06, "loss": 0.4773, "step": 10400 }, { "epoch": 1.7081271939728615, "grad_norm": 0.2924681797853017, "learning_rate": 6.728576078979503e-06, "loss": 0.4431, "step": 10401 }, { "epoch": 1.7082914211812041, "grad_norm": 0.34761075327557933, "learning_rate": 6.7281686441374705e-06, "loss": 0.4424, "step": 10402 }, { "epoch": 1.7084556483895468, "grad_norm": 0.26384299959420315, "learning_rate": 6.727761185095334e-06, "loss": 0.4524, "step": 10403 }, { "epoch": 1.7086198755978896, "grad_norm": 0.3084678291137836, "learning_rate": 6.727353701857519e-06, "loss": 0.4659, "step": 10404 }, { "epoch": 1.7087841028062325, "grad_norm": 0.2901345922105834, "learning_rate": 6.7269461944284525e-06, "loss": 0.4788, "step": 10405 }, { "epoch": 1.7089483300145751, "grad_norm": 0.27440293975090463, "learning_rate": 6.7265386628125584e-06, "loss": 0.4579, "step": 10406 }, { "epoch": 1.7091125572229178, "grad_norm": 0.3972081442525347, "learning_rate": 6.726131107014264e-06, "loss": 0.4322, "step": 10407 }, { "epoch": 1.7092767844312606, "grad_norm": 0.334535225703064, "learning_rate": 6.7257235270379955e-06, "loss": 0.4618, "step": 10408 }, { "epoch": 1.7094410116396035, "grad_norm": 0.2765674491027994, "learning_rate": 6.725315922888179e-06, "loss": 0.4544, "step": 10409 }, { "epoch": 1.7096052388479461, "grad_norm": 0.34975187365135374, "learning_rate": 6.724908294569242e-06, "loss": 0.4603, "step": 10410 }, { "epoch": 1.7097694660562888, "grad_norm": 0.2856071808433099, "learning_rate": 6.724500642085614e-06, "loss": 0.4412, "step": 10411 }, { "epoch": 1.7099336932646316, "grad_norm": 0.8417017909558503, "learning_rate": 6.7240929654417196e-06, "loss": 0.4508, "step": 10412 }, { "epoch": 1.7100979204729745, "grad_norm": 0.2939192815435623, "learning_rate": 6.723685264641986e-06, "loss": 0.4666, "step": 10413 }, { "epoch": 1.7102621476813171, "grad_norm": 0.26199379189429295, "learning_rate": 6.723277539690842e-06, "loss": 0.4474, "step": 10414 }, { "epoch": 1.7104263748896598, "grad_norm": 0.29879744281049325, "learning_rate": 6.7228697905927184e-06, "loss": 0.4793, "step": 10415 }, { "epoch": 1.7105906020980026, "grad_norm": 0.2918493779231738, "learning_rate": 6.722462017352042e-06, "loss": 0.4618, "step": 10416 }, { "epoch": 1.7107548293063455, "grad_norm": 0.3098008325301708, "learning_rate": 6.722054219973242e-06, "loss": 0.4552, "step": 10417 }, { "epoch": 1.7109190565146881, "grad_norm": 0.3165158551796311, "learning_rate": 6.721646398460745e-06, "loss": 0.4707, "step": 10418 }, { "epoch": 1.7110832837230308, "grad_norm": 0.30672549273761096, "learning_rate": 6.721238552818983e-06, "loss": 0.4417, "step": 10419 }, { "epoch": 1.7112475109313734, "grad_norm": 0.2584646055648473, "learning_rate": 6.7208306830523844e-06, "loss": 0.4383, "step": 10420 }, { "epoch": 1.7114117381397163, "grad_norm": 0.2717102838108191, "learning_rate": 6.720422789165382e-06, "loss": 0.4503, "step": 10421 }, { "epoch": 1.7115759653480591, "grad_norm": 0.37441039548738764, "learning_rate": 6.720014871162402e-06, "loss": 0.448, "step": 10422 }, { "epoch": 1.7117401925564018, "grad_norm": 0.3053067938918457, "learning_rate": 6.719606929047875e-06, "loss": 0.4555, "step": 10423 }, { "epoch": 1.7119044197647444, "grad_norm": 0.33951489319508066, "learning_rate": 6.719198962826234e-06, "loss": 0.4508, "step": 10424 }, { "epoch": 1.7120686469730872, "grad_norm": 0.40669330323048297, "learning_rate": 6.718790972501909e-06, "loss": 0.4462, "step": 10425 }, { "epoch": 1.71223287418143, "grad_norm": 0.31788264969946245, "learning_rate": 6.718382958079332e-06, "loss": 0.4448, "step": 10426 }, { "epoch": 1.7123971013897727, "grad_norm": 0.2972005514734704, "learning_rate": 6.717974919562932e-06, "loss": 0.4546, "step": 10427 }, { "epoch": 1.7125613285981154, "grad_norm": 0.3405902179433068, "learning_rate": 6.717566856957143e-06, "loss": 0.4451, "step": 10428 }, { "epoch": 1.7127255558064582, "grad_norm": 0.27053272243654325, "learning_rate": 6.717158770266396e-06, "loss": 0.4484, "step": 10429 }, { "epoch": 1.712889783014801, "grad_norm": 0.2926589561710632, "learning_rate": 6.716750659495123e-06, "loss": 0.4565, "step": 10430 }, { "epoch": 1.7130540102231437, "grad_norm": 0.3146258821459904, "learning_rate": 6.716342524647757e-06, "loss": 0.4634, "step": 10431 }, { "epoch": 1.7132182374314864, "grad_norm": 0.45155855350560037, "learning_rate": 6.71593436572873e-06, "loss": 0.456, "step": 10432 }, { "epoch": 1.7133824646398292, "grad_norm": 0.31378119401303933, "learning_rate": 6.715526182742477e-06, "loss": 0.4758, "step": 10433 }, { "epoch": 1.713546691848172, "grad_norm": 0.26711786147597105, "learning_rate": 6.715117975693429e-06, "loss": 0.4353, "step": 10434 }, { "epoch": 1.7137109190565147, "grad_norm": 0.28845664173005386, "learning_rate": 6.71470974458602e-06, "loss": 0.4627, "step": 10435 }, { "epoch": 1.7138751462648574, "grad_norm": 0.4823107200435918, "learning_rate": 6.7143014894246846e-06, "loss": 0.4466, "step": 10436 }, { "epoch": 1.7140393734732, "grad_norm": 0.2786151787980394, "learning_rate": 6.713893210213857e-06, "loss": 0.45, "step": 10437 }, { "epoch": 1.7142036006815429, "grad_norm": 0.2935876494132793, "learning_rate": 6.713484906957971e-06, "loss": 0.492, "step": 10438 }, { "epoch": 1.7143678278898857, "grad_norm": 0.28236963636794876, "learning_rate": 6.71307657966146e-06, "loss": 0.4516, "step": 10439 }, { "epoch": 1.7145320550982284, "grad_norm": 0.6496068241742552, "learning_rate": 6.712668228328761e-06, "loss": 0.4313, "step": 10440 }, { "epoch": 1.714696282306571, "grad_norm": 0.6204538490273221, "learning_rate": 6.712259852964308e-06, "loss": 0.4574, "step": 10441 }, { "epoch": 1.7148605095149139, "grad_norm": 0.2793118226529691, "learning_rate": 6.711851453572535e-06, "loss": 0.4689, "step": 10442 }, { "epoch": 1.7150247367232567, "grad_norm": 0.2744098757190267, "learning_rate": 6.71144303015788e-06, "loss": 0.4442, "step": 10443 }, { "epoch": 1.7151889639315994, "grad_norm": 0.34174424481501375, "learning_rate": 6.711034582724778e-06, "loss": 0.4657, "step": 10444 }, { "epoch": 1.715353191139942, "grad_norm": 0.2955658019877106, "learning_rate": 6.710626111277666e-06, "loss": 0.4418, "step": 10445 }, { "epoch": 1.7155174183482849, "grad_norm": 0.2634439956111268, "learning_rate": 6.710217615820979e-06, "loss": 0.4597, "step": 10446 }, { "epoch": 1.7156816455566277, "grad_norm": 0.3298989104289385, "learning_rate": 6.709809096359153e-06, "loss": 0.4435, "step": 10447 }, { "epoch": 1.7158458727649704, "grad_norm": 0.314188393745869, "learning_rate": 6.709400552896627e-06, "loss": 0.4441, "step": 10448 }, { "epoch": 1.716010099973313, "grad_norm": 0.2666842409720632, "learning_rate": 6.708991985437836e-06, "loss": 0.4552, "step": 10449 }, { "epoch": 1.7161743271816559, "grad_norm": 0.3231120636371941, "learning_rate": 6.70858339398722e-06, "loss": 0.4706, "step": 10450 }, { "epoch": 1.7163385543899987, "grad_norm": 0.2752932839799465, "learning_rate": 6.708174778549216e-06, "loss": 0.4474, "step": 10451 }, { "epoch": 1.7165027815983414, "grad_norm": 0.30247348745969427, "learning_rate": 6.70776613912826e-06, "loss": 0.4403, "step": 10452 }, { "epoch": 1.716667008806684, "grad_norm": 0.37340998671693054, "learning_rate": 6.707357475728792e-06, "loss": 0.4617, "step": 10453 }, { "epoch": 1.7168312360150266, "grad_norm": 0.2745424306375286, "learning_rate": 6.70694878835525e-06, "loss": 0.4498, "step": 10454 }, { "epoch": 1.7169954632233695, "grad_norm": 0.43213806007948663, "learning_rate": 6.706540077012074e-06, "loss": 0.4333, "step": 10455 }, { "epoch": 1.7171596904317123, "grad_norm": 0.31565939323718656, "learning_rate": 6.7061313417037e-06, "loss": 0.4752, "step": 10456 }, { "epoch": 1.717323917640055, "grad_norm": 0.5251729427395571, "learning_rate": 6.705722582434569e-06, "loss": 0.4505, "step": 10457 }, { "epoch": 1.7174881448483976, "grad_norm": 0.3968038722142534, "learning_rate": 6.705313799209123e-06, "loss": 0.46, "step": 10458 }, { "epoch": 1.7176523720567405, "grad_norm": 0.36237150280496244, "learning_rate": 6.704904992031796e-06, "loss": 0.4517, "step": 10459 }, { "epoch": 1.7178165992650833, "grad_norm": 0.3266331407754335, "learning_rate": 6.704496160907035e-06, "loss": 0.4553, "step": 10460 }, { "epoch": 1.717980826473426, "grad_norm": 0.35061228607963685, "learning_rate": 6.704087305839275e-06, "loss": 0.449, "step": 10461 }, { "epoch": 1.7181450536817686, "grad_norm": 0.3011777257227322, "learning_rate": 6.703678426832958e-06, "loss": 0.4715, "step": 10462 }, { "epoch": 1.7183092808901115, "grad_norm": 0.3016498684711106, "learning_rate": 6.703269523892526e-06, "loss": 0.4533, "step": 10463 }, { "epoch": 1.7184735080984543, "grad_norm": 0.3188339916151969, "learning_rate": 6.7028605970224175e-06, "loss": 0.4638, "step": 10464 }, { "epoch": 1.718637735306797, "grad_norm": 0.2868947673367226, "learning_rate": 6.702451646227077e-06, "loss": 0.4535, "step": 10465 }, { "epoch": 1.7188019625151396, "grad_norm": 0.3513689796636359, "learning_rate": 6.702042671510945e-06, "loss": 0.4534, "step": 10466 }, { "epoch": 1.7189661897234825, "grad_norm": 0.2835644141881405, "learning_rate": 6.701633672878463e-06, "loss": 0.4514, "step": 10467 }, { "epoch": 1.7191304169318253, "grad_norm": 0.4246394600668017, "learning_rate": 6.701224650334072e-06, "loss": 0.4525, "step": 10468 }, { "epoch": 1.719294644140168, "grad_norm": 0.4832348262491999, "learning_rate": 6.700815603882218e-06, "loss": 0.467, "step": 10469 }, { "epoch": 1.7194588713485106, "grad_norm": 0.2393193885818513, "learning_rate": 6.700406533527338e-06, "loss": 0.4606, "step": 10470 }, { "epoch": 1.7196230985568532, "grad_norm": 0.28340704067344497, "learning_rate": 6.699997439273881e-06, "loss": 0.473, "step": 10471 }, { "epoch": 1.719787325765196, "grad_norm": 0.29619829364482575, "learning_rate": 6.6995883211262855e-06, "loss": 0.467, "step": 10472 }, { "epoch": 1.719951552973539, "grad_norm": 0.2680896560474138, "learning_rate": 6.699179179088998e-06, "loss": 0.4677, "step": 10473 }, { "epoch": 1.7201157801818816, "grad_norm": 0.29926212745864955, "learning_rate": 6.69877001316646e-06, "loss": 0.4517, "step": 10474 }, { "epoch": 1.7202800073902242, "grad_norm": 0.30751652357242387, "learning_rate": 6.698360823363117e-06, "loss": 0.4574, "step": 10475 }, { "epoch": 1.720444234598567, "grad_norm": 0.3328073603049628, "learning_rate": 6.697951609683412e-06, "loss": 0.4514, "step": 10476 }, { "epoch": 1.72060846180691, "grad_norm": 0.24904973625178245, "learning_rate": 6.697542372131789e-06, "loss": 0.4691, "step": 10477 }, { "epoch": 1.7207726890152526, "grad_norm": 0.3055567768298029, "learning_rate": 6.697133110712695e-06, "loss": 0.4415, "step": 10478 }, { "epoch": 1.7209369162235952, "grad_norm": 0.25915721154981863, "learning_rate": 6.696723825430574e-06, "loss": 0.4787, "step": 10479 }, { "epoch": 1.721101143431938, "grad_norm": 0.2802197692823335, "learning_rate": 6.696314516289872e-06, "loss": 0.4601, "step": 10480 }, { "epoch": 1.721265370640281, "grad_norm": 0.3525676666157653, "learning_rate": 6.695905183295031e-06, "loss": 0.4588, "step": 10481 }, { "epoch": 1.7214295978486236, "grad_norm": 0.2727601150327771, "learning_rate": 6.695495826450501e-06, "loss": 0.4584, "step": 10482 }, { "epoch": 1.7215938250569662, "grad_norm": 0.32149351547771154, "learning_rate": 6.695086445760725e-06, "loss": 0.456, "step": 10483 }, { "epoch": 1.721758052265309, "grad_norm": 0.39117096357713665, "learning_rate": 6.694677041230152e-06, "loss": 0.4586, "step": 10484 }, { "epoch": 1.721922279473652, "grad_norm": 0.3322750991270649, "learning_rate": 6.694267612863227e-06, "loss": 0.4635, "step": 10485 }, { "epoch": 1.7220865066819946, "grad_norm": 0.32418802055402957, "learning_rate": 6.693858160664394e-06, "loss": 0.447, "step": 10486 }, { "epoch": 1.7222507338903372, "grad_norm": 0.2675172834941799, "learning_rate": 6.693448684638106e-06, "loss": 0.435, "step": 10487 }, { "epoch": 1.7224149610986799, "grad_norm": 0.2947083214764386, "learning_rate": 6.693039184788806e-06, "loss": 0.4471, "step": 10488 }, { "epoch": 1.7225791883070227, "grad_norm": 0.29546155758230347, "learning_rate": 6.692629661120944e-06, "loss": 0.4609, "step": 10489 }, { "epoch": 1.7227434155153656, "grad_norm": 0.3072763602251934, "learning_rate": 6.692220113638965e-06, "loss": 0.4698, "step": 10490 }, { "epoch": 1.7229076427237082, "grad_norm": 0.2612485647618418, "learning_rate": 6.691810542347319e-06, "loss": 0.4593, "step": 10491 }, { "epoch": 1.7230718699320509, "grad_norm": 0.27610091859399394, "learning_rate": 6.691400947250454e-06, "loss": 0.4466, "step": 10492 }, { "epoch": 1.7232360971403937, "grad_norm": 0.2994420717725685, "learning_rate": 6.690991328352819e-06, "loss": 0.4596, "step": 10493 }, { "epoch": 1.7234003243487366, "grad_norm": 0.3337391439613997, "learning_rate": 6.690581685658863e-06, "loss": 0.4519, "step": 10494 }, { "epoch": 1.7235645515570792, "grad_norm": 0.283817248370631, "learning_rate": 6.690172019173035e-06, "loss": 0.4538, "step": 10495 }, { "epoch": 1.7237287787654219, "grad_norm": 0.27336394201895664, "learning_rate": 6.689762328899783e-06, "loss": 0.4741, "step": 10496 }, { "epoch": 1.7238930059737647, "grad_norm": 0.4125973348785593, "learning_rate": 6.689352614843557e-06, "loss": 0.476, "step": 10497 }, { "epoch": 1.7240572331821076, "grad_norm": 0.35607773234902174, "learning_rate": 6.688942877008809e-06, "loss": 0.4361, "step": 10498 }, { "epoch": 1.7242214603904502, "grad_norm": 0.3158612111856992, "learning_rate": 6.688533115399987e-06, "loss": 0.4657, "step": 10499 }, { "epoch": 1.7243856875987928, "grad_norm": 0.30310414980860667, "learning_rate": 6.688123330021543e-06, "loss": 0.4622, "step": 10500 }, { "epoch": 1.7245499148071357, "grad_norm": 0.29849694832345836, "learning_rate": 6.687713520877926e-06, "loss": 0.4584, "step": 10501 }, { "epoch": 1.7247141420154786, "grad_norm": 0.2651875391881616, "learning_rate": 6.687303687973588e-06, "loss": 0.4753, "step": 10502 }, { "epoch": 1.7248783692238212, "grad_norm": 0.31880123609608296, "learning_rate": 6.68689383131298e-06, "loss": 0.4422, "step": 10503 }, { "epoch": 1.7250425964321638, "grad_norm": 0.30651866368968245, "learning_rate": 6.6864839509005534e-06, "loss": 0.4674, "step": 10504 }, { "epoch": 1.7252068236405065, "grad_norm": 0.273843576978905, "learning_rate": 6.6860740467407594e-06, "loss": 0.4542, "step": 10505 }, { "epoch": 1.7253710508488493, "grad_norm": 0.33736322841446215, "learning_rate": 6.685664118838051e-06, "loss": 0.4733, "step": 10506 }, { "epoch": 1.7255352780571922, "grad_norm": 0.28310879953417917, "learning_rate": 6.685254167196879e-06, "loss": 0.4325, "step": 10507 }, { "epoch": 1.7256995052655348, "grad_norm": 0.30625720125880435, "learning_rate": 6.684844191821698e-06, "loss": 0.4654, "step": 10508 }, { "epoch": 1.7258637324738775, "grad_norm": 0.4041210802480253, "learning_rate": 6.684434192716959e-06, "loss": 0.4629, "step": 10509 }, { "epoch": 1.7260279596822203, "grad_norm": 0.3285621834318125, "learning_rate": 6.684024169887115e-06, "loss": 0.4517, "step": 10510 }, { "epoch": 1.7261921868905632, "grad_norm": 0.5371314885182225, "learning_rate": 6.68361412333662e-06, "loss": 0.4544, "step": 10511 }, { "epoch": 1.7263564140989058, "grad_norm": 0.3346484736752237, "learning_rate": 6.683204053069928e-06, "loss": 0.4483, "step": 10512 }, { "epoch": 1.7265206413072485, "grad_norm": 0.265535426288644, "learning_rate": 6.68279395909149e-06, "loss": 0.455, "step": 10513 }, { "epoch": 1.7266848685155913, "grad_norm": 0.272151834494228, "learning_rate": 6.682383841405764e-06, "loss": 0.4627, "step": 10514 }, { "epoch": 1.7268490957239342, "grad_norm": 0.2768667797354831, "learning_rate": 6.681973700017202e-06, "loss": 0.4622, "step": 10515 }, { "epoch": 1.7270133229322768, "grad_norm": 0.2611288871413042, "learning_rate": 6.681563534930258e-06, "loss": 0.4692, "step": 10516 }, { "epoch": 1.7271775501406195, "grad_norm": 0.30032015789813, "learning_rate": 6.681153346149388e-06, "loss": 0.4425, "step": 10517 }, { "epoch": 1.7273417773489623, "grad_norm": 0.2765399919998032, "learning_rate": 6.680743133679048e-06, "loss": 0.4418, "step": 10518 }, { "epoch": 1.7275060045573052, "grad_norm": 0.2637540347081802, "learning_rate": 6.6803328975236904e-06, "loss": 0.4579, "step": 10519 }, { "epoch": 1.7276702317656478, "grad_norm": 0.405385672011455, "learning_rate": 6.679922637687772e-06, "loss": 0.4557, "step": 10520 }, { "epoch": 1.7278344589739905, "grad_norm": 0.2911681189325491, "learning_rate": 6.679512354175751e-06, "loss": 0.4586, "step": 10521 }, { "epoch": 1.727998686182333, "grad_norm": 0.2675727811944536, "learning_rate": 6.679102046992079e-06, "loss": 0.4376, "step": 10522 }, { "epoch": 1.728162913390676, "grad_norm": 0.2817047223528335, "learning_rate": 6.678691716141217e-06, "loss": 0.4556, "step": 10523 }, { "epoch": 1.7283271405990188, "grad_norm": 0.30171073273169, "learning_rate": 6.678281361627619e-06, "loss": 0.433, "step": 10524 }, { "epoch": 1.7284913678073615, "grad_norm": 0.33379983997579427, "learning_rate": 6.677870983455741e-06, "loss": 0.4654, "step": 10525 }, { "epoch": 1.728655595015704, "grad_norm": 0.2722756149058063, "learning_rate": 6.677460581630043e-06, "loss": 0.4506, "step": 10526 }, { "epoch": 1.728819822224047, "grad_norm": 0.8752736675862947, "learning_rate": 6.677050156154979e-06, "loss": 0.4607, "step": 10527 }, { "epoch": 1.7289840494323898, "grad_norm": 0.8938119846179062, "learning_rate": 6.67663970703501e-06, "loss": 0.4628, "step": 10528 }, { "epoch": 1.7291482766407325, "grad_norm": 0.29687175309764746, "learning_rate": 6.676229234274592e-06, "loss": 0.4473, "step": 10529 }, { "epoch": 1.729312503849075, "grad_norm": 0.2983652847761805, "learning_rate": 6.675818737878183e-06, "loss": 0.464, "step": 10530 }, { "epoch": 1.729476731057418, "grad_norm": 0.2882164982567439, "learning_rate": 6.67540821785024e-06, "loss": 0.4656, "step": 10531 }, { "epoch": 1.7296409582657608, "grad_norm": 0.3597729555329601, "learning_rate": 6.674997674195225e-06, "loss": 0.4854, "step": 10532 }, { "epoch": 1.7298051854741034, "grad_norm": 0.23773034919142239, "learning_rate": 6.674587106917597e-06, "loss": 0.419, "step": 10533 }, { "epoch": 1.729969412682446, "grad_norm": 0.3158190970863376, "learning_rate": 6.674176516021812e-06, "loss": 0.457, "step": 10534 }, { "epoch": 1.730133639890789, "grad_norm": 0.32428977605563186, "learning_rate": 6.6737659015123315e-06, "loss": 0.4549, "step": 10535 }, { "epoch": 1.7302978670991318, "grad_norm": 0.2866692455873746, "learning_rate": 6.673355263393612e-06, "loss": 0.4351, "step": 10536 }, { "epoch": 1.7304620943074744, "grad_norm": 0.38551922492958735, "learning_rate": 6.6729446016701195e-06, "loss": 0.475, "step": 10537 }, { "epoch": 1.730626321515817, "grad_norm": 0.5243113966027705, "learning_rate": 6.672533916346309e-06, "loss": 0.475, "step": 10538 }, { "epoch": 1.7307905487241597, "grad_norm": 0.2515301096578204, "learning_rate": 6.672123207426644e-06, "loss": 0.4438, "step": 10539 }, { "epoch": 1.7309547759325026, "grad_norm": 0.3074191829940396, "learning_rate": 6.671712474915583e-06, "loss": 0.4616, "step": 10540 }, { "epoch": 1.7311190031408454, "grad_norm": 0.2880000746192107, "learning_rate": 6.671301718817586e-06, "loss": 0.4564, "step": 10541 }, { "epoch": 1.731283230349188, "grad_norm": 0.2962969244487233, "learning_rate": 6.67089093913712e-06, "loss": 0.471, "step": 10542 }, { "epoch": 1.7314474575575307, "grad_norm": 0.27218947530068305, "learning_rate": 6.67048013587864e-06, "loss": 0.4569, "step": 10543 }, { "epoch": 1.7316116847658736, "grad_norm": 0.3374040851327258, "learning_rate": 6.670069309046611e-06, "loss": 0.4434, "step": 10544 }, { "epoch": 1.7317759119742164, "grad_norm": 0.3706121145690397, "learning_rate": 6.669658458645493e-06, "loss": 0.4644, "step": 10545 }, { "epoch": 1.731940139182559, "grad_norm": 0.3597240408303545, "learning_rate": 6.669247584679751e-06, "loss": 0.4718, "step": 10546 }, { "epoch": 1.7321043663909017, "grad_norm": 0.34526245876029543, "learning_rate": 6.668836687153844e-06, "loss": 0.4502, "step": 10547 }, { "epoch": 1.7322685935992446, "grad_norm": 0.25465094933578114, "learning_rate": 6.668425766072239e-06, "loss": 0.4451, "step": 10548 }, { "epoch": 1.7324328208075874, "grad_norm": 0.2753508045274311, "learning_rate": 6.6680148214393965e-06, "loss": 0.4478, "step": 10549 }, { "epoch": 1.73259704801593, "grad_norm": 0.2400898508376296, "learning_rate": 6.667603853259779e-06, "loss": 0.4785, "step": 10550 }, { "epoch": 1.7327612752242727, "grad_norm": 0.2739499399340202, "learning_rate": 6.667192861537851e-06, "loss": 0.4763, "step": 10551 }, { "epoch": 1.7329255024326156, "grad_norm": 0.2838378785010479, "learning_rate": 6.666781846278077e-06, "loss": 0.4431, "step": 10552 }, { "epoch": 1.7330897296409584, "grad_norm": 0.3275742837311978, "learning_rate": 6.6663708074849195e-06, "loss": 0.4555, "step": 10553 }, { "epoch": 1.733253956849301, "grad_norm": 0.35097480269581555, "learning_rate": 6.665959745162845e-06, "loss": 0.4681, "step": 10554 }, { "epoch": 1.7334181840576437, "grad_norm": 0.9181025874191894, "learning_rate": 6.6655486593163155e-06, "loss": 0.4616, "step": 10555 }, { "epoch": 1.7335824112659863, "grad_norm": 0.7894254312466302, "learning_rate": 6.665137549949797e-06, "loss": 0.4539, "step": 10556 }, { "epoch": 1.7337466384743292, "grad_norm": 0.43038099202683344, "learning_rate": 6.664726417067755e-06, "loss": 0.4593, "step": 10557 }, { "epoch": 1.733910865682672, "grad_norm": 0.4093328771869033, "learning_rate": 6.664315260674654e-06, "loss": 0.4361, "step": 10558 }, { "epoch": 1.7340750928910147, "grad_norm": 0.3082230765897813, "learning_rate": 6.66390408077496e-06, "loss": 0.4586, "step": 10559 }, { "epoch": 1.7342393200993573, "grad_norm": 0.32275708574988343, "learning_rate": 6.663492877373138e-06, "loss": 0.4461, "step": 10560 }, { "epoch": 1.7344035473077002, "grad_norm": 0.31035788426403466, "learning_rate": 6.663081650473655e-06, "loss": 0.4617, "step": 10561 }, { "epoch": 1.734567774516043, "grad_norm": 0.31601675827456194, "learning_rate": 6.662670400080978e-06, "loss": 0.4706, "step": 10562 }, { "epoch": 1.7347320017243857, "grad_norm": 0.33103811875238215, "learning_rate": 6.662259126199573e-06, "loss": 0.4919, "step": 10563 }, { "epoch": 1.7348962289327283, "grad_norm": 0.41689720596390145, "learning_rate": 6.661847828833905e-06, "loss": 0.46, "step": 10564 }, { "epoch": 1.7350604561410712, "grad_norm": 0.35258440749097886, "learning_rate": 6.661436507988442e-06, "loss": 0.4874, "step": 10565 }, { "epoch": 1.735224683349414, "grad_norm": 0.38367876380098803, "learning_rate": 6.6610251636676536e-06, "loss": 0.4711, "step": 10566 }, { "epoch": 1.7353889105577567, "grad_norm": 0.35911279259469897, "learning_rate": 6.660613795876007e-06, "loss": 0.4483, "step": 10567 }, { "epoch": 1.7355531377660993, "grad_norm": 0.2888127223228145, "learning_rate": 6.6602024046179665e-06, "loss": 0.4533, "step": 10568 }, { "epoch": 1.7357173649744422, "grad_norm": 0.2798102991264217, "learning_rate": 6.659790989898002e-06, "loss": 0.4543, "step": 10569 }, { "epoch": 1.735881592182785, "grad_norm": 0.32600684688107623, "learning_rate": 6.659379551720584e-06, "loss": 0.4565, "step": 10570 }, { "epoch": 1.7360458193911277, "grad_norm": 0.3049264362217875, "learning_rate": 6.658968090090179e-06, "loss": 0.4613, "step": 10571 }, { "epoch": 1.7362100465994703, "grad_norm": 0.27547939138967337, "learning_rate": 6.658556605011257e-06, "loss": 0.4423, "step": 10572 }, { "epoch": 1.736374273807813, "grad_norm": 0.31268202539302475, "learning_rate": 6.6581450964882865e-06, "loss": 0.4649, "step": 10573 }, { "epoch": 1.7365385010161558, "grad_norm": 0.2947480651912641, "learning_rate": 6.6577335645257356e-06, "loss": 0.4667, "step": 10574 }, { "epoch": 1.7367027282244987, "grad_norm": 0.3829571158229656, "learning_rate": 6.657322009128077e-06, "loss": 0.4471, "step": 10575 }, { "epoch": 1.7368669554328413, "grad_norm": 0.9143886610861583, "learning_rate": 6.656910430299777e-06, "loss": 0.4641, "step": 10576 }, { "epoch": 1.737031182641184, "grad_norm": 0.27884086419625775, "learning_rate": 6.65649882804531e-06, "loss": 0.4774, "step": 10577 }, { "epoch": 1.7371954098495268, "grad_norm": 0.33456889981474314, "learning_rate": 6.656087202369142e-06, "loss": 0.4481, "step": 10578 }, { "epoch": 1.7373596370578697, "grad_norm": 0.30338483694912066, "learning_rate": 6.655675553275747e-06, "loss": 0.4408, "step": 10579 }, { "epoch": 1.7375238642662123, "grad_norm": 0.29204954685142154, "learning_rate": 6.655263880769593e-06, "loss": 0.4561, "step": 10580 }, { "epoch": 1.737688091474555, "grad_norm": 0.28477453765180205, "learning_rate": 6.654852184855153e-06, "loss": 0.473, "step": 10581 }, { "epoch": 1.7378523186828978, "grad_norm": 0.3611993605343738, "learning_rate": 6.654440465536899e-06, "loss": 0.4816, "step": 10582 }, { "epoch": 1.7380165458912407, "grad_norm": 0.2788356026362653, "learning_rate": 6.654028722819303e-06, "loss": 0.4878, "step": 10583 }, { "epoch": 1.7381807730995833, "grad_norm": 0.33148173481642546, "learning_rate": 6.653616956706834e-06, "loss": 0.4545, "step": 10584 }, { "epoch": 1.738345000307926, "grad_norm": 0.3035096767740506, "learning_rate": 6.653205167203966e-06, "loss": 0.4748, "step": 10585 }, { "epoch": 1.7385092275162688, "grad_norm": 0.3493164699571448, "learning_rate": 6.652793354315173e-06, "loss": 0.4618, "step": 10586 }, { "epoch": 1.7386734547246117, "grad_norm": 0.3404445961452418, "learning_rate": 6.652381518044924e-06, "loss": 0.471, "step": 10587 }, { "epoch": 1.7388376819329543, "grad_norm": 0.33019863805209754, "learning_rate": 6.651969658397696e-06, "loss": 0.4659, "step": 10588 }, { "epoch": 1.739001909141297, "grad_norm": 0.26683706560142134, "learning_rate": 6.651557775377958e-06, "loss": 0.4395, "step": 10589 }, { "epoch": 1.7391661363496396, "grad_norm": 0.3633868009011593, "learning_rate": 6.651145868990188e-06, "loss": 0.4582, "step": 10590 }, { "epoch": 1.7393303635579824, "grad_norm": 0.35217765494100506, "learning_rate": 6.650733939238857e-06, "loss": 0.4377, "step": 10591 }, { "epoch": 1.7394945907663253, "grad_norm": 0.36982069809605017, "learning_rate": 6.650321986128439e-06, "loss": 0.4503, "step": 10592 }, { "epoch": 1.739658817974668, "grad_norm": 0.307761272677984, "learning_rate": 6.649910009663408e-06, "loss": 0.4752, "step": 10593 }, { "epoch": 1.7398230451830106, "grad_norm": 0.28942145592305923, "learning_rate": 6.649498009848239e-06, "loss": 0.4665, "step": 10594 }, { "epoch": 1.7399872723913534, "grad_norm": 0.27124401320804664, "learning_rate": 6.649085986687406e-06, "loss": 0.4398, "step": 10595 }, { "epoch": 1.7401514995996963, "grad_norm": 0.3261971310442718, "learning_rate": 6.648673940185388e-06, "loss": 0.4601, "step": 10596 }, { "epoch": 1.740315726808039, "grad_norm": 0.2843498854236714, "learning_rate": 6.6482618703466545e-06, "loss": 0.4694, "step": 10597 }, { "epoch": 1.7404799540163816, "grad_norm": 0.37333399523345706, "learning_rate": 6.6478497771756845e-06, "loss": 0.4482, "step": 10598 }, { "epoch": 1.7406441812247244, "grad_norm": 0.2788317231506996, "learning_rate": 6.647437660676951e-06, "loss": 0.4513, "step": 10599 }, { "epoch": 1.7408084084330673, "grad_norm": 0.31375562829679565, "learning_rate": 6.647025520854934e-06, "loss": 0.4624, "step": 10600 }, { "epoch": 1.74097263564141, "grad_norm": 0.3615619772581296, "learning_rate": 6.646613357714107e-06, "loss": 0.4436, "step": 10601 }, { "epoch": 1.7411368628497526, "grad_norm": 0.3315910833247448, "learning_rate": 6.646201171258946e-06, "loss": 0.4512, "step": 10602 }, { "epoch": 1.7413010900580954, "grad_norm": 0.2708299030595272, "learning_rate": 6.645788961493929e-06, "loss": 0.4426, "step": 10603 }, { "epoch": 1.7414653172664383, "grad_norm": 0.27840725603169686, "learning_rate": 6.645376728423533e-06, "loss": 0.4385, "step": 10604 }, { "epoch": 1.741629544474781, "grad_norm": 0.2569884855267863, "learning_rate": 6.644964472052234e-06, "loss": 0.4575, "step": 10605 }, { "epoch": 1.7417937716831235, "grad_norm": 0.28479141295519006, "learning_rate": 6.644552192384512e-06, "loss": 0.4424, "step": 10606 }, { "epoch": 1.7419579988914662, "grad_norm": 0.2951760227251854, "learning_rate": 6.644139889424842e-06, "loss": 0.4657, "step": 10607 }, { "epoch": 1.742122226099809, "grad_norm": 0.30186113653350727, "learning_rate": 6.643727563177704e-06, "loss": 0.4754, "step": 10608 }, { "epoch": 1.742286453308152, "grad_norm": 0.2612584685088557, "learning_rate": 6.643315213647575e-06, "loss": 0.4453, "step": 10609 }, { "epoch": 1.7424506805164945, "grad_norm": 0.6947221272165853, "learning_rate": 6.642902840838934e-06, "loss": 0.4571, "step": 10610 }, { "epoch": 1.7426149077248372, "grad_norm": 0.3120014744182741, "learning_rate": 6.6424904447562615e-06, "loss": 0.4474, "step": 10611 }, { "epoch": 1.74277913493318, "grad_norm": 0.3068805224277791, "learning_rate": 6.642078025404033e-06, "loss": 0.4649, "step": 10612 }, { "epoch": 1.742943362141523, "grad_norm": 0.37785825035727366, "learning_rate": 6.641665582786731e-06, "loss": 0.4277, "step": 10613 }, { "epoch": 1.7431075893498655, "grad_norm": 0.32257286709905886, "learning_rate": 6.6412531169088325e-06, "loss": 0.4476, "step": 10614 }, { "epoch": 1.7432718165582082, "grad_norm": 0.27288879363747537, "learning_rate": 6.6408406277748176e-06, "loss": 0.4439, "step": 10615 }, { "epoch": 1.743436043766551, "grad_norm": 0.29328108390781554, "learning_rate": 6.640428115389168e-06, "loss": 0.4833, "step": 10616 }, { "epoch": 1.743600270974894, "grad_norm": 0.4506991914279768, "learning_rate": 6.640015579756364e-06, "loss": 0.4652, "step": 10617 }, { "epoch": 1.7437644981832365, "grad_norm": 0.3087380589024369, "learning_rate": 6.639603020880885e-06, "loss": 0.4793, "step": 10618 }, { "epoch": 1.7439287253915792, "grad_norm": 0.34152603811976384, "learning_rate": 6.639190438767211e-06, "loss": 0.4486, "step": 10619 }, { "epoch": 1.744092952599922, "grad_norm": 0.3100709807557343, "learning_rate": 6.638777833419825e-06, "loss": 0.4602, "step": 10620 }, { "epoch": 1.744257179808265, "grad_norm": 0.3054017542582119, "learning_rate": 6.638365204843209e-06, "loss": 0.4642, "step": 10621 }, { "epoch": 1.7444214070166075, "grad_norm": 0.34891463707536224, "learning_rate": 6.637952553041842e-06, "loss": 0.4696, "step": 10622 }, { "epoch": 1.7445856342249502, "grad_norm": 0.3379641255537083, "learning_rate": 6.637539878020205e-06, "loss": 0.46, "step": 10623 }, { "epoch": 1.7447498614332928, "grad_norm": 0.4668367355877062, "learning_rate": 6.637127179782782e-06, "loss": 0.4542, "step": 10624 }, { "epoch": 1.7449140886416357, "grad_norm": 0.8209300021742912, "learning_rate": 6.636714458334057e-06, "loss": 0.4623, "step": 10625 }, { "epoch": 1.7450783158499785, "grad_norm": 0.28705239054834886, "learning_rate": 6.636301713678511e-06, "loss": 0.4507, "step": 10626 }, { "epoch": 1.7452425430583212, "grad_norm": 0.3355537786733906, "learning_rate": 6.635888945820625e-06, "loss": 0.4302, "step": 10627 }, { "epoch": 1.7454067702666638, "grad_norm": 0.37192872092910106, "learning_rate": 6.635476154764884e-06, "loss": 0.452, "step": 10628 }, { "epoch": 1.7455709974750067, "grad_norm": 0.26525282249639376, "learning_rate": 6.635063340515772e-06, "loss": 0.4537, "step": 10629 }, { "epoch": 1.7457352246833495, "grad_norm": 0.30958646418527175, "learning_rate": 6.63465050307777e-06, "loss": 0.4485, "step": 10630 }, { "epoch": 1.7458994518916922, "grad_norm": 0.33668426160861986, "learning_rate": 6.634237642455365e-06, "loss": 0.4624, "step": 10631 }, { "epoch": 1.7460636791000348, "grad_norm": 0.3366417579129416, "learning_rate": 6.633824758653038e-06, "loss": 0.4421, "step": 10632 }, { "epoch": 1.7462279063083777, "grad_norm": 0.3456242095455785, "learning_rate": 6.633411851675275e-06, "loss": 0.4655, "step": 10633 }, { "epoch": 1.7463921335167205, "grad_norm": 0.32458901913886634, "learning_rate": 6.63299892152656e-06, "loss": 0.4458, "step": 10634 }, { "epoch": 1.7465563607250632, "grad_norm": 0.29732800671455417, "learning_rate": 6.632585968211379e-06, "loss": 0.4663, "step": 10635 }, { "epoch": 1.7467205879334058, "grad_norm": 0.35012774391383994, "learning_rate": 6.632172991734216e-06, "loss": 0.4716, "step": 10636 }, { "epoch": 1.7468848151417486, "grad_norm": 0.37338988277876983, "learning_rate": 6.6317599920995555e-06, "loss": 0.4591, "step": 10637 }, { "epoch": 1.7470490423500915, "grad_norm": 0.3799089211952642, "learning_rate": 6.631346969311886e-06, "loss": 0.4604, "step": 10638 }, { "epoch": 1.7472132695584341, "grad_norm": 0.2894958279907993, "learning_rate": 6.6309339233756894e-06, "loss": 0.4603, "step": 10639 }, { "epoch": 1.7473774967667768, "grad_norm": 0.3575252568448904, "learning_rate": 6.630520854295455e-06, "loss": 0.4781, "step": 10640 }, { "epoch": 1.7475417239751194, "grad_norm": 0.27491260487392993, "learning_rate": 6.630107762075668e-06, "loss": 0.4522, "step": 10641 }, { "epoch": 1.7477059511834623, "grad_norm": 0.28858432050151406, "learning_rate": 6.629694646720815e-06, "loss": 0.4569, "step": 10642 }, { "epoch": 1.7478701783918051, "grad_norm": 0.25855245592740905, "learning_rate": 6.6292815082353825e-06, "loss": 0.4733, "step": 10643 }, { "epoch": 1.7480344056001478, "grad_norm": 0.28350226598092165, "learning_rate": 6.628868346623858e-06, "loss": 0.4555, "step": 10644 }, { "epoch": 1.7481986328084904, "grad_norm": 0.3637068588878069, "learning_rate": 6.6284551618907284e-06, "loss": 0.4548, "step": 10645 }, { "epoch": 1.7483628600168333, "grad_norm": 0.3527726874714712, "learning_rate": 6.628041954040482e-06, "loss": 0.4544, "step": 10646 }, { "epoch": 1.7485270872251761, "grad_norm": 0.26024031603615716, "learning_rate": 6.627628723077606e-06, "loss": 0.46, "step": 10647 }, { "epoch": 1.7486913144335188, "grad_norm": 0.3288597058710364, "learning_rate": 6.627215469006589e-06, "loss": 0.447, "step": 10648 }, { "epoch": 1.7488555416418614, "grad_norm": 0.25892417052957034, "learning_rate": 6.626802191831919e-06, "loss": 0.4713, "step": 10649 }, { "epoch": 1.7490197688502043, "grad_norm": 0.4091246145076616, "learning_rate": 6.626388891558086e-06, "loss": 0.4735, "step": 10650 }, { "epoch": 1.7491839960585471, "grad_norm": 0.25989391360676, "learning_rate": 6.625975568189575e-06, "loss": 0.4459, "step": 10651 }, { "epoch": 1.7493482232668898, "grad_norm": 0.34067425289731734, "learning_rate": 6.62556222173088e-06, "loss": 0.4395, "step": 10652 }, { "epoch": 1.7495124504752324, "grad_norm": 0.36332303622096074, "learning_rate": 6.625148852186485e-06, "loss": 0.466, "step": 10653 }, { "epoch": 1.7496766776835753, "grad_norm": 0.40636172529249653, "learning_rate": 6.624735459560886e-06, "loss": 0.4526, "step": 10654 }, { "epoch": 1.7498409048919181, "grad_norm": 0.35763187050169826, "learning_rate": 6.6243220438585685e-06, "loss": 0.4465, "step": 10655 }, { "epoch": 1.7500051321002608, "grad_norm": 0.2928042263761072, "learning_rate": 6.623908605084023e-06, "loss": 0.4452, "step": 10656 }, { "epoch": 1.7501693593086034, "grad_norm": 0.32058362491336445, "learning_rate": 6.623495143241739e-06, "loss": 0.4424, "step": 10657 }, { "epoch": 1.750333586516946, "grad_norm": 0.3051735078726969, "learning_rate": 6.623081658336211e-06, "loss": 0.4478, "step": 10658 }, { "epoch": 1.750497813725289, "grad_norm": 0.2846557283714331, "learning_rate": 6.622668150371925e-06, "loss": 0.4746, "step": 10659 }, { "epoch": 1.7506620409336318, "grad_norm": 0.2631461096010346, "learning_rate": 6.622254619353377e-06, "loss": 0.4806, "step": 10660 }, { "epoch": 1.7508262681419744, "grad_norm": 0.27165081583272227, "learning_rate": 6.621841065285054e-06, "loss": 0.4629, "step": 10661 }, { "epoch": 1.750990495350317, "grad_norm": 0.34897299039333585, "learning_rate": 6.62142748817145e-06, "loss": 0.4733, "step": 10662 }, { "epoch": 1.75115472255866, "grad_norm": 0.2967986687922061, "learning_rate": 6.621013888017057e-06, "loss": 0.4721, "step": 10663 }, { "epoch": 1.7513189497670028, "grad_norm": 0.2696164577840737, "learning_rate": 6.620600264826365e-06, "loss": 0.4533, "step": 10664 }, { "epoch": 1.7514831769753454, "grad_norm": 0.30955662042741533, "learning_rate": 6.620186618603869e-06, "loss": 0.4681, "step": 10665 }, { "epoch": 1.751647404183688, "grad_norm": 0.2663600728753576, "learning_rate": 6.6197729493540595e-06, "loss": 0.4527, "step": 10666 }, { "epoch": 1.7518116313920309, "grad_norm": 0.39339227377818786, "learning_rate": 6.619359257081431e-06, "loss": 0.4404, "step": 10667 }, { "epoch": 1.7519758586003737, "grad_norm": 0.2943000935613823, "learning_rate": 6.618945541790474e-06, "loss": 0.4437, "step": 10668 }, { "epoch": 1.7521400858087164, "grad_norm": 0.29804979688660815, "learning_rate": 6.618531803485686e-06, "loss": 0.4505, "step": 10669 }, { "epoch": 1.752304313017059, "grad_norm": 0.29331311411628713, "learning_rate": 6.6181180421715574e-06, "loss": 0.4579, "step": 10670 }, { "epoch": 1.7524685402254019, "grad_norm": 0.2996471216969548, "learning_rate": 6.617704257852583e-06, "loss": 0.4488, "step": 10671 }, { "epoch": 1.7526327674337447, "grad_norm": 0.45540878251191624, "learning_rate": 6.6172904505332555e-06, "loss": 0.4634, "step": 10672 }, { "epoch": 1.7527969946420874, "grad_norm": 0.3484322554734612, "learning_rate": 6.616876620218071e-06, "loss": 0.4566, "step": 10673 }, { "epoch": 1.75296122185043, "grad_norm": 0.293755535000031, "learning_rate": 6.616462766911525e-06, "loss": 0.4753, "step": 10674 }, { "epoch": 1.7531254490587727, "grad_norm": 0.33996324247139664, "learning_rate": 6.616048890618111e-06, "loss": 0.4636, "step": 10675 }, { "epoch": 1.7532896762671155, "grad_norm": 0.41606836561116783, "learning_rate": 6.615634991342323e-06, "loss": 0.4418, "step": 10676 }, { "epoch": 1.7534539034754584, "grad_norm": 0.3132030327583419, "learning_rate": 6.615221069088658e-06, "loss": 0.4685, "step": 10677 }, { "epoch": 1.753618130683801, "grad_norm": 0.2993572772718032, "learning_rate": 6.614807123861611e-06, "loss": 0.4639, "step": 10678 }, { "epoch": 1.7537823578921437, "grad_norm": 0.4478176444935102, "learning_rate": 6.614393155665678e-06, "loss": 0.4475, "step": 10679 }, { "epoch": 1.7539465851004865, "grad_norm": 0.30048533535202293, "learning_rate": 6.613979164505355e-06, "loss": 0.4403, "step": 10680 }, { "epoch": 1.7541108123088294, "grad_norm": 0.2908512457695989, "learning_rate": 6.613565150385138e-06, "loss": 0.4559, "step": 10681 }, { "epoch": 1.754275039517172, "grad_norm": 0.30416948079747663, "learning_rate": 6.613151113309524e-06, "loss": 0.4624, "step": 10682 }, { "epoch": 1.7544392667255146, "grad_norm": 0.3456723939038373, "learning_rate": 6.6127370532830105e-06, "loss": 0.4458, "step": 10683 }, { "epoch": 1.7546034939338575, "grad_norm": 0.2688557993639089, "learning_rate": 6.612322970310094e-06, "loss": 0.4395, "step": 10684 }, { "epoch": 1.7547677211422004, "grad_norm": 0.2857108442653925, "learning_rate": 6.6119088643952715e-06, "loss": 0.4616, "step": 10685 }, { "epoch": 1.754931948350543, "grad_norm": 0.2946948925070282, "learning_rate": 6.611494735543041e-06, "loss": 0.4428, "step": 10686 }, { "epoch": 1.7550961755588856, "grad_norm": 0.2816529774982687, "learning_rate": 6.611080583757899e-06, "loss": 0.4602, "step": 10687 }, { "epoch": 1.7552604027672285, "grad_norm": 0.290157879388694, "learning_rate": 6.610666409044347e-06, "loss": 0.4696, "step": 10688 }, { "epoch": 1.7554246299755714, "grad_norm": 0.36955605038324496, "learning_rate": 6.61025221140688e-06, "loss": 0.4493, "step": 10689 }, { "epoch": 1.755588857183914, "grad_norm": 0.3095960553714322, "learning_rate": 6.609837990849999e-06, "loss": 0.4315, "step": 10690 }, { "epoch": 1.7557530843922566, "grad_norm": 0.47185918895177503, "learning_rate": 6.609423747378199e-06, "loss": 0.4817, "step": 10691 }, { "epoch": 1.7559173116005993, "grad_norm": 0.28115335143313835, "learning_rate": 6.609009480995984e-06, "loss": 0.4312, "step": 10692 }, { "epoch": 1.7560815388089421, "grad_norm": 0.30650451169190884, "learning_rate": 6.608595191707851e-06, "loss": 0.4793, "step": 10693 }, { "epoch": 1.756245766017285, "grad_norm": 0.34971127443580935, "learning_rate": 6.608180879518299e-06, "loss": 0.4604, "step": 10694 }, { "epoch": 1.7564099932256276, "grad_norm": 0.3094900938094681, "learning_rate": 6.607766544431828e-06, "loss": 0.4473, "step": 10695 }, { "epoch": 1.7565742204339703, "grad_norm": 0.3187215929719953, "learning_rate": 6.6073521864529395e-06, "loss": 0.4662, "step": 10696 }, { "epoch": 1.7567384476423131, "grad_norm": 0.3338168033020335, "learning_rate": 6.606937805586132e-06, "loss": 0.4582, "step": 10697 }, { "epoch": 1.756902674850656, "grad_norm": 0.28507866277484556, "learning_rate": 6.606523401835907e-06, "loss": 0.4632, "step": 10698 }, { "epoch": 1.7570669020589986, "grad_norm": 1.3822681586763514, "learning_rate": 6.606108975206767e-06, "loss": 0.4564, "step": 10699 }, { "epoch": 1.7572311292673413, "grad_norm": 0.2818682573662767, "learning_rate": 6.605694525703209e-06, "loss": 0.4744, "step": 10700 }, { "epoch": 1.7573953564756841, "grad_norm": 0.3082291536949848, "learning_rate": 6.605280053329738e-06, "loss": 0.4434, "step": 10701 }, { "epoch": 1.757559583684027, "grad_norm": 0.30796809315309, "learning_rate": 6.604865558090854e-06, "loss": 0.4537, "step": 10702 }, { "epoch": 1.7577238108923696, "grad_norm": 0.2785442087505172, "learning_rate": 6.604451039991059e-06, "loss": 0.4688, "step": 10703 }, { "epoch": 1.7578880381007123, "grad_norm": 0.26598099036361045, "learning_rate": 6.6040364990348556e-06, "loss": 0.4381, "step": 10704 }, { "epoch": 1.7580522653090551, "grad_norm": 0.3066077419739165, "learning_rate": 6.603621935226746e-06, "loss": 0.4783, "step": 10705 }, { "epoch": 1.758216492517398, "grad_norm": 0.28792843857434686, "learning_rate": 6.603207348571231e-06, "loss": 0.4533, "step": 10706 }, { "epoch": 1.7583807197257406, "grad_norm": 0.2912059270129017, "learning_rate": 6.602792739072817e-06, "loss": 0.4634, "step": 10707 }, { "epoch": 1.7585449469340833, "grad_norm": 0.2938487886132544, "learning_rate": 6.6023781067360035e-06, "loss": 0.4729, "step": 10708 }, { "epoch": 1.758709174142426, "grad_norm": 0.32239400228435916, "learning_rate": 6.601963451565297e-06, "loss": 0.461, "step": 10709 }, { "epoch": 1.7588734013507688, "grad_norm": 0.37857542453820436, "learning_rate": 6.601548773565197e-06, "loss": 0.4429, "step": 10710 }, { "epoch": 1.7590376285591116, "grad_norm": 0.3075074168414816, "learning_rate": 6.601134072740211e-06, "loss": 0.4403, "step": 10711 }, { "epoch": 1.7592018557674542, "grad_norm": 0.7430190716889086, "learning_rate": 6.600719349094841e-06, "loss": 0.4448, "step": 10712 }, { "epoch": 1.7593660829757969, "grad_norm": 0.44458514655301934, "learning_rate": 6.600304602633594e-06, "loss": 0.4563, "step": 10713 }, { "epoch": 1.7595303101841397, "grad_norm": 0.5312318676070004, "learning_rate": 6.5998898333609715e-06, "loss": 0.4531, "step": 10714 }, { "epoch": 1.7596945373924826, "grad_norm": 0.29153224613219314, "learning_rate": 6.599475041281479e-06, "loss": 0.4433, "step": 10715 }, { "epoch": 1.7598587646008252, "grad_norm": 0.8133344159717972, "learning_rate": 6.59906022639962e-06, "loss": 0.4555, "step": 10716 }, { "epoch": 1.7600229918091679, "grad_norm": 0.29733696086637973, "learning_rate": 6.598645388719905e-06, "loss": 0.448, "step": 10717 }, { "epoch": 1.7601872190175107, "grad_norm": 0.3456375401995861, "learning_rate": 6.598230528246835e-06, "loss": 0.4571, "step": 10718 }, { "epoch": 1.7603514462258536, "grad_norm": 0.3311963527266412, "learning_rate": 6.597815644984918e-06, "loss": 0.4506, "step": 10719 }, { "epoch": 1.7605156734341962, "grad_norm": 0.38540894382757607, "learning_rate": 6.597400738938658e-06, "loss": 0.4657, "step": 10720 }, { "epoch": 1.7606799006425389, "grad_norm": 0.3139327570135372, "learning_rate": 6.596985810112563e-06, "loss": 0.4371, "step": 10721 }, { "epoch": 1.7608441278508817, "grad_norm": 0.2927446497689957, "learning_rate": 6.596570858511138e-06, "loss": 0.4377, "step": 10722 }, { "epoch": 1.7610083550592246, "grad_norm": 0.3170970048028251, "learning_rate": 6.5961558841388915e-06, "loss": 0.4459, "step": 10723 }, { "epoch": 1.7611725822675672, "grad_norm": 0.2895232317380216, "learning_rate": 6.59574088700033e-06, "loss": 0.4463, "step": 10724 }, { "epoch": 1.7613368094759099, "grad_norm": 0.30132577539583216, "learning_rate": 6.59532586709996e-06, "loss": 0.4479, "step": 10725 }, { "epoch": 1.7615010366842525, "grad_norm": 0.297712209809135, "learning_rate": 6.59491082444229e-06, "loss": 0.4812, "step": 10726 }, { "epoch": 1.7616652638925954, "grad_norm": 0.30872107113301706, "learning_rate": 6.594495759031826e-06, "loss": 0.4532, "step": 10727 }, { "epoch": 1.7618294911009382, "grad_norm": 0.28669341623275213, "learning_rate": 6.594080670873079e-06, "loss": 0.4717, "step": 10728 }, { "epoch": 1.7619937183092809, "grad_norm": 0.3179139571288722, "learning_rate": 6.593665559970555e-06, "loss": 0.4736, "step": 10729 }, { "epoch": 1.7621579455176235, "grad_norm": 0.32432497751797623, "learning_rate": 6.5932504263287636e-06, "loss": 0.4646, "step": 10730 }, { "epoch": 1.7623221727259664, "grad_norm": 0.29703929983463007, "learning_rate": 6.592835269952212e-06, "loss": 0.4648, "step": 10731 }, { "epoch": 1.7624863999343092, "grad_norm": 0.34219272476991125, "learning_rate": 6.592420090845412e-06, "loss": 0.4648, "step": 10732 }, { "epoch": 1.7626506271426519, "grad_norm": 0.3240563250775775, "learning_rate": 6.59200488901287e-06, "loss": 0.4554, "step": 10733 }, { "epoch": 1.7628148543509945, "grad_norm": 0.3625809225011669, "learning_rate": 6.591589664459096e-06, "loss": 0.4584, "step": 10734 }, { "epoch": 1.7629790815593374, "grad_norm": 0.6478536789807493, "learning_rate": 6.5911744171886016e-06, "loss": 0.4771, "step": 10735 }, { "epoch": 1.7631433087676802, "grad_norm": 0.3228958516773163, "learning_rate": 6.590759147205895e-06, "loss": 0.4507, "step": 10736 }, { "epoch": 1.7633075359760229, "grad_norm": 0.26105724500638866, "learning_rate": 6.590343854515487e-06, "loss": 0.4801, "step": 10737 }, { "epoch": 1.7634717631843655, "grad_norm": 0.3364783517266538, "learning_rate": 6.589928539121889e-06, "loss": 0.4448, "step": 10738 }, { "epoch": 1.7636359903927084, "grad_norm": 0.2677527481328561, "learning_rate": 6.589513201029609e-06, "loss": 0.4705, "step": 10739 }, { "epoch": 1.7638002176010512, "grad_norm": 0.28157915471787903, "learning_rate": 6.589097840243162e-06, "loss": 0.4264, "step": 10740 }, { "epoch": 1.7639644448093939, "grad_norm": 0.2877591902737641, "learning_rate": 6.588682456767055e-06, "loss": 0.4307, "step": 10741 }, { "epoch": 1.7641286720177365, "grad_norm": 0.3628181269741827, "learning_rate": 6.588267050605803e-06, "loss": 0.4588, "step": 10742 }, { "epoch": 1.7642928992260791, "grad_norm": 0.3245284684820889, "learning_rate": 6.587851621763916e-06, "loss": 0.4455, "step": 10743 }, { "epoch": 1.764457126434422, "grad_norm": 0.4464116642828582, "learning_rate": 6.587436170245907e-06, "loss": 0.446, "step": 10744 }, { "epoch": 1.7646213536427648, "grad_norm": 0.3618431017109906, "learning_rate": 6.587020696056285e-06, "loss": 0.4663, "step": 10745 }, { "epoch": 1.7647855808511075, "grad_norm": 0.7835075103239901, "learning_rate": 6.586605199199567e-06, "loss": 0.4709, "step": 10746 }, { "epoch": 1.7649498080594501, "grad_norm": 0.30836303357783795, "learning_rate": 6.586189679680263e-06, "loss": 0.4497, "step": 10747 }, { "epoch": 1.765114035267793, "grad_norm": 0.5197716220523791, "learning_rate": 6.585774137502887e-06, "loss": 0.4672, "step": 10748 }, { "epoch": 1.7652782624761358, "grad_norm": 0.37370315931183823, "learning_rate": 6.585358572671951e-06, "loss": 0.4688, "step": 10749 }, { "epoch": 1.7654424896844785, "grad_norm": 0.30915793884894854, "learning_rate": 6.584942985191969e-06, "loss": 0.4522, "step": 10750 }, { "epoch": 1.7656067168928211, "grad_norm": 0.2850764149367957, "learning_rate": 6.584527375067456e-06, "loss": 0.4426, "step": 10751 }, { "epoch": 1.765770944101164, "grad_norm": 0.3175691534431449, "learning_rate": 6.584111742302924e-06, "loss": 0.4661, "step": 10752 }, { "epoch": 1.7659351713095068, "grad_norm": 0.2726331377292973, "learning_rate": 6.583696086902888e-06, "loss": 0.4386, "step": 10753 }, { "epoch": 1.7660993985178495, "grad_norm": 0.3159980495549621, "learning_rate": 6.583280408871862e-06, "loss": 0.4281, "step": 10754 }, { "epoch": 1.766263625726192, "grad_norm": 0.30476048593373595, "learning_rate": 6.5828647082143624e-06, "loss": 0.4386, "step": 10755 }, { "epoch": 1.766427852934535, "grad_norm": 0.5184037524101708, "learning_rate": 6.582448984934901e-06, "loss": 0.4596, "step": 10756 }, { "epoch": 1.7665920801428778, "grad_norm": 0.3719752223361219, "learning_rate": 6.582033239037997e-06, "loss": 0.4586, "step": 10757 }, { "epoch": 1.7667563073512205, "grad_norm": 0.42817232060131705, "learning_rate": 6.581617470528162e-06, "loss": 0.4301, "step": 10758 }, { "epoch": 1.766920534559563, "grad_norm": 0.274656189593246, "learning_rate": 6.5812016794099144e-06, "loss": 0.4472, "step": 10759 }, { "epoch": 1.7670847617679057, "grad_norm": 0.3350111125117778, "learning_rate": 6.5807858656877675e-06, "loss": 0.4384, "step": 10760 }, { "epoch": 1.7672489889762486, "grad_norm": 0.3961802579412799, "learning_rate": 6.580370029366239e-06, "loss": 0.4514, "step": 10761 }, { "epoch": 1.7674132161845915, "grad_norm": 0.2810769194265865, "learning_rate": 6.579954170449847e-06, "loss": 0.4389, "step": 10762 }, { "epoch": 1.767577443392934, "grad_norm": 0.2939767282116434, "learning_rate": 6.5795382889431045e-06, "loss": 0.4733, "step": 10763 }, { "epoch": 1.7677416706012767, "grad_norm": 0.35139246676594316, "learning_rate": 6.5791223848505305e-06, "loss": 0.46, "step": 10764 }, { "epoch": 1.7679058978096196, "grad_norm": 0.2923901618315435, "learning_rate": 6.578706458176642e-06, "loss": 0.4569, "step": 10765 }, { "epoch": 1.7680701250179625, "grad_norm": 0.3661090744129942, "learning_rate": 6.578290508925957e-06, "loss": 0.4572, "step": 10766 }, { "epoch": 1.768234352226305, "grad_norm": 0.3365126819034772, "learning_rate": 6.577874537102991e-06, "loss": 0.4611, "step": 10767 }, { "epoch": 1.7683985794346477, "grad_norm": 0.3998899420071638, "learning_rate": 6.577458542712263e-06, "loss": 0.4459, "step": 10768 }, { "epoch": 1.7685628066429906, "grad_norm": 0.32391092810763855, "learning_rate": 6.5770425257582926e-06, "loss": 0.4644, "step": 10769 }, { "epoch": 1.7687270338513335, "grad_norm": 0.3529853865117939, "learning_rate": 6.576626486245596e-06, "loss": 0.4698, "step": 10770 }, { "epoch": 1.768891261059676, "grad_norm": 0.2915016282853621, "learning_rate": 6.576210424178693e-06, "loss": 0.4494, "step": 10771 }, { "epoch": 1.7690554882680187, "grad_norm": 0.29888633281109755, "learning_rate": 6.575794339562103e-06, "loss": 0.4623, "step": 10772 }, { "epoch": 1.7692197154763616, "grad_norm": 0.33782021114077626, "learning_rate": 6.575378232400343e-06, "loss": 0.4722, "step": 10773 }, { "epoch": 1.7693839426847044, "grad_norm": 0.33197396780011823, "learning_rate": 6.574962102697932e-06, "loss": 0.446, "step": 10774 }, { "epoch": 1.769548169893047, "grad_norm": 0.3498934678387365, "learning_rate": 6.574545950459393e-06, "loss": 0.471, "step": 10775 }, { "epoch": 1.7697123971013897, "grad_norm": 0.2838093993191466, "learning_rate": 6.574129775689244e-06, "loss": 0.4541, "step": 10776 }, { "epoch": 1.7698766243097324, "grad_norm": 0.31636922709474874, "learning_rate": 6.573713578392005e-06, "loss": 0.4339, "step": 10777 }, { "epoch": 1.7700408515180752, "grad_norm": 0.49559821090645556, "learning_rate": 6.5732973585721955e-06, "loss": 0.4695, "step": 10778 }, { "epoch": 1.770205078726418, "grad_norm": 0.28710379603213987, "learning_rate": 6.572881116234337e-06, "loss": 0.4597, "step": 10779 }, { "epoch": 1.7703693059347607, "grad_norm": 0.2641290921404477, "learning_rate": 6.57246485138295e-06, "loss": 0.4399, "step": 10780 }, { "epoch": 1.7705335331431034, "grad_norm": 0.31354698332702563, "learning_rate": 6.572048564022557e-06, "loss": 0.4761, "step": 10781 }, { "epoch": 1.7706977603514462, "grad_norm": 0.27293143060893843, "learning_rate": 6.571632254157676e-06, "loss": 0.4702, "step": 10782 }, { "epoch": 1.770861987559789, "grad_norm": 0.2789244175301241, "learning_rate": 6.571215921792832e-06, "loss": 0.4639, "step": 10783 }, { "epoch": 1.7710262147681317, "grad_norm": 0.3119449859394376, "learning_rate": 6.570799566932545e-06, "loss": 0.4298, "step": 10784 }, { "epoch": 1.7711904419764744, "grad_norm": 0.4196702056062995, "learning_rate": 6.570383189581336e-06, "loss": 0.4453, "step": 10785 }, { "epoch": 1.7713546691848172, "grad_norm": 0.3100662001480286, "learning_rate": 6.569966789743731e-06, "loss": 0.4666, "step": 10786 }, { "epoch": 1.77151889639316, "grad_norm": 0.3417203555284162, "learning_rate": 6.569550367424248e-06, "loss": 0.4489, "step": 10787 }, { "epoch": 1.7716831236015027, "grad_norm": 0.2997760203724878, "learning_rate": 6.569133922627413e-06, "loss": 0.4672, "step": 10788 }, { "epoch": 1.7718473508098453, "grad_norm": 0.3650237571988705, "learning_rate": 6.5687174553577475e-06, "loss": 0.4398, "step": 10789 }, { "epoch": 1.7720115780181882, "grad_norm": 0.40735904021853114, "learning_rate": 6.568300965619775e-06, "loss": 0.4582, "step": 10790 }, { "epoch": 1.772175805226531, "grad_norm": 0.41041486738406796, "learning_rate": 6.56788445341802e-06, "loss": 0.4596, "step": 10791 }, { "epoch": 1.7723400324348737, "grad_norm": 0.34668412608676713, "learning_rate": 6.567467918757004e-06, "loss": 0.4445, "step": 10792 }, { "epoch": 1.7725042596432163, "grad_norm": 0.31068727442963523, "learning_rate": 6.5670513616412525e-06, "loss": 0.4421, "step": 10793 }, { "epoch": 1.772668486851559, "grad_norm": 0.3138059771083177, "learning_rate": 6.5666347820752895e-06, "loss": 0.4358, "step": 10794 }, { "epoch": 1.7728327140599018, "grad_norm": 0.2586739324491047, "learning_rate": 6.5662181800636395e-06, "loss": 0.4436, "step": 10795 }, { "epoch": 1.7729969412682447, "grad_norm": 0.30324835791213506, "learning_rate": 6.565801555610827e-06, "loss": 0.4382, "step": 10796 }, { "epoch": 1.7731611684765873, "grad_norm": 0.33744280019832007, "learning_rate": 6.565384908721379e-06, "loss": 0.4297, "step": 10797 }, { "epoch": 1.77332539568493, "grad_norm": 0.2838435130478732, "learning_rate": 6.564968239399816e-06, "loss": 0.4328, "step": 10798 }, { "epoch": 1.7734896228932728, "grad_norm": 0.29658021102022636, "learning_rate": 6.564551547650668e-06, "loss": 0.4576, "step": 10799 }, { "epoch": 1.7736538501016157, "grad_norm": 0.3007907744005072, "learning_rate": 6.564134833478459e-06, "loss": 0.4695, "step": 10800 }, { "epoch": 1.7738386057110012, "grad_norm": 0.285764971226959, "learning_rate": 6.563718096887715e-06, "loss": 0.4597, "step": 10801 }, { "epoch": 1.774002832919344, "grad_norm": 0.28967808908806614, "learning_rate": 6.56330133788296e-06, "loss": 0.4587, "step": 10802 }, { "epoch": 1.7741670601276867, "grad_norm": 0.2703467117021694, "learning_rate": 6.562884556468725e-06, "loss": 0.445, "step": 10803 }, { "epoch": 1.7743312873360293, "grad_norm": 0.2627977347482234, "learning_rate": 6.562467752649532e-06, "loss": 0.4707, "step": 10804 }, { "epoch": 1.7744955145443722, "grad_norm": 0.2966307541960856, "learning_rate": 6.562050926429912e-06, "loss": 0.4636, "step": 10805 }, { "epoch": 1.774659741752715, "grad_norm": 0.39316975585417724, "learning_rate": 6.561634077814389e-06, "loss": 0.4592, "step": 10806 }, { "epoch": 1.7748239689610577, "grad_norm": 0.28644623247691015, "learning_rate": 6.561217206807491e-06, "loss": 0.4627, "step": 10807 }, { "epoch": 1.7749881961694003, "grad_norm": 0.38443342350967297, "learning_rate": 6.5608003134137465e-06, "loss": 0.4639, "step": 10808 }, { "epoch": 1.7751524233777431, "grad_norm": 0.388144163612288, "learning_rate": 6.560383397637684e-06, "loss": 0.4733, "step": 10809 }, { "epoch": 1.775316650586086, "grad_norm": 0.32983894347945997, "learning_rate": 6.55996645948383e-06, "loss": 0.4477, "step": 10810 }, { "epoch": 1.7754808777944286, "grad_norm": 0.4331970351898739, "learning_rate": 6.559549498956715e-06, "loss": 0.4576, "step": 10811 }, { "epoch": 1.7756451050027713, "grad_norm": 0.298687059323067, "learning_rate": 6.559132516060865e-06, "loss": 0.4534, "step": 10812 }, { "epoch": 1.775809332211114, "grad_norm": 0.2718148542045194, "learning_rate": 6.55871551080081e-06, "loss": 0.4431, "step": 10813 }, { "epoch": 1.7759735594194568, "grad_norm": 0.3192924286136304, "learning_rate": 6.558298483181078e-06, "loss": 0.4637, "step": 10814 }, { "epoch": 1.7761377866277996, "grad_norm": 0.3229275703672246, "learning_rate": 6.5578814332062e-06, "loss": 0.4517, "step": 10815 }, { "epoch": 1.7763020138361423, "grad_norm": 0.3074386241153964, "learning_rate": 6.557464360880704e-06, "loss": 0.4425, "step": 10816 }, { "epoch": 1.776466241044485, "grad_norm": 0.2904906686506346, "learning_rate": 6.557047266209123e-06, "loss": 0.4571, "step": 10817 }, { "epoch": 1.7766304682528278, "grad_norm": 0.27502918069961163, "learning_rate": 6.556630149195984e-06, "loss": 0.4583, "step": 10818 }, { "epoch": 1.7767946954611706, "grad_norm": 0.34980850402681873, "learning_rate": 6.5562130098458175e-06, "loss": 0.4665, "step": 10819 }, { "epoch": 1.7769589226695133, "grad_norm": 0.32905826580936176, "learning_rate": 6.555795848163155e-06, "loss": 0.4326, "step": 10820 }, { "epoch": 1.777123149877856, "grad_norm": 0.31399784018566634, "learning_rate": 6.5553786641525266e-06, "loss": 0.4728, "step": 10821 }, { "epoch": 1.7772873770861988, "grad_norm": 0.28220727942929263, "learning_rate": 6.554961457818464e-06, "loss": 0.4689, "step": 10822 }, { "epoch": 1.7774516042945416, "grad_norm": 0.350203078583294, "learning_rate": 6.554544229165498e-06, "loss": 0.4778, "step": 10823 }, { "epoch": 1.7776158315028843, "grad_norm": 0.43535540364275027, "learning_rate": 6.55412697819816e-06, "loss": 0.4483, "step": 10824 }, { "epoch": 1.777780058711227, "grad_norm": 0.30355242827628565, "learning_rate": 6.553709704920984e-06, "loss": 0.4592, "step": 10825 }, { "epoch": 1.7779442859195698, "grad_norm": 0.3460292247959962, "learning_rate": 6.553292409338499e-06, "loss": 0.4647, "step": 10826 }, { "epoch": 1.7781085131279126, "grad_norm": 0.3144679395520833, "learning_rate": 6.552875091455237e-06, "loss": 0.4783, "step": 10827 }, { "epoch": 1.7782727403362553, "grad_norm": 0.2956641392050019, "learning_rate": 6.552457751275732e-06, "loss": 0.463, "step": 10828 }, { "epoch": 1.778436967544598, "grad_norm": 1.2627729567562462, "learning_rate": 6.5520403888045175e-06, "loss": 0.4429, "step": 10829 }, { "epoch": 1.7786011947529405, "grad_norm": 0.2850528949825912, "learning_rate": 6.551623004046125e-06, "loss": 0.4428, "step": 10830 }, { "epoch": 1.7787654219612834, "grad_norm": 0.3196432030904305, "learning_rate": 6.551205597005088e-06, "loss": 0.4663, "step": 10831 }, { "epoch": 1.7789296491696263, "grad_norm": 0.2575516148408011, "learning_rate": 6.550788167685941e-06, "loss": 0.4293, "step": 10832 }, { "epoch": 1.779093876377969, "grad_norm": 0.25567605917187286, "learning_rate": 6.550370716093215e-06, "loss": 0.4511, "step": 10833 }, { "epoch": 1.7792581035863115, "grad_norm": 0.3174637713246571, "learning_rate": 6.549953242231447e-06, "loss": 0.4611, "step": 10834 }, { "epoch": 1.7794223307946544, "grad_norm": 0.5439191573102424, "learning_rate": 6.549535746105171e-06, "loss": 0.4454, "step": 10835 }, { "epoch": 1.7795865580029973, "grad_norm": 0.3170447961287215, "learning_rate": 6.549118227718918e-06, "loss": 0.4606, "step": 10836 }, { "epoch": 1.77975078521134, "grad_norm": 0.36447664378677996, "learning_rate": 6.548700687077226e-06, "loss": 0.4711, "step": 10837 }, { "epoch": 1.7799150124196825, "grad_norm": 0.2904792276977439, "learning_rate": 6.5482831241846284e-06, "loss": 0.4585, "step": 10838 }, { "epoch": 1.7800792396280254, "grad_norm": 0.2719058857151288, "learning_rate": 6.547865539045661e-06, "loss": 0.467, "step": 10839 }, { "epoch": 1.7802434668363682, "grad_norm": 0.3222765664208403, "learning_rate": 6.54744793166486e-06, "loss": 0.4411, "step": 10840 }, { "epoch": 1.7804076940447109, "grad_norm": 0.3264407930977664, "learning_rate": 6.547030302046759e-06, "loss": 0.4493, "step": 10841 }, { "epoch": 1.7805719212530535, "grad_norm": 0.3601154985976864, "learning_rate": 6.546612650195897e-06, "loss": 0.4593, "step": 10842 }, { "epoch": 1.7807361484613964, "grad_norm": 0.29387278390753235, "learning_rate": 6.546194976116805e-06, "loss": 0.4364, "step": 10843 }, { "epoch": 1.7809003756697392, "grad_norm": 0.32486471922003535, "learning_rate": 6.545777279814024e-06, "loss": 0.4613, "step": 10844 }, { "epoch": 1.7810646028780819, "grad_norm": 0.3041127864331923, "learning_rate": 6.5453595612920885e-06, "loss": 0.4567, "step": 10845 }, { "epoch": 1.7812288300864245, "grad_norm": 0.3548441166557751, "learning_rate": 6.544941820555536e-06, "loss": 0.4446, "step": 10846 }, { "epoch": 1.7813930572947672, "grad_norm": 0.33470252658410204, "learning_rate": 6.544524057608904e-06, "loss": 0.4537, "step": 10847 }, { "epoch": 1.78155728450311, "grad_norm": 0.2989380276370097, "learning_rate": 6.544106272456727e-06, "loss": 0.4557, "step": 10848 }, { "epoch": 1.7817215117114529, "grad_norm": 0.2997943871744751, "learning_rate": 6.543688465103548e-06, "loss": 0.4559, "step": 10849 }, { "epoch": 1.7818857389197955, "grad_norm": 0.2936293590695143, "learning_rate": 6.5432706355538985e-06, "loss": 0.4467, "step": 10850 }, { "epoch": 1.7820499661281382, "grad_norm": 0.25972399431795706, "learning_rate": 6.5428527838123215e-06, "loss": 0.4515, "step": 10851 }, { "epoch": 1.782214193336481, "grad_norm": 0.34117830092565526, "learning_rate": 6.5424349098833534e-06, "loss": 0.4497, "step": 10852 }, { "epoch": 1.7823784205448239, "grad_norm": 0.40292122778145445, "learning_rate": 6.542017013771531e-06, "loss": 0.4445, "step": 10853 }, { "epoch": 1.7825426477531665, "grad_norm": 0.29530119778119707, "learning_rate": 6.541599095481396e-06, "loss": 0.465, "step": 10854 }, { "epoch": 1.7827068749615091, "grad_norm": 0.31448321131899243, "learning_rate": 6.541181155017487e-06, "loss": 0.4569, "step": 10855 }, { "epoch": 1.782871102169852, "grad_norm": 0.28643134276254606, "learning_rate": 6.540763192384341e-06, "loss": 0.455, "step": 10856 }, { "epoch": 1.7830353293781949, "grad_norm": 0.30659806432999265, "learning_rate": 6.540345207586498e-06, "loss": 0.4507, "step": 10857 }, { "epoch": 1.7831995565865375, "grad_norm": 0.26947636257084084, "learning_rate": 6.5399272006285e-06, "loss": 0.4635, "step": 10858 }, { "epoch": 1.7833637837948801, "grad_norm": 0.30243946446801434, "learning_rate": 6.539509171514888e-06, "loss": 0.4348, "step": 10859 }, { "epoch": 1.783528011003223, "grad_norm": 0.29964948420515214, "learning_rate": 6.539091120250196e-06, "loss": 0.4556, "step": 10860 }, { "epoch": 1.7836922382115659, "grad_norm": 0.4948757323995327, "learning_rate": 6.53867304683897e-06, "loss": 0.4437, "step": 10861 }, { "epoch": 1.7838564654199085, "grad_norm": 0.27079970013608673, "learning_rate": 6.538254951285747e-06, "loss": 0.4639, "step": 10862 }, { "epoch": 1.7840206926282511, "grad_norm": 0.4382147974067117, "learning_rate": 6.5378368335950716e-06, "loss": 0.4524, "step": 10863 }, { "epoch": 1.7841849198365938, "grad_norm": 0.3276204275339086, "learning_rate": 6.537418693771484e-06, "loss": 0.4729, "step": 10864 }, { "epoch": 1.7843491470449366, "grad_norm": 0.44798273637267755, "learning_rate": 6.537000531819523e-06, "loss": 0.4515, "step": 10865 }, { "epoch": 1.7845133742532795, "grad_norm": 0.2958121807443287, "learning_rate": 6.536582347743732e-06, "loss": 0.4713, "step": 10866 }, { "epoch": 1.7846776014616221, "grad_norm": 0.3204812397349806, "learning_rate": 6.536164141548654e-06, "loss": 0.4734, "step": 10867 }, { "epoch": 1.7848418286699648, "grad_norm": 0.27522351558354125, "learning_rate": 6.535745913238831e-06, "loss": 0.4507, "step": 10868 }, { "epoch": 1.7850060558783076, "grad_norm": 0.31930333118326126, "learning_rate": 6.535327662818804e-06, "loss": 0.4608, "step": 10869 }, { "epoch": 1.7851702830866505, "grad_norm": 0.3058979140390943, "learning_rate": 6.534909390293115e-06, "loss": 0.4617, "step": 10870 }, { "epoch": 1.7853345102949931, "grad_norm": 0.29387217939437715, "learning_rate": 6.534491095666308e-06, "loss": 0.4646, "step": 10871 }, { "epoch": 1.7854987375033358, "grad_norm": 0.2930336057878312, "learning_rate": 6.534072778942927e-06, "loss": 0.4641, "step": 10872 }, { "epoch": 1.7856629647116786, "grad_norm": 0.2594508867244782, "learning_rate": 6.533654440127514e-06, "loss": 0.4437, "step": 10873 }, { "epoch": 1.7858271919200215, "grad_norm": 0.2716593847223371, "learning_rate": 6.5332360792246125e-06, "loss": 0.4558, "step": 10874 }, { "epoch": 1.7859914191283641, "grad_norm": 0.28558707454108323, "learning_rate": 6.532817696238766e-06, "loss": 0.4584, "step": 10875 }, { "epoch": 1.7861556463367068, "grad_norm": 0.35979502883415454, "learning_rate": 6.532399291174521e-06, "loss": 0.4617, "step": 10876 }, { "epoch": 1.7863198735450496, "grad_norm": 0.28513575240988615, "learning_rate": 6.531980864036419e-06, "loss": 0.4546, "step": 10877 }, { "epoch": 1.7864841007533925, "grad_norm": 0.3096452283539566, "learning_rate": 6.531562414829007e-06, "loss": 0.4776, "step": 10878 }, { "epoch": 1.7866483279617351, "grad_norm": 0.3335856723002851, "learning_rate": 6.5311439435568275e-06, "loss": 0.4468, "step": 10879 }, { "epoch": 1.7868125551700778, "grad_norm": 0.2934169575309989, "learning_rate": 6.530725450224426e-06, "loss": 0.4379, "step": 10880 }, { "epoch": 1.7869767823784204, "grad_norm": 0.27505220382016676, "learning_rate": 6.530306934836349e-06, "loss": 0.4481, "step": 10881 }, { "epoch": 1.7871410095867633, "grad_norm": 0.30139640369351783, "learning_rate": 6.52988839739714e-06, "loss": 0.4537, "step": 10882 }, { "epoch": 1.7873052367951061, "grad_norm": 0.2989131874209402, "learning_rate": 6.529469837911347e-06, "loss": 0.427, "step": 10883 }, { "epoch": 1.7874694640034487, "grad_norm": 0.5210680923032412, "learning_rate": 6.529051256383515e-06, "loss": 0.4633, "step": 10884 }, { "epoch": 1.7876336912117914, "grad_norm": 0.29927676663364566, "learning_rate": 6.528632652818189e-06, "loss": 0.482, "step": 10885 }, { "epoch": 1.7877979184201342, "grad_norm": 0.30191373230381446, "learning_rate": 6.528214027219916e-06, "loss": 0.4607, "step": 10886 }, { "epoch": 1.787962145628477, "grad_norm": 0.293916130122298, "learning_rate": 6.527795379593244e-06, "loss": 0.4591, "step": 10887 }, { "epoch": 1.7881263728368197, "grad_norm": 0.4352563222467592, "learning_rate": 6.52737670994272e-06, "loss": 0.4788, "step": 10888 }, { "epoch": 1.7882906000451624, "grad_norm": 0.9674724434130556, "learning_rate": 6.52695801827289e-06, "loss": 0.4533, "step": 10889 }, { "epoch": 1.7884548272535052, "grad_norm": 0.2905113824899683, "learning_rate": 6.5265393045882995e-06, "loss": 0.442, "step": 10890 }, { "epoch": 1.788619054461848, "grad_norm": 0.2708018683006927, "learning_rate": 6.5261205688935e-06, "loss": 0.4559, "step": 10891 }, { "epoch": 1.7887832816701907, "grad_norm": 0.32087345113081384, "learning_rate": 6.525701811193037e-06, "loss": 0.4751, "step": 10892 }, { "epoch": 1.7889475088785334, "grad_norm": 0.37785896852038653, "learning_rate": 6.52528303149146e-06, "loss": 0.4552, "step": 10893 }, { "epoch": 1.7891117360868762, "grad_norm": 0.430886228611665, "learning_rate": 6.524864229793317e-06, "loss": 0.4806, "step": 10894 }, { "epoch": 1.789275963295219, "grad_norm": 0.290804423309023, "learning_rate": 6.524445406103155e-06, "loss": 0.4715, "step": 10895 }, { "epoch": 1.7894401905035617, "grad_norm": 0.503444044730968, "learning_rate": 6.524026560425525e-06, "loss": 0.4692, "step": 10896 }, { "epoch": 1.7896044177119044, "grad_norm": 0.31224124708725315, "learning_rate": 6.523607692764976e-06, "loss": 0.4474, "step": 10897 }, { "epoch": 1.789768644920247, "grad_norm": 0.2946202941953378, "learning_rate": 6.523188803126056e-06, "loss": 0.4633, "step": 10898 }, { "epoch": 1.7899328721285899, "grad_norm": 0.2758037376117042, "learning_rate": 6.522769891513314e-06, "loss": 0.4693, "step": 10899 }, { "epoch": 1.7900970993369327, "grad_norm": 0.32328162302086266, "learning_rate": 6.522350957931301e-06, "loss": 0.4612, "step": 10900 }, { "epoch": 1.7902613265452754, "grad_norm": 0.3282936126324164, "learning_rate": 6.521932002384568e-06, "loss": 0.4419, "step": 10901 }, { "epoch": 1.790425553753618, "grad_norm": 0.389470936266045, "learning_rate": 6.5215130248776625e-06, "loss": 0.4575, "step": 10902 }, { "epoch": 1.7905897809619609, "grad_norm": 0.35674404043827046, "learning_rate": 6.521094025415138e-06, "loss": 0.4636, "step": 10903 }, { "epoch": 1.7907540081703037, "grad_norm": 0.45021462450787103, "learning_rate": 6.520675004001544e-06, "loss": 0.4805, "step": 10904 }, { "epoch": 1.7909182353786464, "grad_norm": 0.3029242489250928, "learning_rate": 6.520255960641431e-06, "loss": 0.4538, "step": 10905 }, { "epoch": 1.791082462586989, "grad_norm": 0.35349088557231206, "learning_rate": 6.5198368953393505e-06, "loss": 0.47, "step": 10906 }, { "epoch": 1.7912466897953319, "grad_norm": 0.2986364223054352, "learning_rate": 6.519417808099853e-06, "loss": 0.4348, "step": 10907 }, { "epoch": 1.7914109170036747, "grad_norm": 0.2913259353520882, "learning_rate": 6.5189986989274925e-06, "loss": 0.4525, "step": 10908 }, { "epoch": 1.7915751442120174, "grad_norm": 0.3301936968659294, "learning_rate": 6.518579567826821e-06, "loss": 0.4729, "step": 10909 }, { "epoch": 1.79173937142036, "grad_norm": 0.38417340767056996, "learning_rate": 6.518160414802386e-06, "loss": 0.4485, "step": 10910 }, { "epoch": 1.7919035986287029, "grad_norm": 0.344684065145805, "learning_rate": 6.517741239858746e-06, "loss": 0.4345, "step": 10911 }, { "epoch": 1.7920678258370457, "grad_norm": 0.2605300283061335, "learning_rate": 6.5173220430004505e-06, "loss": 0.4607, "step": 10912 }, { "epoch": 1.7922320530453884, "grad_norm": 0.2659233305573, "learning_rate": 6.5169028242320535e-06, "loss": 0.4444, "step": 10913 }, { "epoch": 1.792396280253731, "grad_norm": 0.4184644221606819, "learning_rate": 6.516483583558105e-06, "loss": 0.4402, "step": 10914 }, { "epoch": 1.7925605074620736, "grad_norm": 0.3084055212966688, "learning_rate": 6.5160643209831625e-06, "loss": 0.4519, "step": 10915 }, { "epoch": 1.7927247346704165, "grad_norm": 0.31404721713067635, "learning_rate": 6.5156450365117775e-06, "loss": 0.4551, "step": 10916 }, { "epoch": 1.7928889618787593, "grad_norm": 0.3102731776923771, "learning_rate": 6.515225730148504e-06, "loss": 0.4377, "step": 10917 }, { "epoch": 1.793053189087102, "grad_norm": 0.2839548794420706, "learning_rate": 6.514806401897898e-06, "loss": 0.457, "step": 10918 }, { "epoch": 1.7932174162954446, "grad_norm": 0.3763346060238748, "learning_rate": 6.51438705176451e-06, "loss": 0.4337, "step": 10919 }, { "epoch": 1.7933816435037875, "grad_norm": 0.3949426628078843, "learning_rate": 6.513967679752898e-06, "loss": 0.4731, "step": 10920 }, { "epoch": 1.7935458707121303, "grad_norm": 0.3935792482843286, "learning_rate": 6.513548285867615e-06, "loss": 0.4436, "step": 10921 }, { "epoch": 1.793710097920473, "grad_norm": 0.315815220534042, "learning_rate": 6.513128870113217e-06, "loss": 0.4442, "step": 10922 }, { "epoch": 1.7938743251288156, "grad_norm": 0.34375092404008345, "learning_rate": 6.51270943249426e-06, "loss": 0.4646, "step": 10923 }, { "epoch": 1.7940385523371585, "grad_norm": 0.31256531427721074, "learning_rate": 6.512289973015296e-06, "loss": 0.4668, "step": 10924 }, { "epoch": 1.7942027795455013, "grad_norm": 0.37261118367690316, "learning_rate": 6.511870491680884e-06, "loss": 0.449, "step": 10925 }, { "epoch": 1.794367006753844, "grad_norm": 0.3237806057691401, "learning_rate": 6.511450988495579e-06, "loss": 0.4706, "step": 10926 }, { "epoch": 1.7945312339621866, "grad_norm": 0.3194253833424651, "learning_rate": 6.511031463463938e-06, "loss": 0.4614, "step": 10927 }, { "epoch": 1.7946954611705295, "grad_norm": 0.32585747743809507, "learning_rate": 6.510611916590516e-06, "loss": 0.4661, "step": 10928 }, { "epoch": 1.7948596883788723, "grad_norm": 0.3586057403191899, "learning_rate": 6.51019234787987e-06, "loss": 0.4613, "step": 10929 }, { "epoch": 1.795023915587215, "grad_norm": 0.346056732069319, "learning_rate": 6.5097727573365585e-06, "loss": 0.4798, "step": 10930 }, { "epoch": 1.7951881427955576, "grad_norm": 0.3126585681021275, "learning_rate": 6.509353144965137e-06, "loss": 0.4404, "step": 10931 }, { "epoch": 1.7953523700039002, "grad_norm": 0.3535289870550478, "learning_rate": 6.508933510770163e-06, "loss": 0.4505, "step": 10932 }, { "epoch": 1.795516597212243, "grad_norm": 0.3555341152205804, "learning_rate": 6.508513854756194e-06, "loss": 0.4561, "step": 10933 }, { "epoch": 1.795680824420586, "grad_norm": 0.29507904491048875, "learning_rate": 6.5080941769277895e-06, "loss": 0.4435, "step": 10934 }, { "epoch": 1.7958450516289286, "grad_norm": 0.5882288729640641, "learning_rate": 6.5076744772895066e-06, "loss": 0.456, "step": 10935 }, { "epoch": 1.7960092788372712, "grad_norm": 0.45412600871907816, "learning_rate": 6.507254755845903e-06, "loss": 0.4564, "step": 10936 }, { "epoch": 1.796173506045614, "grad_norm": 0.39575488914845575, "learning_rate": 6.506835012601538e-06, "loss": 0.4669, "step": 10937 }, { "epoch": 1.796337733253957, "grad_norm": 0.48392008032248435, "learning_rate": 6.50641524756097e-06, "loss": 0.4518, "step": 10938 }, { "epoch": 1.7965019604622996, "grad_norm": 0.5955498416084375, "learning_rate": 6.505995460728759e-06, "loss": 0.4419, "step": 10939 }, { "epoch": 1.7966661876706422, "grad_norm": 0.32689911038398983, "learning_rate": 6.505575652109464e-06, "loss": 0.4531, "step": 10940 }, { "epoch": 1.796830414878985, "grad_norm": 0.34195713441440434, "learning_rate": 6.505155821707642e-06, "loss": 0.4402, "step": 10941 }, { "epoch": 1.796994642087328, "grad_norm": 0.2566054657671745, "learning_rate": 6.504735969527858e-06, "loss": 0.4404, "step": 10942 }, { "epoch": 1.7971588692956706, "grad_norm": 0.32830307058291525, "learning_rate": 6.504316095574668e-06, "loss": 0.4713, "step": 10943 }, { "epoch": 1.7973230965040132, "grad_norm": 0.2556867194390802, "learning_rate": 6.503896199852632e-06, "loss": 0.447, "step": 10944 }, { "epoch": 1.797487323712356, "grad_norm": 0.33439323742377086, "learning_rate": 6.503476282366313e-06, "loss": 0.4523, "step": 10945 }, { "epoch": 1.797651550920699, "grad_norm": 0.369790100012626, "learning_rate": 6.50305634312027e-06, "loss": 0.4555, "step": 10946 }, { "epoch": 1.7978157781290416, "grad_norm": 0.31309996544936336, "learning_rate": 6.502636382119064e-06, "loss": 0.4618, "step": 10947 }, { "epoch": 1.7979800053373842, "grad_norm": 0.28018315473575545, "learning_rate": 6.5022163993672575e-06, "loss": 0.47, "step": 10948 }, { "epoch": 1.7981442325457269, "grad_norm": 0.33690209073758887, "learning_rate": 6.5017963948694094e-06, "loss": 0.4501, "step": 10949 }, { "epoch": 1.7983084597540697, "grad_norm": 0.4252119518199385, "learning_rate": 6.501376368630083e-06, "loss": 0.4494, "step": 10950 }, { "epoch": 1.7984726869624126, "grad_norm": 0.3752736500074051, "learning_rate": 6.5009563206538426e-06, "loss": 0.4607, "step": 10951 }, { "epoch": 1.7986369141707552, "grad_norm": 0.35097152026242395, "learning_rate": 6.500536250945247e-06, "loss": 0.4517, "step": 10952 }, { "epoch": 1.7988011413790979, "grad_norm": 0.3815338566089623, "learning_rate": 6.500116159508858e-06, "loss": 0.4392, "step": 10953 }, { "epoch": 1.7989653685874407, "grad_norm": 0.47223690095875387, "learning_rate": 6.49969604634924e-06, "loss": 0.4736, "step": 10954 }, { "epoch": 1.7991295957957836, "grad_norm": 0.35101255145850957, "learning_rate": 6.499275911470957e-06, "loss": 0.4562, "step": 10955 }, { "epoch": 1.7992938230041262, "grad_norm": 0.6402324900212448, "learning_rate": 6.498855754878569e-06, "loss": 0.4366, "step": 10956 }, { "epoch": 1.7994580502124689, "grad_norm": 0.3048648173075826, "learning_rate": 6.498435576576641e-06, "loss": 0.4421, "step": 10957 }, { "epoch": 1.7996222774208117, "grad_norm": 0.34111976311589354, "learning_rate": 6.498015376569737e-06, "loss": 0.4375, "step": 10958 }, { "epoch": 1.7997865046291546, "grad_norm": 0.30784589349110625, "learning_rate": 6.49759515486242e-06, "loss": 0.4567, "step": 10959 }, { "epoch": 1.7999507318374972, "grad_norm": 0.31297748416124943, "learning_rate": 6.497174911459255e-06, "loss": 0.4631, "step": 10960 }, { "epoch": 1.8001149590458398, "grad_norm": 0.3201823680748511, "learning_rate": 6.496754646364805e-06, "loss": 0.4813, "step": 10961 }, { "epoch": 1.8002791862541827, "grad_norm": 0.2898450135675172, "learning_rate": 6.496334359583635e-06, "loss": 0.4512, "step": 10962 }, { "epoch": 1.8004434134625256, "grad_norm": 0.3354166298240835, "learning_rate": 6.4959140511203085e-06, "loss": 0.4553, "step": 10963 }, { "epoch": 1.8006076406708682, "grad_norm": 0.3826613416674712, "learning_rate": 6.495493720979394e-06, "loss": 0.4511, "step": 10964 }, { "epoch": 1.8007718678792108, "grad_norm": 0.31587237057764445, "learning_rate": 6.495073369165452e-06, "loss": 0.4801, "step": 10965 }, { "epoch": 1.8009360950875535, "grad_norm": 0.3074217908036942, "learning_rate": 6.494652995683053e-06, "loss": 0.4202, "step": 10966 }, { "epoch": 1.8011003222958963, "grad_norm": 0.3223590332544111, "learning_rate": 6.494232600536757e-06, "loss": 0.4732, "step": 10967 }, { "epoch": 1.8012645495042392, "grad_norm": 0.34342528444048104, "learning_rate": 6.493812183731135e-06, "loss": 0.4386, "step": 10968 }, { "epoch": 1.8014287767125818, "grad_norm": 0.28160135914564627, "learning_rate": 6.49339174527075e-06, "loss": 0.428, "step": 10969 }, { "epoch": 1.8015930039209245, "grad_norm": 0.32430009941633803, "learning_rate": 6.492971285160169e-06, "loss": 0.4391, "step": 10970 }, { "epoch": 1.8017572311292673, "grad_norm": 0.364125940598261, "learning_rate": 6.492550803403962e-06, "loss": 0.4454, "step": 10971 }, { "epoch": 1.8019214583376102, "grad_norm": 0.27861356365010376, "learning_rate": 6.492130300006691e-06, "loss": 0.4506, "step": 10972 }, { "epoch": 1.8020856855459528, "grad_norm": 0.29793446395521767, "learning_rate": 6.491709774972923e-06, "loss": 0.4528, "step": 10973 }, { "epoch": 1.8022499127542955, "grad_norm": 0.30617337367580927, "learning_rate": 6.491289228307229e-06, "loss": 0.4604, "step": 10974 }, { "epoch": 1.8024141399626383, "grad_norm": 0.35553986727454073, "learning_rate": 6.490868660014175e-06, "loss": 0.4618, "step": 10975 }, { "epoch": 1.8025783671709812, "grad_norm": 0.43792944372602643, "learning_rate": 6.49044807009833e-06, "loss": 0.444, "step": 10976 }, { "epoch": 1.8027425943793238, "grad_norm": 0.309443097946476, "learning_rate": 6.490027458564258e-06, "loss": 0.4633, "step": 10977 }, { "epoch": 1.8029068215876665, "grad_norm": 0.29035938841687275, "learning_rate": 6.489606825416531e-06, "loss": 0.4657, "step": 10978 }, { "epoch": 1.8030710487960093, "grad_norm": 0.38280437482755186, "learning_rate": 6.489186170659715e-06, "loss": 0.4412, "step": 10979 }, { "epoch": 1.8032352760043522, "grad_norm": 0.3268712043393675, "learning_rate": 6.488765494298382e-06, "loss": 0.461, "step": 10980 }, { "epoch": 1.8033995032126948, "grad_norm": 0.5975993158922271, "learning_rate": 6.488344796337099e-06, "loss": 0.4575, "step": 10981 }, { "epoch": 1.8035637304210375, "grad_norm": 0.28730055670229004, "learning_rate": 6.487924076780434e-06, "loss": 0.4605, "step": 10982 }, { "epoch": 1.80372795762938, "grad_norm": 0.3067576354459948, "learning_rate": 6.487503335632958e-06, "loss": 0.4579, "step": 10983 }, { "epoch": 1.803892184837723, "grad_norm": 0.3178578324928383, "learning_rate": 6.48708257289924e-06, "loss": 0.4553, "step": 10984 }, { "epoch": 1.8040564120460658, "grad_norm": 0.31296466092563197, "learning_rate": 6.486661788583851e-06, "loss": 0.4701, "step": 10985 }, { "epoch": 1.8042206392544085, "grad_norm": 0.27868046588972933, "learning_rate": 6.4862409826913615e-06, "loss": 0.4234, "step": 10986 }, { "epoch": 1.804384866462751, "grad_norm": 0.2894081748376271, "learning_rate": 6.485820155226339e-06, "loss": 0.4575, "step": 10987 }, { "epoch": 1.804549093671094, "grad_norm": 0.3840278851261531, "learning_rate": 6.485399306193356e-06, "loss": 0.4688, "step": 10988 }, { "epoch": 1.8047133208794368, "grad_norm": 0.35329064565706103, "learning_rate": 6.484978435596983e-06, "loss": 0.4492, "step": 10989 }, { "epoch": 1.8048775480877794, "grad_norm": 0.2666745016209278, "learning_rate": 6.484557543441792e-06, "loss": 0.451, "step": 10990 }, { "epoch": 1.805041775296122, "grad_norm": 0.2727701044292131, "learning_rate": 6.484136629732354e-06, "loss": 0.4699, "step": 10991 }, { "epoch": 1.805206002504465, "grad_norm": 0.2876975201051365, "learning_rate": 6.483715694473239e-06, "loss": 0.452, "step": 10992 }, { "epoch": 1.8053702297128078, "grad_norm": 0.30486164251675607, "learning_rate": 6.483294737669021e-06, "loss": 0.4638, "step": 10993 }, { "epoch": 1.8055344569211504, "grad_norm": 0.3242566132554888, "learning_rate": 6.482873759324268e-06, "loss": 0.4656, "step": 10994 }, { "epoch": 1.805698684129493, "grad_norm": 0.27753825968593376, "learning_rate": 6.4824527594435586e-06, "loss": 0.4425, "step": 10995 }, { "epoch": 1.805862911337836, "grad_norm": 0.32392349033908413, "learning_rate": 6.48203173803146e-06, "loss": 0.4592, "step": 10996 }, { "epoch": 1.8060271385461788, "grad_norm": 0.2700411488707498, "learning_rate": 6.481610695092547e-06, "loss": 0.4622, "step": 10997 }, { "epoch": 1.8061913657545214, "grad_norm": 0.3150824644921173, "learning_rate": 6.481189630631392e-06, "loss": 0.4466, "step": 10998 }, { "epoch": 1.806355592962864, "grad_norm": 0.4169412074763378, "learning_rate": 6.480768544652569e-06, "loss": 0.4512, "step": 10999 }, { "epoch": 1.8065198201712067, "grad_norm": 0.33008365407804763, "learning_rate": 6.48034743716065e-06, "loss": 0.4525, "step": 11000 }, { "epoch": 1.8066840473795496, "grad_norm": 0.3184161468360843, "learning_rate": 6.479926308160211e-06, "loss": 0.4638, "step": 11001 }, { "epoch": 1.8068482745878924, "grad_norm": 0.2894848075749476, "learning_rate": 6.479505157655822e-06, "loss": 0.443, "step": 11002 }, { "epoch": 1.807012501796235, "grad_norm": 0.319981318393346, "learning_rate": 6.4790839856520605e-06, "loss": 0.4473, "step": 11003 }, { "epoch": 1.8071767290045777, "grad_norm": 0.3540499062083888, "learning_rate": 6.4786627921534985e-06, "loss": 0.4304, "step": 11004 }, { "epoch": 1.8073409562129206, "grad_norm": 0.294134878699911, "learning_rate": 6.4782415771647145e-06, "loss": 0.4544, "step": 11005 }, { "epoch": 1.8075051834212634, "grad_norm": 0.4010107953685391, "learning_rate": 6.47782034069028e-06, "loss": 0.4226, "step": 11006 }, { "epoch": 1.807669410629606, "grad_norm": 0.3958453380626719, "learning_rate": 6.477399082734769e-06, "loss": 0.4563, "step": 11007 }, { "epoch": 1.8078336378379487, "grad_norm": 0.49181283823773797, "learning_rate": 6.476977803302758e-06, "loss": 0.4549, "step": 11008 }, { "epoch": 1.8079978650462916, "grad_norm": 0.32707082518126557, "learning_rate": 6.476556502398825e-06, "loss": 0.437, "step": 11009 }, { "epoch": 1.8081620922546344, "grad_norm": 0.32247170989748514, "learning_rate": 6.476135180027544e-06, "loss": 0.4545, "step": 11010 }, { "epoch": 1.808326319462977, "grad_norm": 0.2571814591641584, "learning_rate": 6.47571383619349e-06, "loss": 0.4704, "step": 11011 }, { "epoch": 1.8084905466713197, "grad_norm": 0.27433283480124443, "learning_rate": 6.4752924709012385e-06, "loss": 0.4397, "step": 11012 }, { "epoch": 1.8086547738796626, "grad_norm": 0.30296470989492685, "learning_rate": 6.474871084155368e-06, "loss": 0.4827, "step": 11013 }, { "epoch": 1.8088190010880054, "grad_norm": 0.329768978817267, "learning_rate": 6.474449675960455e-06, "loss": 0.4383, "step": 11014 }, { "epoch": 1.808983228296348, "grad_norm": 0.3012850081596831, "learning_rate": 6.474028246321077e-06, "loss": 0.4354, "step": 11015 }, { "epoch": 1.8091474555046907, "grad_norm": 0.3940716281617523, "learning_rate": 6.47360679524181e-06, "loss": 0.4473, "step": 11016 }, { "epoch": 1.8093116827130333, "grad_norm": 0.29057796041348144, "learning_rate": 6.473185322727228e-06, "loss": 0.456, "step": 11017 }, { "epoch": 1.8094759099213762, "grad_norm": 0.3647116420745295, "learning_rate": 6.472763828781916e-06, "loss": 0.4429, "step": 11018 }, { "epoch": 1.809640137129719, "grad_norm": 0.4227525053223582, "learning_rate": 6.472342313410446e-06, "loss": 0.4582, "step": 11019 }, { "epoch": 1.8098043643380617, "grad_norm": 0.3717185423227278, "learning_rate": 6.471920776617399e-06, "loss": 0.4441, "step": 11020 }, { "epoch": 1.8099685915464043, "grad_norm": 0.27670914914335104, "learning_rate": 6.471499218407351e-06, "loss": 0.4658, "step": 11021 }, { "epoch": 1.8101328187547472, "grad_norm": 0.34867878518606726, "learning_rate": 6.471077638784882e-06, "loss": 0.4565, "step": 11022 }, { "epoch": 1.81029704596309, "grad_norm": 0.27587572155017304, "learning_rate": 6.470656037754571e-06, "loss": 0.4582, "step": 11023 }, { "epoch": 1.8104612731714327, "grad_norm": 0.7304768832794305, "learning_rate": 6.470234415320997e-06, "loss": 0.4771, "step": 11024 }, { "epoch": 1.8106255003797753, "grad_norm": 0.3823763993220649, "learning_rate": 6.469812771488737e-06, "loss": 0.4675, "step": 11025 }, { "epoch": 1.8107897275881182, "grad_norm": 0.333588611062913, "learning_rate": 6.469391106262375e-06, "loss": 0.4481, "step": 11026 }, { "epoch": 1.810953954796461, "grad_norm": 0.3375432724883843, "learning_rate": 6.468969419646486e-06, "loss": 0.4603, "step": 11027 }, { "epoch": 1.8111181820048037, "grad_norm": 0.3195629064261066, "learning_rate": 6.468547711645652e-06, "loss": 0.4542, "step": 11028 }, { "epoch": 1.8112824092131463, "grad_norm": 0.28394558610822923, "learning_rate": 6.468125982264454e-06, "loss": 0.4513, "step": 11029 }, { "epoch": 1.8114466364214892, "grad_norm": 0.33674311931385514, "learning_rate": 6.4677042315074715e-06, "loss": 0.4662, "step": 11030 }, { "epoch": 1.811610863629832, "grad_norm": 0.38153871812642226, "learning_rate": 6.4672824593792835e-06, "loss": 0.4618, "step": 11031 }, { "epoch": 1.8117750908381747, "grad_norm": 0.30354053866531616, "learning_rate": 6.466860665884473e-06, "loss": 0.4538, "step": 11032 }, { "epoch": 1.8119393180465173, "grad_norm": 0.3405080385317878, "learning_rate": 6.466438851027622e-06, "loss": 0.4433, "step": 11033 }, { "epoch": 1.81210354525486, "grad_norm": 0.29623350679986754, "learning_rate": 6.46601701481331e-06, "loss": 0.4314, "step": 11034 }, { "epoch": 1.8122677724632028, "grad_norm": 0.2935779674611224, "learning_rate": 6.465595157246118e-06, "loss": 0.4527, "step": 11035 }, { "epoch": 1.8124319996715457, "grad_norm": 0.3068991690315416, "learning_rate": 6.4651732783306285e-06, "loss": 0.4497, "step": 11036 }, { "epoch": 1.8125962268798883, "grad_norm": 0.40453061082958175, "learning_rate": 6.464751378071424e-06, "loss": 0.4602, "step": 11037 }, { "epoch": 1.812760454088231, "grad_norm": 0.31945911053068904, "learning_rate": 6.464329456473086e-06, "loss": 0.464, "step": 11038 }, { "epoch": 1.8129246812965738, "grad_norm": 0.311919813694301, "learning_rate": 6.4639075135402e-06, "loss": 0.4706, "step": 11039 }, { "epoch": 1.8130889085049167, "grad_norm": 0.37696971232973997, "learning_rate": 6.463485549277343e-06, "loss": 0.4397, "step": 11040 }, { "epoch": 1.8132531357132593, "grad_norm": 0.3312339585677867, "learning_rate": 6.463063563689103e-06, "loss": 0.4468, "step": 11041 }, { "epoch": 1.813417362921602, "grad_norm": 1.0677719619640302, "learning_rate": 6.46264155678006e-06, "loss": 0.4389, "step": 11042 }, { "epoch": 1.8135815901299448, "grad_norm": 0.3297430212323639, "learning_rate": 6.4622195285548e-06, "loss": 0.4531, "step": 11043 }, { "epoch": 1.8137458173382877, "grad_norm": 0.2958232682654792, "learning_rate": 6.461797479017906e-06, "loss": 0.4471, "step": 11044 }, { "epoch": 1.8139100445466303, "grad_norm": 0.327250642966852, "learning_rate": 6.46137540817396e-06, "loss": 0.4401, "step": 11045 }, { "epoch": 1.814074271754973, "grad_norm": 0.4175480266474759, "learning_rate": 6.4609533160275465e-06, "loss": 0.4378, "step": 11046 }, { "epoch": 1.8142384989633158, "grad_norm": 0.38417408251346336, "learning_rate": 6.460531202583252e-06, "loss": 0.4641, "step": 11047 }, { "epoch": 1.8144027261716587, "grad_norm": 1.088990431170157, "learning_rate": 6.460109067845658e-06, "loss": 0.4307, "step": 11048 }, { "epoch": 1.8145669533800013, "grad_norm": 0.4952726764230832, "learning_rate": 6.459686911819353e-06, "loss": 0.4457, "step": 11049 }, { "epoch": 1.814731180588344, "grad_norm": 0.31313614766898384, "learning_rate": 6.45926473450892e-06, "loss": 0.4671, "step": 11050 }, { "epoch": 1.8148954077966866, "grad_norm": 0.3303499375852876, "learning_rate": 6.458842535918944e-06, "loss": 0.4383, "step": 11051 }, { "epoch": 1.8150596350050294, "grad_norm": 0.29281553354836926, "learning_rate": 6.4584203160540105e-06, "loss": 0.4327, "step": 11052 }, { "epoch": 1.8152238622133723, "grad_norm": 0.38270445224557276, "learning_rate": 6.457998074918705e-06, "loss": 0.4503, "step": 11053 }, { "epoch": 1.815388089421715, "grad_norm": 0.4228231426560049, "learning_rate": 6.457575812517615e-06, "loss": 0.4482, "step": 11054 }, { "epoch": 1.8155523166300576, "grad_norm": 0.34167907967027655, "learning_rate": 6.457153528855325e-06, "loss": 0.4512, "step": 11055 }, { "epoch": 1.8157165438384004, "grad_norm": 0.41697598086361165, "learning_rate": 6.456731223936423e-06, "loss": 0.4805, "step": 11056 }, { "epoch": 1.8158807710467433, "grad_norm": 0.3569020023794603, "learning_rate": 6.456308897765494e-06, "loss": 0.4532, "step": 11057 }, { "epoch": 1.816044998255086, "grad_norm": 0.3618170825730565, "learning_rate": 6.455886550347127e-06, "loss": 0.4494, "step": 11058 }, { "epoch": 1.8162092254634286, "grad_norm": 0.3233994158463805, "learning_rate": 6.455464181685904e-06, "loss": 0.4636, "step": 11059 }, { "epoch": 1.8163734526717714, "grad_norm": 0.2949245076583048, "learning_rate": 6.45504179178642e-06, "loss": 0.4542, "step": 11060 }, { "epoch": 1.8165376798801143, "grad_norm": 0.3139588667284323, "learning_rate": 6.454619380653257e-06, "loss": 0.4656, "step": 11061 }, { "epoch": 1.816701907088457, "grad_norm": 0.29795405508359124, "learning_rate": 6.4541969482910044e-06, "loss": 0.4467, "step": 11062 }, { "epoch": 1.8168661342967996, "grad_norm": 0.3168979315435727, "learning_rate": 6.453774494704251e-06, "loss": 0.4384, "step": 11063 }, { "epoch": 1.8170303615051424, "grad_norm": 0.32749872421298704, "learning_rate": 6.453352019897584e-06, "loss": 0.4554, "step": 11064 }, { "epoch": 1.8171945887134853, "grad_norm": 0.42556222068988175, "learning_rate": 6.452929523875592e-06, "loss": 0.4552, "step": 11065 }, { "epoch": 1.817358815921828, "grad_norm": 0.44368552613487633, "learning_rate": 6.452507006642863e-06, "loss": 0.4534, "step": 11066 }, { "epoch": 1.8175230431301705, "grad_norm": 0.3223174577268706, "learning_rate": 6.452084468203988e-06, "loss": 0.4479, "step": 11067 }, { "epoch": 1.8176872703385132, "grad_norm": 0.33800164745732303, "learning_rate": 6.4516619085635555e-06, "loss": 0.4734, "step": 11068 }, { "epoch": 1.817851497546856, "grad_norm": 0.3452294395819774, "learning_rate": 6.451239327726155e-06, "loss": 0.4517, "step": 11069 }, { "epoch": 1.818015724755199, "grad_norm": 0.4589685077156546, "learning_rate": 6.4508167256963735e-06, "loss": 0.4278, "step": 11070 }, { "epoch": 1.8181799519635415, "grad_norm": 0.4040673809335704, "learning_rate": 6.450394102478804e-06, "loss": 0.4348, "step": 11071 }, { "epoch": 1.8183441791718842, "grad_norm": 0.3518608635195822, "learning_rate": 6.449971458078036e-06, "loss": 0.4628, "step": 11072 }, { "epoch": 1.818508406380227, "grad_norm": 0.40018200265748566, "learning_rate": 6.44954879249866e-06, "loss": 0.432, "step": 11073 }, { "epoch": 1.81867263358857, "grad_norm": 0.3737532515019579, "learning_rate": 6.4491261057452644e-06, "loss": 0.4505, "step": 11074 }, { "epoch": 1.8188368607969125, "grad_norm": 0.31055544483008213, "learning_rate": 6.448703397822442e-06, "loss": 0.4481, "step": 11075 }, { "epoch": 1.8190010880052552, "grad_norm": 0.3225928339655794, "learning_rate": 6.448280668734785e-06, "loss": 0.463, "step": 11076 }, { "epoch": 1.819165315213598, "grad_norm": 0.3259872807174308, "learning_rate": 6.447857918486881e-06, "loss": 0.4619, "step": 11077 }, { "epoch": 1.819329542421941, "grad_norm": 0.3702360876886552, "learning_rate": 6.447435147083326e-06, "loss": 0.4555, "step": 11078 }, { "epoch": 1.8194937696302835, "grad_norm": 0.3691408949553295, "learning_rate": 6.447012354528708e-06, "loss": 0.4432, "step": 11079 }, { "epoch": 1.8196579968386262, "grad_norm": 0.3041321344097473, "learning_rate": 6.446589540827619e-06, "loss": 0.4201, "step": 11080 }, { "epoch": 1.819822224046969, "grad_norm": 0.30894935051624056, "learning_rate": 6.446166705984654e-06, "loss": 0.4367, "step": 11081 }, { "epoch": 1.8199864512553119, "grad_norm": 0.3979234912372484, "learning_rate": 6.4457438500044025e-06, "loss": 0.4442, "step": 11082 }, { "epoch": 1.8201506784636545, "grad_norm": 0.35321748362975847, "learning_rate": 6.44532097289146e-06, "loss": 0.4585, "step": 11083 }, { "epoch": 1.8203149056719972, "grad_norm": 0.32382149930434995, "learning_rate": 6.444898074650416e-06, "loss": 0.4504, "step": 11084 }, { "epoch": 1.8204791328803398, "grad_norm": 0.3905287570407914, "learning_rate": 6.444475155285867e-06, "loss": 0.4671, "step": 11085 }, { "epoch": 1.8206433600886827, "grad_norm": 0.3364174209242473, "learning_rate": 6.444052214802404e-06, "loss": 0.4514, "step": 11086 }, { "epoch": 1.8208075872970255, "grad_norm": 0.3132902251492512, "learning_rate": 6.443629253204621e-06, "loss": 0.4593, "step": 11087 }, { "epoch": 1.8209718145053682, "grad_norm": 0.31611007055962465, "learning_rate": 6.443206270497113e-06, "loss": 0.4289, "step": 11088 }, { "epoch": 1.8211360417137108, "grad_norm": 0.3183713282494217, "learning_rate": 6.4427832666844725e-06, "loss": 0.4557, "step": 11089 }, { "epoch": 1.8213002689220537, "grad_norm": 0.3550373150593315, "learning_rate": 6.442360241771294e-06, "loss": 0.4428, "step": 11090 }, { "epoch": 1.8214644961303965, "grad_norm": 0.32252481795074844, "learning_rate": 6.4419371957621726e-06, "loss": 0.4485, "step": 11091 }, { "epoch": 1.8216287233387392, "grad_norm": 0.31700873209102415, "learning_rate": 6.441514128661702e-06, "loss": 0.4577, "step": 11092 }, { "epoch": 1.8217929505470818, "grad_norm": 0.3785064320067957, "learning_rate": 6.44109104047448e-06, "loss": 0.4385, "step": 11093 }, { "epoch": 1.8219571777554247, "grad_norm": 0.29377408501971125, "learning_rate": 6.440667931205097e-06, "loss": 0.4443, "step": 11094 }, { "epoch": 1.8221214049637675, "grad_norm": 0.30032625362802456, "learning_rate": 6.440244800858152e-06, "loss": 0.4331, "step": 11095 }, { "epoch": 1.8222856321721101, "grad_norm": 0.3718303109112087, "learning_rate": 6.4398216494382386e-06, "loss": 0.4572, "step": 11096 }, { "epoch": 1.8224498593804528, "grad_norm": 0.2960251316220687, "learning_rate": 6.439398476949954e-06, "loss": 0.4621, "step": 11097 }, { "epoch": 1.8226140865887956, "grad_norm": 0.33123020320172913, "learning_rate": 6.438975283397895e-06, "loss": 0.4432, "step": 11098 }, { "epoch": 1.8227783137971385, "grad_norm": 0.2742503490676697, "learning_rate": 6.4385520687866554e-06, "loss": 0.4535, "step": 11099 }, { "epoch": 1.8229425410054811, "grad_norm": 0.2836794155117891, "learning_rate": 6.438128833120833e-06, "loss": 0.4517, "step": 11100 }, { "epoch": 1.8231067682138238, "grad_norm": 0.30579928885691016, "learning_rate": 6.437705576405025e-06, "loss": 0.4649, "step": 11101 }, { "epoch": 1.8232709954221664, "grad_norm": 0.3615411845518621, "learning_rate": 6.437282298643828e-06, "loss": 0.4617, "step": 11102 }, { "epoch": 1.8234352226305093, "grad_norm": 0.30059915648933905, "learning_rate": 6.43685899984184e-06, "loss": 0.4397, "step": 11103 }, { "epoch": 1.8235994498388521, "grad_norm": 0.9129523653870982, "learning_rate": 6.4364356800036555e-06, "loss": 0.4795, "step": 11104 }, { "epoch": 1.8237636770471948, "grad_norm": 0.4191096623708467, "learning_rate": 6.436012339133876e-06, "loss": 0.4281, "step": 11105 }, { "epoch": 1.8239279042555374, "grad_norm": 0.3097106124164847, "learning_rate": 6.435588977237098e-06, "loss": 0.4599, "step": 11106 }, { "epoch": 1.8240921314638803, "grad_norm": 0.25366945090881754, "learning_rate": 6.435165594317919e-06, "loss": 0.452, "step": 11107 }, { "epoch": 1.8242563586722231, "grad_norm": 0.42724123488796834, "learning_rate": 6.434742190380938e-06, "loss": 0.4489, "step": 11108 }, { "epoch": 1.8244205858805658, "grad_norm": 0.4470158326079735, "learning_rate": 6.4343187654307516e-06, "loss": 0.4586, "step": 11109 }, { "epoch": 1.8245848130889084, "grad_norm": 0.2843995188282995, "learning_rate": 6.4338953194719625e-06, "loss": 0.4499, "step": 11110 }, { "epoch": 1.8247490402972513, "grad_norm": 0.3535728345730193, "learning_rate": 6.433471852509166e-06, "loss": 0.4527, "step": 11111 }, { "epoch": 1.8249132675055941, "grad_norm": 0.390488775163116, "learning_rate": 6.433048364546963e-06, "loss": 0.4499, "step": 11112 }, { "epoch": 1.8250774947139368, "grad_norm": 0.2856644175201384, "learning_rate": 6.4326248555899535e-06, "loss": 0.4562, "step": 11113 }, { "epoch": 1.8252417219222794, "grad_norm": 0.25416212783117076, "learning_rate": 6.432201325642737e-06, "loss": 0.4399, "step": 11114 }, { "epoch": 1.8254059491306223, "grad_norm": 0.41194388605844706, "learning_rate": 6.431777774709912e-06, "loss": 0.4593, "step": 11115 }, { "epoch": 1.8255701763389651, "grad_norm": 0.3144676904089028, "learning_rate": 6.43135420279608e-06, "loss": 0.443, "step": 11116 }, { "epoch": 1.8257344035473078, "grad_norm": 0.393080464105788, "learning_rate": 6.430930609905842e-06, "loss": 0.4569, "step": 11117 }, { "epoch": 1.8258986307556504, "grad_norm": 0.33847400830923585, "learning_rate": 6.430506996043798e-06, "loss": 0.4589, "step": 11118 }, { "epoch": 1.826062857963993, "grad_norm": 0.36415414734893137, "learning_rate": 6.430083361214547e-06, "loss": 0.468, "step": 11119 }, { "epoch": 1.826227085172336, "grad_norm": 0.37557718876286633, "learning_rate": 6.429659705422693e-06, "loss": 0.4628, "step": 11120 }, { "epoch": 1.8263913123806788, "grad_norm": 0.32069328835487537, "learning_rate": 6.429236028672834e-06, "loss": 0.4346, "step": 11121 }, { "epoch": 1.8265555395890214, "grad_norm": 0.31544843970795344, "learning_rate": 6.428812330969576e-06, "loss": 0.4485, "step": 11122 }, { "epoch": 1.826719766797364, "grad_norm": 0.29920140008668483, "learning_rate": 6.428388612317519e-06, "loss": 0.4633, "step": 11123 }, { "epoch": 1.826883994005707, "grad_norm": 0.26314160528688985, "learning_rate": 6.427964872721262e-06, "loss": 0.4446, "step": 11124 }, { "epoch": 1.8270482212140498, "grad_norm": 0.3417200243928961, "learning_rate": 6.4275411121854095e-06, "loss": 0.447, "step": 11125 }, { "epoch": 1.8272124484223924, "grad_norm": 0.4288052637609021, "learning_rate": 6.427117330714566e-06, "loss": 0.4726, "step": 11126 }, { "epoch": 1.827376675630735, "grad_norm": 0.3291954670980573, "learning_rate": 6.426693528313333e-06, "loss": 0.4301, "step": 11127 }, { "epoch": 1.8275409028390779, "grad_norm": 0.3432703402034783, "learning_rate": 6.4262697049863106e-06, "loss": 0.4617, "step": 11128 }, { "epoch": 1.8277051300474207, "grad_norm": 0.31460182638244183, "learning_rate": 6.425845860738104e-06, "loss": 0.4361, "step": 11129 }, { "epoch": 1.8278693572557634, "grad_norm": 0.3356042150563821, "learning_rate": 6.4254219955733166e-06, "loss": 0.4502, "step": 11130 }, { "epoch": 1.828033584464106, "grad_norm": 0.3417134062143049, "learning_rate": 6.424998109496554e-06, "loss": 0.4591, "step": 11131 }, { "epoch": 1.8281978116724489, "grad_norm": 0.29443259361596996, "learning_rate": 6.4245742025124165e-06, "loss": 0.4615, "step": 11132 }, { "epoch": 1.8283620388807917, "grad_norm": 0.3027837712996636, "learning_rate": 6.424150274625509e-06, "loss": 0.4432, "step": 11133 }, { "epoch": 1.8285262660891344, "grad_norm": 0.3859255584228414, "learning_rate": 6.423726325840437e-06, "loss": 0.4525, "step": 11134 }, { "epoch": 1.828690493297477, "grad_norm": 0.8084532470537148, "learning_rate": 6.423302356161805e-06, "loss": 0.4585, "step": 11135 }, { "epoch": 1.8288547205058197, "grad_norm": 0.3222083942212579, "learning_rate": 6.422878365594217e-06, "loss": 0.4348, "step": 11136 }, { "epoch": 1.8290189477141625, "grad_norm": 0.5974350173854517, "learning_rate": 6.422454354142277e-06, "loss": 0.445, "step": 11137 }, { "epoch": 1.8291831749225054, "grad_norm": 0.48381322250594383, "learning_rate": 6.422030321810592e-06, "loss": 0.4752, "step": 11138 }, { "epoch": 1.829347402130848, "grad_norm": 0.43774133544125177, "learning_rate": 6.421606268603767e-06, "loss": 0.4676, "step": 11139 }, { "epoch": 1.8295116293391906, "grad_norm": 0.5109065743607105, "learning_rate": 6.421182194526407e-06, "loss": 0.4339, "step": 11140 }, { "epoch": 1.8296758565475335, "grad_norm": 0.2663502067197082, "learning_rate": 6.420758099583119e-06, "loss": 0.4548, "step": 11141 }, { "epoch": 1.8298400837558764, "grad_norm": 0.35162665093102086, "learning_rate": 6.420333983778507e-06, "loss": 0.4549, "step": 11142 }, { "epoch": 1.830004310964219, "grad_norm": 0.3582562471813268, "learning_rate": 6.419909847117179e-06, "loss": 0.4602, "step": 11143 }, { "epoch": 1.8301685381725616, "grad_norm": 0.3064404860661055, "learning_rate": 6.4194856896037416e-06, "loss": 0.4517, "step": 11144 }, { "epoch": 1.8303327653809045, "grad_norm": 0.3968100953955666, "learning_rate": 6.419061511242799e-06, "loss": 0.4455, "step": 11145 }, { "epoch": 1.8304969925892474, "grad_norm": 0.3523584283490642, "learning_rate": 6.418637312038963e-06, "loss": 0.4545, "step": 11146 }, { "epoch": 1.83066121979759, "grad_norm": 0.335856440378906, "learning_rate": 6.4182130919968375e-06, "loss": 0.4475, "step": 11147 }, { "epoch": 1.8308254470059326, "grad_norm": 0.3146367836178673, "learning_rate": 6.41778885112103e-06, "loss": 0.4426, "step": 11148 }, { "epoch": 1.8309896742142755, "grad_norm": 0.3653371520959425, "learning_rate": 6.417364589416148e-06, "loss": 0.45, "step": 11149 }, { "epoch": 1.8311539014226184, "grad_norm": 0.3708603837292037, "learning_rate": 6.4169403068868e-06, "loss": 0.4656, "step": 11150 }, { "epoch": 1.831318128630961, "grad_norm": 0.29649438936926636, "learning_rate": 6.416516003537597e-06, "loss": 0.4495, "step": 11151 }, { "epoch": 1.8314823558393036, "grad_norm": 0.3521235978642944, "learning_rate": 6.416091679373144e-06, "loss": 0.4578, "step": 11152 }, { "epoch": 1.8316465830476463, "grad_norm": 0.39789361916375526, "learning_rate": 6.415667334398047e-06, "loss": 0.4686, "step": 11153 }, { "epoch": 1.8318108102559891, "grad_norm": 3.7291295388929075, "learning_rate": 6.4152429686169195e-06, "loss": 0.4464, "step": 11154 }, { "epoch": 1.831975037464332, "grad_norm": 0.3062939593911645, "learning_rate": 6.414818582034371e-06, "loss": 0.4477, "step": 11155 }, { "epoch": 1.8321392646726746, "grad_norm": 0.42487906457102476, "learning_rate": 6.414394174655007e-06, "loss": 0.4378, "step": 11156 }, { "epoch": 1.8323034918810173, "grad_norm": 0.4173646928078844, "learning_rate": 6.413969746483439e-06, "loss": 0.4466, "step": 11157 }, { "epoch": 1.8324677190893601, "grad_norm": 0.29528763334595215, "learning_rate": 6.413545297524276e-06, "loss": 0.4338, "step": 11158 }, { "epoch": 1.832631946297703, "grad_norm": 0.3550556187269253, "learning_rate": 6.413120827782128e-06, "loss": 0.4294, "step": 11159 }, { "epoch": 1.8327961735060456, "grad_norm": 0.35767683689815905, "learning_rate": 6.412696337261608e-06, "loss": 0.458, "step": 11160 }, { "epoch": 1.8329604007143883, "grad_norm": 0.28119233715005026, "learning_rate": 6.412271825967322e-06, "loss": 0.4302, "step": 11161 }, { "epoch": 1.8331246279227311, "grad_norm": 0.4170109914341217, "learning_rate": 6.411847293903883e-06, "loss": 0.4465, "step": 11162 }, { "epoch": 1.833288855131074, "grad_norm": 0.34109494612960684, "learning_rate": 6.4114227410759004e-06, "loss": 0.4252, "step": 11163 }, { "epoch": 1.8334530823394166, "grad_norm": 0.3907746165563666, "learning_rate": 6.410998167487988e-06, "loss": 0.4729, "step": 11164 }, { "epoch": 1.8336173095477593, "grad_norm": 0.2953268045869606, "learning_rate": 6.410573573144754e-06, "loss": 0.4539, "step": 11165 }, { "epoch": 1.8337815367561021, "grad_norm": 0.3687647764909497, "learning_rate": 6.410148958050813e-06, "loss": 0.4563, "step": 11166 }, { "epoch": 1.833945763964445, "grad_norm": 0.49949614089435895, "learning_rate": 6.409724322210772e-06, "loss": 0.4785, "step": 11167 }, { "epoch": 1.8341099911727876, "grad_norm": 0.35319171075997546, "learning_rate": 6.4092996656292495e-06, "loss": 0.4475, "step": 11168 }, { "epoch": 1.8342742183811303, "grad_norm": 0.3578914249392386, "learning_rate": 6.408874988310852e-06, "loss": 0.4555, "step": 11169 }, { "epoch": 1.8344384455894729, "grad_norm": 0.5233504521881581, "learning_rate": 6.4084502902601946e-06, "loss": 0.4535, "step": 11170 }, { "epoch": 1.8346026727978157, "grad_norm": 0.3695067674128277, "learning_rate": 6.408025571481889e-06, "loss": 0.4489, "step": 11171 }, { "epoch": 1.8347669000061586, "grad_norm": 0.3542537195814928, "learning_rate": 6.407600831980548e-06, "loss": 0.444, "step": 11172 }, { "epoch": 1.8349311272145012, "grad_norm": 0.433248517239817, "learning_rate": 6.407176071760787e-06, "loss": 0.4562, "step": 11173 }, { "epoch": 1.8350953544228439, "grad_norm": 0.43227636924692336, "learning_rate": 6.406751290827214e-06, "loss": 0.4399, "step": 11174 }, { "epoch": 1.8352595816311867, "grad_norm": 0.43145471772441574, "learning_rate": 6.40632648918445e-06, "loss": 0.4457, "step": 11175 }, { "epoch": 1.8354238088395296, "grad_norm": 0.3710357940741841, "learning_rate": 6.405901666837102e-06, "loss": 0.4319, "step": 11176 }, { "epoch": 1.8355880360478722, "grad_norm": 0.3588178442046596, "learning_rate": 6.405476823789788e-06, "loss": 0.4713, "step": 11177 }, { "epoch": 1.8357522632562149, "grad_norm": 0.3383423727372397, "learning_rate": 6.4050519600471205e-06, "loss": 0.45, "step": 11178 }, { "epoch": 1.8359164904645577, "grad_norm": 0.3492514766969178, "learning_rate": 6.404627075613715e-06, "loss": 0.471, "step": 11179 }, { "epoch": 1.8360807176729006, "grad_norm": 0.41424669052303414, "learning_rate": 6.404202170494184e-06, "loss": 0.4609, "step": 11180 }, { "epoch": 1.8362449448812432, "grad_norm": 0.36182965770449854, "learning_rate": 6.403777244693146e-06, "loss": 0.4654, "step": 11181 }, { "epoch": 1.8364091720895859, "grad_norm": 0.3327741789191045, "learning_rate": 6.403352298215212e-06, "loss": 0.4618, "step": 11182 }, { "epoch": 1.8365733992979287, "grad_norm": 0.41298626199924565, "learning_rate": 6.402927331065001e-06, "loss": 0.4606, "step": 11183 }, { "epoch": 1.8367376265062716, "grad_norm": 0.2869633366868695, "learning_rate": 6.402502343247126e-06, "loss": 0.4676, "step": 11184 }, { "epoch": 1.8369018537146142, "grad_norm": 0.37976823478720945, "learning_rate": 6.402077334766204e-06, "loss": 0.4525, "step": 11185 }, { "epoch": 1.8370660809229569, "grad_norm": 0.41990835423104483, "learning_rate": 6.401652305626852e-06, "loss": 0.4548, "step": 11186 }, { "epoch": 1.8372303081312995, "grad_norm": 0.3411060115422886, "learning_rate": 6.401227255833683e-06, "loss": 0.4496, "step": 11187 }, { "epoch": 1.8373945353396424, "grad_norm": 1.1713248336301512, "learning_rate": 6.400802185391317e-06, "loss": 0.4754, "step": 11188 }, { "epoch": 1.8375587625479852, "grad_norm": 1.1536385664772004, "learning_rate": 6.4003770943043685e-06, "loss": 0.4581, "step": 11189 }, { "epoch": 1.8377229897563279, "grad_norm": 0.32862778514012486, "learning_rate": 6.399951982577456e-06, "loss": 0.4752, "step": 11190 }, { "epoch": 1.8378872169646705, "grad_norm": 0.47493385870865834, "learning_rate": 6.399526850215195e-06, "loss": 0.4555, "step": 11191 }, { "epoch": 1.8380514441730134, "grad_norm": 0.31223427158800315, "learning_rate": 6.399101697222202e-06, "loss": 0.4529, "step": 11192 }, { "epoch": 1.8382156713813562, "grad_norm": 0.31446797620165784, "learning_rate": 6.3986765236030975e-06, "loss": 0.4471, "step": 11193 }, { "epoch": 1.8383798985896989, "grad_norm": 0.44653437318643374, "learning_rate": 6.398251329362498e-06, "loss": 0.4879, "step": 11194 }, { "epoch": 1.8385441257980415, "grad_norm": 0.3569741883200403, "learning_rate": 6.397826114505022e-06, "loss": 0.4637, "step": 11195 }, { "epoch": 1.8387083530063844, "grad_norm": 0.2930468326068091, "learning_rate": 6.397400879035285e-06, "loss": 0.4349, "step": 11196 }, { "epoch": 1.8388725802147272, "grad_norm": 0.3441165187924981, "learning_rate": 6.3969756229579085e-06, "loss": 0.4466, "step": 11197 }, { "epoch": 1.8390368074230699, "grad_norm": 0.3680111685540521, "learning_rate": 6.396550346277512e-06, "loss": 0.4433, "step": 11198 }, { "epoch": 1.8392010346314125, "grad_norm": 0.4387326278721702, "learning_rate": 6.396125048998711e-06, "loss": 0.4499, "step": 11199 }, { "epoch": 1.8393652618397553, "grad_norm": 0.29303807948267785, "learning_rate": 6.395699731126128e-06, "loss": 0.4655, "step": 11200 }, { "epoch": 1.8395294890480982, "grad_norm": 0.535697689652598, "learning_rate": 6.3952743926643795e-06, "loss": 0.4374, "step": 11201 }, { "epoch": 1.8396937162564408, "grad_norm": 0.3118572177188081, "learning_rate": 6.394849033618087e-06, "loss": 0.4725, "step": 11202 }, { "epoch": 1.8398579434647835, "grad_norm": 0.3140766818191945, "learning_rate": 6.394423653991869e-06, "loss": 0.4267, "step": 11203 }, { "epoch": 1.8400221706731261, "grad_norm": 0.37368035891343115, "learning_rate": 6.393998253790347e-06, "loss": 0.4639, "step": 11204 }, { "epoch": 1.840186397881469, "grad_norm": 0.2971672289149953, "learning_rate": 6.39357283301814e-06, "loss": 0.4433, "step": 11205 }, { "epoch": 1.8403506250898118, "grad_norm": 0.3390585276018315, "learning_rate": 6.3931473916798705e-06, "loss": 0.4781, "step": 11206 }, { "epoch": 1.8405148522981545, "grad_norm": 0.38748554649335143, "learning_rate": 6.3927219297801555e-06, "loss": 0.4494, "step": 11207 }, { "epoch": 1.8406790795064971, "grad_norm": 0.49663527474275104, "learning_rate": 6.39229644732362e-06, "loss": 0.4611, "step": 11208 }, { "epoch": 1.84084330671484, "grad_norm": 0.29911888732308106, "learning_rate": 6.391870944314882e-06, "loss": 0.4591, "step": 11209 }, { "epoch": 1.8410075339231828, "grad_norm": 0.40218304374728053, "learning_rate": 6.391445420758565e-06, "loss": 0.4715, "step": 11210 }, { "epoch": 1.8411717611315255, "grad_norm": 0.3619310185273555, "learning_rate": 6.39101987665929e-06, "loss": 0.4516, "step": 11211 }, { "epoch": 1.8413359883398681, "grad_norm": 0.4285679890613997, "learning_rate": 6.390594312021677e-06, "loss": 0.4587, "step": 11212 }, { "epoch": 1.841500215548211, "grad_norm": 0.2965380574492834, "learning_rate": 6.390168726850351e-06, "loss": 0.4338, "step": 11213 }, { "epoch": 1.8416644427565538, "grad_norm": 0.3859482067158745, "learning_rate": 6.3897431211499325e-06, "loss": 0.4529, "step": 11214 }, { "epoch": 1.8418286699648965, "grad_norm": 0.41682659190036797, "learning_rate": 6.389317494925046e-06, "loss": 0.4431, "step": 11215 }, { "epoch": 1.841992897173239, "grad_norm": 0.3187290191100235, "learning_rate": 6.388891848180311e-06, "loss": 0.4507, "step": 11216 }, { "epoch": 1.842157124381582, "grad_norm": 0.3103138081670325, "learning_rate": 6.388466180920351e-06, "loss": 0.4699, "step": 11217 }, { "epoch": 1.8423213515899248, "grad_norm": 0.4371948820010515, "learning_rate": 6.388040493149793e-06, "loss": 0.4592, "step": 11218 }, { "epoch": 1.8424855787982675, "grad_norm": 0.3792863854932938, "learning_rate": 6.387614784873257e-06, "loss": 0.4655, "step": 11219 }, { "epoch": 1.84264980600661, "grad_norm": 0.454009914635368, "learning_rate": 6.387189056095367e-06, "loss": 0.4589, "step": 11220 }, { "epoch": 1.8428140332149527, "grad_norm": 0.36236305614927683, "learning_rate": 6.386763306820746e-06, "loss": 0.4805, "step": 11221 }, { "epoch": 1.8429782604232956, "grad_norm": 0.32699570043919124, "learning_rate": 6.38633753705402e-06, "loss": 0.4348, "step": 11222 }, { "epoch": 1.8431424876316385, "grad_norm": 0.27557523057430056, "learning_rate": 6.385911746799812e-06, "loss": 0.4529, "step": 11223 }, { "epoch": 1.843306714839981, "grad_norm": 0.3092554373958321, "learning_rate": 6.385485936062749e-06, "loss": 0.4562, "step": 11224 }, { "epoch": 1.8434709420483237, "grad_norm": 0.36892198796982645, "learning_rate": 6.3850601048474516e-06, "loss": 0.4672, "step": 11225 }, { "epoch": 1.8436351692566666, "grad_norm": 0.2739936180230769, "learning_rate": 6.384634253158546e-06, "loss": 0.4637, "step": 11226 }, { "epoch": 1.8437993964650095, "grad_norm": 0.7730659708755419, "learning_rate": 6.38420838100066e-06, "loss": 0.4554, "step": 11227 }, { "epoch": 1.843963623673352, "grad_norm": 0.691066161259853, "learning_rate": 6.383782488378416e-06, "loss": 0.456, "step": 11228 }, { "epoch": 1.8441278508816947, "grad_norm": 0.3885199433170536, "learning_rate": 6.3833565752964415e-06, "loss": 0.4622, "step": 11229 }, { "epoch": 1.8442920780900376, "grad_norm": 0.3272844786118214, "learning_rate": 6.382930641759361e-06, "loss": 0.4454, "step": 11230 }, { "epoch": 1.8444563052983804, "grad_norm": 0.2924812894819204, "learning_rate": 6.382504687771804e-06, "loss": 0.4606, "step": 11231 }, { "epoch": 1.844620532506723, "grad_norm": 0.30384658501138234, "learning_rate": 6.382078713338391e-06, "loss": 0.4614, "step": 11232 }, { "epoch": 1.8447847597150657, "grad_norm": 0.2959473309078626, "learning_rate": 6.3816527184637514e-06, "loss": 0.4711, "step": 11233 }, { "epoch": 1.8449489869234086, "grad_norm": 0.3025842567454638, "learning_rate": 6.3812267031525125e-06, "loss": 0.4502, "step": 11234 }, { "epoch": 1.8451132141317512, "grad_norm": 0.2605784588380211, "learning_rate": 6.3808006674093015e-06, "loss": 0.4534, "step": 11235 }, { "epoch": 1.845277441340094, "grad_norm": 0.48953924584015085, "learning_rate": 6.380374611238743e-06, "loss": 0.4455, "step": 11236 }, { "epoch": 1.8454416685484367, "grad_norm": 0.629228985946868, "learning_rate": 6.3799485346454685e-06, "loss": 0.4351, "step": 11237 }, { "epoch": 1.8456058957567794, "grad_norm": 0.3061764968428075, "learning_rate": 6.379522437634102e-06, "loss": 0.4563, "step": 11238 }, { "epoch": 1.8457701229651222, "grad_norm": 1.4709802223407416, "learning_rate": 6.379096320209273e-06, "loss": 0.4539, "step": 11239 }, { "epoch": 1.845934350173465, "grad_norm": 0.323637509736383, "learning_rate": 6.378670182375609e-06, "loss": 0.451, "step": 11240 }, { "epoch": 1.8460985773818077, "grad_norm": 0.3057557907583729, "learning_rate": 6.3782440241377375e-06, "loss": 0.4581, "step": 11241 }, { "epoch": 1.8462628045901504, "grad_norm": 0.304889386260149, "learning_rate": 6.377817845500289e-06, "loss": 0.4325, "step": 11242 }, { "epoch": 1.8464270317984932, "grad_norm": 0.33035100993678235, "learning_rate": 6.377391646467891e-06, "loss": 0.4502, "step": 11243 }, { "epoch": 1.846591259006836, "grad_norm": 0.29746641796158035, "learning_rate": 6.376965427045173e-06, "loss": 0.4503, "step": 11244 }, { "epoch": 1.8467554862151787, "grad_norm": 0.32982674950063423, "learning_rate": 6.376539187236764e-06, "loss": 0.4692, "step": 11245 }, { "epoch": 1.8469197134235213, "grad_norm": 0.3033746048395144, "learning_rate": 6.376112927047292e-06, "loss": 0.456, "step": 11246 }, { "epoch": 1.8470839406318642, "grad_norm": 0.34344852873284026, "learning_rate": 6.375686646481388e-06, "loss": 0.4693, "step": 11247 }, { "epoch": 1.847248167840207, "grad_norm": 0.3152472718518401, "learning_rate": 6.375260345543683e-06, "loss": 0.4738, "step": 11248 }, { "epoch": 1.8474123950485497, "grad_norm": 0.34465837766764895, "learning_rate": 6.374834024238805e-06, "loss": 0.4589, "step": 11249 }, { "epoch": 1.8475766222568923, "grad_norm": 0.33303273260166083, "learning_rate": 6.374407682571384e-06, "loss": 0.481, "step": 11250 }, { "epoch": 1.8477408494652352, "grad_norm": 0.2974928118083777, "learning_rate": 6.373981320546051e-06, "loss": 0.4489, "step": 11251 }, { "epoch": 1.8479050766735778, "grad_norm": 0.3249645450729173, "learning_rate": 6.373554938167439e-06, "loss": 0.4445, "step": 11252 }, { "epoch": 1.8480693038819207, "grad_norm": 0.3307822690469634, "learning_rate": 6.373128535440177e-06, "loss": 0.471, "step": 11253 }, { "epoch": 1.8482335310902633, "grad_norm": 0.3783593787704891, "learning_rate": 6.372702112368894e-06, "loss": 0.4261, "step": 11254 }, { "epoch": 1.848397758298606, "grad_norm": 0.3394670412812821, "learning_rate": 6.372275668958225e-06, "loss": 0.472, "step": 11255 }, { "epoch": 1.8485619855069488, "grad_norm": 0.33822575181629483, "learning_rate": 6.371849205212801e-06, "loss": 0.4439, "step": 11256 }, { "epoch": 1.8487262127152917, "grad_norm": 0.3142075629376298, "learning_rate": 6.371422721137252e-06, "loss": 0.4464, "step": 11257 }, { "epoch": 1.8488904399236343, "grad_norm": 0.3342951923344951, "learning_rate": 6.370996216736211e-06, "loss": 0.4779, "step": 11258 }, { "epoch": 1.849054667131977, "grad_norm": 0.349350432116345, "learning_rate": 6.370569692014309e-06, "loss": 0.4233, "step": 11259 }, { "epoch": 1.8492188943403198, "grad_norm": 0.29426312829915247, "learning_rate": 6.370143146976182e-06, "loss": 0.4605, "step": 11260 }, { "epoch": 1.8493831215486627, "grad_norm": 0.6891572867343618, "learning_rate": 6.369716581626459e-06, "loss": 0.4539, "step": 11261 }, { "epoch": 1.8495473487570053, "grad_norm": 0.3566966922620718, "learning_rate": 6.3692899959697735e-06, "loss": 0.4473, "step": 11262 }, { "epoch": 1.849711575965348, "grad_norm": 0.3671286591601414, "learning_rate": 6.368863390010762e-06, "loss": 0.4626, "step": 11263 }, { "epoch": 1.8498758031736908, "grad_norm": 0.4516765235822067, "learning_rate": 6.368436763754055e-06, "loss": 0.4572, "step": 11264 }, { "epoch": 1.8500400303820337, "grad_norm": 0.2954919181729513, "learning_rate": 6.368010117204286e-06, "loss": 0.4393, "step": 11265 }, { "epoch": 1.8502042575903763, "grad_norm": 0.3493134392224947, "learning_rate": 6.367583450366087e-06, "loss": 0.4567, "step": 11266 }, { "epoch": 1.850368484798719, "grad_norm": 0.35872074525718806, "learning_rate": 6.367156763244097e-06, "loss": 0.4561, "step": 11267 }, { "epoch": 1.8505327120070618, "grad_norm": 0.42804427248337146, "learning_rate": 6.3667300558429475e-06, "loss": 0.4678, "step": 11268 }, { "epoch": 1.8506969392154045, "grad_norm": 0.39557124706471175, "learning_rate": 6.366303328167273e-06, "loss": 0.4527, "step": 11269 }, { "epoch": 1.8508611664237473, "grad_norm": 0.3645519450986868, "learning_rate": 6.365876580221706e-06, "loss": 0.4439, "step": 11270 }, { "epoch": 1.85102539363209, "grad_norm": 0.2611586402914713, "learning_rate": 6.365449812010884e-06, "loss": 0.4624, "step": 11271 }, { "epoch": 1.8511896208404326, "grad_norm": 0.4224856050672986, "learning_rate": 6.365023023539444e-06, "loss": 0.4262, "step": 11272 }, { "epoch": 1.8513538480487755, "grad_norm": 0.34426711144349553, "learning_rate": 6.364596214812018e-06, "loss": 0.4635, "step": 11273 }, { "epoch": 1.8515180752571183, "grad_norm": 0.33044232606364654, "learning_rate": 6.364169385833242e-06, "loss": 0.4413, "step": 11274 }, { "epoch": 1.851682302465461, "grad_norm": 0.3566030564790324, "learning_rate": 6.363742536607753e-06, "loss": 0.4657, "step": 11275 }, { "epoch": 1.8518465296738036, "grad_norm": 0.32664316500962876, "learning_rate": 6.363315667140185e-06, "loss": 0.4231, "step": 11276 }, { "epoch": 1.8520107568821464, "grad_norm": 0.3192712622806749, "learning_rate": 6.362888777435177e-06, "loss": 0.4429, "step": 11277 }, { "epoch": 1.8521749840904893, "grad_norm": 0.3483004909516952, "learning_rate": 6.362461867497364e-06, "loss": 0.4459, "step": 11278 }, { "epoch": 1.852339211298832, "grad_norm": 0.3238437276102331, "learning_rate": 6.362034937331382e-06, "loss": 0.4658, "step": 11279 }, { "epoch": 1.8525034385071746, "grad_norm": 0.4155190721399048, "learning_rate": 6.361607986941869e-06, "loss": 0.4594, "step": 11280 }, { "epoch": 1.8526676657155174, "grad_norm": 0.4259720186833352, "learning_rate": 6.361181016333462e-06, "loss": 0.4427, "step": 11281 }, { "epoch": 1.8528318929238603, "grad_norm": 0.39788743048437325, "learning_rate": 6.360754025510797e-06, "loss": 0.4389, "step": 11282 }, { "epoch": 1.852996120132203, "grad_norm": 0.4959653416913325, "learning_rate": 6.360327014478513e-06, "loss": 0.4683, "step": 11283 }, { "epoch": 1.8531603473405456, "grad_norm": 0.9145142089162646, "learning_rate": 6.359899983241248e-06, "loss": 0.4678, "step": 11284 }, { "epoch": 1.8533245745488884, "grad_norm": 0.34958711678494037, "learning_rate": 6.3594729318036395e-06, "loss": 0.4392, "step": 11285 }, { "epoch": 1.853488801757231, "grad_norm": 0.2915456832169805, "learning_rate": 6.3590458601703234e-06, "loss": 0.4396, "step": 11286 }, { "epoch": 1.853653028965574, "grad_norm": 0.3523839879611003, "learning_rate": 6.358618768345943e-06, "loss": 0.4452, "step": 11287 }, { "epoch": 1.8538172561739166, "grad_norm": 0.39252336971886814, "learning_rate": 6.358191656335133e-06, "loss": 0.4503, "step": 11288 }, { "epoch": 1.8539814833822592, "grad_norm": 0.29758768939411184, "learning_rate": 6.357764524142533e-06, "loss": 0.4371, "step": 11289 }, { "epoch": 1.854145710590602, "grad_norm": 0.2923988521358031, "learning_rate": 6.3573373717727814e-06, "loss": 0.4435, "step": 11290 }, { "epoch": 1.854309937798945, "grad_norm": 0.3797175874288087, "learning_rate": 6.35691019923052e-06, "loss": 0.4735, "step": 11291 }, { "epoch": 1.8544741650072876, "grad_norm": 0.3943571084660884, "learning_rate": 6.356483006520387e-06, "loss": 0.4385, "step": 11292 }, { "epoch": 1.8546383922156302, "grad_norm": 0.422371885454595, "learning_rate": 6.356055793647021e-06, "loss": 0.4549, "step": 11293 }, { "epoch": 1.854802619423973, "grad_norm": 0.2881492886554232, "learning_rate": 6.3556285606150645e-06, "loss": 0.4354, "step": 11294 }, { "epoch": 1.854966846632316, "grad_norm": 0.2895915499671857, "learning_rate": 6.355201307429155e-06, "loss": 0.4554, "step": 11295 }, { "epoch": 1.8551310738406586, "grad_norm": 0.27380290883797564, "learning_rate": 6.354774034093934e-06, "loss": 0.4457, "step": 11296 }, { "epoch": 1.8552953010490012, "grad_norm": 0.3123756640040171, "learning_rate": 6.354346740614043e-06, "loss": 0.4317, "step": 11297 }, { "epoch": 1.855459528257344, "grad_norm": 0.3097783710970955, "learning_rate": 6.353919426994121e-06, "loss": 0.4458, "step": 11298 }, { "epoch": 1.855623755465687, "grad_norm": 0.31806916318986367, "learning_rate": 6.353492093238811e-06, "loss": 0.4459, "step": 11299 }, { "epoch": 1.8557879826740296, "grad_norm": 0.42610233945463255, "learning_rate": 6.353064739352752e-06, "loss": 0.452, "step": 11300 }, { "epoch": 1.8559522098823722, "grad_norm": 0.3118649517740116, "learning_rate": 6.352637365340588e-06, "loss": 0.4414, "step": 11301 }, { "epoch": 1.856116437090715, "grad_norm": 0.2883834157653145, "learning_rate": 6.352209971206959e-06, "loss": 0.4447, "step": 11302 }, { "epoch": 1.8562806642990577, "grad_norm": 0.30425811238812217, "learning_rate": 6.3517825569565074e-06, "loss": 0.4677, "step": 11303 }, { "epoch": 1.8564448915074006, "grad_norm": 0.3157632228032478, "learning_rate": 6.351355122593875e-06, "loss": 0.451, "step": 11304 }, { "epoch": 1.8566091187157432, "grad_norm": 0.31606115308966193, "learning_rate": 6.350927668123704e-06, "loss": 0.4615, "step": 11305 }, { "epoch": 1.8567733459240858, "grad_norm": 0.2724015541206075, "learning_rate": 6.350500193550638e-06, "loss": 0.4353, "step": 11306 }, { "epoch": 1.8569375731324287, "grad_norm": 0.2767441122373085, "learning_rate": 6.35007269887932e-06, "loss": 0.4567, "step": 11307 }, { "epoch": 1.8571018003407715, "grad_norm": 0.29273901495114507, "learning_rate": 6.349645184114392e-06, "loss": 0.4567, "step": 11308 }, { "epoch": 1.8572660275491142, "grad_norm": 0.2969139208502846, "learning_rate": 6.349217649260497e-06, "loss": 0.4342, "step": 11309 }, { "epoch": 1.8574302547574568, "grad_norm": 0.35579388252131244, "learning_rate": 6.34879009432228e-06, "loss": 0.4611, "step": 11310 }, { "epoch": 1.8575944819657997, "grad_norm": 0.34158770325629234, "learning_rate": 6.348362519304382e-06, "loss": 0.4616, "step": 11311 }, { "epoch": 1.8577587091741425, "grad_norm": 0.28067291327273947, "learning_rate": 6.34793492421145e-06, "loss": 0.4455, "step": 11312 }, { "epoch": 1.8579229363824852, "grad_norm": 0.30837793967609967, "learning_rate": 6.347507309048125e-06, "loss": 0.4578, "step": 11313 }, { "epoch": 1.8580871635908278, "grad_norm": 0.3125025367261328, "learning_rate": 6.347079673819053e-06, "loss": 0.4472, "step": 11314 }, { "epoch": 1.8582513907991707, "grad_norm": 0.5052978416465356, "learning_rate": 6.34665201852888e-06, "loss": 0.4471, "step": 11315 }, { "epoch": 1.8584156180075135, "grad_norm": 0.31536032442555645, "learning_rate": 6.346224343182248e-06, "loss": 0.483, "step": 11316 }, { "epoch": 1.8585798452158562, "grad_norm": 0.4470949513656592, "learning_rate": 6.345796647783804e-06, "loss": 0.4606, "step": 11317 }, { "epoch": 1.8587440724241988, "grad_norm": 0.3170649277211733, "learning_rate": 6.345368932338192e-06, "loss": 0.4342, "step": 11318 }, { "epoch": 1.8589082996325417, "grad_norm": 0.3424326385751127, "learning_rate": 6.344941196850058e-06, "loss": 0.463, "step": 11319 }, { "epoch": 1.8590725268408843, "grad_norm": 0.3128336163128946, "learning_rate": 6.344513441324048e-06, "loss": 0.4494, "step": 11320 }, { "epoch": 1.8592367540492272, "grad_norm": 0.4693741408208788, "learning_rate": 6.344085665764806e-06, "loss": 0.4228, "step": 11321 }, { "epoch": 1.8594009812575698, "grad_norm": 0.3938432577957824, "learning_rate": 6.343657870176979e-06, "loss": 0.4579, "step": 11322 }, { "epoch": 1.8595652084659124, "grad_norm": 0.2665157963971842, "learning_rate": 6.343230054565215e-06, "loss": 0.4559, "step": 11323 }, { "epoch": 1.8597294356742553, "grad_norm": 0.5765119530939804, "learning_rate": 6.342802218934159e-06, "loss": 0.4366, "step": 11324 }, { "epoch": 1.8598936628825982, "grad_norm": 0.6677510627653236, "learning_rate": 6.342374363288456e-06, "loss": 0.4634, "step": 11325 }, { "epoch": 1.8600578900909408, "grad_norm": 0.3405761435040425, "learning_rate": 6.341946487632758e-06, "loss": 0.4518, "step": 11326 }, { "epoch": 1.8602221172992834, "grad_norm": 0.28552553151580606, "learning_rate": 6.341518591971707e-06, "loss": 0.4468, "step": 11327 }, { "epoch": 1.8603863445076263, "grad_norm": 0.3218955469025756, "learning_rate": 6.341090676309951e-06, "loss": 0.4628, "step": 11328 }, { "epoch": 1.8605505717159692, "grad_norm": 0.3214289953278923, "learning_rate": 6.340662740652141e-06, "loss": 0.4289, "step": 11329 }, { "epoch": 1.8607147989243118, "grad_norm": 0.2527221904206092, "learning_rate": 6.340234785002922e-06, "loss": 0.4347, "step": 11330 }, { "epoch": 1.8608790261326544, "grad_norm": 0.39361376413646876, "learning_rate": 6.339806809366942e-06, "loss": 0.4583, "step": 11331 }, { "epoch": 1.8610432533409973, "grad_norm": 0.36043320353855923, "learning_rate": 6.339378813748852e-06, "loss": 0.4427, "step": 11332 }, { "epoch": 1.8612074805493402, "grad_norm": 0.8114253982192287, "learning_rate": 6.338950798153295e-06, "loss": 0.4365, "step": 11333 }, { "epoch": 1.8613717077576828, "grad_norm": 0.2778367874294855, "learning_rate": 6.338522762584925e-06, "loss": 0.447, "step": 11334 }, { "epoch": 1.8615359349660254, "grad_norm": 0.3560822146400267, "learning_rate": 6.338094707048389e-06, "loss": 0.4356, "step": 11335 }, { "epoch": 1.8617001621743683, "grad_norm": 0.27192416670571484, "learning_rate": 6.337666631548337e-06, "loss": 0.4498, "step": 11336 }, { "epoch": 1.861864389382711, "grad_norm": 0.4327915472121516, "learning_rate": 6.337238536089416e-06, "loss": 0.4366, "step": 11337 }, { "epoch": 1.8620286165910538, "grad_norm": 0.4133339322356034, "learning_rate": 6.336810420676277e-06, "loss": 0.4509, "step": 11338 }, { "epoch": 1.8621928437993964, "grad_norm": 0.2788288955428656, "learning_rate": 6.336382285313569e-06, "loss": 0.4571, "step": 11339 }, { "epoch": 1.862357071007739, "grad_norm": 0.3527929954753714, "learning_rate": 6.335954130005945e-06, "loss": 0.4516, "step": 11340 }, { "epoch": 1.862521298216082, "grad_norm": 0.4160277813715837, "learning_rate": 6.335525954758051e-06, "loss": 0.4483, "step": 11341 }, { "epoch": 1.8626855254244248, "grad_norm": 0.41947127135248397, "learning_rate": 6.335097759574539e-06, "loss": 0.4715, "step": 11342 }, { "epoch": 1.8628497526327674, "grad_norm": 0.30891972256795924, "learning_rate": 6.33466954446006e-06, "loss": 0.4757, "step": 11343 }, { "epoch": 1.86301397984111, "grad_norm": 0.32281669512362693, "learning_rate": 6.334241309419265e-06, "loss": 0.4487, "step": 11344 }, { "epoch": 1.863178207049453, "grad_norm": 0.3710096514926429, "learning_rate": 6.333813054456805e-06, "loss": 0.4512, "step": 11345 }, { "epoch": 1.8633424342577958, "grad_norm": 0.297366741235457, "learning_rate": 6.33338477957733e-06, "loss": 0.4876, "step": 11346 }, { "epoch": 1.8635066614661384, "grad_norm": 0.287759306786484, "learning_rate": 6.332956484785495e-06, "loss": 0.4574, "step": 11347 }, { "epoch": 1.863670888674481, "grad_norm": 0.5127446230381244, "learning_rate": 6.332528170085947e-06, "loss": 0.4564, "step": 11348 }, { "epoch": 1.863835115882824, "grad_norm": 0.3181484291823922, "learning_rate": 6.33209983548334e-06, "loss": 0.4511, "step": 11349 }, { "epoch": 1.8639993430911668, "grad_norm": 0.36182292779200886, "learning_rate": 6.331671480982328e-06, "loss": 0.4509, "step": 11350 }, { "epoch": 1.8641635702995094, "grad_norm": 0.3005556130697668, "learning_rate": 6.3312431065875596e-06, "loss": 0.4439, "step": 11351 }, { "epoch": 1.864327797507852, "grad_norm": 0.31933998986102374, "learning_rate": 6.33081471230369e-06, "loss": 0.4796, "step": 11352 }, { "epoch": 1.864492024716195, "grad_norm": 0.43163246367148256, "learning_rate": 6.330386298135372e-06, "loss": 0.4495, "step": 11353 }, { "epoch": 1.8646562519245375, "grad_norm": 0.37365493661552046, "learning_rate": 6.329957864087256e-06, "loss": 0.4458, "step": 11354 }, { "epoch": 1.8648204791328804, "grad_norm": 0.28485223530004894, "learning_rate": 6.329529410163999e-06, "loss": 0.4384, "step": 11355 }, { "epoch": 1.864984706341223, "grad_norm": 0.3407623561451519, "learning_rate": 6.329100936370253e-06, "loss": 0.4628, "step": 11356 }, { "epoch": 1.8651489335495657, "grad_norm": 0.3862070798121785, "learning_rate": 6.328672442710671e-06, "loss": 0.4508, "step": 11357 }, { "epoch": 1.8653131607579085, "grad_norm": 0.3748507949476453, "learning_rate": 6.328243929189905e-06, "loss": 0.4458, "step": 11358 }, { "epoch": 1.8654773879662514, "grad_norm": 0.38990948562174965, "learning_rate": 6.327815395812613e-06, "loss": 0.4455, "step": 11359 }, { "epoch": 1.865641615174594, "grad_norm": 0.37073709138352673, "learning_rate": 6.327386842583447e-06, "loss": 0.4339, "step": 11360 }, { "epoch": 1.8658058423829367, "grad_norm": 0.3471538957326833, "learning_rate": 6.326958269507063e-06, "loss": 0.4536, "step": 11361 }, { "epoch": 1.8659700695912795, "grad_norm": 0.3642542342861628, "learning_rate": 6.326529676588114e-06, "loss": 0.4466, "step": 11362 }, { "epoch": 1.8661342967996224, "grad_norm": 0.3782161620612055, "learning_rate": 6.326101063831254e-06, "loss": 0.4516, "step": 11363 }, { "epoch": 1.866298524007965, "grad_norm": 0.3331236609817597, "learning_rate": 6.325672431241142e-06, "loss": 0.4546, "step": 11364 }, { "epoch": 1.8664627512163077, "grad_norm": 1.0242190816400079, "learning_rate": 6.325243778822431e-06, "loss": 0.4514, "step": 11365 }, { "epoch": 1.8666269784246505, "grad_norm": 0.27978995786744143, "learning_rate": 6.324815106579777e-06, "loss": 0.459, "step": 11366 }, { "epoch": 1.8667912056329934, "grad_norm": 0.3215904181313786, "learning_rate": 6.324386414517834e-06, "loss": 0.452, "step": 11367 }, { "epoch": 1.866955432841336, "grad_norm": 0.36826949228960465, "learning_rate": 6.32395770264126e-06, "loss": 0.4561, "step": 11368 }, { "epoch": 1.8671196600496787, "grad_norm": 0.3002113463960166, "learning_rate": 6.323528970954711e-06, "loss": 0.4545, "step": 11369 }, { "epoch": 1.8672838872580215, "grad_norm": 0.327024602938653, "learning_rate": 6.323100219462844e-06, "loss": 0.4568, "step": 11370 }, { "epoch": 1.8674481144663642, "grad_norm": 0.44713301545973794, "learning_rate": 6.322671448170314e-06, "loss": 0.4622, "step": 11371 }, { "epoch": 1.867612341674707, "grad_norm": 0.37617685259242234, "learning_rate": 6.322242657081779e-06, "loss": 0.451, "step": 11372 }, { "epoch": 1.8677765688830497, "grad_norm": 0.4999178729558093, "learning_rate": 6.321813846201897e-06, "loss": 0.4676, "step": 11373 }, { "epoch": 1.8679407960913923, "grad_norm": 0.3485035319342385, "learning_rate": 6.321385015535323e-06, "loss": 0.4607, "step": 11374 }, { "epoch": 1.8681050232997352, "grad_norm": 0.3385822781097892, "learning_rate": 6.320956165086716e-06, "loss": 0.4748, "step": 11375 }, { "epoch": 1.868269250508078, "grad_norm": 0.3730575244192388, "learning_rate": 6.320527294860734e-06, "loss": 0.4713, "step": 11376 }, { "epoch": 1.8684334777164207, "grad_norm": 0.3223886623461952, "learning_rate": 6.3200984048620335e-06, "loss": 0.4469, "step": 11377 }, { "epoch": 1.8685977049247633, "grad_norm": 0.31862136491199516, "learning_rate": 6.319669495095275e-06, "loss": 0.4537, "step": 11378 }, { "epoch": 1.8687619321331062, "grad_norm": 0.29791203455607024, "learning_rate": 6.3192405655651125e-06, "loss": 0.4598, "step": 11379 }, { "epoch": 1.868926159341449, "grad_norm": 0.32176269966805215, "learning_rate": 6.318811616276211e-06, "loss": 0.4483, "step": 11380 }, { "epoch": 1.8690903865497916, "grad_norm": 0.43102468936947047, "learning_rate": 6.318382647233225e-06, "loss": 0.4427, "step": 11381 }, { "epoch": 1.8692546137581343, "grad_norm": 0.33077619486864357, "learning_rate": 6.3179536584408135e-06, "loss": 0.4667, "step": 11382 }, { "epoch": 1.8694188409664771, "grad_norm": 0.34418501391086825, "learning_rate": 6.317524649903637e-06, "loss": 0.4298, "step": 11383 }, { "epoch": 1.86958306817482, "grad_norm": 0.29682961680933806, "learning_rate": 6.317095621626354e-06, "loss": 0.4531, "step": 11384 }, { "epoch": 1.8697472953831626, "grad_norm": 0.3043037241928832, "learning_rate": 6.316666573613625e-06, "loss": 0.4474, "step": 11385 }, { "epoch": 1.8699115225915053, "grad_norm": 0.39672356587770125, "learning_rate": 6.316237505870111e-06, "loss": 0.4473, "step": 11386 }, { "epoch": 1.8700757497998481, "grad_norm": 0.32709641612353896, "learning_rate": 6.315808418400469e-06, "loss": 0.4533, "step": 11387 }, { "epoch": 1.8702399770081908, "grad_norm": 0.571241424685496, "learning_rate": 6.315379311209362e-06, "loss": 0.4535, "step": 11388 }, { "epoch": 1.8704042042165336, "grad_norm": 0.37875251809804106, "learning_rate": 6.31495018430145e-06, "loss": 0.452, "step": 11389 }, { "epoch": 1.8705684314248763, "grad_norm": 0.35808189075596303, "learning_rate": 6.3145210376813925e-06, "loss": 0.4142, "step": 11390 }, { "epoch": 1.870732658633219, "grad_norm": 0.33499970260438333, "learning_rate": 6.314091871353852e-06, "loss": 0.4386, "step": 11391 }, { "epoch": 1.8708968858415618, "grad_norm": 0.3271825235316504, "learning_rate": 6.313662685323488e-06, "loss": 0.4348, "step": 11392 }, { "epoch": 1.8710611130499046, "grad_norm": 0.3161373090038315, "learning_rate": 6.313233479594963e-06, "loss": 0.4542, "step": 11393 }, { "epoch": 1.8712253402582473, "grad_norm": 0.3103016233161706, "learning_rate": 6.312804254172938e-06, "loss": 0.4525, "step": 11394 }, { "epoch": 1.87138956746659, "grad_norm": 0.4571370996671186, "learning_rate": 6.312375009062078e-06, "loss": 0.4334, "step": 11395 }, { "epoch": 1.8715537946749328, "grad_norm": 0.31005290994916324, "learning_rate": 6.311945744267039e-06, "loss": 0.4298, "step": 11396 }, { "epoch": 1.8717180218832756, "grad_norm": 0.32531698774417556, "learning_rate": 6.311516459792488e-06, "loss": 0.4134, "step": 11397 }, { "epoch": 1.8718822490916183, "grad_norm": 0.2637088668005535, "learning_rate": 6.311087155643087e-06, "loss": 0.4277, "step": 11398 }, { "epoch": 1.872046476299961, "grad_norm": 0.352195618024151, "learning_rate": 6.310657831823495e-06, "loss": 0.4655, "step": 11399 }, { "epoch": 1.8722107035083038, "grad_norm": 0.36961737788626986, "learning_rate": 6.310228488338379e-06, "loss": 0.4602, "step": 11400 }, { "epoch": 1.8723749307166466, "grad_norm": 0.29607974177504476, "learning_rate": 6.3097991251923995e-06, "loss": 0.4531, "step": 11401 }, { "epoch": 1.8725391579249893, "grad_norm": 0.3992638430794492, "learning_rate": 6.309369742390224e-06, "loss": 0.4544, "step": 11402 }, { "epoch": 1.872703385133332, "grad_norm": 0.2969144434983537, "learning_rate": 6.308940339936509e-06, "loss": 0.4434, "step": 11403 }, { "epoch": 1.8728676123416748, "grad_norm": 0.2615427443565544, "learning_rate": 6.3085109178359245e-06, "loss": 0.4419, "step": 11404 }, { "epoch": 1.8730318395500174, "grad_norm": 0.32425811970912993, "learning_rate": 6.308081476093131e-06, "loss": 0.4489, "step": 11405 }, { "epoch": 1.8731960667583603, "grad_norm": 0.47585657206487836, "learning_rate": 6.3076520147127956e-06, "loss": 0.4424, "step": 11406 }, { "epoch": 1.873360293966703, "grad_norm": 0.30516338170610596, "learning_rate": 6.30722253369958e-06, "loss": 0.4413, "step": 11407 }, { "epoch": 1.8735245211750455, "grad_norm": 0.39396978310804875, "learning_rate": 6.306793033058147e-06, "loss": 0.4667, "step": 11408 }, { "epoch": 1.8736887483833884, "grad_norm": 0.3038677691494423, "learning_rate": 6.306363512793167e-06, "loss": 0.4502, "step": 11409 }, { "epoch": 1.8738529755917313, "grad_norm": 0.31531753590896405, "learning_rate": 6.305933972909301e-06, "loss": 0.4226, "step": 11410 }, { "epoch": 1.874017202800074, "grad_norm": 0.3287934612429399, "learning_rate": 6.3055044134112165e-06, "loss": 0.4417, "step": 11411 }, { "epoch": 1.8741814300084165, "grad_norm": 0.27368395270120466, "learning_rate": 6.305074834303576e-06, "loss": 0.44, "step": 11412 }, { "epoch": 1.8743456572167594, "grad_norm": 0.33455145144424686, "learning_rate": 6.3046452355910465e-06, "loss": 0.4573, "step": 11413 }, { "epoch": 1.8745098844251022, "grad_norm": 0.3506306212009912, "learning_rate": 6.304215617278296e-06, "loss": 0.4549, "step": 11414 }, { "epoch": 1.8746741116334449, "grad_norm": 0.3501676831247077, "learning_rate": 6.303785979369988e-06, "loss": 0.4774, "step": 11415 }, { "epoch": 1.8748383388417875, "grad_norm": 0.2696692929128657, "learning_rate": 6.30335632187079e-06, "loss": 0.4582, "step": 11416 }, { "epoch": 1.8750025660501304, "grad_norm": 0.2685861402212932, "learning_rate": 6.302926644785367e-06, "loss": 0.4552, "step": 11417 }, { "epoch": 1.8751667932584732, "grad_norm": 0.5705129421412283, "learning_rate": 6.302496948118388e-06, "loss": 0.4575, "step": 11418 }, { "epoch": 1.8753310204668159, "grad_norm": 0.27189216908107755, "learning_rate": 6.302067231874519e-06, "loss": 0.4584, "step": 11419 }, { "epoch": 1.8754952476751585, "grad_norm": 0.30435635762114543, "learning_rate": 6.3016374960584245e-06, "loss": 0.4211, "step": 11420 }, { "epoch": 1.8756594748835014, "grad_norm": 0.35066223874902763, "learning_rate": 6.301207740674776e-06, "loss": 0.4382, "step": 11421 }, { "epoch": 1.875823702091844, "grad_norm": 0.2863261329375365, "learning_rate": 6.300777965728238e-06, "loss": 0.4595, "step": 11422 }, { "epoch": 1.8759879293001869, "grad_norm": 0.3336400798811265, "learning_rate": 6.300348171223482e-06, "loss": 0.4607, "step": 11423 }, { "epoch": 1.8761521565085295, "grad_norm": 0.265241509790696, "learning_rate": 6.299918357165172e-06, "loss": 0.4402, "step": 11424 }, { "epoch": 1.8763163837168721, "grad_norm": 0.3908903746403792, "learning_rate": 6.299488523557977e-06, "loss": 0.4574, "step": 11425 }, { "epoch": 1.876480610925215, "grad_norm": 0.3081261018458057, "learning_rate": 6.299058670406567e-06, "loss": 0.4411, "step": 11426 }, { "epoch": 1.8766448381335579, "grad_norm": 0.36235121919155894, "learning_rate": 6.298628797715611e-06, "loss": 0.4471, "step": 11427 }, { "epoch": 1.8768090653419005, "grad_norm": 0.3715659252716504, "learning_rate": 6.298198905489775e-06, "loss": 0.4622, "step": 11428 }, { "epoch": 1.8769732925502431, "grad_norm": 0.2812323777900102, "learning_rate": 6.297768993733731e-06, "loss": 0.455, "step": 11429 }, { "epoch": 1.877137519758586, "grad_norm": 0.5437456561571391, "learning_rate": 6.297339062452145e-06, "loss": 0.4685, "step": 11430 }, { "epoch": 1.8773017469669289, "grad_norm": 0.3013365443414987, "learning_rate": 6.296909111649689e-06, "loss": 0.4403, "step": 11431 }, { "epoch": 1.8774659741752715, "grad_norm": 0.3600951978257436, "learning_rate": 6.296479141331033e-06, "loss": 0.4688, "step": 11432 }, { "epoch": 1.8776302013836141, "grad_norm": 0.32973648411913603, "learning_rate": 6.296049151500847e-06, "loss": 0.4671, "step": 11433 }, { "epoch": 1.877794428591957, "grad_norm": 0.77516515033505, "learning_rate": 6.295619142163799e-06, "loss": 0.448, "step": 11434 }, { "epoch": 1.8779586558002999, "grad_norm": 0.2813318600336481, "learning_rate": 6.295189113324559e-06, "loss": 0.4392, "step": 11435 }, { "epoch": 1.8781228830086425, "grad_norm": 0.38431140428798827, "learning_rate": 6.294759064987801e-06, "loss": 0.4476, "step": 11436 }, { "epoch": 1.8782871102169851, "grad_norm": 0.35869495335306845, "learning_rate": 6.294328997158193e-06, "loss": 0.428, "step": 11437 }, { "epoch": 1.878451337425328, "grad_norm": 0.34393025001890787, "learning_rate": 6.293898909840407e-06, "loss": 0.443, "step": 11438 }, { "epoch": 1.8786155646336706, "grad_norm": 0.30989139809827226, "learning_rate": 6.293468803039114e-06, "loss": 0.4453, "step": 11439 }, { "epoch": 1.8787797918420135, "grad_norm": 0.3166916712968177, "learning_rate": 6.293038676758985e-06, "loss": 0.451, "step": 11440 }, { "epoch": 1.8789440190503561, "grad_norm": 0.27680712337488805, "learning_rate": 6.292608531004692e-06, "loss": 0.4412, "step": 11441 }, { "epoch": 1.8791082462586988, "grad_norm": 0.2942760765393107, "learning_rate": 6.292178365780906e-06, "loss": 0.4561, "step": 11442 }, { "epoch": 1.8792724734670416, "grad_norm": 0.39917989420901784, "learning_rate": 6.2917481810923e-06, "loss": 0.4643, "step": 11443 }, { "epoch": 1.8794367006753845, "grad_norm": 0.6947523304053774, "learning_rate": 6.291317976943547e-06, "loss": 0.4524, "step": 11444 }, { "epoch": 1.8796009278837271, "grad_norm": 0.41434515474515987, "learning_rate": 6.2908877533393164e-06, "loss": 0.4302, "step": 11445 }, { "epoch": 1.8797651550920698, "grad_norm": 0.45832914292702964, "learning_rate": 6.290457510284283e-06, "loss": 0.4517, "step": 11446 }, { "epoch": 1.8799293823004126, "grad_norm": 0.32247693907639524, "learning_rate": 6.290027247783121e-06, "loss": 0.4395, "step": 11447 }, { "epoch": 1.8800936095087555, "grad_norm": 0.2892060133011612, "learning_rate": 6.289596965840503e-06, "loss": 0.4655, "step": 11448 }, { "epoch": 1.8802578367170981, "grad_norm": 0.3216050782026567, "learning_rate": 6.2891666644610985e-06, "loss": 0.4504, "step": 11449 }, { "epoch": 1.8804220639254408, "grad_norm": 0.38605541235934754, "learning_rate": 6.288736343649584e-06, "loss": 0.4442, "step": 11450 }, { "epoch": 1.8805862911337836, "grad_norm": 0.6160848873275179, "learning_rate": 6.288306003410633e-06, "loss": 0.468, "step": 11451 }, { "epoch": 1.8807505183421265, "grad_norm": 0.3142613591082338, "learning_rate": 6.287875643748921e-06, "loss": 0.4312, "step": 11452 }, { "epoch": 1.8809147455504691, "grad_norm": 0.3910821269458435, "learning_rate": 6.2874452646691205e-06, "loss": 0.4533, "step": 11453 }, { "epoch": 1.8810789727588118, "grad_norm": 0.3190445080531141, "learning_rate": 6.287014866175905e-06, "loss": 0.4482, "step": 11454 }, { "epoch": 1.8812431999671546, "grad_norm": 0.3013278803822531, "learning_rate": 6.286584448273949e-06, "loss": 0.4392, "step": 11455 }, { "epoch": 1.8814074271754972, "grad_norm": 0.33607970980024, "learning_rate": 6.286154010967928e-06, "loss": 0.4596, "step": 11456 }, { "epoch": 1.88157165438384, "grad_norm": 0.28505845262642243, "learning_rate": 6.285723554262519e-06, "loss": 0.4511, "step": 11457 }, { "epoch": 1.8817358815921827, "grad_norm": 0.34519722662923064, "learning_rate": 6.285293078162394e-06, "loss": 0.4544, "step": 11458 }, { "epoch": 1.8819001088005254, "grad_norm": 0.26225405676276736, "learning_rate": 6.28486258267223e-06, "loss": 0.4177, "step": 11459 }, { "epoch": 1.8820643360088682, "grad_norm": 0.40104514826593735, "learning_rate": 6.284432067796701e-06, "loss": 0.4517, "step": 11460 }, { "epoch": 1.882228563217211, "grad_norm": 0.29963155183195445, "learning_rate": 6.284001533540486e-06, "loss": 0.45, "step": 11461 }, { "epoch": 1.8823927904255537, "grad_norm": 0.30673249122938356, "learning_rate": 6.283570979908258e-06, "loss": 0.4439, "step": 11462 }, { "epoch": 1.8825570176338964, "grad_norm": 0.3151831837335785, "learning_rate": 6.283140406904695e-06, "loss": 0.4615, "step": 11463 }, { "epoch": 1.8827212448422392, "grad_norm": 0.2774161243114612, "learning_rate": 6.282709814534472e-06, "loss": 0.448, "step": 11464 }, { "epoch": 1.882885472050582, "grad_norm": 0.32876418823333714, "learning_rate": 6.282279202802268e-06, "loss": 0.4354, "step": 11465 }, { "epoch": 1.8830496992589247, "grad_norm": 0.36150603135257353, "learning_rate": 6.281848571712756e-06, "loss": 0.4361, "step": 11466 }, { "epoch": 1.8832139264672674, "grad_norm": 0.36968781944725243, "learning_rate": 6.281417921270618e-06, "loss": 0.4453, "step": 11467 }, { "epoch": 1.8833781536756102, "grad_norm": 0.37179395407086213, "learning_rate": 6.280987251480527e-06, "loss": 0.4458, "step": 11468 }, { "epoch": 1.883542380883953, "grad_norm": 0.4567677928565284, "learning_rate": 6.280556562347163e-06, "loss": 0.4602, "step": 11469 }, { "epoch": 1.8837066080922957, "grad_norm": 0.3907571110244243, "learning_rate": 6.280125853875202e-06, "loss": 0.4568, "step": 11470 }, { "epoch": 1.8838708353006384, "grad_norm": 0.3045018771953139, "learning_rate": 6.279695126069323e-06, "loss": 0.4285, "step": 11471 }, { "epoch": 1.8840350625089812, "grad_norm": 0.38405524916279843, "learning_rate": 6.279264378934205e-06, "loss": 0.4835, "step": 11472 }, { "epoch": 1.8841992897173239, "grad_norm": 0.2940885242084993, "learning_rate": 6.278833612474525e-06, "loss": 0.4656, "step": 11473 }, { "epoch": 1.8843635169256667, "grad_norm": 0.34809134178677464, "learning_rate": 6.2784028266949615e-06, "loss": 0.4426, "step": 11474 }, { "epoch": 1.8845277441340094, "grad_norm": 0.27515836675655314, "learning_rate": 6.277972021600192e-06, "loss": 0.4433, "step": 11475 }, { "epoch": 1.884691971342352, "grad_norm": 0.33595930419622255, "learning_rate": 6.277541197194899e-06, "loss": 0.4445, "step": 11476 }, { "epoch": 1.8848561985506949, "grad_norm": 0.32328908953131097, "learning_rate": 6.27711035348376e-06, "loss": 0.4423, "step": 11477 }, { "epoch": 1.8850204257590377, "grad_norm": 0.2819189057919243, "learning_rate": 6.276679490471454e-06, "loss": 0.4349, "step": 11478 }, { "epoch": 1.8851846529673804, "grad_norm": 0.3256501015452309, "learning_rate": 6.276248608162659e-06, "loss": 0.4511, "step": 11479 }, { "epoch": 1.885348880175723, "grad_norm": 0.3552499408145123, "learning_rate": 6.275817706562058e-06, "loss": 0.4528, "step": 11480 }, { "epoch": 1.8855131073840659, "grad_norm": 0.2841417854307412, "learning_rate": 6.275386785674329e-06, "loss": 0.4485, "step": 11481 }, { "epoch": 1.8856773345924087, "grad_norm": 0.31758879872950563, "learning_rate": 6.274955845504154e-06, "loss": 0.4472, "step": 11482 }, { "epoch": 1.8858415618007514, "grad_norm": 0.43039374723598556, "learning_rate": 6.2745248860562105e-06, "loss": 0.4618, "step": 11483 }, { "epoch": 1.886005789009094, "grad_norm": 0.3325004206289839, "learning_rate": 6.274093907335181e-06, "loss": 0.4484, "step": 11484 }, { "epoch": 1.8861700162174369, "grad_norm": 0.3247843329476943, "learning_rate": 6.273662909345747e-06, "loss": 0.4326, "step": 11485 }, { "epoch": 1.8863342434257797, "grad_norm": 0.3280630386694321, "learning_rate": 6.273231892092589e-06, "loss": 0.4396, "step": 11486 }, { "epoch": 1.8864984706341223, "grad_norm": 0.3923324865040793, "learning_rate": 6.272800855580388e-06, "loss": 0.4365, "step": 11487 }, { "epoch": 1.886662697842465, "grad_norm": 0.33057770575928486, "learning_rate": 6.272369799813824e-06, "loss": 0.443, "step": 11488 }, { "epoch": 1.8868269250508078, "grad_norm": 0.7173285870423338, "learning_rate": 6.271938724797581e-06, "loss": 0.4398, "step": 11489 }, { "epoch": 1.8869911522591505, "grad_norm": 0.4121293372702511, "learning_rate": 6.27150763053634e-06, "loss": 0.4651, "step": 11490 }, { "epoch": 1.8871553794674933, "grad_norm": 0.29936010036350014, "learning_rate": 6.271076517034784e-06, "loss": 0.4499, "step": 11491 }, { "epoch": 1.887319606675836, "grad_norm": 0.3528959255900349, "learning_rate": 6.270645384297594e-06, "loss": 0.4273, "step": 11492 }, { "epoch": 1.8874838338841786, "grad_norm": 0.39731600061297867, "learning_rate": 6.270214232329453e-06, "loss": 0.4497, "step": 11493 }, { "epoch": 1.8876480610925215, "grad_norm": 0.277693424780527, "learning_rate": 6.269783061135044e-06, "loss": 0.444, "step": 11494 }, { "epoch": 1.8878122883008643, "grad_norm": 0.40844379179595136, "learning_rate": 6.269351870719049e-06, "loss": 0.4735, "step": 11495 }, { "epoch": 1.887976515509207, "grad_norm": 0.31135073552625936, "learning_rate": 6.268920661086153e-06, "loss": 0.437, "step": 11496 }, { "epoch": 1.8881407427175496, "grad_norm": 0.32411155738375347, "learning_rate": 6.268489432241038e-06, "loss": 0.4472, "step": 11497 }, { "epoch": 1.8883049699258925, "grad_norm": 0.28558926169895243, "learning_rate": 6.268058184188387e-06, "loss": 0.4326, "step": 11498 }, { "epoch": 1.8884691971342353, "grad_norm": 0.3449194860672656, "learning_rate": 6.267626916932886e-06, "loss": 0.4565, "step": 11499 }, { "epoch": 1.888633424342578, "grad_norm": 0.34609846937748573, "learning_rate": 6.267195630479215e-06, "loss": 0.45, "step": 11500 }, { "epoch": 1.8887976515509206, "grad_norm": 0.30123922815432147, "learning_rate": 6.266764324832063e-06, "loss": 0.4421, "step": 11501 }, { "epoch": 1.8889618787592635, "grad_norm": 0.33185310581070543, "learning_rate": 6.266332999996111e-06, "loss": 0.4628, "step": 11502 }, { "epoch": 1.8891261059676063, "grad_norm": 0.45745920406436913, "learning_rate": 6.265901655976046e-06, "loss": 0.4583, "step": 11503 }, { "epoch": 1.889290333175949, "grad_norm": 0.2995880781951445, "learning_rate": 6.265470292776551e-06, "loss": 0.4449, "step": 11504 }, { "epoch": 1.8894545603842916, "grad_norm": 0.45810579993366735, "learning_rate": 6.265038910402311e-06, "loss": 0.4526, "step": 11505 }, { "epoch": 1.8896187875926345, "grad_norm": 0.28333685775743284, "learning_rate": 6.264607508858013e-06, "loss": 0.4512, "step": 11506 }, { "epoch": 1.889783014800977, "grad_norm": 0.35274460640803634, "learning_rate": 6.2641760881483415e-06, "loss": 0.4415, "step": 11507 }, { "epoch": 1.88994724200932, "grad_norm": 0.32639796842673624, "learning_rate": 6.26374464827798e-06, "loss": 0.4678, "step": 11508 }, { "epoch": 1.8901114692176626, "grad_norm": 0.35721090968404956, "learning_rate": 6.263313189251618e-06, "loss": 0.4635, "step": 11509 }, { "epoch": 1.8902756964260052, "grad_norm": 0.3324618961782957, "learning_rate": 6.262881711073939e-06, "loss": 0.4408, "step": 11510 }, { "epoch": 1.890439923634348, "grad_norm": 0.3248140728701892, "learning_rate": 6.262450213749631e-06, "loss": 0.4484, "step": 11511 }, { "epoch": 1.890604150842691, "grad_norm": 0.30240343332479325, "learning_rate": 6.26201869728338e-06, "loss": 0.4579, "step": 11512 }, { "epoch": 1.8907683780510336, "grad_norm": 0.3734330252729453, "learning_rate": 6.261587161679871e-06, "loss": 0.4604, "step": 11513 }, { "epoch": 1.8909326052593762, "grad_norm": 0.31340663492426546, "learning_rate": 6.261155606943793e-06, "loss": 0.4493, "step": 11514 }, { "epoch": 1.891096832467719, "grad_norm": 0.3291571447089467, "learning_rate": 6.260724033079832e-06, "loss": 0.4614, "step": 11515 }, { "epoch": 1.891261059676062, "grad_norm": 0.29024380784731046, "learning_rate": 6.260292440092677e-06, "loss": 0.4427, "step": 11516 }, { "epoch": 1.8914252868844046, "grad_norm": 0.9818095952183078, "learning_rate": 6.259860827987014e-06, "loss": 0.4554, "step": 11517 }, { "epoch": 1.8915895140927472, "grad_norm": 0.59070892702716, "learning_rate": 6.259429196767529e-06, "loss": 0.4225, "step": 11518 }, { "epoch": 1.89175374130109, "grad_norm": 0.35026140088051805, "learning_rate": 6.258997546438914e-06, "loss": 0.4421, "step": 11519 }, { "epoch": 1.891917968509433, "grad_norm": 0.455132303164521, "learning_rate": 6.258565877005853e-06, "loss": 0.4801, "step": 11520 }, { "epoch": 1.8920821957177756, "grad_norm": 0.3062190096578685, "learning_rate": 6.258134188473038e-06, "loss": 0.4525, "step": 11521 }, { "epoch": 1.8922464229261182, "grad_norm": 0.33415070443741085, "learning_rate": 6.257702480845155e-06, "loss": 0.4534, "step": 11522 }, { "epoch": 1.892410650134461, "grad_norm": 0.30454079751989627, "learning_rate": 6.257270754126895e-06, "loss": 0.4444, "step": 11523 }, { "epoch": 1.8925748773428037, "grad_norm": 0.28195699746422137, "learning_rate": 6.256839008322944e-06, "loss": 0.4761, "step": 11524 }, { "epoch": 1.8927391045511466, "grad_norm": 0.32220012186937447, "learning_rate": 6.256407243437993e-06, "loss": 0.4656, "step": 11525 }, { "epoch": 1.8929033317594892, "grad_norm": 0.29079011686059164, "learning_rate": 6.255975459476733e-06, "loss": 0.4481, "step": 11526 }, { "epoch": 1.8930675589678319, "grad_norm": 0.27533179125606577, "learning_rate": 6.25554365644385e-06, "loss": 0.4347, "step": 11527 }, { "epoch": 1.8932317861761747, "grad_norm": 0.2722721620887308, "learning_rate": 6.255111834344037e-06, "loss": 0.447, "step": 11528 }, { "epoch": 1.8933960133845176, "grad_norm": 0.40678158182691243, "learning_rate": 6.254679993181982e-06, "loss": 0.4652, "step": 11529 }, { "epoch": 1.8935602405928602, "grad_norm": 0.32142588418213813, "learning_rate": 6.254248132962377e-06, "loss": 0.4635, "step": 11530 }, { "epoch": 1.8937244678012028, "grad_norm": 0.29323029764232855, "learning_rate": 6.253816253689909e-06, "loss": 0.4447, "step": 11531 }, { "epoch": 1.8938886950095457, "grad_norm": 0.29601891468051994, "learning_rate": 6.2533843553692736e-06, "loss": 0.4559, "step": 11532 }, { "epoch": 1.8940529222178886, "grad_norm": 0.286254401704754, "learning_rate": 6.252952438005157e-06, "loss": 0.4337, "step": 11533 }, { "epoch": 1.8942171494262312, "grad_norm": 0.3827539442976956, "learning_rate": 6.252520501602252e-06, "loss": 0.4571, "step": 11534 }, { "epoch": 1.8943813766345738, "grad_norm": 0.2871685184088086, "learning_rate": 6.2520885461652515e-06, "loss": 0.4257, "step": 11535 }, { "epoch": 1.8945456038429167, "grad_norm": 0.8404977702623102, "learning_rate": 6.251656571698846e-06, "loss": 0.4478, "step": 11536 }, { "epoch": 1.8947098310512596, "grad_norm": 0.32025923391449485, "learning_rate": 6.251224578207725e-06, "loss": 0.4484, "step": 11537 }, { "epoch": 1.8948740582596022, "grad_norm": 0.286052981589957, "learning_rate": 6.2507925656965825e-06, "loss": 0.4484, "step": 11538 }, { "epoch": 1.8950382854679448, "grad_norm": 0.2856432354318945, "learning_rate": 6.25036053417011e-06, "loss": 0.441, "step": 11539 }, { "epoch": 1.8952025126762877, "grad_norm": 0.3021535240566939, "learning_rate": 6.2499284836330014e-06, "loss": 0.4523, "step": 11540 }, { "epoch": 1.8953667398846303, "grad_norm": 0.4005247080570124, "learning_rate": 6.249496414089948e-06, "loss": 0.463, "step": 11541 }, { "epoch": 1.8955309670929732, "grad_norm": 0.2994150789421531, "learning_rate": 6.24906432554564e-06, "loss": 0.4448, "step": 11542 }, { "epoch": 1.8956951943013158, "grad_norm": 0.3034711094777697, "learning_rate": 6.248632218004773e-06, "loss": 0.4422, "step": 11543 }, { "epoch": 1.8958594215096585, "grad_norm": 0.3241176095587336, "learning_rate": 6.248200091472042e-06, "loss": 0.4704, "step": 11544 }, { "epoch": 1.8960236487180013, "grad_norm": 0.31980159348254694, "learning_rate": 6.247767945952138e-06, "loss": 0.4389, "step": 11545 }, { "epoch": 1.8961878759263442, "grad_norm": 0.37791081930961246, "learning_rate": 6.247335781449751e-06, "loss": 0.4415, "step": 11546 }, { "epoch": 1.8963521031346868, "grad_norm": 0.271887232267887, "learning_rate": 6.2469035979695805e-06, "loss": 0.449, "step": 11547 }, { "epoch": 1.8965163303430295, "grad_norm": 0.33458075064495246, "learning_rate": 6.246471395516319e-06, "loss": 0.4447, "step": 11548 }, { "epoch": 1.8966805575513723, "grad_norm": 0.8032382841821906, "learning_rate": 6.2460391740946585e-06, "loss": 0.4596, "step": 11549 }, { "epoch": 1.8968447847597152, "grad_norm": 0.39747951496956996, "learning_rate": 6.245606933709296e-06, "loss": 0.4669, "step": 11550 }, { "epoch": 1.8970090119680578, "grad_norm": 0.29049470000836963, "learning_rate": 6.245174674364923e-06, "loss": 0.4586, "step": 11551 }, { "epoch": 1.8971732391764005, "grad_norm": 0.3086255776162428, "learning_rate": 6.244742396066237e-06, "loss": 0.4448, "step": 11552 }, { "epoch": 1.8973374663847433, "grad_norm": 0.3656060035316904, "learning_rate": 6.244310098817933e-06, "loss": 0.4494, "step": 11553 }, { "epoch": 1.8975016935930862, "grad_norm": 0.2870249267218026, "learning_rate": 6.243877782624703e-06, "loss": 0.4474, "step": 11554 }, { "epoch": 1.8976659208014288, "grad_norm": 0.39457473479831795, "learning_rate": 6.243445447491246e-06, "loss": 0.4487, "step": 11555 }, { "epoch": 1.8978301480097715, "grad_norm": 0.35379457137991177, "learning_rate": 6.2430130934222545e-06, "loss": 0.4588, "step": 11556 }, { "epoch": 1.8979943752181143, "grad_norm": 0.39416032087160907, "learning_rate": 6.242580720422428e-06, "loss": 0.4667, "step": 11557 }, { "epoch": 1.898158602426457, "grad_norm": 0.3312029003928857, "learning_rate": 6.242148328496459e-06, "loss": 0.4657, "step": 11558 }, { "epoch": 1.8983228296347998, "grad_norm": 0.3977817496035618, "learning_rate": 6.241715917649046e-06, "loss": 0.4449, "step": 11559 }, { "epoch": 1.8984870568431425, "grad_norm": 0.2861507297462149, "learning_rate": 6.241283487884884e-06, "loss": 0.4449, "step": 11560 }, { "epoch": 1.898651284051485, "grad_norm": 0.2867583166903834, "learning_rate": 6.2408510392086714e-06, "loss": 0.4653, "step": 11561 }, { "epoch": 1.898815511259828, "grad_norm": 0.3138206192945438, "learning_rate": 6.240418571625102e-06, "loss": 0.4344, "step": 11562 }, { "epoch": 1.8989797384681708, "grad_norm": 0.33426289559226574, "learning_rate": 6.239986085138875e-06, "loss": 0.4586, "step": 11563 }, { "epoch": 1.8991439656765134, "grad_norm": 0.3225829509226065, "learning_rate": 6.2395535797546875e-06, "loss": 0.4533, "step": 11564 }, { "epoch": 1.899308192884856, "grad_norm": 0.32700753558233936, "learning_rate": 6.239121055477237e-06, "loss": 0.4444, "step": 11565 }, { "epoch": 1.899472420093199, "grad_norm": 0.7395817042078737, "learning_rate": 6.238688512311219e-06, "loss": 0.4559, "step": 11566 }, { "epoch": 1.8996366473015418, "grad_norm": 0.3176473441311553, "learning_rate": 6.238255950261335e-06, "loss": 0.4468, "step": 11567 }, { "epoch": 1.8998008745098844, "grad_norm": 0.3308084452657348, "learning_rate": 6.23782336933228e-06, "loss": 0.4732, "step": 11568 }, { "epoch": 1.899965101718227, "grad_norm": 0.3424847343425013, "learning_rate": 6.237390769528754e-06, "loss": 0.459, "step": 11569 }, { "epoch": 1.90012932892657, "grad_norm": 0.3329594799384342, "learning_rate": 6.236958150855456e-06, "loss": 0.4605, "step": 11570 }, { "epoch": 1.9002935561349128, "grad_norm": 0.5164135512378328, "learning_rate": 6.236525513317083e-06, "loss": 0.4517, "step": 11571 }, { "epoch": 1.9004577833432554, "grad_norm": 0.2962729195900784, "learning_rate": 6.236092856918333e-06, "loss": 0.454, "step": 11572 }, { "epoch": 1.900622010551598, "grad_norm": 0.4457999560545923, "learning_rate": 6.235660181663906e-06, "loss": 0.4499, "step": 11573 }, { "epoch": 1.900786237759941, "grad_norm": 0.29085184015101817, "learning_rate": 6.235227487558504e-06, "loss": 0.4487, "step": 11574 }, { "epoch": 1.9009504649682836, "grad_norm": 0.2955717712662332, "learning_rate": 6.2347947746068245e-06, "loss": 0.4578, "step": 11575 }, { "epoch": 1.9011146921766264, "grad_norm": 0.3152314118744751, "learning_rate": 6.234362042813565e-06, "loss": 0.4325, "step": 11576 }, { "epoch": 1.901278919384969, "grad_norm": 0.26407825448367234, "learning_rate": 6.233929292183427e-06, "loss": 0.4503, "step": 11577 }, { "epoch": 1.9014431465933117, "grad_norm": 0.3915379290000229, "learning_rate": 6.233496522721113e-06, "loss": 0.4504, "step": 11578 }, { "epoch": 1.9016073738016546, "grad_norm": 0.3412610510019888, "learning_rate": 6.233063734431321e-06, "loss": 0.4463, "step": 11579 }, { "epoch": 1.9017716010099974, "grad_norm": 0.32050195639366946, "learning_rate": 6.23263092731875e-06, "loss": 0.4497, "step": 11580 }, { "epoch": 1.90193582821834, "grad_norm": 0.38273878233161573, "learning_rate": 6.232198101388104e-06, "loss": 0.4403, "step": 11581 }, { "epoch": 1.9021000554266827, "grad_norm": 0.27806982974287925, "learning_rate": 6.2317652566440825e-06, "loss": 0.4451, "step": 11582 }, { "epoch": 1.9022642826350256, "grad_norm": 0.32856501937660954, "learning_rate": 6.231332393091385e-06, "loss": 0.4551, "step": 11583 }, { "epoch": 1.9024285098433684, "grad_norm": 0.2682827626213739, "learning_rate": 6.230899510734716e-06, "loss": 0.44, "step": 11584 }, { "epoch": 1.902592737051711, "grad_norm": 0.49272255538438214, "learning_rate": 6.230466609578773e-06, "loss": 0.4414, "step": 11585 }, { "epoch": 1.9027569642600537, "grad_norm": 0.32260483225705006, "learning_rate": 6.230033689628262e-06, "loss": 0.4271, "step": 11586 }, { "epoch": 1.9029211914683966, "grad_norm": 0.29093147267619285, "learning_rate": 6.229600750887883e-06, "loss": 0.4579, "step": 11587 }, { "epoch": 1.9030854186767394, "grad_norm": 0.35265332639510805, "learning_rate": 6.229167793362337e-06, "loss": 0.4413, "step": 11588 }, { "epoch": 1.903249645885082, "grad_norm": 0.3142574353716658, "learning_rate": 6.228734817056328e-06, "loss": 0.4415, "step": 11589 }, { "epoch": 1.9034138730934247, "grad_norm": 0.5622430879448479, "learning_rate": 6.228301821974559e-06, "loss": 0.4357, "step": 11590 }, { "epoch": 1.9035781003017676, "grad_norm": 0.3871498522685965, "learning_rate": 6.227868808121731e-06, "loss": 0.4503, "step": 11591 }, { "epoch": 1.9037423275101102, "grad_norm": 0.3141566277760273, "learning_rate": 6.227435775502547e-06, "loss": 0.4639, "step": 11592 }, { "epoch": 1.903906554718453, "grad_norm": 0.32246353834594804, "learning_rate": 6.227002724121711e-06, "loss": 0.4344, "step": 11593 }, { "epoch": 1.9040707819267957, "grad_norm": 0.29801343541053976, "learning_rate": 6.226569653983929e-06, "loss": 0.4561, "step": 11594 }, { "epoch": 1.9042350091351383, "grad_norm": 0.33855218720060254, "learning_rate": 6.2261365650939e-06, "loss": 0.445, "step": 11595 }, { "epoch": 1.9043992363434812, "grad_norm": 0.3087953002873901, "learning_rate": 6.2257034574563285e-06, "loss": 0.4356, "step": 11596 }, { "epoch": 1.904563463551824, "grad_norm": 0.448227658275254, "learning_rate": 6.225270331075921e-06, "loss": 0.4669, "step": 11597 }, { "epoch": 1.9047276907601667, "grad_norm": 0.325155177303091, "learning_rate": 6.22483718595738e-06, "loss": 0.4615, "step": 11598 }, { "epoch": 1.9048919179685093, "grad_norm": 0.4675952573516096, "learning_rate": 6.2244040221054095e-06, "loss": 0.4613, "step": 11599 }, { "epoch": 1.9050561451768522, "grad_norm": 0.32235353184835497, "learning_rate": 6.223970839524715e-06, "loss": 0.4601, "step": 11600 }, { "epoch": 1.905220372385195, "grad_norm": 0.33233067198397287, "learning_rate": 6.223537638220001e-06, "loss": 0.4715, "step": 11601 }, { "epoch": 1.9053845995935377, "grad_norm": 0.2982512809301434, "learning_rate": 6.223104418195972e-06, "loss": 0.4555, "step": 11602 }, { "epoch": 1.9055488268018803, "grad_norm": 0.3801442216186049, "learning_rate": 6.2226711794573354e-06, "loss": 0.4318, "step": 11603 }, { "epoch": 1.9057130540102232, "grad_norm": 0.2957424808790963, "learning_rate": 6.222237922008795e-06, "loss": 0.4574, "step": 11604 }, { "epoch": 1.905877281218566, "grad_norm": 0.3087230953401648, "learning_rate": 6.221804645855054e-06, "loss": 0.4411, "step": 11605 }, { "epoch": 1.9060415084269087, "grad_norm": 0.3542054566112007, "learning_rate": 6.221371351000822e-06, "loss": 0.4476, "step": 11606 }, { "epoch": 1.9062057356352513, "grad_norm": 0.3553897290376785, "learning_rate": 6.2209380374508035e-06, "loss": 0.4707, "step": 11607 }, { "epoch": 1.906369962843594, "grad_norm": 0.3564058255833194, "learning_rate": 6.220504705209705e-06, "loss": 0.4495, "step": 11608 }, { "epoch": 1.9065341900519368, "grad_norm": 0.3832925455459929, "learning_rate": 6.220071354282232e-06, "loss": 0.4625, "step": 11609 }, { "epoch": 1.9066984172602797, "grad_norm": 0.2817875411638949, "learning_rate": 6.219637984673092e-06, "loss": 0.4453, "step": 11610 }, { "epoch": 1.9068626444686223, "grad_norm": 0.4776547443948289, "learning_rate": 6.219204596386991e-06, "loss": 0.4558, "step": 11611 }, { "epoch": 1.907026871676965, "grad_norm": 0.34962942789150825, "learning_rate": 6.218771189428637e-06, "loss": 0.455, "step": 11612 }, { "epoch": 1.9071910988853078, "grad_norm": 0.30962434569385655, "learning_rate": 6.218337763802738e-06, "loss": 0.4462, "step": 11613 }, { "epoch": 1.9073553260936507, "grad_norm": 0.3391862780256781, "learning_rate": 6.2179043195139985e-06, "loss": 0.4351, "step": 11614 }, { "epoch": 1.9075195533019933, "grad_norm": 0.30719116774597893, "learning_rate": 6.2174708565671296e-06, "loss": 0.4492, "step": 11615 }, { "epoch": 1.907683780510336, "grad_norm": 0.30054149364958066, "learning_rate": 6.217037374966836e-06, "loss": 0.4628, "step": 11616 }, { "epoch": 1.9078480077186788, "grad_norm": 0.386653014036295, "learning_rate": 6.216603874717828e-06, "loss": 0.4479, "step": 11617 }, { "epoch": 1.9080122349270217, "grad_norm": 0.31127433847704317, "learning_rate": 6.216170355824812e-06, "loss": 0.4377, "step": 11618 }, { "epoch": 1.9081764621353643, "grad_norm": 0.3605960243523264, "learning_rate": 6.215736818292499e-06, "loss": 0.459, "step": 11619 }, { "epoch": 1.908340689343707, "grad_norm": 0.33757270101982667, "learning_rate": 6.215303262125595e-06, "loss": 0.4627, "step": 11620 }, { "epoch": 1.9085049165520498, "grad_norm": 0.38173723225560446, "learning_rate": 6.21486968732881e-06, "loss": 0.4569, "step": 11621 }, { "epoch": 1.9086691437603927, "grad_norm": 0.32491942608170243, "learning_rate": 6.214436093906852e-06, "loss": 0.4513, "step": 11622 }, { "epoch": 1.9088333709687353, "grad_norm": 0.38253325439429253, "learning_rate": 6.214002481864434e-06, "loss": 0.429, "step": 11623 }, { "epoch": 1.908997598177078, "grad_norm": 0.5169229832581916, "learning_rate": 6.213568851206261e-06, "loss": 0.4398, "step": 11624 }, { "epoch": 1.9091618253854206, "grad_norm": 0.36052604572634506, "learning_rate": 6.2131352019370446e-06, "loss": 0.4403, "step": 11625 }, { "epoch": 1.9093260525937634, "grad_norm": 0.2924912097276931, "learning_rate": 6.212701534061493e-06, "loss": 0.4611, "step": 11626 }, { "epoch": 1.9094902798021063, "grad_norm": 0.3404899101670224, "learning_rate": 6.212267847584319e-06, "loss": 0.4447, "step": 11627 }, { "epoch": 1.909654507010449, "grad_norm": 0.36381309075662677, "learning_rate": 6.211834142510232e-06, "loss": 0.4577, "step": 11628 }, { "epoch": 1.9098187342187916, "grad_norm": 0.38424637201933903, "learning_rate": 6.211400418843942e-06, "loss": 0.4495, "step": 11629 }, { "epoch": 1.9099829614271344, "grad_norm": 0.9281977678260263, "learning_rate": 6.2109666765901585e-06, "loss": 0.4659, "step": 11630 }, { "epoch": 1.9101471886354773, "grad_norm": 0.35161232363455713, "learning_rate": 6.2105329157535935e-06, "loss": 0.455, "step": 11631 }, { "epoch": 1.91031141584382, "grad_norm": 1.1585271182741959, "learning_rate": 6.21009913633896e-06, "loss": 0.4312, "step": 11632 }, { "epoch": 1.9104756430521626, "grad_norm": 0.3674122396524195, "learning_rate": 6.209665338350967e-06, "loss": 0.4796, "step": 11633 }, { "epoch": 1.9106398702605054, "grad_norm": 0.36718309874128846, "learning_rate": 6.209231521794324e-06, "loss": 0.4304, "step": 11634 }, { "epoch": 1.9108040974688483, "grad_norm": 0.6703568626303426, "learning_rate": 6.208797686673746e-06, "loss": 0.4637, "step": 11635 }, { "epoch": 1.910968324677191, "grad_norm": 0.40632340164699454, "learning_rate": 6.2083638329939455e-06, "loss": 0.46, "step": 11636 }, { "epoch": 1.9111325518855335, "grad_norm": 0.35780222483159585, "learning_rate": 6.207929960759631e-06, "loss": 0.4555, "step": 11637 }, { "epoch": 1.9112967790938764, "grad_norm": 0.34875469521533164, "learning_rate": 6.207496069975519e-06, "loss": 0.4445, "step": 11638 }, { "epoch": 1.9114610063022193, "grad_norm": 0.37116131198879804, "learning_rate": 6.207062160646318e-06, "loss": 0.4627, "step": 11639 }, { "epoch": 1.911625233510562, "grad_norm": 0.3261872497806788, "learning_rate": 6.206628232776743e-06, "loss": 0.436, "step": 11640 }, { "epoch": 1.9117894607189045, "grad_norm": 0.2917346097333719, "learning_rate": 6.206194286371505e-06, "loss": 0.438, "step": 11641 }, { "epoch": 1.9119536879272472, "grad_norm": 0.318715515401316, "learning_rate": 6.205760321435319e-06, "loss": 0.4716, "step": 11642 }, { "epoch": 1.91211791513559, "grad_norm": 0.3822885736630288, "learning_rate": 6.205326337972899e-06, "loss": 0.4612, "step": 11643 }, { "epoch": 1.912282142343933, "grad_norm": 0.39512948614808435, "learning_rate": 6.204892335988956e-06, "loss": 0.4546, "step": 11644 }, { "epoch": 1.9124463695522755, "grad_norm": 0.2906356956083419, "learning_rate": 6.204458315488205e-06, "loss": 0.4323, "step": 11645 }, { "epoch": 1.9126105967606182, "grad_norm": 0.3151679721101704, "learning_rate": 6.204024276475361e-06, "loss": 0.4274, "step": 11646 }, { "epoch": 1.912774823968961, "grad_norm": 0.280920093275785, "learning_rate": 6.203590218955136e-06, "loss": 0.4345, "step": 11647 }, { "epoch": 1.912939051177304, "grad_norm": 0.3177305722530813, "learning_rate": 6.203156142932243e-06, "loss": 0.4561, "step": 11648 }, { "epoch": 1.9131032783856465, "grad_norm": 0.35949250252352477, "learning_rate": 6.202722048411402e-06, "loss": 0.4366, "step": 11649 }, { "epoch": 1.9132675055939892, "grad_norm": 0.37584685311355087, "learning_rate": 6.202287935397321e-06, "loss": 0.4478, "step": 11650 }, { "epoch": 1.913431732802332, "grad_norm": 0.33655101627969175, "learning_rate": 6.20185380389472e-06, "loss": 0.4548, "step": 11651 }, { "epoch": 1.913595960010675, "grad_norm": 0.30964429163933455, "learning_rate": 6.201419653908313e-06, "loss": 0.4538, "step": 11652 }, { "epoch": 1.9137601872190175, "grad_norm": 0.29372286656988195, "learning_rate": 6.200985485442815e-06, "loss": 0.4318, "step": 11653 }, { "epoch": 1.9139244144273602, "grad_norm": 1.6537874331386198, "learning_rate": 6.200551298502939e-06, "loss": 0.4327, "step": 11654 }, { "epoch": 1.914088641635703, "grad_norm": 0.414880176031993, "learning_rate": 6.2001170930934025e-06, "loss": 0.4639, "step": 11655 }, { "epoch": 1.9142528688440459, "grad_norm": 0.381572536194753, "learning_rate": 6.199682869218922e-06, "loss": 0.454, "step": 11656 }, { "epoch": 1.9144170960523885, "grad_norm": 0.3618841751303404, "learning_rate": 6.199248626884215e-06, "loss": 0.4415, "step": 11657 }, { "epoch": 1.9145813232607312, "grad_norm": 0.3091815517830806, "learning_rate": 6.198814366093996e-06, "loss": 0.4635, "step": 11658 }, { "epoch": 1.9147455504690738, "grad_norm": 0.3461588940465848, "learning_rate": 6.198380086852981e-06, "loss": 0.4545, "step": 11659 }, { "epoch": 1.9149097776774167, "grad_norm": 0.32914313950790325, "learning_rate": 6.197945789165885e-06, "loss": 0.4288, "step": 11660 }, { "epoch": 1.9150740048857595, "grad_norm": 0.30501120382983676, "learning_rate": 6.197511473037431e-06, "loss": 0.4331, "step": 11661 }, { "epoch": 1.9152382320941022, "grad_norm": 0.31219879028545594, "learning_rate": 6.19707713847233e-06, "loss": 0.4464, "step": 11662 }, { "epoch": 1.9154024593024448, "grad_norm": 0.31460550974100093, "learning_rate": 6.196642785475302e-06, "loss": 0.4562, "step": 11663 }, { "epoch": 1.9155666865107877, "grad_norm": 0.36270468986808635, "learning_rate": 6.196208414051064e-06, "loss": 0.4501, "step": 11664 }, { "epoch": 1.9157309137191305, "grad_norm": 0.4112853774633795, "learning_rate": 6.195774024204334e-06, "loss": 0.4611, "step": 11665 }, { "epoch": 1.9158951409274732, "grad_norm": 0.33448305601115735, "learning_rate": 6.19533961593983e-06, "loss": 0.4738, "step": 11666 }, { "epoch": 1.9160593681358158, "grad_norm": 0.5320652524792794, "learning_rate": 6.194905189262269e-06, "loss": 0.4803, "step": 11667 }, { "epoch": 1.9162235953441586, "grad_norm": 0.3020273231113173, "learning_rate": 6.19447074417637e-06, "loss": 0.4522, "step": 11668 }, { "epoch": 1.9163878225525015, "grad_norm": 0.3270778515804615, "learning_rate": 6.194036280686851e-06, "loss": 0.4652, "step": 11669 }, { "epoch": 1.9165520497608441, "grad_norm": 0.30776326745206567, "learning_rate": 6.193601798798435e-06, "loss": 0.4489, "step": 11670 }, { "epoch": 1.9167162769691868, "grad_norm": 0.30728786187995366, "learning_rate": 6.193167298515833e-06, "loss": 0.4427, "step": 11671 }, { "epoch": 1.9168805041775296, "grad_norm": 0.3927785447223489, "learning_rate": 6.192732779843771e-06, "loss": 0.4553, "step": 11672 }, { "epoch": 1.9170447313858725, "grad_norm": 0.29282886384148993, "learning_rate": 6.192298242786963e-06, "loss": 0.4499, "step": 11673 }, { "epoch": 1.9172089585942151, "grad_norm": 0.31577968887587804, "learning_rate": 6.191863687350133e-06, "loss": 0.4792, "step": 11674 }, { "epoch": 1.9173731858025578, "grad_norm": 0.3761942499553469, "learning_rate": 6.191429113537998e-06, "loss": 0.4498, "step": 11675 }, { "epoch": 1.9175374130109004, "grad_norm": 0.317601854958605, "learning_rate": 6.190994521355279e-06, "loss": 0.4429, "step": 11676 }, { "epoch": 1.9177016402192433, "grad_norm": 0.3517208242879503, "learning_rate": 6.190559910806696e-06, "loss": 0.4644, "step": 11677 }, { "epoch": 1.9178658674275861, "grad_norm": 0.3241901907564984, "learning_rate": 6.190125281896969e-06, "loss": 0.456, "step": 11678 }, { "epoch": 1.9180300946359288, "grad_norm": 0.3025139517641792, "learning_rate": 6.189690634630818e-06, "loss": 0.4564, "step": 11679 }, { "epoch": 1.9181943218442714, "grad_norm": 0.35524817549149657, "learning_rate": 6.189255969012965e-06, "loss": 0.4399, "step": 11680 }, { "epoch": 1.9183585490526143, "grad_norm": 0.4244953064809638, "learning_rate": 6.18882128504813e-06, "loss": 0.4292, "step": 11681 }, { "epoch": 1.9185227762609571, "grad_norm": 0.30648349992963597, "learning_rate": 6.188386582741034e-06, "loss": 0.4547, "step": 11682 }, { "epoch": 1.9186870034692998, "grad_norm": 0.30163722557002615, "learning_rate": 6.187951862096398e-06, "loss": 0.4584, "step": 11683 }, { "epoch": 1.9188512306776424, "grad_norm": 0.33493401319101296, "learning_rate": 6.187517123118945e-06, "loss": 0.4509, "step": 11684 }, { "epoch": 1.9190154578859853, "grad_norm": 0.313911034591551, "learning_rate": 6.187082365813395e-06, "loss": 0.4539, "step": 11685 }, { "epoch": 1.9191796850943281, "grad_norm": 0.5202964654233119, "learning_rate": 6.186647590184471e-06, "loss": 0.4664, "step": 11686 }, { "epoch": 1.9193439123026708, "grad_norm": 0.2911861487320873, "learning_rate": 6.186212796236896e-06, "loss": 0.4493, "step": 11687 }, { "epoch": 1.9195081395110134, "grad_norm": 0.3894019111871137, "learning_rate": 6.185777983975389e-06, "loss": 0.4719, "step": 11688 }, { "epoch": 1.9196723667193563, "grad_norm": 0.3263915765382676, "learning_rate": 6.185343153404675e-06, "loss": 0.4603, "step": 11689 }, { "epoch": 1.9198365939276991, "grad_norm": 0.2812099109496593, "learning_rate": 6.184908304529477e-06, "loss": 0.4397, "step": 11690 }, { "epoch": 1.9200008211360418, "grad_norm": 0.3316284604588048, "learning_rate": 6.184473437354517e-06, "loss": 0.4634, "step": 11691 }, { "epoch": 1.9201650483443844, "grad_norm": 0.3214264446614346, "learning_rate": 6.184038551884518e-06, "loss": 0.4567, "step": 11692 }, { "epoch": 1.920329275552727, "grad_norm": 0.29348354172249397, "learning_rate": 6.183603648124203e-06, "loss": 0.4354, "step": 11693 }, { "epoch": 1.92049350276107, "grad_norm": 0.42769052741256725, "learning_rate": 6.183168726078295e-06, "loss": 0.4522, "step": 11694 }, { "epoch": 1.9206577299694128, "grad_norm": 0.3172711368471013, "learning_rate": 6.182733785751521e-06, "loss": 0.4598, "step": 11695 }, { "epoch": 1.9208219571777554, "grad_norm": 0.3141973931650255, "learning_rate": 6.182298827148602e-06, "loss": 0.4464, "step": 11696 }, { "epoch": 1.920986184386098, "grad_norm": 0.3561540020725479, "learning_rate": 6.181863850274262e-06, "loss": 0.4455, "step": 11697 }, { "epoch": 1.9211504115944409, "grad_norm": 0.32393565694758736, "learning_rate": 6.181428855133225e-06, "loss": 0.4564, "step": 11698 }, { "epoch": 1.9213146388027837, "grad_norm": 0.298550373846303, "learning_rate": 6.1809938417302176e-06, "loss": 0.4506, "step": 11699 }, { "epoch": 1.9214788660111264, "grad_norm": 0.3943979027923778, "learning_rate": 6.180558810069962e-06, "loss": 0.4397, "step": 11700 }, { "epoch": 1.921643093219469, "grad_norm": 0.33128113794288905, "learning_rate": 6.180123760157187e-06, "loss": 0.4649, "step": 11701 }, { "epoch": 1.9218073204278119, "grad_norm": 0.33788593807856176, "learning_rate": 6.179688691996611e-06, "loss": 0.4505, "step": 11702 }, { "epoch": 1.9219715476361547, "grad_norm": 0.29197053226973274, "learning_rate": 6.179253605592966e-06, "loss": 0.4484, "step": 11703 }, { "epoch": 1.9221357748444974, "grad_norm": 0.4002773092580003, "learning_rate": 6.178818500950975e-06, "loss": 0.4368, "step": 11704 }, { "epoch": 1.92230000205284, "grad_norm": 0.3225017152426909, "learning_rate": 6.178383378075361e-06, "loss": 0.4492, "step": 11705 }, { "epoch": 1.9224642292611829, "grad_norm": 0.3292097446594349, "learning_rate": 6.177948236970854e-06, "loss": 0.4524, "step": 11706 }, { "epoch": 1.9226284564695257, "grad_norm": 0.351995173082209, "learning_rate": 6.177513077642178e-06, "loss": 0.4453, "step": 11707 }, { "epoch": 1.9227926836778684, "grad_norm": 0.31252517151783626, "learning_rate": 6.177077900094058e-06, "loss": 0.453, "step": 11708 }, { "epoch": 1.922956910886211, "grad_norm": 0.2861624043707913, "learning_rate": 6.176642704331224e-06, "loss": 0.4326, "step": 11709 }, { "epoch": 1.9231211380945537, "grad_norm": 0.3842211082952547, "learning_rate": 6.176207490358399e-06, "loss": 0.4414, "step": 11710 }, { "epoch": 1.9232853653028965, "grad_norm": 0.7917893981486481, "learning_rate": 6.175772258180314e-06, "loss": 0.445, "step": 11711 }, { "epoch": 1.9234495925112394, "grad_norm": 0.4790207371880203, "learning_rate": 6.175337007801691e-06, "loss": 0.4307, "step": 11712 }, { "epoch": 1.923613819719582, "grad_norm": 0.40561312865159216, "learning_rate": 6.174901739227259e-06, "loss": 0.4498, "step": 11713 }, { "epoch": 1.9237780469279246, "grad_norm": 0.47192462934888785, "learning_rate": 6.174466452461749e-06, "loss": 0.4539, "step": 11714 }, { "epoch": 1.9239422741362675, "grad_norm": 0.3453248892639385, "learning_rate": 6.174031147509885e-06, "loss": 0.452, "step": 11715 }, { "epoch": 1.9241065013446104, "grad_norm": 0.46993171084266266, "learning_rate": 6.173595824376396e-06, "loss": 0.441, "step": 11716 }, { "epoch": 1.924270728552953, "grad_norm": 0.3132400675291704, "learning_rate": 6.17316048306601e-06, "loss": 0.4474, "step": 11717 }, { "epoch": 1.9244349557612956, "grad_norm": 0.5141692658873547, "learning_rate": 6.172725123583452e-06, "loss": 0.4445, "step": 11718 }, { "epoch": 1.9245991829696385, "grad_norm": 0.3123892509641973, "learning_rate": 6.1722897459334554e-06, "loss": 0.4195, "step": 11719 }, { "epoch": 1.9247634101779814, "grad_norm": 0.32517861042004026, "learning_rate": 6.171854350120748e-06, "loss": 0.4694, "step": 11720 }, { "epoch": 1.924927637386324, "grad_norm": 0.43041962733396594, "learning_rate": 6.171418936150057e-06, "loss": 0.4448, "step": 11721 }, { "epoch": 1.9250918645946666, "grad_norm": 0.32581878191201175, "learning_rate": 6.170983504026111e-06, "loss": 0.4353, "step": 11722 }, { "epoch": 1.9252560918030095, "grad_norm": 0.3221459896375791, "learning_rate": 6.17054805375364e-06, "loss": 0.4501, "step": 11723 }, { "epoch": 1.9254203190113524, "grad_norm": 0.4751198372362801, "learning_rate": 6.170112585337375e-06, "loss": 0.466, "step": 11724 }, { "epoch": 1.925584546219695, "grad_norm": 0.4111261993043956, "learning_rate": 6.169677098782044e-06, "loss": 0.4415, "step": 11725 }, { "epoch": 1.9257487734280376, "grad_norm": 0.4096292396716481, "learning_rate": 6.169241594092376e-06, "loss": 0.4322, "step": 11726 }, { "epoch": 1.9259130006363803, "grad_norm": 0.3758588386747688, "learning_rate": 6.168806071273102e-06, "loss": 0.4531, "step": 11727 }, { "epoch": 1.9260772278447231, "grad_norm": 0.32238900503219686, "learning_rate": 6.168370530328952e-06, "loss": 0.4496, "step": 11728 }, { "epoch": 1.926241455053066, "grad_norm": 0.3738260566944197, "learning_rate": 6.167934971264657e-06, "loss": 0.4533, "step": 11729 }, { "epoch": 1.9264056822614086, "grad_norm": 0.3247033594038588, "learning_rate": 6.167499394084947e-06, "loss": 0.4448, "step": 11730 }, { "epoch": 1.9265699094697513, "grad_norm": 0.3613742112719459, "learning_rate": 6.167063798794553e-06, "loss": 0.452, "step": 11731 }, { "epoch": 1.9267341366780941, "grad_norm": 0.30402605193304383, "learning_rate": 6.166628185398207e-06, "loss": 0.4418, "step": 11732 }, { "epoch": 1.926898363886437, "grad_norm": 0.36077690801634543, "learning_rate": 6.166192553900637e-06, "loss": 0.4634, "step": 11733 }, { "epoch": 1.9270625910947796, "grad_norm": 0.3157741710943167, "learning_rate": 6.165756904306578e-06, "loss": 0.437, "step": 11734 }, { "epoch": 1.9272268183031223, "grad_norm": 0.36396243689784985, "learning_rate": 6.16532123662076e-06, "loss": 0.4715, "step": 11735 }, { "epoch": 1.9273910455114651, "grad_norm": 0.32922576893702915, "learning_rate": 6.164885550847916e-06, "loss": 0.4401, "step": 11736 }, { "epoch": 1.927555272719808, "grad_norm": 0.40557599637326114, "learning_rate": 6.1644498469927755e-06, "loss": 0.4527, "step": 11737 }, { "epoch": 1.9277194999281506, "grad_norm": 0.32617286436421994, "learning_rate": 6.164014125060072e-06, "loss": 0.4699, "step": 11738 }, { "epoch": 1.9278837271364933, "grad_norm": 0.3296499446537119, "learning_rate": 6.163578385054538e-06, "loss": 0.4516, "step": 11739 }, { "epoch": 1.9280479543448361, "grad_norm": 0.5284605915318895, "learning_rate": 6.163142626980906e-06, "loss": 0.4438, "step": 11740 }, { "epoch": 1.928212181553179, "grad_norm": 0.304222483380602, "learning_rate": 6.1627068508439095e-06, "loss": 0.4493, "step": 11741 }, { "epoch": 1.9283764087615216, "grad_norm": 0.37578326123625305, "learning_rate": 6.1622710566482795e-06, "loss": 0.4453, "step": 11742 }, { "epoch": 1.9285406359698642, "grad_norm": 0.3316740779606767, "learning_rate": 6.161835244398751e-06, "loss": 0.4475, "step": 11743 }, { "epoch": 1.9287048631782069, "grad_norm": 0.4724230151433168, "learning_rate": 6.161399414100057e-06, "loss": 0.464, "step": 11744 }, { "epoch": 1.9288690903865497, "grad_norm": 0.33102520210764236, "learning_rate": 6.160963565756932e-06, "loss": 0.4482, "step": 11745 }, { "epoch": 1.9290333175948926, "grad_norm": 0.3039412054856362, "learning_rate": 6.160527699374107e-06, "loss": 0.4579, "step": 11746 }, { "epoch": 1.9291975448032352, "grad_norm": 0.4759493387711221, "learning_rate": 6.160091814956317e-06, "loss": 0.4543, "step": 11747 }, { "epoch": 1.9293617720115779, "grad_norm": 0.30295570419838147, "learning_rate": 6.159655912508297e-06, "loss": 0.4502, "step": 11748 }, { "epoch": 1.9295259992199207, "grad_norm": 0.3095851008676612, "learning_rate": 6.159219992034782e-06, "loss": 0.4476, "step": 11749 }, { "epoch": 1.9296902264282636, "grad_norm": 0.30467728546486345, "learning_rate": 6.158784053540504e-06, "loss": 0.4536, "step": 11750 }, { "epoch": 1.9298544536366062, "grad_norm": 0.3967570517620851, "learning_rate": 6.1583480970301995e-06, "loss": 0.436, "step": 11751 }, { "epoch": 1.9300186808449489, "grad_norm": 0.3002495362111364, "learning_rate": 6.157912122508603e-06, "loss": 0.4264, "step": 11752 }, { "epoch": 1.9301829080532917, "grad_norm": 0.3828269006370637, "learning_rate": 6.157476129980451e-06, "loss": 0.4421, "step": 11753 }, { "epoch": 1.9303471352616346, "grad_norm": 0.30484222608414635, "learning_rate": 6.157040119450475e-06, "loss": 0.4594, "step": 11754 }, { "epoch": 1.9305113624699772, "grad_norm": 0.4559966835142827, "learning_rate": 6.156604090923415e-06, "loss": 0.4618, "step": 11755 }, { "epoch": 1.9306755896783199, "grad_norm": 0.2895503351697011, "learning_rate": 6.1561680444040035e-06, "loss": 0.4558, "step": 11756 }, { "epoch": 1.9308398168866627, "grad_norm": 0.30856588658860873, "learning_rate": 6.1557319798969785e-06, "loss": 0.4452, "step": 11757 }, { "epoch": 1.9310040440950056, "grad_norm": 0.49077377646148856, "learning_rate": 6.155295897407075e-06, "loss": 0.4692, "step": 11758 }, { "epoch": 1.9311682713033482, "grad_norm": 0.2968529348737747, "learning_rate": 6.154859796939029e-06, "loss": 0.4615, "step": 11759 }, { "epoch": 1.9313324985116909, "grad_norm": 0.3808550571539235, "learning_rate": 6.154423678497578e-06, "loss": 0.4438, "step": 11760 }, { "epoch": 1.9314967257200335, "grad_norm": 0.33257583457981604, "learning_rate": 6.153987542087457e-06, "loss": 0.4715, "step": 11761 }, { "epoch": 1.9316609529283764, "grad_norm": 0.3034797701744059, "learning_rate": 6.153551387713406e-06, "loss": 0.4335, "step": 11762 }, { "epoch": 1.9318251801367192, "grad_norm": 0.3366439268492937, "learning_rate": 6.153115215380159e-06, "loss": 0.4536, "step": 11763 }, { "epoch": 1.9319894073450619, "grad_norm": 0.333839910373322, "learning_rate": 6.1526790250924545e-06, "loss": 0.4588, "step": 11764 }, { "epoch": 1.9321536345534045, "grad_norm": 0.29555048190918864, "learning_rate": 6.1522428168550286e-06, "loss": 0.441, "step": 11765 }, { "epoch": 1.9323178617617474, "grad_norm": 0.3160434566532558, "learning_rate": 6.151806590672622e-06, "loss": 0.4259, "step": 11766 }, { "epoch": 1.9324820889700902, "grad_norm": 0.3977836618478647, "learning_rate": 6.151370346549969e-06, "loss": 0.4411, "step": 11767 }, { "epoch": 1.9326463161784329, "grad_norm": 0.2852990076026332, "learning_rate": 6.150934084491809e-06, "loss": 0.4394, "step": 11768 }, { "epoch": 1.9328105433867755, "grad_norm": 0.34967502718124766, "learning_rate": 6.1504978045028825e-06, "loss": 0.4371, "step": 11769 }, { "epoch": 1.9329747705951184, "grad_norm": 0.2949264653113903, "learning_rate": 6.150061506587925e-06, "loss": 0.4506, "step": 11770 }, { "epoch": 1.9331389978034612, "grad_norm": 0.32292941521963847, "learning_rate": 6.149625190751676e-06, "loss": 0.4514, "step": 11771 }, { "epoch": 1.9333032250118038, "grad_norm": 0.3901095676284324, "learning_rate": 6.149188856998874e-06, "loss": 0.4461, "step": 11772 }, { "epoch": 1.9334674522201465, "grad_norm": 0.3148958430118748, "learning_rate": 6.148752505334259e-06, "loss": 0.439, "step": 11773 }, { "epoch": 1.9336316794284893, "grad_norm": 0.4645426254687232, "learning_rate": 6.148316135762571e-06, "loss": 0.4662, "step": 11774 }, { "epoch": 1.9337959066368322, "grad_norm": 0.3381456693550708, "learning_rate": 6.147879748288546e-06, "loss": 0.4397, "step": 11775 }, { "epoch": 1.9339601338451748, "grad_norm": 0.2965471428211383, "learning_rate": 6.1474433429169255e-06, "loss": 0.4574, "step": 11776 }, { "epoch": 1.9341243610535175, "grad_norm": 0.3603201346209414, "learning_rate": 6.14700691965245e-06, "loss": 0.4469, "step": 11777 }, { "epoch": 1.9342885882618601, "grad_norm": 0.32054268239319894, "learning_rate": 6.146570478499859e-06, "loss": 0.437, "step": 11778 }, { "epoch": 1.934452815470203, "grad_norm": 0.48292569009156594, "learning_rate": 6.146134019463895e-06, "loss": 0.4561, "step": 11779 }, { "epoch": 1.9346170426785458, "grad_norm": 0.3414445379177717, "learning_rate": 6.1456975425492925e-06, "loss": 0.4601, "step": 11780 }, { "epoch": 1.9347812698868885, "grad_norm": 0.32659418586761957, "learning_rate": 6.145261047760797e-06, "loss": 0.4548, "step": 11781 }, { "epoch": 1.9349454970952311, "grad_norm": 0.35652097068907895, "learning_rate": 6.144824535103147e-06, "loss": 0.4782, "step": 11782 }, { "epoch": 1.935109724303574, "grad_norm": 0.2759711761110947, "learning_rate": 6.144388004581084e-06, "loss": 0.4528, "step": 11783 }, { "epoch": 1.9352739515119168, "grad_norm": 0.34655152524113225, "learning_rate": 6.143951456199352e-06, "loss": 0.4426, "step": 11784 }, { "epoch": 1.9354381787202595, "grad_norm": 0.30678758926269073, "learning_rate": 6.143514889962687e-06, "loss": 0.4533, "step": 11785 }, { "epoch": 1.935602405928602, "grad_norm": 0.3465024740320573, "learning_rate": 6.143078305875834e-06, "loss": 0.4457, "step": 11786 }, { "epoch": 1.935766633136945, "grad_norm": 0.2944811158711843, "learning_rate": 6.142641703943534e-06, "loss": 0.4469, "step": 11787 }, { "epoch": 1.9359308603452878, "grad_norm": 0.4588778909977551, "learning_rate": 6.142205084170529e-06, "loss": 0.4569, "step": 11788 }, { "epoch": 1.9360950875536305, "grad_norm": 0.38001254582850585, "learning_rate": 6.141768446561563e-06, "loss": 0.4391, "step": 11789 }, { "epoch": 1.936259314761973, "grad_norm": 0.36647792906297894, "learning_rate": 6.141331791121374e-06, "loss": 0.4675, "step": 11790 }, { "epoch": 1.936423541970316, "grad_norm": 0.3147751631851471, "learning_rate": 6.140895117854708e-06, "loss": 0.4451, "step": 11791 }, { "epoch": 1.9365877691786588, "grad_norm": 0.37708723383068626, "learning_rate": 6.140458426766305e-06, "loss": 0.4363, "step": 11792 }, { "epoch": 1.9367519963870015, "grad_norm": 0.314419687793131, "learning_rate": 6.140021717860911e-06, "loss": 0.4344, "step": 11793 }, { "epoch": 1.936916223595344, "grad_norm": 0.2978678788734563, "learning_rate": 6.139584991143268e-06, "loss": 0.4354, "step": 11794 }, { "epoch": 1.9370804508036867, "grad_norm": 0.6290390429035364, "learning_rate": 6.139148246618118e-06, "loss": 0.4435, "step": 11795 }, { "epoch": 1.9372446780120296, "grad_norm": 0.3930535583884199, "learning_rate": 6.138711484290205e-06, "loss": 0.4452, "step": 11796 }, { "epoch": 1.9374089052203725, "grad_norm": 0.4058397974826341, "learning_rate": 6.1382747041642735e-06, "loss": 0.4424, "step": 11797 }, { "epoch": 1.937573132428715, "grad_norm": 0.28754785524275933, "learning_rate": 6.137837906245067e-06, "loss": 0.4626, "step": 11798 }, { "epoch": 1.9377373596370577, "grad_norm": 0.37145994996852144, "learning_rate": 6.13740109053733e-06, "loss": 0.4296, "step": 11799 }, { "epoch": 1.9379015868454006, "grad_norm": 0.31987207942764523, "learning_rate": 6.136964257045804e-06, "loss": 0.4407, "step": 11800 }, { "epoch": 1.9380658140537435, "grad_norm": 0.3120597556496171, "learning_rate": 6.136527405775238e-06, "loss": 0.4154, "step": 11801 }, { "epoch": 1.938230041262086, "grad_norm": 0.32184214275987144, "learning_rate": 6.136090536730372e-06, "loss": 0.4416, "step": 11802 }, { "epoch": 1.9383942684704287, "grad_norm": 0.30422790837511227, "learning_rate": 6.1356536499159555e-06, "loss": 0.4775, "step": 11803 }, { "epoch": 1.9385584956787716, "grad_norm": 0.34677800611122794, "learning_rate": 6.1352167453367305e-06, "loss": 0.4449, "step": 11804 }, { "epoch": 1.9387227228871144, "grad_norm": 0.2992127868849352, "learning_rate": 6.134779822997442e-06, "loss": 0.4461, "step": 11805 }, { "epoch": 1.938886950095457, "grad_norm": 0.3231711502256461, "learning_rate": 6.134342882902836e-06, "loss": 0.4586, "step": 11806 }, { "epoch": 1.9390511773037997, "grad_norm": 0.3037909014573711, "learning_rate": 6.133905925057659e-06, "loss": 0.4529, "step": 11807 }, { "epoch": 1.9392154045121426, "grad_norm": 0.3337539556315541, "learning_rate": 6.1334689494666564e-06, "loss": 0.4604, "step": 11808 }, { "epoch": 1.9393796317204854, "grad_norm": 0.33305602986601746, "learning_rate": 6.133031956134573e-06, "loss": 0.4361, "step": 11809 }, { "epoch": 1.939543858928828, "grad_norm": 0.8091816550177268, "learning_rate": 6.132594945066157e-06, "loss": 0.4646, "step": 11810 }, { "epoch": 1.9397080861371707, "grad_norm": 0.32246244156970166, "learning_rate": 6.132157916266152e-06, "loss": 0.4639, "step": 11811 }, { "epoch": 1.9398723133455134, "grad_norm": 0.44159454100120704, "learning_rate": 6.131720869739307e-06, "loss": 0.4508, "step": 11812 }, { "epoch": 1.9400365405538562, "grad_norm": 0.2880429219605012, "learning_rate": 6.131283805490368e-06, "loss": 0.4475, "step": 11813 }, { "epoch": 1.940200767762199, "grad_norm": 0.2735895117494609, "learning_rate": 6.130846723524082e-06, "loss": 0.4595, "step": 11814 }, { "epoch": 1.9403649949705417, "grad_norm": 0.3123242230326185, "learning_rate": 6.130409623845196e-06, "loss": 0.4765, "step": 11815 }, { "epoch": 1.9405292221788843, "grad_norm": 0.29578411751014344, "learning_rate": 6.129972506458458e-06, "loss": 0.4576, "step": 11816 }, { "epoch": 1.9406934493872272, "grad_norm": 0.4095465840591172, "learning_rate": 6.129535371368614e-06, "loss": 0.4277, "step": 11817 }, { "epoch": 1.94085767659557, "grad_norm": 0.27967699795557444, "learning_rate": 6.129098218580414e-06, "loss": 0.44, "step": 11818 }, { "epoch": 1.9410219038039127, "grad_norm": 0.3700003102868501, "learning_rate": 6.128661048098602e-06, "loss": 0.4289, "step": 11819 }, { "epoch": 1.9411861310122553, "grad_norm": 0.4692973743249203, "learning_rate": 6.128223859927931e-06, "loss": 0.4652, "step": 11820 }, { "epoch": 1.9413503582205982, "grad_norm": 0.3164140915929167, "learning_rate": 6.1277866540731465e-06, "loss": 0.4383, "step": 11821 }, { "epoch": 1.941514585428941, "grad_norm": 0.4703336243325058, "learning_rate": 6.1273494305389956e-06, "loss": 0.4446, "step": 11822 }, { "epoch": 1.9416788126372837, "grad_norm": 0.35750823869899867, "learning_rate": 6.126912189330231e-06, "loss": 0.4488, "step": 11823 }, { "epoch": 1.9418430398456263, "grad_norm": 0.3118945660735331, "learning_rate": 6.126474930451599e-06, "loss": 0.4787, "step": 11824 }, { "epoch": 1.9420072670539692, "grad_norm": 0.2687496340376281, "learning_rate": 6.126037653907848e-06, "loss": 0.4405, "step": 11825 }, { "epoch": 1.942171494262312, "grad_norm": 0.591042238910758, "learning_rate": 6.125600359703728e-06, "loss": 0.4355, "step": 11826 }, { "epoch": 1.9423357214706547, "grad_norm": 0.32423012638815046, "learning_rate": 6.125163047843991e-06, "loss": 0.4647, "step": 11827 }, { "epoch": 1.9424999486789973, "grad_norm": 0.448756546678214, "learning_rate": 6.124725718333383e-06, "loss": 0.4245, "step": 11828 }, { "epoch": 1.94266417588734, "grad_norm": 0.30948247325201406, "learning_rate": 6.124288371176655e-06, "loss": 0.4461, "step": 11829 }, { "epoch": 1.9428284030956828, "grad_norm": 0.2882913994343183, "learning_rate": 6.123851006378556e-06, "loss": 0.4506, "step": 11830 }, { "epoch": 1.9429926303040257, "grad_norm": 0.3698476194070314, "learning_rate": 6.123413623943839e-06, "loss": 0.4417, "step": 11831 }, { "epoch": 1.9431568575123683, "grad_norm": 0.39572904633456146, "learning_rate": 6.122976223877253e-06, "loss": 0.4455, "step": 11832 }, { "epoch": 1.943321084720711, "grad_norm": 0.31198435687290366, "learning_rate": 6.122538806183548e-06, "loss": 0.4525, "step": 11833 }, { "epoch": 1.9434853119290538, "grad_norm": 0.3316692403626222, "learning_rate": 6.122101370867475e-06, "loss": 0.462, "step": 11834 }, { "epoch": 1.9436495391373967, "grad_norm": 0.33372374349452755, "learning_rate": 6.121663917933784e-06, "loss": 0.4522, "step": 11835 }, { "epoch": 1.9438137663457393, "grad_norm": 0.3229007237330423, "learning_rate": 6.121226447387229e-06, "loss": 0.4402, "step": 11836 }, { "epoch": 1.943977993554082, "grad_norm": 0.34876032942863944, "learning_rate": 6.12078895923256e-06, "loss": 0.4379, "step": 11837 }, { "epoch": 1.9441422207624248, "grad_norm": 0.3025199224775647, "learning_rate": 6.120351453474528e-06, "loss": 0.4424, "step": 11838 }, { "epoch": 1.9443064479707677, "grad_norm": 0.3042076390572115, "learning_rate": 6.119913930117884e-06, "loss": 0.422, "step": 11839 }, { "epoch": 1.9444706751791103, "grad_norm": 0.3464106773619736, "learning_rate": 6.119476389167382e-06, "loss": 0.4286, "step": 11840 }, { "epoch": 1.944634902387453, "grad_norm": 0.31756238332376324, "learning_rate": 6.119038830627772e-06, "loss": 0.4447, "step": 11841 }, { "epoch": 1.9447991295957958, "grad_norm": 0.3361628110899518, "learning_rate": 6.118601254503809e-06, "loss": 0.4364, "step": 11842 }, { "epoch": 1.9449633568041387, "grad_norm": 0.4516933395464814, "learning_rate": 6.118163660800243e-06, "loss": 0.4539, "step": 11843 }, { "epoch": 1.9451275840124813, "grad_norm": 0.3930468527746109, "learning_rate": 6.117726049521826e-06, "loss": 0.47, "step": 11844 }, { "epoch": 1.945291811220824, "grad_norm": 0.35826140019934466, "learning_rate": 6.117288420673315e-06, "loss": 0.4543, "step": 11845 }, { "epoch": 1.9454560384291666, "grad_norm": 0.32262749277511915, "learning_rate": 6.116850774259458e-06, "loss": 0.4575, "step": 11846 }, { "epoch": 1.9456202656375094, "grad_norm": 0.42017179470753757, "learning_rate": 6.116413110285014e-06, "loss": 0.4259, "step": 11847 }, { "epoch": 1.9457844928458523, "grad_norm": 0.3091060953354457, "learning_rate": 6.115975428754731e-06, "loss": 0.4354, "step": 11848 }, { "epoch": 1.945948720054195, "grad_norm": 0.449447054482981, "learning_rate": 6.115537729673366e-06, "loss": 0.4313, "step": 11849 }, { "epoch": 1.9461129472625376, "grad_norm": 0.3242192526893855, "learning_rate": 6.115100013045671e-06, "loss": 0.4504, "step": 11850 }, { "epoch": 1.9462771744708804, "grad_norm": 0.33839339236288085, "learning_rate": 6.1146622788764e-06, "loss": 0.4586, "step": 11851 }, { "epoch": 1.9464414016792233, "grad_norm": 0.41077135290200856, "learning_rate": 6.11422452717031e-06, "loss": 0.45, "step": 11852 }, { "epoch": 1.946605628887566, "grad_norm": 0.46154500843898666, "learning_rate": 6.113786757932154e-06, "loss": 0.4687, "step": 11853 }, { "epoch": 1.9467698560959086, "grad_norm": 0.38772725882298875, "learning_rate": 6.113348971166684e-06, "loss": 0.4565, "step": 11854 }, { "epoch": 1.9469340833042514, "grad_norm": 0.34951381136472154, "learning_rate": 6.1129111668786565e-06, "loss": 0.433, "step": 11855 }, { "epoch": 1.9470983105125943, "grad_norm": 0.3058343506675485, "learning_rate": 6.112473345072829e-06, "loss": 0.4443, "step": 11856 }, { "epoch": 1.947262537720937, "grad_norm": 0.458504542424256, "learning_rate": 6.112035505753952e-06, "loss": 0.4228, "step": 11857 }, { "epoch": 1.9474267649292796, "grad_norm": 0.3648480272272111, "learning_rate": 6.111597648926786e-06, "loss": 0.4657, "step": 11858 }, { "epoch": 1.9475909921376224, "grad_norm": 0.3315564115633353, "learning_rate": 6.1111597745960825e-06, "loss": 0.4344, "step": 11859 }, { "epoch": 1.9477552193459653, "grad_norm": 0.3203259202134563, "learning_rate": 6.110721882766598e-06, "loss": 0.4507, "step": 11860 }, { "epoch": 1.947919446554308, "grad_norm": 0.32526518490626355, "learning_rate": 6.1102839734430905e-06, "loss": 0.4483, "step": 11861 }, { "epoch": 1.9480836737626506, "grad_norm": 0.30568500186568304, "learning_rate": 6.109846046630315e-06, "loss": 0.4251, "step": 11862 }, { "epoch": 1.9482479009709932, "grad_norm": 0.34335284427598955, "learning_rate": 6.1094081023330265e-06, "loss": 0.442, "step": 11863 }, { "epoch": 1.948412128179336, "grad_norm": 0.3000840673867567, "learning_rate": 6.108970140555982e-06, "loss": 0.4462, "step": 11864 }, { "epoch": 1.948576355387679, "grad_norm": 0.3536142230185942, "learning_rate": 6.10853216130394e-06, "loss": 0.4583, "step": 11865 }, { "epoch": 1.9487405825960216, "grad_norm": 0.31933514094763754, "learning_rate": 6.108094164581656e-06, "loss": 0.4355, "step": 11866 }, { "epoch": 1.9489048098043642, "grad_norm": 0.35391874407088647, "learning_rate": 6.107656150393888e-06, "loss": 0.4603, "step": 11867 }, { "epoch": 1.949069037012707, "grad_norm": 0.37510286288830225, "learning_rate": 6.10721811874539e-06, "loss": 0.4357, "step": 11868 }, { "epoch": 1.94923326422105, "grad_norm": 0.3732690817187727, "learning_rate": 6.106780069640924e-06, "loss": 0.4551, "step": 11869 }, { "epoch": 1.9493974914293926, "grad_norm": 0.3714339876825457, "learning_rate": 6.106342003085246e-06, "loss": 0.4611, "step": 11870 }, { "epoch": 1.9495617186377352, "grad_norm": 0.2837021911002435, "learning_rate": 6.1059039190831115e-06, "loss": 0.4401, "step": 11871 }, { "epoch": 1.949725945846078, "grad_norm": 0.4639579970164344, "learning_rate": 6.105465817639281e-06, "loss": 0.4399, "step": 11872 }, { "epoch": 1.949890173054421, "grad_norm": 0.3320134698090167, "learning_rate": 6.105027698758512e-06, "loss": 0.4232, "step": 11873 }, { "epoch": 1.9500544002627636, "grad_norm": 0.28217258687959057, "learning_rate": 6.104589562445565e-06, "loss": 0.4613, "step": 11874 }, { "epoch": 1.9502186274711062, "grad_norm": 0.3642163892311067, "learning_rate": 6.104151408705195e-06, "loss": 0.4611, "step": 11875 }, { "epoch": 1.950382854679449, "grad_norm": 0.7508595149691321, "learning_rate": 6.103713237542163e-06, "loss": 0.4404, "step": 11876 }, { "epoch": 1.950547081887792, "grad_norm": 0.33888086777812015, "learning_rate": 6.103275048961227e-06, "loss": 0.4326, "step": 11877 }, { "epoch": 1.9507113090961345, "grad_norm": 0.29969210211511377, "learning_rate": 6.102836842967146e-06, "loss": 0.4595, "step": 11878 }, { "epoch": 1.9508755363044772, "grad_norm": 0.3898798633549321, "learning_rate": 6.102398619564681e-06, "loss": 0.4445, "step": 11879 }, { "epoch": 1.9510397635128198, "grad_norm": 0.33948844287237157, "learning_rate": 6.101960378758589e-06, "loss": 0.4441, "step": 11880 }, { "epoch": 1.9512039907211627, "grad_norm": 0.3009932679535096, "learning_rate": 6.101522120553633e-06, "loss": 0.4689, "step": 11881 }, { "epoch": 1.9513682179295055, "grad_norm": 0.40499756454781155, "learning_rate": 6.10108384495457e-06, "loss": 0.443, "step": 11882 }, { "epoch": 1.9515324451378482, "grad_norm": 0.4204322798597718, "learning_rate": 6.100645551966162e-06, "loss": 0.4286, "step": 11883 }, { "epoch": 1.9516966723461908, "grad_norm": 0.31539927216386543, "learning_rate": 6.100207241593167e-06, "loss": 0.4402, "step": 11884 }, { "epoch": 1.9518608995545337, "grad_norm": 0.295725731524103, "learning_rate": 6.099768913840348e-06, "loss": 0.4323, "step": 11885 }, { "epoch": 1.9520251267628765, "grad_norm": 0.5328706680390949, "learning_rate": 6.099330568712465e-06, "loss": 0.4273, "step": 11886 }, { "epoch": 1.9521893539712192, "grad_norm": 0.42119041668735635, "learning_rate": 6.098892206214278e-06, "loss": 0.447, "step": 11887 }, { "epoch": 1.9523535811795618, "grad_norm": 0.3552233220762019, "learning_rate": 6.098453826350549e-06, "loss": 0.4624, "step": 11888 }, { "epoch": 1.9525178083879047, "grad_norm": 0.35988767125876403, "learning_rate": 6.0980154291260375e-06, "loss": 0.4427, "step": 11889 }, { "epoch": 1.9526820355962475, "grad_norm": 0.3725865342348446, "learning_rate": 6.097577014545507e-06, "loss": 0.4391, "step": 11890 }, { "epoch": 1.9528462628045902, "grad_norm": 0.31357186920814756, "learning_rate": 6.0971385826137194e-06, "loss": 0.4449, "step": 11891 }, { "epoch": 1.9530104900129328, "grad_norm": 0.38542678167037026, "learning_rate": 6.0967001333354335e-06, "loss": 0.4367, "step": 11892 }, { "epoch": 1.9531747172212757, "grad_norm": 0.37312227942542797, "learning_rate": 6.096261666715413e-06, "loss": 0.4501, "step": 11893 }, { "epoch": 1.9533389444296185, "grad_norm": 0.34996403105268287, "learning_rate": 6.095823182758422e-06, "loss": 0.4327, "step": 11894 }, { "epoch": 1.9535031716379612, "grad_norm": 0.4262377392819523, "learning_rate": 6.0953846814692214e-06, "loss": 0.4317, "step": 11895 }, { "epoch": 1.9536673988463038, "grad_norm": 0.3406779984733831, "learning_rate": 6.094946162852573e-06, "loss": 0.445, "step": 11896 }, { "epoch": 1.9538316260546464, "grad_norm": 0.32443708298506085, "learning_rate": 6.09450762691324e-06, "loss": 0.4404, "step": 11897 }, { "epoch": 1.9539958532629893, "grad_norm": 0.3150022109795289, "learning_rate": 6.094069073655984e-06, "loss": 0.4403, "step": 11898 }, { "epoch": 1.9541600804713322, "grad_norm": 0.3574477988232649, "learning_rate": 6.093630503085571e-06, "loss": 0.4603, "step": 11899 }, { "epoch": 1.9543243076796748, "grad_norm": 0.2974068481729623, "learning_rate": 6.093191915206762e-06, "loss": 0.4318, "step": 11900 }, { "epoch": 1.9544885348880174, "grad_norm": 0.3768435152698072, "learning_rate": 6.092753310024322e-06, "loss": 0.4547, "step": 11901 }, { "epoch": 1.9546527620963603, "grad_norm": 0.38455005690972244, "learning_rate": 6.092314687543014e-06, "loss": 0.4469, "step": 11902 }, { "epoch": 1.9548169893047032, "grad_norm": 0.31822901379518637, "learning_rate": 6.091876047767601e-06, "loss": 0.4425, "step": 11903 }, { "epoch": 1.9549812165130458, "grad_norm": 0.28402660304659677, "learning_rate": 6.091437390702849e-06, "loss": 0.4461, "step": 11904 }, { "epoch": 1.9551454437213884, "grad_norm": 0.3521752474751283, "learning_rate": 6.090998716353522e-06, "loss": 0.4318, "step": 11905 }, { "epoch": 1.9553096709297313, "grad_norm": 0.29060245833153847, "learning_rate": 6.090560024724381e-06, "loss": 0.4418, "step": 11906 }, { "epoch": 1.9554738981380742, "grad_norm": 0.3912999714229161, "learning_rate": 6.0901213158201946e-06, "loss": 0.4295, "step": 11907 }, { "epoch": 1.9556381253464168, "grad_norm": 0.3728285545096288, "learning_rate": 6.089682589645727e-06, "loss": 0.4326, "step": 11908 }, { "epoch": 1.9558023525547594, "grad_norm": 0.3202203110579774, "learning_rate": 6.08924384620574e-06, "loss": 0.4463, "step": 11909 }, { "epoch": 1.9559665797631023, "grad_norm": 0.536561797356937, "learning_rate": 6.088805085505004e-06, "loss": 0.4376, "step": 11910 }, { "epoch": 1.9561308069714451, "grad_norm": 0.4067293800969105, "learning_rate": 6.08836630754828e-06, "loss": 0.4476, "step": 11911 }, { "epoch": 1.9562950341797878, "grad_norm": 0.3378217956415752, "learning_rate": 6.087927512340336e-06, "loss": 0.4475, "step": 11912 }, { "epoch": 1.9564592613881304, "grad_norm": 0.29079037548101955, "learning_rate": 6.087488699885936e-06, "loss": 0.4429, "step": 11913 }, { "epoch": 1.956623488596473, "grad_norm": 0.37519263681592546, "learning_rate": 6.0870498701898465e-06, "loss": 0.4377, "step": 11914 }, { "epoch": 1.956787715804816, "grad_norm": 0.35455480132931083, "learning_rate": 6.086611023256836e-06, "loss": 0.4667, "step": 11915 }, { "epoch": 1.9569519430131588, "grad_norm": 0.4006295729770789, "learning_rate": 6.086172159091667e-06, "loss": 0.4507, "step": 11916 }, { "epoch": 1.9571161702215014, "grad_norm": 0.5609335201389217, "learning_rate": 6.085733277699109e-06, "loss": 0.4528, "step": 11917 }, { "epoch": 1.957280397429844, "grad_norm": 0.3352582817058797, "learning_rate": 6.085294379083927e-06, "loss": 0.4575, "step": 11918 }, { "epoch": 1.957444624638187, "grad_norm": 0.34092400470920575, "learning_rate": 6.084855463250887e-06, "loss": 0.4442, "step": 11919 }, { "epoch": 1.9576088518465298, "grad_norm": 0.4316159655034461, "learning_rate": 6.08441653020476e-06, "loss": 0.448, "step": 11920 }, { "epoch": 1.9577730790548724, "grad_norm": 0.28558420396013084, "learning_rate": 6.083977579950309e-06, "loss": 0.4419, "step": 11921 }, { "epoch": 1.957937306263215, "grad_norm": 0.40953524784616846, "learning_rate": 6.083538612492302e-06, "loss": 0.4469, "step": 11922 }, { "epoch": 1.958101533471558, "grad_norm": 0.29969065319404764, "learning_rate": 6.083099627835508e-06, "loss": 0.4432, "step": 11923 }, { "epoch": 1.9582657606799008, "grad_norm": 0.46090963146047326, "learning_rate": 6.082660625984697e-06, "loss": 0.4466, "step": 11924 }, { "epoch": 1.9584299878882434, "grad_norm": 0.29142333619321953, "learning_rate": 6.082221606944633e-06, "loss": 0.4449, "step": 11925 }, { "epoch": 1.958594215096586, "grad_norm": 0.38445693912408685, "learning_rate": 6.081782570720085e-06, "loss": 0.4441, "step": 11926 }, { "epoch": 1.958758442304929, "grad_norm": 0.2926899702163581, "learning_rate": 6.081343517315823e-06, "loss": 0.4741, "step": 11927 }, { "epoch": 1.9589226695132718, "grad_norm": 0.3185654549866373, "learning_rate": 6.080904446736613e-06, "loss": 0.4683, "step": 11928 }, { "epoch": 1.9590868967216144, "grad_norm": 0.3257148447442194, "learning_rate": 6.080465358987227e-06, "loss": 0.4369, "step": 11929 }, { "epoch": 1.959251123929957, "grad_norm": 0.34144365741472404, "learning_rate": 6.0800262540724314e-06, "loss": 0.4504, "step": 11930 }, { "epoch": 1.9594153511382997, "grad_norm": 0.40757690940344427, "learning_rate": 6.079587131996997e-06, "loss": 0.4606, "step": 11931 }, { "epoch": 1.9595795783466425, "grad_norm": 0.30314743296026664, "learning_rate": 6.079147992765691e-06, "loss": 0.4745, "step": 11932 }, { "epoch": 1.9597438055549854, "grad_norm": 0.3476291582985087, "learning_rate": 6.078708836383285e-06, "loss": 0.4584, "step": 11933 }, { "epoch": 1.959908032763328, "grad_norm": 0.38181630953661133, "learning_rate": 6.078269662854546e-06, "loss": 0.4507, "step": 11934 }, { "epoch": 1.9600722599716707, "grad_norm": 0.3039837980681577, "learning_rate": 6.077830472184249e-06, "loss": 0.4496, "step": 11935 }, { "epoch": 1.9602364871800135, "grad_norm": 0.3243794082669827, "learning_rate": 6.0773912643771585e-06, "loss": 0.4291, "step": 11936 }, { "epoch": 1.9604007143883564, "grad_norm": 0.4193973744888972, "learning_rate": 6.076952039438048e-06, "loss": 0.4761, "step": 11937 }, { "epoch": 1.960564941596699, "grad_norm": 0.2894100680354276, "learning_rate": 6.076512797371685e-06, "loss": 0.4309, "step": 11938 }, { "epoch": 1.9607291688050417, "grad_norm": 0.47924954345632687, "learning_rate": 6.0760735381828444e-06, "loss": 0.439, "step": 11939 }, { "epoch": 1.9608933960133845, "grad_norm": 0.3215352880588443, "learning_rate": 6.075634261876292e-06, "loss": 0.4399, "step": 11940 }, { "epoch": 1.9610576232217274, "grad_norm": 0.3105394115840711, "learning_rate": 6.0751949684568034e-06, "loss": 0.4423, "step": 11941 }, { "epoch": 1.96122185043007, "grad_norm": 0.3256268730700029, "learning_rate": 6.074755657929146e-06, "loss": 0.4395, "step": 11942 }, { "epoch": 1.9613860776384127, "grad_norm": 0.4212632534343356, "learning_rate": 6.074316330298094e-06, "loss": 0.4591, "step": 11943 }, { "epoch": 1.9615503048467555, "grad_norm": 0.33236517435930046, "learning_rate": 6.073876985568417e-06, "loss": 0.4351, "step": 11944 }, { "epoch": 1.9617145320550984, "grad_norm": 0.3231407977837, "learning_rate": 6.073437623744888e-06, "loss": 0.4242, "step": 11945 }, { "epoch": 1.961878759263441, "grad_norm": 0.30963714146020055, "learning_rate": 6.072998244832279e-06, "loss": 0.4376, "step": 11946 }, { "epoch": 1.9620429864717837, "grad_norm": 0.29128255878835513, "learning_rate": 6.072558848835359e-06, "loss": 0.4531, "step": 11947 }, { "epoch": 1.9622072136801263, "grad_norm": 0.2906945038194113, "learning_rate": 6.0721194357589036e-06, "loss": 0.4589, "step": 11948 }, { "epoch": 1.9623714408884692, "grad_norm": 0.28236743251721147, "learning_rate": 6.071680005607686e-06, "loss": 0.4651, "step": 11949 }, { "epoch": 1.962535668096812, "grad_norm": 0.3685667178520881, "learning_rate": 6.071240558386477e-06, "loss": 0.4517, "step": 11950 }, { "epoch": 1.9626998953051547, "grad_norm": 0.2718554589683119, "learning_rate": 6.0708010941000485e-06, "loss": 0.4562, "step": 11951 }, { "epoch": 1.9628641225134973, "grad_norm": 0.3077384180029517, "learning_rate": 6.070361612753175e-06, "loss": 0.4375, "step": 11952 }, { "epoch": 1.9630283497218401, "grad_norm": 0.32225790357731776, "learning_rate": 6.069922114350629e-06, "loss": 0.4628, "step": 11953 }, { "epoch": 1.963192576930183, "grad_norm": 0.4871176270330463, "learning_rate": 6.069482598897186e-06, "loss": 0.4521, "step": 11954 }, { "epoch": 1.9633568041385256, "grad_norm": 0.4126252515018033, "learning_rate": 6.069043066397615e-06, "loss": 0.4402, "step": 11955 }, { "epoch": 1.9635210313468683, "grad_norm": 0.3037151550263849, "learning_rate": 6.0686035168566945e-06, "loss": 0.4552, "step": 11956 }, { "epoch": 1.9636852585552111, "grad_norm": 0.3326743916440129, "learning_rate": 6.068163950279195e-06, "loss": 0.437, "step": 11957 }, { "epoch": 1.963849485763554, "grad_norm": 0.3131914650921411, "learning_rate": 6.067724366669895e-06, "loss": 0.4454, "step": 11958 }, { "epoch": 1.9640137129718966, "grad_norm": 0.33370325669191686, "learning_rate": 6.067284766033564e-06, "loss": 0.4556, "step": 11959 }, { "epoch": 1.9641779401802393, "grad_norm": 0.2634975212310298, "learning_rate": 6.066845148374978e-06, "loss": 0.4439, "step": 11960 }, { "epoch": 1.9643421673885821, "grad_norm": 0.297116985250537, "learning_rate": 6.066405513698912e-06, "loss": 0.4372, "step": 11961 }, { "epoch": 1.964506394596925, "grad_norm": 0.3084785855406754, "learning_rate": 6.0659658620101424e-06, "loss": 0.4395, "step": 11962 }, { "epoch": 1.9646706218052676, "grad_norm": 0.30382296858647195, "learning_rate": 6.065526193313442e-06, "loss": 0.4436, "step": 11963 }, { "epoch": 1.9648348490136103, "grad_norm": 0.47659789553897186, "learning_rate": 6.065086507613587e-06, "loss": 0.4531, "step": 11964 }, { "epoch": 1.964999076221953, "grad_norm": 0.32839380600621654, "learning_rate": 6.064646804915353e-06, "loss": 0.4427, "step": 11965 }, { "epoch": 1.9651633034302958, "grad_norm": 0.28526974160350493, "learning_rate": 6.0642070852235156e-06, "loss": 0.4546, "step": 11966 }, { "epoch": 1.9653275306386386, "grad_norm": 0.2921253976160405, "learning_rate": 6.063767348542849e-06, "loss": 0.4477, "step": 11967 }, { "epoch": 1.9654917578469813, "grad_norm": 0.32245870607633376, "learning_rate": 6.06332759487813e-06, "loss": 0.4463, "step": 11968 }, { "epoch": 1.965655985055324, "grad_norm": 0.265974759218518, "learning_rate": 6.062887824234138e-06, "loss": 0.4475, "step": 11969 }, { "epoch": 1.9658202122636668, "grad_norm": 0.3420749303053669, "learning_rate": 6.0624480366156455e-06, "loss": 0.4371, "step": 11970 }, { "epoch": 1.9659844394720096, "grad_norm": 0.5955902754876532, "learning_rate": 6.062008232027429e-06, "loss": 0.4318, "step": 11971 }, { "epoch": 1.9661486666803523, "grad_norm": 0.2654119467227501, "learning_rate": 6.061568410474266e-06, "loss": 0.4618, "step": 11972 }, { "epoch": 1.966312893888695, "grad_norm": 0.3256763544307515, "learning_rate": 6.061128571960935e-06, "loss": 0.4478, "step": 11973 }, { "epoch": 1.9664771210970378, "grad_norm": 0.38804030892894026, "learning_rate": 6.06068871649221e-06, "loss": 0.4599, "step": 11974 }, { "epoch": 1.9666413483053806, "grad_norm": 0.3603884815671852, "learning_rate": 6.060248844072872e-06, "loss": 0.4407, "step": 11975 }, { "epoch": 1.9668055755137233, "grad_norm": 0.29521900289850656, "learning_rate": 6.059808954707696e-06, "loss": 0.4553, "step": 11976 }, { "epoch": 1.966969802722066, "grad_norm": 0.31907625861012234, "learning_rate": 6.059369048401459e-06, "loss": 0.4485, "step": 11977 }, { "epoch": 1.9671340299304088, "grad_norm": 0.2827112303883396, "learning_rate": 6.058929125158942e-06, "loss": 0.4542, "step": 11978 }, { "epoch": 1.9672982571387516, "grad_norm": 0.4095160662478101, "learning_rate": 6.05848918498492e-06, "loss": 0.4447, "step": 11979 }, { "epoch": 1.9674624843470943, "grad_norm": 0.30726510403041185, "learning_rate": 6.058049227884171e-06, "loss": 0.4373, "step": 11980 }, { "epoch": 1.967626711555437, "grad_norm": 0.3706977126954439, "learning_rate": 6.057609253861475e-06, "loss": 0.4433, "step": 11981 }, { "epoch": 1.9677909387637795, "grad_norm": 0.27819231084749946, "learning_rate": 6.057169262921609e-06, "loss": 0.4415, "step": 11982 }, { "epoch": 1.9679551659721224, "grad_norm": 0.2774976321074535, "learning_rate": 6.056729255069356e-06, "loss": 0.4415, "step": 11983 }, { "epoch": 1.9681193931804652, "grad_norm": 0.3394973626327754, "learning_rate": 6.05628923030949e-06, "loss": 0.4256, "step": 11984 }, { "epoch": 1.9682836203888079, "grad_norm": 0.270213655988594, "learning_rate": 6.055849188646791e-06, "loss": 0.4301, "step": 11985 }, { "epoch": 1.9684478475971505, "grad_norm": 0.44524049804233634, "learning_rate": 6.055409130086039e-06, "loss": 0.4662, "step": 11986 }, { "epoch": 1.9686120748054934, "grad_norm": 0.32487464715932335, "learning_rate": 6.054969054632015e-06, "loss": 0.4474, "step": 11987 }, { "epoch": 1.9687763020138362, "grad_norm": 0.36859889874615925, "learning_rate": 6.0545289622894956e-06, "loss": 0.4406, "step": 11988 }, { "epoch": 1.9689405292221789, "grad_norm": 0.29552264416651164, "learning_rate": 6.054088853063263e-06, "loss": 0.4396, "step": 11989 }, { "epoch": 1.9691047564305215, "grad_norm": 0.2740422492527266, "learning_rate": 6.053648726958096e-06, "loss": 0.4517, "step": 11990 }, { "epoch": 1.9692689836388644, "grad_norm": 0.32702865970003103, "learning_rate": 6.053208583978776e-06, "loss": 0.4542, "step": 11991 }, { "epoch": 1.9694332108472072, "grad_norm": 0.30746129718076604, "learning_rate": 6.052768424130081e-06, "loss": 0.436, "step": 11992 }, { "epoch": 1.9695974380555499, "grad_norm": 0.3372313936554249, "learning_rate": 6.052328247416795e-06, "loss": 0.4574, "step": 11993 }, { "epoch": 1.9697616652638925, "grad_norm": 0.31184738193789135, "learning_rate": 6.051888053843697e-06, "loss": 0.4519, "step": 11994 }, { "epoch": 1.9699258924722354, "grad_norm": 0.2956821322627038, "learning_rate": 6.051447843415567e-06, "loss": 0.4409, "step": 11995 }, { "epoch": 1.9700901196805782, "grad_norm": 0.40843040125952373, "learning_rate": 6.051007616137187e-06, "loss": 0.4558, "step": 11996 }, { "epoch": 1.9702543468889209, "grad_norm": 0.32893246578214913, "learning_rate": 6.050567372013338e-06, "loss": 0.4265, "step": 11997 }, { "epoch": 1.9704185740972635, "grad_norm": 0.26744505739084257, "learning_rate": 6.050127111048803e-06, "loss": 0.4546, "step": 11998 }, { "epoch": 1.9705828013056061, "grad_norm": 0.2781686616464339, "learning_rate": 6.049686833248362e-06, "loss": 0.4448, "step": 11999 }, { "epoch": 1.970747028513949, "grad_norm": 0.27749506883908925, "learning_rate": 6.049246538616796e-06, "loss": 0.4388, "step": 12000 }, { "epoch": 1.9709112557222919, "grad_norm": 0.39291413716423657, "learning_rate": 6.048806227158889e-06, "loss": 0.4289, "step": 12001 }, { "epoch": 1.9710754829306345, "grad_norm": 0.2988992716204715, "learning_rate": 6.048365898879423e-06, "loss": 0.4484, "step": 12002 }, { "epoch": 1.9712397101389771, "grad_norm": 0.30423634070698163, "learning_rate": 6.04792555378318e-06, "loss": 0.4284, "step": 12003 }, { "epoch": 1.97140393734732, "grad_norm": 0.3669559441068953, "learning_rate": 6.047485191874944e-06, "loss": 0.4717, "step": 12004 }, { "epoch": 1.9715681645556629, "grad_norm": 0.33458514236368003, "learning_rate": 6.047044813159494e-06, "loss": 0.4448, "step": 12005 }, { "epoch": 1.9717323917640055, "grad_norm": 0.29073195184717887, "learning_rate": 6.046604417641616e-06, "loss": 0.4541, "step": 12006 }, { "epoch": 1.9718966189723481, "grad_norm": 0.3558324342641227, "learning_rate": 6.046164005326092e-06, "loss": 0.44, "step": 12007 }, { "epoch": 1.972060846180691, "grad_norm": 0.28499414277142565, "learning_rate": 6.0457235762177065e-06, "loss": 0.4573, "step": 12008 }, { "epoch": 1.9722250733890339, "grad_norm": 0.32143312180520023, "learning_rate": 6.045283130321242e-06, "loss": 0.4505, "step": 12009 }, { "epoch": 1.9723893005973765, "grad_norm": 0.3139482439274381, "learning_rate": 6.044842667641482e-06, "loss": 0.4546, "step": 12010 }, { "epoch": 1.9725535278057191, "grad_norm": 0.36164038349410554, "learning_rate": 6.04440218818321e-06, "loss": 0.4533, "step": 12011 }, { "epoch": 1.972717755014062, "grad_norm": 0.34017728293412075, "learning_rate": 6.043961691951212e-06, "loss": 0.4747, "step": 12012 }, { "epoch": 1.9728819822224049, "grad_norm": 0.28030323013934577, "learning_rate": 6.04352117895027e-06, "loss": 0.4353, "step": 12013 }, { "epoch": 1.9730462094307475, "grad_norm": 0.36739679215647425, "learning_rate": 6.043080649185171e-06, "loss": 0.4448, "step": 12014 }, { "epoch": 1.9732104366390901, "grad_norm": 0.3699289824111152, "learning_rate": 6.042640102660695e-06, "loss": 0.442, "step": 12015 }, { "epoch": 1.9733746638474328, "grad_norm": 0.29453155207049314, "learning_rate": 6.042199539381633e-06, "loss": 0.4512, "step": 12016 }, { "epoch": 1.9735388910557756, "grad_norm": 0.3369125096658498, "learning_rate": 6.041758959352764e-06, "loss": 0.4753, "step": 12017 }, { "epoch": 1.9737031182641185, "grad_norm": 0.359560199304719, "learning_rate": 6.041318362578878e-06, "loss": 0.4441, "step": 12018 }, { "epoch": 1.9738673454724611, "grad_norm": 0.4074514051910816, "learning_rate": 6.040877749064757e-06, "loss": 0.4534, "step": 12019 }, { "epoch": 1.9740315726808038, "grad_norm": 0.3280832276122813, "learning_rate": 6.040437118815187e-06, "loss": 0.4385, "step": 12020 }, { "epoch": 1.9741957998891466, "grad_norm": 0.29592394541807415, "learning_rate": 6.039996471834956e-06, "loss": 0.4272, "step": 12021 }, { "epoch": 1.9743600270974895, "grad_norm": 0.4068099660831585, "learning_rate": 6.039555808128848e-06, "loss": 0.4497, "step": 12022 }, { "epoch": 1.9745242543058321, "grad_norm": 0.40547477133364795, "learning_rate": 6.039115127701649e-06, "loss": 0.4596, "step": 12023 }, { "epoch": 1.9746884815141748, "grad_norm": 0.33863995062257035, "learning_rate": 6.038674430558144e-06, "loss": 0.4527, "step": 12024 }, { "epoch": 1.9748527087225176, "grad_norm": 0.2859546787151384, "learning_rate": 6.038233716703122e-06, "loss": 0.4227, "step": 12025 }, { "epoch": 1.9750169359308605, "grad_norm": 0.43978961671594363, "learning_rate": 6.037792986141368e-06, "loss": 0.4293, "step": 12026 }, { "epoch": 1.975181163139203, "grad_norm": 0.32042395347082325, "learning_rate": 6.037352238877669e-06, "loss": 0.4436, "step": 12027 }, { "epoch": 1.9753453903475457, "grad_norm": 0.45653170556439265, "learning_rate": 6.036911474916813e-06, "loss": 0.4355, "step": 12028 }, { "epoch": 1.9755096175558886, "grad_norm": 0.31606632423937275, "learning_rate": 6.036470694263585e-06, "loss": 0.4571, "step": 12029 }, { "epoch": 1.9756738447642315, "grad_norm": 0.36776567749481, "learning_rate": 6.036029896922774e-06, "loss": 0.4325, "step": 12030 }, { "epoch": 1.975838071972574, "grad_norm": 0.2897838666344959, "learning_rate": 6.035589082899168e-06, "loss": 0.4512, "step": 12031 }, { "epoch": 1.9760022991809167, "grad_norm": 0.34939504492231116, "learning_rate": 6.035148252197554e-06, "loss": 0.4403, "step": 12032 }, { "epoch": 1.9761665263892594, "grad_norm": 0.37178263437531295, "learning_rate": 6.034707404822718e-06, "loss": 0.4355, "step": 12033 }, { "epoch": 1.9763307535976022, "grad_norm": 0.3404844380988809, "learning_rate": 6.034266540779451e-06, "loss": 0.465, "step": 12034 }, { "epoch": 1.976494980805945, "grad_norm": 0.35425656017271817, "learning_rate": 6.033825660072538e-06, "loss": 0.4466, "step": 12035 }, { "epoch": 1.9766592080142877, "grad_norm": 0.3120757128379207, "learning_rate": 6.033384762706772e-06, "loss": 0.422, "step": 12036 }, { "epoch": 1.9768234352226304, "grad_norm": 0.35814186378850754, "learning_rate": 6.032943848686938e-06, "loss": 0.4438, "step": 12037 }, { "epoch": 1.9769876624309732, "grad_norm": 0.31766052787014015, "learning_rate": 6.032502918017823e-06, "loss": 0.4316, "step": 12038 }, { "epoch": 1.977151889639316, "grad_norm": 0.3316287841188254, "learning_rate": 6.032061970704221e-06, "loss": 0.4413, "step": 12039 }, { "epoch": 1.9773161168476587, "grad_norm": 0.29366943908554144, "learning_rate": 6.031621006750918e-06, "loss": 0.4524, "step": 12040 }, { "epoch": 1.9774803440560014, "grad_norm": 0.3365778223058008, "learning_rate": 6.031180026162704e-06, "loss": 0.4469, "step": 12041 }, { "epoch": 1.9776445712643442, "grad_norm": 0.273328767172499, "learning_rate": 6.030739028944369e-06, "loss": 0.4376, "step": 12042 }, { "epoch": 1.977808798472687, "grad_norm": 0.34032479412588085, "learning_rate": 6.030298015100702e-06, "loss": 0.4383, "step": 12043 }, { "epoch": 1.9779730256810297, "grad_norm": 0.29204890209099327, "learning_rate": 6.029856984636491e-06, "loss": 0.4493, "step": 12044 }, { "epoch": 1.9781372528893724, "grad_norm": 0.31070441585017955, "learning_rate": 6.02941593755653e-06, "loss": 0.4339, "step": 12045 }, { "epoch": 1.9783014800977152, "grad_norm": 0.34355821495186206, "learning_rate": 6.028974873865607e-06, "loss": 0.4497, "step": 12046 }, { "epoch": 1.978465707306058, "grad_norm": 0.37841270289295065, "learning_rate": 6.0285337935685125e-06, "loss": 0.4667, "step": 12047 }, { "epoch": 1.9786299345144007, "grad_norm": 0.2880315830387975, "learning_rate": 6.028092696670037e-06, "loss": 0.446, "step": 12048 }, { "epoch": 1.9787941617227434, "grad_norm": 0.2860235218704819, "learning_rate": 6.02765158317497e-06, "loss": 0.4548, "step": 12049 }, { "epoch": 1.978958388931086, "grad_norm": 0.6820882451984894, "learning_rate": 6.027210453088106e-06, "loss": 0.4264, "step": 12050 }, { "epoch": 1.9791226161394289, "grad_norm": 0.3449763478453752, "learning_rate": 6.026769306414234e-06, "loss": 0.4534, "step": 12051 }, { "epoch": 1.9792868433477717, "grad_norm": 0.31103476736921737, "learning_rate": 6.026328143158143e-06, "loss": 0.4565, "step": 12052 }, { "epoch": 1.9794510705561144, "grad_norm": 0.297356778106612, "learning_rate": 6.0258869633246275e-06, "loss": 0.4705, "step": 12053 }, { "epoch": 1.979615297764457, "grad_norm": 0.2549611004267377, "learning_rate": 6.0254457669184795e-06, "loss": 0.4406, "step": 12054 }, { "epoch": 1.9797795249727999, "grad_norm": 0.3368960762751481, "learning_rate": 6.025004553944488e-06, "loss": 0.4179, "step": 12055 }, { "epoch": 1.9799437521811427, "grad_norm": 0.28991698335346405, "learning_rate": 6.0245633244074485e-06, "loss": 0.4597, "step": 12056 }, { "epoch": 1.9801079793894854, "grad_norm": 0.3258666563489521, "learning_rate": 6.02412207831215e-06, "loss": 0.4308, "step": 12057 }, { "epoch": 1.980272206597828, "grad_norm": 0.3596563049725067, "learning_rate": 6.023680815663386e-06, "loss": 0.467, "step": 12058 }, { "epoch": 1.9804364338061708, "grad_norm": 0.3375615413381802, "learning_rate": 6.02323953646595e-06, "loss": 0.4426, "step": 12059 }, { "epoch": 1.9806006610145137, "grad_norm": 0.2701265231327295, "learning_rate": 6.022798240724633e-06, "loss": 0.4414, "step": 12060 }, { "epoch": 1.9807648882228563, "grad_norm": 0.327944132720733, "learning_rate": 6.0223569284442296e-06, "loss": 0.46, "step": 12061 }, { "epoch": 1.980929115431199, "grad_norm": 0.28050418382709885, "learning_rate": 6.021915599629533e-06, "loss": 0.4757, "step": 12062 }, { "epoch": 1.9810933426395418, "grad_norm": 0.3169619359243772, "learning_rate": 6.021474254285334e-06, "loss": 0.4506, "step": 12063 }, { "epoch": 1.9812575698478847, "grad_norm": 0.2981630396884761, "learning_rate": 6.021032892416428e-06, "loss": 0.4647, "step": 12064 }, { "epoch": 1.9814217970562273, "grad_norm": 0.32351176269873505, "learning_rate": 6.020591514027608e-06, "loss": 0.4581, "step": 12065 }, { "epoch": 1.98158602426457, "grad_norm": 0.4231399008420313, "learning_rate": 6.02015011912367e-06, "loss": 0.4458, "step": 12066 }, { "epoch": 1.9817502514729126, "grad_norm": 0.3037909970679311, "learning_rate": 6.019708707709406e-06, "loss": 0.4493, "step": 12067 }, { "epoch": 1.9819144786812555, "grad_norm": 0.27711451234107276, "learning_rate": 6.019267279789607e-06, "loss": 0.4367, "step": 12068 }, { "epoch": 1.9820787058895983, "grad_norm": 0.26567383056133026, "learning_rate": 6.018825835369073e-06, "loss": 0.4137, "step": 12069 }, { "epoch": 1.982242933097941, "grad_norm": 0.3748808611920089, "learning_rate": 6.018384374452596e-06, "loss": 0.4715, "step": 12070 }, { "epoch": 1.9824071603062836, "grad_norm": 0.381151333801941, "learning_rate": 6.017942897044971e-06, "loss": 0.4562, "step": 12071 }, { "epoch": 1.9825713875146265, "grad_norm": 0.4471531506882593, "learning_rate": 6.017501403150992e-06, "loss": 0.4493, "step": 12072 }, { "epoch": 1.9827356147229693, "grad_norm": 0.3843248058077205, "learning_rate": 6.017059892775455e-06, "loss": 0.4458, "step": 12073 }, { "epoch": 1.982899841931312, "grad_norm": 0.4006785253213048, "learning_rate": 6.016618365923154e-06, "loss": 0.4549, "step": 12074 }, { "epoch": 1.9830640691396546, "grad_norm": 0.40463613359960876, "learning_rate": 6.016176822598886e-06, "loss": 0.4634, "step": 12075 }, { "epoch": 1.9832282963479975, "grad_norm": 0.2790785754162893, "learning_rate": 6.015735262807448e-06, "loss": 0.4595, "step": 12076 }, { "epoch": 1.9833925235563403, "grad_norm": 0.3065794759268286, "learning_rate": 6.015293686553632e-06, "loss": 0.4687, "step": 12077 }, { "epoch": 1.983556750764683, "grad_norm": 0.4796119205886854, "learning_rate": 6.014852093842236e-06, "loss": 0.449, "step": 12078 }, { "epoch": 1.9837209779730256, "grad_norm": 0.32901033246316697, "learning_rate": 6.014410484678055e-06, "loss": 0.444, "step": 12079 }, { "epoch": 1.9838852051813685, "grad_norm": 0.31620813737993725, "learning_rate": 6.0139688590658875e-06, "loss": 0.4543, "step": 12080 }, { "epoch": 1.9840494323897113, "grad_norm": 0.5304261684883916, "learning_rate": 6.013527217010528e-06, "loss": 0.4352, "step": 12081 }, { "epoch": 1.984213659598054, "grad_norm": 0.33483212395030826, "learning_rate": 6.0130855585167735e-06, "loss": 0.4287, "step": 12082 }, { "epoch": 1.9843778868063966, "grad_norm": 0.31516123536811785, "learning_rate": 6.012643883589422e-06, "loss": 0.4534, "step": 12083 }, { "epoch": 1.9845421140147392, "grad_norm": 0.36557989364427224, "learning_rate": 6.012202192233269e-06, "loss": 0.4396, "step": 12084 }, { "epoch": 1.984706341223082, "grad_norm": 0.3248087899223682, "learning_rate": 6.011760484453113e-06, "loss": 0.4308, "step": 12085 }, { "epoch": 1.984870568431425, "grad_norm": 0.33659809991010253, "learning_rate": 6.01131876025375e-06, "loss": 0.4239, "step": 12086 }, { "epoch": 1.9850347956397676, "grad_norm": 0.4750655674070637, "learning_rate": 6.010877019639978e-06, "loss": 0.4417, "step": 12087 }, { "epoch": 1.9851990228481102, "grad_norm": 0.7893302234320693, "learning_rate": 6.010435262616595e-06, "loss": 0.4511, "step": 12088 }, { "epoch": 1.985363250056453, "grad_norm": 0.2919750160730782, "learning_rate": 6.009993489188401e-06, "loss": 0.4235, "step": 12089 }, { "epoch": 1.985527477264796, "grad_norm": 0.269083122192506, "learning_rate": 6.00955169936019e-06, "loss": 0.4384, "step": 12090 }, { "epoch": 1.9856917044731386, "grad_norm": 0.3046254431436656, "learning_rate": 6.009109893136764e-06, "loss": 0.4439, "step": 12091 }, { "epoch": 1.9858559316814812, "grad_norm": 0.34705997935606975, "learning_rate": 6.0086680705229185e-06, "loss": 0.4656, "step": 12092 }, { "epoch": 1.986020158889824, "grad_norm": 0.3014169128468884, "learning_rate": 6.008226231523454e-06, "loss": 0.4425, "step": 12093 }, { "epoch": 1.986184386098167, "grad_norm": 0.3203316019921863, "learning_rate": 6.007784376143168e-06, "loss": 0.4604, "step": 12094 }, { "epoch": 1.9863486133065096, "grad_norm": 0.28211784166140474, "learning_rate": 6.007342504386861e-06, "loss": 0.4461, "step": 12095 }, { "epoch": 1.9865128405148522, "grad_norm": 0.713397223611457, "learning_rate": 6.0069006162593316e-06, "loss": 0.4712, "step": 12096 }, { "epoch": 1.986677067723195, "grad_norm": 0.32804549480165146, "learning_rate": 6.006458711765378e-06, "loss": 0.4464, "step": 12097 }, { "epoch": 1.986841294931538, "grad_norm": 0.4390495685818857, "learning_rate": 6.0060167909098005e-06, "loss": 0.4532, "step": 12098 }, { "epoch": 1.9870055221398806, "grad_norm": 0.29066432280196225, "learning_rate": 6.005574853697399e-06, "loss": 0.4299, "step": 12099 }, { "epoch": 1.9871697493482232, "grad_norm": 0.27970517847861814, "learning_rate": 6.005132900132976e-06, "loss": 0.451, "step": 12100 }, { "epoch": 1.9873339765565659, "grad_norm": 0.2993981409645384, "learning_rate": 6.004690930221326e-06, "loss": 0.449, "step": 12101 }, { "epoch": 1.9874982037649087, "grad_norm": 0.4881082910339583, "learning_rate": 6.0042489439672526e-06, "loss": 0.4428, "step": 12102 }, { "epoch": 1.9876624309732516, "grad_norm": 0.35593125919821117, "learning_rate": 6.0038069413755554e-06, "loss": 0.4737, "step": 12103 }, { "epoch": 1.9878266581815942, "grad_norm": 0.3404574977066621, "learning_rate": 6.003364922451035e-06, "loss": 0.4371, "step": 12104 }, { "epoch": 1.9879908853899368, "grad_norm": 0.35843795271388007, "learning_rate": 6.002922887198494e-06, "loss": 0.4464, "step": 12105 }, { "epoch": 1.9881551125982797, "grad_norm": 0.278424406244817, "learning_rate": 6.002480835622731e-06, "loss": 0.4358, "step": 12106 }, { "epoch": 1.9883193398066226, "grad_norm": 0.3792249206543696, "learning_rate": 6.0020387677285474e-06, "loss": 0.4479, "step": 12107 }, { "epoch": 1.9884835670149652, "grad_norm": 0.3097187849705982, "learning_rate": 6.001596683520746e-06, "loss": 0.4455, "step": 12108 }, { "epoch": 1.9886477942233078, "grad_norm": 0.3912751913987081, "learning_rate": 6.001154583004126e-06, "loss": 0.4589, "step": 12109 }, { "epoch": 1.9888120214316507, "grad_norm": 0.36773299409138227, "learning_rate": 6.000712466183492e-06, "loss": 0.4546, "step": 12110 }, { "epoch": 1.9889762486399936, "grad_norm": 0.25200881130078995, "learning_rate": 6.000270333063643e-06, "loss": 0.4221, "step": 12111 }, { "epoch": 1.9891404758483362, "grad_norm": 0.41745494296022206, "learning_rate": 5.999828183649382e-06, "loss": 0.4375, "step": 12112 }, { "epoch": 1.9893047030566788, "grad_norm": 0.41119193520709213, "learning_rate": 5.999386017945512e-06, "loss": 0.4527, "step": 12113 }, { "epoch": 1.9894689302650217, "grad_norm": 0.3524347997142921, "learning_rate": 5.998943835956833e-06, "loss": 0.4477, "step": 12114 }, { "epoch": 1.9896331574733646, "grad_norm": 0.6992056215375053, "learning_rate": 5.998501637688151e-06, "loss": 0.4373, "step": 12115 }, { "epoch": 1.9897973846817072, "grad_norm": 0.35753146495246646, "learning_rate": 5.998059423144266e-06, "loss": 0.4239, "step": 12116 }, { "epoch": 1.9899616118900498, "grad_norm": 0.305443312300451, "learning_rate": 5.9976171923299825e-06, "loss": 0.4599, "step": 12117 }, { "epoch": 1.9901258390983925, "grad_norm": 0.3468952965866211, "learning_rate": 5.997174945250102e-06, "loss": 0.4367, "step": 12118 }, { "epoch": 1.9902900663067353, "grad_norm": 0.30992685733369446, "learning_rate": 5.996732681909429e-06, "loss": 0.4238, "step": 12119 }, { "epoch": 1.9904542935150782, "grad_norm": 0.3089277099150746, "learning_rate": 5.9962904023127654e-06, "loss": 0.4668, "step": 12120 }, { "epoch": 1.9906185207234208, "grad_norm": 0.4060973659557495, "learning_rate": 5.995848106464918e-06, "loss": 0.4341, "step": 12121 }, { "epoch": 1.9907827479317635, "grad_norm": 0.3343300894937679, "learning_rate": 5.995405794370687e-06, "loss": 0.4388, "step": 12122 }, { "epoch": 1.9909469751401063, "grad_norm": 0.4022894904900554, "learning_rate": 5.994963466034877e-06, "loss": 0.4604, "step": 12123 }, { "epoch": 1.9911112023484492, "grad_norm": 0.2809421394977338, "learning_rate": 5.994521121462295e-06, "loss": 0.4306, "step": 12124 }, { "epoch": 1.9912754295567918, "grad_norm": 0.2999392110190069, "learning_rate": 5.994078760657742e-06, "loss": 0.4521, "step": 12125 }, { "epoch": 1.9914396567651345, "grad_norm": 0.3439605067320684, "learning_rate": 5.9936363836260235e-06, "loss": 0.4559, "step": 12126 }, { "epoch": 1.9916038839734773, "grad_norm": 0.35520361022230595, "learning_rate": 5.993193990371945e-06, "loss": 0.448, "step": 12127 }, { "epoch": 1.9917681111818202, "grad_norm": 0.3873609826678473, "learning_rate": 5.99275158090031e-06, "loss": 0.4431, "step": 12128 }, { "epoch": 1.9919323383901628, "grad_norm": 0.623525798670634, "learning_rate": 5.9923091552159244e-06, "loss": 0.4399, "step": 12129 }, { "epoch": 1.9920965655985055, "grad_norm": 0.375059399085205, "learning_rate": 5.9918667133235946e-06, "loss": 0.4486, "step": 12130 }, { "epoch": 1.9922607928068483, "grad_norm": 0.4259468484416419, "learning_rate": 5.991424255228122e-06, "loss": 0.4604, "step": 12131 }, { "epoch": 1.9924250200151912, "grad_norm": 0.48872792746183463, "learning_rate": 5.990981780934316e-06, "loss": 0.4451, "step": 12132 }, { "epoch": 1.9925892472235338, "grad_norm": 0.3400954850191665, "learning_rate": 5.990539290446981e-06, "loss": 0.4511, "step": 12133 }, { "epoch": 1.9927534744318764, "grad_norm": 0.3584423281926243, "learning_rate": 5.990096783770923e-06, "loss": 0.4383, "step": 12134 }, { "epoch": 1.992917701640219, "grad_norm": 0.34324978031707004, "learning_rate": 5.989654260910947e-06, "loss": 0.442, "step": 12135 }, { "epoch": 1.993081928848562, "grad_norm": 0.32954134231022136, "learning_rate": 5.98921172187186e-06, "loss": 0.4543, "step": 12136 }, { "epoch": 1.9932461560569048, "grad_norm": 0.392536390634084, "learning_rate": 5.9887691666584685e-06, "loss": 0.4678, "step": 12137 }, { "epoch": 1.9934103832652474, "grad_norm": 0.2785571757259487, "learning_rate": 5.9883265952755796e-06, "loss": 0.4437, "step": 12138 }, { "epoch": 1.99357461047359, "grad_norm": 0.3650211841002478, "learning_rate": 5.987884007728001e-06, "loss": 0.4457, "step": 12139 }, { "epoch": 1.993738837681933, "grad_norm": 0.5938490366217879, "learning_rate": 5.987441404020537e-06, "loss": 0.4364, "step": 12140 }, { "epoch": 1.9939030648902758, "grad_norm": 0.3330731764741555, "learning_rate": 5.986998784157995e-06, "loss": 0.4222, "step": 12141 }, { "epoch": 1.9940672920986184, "grad_norm": 0.3502325402671695, "learning_rate": 5.986556148145183e-06, "loss": 0.435, "step": 12142 }, { "epoch": 1.994231519306961, "grad_norm": 0.3442202918445196, "learning_rate": 5.98611349598691e-06, "loss": 0.4507, "step": 12143 }, { "epoch": 1.994395746515304, "grad_norm": 0.4005337023377969, "learning_rate": 5.985670827687983e-06, "loss": 0.4468, "step": 12144 }, { "epoch": 1.9945599737236468, "grad_norm": 0.29403675952349245, "learning_rate": 5.985228143253207e-06, "loss": 0.4617, "step": 12145 }, { "epoch": 1.9947242009319894, "grad_norm": 0.430171744081949, "learning_rate": 5.984785442687394e-06, "loss": 0.4428, "step": 12146 }, { "epoch": 1.994888428140332, "grad_norm": 0.7144987121760876, "learning_rate": 5.984342725995349e-06, "loss": 0.4536, "step": 12147 }, { "epoch": 1.995052655348675, "grad_norm": 0.5575791795273514, "learning_rate": 5.9838999931818816e-06, "loss": 0.4447, "step": 12148 }, { "epoch": 1.9952168825570178, "grad_norm": 0.3103253306063338, "learning_rate": 5.983457244251801e-06, "loss": 0.4482, "step": 12149 }, { "epoch": 1.9953811097653604, "grad_norm": 0.306105882253974, "learning_rate": 5.983014479209914e-06, "loss": 0.4603, "step": 12150 }, { "epoch": 1.995545336973703, "grad_norm": 0.3256489586607816, "learning_rate": 5.982571698061033e-06, "loss": 0.4502, "step": 12151 }, { "epoch": 1.9957095641820457, "grad_norm": 0.30675263299161515, "learning_rate": 5.982128900809962e-06, "loss": 0.4408, "step": 12152 }, { "epoch": 1.9958737913903886, "grad_norm": 0.3979035965681152, "learning_rate": 5.981686087461514e-06, "loss": 0.4338, "step": 12153 }, { "epoch": 1.9960380185987314, "grad_norm": 0.5714962273931158, "learning_rate": 5.981243258020498e-06, "loss": 0.473, "step": 12154 }, { "epoch": 1.996202245807074, "grad_norm": 0.5233640080963108, "learning_rate": 5.980800412491722e-06, "loss": 0.4452, "step": 12155 }, { "epoch": 1.9963664730154167, "grad_norm": 0.5095439866164315, "learning_rate": 5.980357550879997e-06, "loss": 0.4404, "step": 12156 }, { "epoch": 1.9965307002237596, "grad_norm": 0.34135588876267836, "learning_rate": 5.979914673190132e-06, "loss": 0.4641, "step": 12157 }, { "epoch": 1.9966949274321024, "grad_norm": 0.3166966051628447, "learning_rate": 5.979471779426938e-06, "loss": 0.4463, "step": 12158 }, { "epoch": 1.996859154640445, "grad_norm": 0.3534752492741154, "learning_rate": 5.9790288695952256e-06, "loss": 0.4179, "step": 12159 }, { "epoch": 1.9970233818487877, "grad_norm": 0.2803029304863608, "learning_rate": 5.9785859436998035e-06, "loss": 0.438, "step": 12160 }, { "epoch": 1.9971876090571306, "grad_norm": 0.3739910815165364, "learning_rate": 5.978143001745484e-06, "loss": 0.4254, "step": 12161 }, { "epoch": 1.9973518362654734, "grad_norm": 0.33129160415655884, "learning_rate": 5.977700043737075e-06, "loss": 0.4323, "step": 12162 }, { "epoch": 1.997516063473816, "grad_norm": 0.31774150789673616, "learning_rate": 5.977257069679393e-06, "loss": 0.443, "step": 12163 }, { "epoch": 1.9976802906821587, "grad_norm": 0.3477407627974902, "learning_rate": 5.9768140795772445e-06, "loss": 0.4463, "step": 12164 }, { "epoch": 1.9978445178905015, "grad_norm": 0.3209257252688937, "learning_rate": 5.9763710734354415e-06, "loss": 0.4492, "step": 12165 }, { "epoch": 1.9980087450988444, "grad_norm": 0.32082689954088633, "learning_rate": 5.9759280512587966e-06, "loss": 0.4436, "step": 12166 }, { "epoch": 1.998172972307187, "grad_norm": 0.35698917095820754, "learning_rate": 5.975485013052122e-06, "loss": 0.4476, "step": 12167 }, { "epoch": 1.9983371995155297, "grad_norm": 0.3286716694844807, "learning_rate": 5.975041958820227e-06, "loss": 0.4326, "step": 12168 }, { "epoch": 1.9985014267238723, "grad_norm": 0.42593190197587716, "learning_rate": 5.974598888567925e-06, "loss": 0.4603, "step": 12169 }, { "epoch": 1.9986656539322152, "grad_norm": 0.3397923984316032, "learning_rate": 5.974155802300027e-06, "loss": 0.4391, "step": 12170 }, { "epoch": 1.998829881140558, "grad_norm": 0.4923490616754908, "learning_rate": 5.97371270002135e-06, "loss": 0.464, "step": 12171 }, { "epoch": 1.9989941083489007, "grad_norm": 0.3340119955565463, "learning_rate": 5.973269581736701e-06, "loss": 0.4498, "step": 12172 }, { "epoch": 1.9991583355572433, "grad_norm": 0.4640454105520234, "learning_rate": 5.972826447450896e-06, "loss": 0.4515, "step": 12173 }, { "epoch": 1.9993225627655862, "grad_norm": 0.6088139999913015, "learning_rate": 5.9723832971687455e-06, "loss": 0.445, "step": 12174 }, { "epoch": 1.999486789973929, "grad_norm": 0.27415029930250745, "learning_rate": 5.971940130895065e-06, "loss": 0.4335, "step": 12175 }, { "epoch": 1.9996510171822717, "grad_norm": 0.30806362684880456, "learning_rate": 5.971496948634665e-06, "loss": 0.4513, "step": 12176 }, { "epoch": 1.9998152443906143, "grad_norm": 0.4787944606540653, "learning_rate": 5.9710537503923605e-06, "loss": 0.4689, "step": 12177 }, { "epoch": 1.9999794715989572, "grad_norm": 0.2904555426356073, "learning_rate": 5.970610536172966e-06, "loss": 0.4574, "step": 12178 }, { "epoch": 2.0001436988073, "grad_norm": 0.42320407092102436, "learning_rate": 5.970167305981294e-06, "loss": 0.4538, "step": 12179 }, { "epoch": 2.0003079260156427, "grad_norm": 0.3083798483536343, "learning_rate": 5.969724059822159e-06, "loss": 0.4534, "step": 12180 }, { "epoch": 2.0004721532239853, "grad_norm": 0.4283547328237905, "learning_rate": 5.969280797700373e-06, "loss": 0.4586, "step": 12181 }, { "epoch": 2.000636380432328, "grad_norm": 0.4112612822032779, "learning_rate": 5.968837519620753e-06, "loss": 0.4597, "step": 12182 }, { "epoch": 2.000800607640671, "grad_norm": 0.3368213650434348, "learning_rate": 5.968394225588113e-06, "loss": 0.4564, "step": 12183 }, { "epoch": 2.0009648348490137, "grad_norm": 0.3492692409648053, "learning_rate": 5.967950915607267e-06, "loss": 0.4603, "step": 12184 }, { "epoch": 2.0011290620573563, "grad_norm": 0.327969557551925, "learning_rate": 5.967507589683027e-06, "loss": 0.4592, "step": 12185 }, { "epoch": 2.001293289265699, "grad_norm": 1.0052464626999962, "learning_rate": 5.967064247820214e-06, "loss": 0.447, "step": 12186 }, { "epoch": 2.001457516474042, "grad_norm": 0.3323420980370512, "learning_rate": 5.966620890023639e-06, "loss": 0.4497, "step": 12187 }, { "epoch": 2.0016217436823847, "grad_norm": 0.30221353880927326, "learning_rate": 5.966177516298119e-06, "loss": 0.4381, "step": 12188 }, { "epoch": 2.0017859708907273, "grad_norm": 0.34728416679258106, "learning_rate": 5.965734126648467e-06, "loss": 0.4511, "step": 12189 }, { "epoch": 2.00195019809907, "grad_norm": 0.34886261634920057, "learning_rate": 5.9652907210795e-06, "loss": 0.4818, "step": 12190 }, { "epoch": 2.002114425307413, "grad_norm": 0.3436162370687873, "learning_rate": 5.964847299596035e-06, "loss": 0.448, "step": 12191 }, { "epoch": 2.0022786525157557, "grad_norm": 0.4215227813011232, "learning_rate": 5.964403862202888e-06, "loss": 0.4549, "step": 12192 }, { "epoch": 2.0024428797240983, "grad_norm": 0.3366921499133086, "learning_rate": 5.963960408904874e-06, "loss": 0.4392, "step": 12193 }, { "epoch": 2.002607106932441, "grad_norm": 0.3245529569332661, "learning_rate": 5.963516939706809e-06, "loss": 0.4551, "step": 12194 }, { "epoch": 2.0027713341407836, "grad_norm": 0.33977378407262626, "learning_rate": 5.963073454613509e-06, "loss": 0.4695, "step": 12195 }, { "epoch": 2.0029355613491266, "grad_norm": 0.3069779515187422, "learning_rate": 5.962629953629794e-06, "loss": 0.4412, "step": 12196 }, { "epoch": 2.0030997885574693, "grad_norm": 0.3507049844560384, "learning_rate": 5.962186436760476e-06, "loss": 0.4761, "step": 12197 }, { "epoch": 2.003264015765812, "grad_norm": 0.3491589362302453, "learning_rate": 5.9617429040103785e-06, "loss": 0.441, "step": 12198 }, { "epoch": 2.0034282429741546, "grad_norm": 0.3742547465374901, "learning_rate": 5.961299355384311e-06, "loss": 0.4281, "step": 12199 }, { "epoch": 2.0035924701824976, "grad_norm": 0.4053926473537887, "learning_rate": 5.960855790887098e-06, "loss": 0.4504, "step": 12200 }, { "epoch": 2.0037566973908403, "grad_norm": 0.35957952049354897, "learning_rate": 5.960412210523552e-06, "loss": 0.4252, "step": 12201 }, { "epoch": 2.003920924599183, "grad_norm": 0.32657403088533404, "learning_rate": 5.959968614298493e-06, "loss": 0.4472, "step": 12202 }, { "epoch": 2.0040851518075256, "grad_norm": 0.3244325404399533, "learning_rate": 5.959525002216738e-06, "loss": 0.4618, "step": 12203 }, { "epoch": 2.0042493790158686, "grad_norm": 0.6620676419938378, "learning_rate": 5.959081374283106e-06, "loss": 0.4516, "step": 12204 }, { "epoch": 2.0044136062242113, "grad_norm": 0.31677753319473856, "learning_rate": 5.9586377305024145e-06, "loss": 0.4485, "step": 12205 }, { "epoch": 2.004577833432554, "grad_norm": 0.4121208836205025, "learning_rate": 5.958194070879482e-06, "loss": 0.4532, "step": 12206 }, { "epoch": 2.0047420606408966, "grad_norm": 0.3116933045975636, "learning_rate": 5.957750395419127e-06, "loss": 0.4567, "step": 12207 }, { "epoch": 2.0049062878492396, "grad_norm": 0.33302922228958487, "learning_rate": 5.957306704126169e-06, "loss": 0.4518, "step": 12208 }, { "epoch": 2.0050705150575823, "grad_norm": 0.4198718931149218, "learning_rate": 5.956862997005428e-06, "loss": 0.4647, "step": 12209 }, { "epoch": 2.005234742265925, "grad_norm": 0.34251455165151395, "learning_rate": 5.956419274061719e-06, "loss": 0.4512, "step": 12210 }, { "epoch": 2.0053989694742675, "grad_norm": 0.3519346429865396, "learning_rate": 5.955975535299863e-06, "loss": 0.4402, "step": 12211 }, { "epoch": 2.00556319668261, "grad_norm": 0.36308105461875656, "learning_rate": 5.955531780724683e-06, "loss": 0.4598, "step": 12212 }, { "epoch": 2.0057274238909533, "grad_norm": 0.3741093286692219, "learning_rate": 5.955088010340995e-06, "loss": 0.4443, "step": 12213 }, { "epoch": 2.005891651099296, "grad_norm": 0.328443035688512, "learning_rate": 5.954644224153618e-06, "loss": 0.4192, "step": 12214 }, { "epoch": 2.0060558783076385, "grad_norm": 0.3324771672414727, "learning_rate": 5.954200422167376e-06, "loss": 0.4319, "step": 12215 }, { "epoch": 2.006220105515981, "grad_norm": 0.29254376840142776, "learning_rate": 5.953756604387085e-06, "loss": 0.4662, "step": 12216 }, { "epoch": 2.0063843327243243, "grad_norm": 0.26014344535423545, "learning_rate": 5.953312770817568e-06, "loss": 0.432, "step": 12217 }, { "epoch": 2.006548559932667, "grad_norm": 0.3568735894151926, "learning_rate": 5.952868921463643e-06, "loss": 0.4651, "step": 12218 }, { "epoch": 2.0067127871410095, "grad_norm": 0.3141590794217856, "learning_rate": 5.952425056330134e-06, "loss": 0.4567, "step": 12219 }, { "epoch": 2.006877014349352, "grad_norm": 0.3884315355667573, "learning_rate": 5.951981175421858e-06, "loss": 0.4439, "step": 12220 }, { "epoch": 2.0070412415576953, "grad_norm": 0.30058138999138845, "learning_rate": 5.951537278743639e-06, "loss": 0.432, "step": 12221 }, { "epoch": 2.007205468766038, "grad_norm": 0.44575480874833284, "learning_rate": 5.951093366300296e-06, "loss": 0.4355, "step": 12222 }, { "epoch": 2.0073696959743805, "grad_norm": 0.366245952203828, "learning_rate": 5.950649438096653e-06, "loss": 0.4535, "step": 12223 }, { "epoch": 2.007533923182723, "grad_norm": 0.2867930542655121, "learning_rate": 5.9502054941375285e-06, "loss": 0.4378, "step": 12224 }, { "epoch": 2.0076981503910662, "grad_norm": 0.47746484419606905, "learning_rate": 5.949761534427746e-06, "loss": 0.4115, "step": 12225 }, { "epoch": 2.007862377599409, "grad_norm": 0.37527877564632606, "learning_rate": 5.9493175589721265e-06, "loss": 0.4338, "step": 12226 }, { "epoch": 2.0080266048077515, "grad_norm": 0.3297901763313656, "learning_rate": 5.948873567775493e-06, "loss": 0.4139, "step": 12227 }, { "epoch": 2.008190832016094, "grad_norm": 0.2561263798325084, "learning_rate": 5.948429560842666e-06, "loss": 0.447, "step": 12228 }, { "epoch": 2.008355059224437, "grad_norm": 0.4365444130306358, "learning_rate": 5.94798553817847e-06, "loss": 0.458, "step": 12229 }, { "epoch": 2.00851928643278, "grad_norm": 0.5232889294616114, "learning_rate": 5.9475414997877255e-06, "loss": 0.447, "step": 12230 }, { "epoch": 2.0086835136411225, "grad_norm": 0.3411242576654733, "learning_rate": 5.947097445675258e-06, "loss": 0.4541, "step": 12231 }, { "epoch": 2.008847740849465, "grad_norm": 0.3540437876499908, "learning_rate": 5.946653375845887e-06, "loss": 0.4528, "step": 12232 }, { "epoch": 2.009011968057808, "grad_norm": 0.41463655700949253, "learning_rate": 5.946209290304437e-06, "loss": 0.4484, "step": 12233 }, { "epoch": 2.009176195266151, "grad_norm": 0.3590231272906019, "learning_rate": 5.945765189055731e-06, "loss": 0.4646, "step": 12234 }, { "epoch": 2.0093404224744935, "grad_norm": 0.5948107860227744, "learning_rate": 5.945321072104593e-06, "loss": 0.4482, "step": 12235 }, { "epoch": 2.009504649682836, "grad_norm": 0.309417465568033, "learning_rate": 5.944876939455848e-06, "loss": 0.4331, "step": 12236 }, { "epoch": 2.009668876891179, "grad_norm": 0.3306098908330728, "learning_rate": 5.944432791114314e-06, "loss": 0.4499, "step": 12237 }, { "epoch": 2.009833104099522, "grad_norm": 0.31621060772673043, "learning_rate": 5.943988627084822e-06, "loss": 0.4568, "step": 12238 }, { "epoch": 2.0099973313078645, "grad_norm": 0.2856713698078628, "learning_rate": 5.943544447372191e-06, "loss": 0.4255, "step": 12239 }, { "epoch": 2.010161558516207, "grad_norm": 0.28032980432657734, "learning_rate": 5.943100251981248e-06, "loss": 0.45, "step": 12240 }, { "epoch": 2.01032578572455, "grad_norm": 0.3390743841064049, "learning_rate": 5.942656040916815e-06, "loss": 0.464, "step": 12241 }, { "epoch": 2.010490012932893, "grad_norm": 0.3187089522691845, "learning_rate": 5.942211814183721e-06, "loss": 0.4534, "step": 12242 }, { "epoch": 2.0106542401412355, "grad_norm": 0.42615693774868557, "learning_rate": 5.941767571786786e-06, "loss": 0.4631, "step": 12243 }, { "epoch": 2.010818467349578, "grad_norm": 0.36667170180078384, "learning_rate": 5.941323313730836e-06, "loss": 0.4236, "step": 12244 }, { "epoch": 2.010982694557921, "grad_norm": 0.43862327105157484, "learning_rate": 5.940879040020696e-06, "loss": 0.4519, "step": 12245 }, { "epoch": 2.0111469217662634, "grad_norm": 0.33579804262100527, "learning_rate": 5.940434750661194e-06, "loss": 0.4137, "step": 12246 }, { "epoch": 2.0113111489746065, "grad_norm": 0.30978789940948254, "learning_rate": 5.939990445657153e-06, "loss": 0.4343, "step": 12247 }, { "epoch": 2.011475376182949, "grad_norm": 0.3728929087707586, "learning_rate": 5.939546125013399e-06, "loss": 0.4539, "step": 12248 }, { "epoch": 2.0116396033912918, "grad_norm": 0.3256492544812732, "learning_rate": 5.939101788734757e-06, "loss": 0.4551, "step": 12249 }, { "epoch": 2.0118038305996344, "grad_norm": 0.30868824642963294, "learning_rate": 5.938657436826054e-06, "loss": 0.4391, "step": 12250 }, { "epoch": 2.0119680578079775, "grad_norm": 0.28528064429757055, "learning_rate": 5.938213069292117e-06, "loss": 0.4352, "step": 12251 }, { "epoch": 2.01213228501632, "grad_norm": 0.720505405909572, "learning_rate": 5.93776868613777e-06, "loss": 0.4582, "step": 12252 }, { "epoch": 2.0122965122246628, "grad_norm": 0.40996297358279415, "learning_rate": 5.93732428736784e-06, "loss": 0.4366, "step": 12253 }, { "epoch": 2.0124607394330054, "grad_norm": 0.36966590337145583, "learning_rate": 5.936879872987155e-06, "loss": 0.4422, "step": 12254 }, { "epoch": 2.0126249666413485, "grad_norm": 0.40644461183977937, "learning_rate": 5.93643544300054e-06, "loss": 0.4352, "step": 12255 }, { "epoch": 2.012789193849691, "grad_norm": 0.348232310385563, "learning_rate": 5.935990997412823e-06, "loss": 0.4499, "step": 12256 }, { "epoch": 2.0129534210580338, "grad_norm": 0.30635919875987044, "learning_rate": 5.9355465362288315e-06, "loss": 0.4597, "step": 12257 }, { "epoch": 2.0131176482663764, "grad_norm": 0.38023284124533757, "learning_rate": 5.9351020594533914e-06, "loss": 0.4341, "step": 12258 }, { "epoch": 2.0132818754747195, "grad_norm": 0.3209364969044947, "learning_rate": 5.934657567091332e-06, "loss": 0.4462, "step": 12259 }, { "epoch": 2.013446102683062, "grad_norm": 0.4138574131679595, "learning_rate": 5.9342130591474785e-06, "loss": 0.4343, "step": 12260 }, { "epoch": 2.0136103298914048, "grad_norm": 0.3691427016956709, "learning_rate": 5.933768535626662e-06, "loss": 0.4557, "step": 12261 }, { "epoch": 2.0137745570997474, "grad_norm": 0.3137057625519252, "learning_rate": 5.933323996533708e-06, "loss": 0.4639, "step": 12262 }, { "epoch": 2.01393878430809, "grad_norm": 0.4452247347531062, "learning_rate": 5.932879441873445e-06, "loss": 0.4478, "step": 12263 }, { "epoch": 2.014103011516433, "grad_norm": 0.32534561106326604, "learning_rate": 5.932434871650701e-06, "loss": 0.4222, "step": 12264 }, { "epoch": 2.0142672387247758, "grad_norm": 0.4228276902347355, "learning_rate": 5.931990285870306e-06, "loss": 0.4505, "step": 12265 }, { "epoch": 2.0144314659331184, "grad_norm": 0.4876125453560543, "learning_rate": 5.931545684537086e-06, "loss": 0.4232, "step": 12266 }, { "epoch": 2.014595693141461, "grad_norm": 0.4172990521894343, "learning_rate": 5.9311010676558724e-06, "loss": 0.4313, "step": 12267 }, { "epoch": 2.014759920349804, "grad_norm": 0.4012043012163585, "learning_rate": 5.9306564352314935e-06, "loss": 0.4425, "step": 12268 }, { "epoch": 2.0149241475581467, "grad_norm": 0.31821251020135133, "learning_rate": 5.930211787268777e-06, "loss": 0.455, "step": 12269 }, { "epoch": 2.0150883747664894, "grad_norm": 0.3358598549375324, "learning_rate": 5.929767123772555e-06, "loss": 0.4501, "step": 12270 }, { "epoch": 2.015252601974832, "grad_norm": 0.34992544409089366, "learning_rate": 5.929322444747655e-06, "loss": 0.4398, "step": 12271 }, { "epoch": 2.015416829183175, "grad_norm": 0.46600892538976924, "learning_rate": 5.928877750198906e-06, "loss": 0.4376, "step": 12272 }, { "epoch": 2.0155810563915177, "grad_norm": 0.4421023504524407, "learning_rate": 5.928433040131139e-06, "loss": 0.4627, "step": 12273 }, { "epoch": 2.0157452835998604, "grad_norm": 0.3466636254391006, "learning_rate": 5.9279883145491835e-06, "loss": 0.4657, "step": 12274 }, { "epoch": 2.015909510808203, "grad_norm": 0.41056134961088725, "learning_rate": 5.927543573457871e-06, "loss": 0.4482, "step": 12275 }, { "epoch": 2.016073738016546, "grad_norm": 0.3998987345596671, "learning_rate": 5.927098816862031e-06, "loss": 0.4558, "step": 12276 }, { "epoch": 2.0162379652248887, "grad_norm": 0.3411150872881389, "learning_rate": 5.926654044766493e-06, "loss": 0.4253, "step": 12277 }, { "epoch": 2.0164021924332314, "grad_norm": 0.44622183591825637, "learning_rate": 5.926209257176087e-06, "loss": 0.4414, "step": 12278 }, { "epoch": 2.016566419641574, "grad_norm": 0.4301937123349158, "learning_rate": 5.925764454095646e-06, "loss": 0.4332, "step": 12279 }, { "epoch": 2.0167306468499167, "grad_norm": 0.518469057403512, "learning_rate": 5.925319635530003e-06, "loss": 0.4605, "step": 12280 }, { "epoch": 2.0168948740582597, "grad_norm": 0.32242423510941276, "learning_rate": 5.924874801483985e-06, "loss": 0.4433, "step": 12281 }, { "epoch": 2.0170591012666024, "grad_norm": 0.3939941753019156, "learning_rate": 5.924429951962424e-06, "loss": 0.4509, "step": 12282 }, { "epoch": 2.017223328474945, "grad_norm": 0.330619113684146, "learning_rate": 5.9239850869701516e-06, "loss": 0.4334, "step": 12283 }, { "epoch": 2.0173875556832876, "grad_norm": 0.4240219166481088, "learning_rate": 5.923540206512001e-06, "loss": 0.4517, "step": 12284 }, { "epoch": 2.0175517828916307, "grad_norm": 0.37369167937967235, "learning_rate": 5.923095310592804e-06, "loss": 0.4418, "step": 12285 }, { "epoch": 2.0177160100999734, "grad_norm": 0.33052399671771765, "learning_rate": 5.922650399217391e-06, "loss": 0.4549, "step": 12286 }, { "epoch": 2.017880237308316, "grad_norm": 0.3318820800605246, "learning_rate": 5.922205472390594e-06, "loss": 0.4641, "step": 12287 }, { "epoch": 2.0180444645166586, "grad_norm": 0.25965853886842816, "learning_rate": 5.9217605301172475e-06, "loss": 0.4327, "step": 12288 }, { "epoch": 2.0182086917250017, "grad_norm": 0.39578260130111304, "learning_rate": 5.921315572402183e-06, "loss": 0.4443, "step": 12289 }, { "epoch": 2.0183729189333444, "grad_norm": 0.3019127278366922, "learning_rate": 5.920870599250232e-06, "loss": 0.4526, "step": 12290 }, { "epoch": 2.018537146141687, "grad_norm": 0.31515638588112566, "learning_rate": 5.92042561066623e-06, "loss": 0.4341, "step": 12291 }, { "epoch": 2.0187013733500296, "grad_norm": 0.3347035481218894, "learning_rate": 5.9199806066550074e-06, "loss": 0.4211, "step": 12292 }, { "epoch": 2.0188656005583727, "grad_norm": 0.342636954069315, "learning_rate": 5.919535587221398e-06, "loss": 0.4528, "step": 12293 }, { "epoch": 2.0190298277667154, "grad_norm": 0.31264338761398147, "learning_rate": 5.919090552370235e-06, "loss": 0.4433, "step": 12294 }, { "epoch": 2.019194054975058, "grad_norm": 0.30817308473653016, "learning_rate": 5.918645502106354e-06, "loss": 0.4533, "step": 12295 }, { "epoch": 2.0193582821834006, "grad_norm": 0.4982108932983128, "learning_rate": 5.918200436434586e-06, "loss": 0.4467, "step": 12296 }, { "epoch": 2.0195225093917433, "grad_norm": 0.3278038373993306, "learning_rate": 5.9177553553597665e-06, "loss": 0.4554, "step": 12297 }, { "epoch": 2.0196867366000864, "grad_norm": 0.3155972149201842, "learning_rate": 5.917310258886728e-06, "loss": 0.4544, "step": 12298 }, { "epoch": 2.019850963808429, "grad_norm": 0.36100394939817493, "learning_rate": 5.916865147020307e-06, "loss": 0.4359, "step": 12299 }, { "epoch": 2.0200151910167716, "grad_norm": 0.474739217853946, "learning_rate": 5.916420019765336e-06, "loss": 0.4442, "step": 12300 }, { "epoch": 2.0201794182251143, "grad_norm": 0.3725941176745618, "learning_rate": 5.915974877126649e-06, "loss": 0.4448, "step": 12301 }, { "epoch": 2.0203436454334573, "grad_norm": 0.3394057147474312, "learning_rate": 5.915529719109083e-06, "loss": 0.4326, "step": 12302 }, { "epoch": 2.0205078726418, "grad_norm": 0.3333744225766765, "learning_rate": 5.9150845457174704e-06, "loss": 0.444, "step": 12303 }, { "epoch": 2.0206720998501426, "grad_norm": 0.3984379295038264, "learning_rate": 5.9146393569566485e-06, "loss": 0.4323, "step": 12304 }, { "epoch": 2.0208363270584853, "grad_norm": 0.4014873188536819, "learning_rate": 5.914194152831451e-06, "loss": 0.4556, "step": 12305 }, { "epoch": 2.0210005542668283, "grad_norm": 0.38413574125456695, "learning_rate": 5.913748933346714e-06, "loss": 0.4335, "step": 12306 }, { "epoch": 2.021164781475171, "grad_norm": 0.33331851521554756, "learning_rate": 5.9133036985072705e-06, "loss": 0.4507, "step": 12307 }, { "epoch": 2.0213290086835136, "grad_norm": 0.3326758862247054, "learning_rate": 5.91285844831796e-06, "loss": 0.4528, "step": 12308 }, { "epoch": 2.0214932358918563, "grad_norm": 0.29505514100770924, "learning_rate": 5.912413182783617e-06, "loss": 0.4401, "step": 12309 }, { "epoch": 2.0216574631001993, "grad_norm": 0.34154494537786756, "learning_rate": 5.911967901909078e-06, "loss": 0.4341, "step": 12310 }, { "epoch": 2.021821690308542, "grad_norm": 0.4049103391456977, "learning_rate": 5.911522605699176e-06, "loss": 0.4444, "step": 12311 }, { "epoch": 2.0219859175168846, "grad_norm": 0.37915910316572177, "learning_rate": 5.911077294158751e-06, "loss": 0.4498, "step": 12312 }, { "epoch": 2.0221501447252272, "grad_norm": 0.42567405204291847, "learning_rate": 5.910631967292638e-06, "loss": 0.4521, "step": 12313 }, { "epoch": 2.02231437193357, "grad_norm": 0.303818370149242, "learning_rate": 5.910186625105676e-06, "loss": 0.4415, "step": 12314 }, { "epoch": 2.022478599141913, "grad_norm": 0.3723931157545376, "learning_rate": 5.909741267602698e-06, "loss": 0.4303, "step": 12315 }, { "epoch": 2.0226428263502556, "grad_norm": 0.34244289280435986, "learning_rate": 5.909295894788541e-06, "loss": 0.4294, "step": 12316 }, { "epoch": 2.0228070535585982, "grad_norm": 0.45940405346671165, "learning_rate": 5.9088505066680465e-06, "loss": 0.4461, "step": 12317 }, { "epoch": 2.022971280766941, "grad_norm": 0.5371059400275238, "learning_rate": 5.908405103246049e-06, "loss": 0.4659, "step": 12318 }, { "epoch": 2.023135507975284, "grad_norm": 0.37195619324867074, "learning_rate": 5.907959684527387e-06, "loss": 0.4438, "step": 12319 }, { "epoch": 2.0232997351836266, "grad_norm": 0.4444748904870266, "learning_rate": 5.907514250516897e-06, "loss": 0.4413, "step": 12320 }, { "epoch": 2.0234639623919692, "grad_norm": 0.4461299536447834, "learning_rate": 5.907068801219417e-06, "loss": 0.4335, "step": 12321 }, { "epoch": 2.023628189600312, "grad_norm": 0.43994447948385823, "learning_rate": 5.9066233366397854e-06, "loss": 0.4451, "step": 12322 }, { "epoch": 2.023792416808655, "grad_norm": 0.3247864832152194, "learning_rate": 5.90617785678284e-06, "loss": 0.4544, "step": 12323 }, { "epoch": 2.0239566440169976, "grad_norm": 0.3456630868813913, "learning_rate": 5.9057323616534216e-06, "loss": 0.4539, "step": 12324 }, { "epoch": 2.0241208712253402, "grad_norm": 0.4800805237875434, "learning_rate": 5.905286851256365e-06, "loss": 0.4624, "step": 12325 }, { "epoch": 2.024285098433683, "grad_norm": 0.34959063433120807, "learning_rate": 5.904841325596511e-06, "loss": 0.4667, "step": 12326 }, { "epoch": 2.024449325642026, "grad_norm": 0.41151557024535956, "learning_rate": 5.904395784678698e-06, "loss": 0.4403, "step": 12327 }, { "epoch": 2.0246135528503686, "grad_norm": 0.2929240886104455, "learning_rate": 5.903950228507764e-06, "loss": 0.4422, "step": 12328 }, { "epoch": 2.0247777800587112, "grad_norm": 0.4225784029548028, "learning_rate": 5.903504657088551e-06, "loss": 0.4294, "step": 12329 }, { "epoch": 2.024942007267054, "grad_norm": 0.3198757505033295, "learning_rate": 5.903059070425895e-06, "loss": 0.4427, "step": 12330 }, { "epoch": 2.0251062344753965, "grad_norm": 0.35512213130023546, "learning_rate": 5.902613468524639e-06, "loss": 0.435, "step": 12331 }, { "epoch": 2.0252704616837396, "grad_norm": 0.4014329270546415, "learning_rate": 5.902167851389619e-06, "loss": 0.4528, "step": 12332 }, { "epoch": 2.0254346888920822, "grad_norm": 0.3685874338668055, "learning_rate": 5.901722219025678e-06, "loss": 0.4603, "step": 12333 }, { "epoch": 2.025598916100425, "grad_norm": 0.28477749529150725, "learning_rate": 5.901276571437654e-06, "loss": 0.4383, "step": 12334 }, { "epoch": 2.0257631433087675, "grad_norm": 0.3095326874609088, "learning_rate": 5.9008309086303875e-06, "loss": 0.4409, "step": 12335 }, { "epoch": 2.0259273705171106, "grad_norm": 0.45871021472994483, "learning_rate": 5.900385230608718e-06, "loss": 0.459, "step": 12336 }, { "epoch": 2.026091597725453, "grad_norm": 1.0720311778843719, "learning_rate": 5.8999395373774885e-06, "loss": 0.4294, "step": 12337 }, { "epoch": 2.026255824933796, "grad_norm": 0.2924154206097919, "learning_rate": 5.899493828941537e-06, "loss": 0.4704, "step": 12338 }, { "epoch": 2.0264200521421385, "grad_norm": 0.327283098845854, "learning_rate": 5.899048105305709e-06, "loss": 0.4586, "step": 12339 }, { "epoch": 2.0265842793504816, "grad_norm": 0.349614059094532, "learning_rate": 5.898602366474839e-06, "loss": 0.4415, "step": 12340 }, { "epoch": 2.026748506558824, "grad_norm": 0.34197640336286816, "learning_rate": 5.898156612453772e-06, "loss": 0.43, "step": 12341 }, { "epoch": 2.026912733767167, "grad_norm": 0.4623929536887179, "learning_rate": 5.897710843247348e-06, "loss": 0.4478, "step": 12342 }, { "epoch": 2.0270769609755095, "grad_norm": 0.5355379594969214, "learning_rate": 5.89726505886041e-06, "loss": 0.4451, "step": 12343 }, { "epoch": 2.0272411881838526, "grad_norm": 0.3055222951108194, "learning_rate": 5.896819259297799e-06, "loss": 0.4359, "step": 12344 }, { "epoch": 2.027405415392195, "grad_norm": 0.3074334568504291, "learning_rate": 5.896373444564355e-06, "loss": 0.4468, "step": 12345 }, { "epoch": 2.027569642600538, "grad_norm": 0.33119785643769395, "learning_rate": 5.895927614664923e-06, "loss": 0.4489, "step": 12346 }, { "epoch": 2.0277338698088805, "grad_norm": 0.5005833716890481, "learning_rate": 5.895481769604343e-06, "loss": 0.4142, "step": 12347 }, { "epoch": 2.027898097017223, "grad_norm": 0.4446782973011459, "learning_rate": 5.895035909387459e-06, "loss": 0.4455, "step": 12348 }, { "epoch": 2.028062324225566, "grad_norm": 0.3169868306167431, "learning_rate": 5.894590034019111e-06, "loss": 0.4518, "step": 12349 }, { "epoch": 2.028226551433909, "grad_norm": 0.3679129008792082, "learning_rate": 5.894144143504144e-06, "loss": 0.4582, "step": 12350 }, { "epoch": 2.0283907786422515, "grad_norm": 0.3356771628200559, "learning_rate": 5.8936982378474e-06, "loss": 0.455, "step": 12351 }, { "epoch": 2.028555005850594, "grad_norm": 0.37868840952035804, "learning_rate": 5.893252317053722e-06, "loss": 0.4564, "step": 12352 }, { "epoch": 2.028719233058937, "grad_norm": 0.3660224757041939, "learning_rate": 5.892806381127953e-06, "loss": 0.416, "step": 12353 }, { "epoch": 2.02888346026728, "grad_norm": 0.35213115451904753, "learning_rate": 5.892360430074936e-06, "loss": 0.4313, "step": 12354 }, { "epoch": 2.0290476874756225, "grad_norm": 0.3061381002851472, "learning_rate": 5.891914463899515e-06, "loss": 0.4464, "step": 12355 }, { "epoch": 2.029211914683965, "grad_norm": 0.3198294060410183, "learning_rate": 5.891468482606532e-06, "loss": 0.4465, "step": 12356 }, { "epoch": 2.029376141892308, "grad_norm": 0.5061425404700527, "learning_rate": 5.8910224862008345e-06, "loss": 0.4704, "step": 12357 }, { "epoch": 2.029540369100651, "grad_norm": 0.2906027127123769, "learning_rate": 5.890576474687264e-06, "loss": 0.4539, "step": 12358 }, { "epoch": 2.0297045963089935, "grad_norm": 0.9628300081908078, "learning_rate": 5.890130448070665e-06, "loss": 0.4605, "step": 12359 }, { "epoch": 2.029868823517336, "grad_norm": 0.339842469867948, "learning_rate": 5.889684406355879e-06, "loss": 0.4452, "step": 12360 }, { "epoch": 2.030033050725679, "grad_norm": 0.2898973367954881, "learning_rate": 5.889238349547755e-06, "loss": 0.4559, "step": 12361 }, { "epoch": 2.030197277934022, "grad_norm": 0.32955122544839677, "learning_rate": 5.8887922776511355e-06, "loss": 0.4593, "step": 12362 }, { "epoch": 2.0303615051423645, "grad_norm": 0.29610395042009074, "learning_rate": 5.888346190670868e-06, "loss": 0.46, "step": 12363 }, { "epoch": 2.030525732350707, "grad_norm": 0.3076361536809495, "learning_rate": 5.887900088611792e-06, "loss": 0.4537, "step": 12364 }, { "epoch": 2.0306899595590497, "grad_norm": 0.3806885935501196, "learning_rate": 5.887453971478756e-06, "loss": 0.4385, "step": 12365 }, { "epoch": 2.030854186767393, "grad_norm": 0.4892710621594915, "learning_rate": 5.8870078392766036e-06, "loss": 0.4344, "step": 12366 }, { "epoch": 2.0310184139757355, "grad_norm": 0.6621760845336766, "learning_rate": 5.886561692010184e-06, "loss": 0.4566, "step": 12367 }, { "epoch": 2.031182641184078, "grad_norm": 0.34316954928589083, "learning_rate": 5.886115529684339e-06, "loss": 0.4545, "step": 12368 }, { "epoch": 2.0313468683924207, "grad_norm": 0.29649605317323635, "learning_rate": 5.8856693523039155e-06, "loss": 0.4409, "step": 12369 }, { "epoch": 2.031511095600764, "grad_norm": 0.32991559461150566, "learning_rate": 5.885223159873759e-06, "loss": 0.4436, "step": 12370 }, { "epoch": 2.0316753228091065, "grad_norm": 0.3997830334051244, "learning_rate": 5.884776952398717e-06, "loss": 0.4325, "step": 12371 }, { "epoch": 2.031839550017449, "grad_norm": 0.2950333162096897, "learning_rate": 5.884330729883634e-06, "loss": 0.455, "step": 12372 }, { "epoch": 2.0320037772257917, "grad_norm": 0.3424041916207841, "learning_rate": 5.883884492333359e-06, "loss": 0.44, "step": 12373 }, { "epoch": 2.032168004434135, "grad_norm": 0.27965686096829506, "learning_rate": 5.883438239752734e-06, "loss": 0.427, "step": 12374 }, { "epoch": 2.0323322316424774, "grad_norm": 0.29382204979502846, "learning_rate": 5.882991972146611e-06, "loss": 0.4455, "step": 12375 }, { "epoch": 2.03249645885082, "grad_norm": 0.32015433618862266, "learning_rate": 5.882545689519834e-06, "loss": 0.4558, "step": 12376 }, { "epoch": 2.0326606860591627, "grad_norm": 0.3289590563823037, "learning_rate": 5.88209939187725e-06, "loss": 0.4581, "step": 12377 }, { "epoch": 2.032824913267506, "grad_norm": 0.3202040927893558, "learning_rate": 5.881653079223708e-06, "loss": 0.433, "step": 12378 }, { "epoch": 2.0329891404758484, "grad_norm": 0.4237401638627447, "learning_rate": 5.881206751564053e-06, "loss": 0.4468, "step": 12379 }, { "epoch": 2.033153367684191, "grad_norm": 0.4113867464229899, "learning_rate": 5.8807604089031345e-06, "loss": 0.4498, "step": 12380 }, { "epoch": 2.0333175948925337, "grad_norm": 0.3713036320098837, "learning_rate": 5.880314051245799e-06, "loss": 0.4636, "step": 12381 }, { "epoch": 2.0334818221008764, "grad_norm": 0.3487669051771585, "learning_rate": 5.879867678596896e-06, "loss": 0.4261, "step": 12382 }, { "epoch": 2.0336460493092194, "grad_norm": 0.4297560095357159, "learning_rate": 5.8794212909612705e-06, "loss": 0.4632, "step": 12383 }, { "epoch": 2.033810276517562, "grad_norm": 0.41595656127910297, "learning_rate": 5.878974888343773e-06, "loss": 0.4363, "step": 12384 }, { "epoch": 2.0339745037259047, "grad_norm": 0.31941281295240154, "learning_rate": 5.878528470749252e-06, "loss": 0.4616, "step": 12385 }, { "epoch": 2.0341387309342474, "grad_norm": 0.3736947516535489, "learning_rate": 5.878082038182555e-06, "loss": 0.4407, "step": 12386 }, { "epoch": 2.0343029581425904, "grad_norm": 0.3648758434688456, "learning_rate": 5.8776355906485325e-06, "loss": 0.4615, "step": 12387 }, { "epoch": 2.034467185350933, "grad_norm": 0.33317524041004737, "learning_rate": 5.877189128152032e-06, "loss": 0.4516, "step": 12388 }, { "epoch": 2.0346314125592757, "grad_norm": 0.34434024200356267, "learning_rate": 5.876742650697902e-06, "loss": 0.4687, "step": 12389 }, { "epoch": 2.0347956397676183, "grad_norm": 0.3873243560043453, "learning_rate": 5.876296158290991e-06, "loss": 0.4664, "step": 12390 }, { "epoch": 2.0349598669759614, "grad_norm": 0.6587433734412117, "learning_rate": 5.87584965093615e-06, "loss": 0.4384, "step": 12391 }, { "epoch": 2.035124094184304, "grad_norm": 0.376035639840286, "learning_rate": 5.87540312863823e-06, "loss": 0.4448, "step": 12392 }, { "epoch": 2.0352883213926467, "grad_norm": 0.3356221661068441, "learning_rate": 5.874956591402078e-06, "loss": 0.4504, "step": 12393 }, { "epoch": 2.0354525486009893, "grad_norm": 0.3364467718112007, "learning_rate": 5.874510039232544e-06, "loss": 0.4287, "step": 12394 }, { "epoch": 2.0356167758093324, "grad_norm": 0.3530682596790142, "learning_rate": 5.874063472134479e-06, "loss": 0.4323, "step": 12395 }, { "epoch": 2.035781003017675, "grad_norm": 0.41389982440021067, "learning_rate": 5.8736168901127325e-06, "loss": 0.4536, "step": 12396 }, { "epoch": 2.0359452302260177, "grad_norm": 0.317301755237896, "learning_rate": 5.873170293172156e-06, "loss": 0.4371, "step": 12397 }, { "epoch": 2.0361094574343603, "grad_norm": 0.34272680753358126, "learning_rate": 5.872723681317599e-06, "loss": 0.4491, "step": 12398 }, { "epoch": 2.036273684642703, "grad_norm": 0.4006309979835223, "learning_rate": 5.87227705455391e-06, "loss": 0.441, "step": 12399 }, { "epoch": 2.036437911851046, "grad_norm": 0.5542787557823411, "learning_rate": 5.871830412885944e-06, "loss": 0.4296, "step": 12400 }, { "epoch": 2.0366021390593887, "grad_norm": 0.3431090307672414, "learning_rate": 5.871383756318551e-06, "loss": 0.4513, "step": 12401 }, { "epoch": 2.0367663662677313, "grad_norm": 0.3586838401808626, "learning_rate": 5.87093708485658e-06, "loss": 0.4485, "step": 12402 }, { "epoch": 2.036930593476074, "grad_norm": 0.3051512780842894, "learning_rate": 5.8704903985048825e-06, "loss": 0.4547, "step": 12403 }, { "epoch": 2.037094820684417, "grad_norm": 0.35974186850941714, "learning_rate": 5.870043697268312e-06, "loss": 0.4495, "step": 12404 }, { "epoch": 2.0372590478927597, "grad_norm": 0.6056306895454063, "learning_rate": 5.869596981151719e-06, "loss": 0.467, "step": 12405 }, { "epoch": 2.0374232751011023, "grad_norm": 0.2819644783966082, "learning_rate": 5.869150250159955e-06, "loss": 0.4314, "step": 12406 }, { "epoch": 2.037587502309445, "grad_norm": 0.4286095726381181, "learning_rate": 5.868703504297873e-06, "loss": 0.4474, "step": 12407 }, { "epoch": 2.037751729517788, "grad_norm": 0.32038506407005746, "learning_rate": 5.868256743570323e-06, "loss": 0.4422, "step": 12408 }, { "epoch": 2.0379159567261307, "grad_norm": 0.3259096360271549, "learning_rate": 5.8678099679821595e-06, "loss": 0.4517, "step": 12409 }, { "epoch": 2.0380801839344733, "grad_norm": 0.27644003099357023, "learning_rate": 5.867363177538234e-06, "loss": 0.4327, "step": 12410 }, { "epoch": 2.038244411142816, "grad_norm": 0.3402320682171515, "learning_rate": 5.866916372243399e-06, "loss": 0.4618, "step": 12411 }, { "epoch": 2.038408638351159, "grad_norm": 0.284775187844616, "learning_rate": 5.866469552102506e-06, "loss": 0.4524, "step": 12412 }, { "epoch": 2.0385728655595017, "grad_norm": 0.4978827495915803, "learning_rate": 5.866022717120411e-06, "loss": 0.434, "step": 12413 }, { "epoch": 2.0387370927678443, "grad_norm": 0.8185449778955562, "learning_rate": 5.865575867301965e-06, "loss": 0.4595, "step": 12414 }, { "epoch": 2.038901319976187, "grad_norm": 0.3231777306250387, "learning_rate": 5.8651290026520205e-06, "loss": 0.4369, "step": 12415 }, { "epoch": 2.0390655471845296, "grad_norm": 0.329589659019701, "learning_rate": 5.864682123175433e-06, "loss": 0.4475, "step": 12416 }, { "epoch": 2.0392297743928727, "grad_norm": 1.1849958099030102, "learning_rate": 5.864235228877056e-06, "loss": 0.4255, "step": 12417 }, { "epoch": 2.0393940016012153, "grad_norm": 0.33656368419001453, "learning_rate": 5.86378831976174e-06, "loss": 0.4348, "step": 12418 }, { "epoch": 2.039558228809558, "grad_norm": 0.3167560065962795, "learning_rate": 5.863341395834341e-06, "loss": 0.4144, "step": 12419 }, { "epoch": 2.0397224560179006, "grad_norm": 0.3695074006748326, "learning_rate": 5.862894457099714e-06, "loss": 0.4503, "step": 12420 }, { "epoch": 2.0398866832262437, "grad_norm": 0.3667195996387198, "learning_rate": 5.862447503562713e-06, "loss": 0.4379, "step": 12421 }, { "epoch": 2.0400509104345863, "grad_norm": 0.39032351094667883, "learning_rate": 5.862000535228191e-06, "loss": 0.4329, "step": 12422 }, { "epoch": 2.040215137642929, "grad_norm": 0.3380516948636253, "learning_rate": 5.861553552101003e-06, "loss": 0.4586, "step": 12423 }, { "epoch": 2.0403793648512716, "grad_norm": 0.35621909121959594, "learning_rate": 5.861106554186003e-06, "loss": 0.4533, "step": 12424 }, { "epoch": 2.0405435920596147, "grad_norm": 0.3354530237084805, "learning_rate": 5.860659541488048e-06, "loss": 0.4391, "step": 12425 }, { "epoch": 2.0407078192679573, "grad_norm": 0.2989633077547237, "learning_rate": 5.860212514011992e-06, "loss": 0.4241, "step": 12426 }, { "epoch": 2.0408720464763, "grad_norm": 0.35862761603268756, "learning_rate": 5.859765471762688e-06, "loss": 0.4446, "step": 12427 }, { "epoch": 2.0410362736846426, "grad_norm": 0.30444330966462657, "learning_rate": 5.859318414744995e-06, "loss": 0.474, "step": 12428 }, { "epoch": 2.0412005008929857, "grad_norm": 0.352540677765662, "learning_rate": 5.8588713429637655e-06, "loss": 0.4387, "step": 12429 }, { "epoch": 2.0413647281013283, "grad_norm": 0.3778446185553416, "learning_rate": 5.8584242564238566e-06, "loss": 0.4591, "step": 12430 }, { "epoch": 2.041528955309671, "grad_norm": 0.3491850807666615, "learning_rate": 5.857977155130124e-06, "loss": 0.4621, "step": 12431 }, { "epoch": 2.0416931825180136, "grad_norm": 0.38612813413715863, "learning_rate": 5.857530039087423e-06, "loss": 0.4732, "step": 12432 }, { "epoch": 2.041857409726356, "grad_norm": 0.3933909296763072, "learning_rate": 5.85708290830061e-06, "loss": 0.4572, "step": 12433 }, { "epoch": 2.0420216369346993, "grad_norm": 0.45460547103818244, "learning_rate": 5.856635762774542e-06, "loss": 0.4554, "step": 12434 }, { "epoch": 2.042185864143042, "grad_norm": 0.4897357475104495, "learning_rate": 5.856188602514075e-06, "loss": 0.4549, "step": 12435 }, { "epoch": 2.0423500913513846, "grad_norm": 0.37141597461045756, "learning_rate": 5.855741427524066e-06, "loss": 0.4341, "step": 12436 }, { "epoch": 2.042514318559727, "grad_norm": 0.3152170933745343, "learning_rate": 5.8552942378093694e-06, "loss": 0.4383, "step": 12437 }, { "epoch": 2.0426785457680703, "grad_norm": 0.3315546517567952, "learning_rate": 5.854847033374845e-06, "loss": 0.4422, "step": 12438 }, { "epoch": 2.042842772976413, "grad_norm": 0.34849627509879655, "learning_rate": 5.854399814225349e-06, "loss": 0.4534, "step": 12439 }, { "epoch": 2.0430070001847556, "grad_norm": 0.3415060059087718, "learning_rate": 5.8539525803657375e-06, "loss": 0.4244, "step": 12440 }, { "epoch": 2.043171227393098, "grad_norm": 0.39313173165872756, "learning_rate": 5.85350533180087e-06, "loss": 0.4217, "step": 12441 }, { "epoch": 2.0433354546014413, "grad_norm": 0.3429924779240633, "learning_rate": 5.853058068535603e-06, "loss": 0.4577, "step": 12442 }, { "epoch": 2.043499681809784, "grad_norm": 0.32928596156929474, "learning_rate": 5.852610790574793e-06, "loss": 0.4566, "step": 12443 }, { "epoch": 2.0436639090181266, "grad_norm": 0.47849256644842075, "learning_rate": 5.8521634979232995e-06, "loss": 0.4444, "step": 12444 }, { "epoch": 2.043828136226469, "grad_norm": 0.3687608357126545, "learning_rate": 5.851716190585981e-06, "loss": 0.4318, "step": 12445 }, { "epoch": 2.0439923634348123, "grad_norm": 0.32895085912600486, "learning_rate": 5.851268868567694e-06, "loss": 0.4203, "step": 12446 }, { "epoch": 2.044156590643155, "grad_norm": 0.37289343076052367, "learning_rate": 5.850821531873298e-06, "loss": 0.4433, "step": 12447 }, { "epoch": 2.0443208178514976, "grad_norm": 0.31859032614581423, "learning_rate": 5.8503741805076496e-06, "loss": 0.4338, "step": 12448 }, { "epoch": 2.04448504505984, "grad_norm": 0.2887224225883823, "learning_rate": 5.8499268144756104e-06, "loss": 0.4357, "step": 12449 }, { "epoch": 2.044649272268183, "grad_norm": 0.4697271025351198, "learning_rate": 5.8494794337820375e-06, "loss": 0.452, "step": 12450 }, { "epoch": 2.044813499476526, "grad_norm": 0.3282934512860411, "learning_rate": 5.849032038431792e-06, "loss": 0.4449, "step": 12451 }, { "epoch": 2.0449777266848685, "grad_norm": 0.385047656440147, "learning_rate": 5.8485846284297285e-06, "loss": 0.4388, "step": 12452 }, { "epoch": 2.045141953893211, "grad_norm": 0.36439150850801005, "learning_rate": 5.848137203780709e-06, "loss": 0.4558, "step": 12453 }, { "epoch": 2.045306181101554, "grad_norm": 0.40450690234416153, "learning_rate": 5.847689764489595e-06, "loss": 0.4337, "step": 12454 }, { "epoch": 2.045470408309897, "grad_norm": 0.43383659347572107, "learning_rate": 5.847242310561243e-06, "loss": 0.444, "step": 12455 }, { "epoch": 2.0456346355182395, "grad_norm": 0.33992573109644364, "learning_rate": 5.846794842000516e-06, "loss": 0.4302, "step": 12456 }, { "epoch": 2.045798862726582, "grad_norm": 0.3404891501105816, "learning_rate": 5.84634735881227e-06, "loss": 0.4315, "step": 12457 }, { "epoch": 2.045963089934925, "grad_norm": 0.3948929516683718, "learning_rate": 5.845899861001367e-06, "loss": 0.4352, "step": 12458 }, { "epoch": 2.046127317143268, "grad_norm": 0.36232433689948934, "learning_rate": 5.845452348572668e-06, "loss": 0.4228, "step": 12459 }, { "epoch": 2.0462915443516105, "grad_norm": 0.4879837670116479, "learning_rate": 5.845004821531033e-06, "loss": 0.4479, "step": 12460 }, { "epoch": 2.046455771559953, "grad_norm": 0.43242467602156015, "learning_rate": 5.844557279881321e-06, "loss": 0.4338, "step": 12461 }, { "epoch": 2.046619998768296, "grad_norm": 0.46972252076723947, "learning_rate": 5.844109723628395e-06, "loss": 0.4296, "step": 12462 }, { "epoch": 2.046784225976639, "grad_norm": 0.348287060668494, "learning_rate": 5.843662152777117e-06, "loss": 0.4462, "step": 12463 }, { "epoch": 2.0469484531849815, "grad_norm": 0.3554017413497341, "learning_rate": 5.843214567332343e-06, "loss": 0.4494, "step": 12464 }, { "epoch": 2.047112680393324, "grad_norm": 0.7426503419751455, "learning_rate": 5.842766967298939e-06, "loss": 0.4375, "step": 12465 }, { "epoch": 2.047276907601667, "grad_norm": 0.34331608608976466, "learning_rate": 5.842319352681763e-06, "loss": 0.4527, "step": 12466 }, { "epoch": 2.0474411348100094, "grad_norm": 0.326253689959674, "learning_rate": 5.8418717234856785e-06, "loss": 0.4347, "step": 12467 }, { "epoch": 2.0476053620183525, "grad_norm": 1.2139001651559362, "learning_rate": 5.841424079715548e-06, "loss": 0.4685, "step": 12468 }, { "epoch": 2.047769589226695, "grad_norm": 1.809298572630905, "learning_rate": 5.840976421376231e-06, "loss": 0.4267, "step": 12469 }, { "epoch": 2.047933816435038, "grad_norm": 0.643982392760066, "learning_rate": 5.840528748472593e-06, "loss": 0.4511, "step": 12470 }, { "epoch": 2.0480980436433804, "grad_norm": 0.4200958520866591, "learning_rate": 5.84008106100949e-06, "loss": 0.4756, "step": 12471 }, { "epoch": 2.0482622708517235, "grad_norm": 0.3785922765747419, "learning_rate": 5.839633358991792e-06, "loss": 0.4455, "step": 12472 }, { "epoch": 2.048426498060066, "grad_norm": 0.334283585605747, "learning_rate": 5.839185642424356e-06, "loss": 0.4554, "step": 12473 }, { "epoch": 2.048590725268409, "grad_norm": 0.2979365135820452, "learning_rate": 5.838737911312046e-06, "loss": 0.4394, "step": 12474 }, { "epoch": 2.0487549524767514, "grad_norm": 0.3928511038792992, "learning_rate": 5.838290165659726e-06, "loss": 0.4309, "step": 12475 }, { "epoch": 2.0489191796850945, "grad_norm": 0.42167196945885604, "learning_rate": 5.837842405472259e-06, "loss": 0.4202, "step": 12476 }, { "epoch": 2.049083406893437, "grad_norm": 0.384941912400613, "learning_rate": 5.837394630754504e-06, "loss": 0.4454, "step": 12477 }, { "epoch": 2.04924763410178, "grad_norm": 0.6027380809373334, "learning_rate": 5.83694684151133e-06, "loss": 0.4436, "step": 12478 }, { "epoch": 2.0494118613101224, "grad_norm": 0.5500830911834226, "learning_rate": 5.836499037747598e-06, "loss": 0.4446, "step": 12479 }, { "epoch": 2.0495760885184655, "grad_norm": 0.49515778776697733, "learning_rate": 5.836051219468171e-06, "loss": 0.4372, "step": 12480 }, { "epoch": 2.049740315726808, "grad_norm": 0.43075419664571624, "learning_rate": 5.835603386677913e-06, "loss": 0.4429, "step": 12481 }, { "epoch": 2.049904542935151, "grad_norm": 0.3542392734227228, "learning_rate": 5.8351555393816885e-06, "loss": 0.4262, "step": 12482 }, { "epoch": 2.0500687701434934, "grad_norm": 0.38268666364407705, "learning_rate": 5.8347076775843604e-06, "loss": 0.4303, "step": 12483 }, { "epoch": 2.050232997351836, "grad_norm": 0.47765234932548395, "learning_rate": 5.834259801290795e-06, "loss": 0.4498, "step": 12484 }, { "epoch": 2.050397224560179, "grad_norm": 0.41345370663759506, "learning_rate": 5.833811910505855e-06, "loss": 0.4189, "step": 12485 }, { "epoch": 2.050561451768522, "grad_norm": 0.4104341797134906, "learning_rate": 5.833364005234404e-06, "loss": 0.4538, "step": 12486 }, { "epoch": 2.0507256789768644, "grad_norm": 0.31647737223956823, "learning_rate": 5.83291608548131e-06, "loss": 0.4407, "step": 12487 }, { "epoch": 2.050889906185207, "grad_norm": 0.37302273496194177, "learning_rate": 5.832468151251435e-06, "loss": 0.4433, "step": 12488 }, { "epoch": 2.05105413339355, "grad_norm": 0.3554326401961096, "learning_rate": 5.832020202549644e-06, "loss": 0.4432, "step": 12489 }, { "epoch": 2.0512183606018928, "grad_norm": 0.491954272790454, "learning_rate": 5.831572239380806e-06, "loss": 0.4564, "step": 12490 }, { "epoch": 2.0513825878102354, "grad_norm": 0.3789475522315765, "learning_rate": 5.831124261749781e-06, "loss": 0.4495, "step": 12491 }, { "epoch": 2.051546815018578, "grad_norm": 0.647424818774864, "learning_rate": 5.830676269661436e-06, "loss": 0.4501, "step": 12492 }, { "epoch": 2.051711042226921, "grad_norm": 0.5823413883449976, "learning_rate": 5.830228263120641e-06, "loss": 0.4213, "step": 12493 }, { "epoch": 2.0518752694352638, "grad_norm": 0.27813242543412897, "learning_rate": 5.829780242132256e-06, "loss": 0.4397, "step": 12494 }, { "epoch": 2.0520394966436064, "grad_norm": 0.3284450588600766, "learning_rate": 5.829332206701149e-06, "loss": 0.443, "step": 12495 }, { "epoch": 2.052203723851949, "grad_norm": 0.3445192485646923, "learning_rate": 5.828884156832186e-06, "loss": 0.4518, "step": 12496 }, { "epoch": 2.052367951060292, "grad_norm": 0.549298740102889, "learning_rate": 5.828436092530235e-06, "loss": 0.4444, "step": 12497 }, { "epoch": 2.0525321782686348, "grad_norm": 0.46014439109326594, "learning_rate": 5.82798801380016e-06, "loss": 0.4532, "step": 12498 }, { "epoch": 2.0526964054769774, "grad_norm": 0.28368044288710964, "learning_rate": 5.8275399206468304e-06, "loss": 0.441, "step": 12499 }, { "epoch": 2.05286063268532, "grad_norm": 0.3968388641653914, "learning_rate": 5.8270918130751085e-06, "loss": 0.4459, "step": 12500 }, { "epoch": 2.0530248598936627, "grad_norm": 0.3710478412314034, "learning_rate": 5.8266436910898656e-06, "loss": 0.463, "step": 12501 }, { "epoch": 2.0531890871020058, "grad_norm": 0.313924593295772, "learning_rate": 5.826195554695966e-06, "loss": 0.4392, "step": 12502 }, { "epoch": 2.0533533143103484, "grad_norm": 0.4580347375031826, "learning_rate": 5.825747403898278e-06, "loss": 0.4503, "step": 12503 }, { "epoch": 2.053517541518691, "grad_norm": 0.4132435728401116, "learning_rate": 5.825299238701669e-06, "loss": 0.4559, "step": 12504 }, { "epoch": 2.0536817687270337, "grad_norm": 0.36199062801530246, "learning_rate": 5.824851059111007e-06, "loss": 0.4483, "step": 12505 }, { "epoch": 2.0538459959353768, "grad_norm": 0.4748756143679125, "learning_rate": 5.824402865131159e-06, "loss": 0.4575, "step": 12506 }, { "epoch": 2.0540102231437194, "grad_norm": 0.3218339347858069, "learning_rate": 5.823954656766991e-06, "loss": 0.4504, "step": 12507 }, { "epoch": 2.054174450352062, "grad_norm": 0.31580790798058256, "learning_rate": 5.823506434023374e-06, "loss": 0.4397, "step": 12508 }, { "epoch": 2.0543386775604047, "grad_norm": 0.35353519486956647, "learning_rate": 5.823058196905177e-06, "loss": 0.4517, "step": 12509 }, { "epoch": 2.0545029047687478, "grad_norm": 0.46529793989703433, "learning_rate": 5.8226099454172644e-06, "loss": 0.458, "step": 12510 }, { "epoch": 2.0546671319770904, "grad_norm": 0.3205124315397412, "learning_rate": 5.822161679564506e-06, "loss": 0.4364, "step": 12511 }, { "epoch": 2.054831359185433, "grad_norm": 0.3610849789933956, "learning_rate": 5.821713399351771e-06, "loss": 0.4436, "step": 12512 }, { "epoch": 2.0549955863937757, "grad_norm": 0.3604424163085528, "learning_rate": 5.821265104783929e-06, "loss": 0.4686, "step": 12513 }, { "epoch": 2.0551598136021187, "grad_norm": 0.3869786566160952, "learning_rate": 5.820816795865848e-06, "loss": 0.4518, "step": 12514 }, { "epoch": 2.0553240408104614, "grad_norm": 0.31366177202073153, "learning_rate": 5.8203684726023965e-06, "loss": 0.4382, "step": 12515 }, { "epoch": 2.055488268018804, "grad_norm": 0.3816416592520499, "learning_rate": 5.819920134998445e-06, "loss": 0.4486, "step": 12516 }, { "epoch": 2.0556524952271467, "grad_norm": 0.36611323579211497, "learning_rate": 5.819471783058861e-06, "loss": 0.4636, "step": 12517 }, { "epoch": 2.0558167224354893, "grad_norm": 0.3926812761164695, "learning_rate": 5.8190234167885164e-06, "loss": 0.4313, "step": 12518 }, { "epoch": 2.0559809496438324, "grad_norm": 0.3325452728082553, "learning_rate": 5.818575036192279e-06, "loss": 0.4638, "step": 12519 }, { "epoch": 2.056145176852175, "grad_norm": 0.42067798281032054, "learning_rate": 5.81812664127502e-06, "loss": 0.4637, "step": 12520 }, { "epoch": 2.0563094040605177, "grad_norm": 0.318034404416098, "learning_rate": 5.817678232041608e-06, "loss": 0.4283, "step": 12521 }, { "epoch": 2.0564736312688603, "grad_norm": 0.48940136510980775, "learning_rate": 5.817229808496915e-06, "loss": 0.4436, "step": 12522 }, { "epoch": 2.0566378584772034, "grad_norm": 0.2882544661032187, "learning_rate": 5.816781370645809e-06, "loss": 0.4309, "step": 12523 }, { "epoch": 2.056802085685546, "grad_norm": 0.5042845819900779, "learning_rate": 5.816332918493164e-06, "loss": 0.4497, "step": 12524 }, { "epoch": 2.0569663128938886, "grad_norm": 0.29557746058877027, "learning_rate": 5.815884452043846e-06, "loss": 0.4365, "step": 12525 }, { "epoch": 2.0571305401022313, "grad_norm": 0.33012923255531806, "learning_rate": 5.81543597130273e-06, "loss": 0.4569, "step": 12526 }, { "epoch": 2.0572947673105744, "grad_norm": 0.4815314599818145, "learning_rate": 5.8149874762746844e-06, "loss": 0.4455, "step": 12527 }, { "epoch": 2.057458994518917, "grad_norm": 0.30529200972710946, "learning_rate": 5.814538966964581e-06, "loss": 0.4355, "step": 12528 }, { "epoch": 2.0576232217272596, "grad_norm": 0.2781313905407027, "learning_rate": 5.814090443377291e-06, "loss": 0.4394, "step": 12529 }, { "epoch": 2.0577874489356023, "grad_norm": 0.33271003194262444, "learning_rate": 5.813641905517687e-06, "loss": 0.4605, "step": 12530 }, { "epoch": 2.0579516761439454, "grad_norm": 0.33716505625152493, "learning_rate": 5.813193353390637e-06, "loss": 0.4708, "step": 12531 }, { "epoch": 2.058115903352288, "grad_norm": 0.353896302279186, "learning_rate": 5.812744787001017e-06, "loss": 0.4373, "step": 12532 }, { "epoch": 2.0582801305606306, "grad_norm": 0.58896944714371, "learning_rate": 5.812296206353696e-06, "loss": 0.4643, "step": 12533 }, { "epoch": 2.0584443577689733, "grad_norm": 0.292337170150739, "learning_rate": 5.811847611453549e-06, "loss": 0.4405, "step": 12534 }, { "epoch": 2.058608584977316, "grad_norm": 0.29791135402795793, "learning_rate": 5.811399002305445e-06, "loss": 0.4583, "step": 12535 }, { "epoch": 2.058772812185659, "grad_norm": 0.29900219174218134, "learning_rate": 5.810950378914256e-06, "loss": 0.4568, "step": 12536 }, { "epoch": 2.0589370393940016, "grad_norm": 0.28608728842480263, "learning_rate": 5.810501741284858e-06, "loss": 0.4557, "step": 12537 }, { "epoch": 2.0591012666023443, "grad_norm": 0.3254239739716752, "learning_rate": 5.8100530894221215e-06, "loss": 0.4335, "step": 12538 }, { "epoch": 2.059265493810687, "grad_norm": 1.5228037555250937, "learning_rate": 5.80960442333092e-06, "loss": 0.4402, "step": 12539 }, { "epoch": 2.05942972101903, "grad_norm": 0.4226075001082411, "learning_rate": 5.809155743016125e-06, "loss": 0.4543, "step": 12540 }, { "epoch": 2.0595939482273726, "grad_norm": 0.3498474294927714, "learning_rate": 5.80870704848261e-06, "loss": 0.4519, "step": 12541 }, { "epoch": 2.0597581754357153, "grad_norm": 0.31951437366502333, "learning_rate": 5.808258339735251e-06, "loss": 0.4289, "step": 12542 }, { "epoch": 2.059922402644058, "grad_norm": 0.3069067592827343, "learning_rate": 5.807809616778918e-06, "loss": 0.448, "step": 12543 }, { "epoch": 2.060086629852401, "grad_norm": 0.34492891136792414, "learning_rate": 5.807360879618486e-06, "loss": 0.4223, "step": 12544 }, { "epoch": 2.0602508570607436, "grad_norm": 0.37131805354396374, "learning_rate": 5.806912128258828e-06, "loss": 0.4387, "step": 12545 }, { "epoch": 2.0604150842690863, "grad_norm": 0.34516089438021247, "learning_rate": 5.806463362704819e-06, "loss": 0.4268, "step": 12546 }, { "epoch": 2.060579311477429, "grad_norm": 0.34676052883678987, "learning_rate": 5.806014582961333e-06, "loss": 0.4409, "step": 12547 }, { "epoch": 2.060743538685772, "grad_norm": 0.32879785289901325, "learning_rate": 5.805565789033244e-06, "loss": 0.4446, "step": 12548 }, { "epoch": 2.0609077658941146, "grad_norm": 0.33290467087799636, "learning_rate": 5.805116980925425e-06, "loss": 0.4405, "step": 12549 }, { "epoch": 2.0610719931024573, "grad_norm": 0.37078533186888135, "learning_rate": 5.80466815864275e-06, "loss": 0.4466, "step": 12550 }, { "epoch": 2.0612362203108, "grad_norm": 0.2745242092923762, "learning_rate": 5.804219322190098e-06, "loss": 0.4486, "step": 12551 }, { "epoch": 2.0614004475191425, "grad_norm": 0.36904796252237204, "learning_rate": 5.80377047157234e-06, "loss": 0.4519, "step": 12552 }, { "epoch": 2.0615646747274856, "grad_norm": 0.5335812773510453, "learning_rate": 5.8033216067943515e-06, "loss": 0.4558, "step": 12553 }, { "epoch": 2.0617289019358283, "grad_norm": 0.3053953391869156, "learning_rate": 5.802872727861009e-06, "loss": 0.4586, "step": 12554 }, { "epoch": 2.061893129144171, "grad_norm": 0.40520225583447983, "learning_rate": 5.802423834777186e-06, "loss": 0.4351, "step": 12555 }, { "epoch": 2.0620573563525135, "grad_norm": 0.3988187826185149, "learning_rate": 5.801974927547758e-06, "loss": 0.4356, "step": 12556 }, { "epoch": 2.0622215835608566, "grad_norm": 0.3380322407657435, "learning_rate": 5.8015260061776024e-06, "loss": 0.4604, "step": 12557 }, { "epoch": 2.0623858107691992, "grad_norm": 0.3348430293607122, "learning_rate": 5.801077070671595e-06, "loss": 0.4293, "step": 12558 }, { "epoch": 2.062550037977542, "grad_norm": 0.3842683264535958, "learning_rate": 5.80062812103461e-06, "loss": 0.4341, "step": 12559 }, { "epoch": 2.0627142651858845, "grad_norm": 0.30200025238816425, "learning_rate": 5.800179157271522e-06, "loss": 0.4701, "step": 12560 }, { "epoch": 2.0628784923942276, "grad_norm": 0.31908377463654947, "learning_rate": 5.79973017938721e-06, "loss": 0.4494, "step": 12561 }, { "epoch": 2.0630427196025702, "grad_norm": 0.5005507434852071, "learning_rate": 5.7992811873865496e-06, "loss": 0.451, "step": 12562 }, { "epoch": 2.063206946810913, "grad_norm": 0.366742905796703, "learning_rate": 5.7988321812744175e-06, "loss": 0.4317, "step": 12563 }, { "epoch": 2.0633711740192555, "grad_norm": 0.5555018777431691, "learning_rate": 5.798383161055691e-06, "loss": 0.4484, "step": 12564 }, { "epoch": 2.0635354012275986, "grad_norm": 0.3180625989819009, "learning_rate": 5.797934126735244e-06, "loss": 0.4381, "step": 12565 }, { "epoch": 2.0636996284359412, "grad_norm": 0.32545313266240605, "learning_rate": 5.797485078317956e-06, "loss": 0.4591, "step": 12566 }, { "epoch": 2.063863855644284, "grad_norm": 0.43023638563585087, "learning_rate": 5.797036015808704e-06, "loss": 0.4502, "step": 12567 }, { "epoch": 2.0640280828526265, "grad_norm": 0.3400815349047949, "learning_rate": 5.796586939212365e-06, "loss": 0.458, "step": 12568 }, { "epoch": 2.064192310060969, "grad_norm": 0.34321856939719503, "learning_rate": 5.796137848533816e-06, "loss": 0.4243, "step": 12569 }, { "epoch": 2.0643565372693122, "grad_norm": 0.3930605011253571, "learning_rate": 5.795688743777934e-06, "loss": 0.4503, "step": 12570 }, { "epoch": 2.064520764477655, "grad_norm": 0.3441430685584969, "learning_rate": 5.795239624949597e-06, "loss": 0.4525, "step": 12571 }, { "epoch": 2.0646849916859975, "grad_norm": 0.3092235368191995, "learning_rate": 5.794790492053685e-06, "loss": 0.4407, "step": 12572 }, { "epoch": 2.06484921889434, "grad_norm": 0.2956042164630155, "learning_rate": 5.7943413450950745e-06, "loss": 0.429, "step": 12573 }, { "epoch": 2.0650134461026832, "grad_norm": 0.5785693723379479, "learning_rate": 5.793892184078642e-06, "loss": 0.4476, "step": 12574 }, { "epoch": 2.065177673311026, "grad_norm": 0.4050586734624316, "learning_rate": 5.793443009009268e-06, "loss": 0.4385, "step": 12575 }, { "epoch": 2.0653419005193685, "grad_norm": 0.44417807478608184, "learning_rate": 5.792993819891831e-06, "loss": 0.4655, "step": 12576 }, { "epoch": 2.065506127727711, "grad_norm": 0.32094819856464846, "learning_rate": 5.792544616731208e-06, "loss": 0.4371, "step": 12577 }, { "epoch": 2.065670354936054, "grad_norm": 0.3225322891675443, "learning_rate": 5.792095399532279e-06, "loss": 0.4388, "step": 12578 }, { "epoch": 2.065834582144397, "grad_norm": 0.3611766104446534, "learning_rate": 5.791646168299923e-06, "loss": 0.44, "step": 12579 }, { "epoch": 2.0659988093527395, "grad_norm": 0.41265178472084757, "learning_rate": 5.791196923039019e-06, "loss": 0.4262, "step": 12580 }, { "epoch": 2.066163036561082, "grad_norm": 0.5685535710349041, "learning_rate": 5.790747663754445e-06, "loss": 0.4407, "step": 12581 }, { "epoch": 2.066327263769425, "grad_norm": 0.4290513029494653, "learning_rate": 5.790298390451083e-06, "loss": 0.4375, "step": 12582 }, { "epoch": 2.066491490977768, "grad_norm": 0.31193517737081866, "learning_rate": 5.78984910313381e-06, "loss": 0.4455, "step": 12583 }, { "epoch": 2.0666557181861105, "grad_norm": 0.7311649893085822, "learning_rate": 5.789399801807506e-06, "loss": 0.4621, "step": 12584 }, { "epoch": 2.066819945394453, "grad_norm": 0.35584981635411755, "learning_rate": 5.7889504864770525e-06, "loss": 0.435, "step": 12585 }, { "epoch": 2.0669841726027958, "grad_norm": 0.34996905818049884, "learning_rate": 5.788501157147328e-06, "loss": 0.4436, "step": 12586 }, { "epoch": 2.067148399811139, "grad_norm": 0.2944915229752169, "learning_rate": 5.788051813823214e-06, "loss": 0.4583, "step": 12587 }, { "epoch": 2.0673126270194815, "grad_norm": 0.34794445160376564, "learning_rate": 5.78760245650959e-06, "loss": 0.4608, "step": 12588 }, { "epoch": 2.067476854227824, "grad_norm": 0.31060770589110037, "learning_rate": 5.787153085211336e-06, "loss": 0.4636, "step": 12589 }, { "epoch": 2.0676410814361668, "grad_norm": 0.36268150169000246, "learning_rate": 5.786703699933333e-06, "loss": 0.4428, "step": 12590 }, { "epoch": 2.06780530864451, "grad_norm": 0.4948601709280069, "learning_rate": 5.786254300680463e-06, "loss": 0.4552, "step": 12591 }, { "epoch": 2.0679695358528525, "grad_norm": 0.8228867505642475, "learning_rate": 5.785804887457604e-06, "loss": 0.4567, "step": 12592 }, { "epoch": 2.068133763061195, "grad_norm": 0.33416355670733, "learning_rate": 5.78535546026964e-06, "loss": 0.4259, "step": 12593 }, { "epoch": 2.0682979902695378, "grad_norm": 0.4689217739481361, "learning_rate": 5.784906019121451e-06, "loss": 0.447, "step": 12594 }, { "epoch": 2.068462217477881, "grad_norm": 0.32276567150550434, "learning_rate": 5.784456564017918e-06, "loss": 0.4617, "step": 12595 }, { "epoch": 2.0686264446862235, "grad_norm": 0.3707358256052318, "learning_rate": 5.784007094963924e-06, "loss": 0.4451, "step": 12596 }, { "epoch": 2.068790671894566, "grad_norm": 0.7082352154662992, "learning_rate": 5.783557611964349e-06, "loss": 0.457, "step": 12597 }, { "epoch": 2.0689548991029088, "grad_norm": 0.3352628121247371, "learning_rate": 5.783108115024076e-06, "loss": 0.4379, "step": 12598 }, { "epoch": 2.069119126311252, "grad_norm": 0.33373071703006074, "learning_rate": 5.782658604147985e-06, "loss": 0.4269, "step": 12599 }, { "epoch": 2.0692833535195945, "grad_norm": 0.3890245550176744, "learning_rate": 5.78220907934096e-06, "loss": 0.4238, "step": 12600 }, { "epoch": 2.069447580727937, "grad_norm": 0.42551587495552057, "learning_rate": 5.781759540607884e-06, "loss": 0.4379, "step": 12601 }, { "epoch": 2.0696118079362797, "grad_norm": 0.3848246045704332, "learning_rate": 5.781309987953638e-06, "loss": 0.4355, "step": 12602 }, { "epoch": 2.0697760351446224, "grad_norm": 0.39510346983773065, "learning_rate": 5.780860421383105e-06, "loss": 0.445, "step": 12603 }, { "epoch": 2.0699402623529655, "grad_norm": 0.5293818907890222, "learning_rate": 5.780410840901166e-06, "loss": 0.4388, "step": 12604 }, { "epoch": 2.070104489561308, "grad_norm": 0.43186625564440984, "learning_rate": 5.779961246512707e-06, "loss": 0.4424, "step": 12605 }, { "epoch": 2.0702687167696507, "grad_norm": 0.35612676312621716, "learning_rate": 5.77951163822261e-06, "loss": 0.4324, "step": 12606 }, { "epoch": 2.0704329439779934, "grad_norm": 0.44096187888292104, "learning_rate": 5.779062016035756e-06, "loss": 0.459, "step": 12607 }, { "epoch": 2.0705971711863365, "grad_norm": 0.3641439511112035, "learning_rate": 5.7786123799570305e-06, "loss": 0.4621, "step": 12608 }, { "epoch": 2.070761398394679, "grad_norm": 0.48210480440260733, "learning_rate": 5.778162729991317e-06, "loss": 0.4358, "step": 12609 }, { "epoch": 2.0709256256030217, "grad_norm": 0.40744258619042256, "learning_rate": 5.7777130661435004e-06, "loss": 0.4298, "step": 12610 }, { "epoch": 2.0710898528113644, "grad_norm": 0.2950203895635282, "learning_rate": 5.777263388418461e-06, "loss": 0.4276, "step": 12611 }, { "epoch": 2.0712540800197075, "grad_norm": 0.3186235372237503, "learning_rate": 5.776813696821085e-06, "loss": 0.4577, "step": 12612 }, { "epoch": 2.07141830722805, "grad_norm": 0.39173521730263494, "learning_rate": 5.776363991356255e-06, "loss": 0.4817, "step": 12613 }, { "epoch": 2.0715825344363927, "grad_norm": 0.37345701908829565, "learning_rate": 5.7759142720288586e-06, "loss": 0.4396, "step": 12614 }, { "epoch": 2.0717467616447354, "grad_norm": 0.6131889982299739, "learning_rate": 5.775464538843775e-06, "loss": 0.4282, "step": 12615 }, { "epoch": 2.0719109888530785, "grad_norm": 0.4123409725560399, "learning_rate": 5.775014791805894e-06, "loss": 0.4483, "step": 12616 }, { "epoch": 2.072075216061421, "grad_norm": 0.3181460531036635, "learning_rate": 5.7745650309200965e-06, "loss": 0.4546, "step": 12617 }, { "epoch": 2.0722394432697637, "grad_norm": 0.34628966447613285, "learning_rate": 5.77411525619127e-06, "loss": 0.4405, "step": 12618 }, { "epoch": 2.0724036704781064, "grad_norm": 0.32934812441671757, "learning_rate": 5.773665467624296e-06, "loss": 0.4236, "step": 12619 }, { "epoch": 2.072567897686449, "grad_norm": 0.3320241576727553, "learning_rate": 5.7732156652240635e-06, "loss": 0.4419, "step": 12620 }, { "epoch": 2.072732124894792, "grad_norm": 0.3751110180170982, "learning_rate": 5.772765848995457e-06, "loss": 0.42, "step": 12621 }, { "epoch": 2.0728963521031347, "grad_norm": 0.3094726411805009, "learning_rate": 5.772316018943361e-06, "loss": 0.452, "step": 12622 }, { "epoch": 2.0730605793114774, "grad_norm": 0.38697336585851455, "learning_rate": 5.771866175072659e-06, "loss": 0.4264, "step": 12623 }, { "epoch": 2.07322480651982, "grad_norm": 0.304288316654996, "learning_rate": 5.77141631738824e-06, "loss": 0.4484, "step": 12624 }, { "epoch": 2.073389033728163, "grad_norm": 0.482532511700392, "learning_rate": 5.770966445894991e-06, "loss": 0.4545, "step": 12625 }, { "epoch": 2.0735532609365057, "grad_norm": 0.32262406085428247, "learning_rate": 5.770516560597794e-06, "loss": 0.4462, "step": 12626 }, { "epoch": 2.0737174881448484, "grad_norm": 0.3325404739646796, "learning_rate": 5.770066661501538e-06, "loss": 0.4239, "step": 12627 }, { "epoch": 2.073881715353191, "grad_norm": 1.9006644872008762, "learning_rate": 5.769616748611106e-06, "loss": 0.4709, "step": 12628 }, { "epoch": 2.074045942561534, "grad_norm": 0.3423420126583943, "learning_rate": 5.769166821931389e-06, "loss": 0.4424, "step": 12629 }, { "epoch": 2.0742101697698767, "grad_norm": 0.29731610057754654, "learning_rate": 5.7687168814672726e-06, "loss": 0.4483, "step": 12630 }, { "epoch": 2.0743743969782193, "grad_norm": 0.3874440450920287, "learning_rate": 5.768266927223642e-06, "loss": 0.4681, "step": 12631 }, { "epoch": 2.074538624186562, "grad_norm": 0.3297323496352712, "learning_rate": 5.767816959205384e-06, "loss": 0.4278, "step": 12632 }, { "epoch": 2.074702851394905, "grad_norm": 0.3816910179675114, "learning_rate": 5.767366977417386e-06, "loss": 0.4577, "step": 12633 }, { "epoch": 2.0748670786032477, "grad_norm": 0.4387489677520473, "learning_rate": 5.766916981864536e-06, "loss": 0.4406, "step": 12634 }, { "epoch": 2.0750313058115903, "grad_norm": 0.3323500901032443, "learning_rate": 5.7664669725517215e-06, "loss": 0.4474, "step": 12635 }, { "epoch": 2.075195533019933, "grad_norm": 0.4094691211689768, "learning_rate": 5.766016949483831e-06, "loss": 0.4354, "step": 12636 }, { "epoch": 2.0753597602282756, "grad_norm": 0.3247612685603506, "learning_rate": 5.765566912665748e-06, "loss": 0.4453, "step": 12637 }, { "epoch": 2.0755239874366187, "grad_norm": 0.2992283474155525, "learning_rate": 5.765116862102365e-06, "loss": 0.4508, "step": 12638 }, { "epoch": 2.0756882146449613, "grad_norm": 0.2833987945361159, "learning_rate": 5.764666797798569e-06, "loss": 0.4493, "step": 12639 }, { "epoch": 2.075852441853304, "grad_norm": 0.36076810908372076, "learning_rate": 5.764216719759246e-06, "loss": 0.4424, "step": 12640 }, { "epoch": 2.0760166690616466, "grad_norm": 0.33496405013392627, "learning_rate": 5.763766627989285e-06, "loss": 0.4376, "step": 12641 }, { "epoch": 2.0761808962699897, "grad_norm": 0.3664945473785977, "learning_rate": 5.763316522493576e-06, "loss": 0.4511, "step": 12642 }, { "epoch": 2.0763451234783323, "grad_norm": 0.3160831448540927, "learning_rate": 5.7628664032770066e-06, "loss": 0.4547, "step": 12643 }, { "epoch": 2.076509350686675, "grad_norm": 0.3800779021900539, "learning_rate": 5.7624162703444655e-06, "loss": 0.4571, "step": 12644 }, { "epoch": 2.0766735778950176, "grad_norm": 0.3096148612282082, "learning_rate": 5.761966123700843e-06, "loss": 0.4465, "step": 12645 }, { "epoch": 2.0768378051033607, "grad_norm": 0.4700276065412499, "learning_rate": 5.761515963351024e-06, "loss": 0.4371, "step": 12646 }, { "epoch": 2.0770020323117033, "grad_norm": 0.5717112387339418, "learning_rate": 5.761065789299902e-06, "loss": 0.4733, "step": 12647 }, { "epoch": 2.077166259520046, "grad_norm": 0.3478262531191078, "learning_rate": 5.760615601552365e-06, "loss": 0.4252, "step": 12648 }, { "epoch": 2.0773304867283886, "grad_norm": 0.4208026301767626, "learning_rate": 5.760165400113301e-06, "loss": 0.4344, "step": 12649 }, { "epoch": 2.0774947139367317, "grad_norm": 0.3241596365519559, "learning_rate": 5.759715184987602e-06, "loss": 0.4352, "step": 12650 }, { "epoch": 2.0776589411450743, "grad_norm": 0.40095490723714017, "learning_rate": 5.7592649561801576e-06, "loss": 0.4414, "step": 12651 }, { "epoch": 2.077823168353417, "grad_norm": 0.34814818074051035, "learning_rate": 5.7588147136958555e-06, "loss": 0.4656, "step": 12652 }, { "epoch": 2.0779873955617596, "grad_norm": 0.5480012497939601, "learning_rate": 5.758364457539587e-06, "loss": 0.4658, "step": 12653 }, { "epoch": 2.0781516227701022, "grad_norm": 0.4022849269106388, "learning_rate": 5.757914187716242e-06, "loss": 0.4481, "step": 12654 }, { "epoch": 2.0783158499784453, "grad_norm": 0.3296491558960112, "learning_rate": 5.757463904230713e-06, "loss": 0.4312, "step": 12655 }, { "epoch": 2.078480077186788, "grad_norm": 0.291526171977682, "learning_rate": 5.757013607087888e-06, "loss": 0.4658, "step": 12656 }, { "epoch": 2.0786443043951306, "grad_norm": 0.3151420850285067, "learning_rate": 5.756563296292658e-06, "loss": 0.4162, "step": 12657 }, { "epoch": 2.0788085316034732, "grad_norm": 0.7250952171515853, "learning_rate": 5.756112971849915e-06, "loss": 0.4579, "step": 12658 }, { "epoch": 2.0789727588118163, "grad_norm": 0.323930778535311, "learning_rate": 5.755662633764549e-06, "loss": 0.4332, "step": 12659 }, { "epoch": 2.079136986020159, "grad_norm": 0.3319897590687082, "learning_rate": 5.755212282041452e-06, "loss": 0.4634, "step": 12660 }, { "epoch": 2.0793012132285016, "grad_norm": 0.2965645629673636, "learning_rate": 5.754761916685515e-06, "loss": 0.4494, "step": 12661 }, { "epoch": 2.0794654404368442, "grad_norm": 0.4789465039629421, "learning_rate": 5.754311537701626e-06, "loss": 0.4645, "step": 12662 }, { "epoch": 2.0796296676451873, "grad_norm": 0.3051466720049543, "learning_rate": 5.753861145094682e-06, "loss": 0.4367, "step": 12663 }, { "epoch": 2.07979389485353, "grad_norm": 0.34364344780338235, "learning_rate": 5.753410738869573e-06, "loss": 0.4287, "step": 12664 }, { "epoch": 2.0799581220618726, "grad_norm": 0.2942069240384221, "learning_rate": 5.75296031903119e-06, "loss": 0.4331, "step": 12665 }, { "epoch": 2.080122349270215, "grad_norm": 0.4048541793023706, "learning_rate": 5.752509885584423e-06, "loss": 0.4461, "step": 12666 }, { "epoch": 2.0802865764785583, "grad_norm": 0.3473334256844807, "learning_rate": 5.752059438534168e-06, "loss": 0.4515, "step": 12667 }, { "epoch": 2.080450803686901, "grad_norm": 0.33238636058550425, "learning_rate": 5.751608977885315e-06, "loss": 0.4528, "step": 12668 }, { "epoch": 2.0806150308952436, "grad_norm": 0.3399610070481964, "learning_rate": 5.751158503642758e-06, "loss": 0.4663, "step": 12669 }, { "epoch": 2.080779258103586, "grad_norm": 0.44808401171722956, "learning_rate": 5.750708015811389e-06, "loss": 0.4313, "step": 12670 }, { "epoch": 2.080943485311929, "grad_norm": 0.30559942252118855, "learning_rate": 5.7502575143960985e-06, "loss": 0.4576, "step": 12671 }, { "epoch": 2.081107712520272, "grad_norm": 0.29963645365849745, "learning_rate": 5.749806999401783e-06, "loss": 0.4201, "step": 12672 }, { "epoch": 2.0812719397286146, "grad_norm": 0.30932198493791446, "learning_rate": 5.7493564708333324e-06, "loss": 0.4507, "step": 12673 }, { "epoch": 2.081436166936957, "grad_norm": 0.38662130188894195, "learning_rate": 5.748905928695643e-06, "loss": 0.4598, "step": 12674 }, { "epoch": 2.0816003941453, "grad_norm": 0.3130379513175994, "learning_rate": 5.748455372993606e-06, "loss": 0.4262, "step": 12675 }, { "epoch": 2.081764621353643, "grad_norm": 0.28607888291252975, "learning_rate": 5.748004803732115e-06, "loss": 0.4517, "step": 12676 }, { "epoch": 2.0819288485619856, "grad_norm": 0.2895532108578058, "learning_rate": 5.747554220916065e-06, "loss": 0.4261, "step": 12677 }, { "epoch": 2.082093075770328, "grad_norm": 0.4363820417166602, "learning_rate": 5.747103624550347e-06, "loss": 0.4455, "step": 12678 }, { "epoch": 2.082257302978671, "grad_norm": 0.35167816630302146, "learning_rate": 5.7466530146398576e-06, "loss": 0.4309, "step": 12679 }, { "epoch": 2.082421530187014, "grad_norm": 0.3577392213525, "learning_rate": 5.746202391189488e-06, "loss": 0.4519, "step": 12680 }, { "epoch": 2.0825857573953566, "grad_norm": 0.33848902980111123, "learning_rate": 5.745751754204137e-06, "loss": 0.4325, "step": 12681 }, { "epoch": 2.082749984603699, "grad_norm": 0.3605086606372533, "learning_rate": 5.7453011036886955e-06, "loss": 0.4356, "step": 12682 }, { "epoch": 2.082914211812042, "grad_norm": 0.35835761281442624, "learning_rate": 5.744850439648058e-06, "loss": 0.4476, "step": 12683 }, { "epoch": 2.083078439020385, "grad_norm": 0.29445262431486496, "learning_rate": 5.744399762087121e-06, "loss": 0.4385, "step": 12684 }, { "epoch": 2.0832426662287276, "grad_norm": 0.2838943444170344, "learning_rate": 5.7439490710107785e-06, "loss": 0.4305, "step": 12685 }, { "epoch": 2.08340689343707, "grad_norm": 0.39643038537134845, "learning_rate": 5.743498366423923e-06, "loss": 0.4353, "step": 12686 }, { "epoch": 2.083571120645413, "grad_norm": 0.26822456161375324, "learning_rate": 5.743047648331453e-06, "loss": 0.4378, "step": 12687 }, { "epoch": 2.0837353478537555, "grad_norm": 0.3759392038585772, "learning_rate": 5.742596916738263e-06, "loss": 0.4605, "step": 12688 }, { "epoch": 2.0838995750620986, "grad_norm": 0.39420292932984524, "learning_rate": 5.742146171649249e-06, "loss": 0.4342, "step": 12689 }, { "epoch": 2.084063802270441, "grad_norm": 0.4549535925788061, "learning_rate": 5.741695413069304e-06, "loss": 0.4289, "step": 12690 }, { "epoch": 2.084228029478784, "grad_norm": 0.3253546959983401, "learning_rate": 5.741244641003325e-06, "loss": 0.439, "step": 12691 }, { "epoch": 2.0843922566871265, "grad_norm": 0.2965903307460332, "learning_rate": 5.740793855456207e-06, "loss": 0.4389, "step": 12692 }, { "epoch": 2.0845564838954695, "grad_norm": 0.32124029350309213, "learning_rate": 5.74034305643285e-06, "loss": 0.4189, "step": 12693 }, { "epoch": 2.084720711103812, "grad_norm": 0.3386294509403138, "learning_rate": 5.7398922439381455e-06, "loss": 0.4544, "step": 12694 }, { "epoch": 2.084884938312155, "grad_norm": 0.316581751906967, "learning_rate": 5.739441417976991e-06, "loss": 0.4422, "step": 12695 }, { "epoch": 2.0850491655204975, "grad_norm": 0.3995425057420119, "learning_rate": 5.738990578554283e-06, "loss": 0.435, "step": 12696 }, { "epoch": 2.0852133927288405, "grad_norm": 0.41720524884760174, "learning_rate": 5.73853972567492e-06, "loss": 0.4638, "step": 12697 }, { "epoch": 2.085377619937183, "grad_norm": 0.34690086468609344, "learning_rate": 5.738088859343795e-06, "loss": 0.4625, "step": 12698 }, { "epoch": 2.085541847145526, "grad_norm": 0.3311064860378474, "learning_rate": 5.737637979565808e-06, "loss": 0.4471, "step": 12699 }, { "epoch": 2.0857060743538685, "grad_norm": 0.3264983757330153, "learning_rate": 5.737187086345854e-06, "loss": 0.4497, "step": 12700 }, { "epoch": 2.0858703015622115, "grad_norm": 0.44699065895405937, "learning_rate": 5.736736179688833e-06, "loss": 0.4485, "step": 12701 }, { "epoch": 2.086034528770554, "grad_norm": 0.34545500806939633, "learning_rate": 5.736285259599639e-06, "loss": 0.4479, "step": 12702 }, { "epoch": 2.086198755978897, "grad_norm": 0.39482652743473867, "learning_rate": 5.73583432608317e-06, "loss": 0.4422, "step": 12703 }, { "epoch": 2.0863629831872395, "grad_norm": 0.341075659467438, "learning_rate": 5.735383379144325e-06, "loss": 0.4414, "step": 12704 }, { "epoch": 2.086527210395582, "grad_norm": 0.3309139894092436, "learning_rate": 5.734932418788001e-06, "loss": 0.4444, "step": 12705 }, { "epoch": 2.086691437603925, "grad_norm": 0.3247661891625629, "learning_rate": 5.734481445019097e-06, "loss": 0.4396, "step": 12706 }, { "epoch": 2.086855664812268, "grad_norm": 0.326532593618532, "learning_rate": 5.734030457842508e-06, "loss": 0.4546, "step": 12707 }, { "epoch": 2.0870198920206104, "grad_norm": 0.3922216807175495, "learning_rate": 5.733579457263135e-06, "loss": 0.4629, "step": 12708 }, { "epoch": 2.087184119228953, "grad_norm": 0.394354893553275, "learning_rate": 5.7331284432858755e-06, "loss": 0.4522, "step": 12709 }, { "epoch": 2.087348346437296, "grad_norm": 0.402018882323647, "learning_rate": 5.7326774159156275e-06, "loss": 0.4489, "step": 12710 }, { "epoch": 2.087512573645639, "grad_norm": 0.33228228472187143, "learning_rate": 5.73222637515729e-06, "loss": 0.4263, "step": 12711 }, { "epoch": 2.0876768008539814, "grad_norm": 0.4159735965654368, "learning_rate": 5.731775321015762e-06, "loss": 0.4548, "step": 12712 }, { "epoch": 2.087841028062324, "grad_norm": 0.4843775098777932, "learning_rate": 5.731324253495942e-06, "loss": 0.4442, "step": 12713 }, { "epoch": 2.088005255270667, "grad_norm": 0.33755449261044895, "learning_rate": 5.730873172602731e-06, "loss": 0.4554, "step": 12714 }, { "epoch": 2.08816948247901, "grad_norm": 0.4117674329873463, "learning_rate": 5.730422078341024e-06, "loss": 0.4238, "step": 12715 }, { "epoch": 2.0883337096873524, "grad_norm": 0.3001693197707603, "learning_rate": 5.729970970715722e-06, "loss": 0.4425, "step": 12716 }, { "epoch": 2.088497936895695, "grad_norm": 0.3338517787117876, "learning_rate": 5.729519849731726e-06, "loss": 0.4456, "step": 12717 }, { "epoch": 2.088662164104038, "grad_norm": 0.42389755185789696, "learning_rate": 5.729068715393936e-06, "loss": 0.4574, "step": 12718 }, { "epoch": 2.088826391312381, "grad_norm": 0.44123544041308665, "learning_rate": 5.72861756770725e-06, "loss": 0.4652, "step": 12719 }, { "epoch": 2.0889906185207234, "grad_norm": 0.35369014153572237, "learning_rate": 5.7281664066765675e-06, "loss": 0.4426, "step": 12720 }, { "epoch": 2.089154845729066, "grad_norm": 0.3249641567404478, "learning_rate": 5.727715232306789e-06, "loss": 0.4383, "step": 12721 }, { "epoch": 2.0893190729374087, "grad_norm": 0.32121212235146046, "learning_rate": 5.727264044602817e-06, "loss": 0.4364, "step": 12722 }, { "epoch": 2.089483300145752, "grad_norm": 0.3503459484904393, "learning_rate": 5.72681284356955e-06, "loss": 0.454, "step": 12723 }, { "epoch": 2.0896475273540944, "grad_norm": 0.3292701887707263, "learning_rate": 5.726361629211887e-06, "loss": 0.4371, "step": 12724 }, { "epoch": 2.089811754562437, "grad_norm": 0.29747395088568335, "learning_rate": 5.7259104015347315e-06, "loss": 0.4453, "step": 12725 }, { "epoch": 2.0899759817707797, "grad_norm": 0.34403589806070617, "learning_rate": 5.725459160542981e-06, "loss": 0.433, "step": 12726 }, { "epoch": 2.090140208979123, "grad_norm": 0.3846893582906354, "learning_rate": 5.72500790624154e-06, "loss": 0.4345, "step": 12727 }, { "epoch": 2.0903044361874654, "grad_norm": 0.3406838298169791, "learning_rate": 5.724556638635308e-06, "loss": 0.4279, "step": 12728 }, { "epoch": 2.090468663395808, "grad_norm": 0.4079214046800453, "learning_rate": 5.724105357729185e-06, "loss": 0.4468, "step": 12729 }, { "epoch": 2.0906328906041507, "grad_norm": 0.3143000270751584, "learning_rate": 5.723654063528074e-06, "loss": 0.4315, "step": 12730 }, { "epoch": 2.0907971178124938, "grad_norm": 0.32459926896482116, "learning_rate": 5.723202756036876e-06, "loss": 0.4371, "step": 12731 }, { "epoch": 2.0909613450208364, "grad_norm": 0.31723359855263167, "learning_rate": 5.722751435260493e-06, "loss": 0.4486, "step": 12732 }, { "epoch": 2.091125572229179, "grad_norm": 0.3074426863744088, "learning_rate": 5.7223001012038254e-06, "loss": 0.4255, "step": 12733 }, { "epoch": 2.0912897994375217, "grad_norm": 0.3509154461452414, "learning_rate": 5.721848753871777e-06, "loss": 0.4373, "step": 12734 }, { "epoch": 2.0914540266458648, "grad_norm": 0.35720034447561816, "learning_rate": 5.721397393269249e-06, "loss": 0.4243, "step": 12735 }, { "epoch": 2.0916182538542074, "grad_norm": 0.3614472189732083, "learning_rate": 5.720946019401143e-06, "loss": 0.4608, "step": 12736 }, { "epoch": 2.09178248106255, "grad_norm": 0.332457995357455, "learning_rate": 5.720494632272363e-06, "loss": 0.4511, "step": 12737 }, { "epoch": 2.0919467082708927, "grad_norm": 0.4137143471345643, "learning_rate": 5.720043231887808e-06, "loss": 0.4487, "step": 12738 }, { "epoch": 2.0921109354792353, "grad_norm": 0.30603264772356137, "learning_rate": 5.719591818252387e-06, "loss": 0.4734, "step": 12739 }, { "epoch": 2.0922751626875784, "grad_norm": 0.40363108857332247, "learning_rate": 5.719140391370996e-06, "loss": 0.4526, "step": 12740 }, { "epoch": 2.092439389895921, "grad_norm": 0.38789927188689594, "learning_rate": 5.718688951248541e-06, "loss": 0.4449, "step": 12741 }, { "epoch": 2.0926036171042637, "grad_norm": 0.3309222451571833, "learning_rate": 5.718237497889926e-06, "loss": 0.4547, "step": 12742 }, { "epoch": 2.0927678443126063, "grad_norm": 0.4855079010515181, "learning_rate": 5.717786031300054e-06, "loss": 0.4249, "step": 12743 }, { "epoch": 2.0929320715209494, "grad_norm": 0.38233922938650783, "learning_rate": 5.717334551483825e-06, "loss": 0.4391, "step": 12744 }, { "epoch": 2.093096298729292, "grad_norm": 0.3406979729307253, "learning_rate": 5.716883058446147e-06, "loss": 0.4369, "step": 12745 }, { "epoch": 2.0932605259376347, "grad_norm": 0.30944525643888987, "learning_rate": 5.716431552191921e-06, "loss": 0.4251, "step": 12746 }, { "epoch": 2.0934247531459773, "grad_norm": 0.31735897928397955, "learning_rate": 5.7159800327260525e-06, "loss": 0.4425, "step": 12747 }, { "epoch": 2.0935889803543204, "grad_norm": 0.5216526947925395, "learning_rate": 5.715528500053444e-06, "loss": 0.442, "step": 12748 }, { "epoch": 2.093753207562663, "grad_norm": 0.47185098877306997, "learning_rate": 5.715076954178999e-06, "loss": 0.4323, "step": 12749 }, { "epoch": 2.0939174347710057, "grad_norm": 0.4193173428927055, "learning_rate": 5.714625395107623e-06, "loss": 0.4698, "step": 12750 }, { "epoch": 2.0940816619793483, "grad_norm": 0.6124497113365915, "learning_rate": 5.714173822844221e-06, "loss": 0.4431, "step": 12751 }, { "epoch": 2.0942458891876914, "grad_norm": 0.293266143258921, "learning_rate": 5.713722237393696e-06, "loss": 0.4421, "step": 12752 }, { "epoch": 2.094410116396034, "grad_norm": 0.6165608268152939, "learning_rate": 5.713270638760955e-06, "loss": 0.4347, "step": 12753 }, { "epoch": 2.0945743436043767, "grad_norm": 0.3047436508071504, "learning_rate": 5.7128190269508995e-06, "loss": 0.457, "step": 12754 }, { "epoch": 2.0947385708127193, "grad_norm": 0.3668284014045656, "learning_rate": 5.712367401968436e-06, "loss": 0.4328, "step": 12755 }, { "epoch": 2.094902798021062, "grad_norm": 0.33986561289705924, "learning_rate": 5.711915763818472e-06, "loss": 0.4687, "step": 12756 }, { "epoch": 2.095067025229405, "grad_norm": 0.3706800130778407, "learning_rate": 5.711464112505909e-06, "loss": 0.4489, "step": 12757 }, { "epoch": 2.0952312524377477, "grad_norm": 0.39079281718805936, "learning_rate": 5.711012448035652e-06, "loss": 0.4633, "step": 12758 }, { "epoch": 2.0953954796460903, "grad_norm": 0.32449038886318793, "learning_rate": 5.710560770412611e-06, "loss": 0.4216, "step": 12759 }, { "epoch": 2.095559706854433, "grad_norm": 0.3466183057361125, "learning_rate": 5.710109079641688e-06, "loss": 0.4291, "step": 12760 }, { "epoch": 2.095723934062776, "grad_norm": 0.3446100118332191, "learning_rate": 5.70965737572779e-06, "loss": 0.4605, "step": 12761 }, { "epoch": 2.0958881612711187, "grad_norm": 0.3495938088505321, "learning_rate": 5.7092056586758225e-06, "loss": 0.4442, "step": 12762 }, { "epoch": 2.0960523884794613, "grad_norm": 0.36872695332068844, "learning_rate": 5.708753928490691e-06, "loss": 0.459, "step": 12763 }, { "epoch": 2.096216615687804, "grad_norm": 0.372112203602689, "learning_rate": 5.708302185177304e-06, "loss": 0.4351, "step": 12764 }, { "epoch": 2.096380842896147, "grad_norm": 0.31392433890046545, "learning_rate": 5.707850428740565e-06, "loss": 0.4495, "step": 12765 }, { "epoch": 2.0965450701044896, "grad_norm": 0.307439405862085, "learning_rate": 5.707398659185383e-06, "loss": 0.4327, "step": 12766 }, { "epoch": 2.0967092973128323, "grad_norm": 0.41439290769866416, "learning_rate": 5.706946876516664e-06, "loss": 0.4503, "step": 12767 }, { "epoch": 2.096873524521175, "grad_norm": 0.3074612197500893, "learning_rate": 5.706495080739314e-06, "loss": 0.4367, "step": 12768 }, { "epoch": 2.097037751729518, "grad_norm": 0.5222822521040957, "learning_rate": 5.706043271858241e-06, "loss": 0.416, "step": 12769 }, { "epoch": 2.0972019789378606, "grad_norm": 0.34805519039750665, "learning_rate": 5.70559144987835e-06, "loss": 0.4527, "step": 12770 }, { "epoch": 2.0973662061462033, "grad_norm": 0.3251807292612414, "learning_rate": 5.7051396148045514e-06, "loss": 0.4297, "step": 12771 }, { "epoch": 2.097530433354546, "grad_norm": 0.30016213451186896, "learning_rate": 5.70468776664175e-06, "loss": 0.4301, "step": 12772 }, { "epoch": 2.0976946605628886, "grad_norm": 0.3500767429575023, "learning_rate": 5.704235905394855e-06, "loss": 0.4569, "step": 12773 }, { "epoch": 2.0978588877712316, "grad_norm": 0.44306698843867504, "learning_rate": 5.703784031068771e-06, "loss": 0.4474, "step": 12774 }, { "epoch": 2.0980231149795743, "grad_norm": 0.35216126579915613, "learning_rate": 5.703332143668409e-06, "loss": 0.4296, "step": 12775 }, { "epoch": 2.098187342187917, "grad_norm": 0.3069250697540145, "learning_rate": 5.702880243198678e-06, "loss": 0.4239, "step": 12776 }, { "epoch": 2.0983515693962596, "grad_norm": 0.3491742333738244, "learning_rate": 5.7024283296644825e-06, "loss": 0.4424, "step": 12777 }, { "epoch": 2.0985157966046026, "grad_norm": 0.3532745513440168, "learning_rate": 5.701976403070732e-06, "loss": 0.4547, "step": 12778 }, { "epoch": 2.0986800238129453, "grad_norm": 0.6853319161198199, "learning_rate": 5.701524463422336e-06, "loss": 0.442, "step": 12779 }, { "epoch": 2.098844251021288, "grad_norm": 0.31164913944888184, "learning_rate": 5.701072510724201e-06, "loss": 0.4422, "step": 12780 }, { "epoch": 2.0990084782296305, "grad_norm": 0.33442415700253836, "learning_rate": 5.700620544981238e-06, "loss": 0.4329, "step": 12781 }, { "epoch": 2.0991727054379736, "grad_norm": 0.29691650447931556, "learning_rate": 5.7001685661983545e-06, "loss": 0.4464, "step": 12782 }, { "epoch": 2.0993369326463163, "grad_norm": 0.4273272208437839, "learning_rate": 5.699716574380459e-06, "loss": 0.4387, "step": 12783 }, { "epoch": 2.099501159854659, "grad_norm": 0.3664218978422905, "learning_rate": 5.699264569532461e-06, "loss": 0.4407, "step": 12784 }, { "epoch": 2.0996653870630015, "grad_norm": 0.2972060184677094, "learning_rate": 5.698812551659271e-06, "loss": 0.431, "step": 12785 }, { "epoch": 2.0998296142713446, "grad_norm": 0.3639133252374928, "learning_rate": 5.698360520765798e-06, "loss": 0.4439, "step": 12786 }, { "epoch": 2.0999938414796873, "grad_norm": 0.30413518112204113, "learning_rate": 5.697908476856948e-06, "loss": 0.4441, "step": 12787 }, { "epoch": 2.10015806868803, "grad_norm": 0.34623216367586335, "learning_rate": 5.697456419937635e-06, "loss": 0.4718, "step": 12788 }, { "epoch": 2.1003222958963725, "grad_norm": 0.4154051288309667, "learning_rate": 5.697004350012767e-06, "loss": 0.4382, "step": 12789 }, { "epoch": 2.100486523104715, "grad_norm": 0.33461676750805575, "learning_rate": 5.696552267087253e-06, "loss": 0.4585, "step": 12790 }, { "epoch": 2.1006507503130583, "grad_norm": 0.4078581820722512, "learning_rate": 5.696100171166006e-06, "loss": 0.4295, "step": 12791 }, { "epoch": 2.100814977521401, "grad_norm": 0.28825390150215113, "learning_rate": 5.695648062253933e-06, "loss": 0.4289, "step": 12792 }, { "epoch": 2.1009792047297435, "grad_norm": 0.43816165937186574, "learning_rate": 5.695195940355946e-06, "loss": 0.4544, "step": 12793 }, { "epoch": 2.101143431938086, "grad_norm": 0.5703413364880195, "learning_rate": 5.694743805476955e-06, "loss": 0.4567, "step": 12794 }, { "epoch": 2.1013076591464293, "grad_norm": 0.35355716963073625, "learning_rate": 5.69429165762187e-06, "loss": 0.4656, "step": 12795 }, { "epoch": 2.101471886354772, "grad_norm": 1.0919983683018712, "learning_rate": 5.693839496795605e-06, "loss": 0.4431, "step": 12796 }, { "epoch": 2.1016361135631145, "grad_norm": 0.33052976505924586, "learning_rate": 5.6933873230030665e-06, "loss": 0.4374, "step": 12797 }, { "epoch": 2.101800340771457, "grad_norm": 1.522094268884421, "learning_rate": 5.692935136249169e-06, "loss": 0.4613, "step": 12798 }, { "epoch": 2.1019645679798002, "grad_norm": 0.35010018307879287, "learning_rate": 5.692482936538821e-06, "loss": 0.4227, "step": 12799 }, { "epoch": 2.102128795188143, "grad_norm": 0.3341888208412005, "learning_rate": 5.692030723876934e-06, "loss": 0.4534, "step": 12800 }, { "epoch": 2.1022930223964855, "grad_norm": 0.4399329112161016, "learning_rate": 5.691578498268423e-06, "loss": 0.4605, "step": 12801 }, { "epoch": 2.102457249604828, "grad_norm": 0.4055705345962353, "learning_rate": 5.691126259718197e-06, "loss": 0.458, "step": 12802 }, { "epoch": 2.1026214768131712, "grad_norm": 0.312902913896997, "learning_rate": 5.690674008231166e-06, "loss": 0.4495, "step": 12803 }, { "epoch": 2.102785704021514, "grad_norm": 0.3318988567653643, "learning_rate": 5.690221743812244e-06, "loss": 0.4626, "step": 12804 }, { "epoch": 2.1029499312298565, "grad_norm": 0.34101958993527215, "learning_rate": 5.689769466466344e-06, "loss": 0.4202, "step": 12805 }, { "epoch": 2.103114158438199, "grad_norm": 0.30553517046334017, "learning_rate": 5.689317176198377e-06, "loss": 0.448, "step": 12806 }, { "epoch": 2.103278385646542, "grad_norm": 0.29574474153044406, "learning_rate": 5.688864873013256e-06, "loss": 0.4267, "step": 12807 }, { "epoch": 2.103442612854885, "grad_norm": 0.4147066715116571, "learning_rate": 5.688412556915891e-06, "loss": 0.4351, "step": 12808 }, { "epoch": 2.1036068400632275, "grad_norm": 0.3180074558597533, "learning_rate": 5.687960227911197e-06, "loss": 0.434, "step": 12809 }, { "epoch": 2.10377106727157, "grad_norm": 0.3715894218391581, "learning_rate": 5.6875078860040854e-06, "loss": 0.4319, "step": 12810 }, { "epoch": 2.103935294479913, "grad_norm": 0.3526111909409907, "learning_rate": 5.687055531199473e-06, "loss": 0.4302, "step": 12811 }, { "epoch": 2.104099521688256, "grad_norm": 0.3747430339445163, "learning_rate": 5.6866031635022664e-06, "loss": 0.4376, "step": 12812 }, { "epoch": 2.1042637488965985, "grad_norm": 0.4540276918521262, "learning_rate": 5.686150782917382e-06, "loss": 0.4383, "step": 12813 }, { "epoch": 2.104427976104941, "grad_norm": 0.3795862762748715, "learning_rate": 5.685698389449735e-06, "loss": 0.4659, "step": 12814 }, { "epoch": 2.104592203313284, "grad_norm": 0.41673579474541966, "learning_rate": 5.685245983104235e-06, "loss": 0.4393, "step": 12815 }, { "epoch": 2.104756430521627, "grad_norm": 0.3116492369443997, "learning_rate": 5.684793563885799e-06, "loss": 0.4574, "step": 12816 }, { "epoch": 2.1049206577299695, "grad_norm": 0.3399815985478469, "learning_rate": 5.684341131799338e-06, "loss": 0.4369, "step": 12817 }, { "epoch": 2.105084884938312, "grad_norm": 0.5281393777848307, "learning_rate": 5.683888686849769e-06, "loss": 0.4143, "step": 12818 }, { "epoch": 2.1052491121466548, "grad_norm": 0.3429045769350071, "learning_rate": 5.6834362290420015e-06, "loss": 0.4436, "step": 12819 }, { "epoch": 2.105413339354998, "grad_norm": 0.4486855426294437, "learning_rate": 5.682983758380955e-06, "loss": 0.438, "step": 12820 }, { "epoch": 2.1055775665633405, "grad_norm": 0.29769448844466256, "learning_rate": 5.682531274871538e-06, "loss": 0.4387, "step": 12821 }, { "epoch": 2.105741793771683, "grad_norm": 0.35663890431486234, "learning_rate": 5.68207877851867e-06, "loss": 0.4351, "step": 12822 }, { "epoch": 2.1059060209800258, "grad_norm": 0.3455155676747153, "learning_rate": 5.6816262693272625e-06, "loss": 0.4459, "step": 12823 }, { "epoch": 2.1060702481883684, "grad_norm": 0.3476365547950273, "learning_rate": 5.681173747302231e-06, "loss": 0.437, "step": 12824 }, { "epoch": 2.1062344753967115, "grad_norm": 0.31649578705530995, "learning_rate": 5.680721212448492e-06, "loss": 0.4493, "step": 12825 }, { "epoch": 2.106398702605054, "grad_norm": 0.3246652992220023, "learning_rate": 5.680268664770957e-06, "loss": 0.4527, "step": 12826 }, { "epoch": 2.1065629298133968, "grad_norm": 0.3578087030598692, "learning_rate": 5.679816104274546e-06, "loss": 0.4339, "step": 12827 }, { "epoch": 2.1067271570217394, "grad_norm": 0.4092237618364479, "learning_rate": 5.679363530964167e-06, "loss": 0.447, "step": 12828 }, { "epoch": 2.1068913842300825, "grad_norm": 0.3361602929137724, "learning_rate": 5.678910944844742e-06, "loss": 0.4472, "step": 12829 }, { "epoch": 2.107055611438425, "grad_norm": 0.38061197908341743, "learning_rate": 5.6784583459211855e-06, "loss": 0.437, "step": 12830 }, { "epoch": 2.1072198386467678, "grad_norm": 0.33944842725725227, "learning_rate": 5.678005734198412e-06, "loss": 0.4142, "step": 12831 }, { "epoch": 2.1073840658551104, "grad_norm": 0.27748281160012517, "learning_rate": 5.677553109681335e-06, "loss": 0.4437, "step": 12832 }, { "epoch": 2.1075482930634535, "grad_norm": 0.493159606649079, "learning_rate": 5.677100472374873e-06, "loss": 0.4537, "step": 12833 }, { "epoch": 2.107712520271796, "grad_norm": 0.33143414634234014, "learning_rate": 5.676647822283942e-06, "loss": 0.4473, "step": 12834 }, { "epoch": 2.1078767474801388, "grad_norm": 0.33655801079061937, "learning_rate": 5.67619515941346e-06, "loss": 0.439, "step": 12835 }, { "epoch": 2.1080409746884814, "grad_norm": 0.30323548739001926, "learning_rate": 5.675742483768339e-06, "loss": 0.4321, "step": 12836 }, { "epoch": 2.1082052018968245, "grad_norm": 0.3308812641204237, "learning_rate": 5.675289795353498e-06, "loss": 0.4344, "step": 12837 }, { "epoch": 2.108369429105167, "grad_norm": 0.3358438917250515, "learning_rate": 5.674837094173854e-06, "loss": 0.4495, "step": 12838 }, { "epoch": 2.1085336563135098, "grad_norm": 0.4482835836763625, "learning_rate": 5.674384380234323e-06, "loss": 0.4488, "step": 12839 }, { "epoch": 2.1086978835218524, "grad_norm": 0.29048868571265907, "learning_rate": 5.673931653539824e-06, "loss": 0.4486, "step": 12840 }, { "epoch": 2.108862110730195, "grad_norm": 0.4323304845732, "learning_rate": 5.6734789140952695e-06, "loss": 0.4321, "step": 12841 }, { "epoch": 2.109026337938538, "grad_norm": 0.30474831684058123, "learning_rate": 5.673026161905581e-06, "loss": 0.4372, "step": 12842 }, { "epoch": 2.1091905651468807, "grad_norm": 0.38957900370384485, "learning_rate": 5.672573396975674e-06, "loss": 0.4331, "step": 12843 }, { "epoch": 2.1093547923552234, "grad_norm": 0.33247541581000345, "learning_rate": 5.672120619310466e-06, "loss": 0.4318, "step": 12844 }, { "epoch": 2.109519019563566, "grad_norm": 0.366253574432419, "learning_rate": 5.671667828914876e-06, "loss": 0.4543, "step": 12845 }, { "epoch": 2.109683246771909, "grad_norm": 0.28846496806996375, "learning_rate": 5.6712150257938196e-06, "loss": 0.4418, "step": 12846 }, { "epoch": 2.1098474739802517, "grad_norm": 0.37577112889588415, "learning_rate": 5.670762209952215e-06, "loss": 0.4423, "step": 12847 }, { "epoch": 2.1100117011885944, "grad_norm": 0.327766941899745, "learning_rate": 5.670309381394982e-06, "loss": 0.4452, "step": 12848 }, { "epoch": 2.110175928396937, "grad_norm": 0.3545541182125981, "learning_rate": 5.669856540127037e-06, "loss": 0.4251, "step": 12849 }, { "epoch": 2.11034015560528, "grad_norm": 0.3924250514834875, "learning_rate": 5.6694036861533e-06, "loss": 0.4482, "step": 12850 }, { "epoch": 2.1105043828136227, "grad_norm": 0.3583855348922854, "learning_rate": 5.668950819478688e-06, "loss": 0.4352, "step": 12851 }, { "epoch": 2.1106686100219654, "grad_norm": 0.3968481426345676, "learning_rate": 5.66849794010812e-06, "loss": 0.4337, "step": 12852 }, { "epoch": 2.110832837230308, "grad_norm": 0.3224990826352743, "learning_rate": 5.6680450480465145e-06, "loss": 0.4443, "step": 12853 }, { "epoch": 2.110997064438651, "grad_norm": 0.39144802472570384, "learning_rate": 5.667592143298791e-06, "loss": 0.4289, "step": 12854 }, { "epoch": 2.1111612916469937, "grad_norm": 0.3718451290345085, "learning_rate": 5.667139225869867e-06, "loss": 0.4506, "step": 12855 }, { "epoch": 2.1113255188553364, "grad_norm": 0.3297370105246588, "learning_rate": 5.666686295764665e-06, "loss": 0.4506, "step": 12856 }, { "epoch": 2.111489746063679, "grad_norm": 0.5862997461169424, "learning_rate": 5.6662333529880994e-06, "loss": 0.4494, "step": 12857 }, { "epoch": 2.1116539732720216, "grad_norm": 0.42111008173194764, "learning_rate": 5.665780397545093e-06, "loss": 0.4436, "step": 12858 }, { "epoch": 2.1118182004803647, "grad_norm": 0.33023195184528636, "learning_rate": 5.665327429440566e-06, "loss": 0.4374, "step": 12859 }, { "epoch": 2.1119824276887074, "grad_norm": 0.39047191719718477, "learning_rate": 5.664874448679434e-06, "loss": 0.4404, "step": 12860 }, { "epoch": 2.11214665489705, "grad_norm": 0.36364259425271644, "learning_rate": 5.6644214552666205e-06, "loss": 0.4453, "step": 12861 }, { "epoch": 2.1123108821053926, "grad_norm": 0.47280712222169236, "learning_rate": 5.663968449207044e-06, "loss": 0.442, "step": 12862 }, { "epoch": 2.1124751093137357, "grad_norm": 0.34497358185281746, "learning_rate": 5.663515430505626e-06, "loss": 0.4632, "step": 12863 }, { "epoch": 2.1126393365220784, "grad_norm": 0.35565401681845893, "learning_rate": 5.663062399167285e-06, "loss": 0.4572, "step": 12864 }, { "epoch": 2.112803563730421, "grad_norm": 0.3225578882958658, "learning_rate": 5.662609355196944e-06, "loss": 0.4578, "step": 12865 }, { "epoch": 2.1129677909387636, "grad_norm": 0.3207304288053452, "learning_rate": 5.662156298599518e-06, "loss": 0.4319, "step": 12866 }, { "epoch": 2.1131320181471067, "grad_norm": 0.6186662136471093, "learning_rate": 5.661703229379933e-06, "loss": 0.4241, "step": 12867 }, { "epoch": 2.1132962453554494, "grad_norm": 0.3004416227688589, "learning_rate": 5.661250147543107e-06, "loss": 0.4552, "step": 12868 }, { "epoch": 2.113460472563792, "grad_norm": 0.3337040866259995, "learning_rate": 5.660797053093965e-06, "loss": 0.4431, "step": 12869 }, { "epoch": 2.1136246997721346, "grad_norm": 0.30755631851279613, "learning_rate": 5.6603439460374226e-06, "loss": 0.4349, "step": 12870 }, { "epoch": 2.1137889269804777, "grad_norm": 0.35471867913214444, "learning_rate": 5.659890826378403e-06, "loss": 0.4443, "step": 12871 }, { "epoch": 2.1139531541888203, "grad_norm": 0.29714877631996933, "learning_rate": 5.659437694121827e-06, "loss": 0.4423, "step": 12872 }, { "epoch": 2.114117381397163, "grad_norm": 0.2906786915109368, "learning_rate": 5.658984549272619e-06, "loss": 0.4479, "step": 12873 }, { "epoch": 2.1142816086055056, "grad_norm": 0.32187424124789155, "learning_rate": 5.658531391835699e-06, "loss": 0.4357, "step": 12874 }, { "epoch": 2.1144458358138483, "grad_norm": 0.29924613569598135, "learning_rate": 5.658078221815986e-06, "loss": 0.4509, "step": 12875 }, { "epoch": 2.1146100630221913, "grad_norm": 0.30270006577595426, "learning_rate": 5.657625039218405e-06, "loss": 0.4335, "step": 12876 }, { "epoch": 2.114774290230534, "grad_norm": 0.3369789030042965, "learning_rate": 5.6571718440478774e-06, "loss": 0.4334, "step": 12877 }, { "epoch": 2.1149385174388766, "grad_norm": 0.6159556214342785, "learning_rate": 5.656718636309324e-06, "loss": 0.4382, "step": 12878 }, { "epoch": 2.1151027446472193, "grad_norm": 0.3931772247618362, "learning_rate": 5.65626541600767e-06, "loss": 0.4486, "step": 12879 }, { "epoch": 2.1152669718555623, "grad_norm": 0.3023065547371581, "learning_rate": 5.655812183147834e-06, "loss": 0.4478, "step": 12880 }, { "epoch": 2.115431199063905, "grad_norm": 0.46112021793977825, "learning_rate": 5.655358937734742e-06, "loss": 0.4535, "step": 12881 }, { "epoch": 2.1155954262722476, "grad_norm": 0.31824756429454715, "learning_rate": 5.654905679773315e-06, "loss": 0.4336, "step": 12882 }, { "epoch": 2.1157596534805903, "grad_norm": 0.45968576013510043, "learning_rate": 5.654452409268476e-06, "loss": 0.4484, "step": 12883 }, { "epoch": 2.1159238806889333, "grad_norm": 0.32054414238138046, "learning_rate": 5.653999126225148e-06, "loss": 0.4297, "step": 12884 }, { "epoch": 2.116088107897276, "grad_norm": 1.3758552124140686, "learning_rate": 5.653545830648254e-06, "loss": 0.4522, "step": 12885 }, { "epoch": 2.1162523351056186, "grad_norm": 0.3020933416951688, "learning_rate": 5.653092522542717e-06, "loss": 0.4419, "step": 12886 }, { "epoch": 2.1164165623139612, "grad_norm": 0.29168531190257574, "learning_rate": 5.652639201913461e-06, "loss": 0.4563, "step": 12887 }, { "epoch": 2.1165807895223043, "grad_norm": 0.39239511980870456, "learning_rate": 5.652185868765409e-06, "loss": 0.4533, "step": 12888 }, { "epoch": 2.116745016730647, "grad_norm": 0.328826136046041, "learning_rate": 5.651732523103485e-06, "loss": 0.4404, "step": 12889 }, { "epoch": 2.1169092439389896, "grad_norm": 0.3431931638495928, "learning_rate": 5.6512791649326136e-06, "loss": 0.4611, "step": 12890 }, { "epoch": 2.1170734711473322, "grad_norm": 0.28812679712227424, "learning_rate": 5.650825794257716e-06, "loss": 0.438, "step": 12891 }, { "epoch": 2.117237698355675, "grad_norm": 0.2902608751383587, "learning_rate": 5.650372411083718e-06, "loss": 0.4342, "step": 12892 }, { "epoch": 2.117401925564018, "grad_norm": 0.37581087709899136, "learning_rate": 5.649919015415546e-06, "loss": 0.4447, "step": 12893 }, { "epoch": 2.1175661527723606, "grad_norm": 0.32938488910246017, "learning_rate": 5.649465607258122e-06, "loss": 0.4396, "step": 12894 }, { "epoch": 2.1177303799807032, "grad_norm": 0.4706196191524123, "learning_rate": 5.649012186616368e-06, "loss": 0.4348, "step": 12895 }, { "epoch": 2.117894607189046, "grad_norm": 0.41697900587103304, "learning_rate": 5.648558753495212e-06, "loss": 0.4366, "step": 12896 }, { "epoch": 2.118058834397389, "grad_norm": 0.31704559733383314, "learning_rate": 5.648105307899579e-06, "loss": 0.4481, "step": 12897 }, { "epoch": 2.1182230616057316, "grad_norm": 0.47313061714381116, "learning_rate": 5.647651849834392e-06, "loss": 0.4421, "step": 12898 }, { "epoch": 2.1183872888140742, "grad_norm": 0.3885098414913026, "learning_rate": 5.647198379304578e-06, "loss": 0.4371, "step": 12899 }, { "epoch": 2.118551516022417, "grad_norm": 0.35465476613932184, "learning_rate": 5.646744896315059e-06, "loss": 0.453, "step": 12900 }, { "epoch": 2.11871574323076, "grad_norm": 0.33812434114988693, "learning_rate": 5.646291400870763e-06, "loss": 0.4476, "step": 12901 }, { "epoch": 2.1188799704391026, "grad_norm": 0.2907497876750063, "learning_rate": 5.645837892976615e-06, "loss": 0.4291, "step": 12902 }, { "epoch": 2.1190441976474452, "grad_norm": 0.2562377614658291, "learning_rate": 5.6453843726375395e-06, "loss": 0.4528, "step": 12903 }, { "epoch": 2.119208424855788, "grad_norm": 0.4183063196730311, "learning_rate": 5.644930839858463e-06, "loss": 0.4566, "step": 12904 }, { "epoch": 2.119372652064131, "grad_norm": 0.3360083856619884, "learning_rate": 5.644477294644312e-06, "loss": 0.4612, "step": 12905 }, { "epoch": 2.1195368792724736, "grad_norm": 0.3550177847074318, "learning_rate": 5.644023737000011e-06, "loss": 0.413, "step": 12906 }, { "epoch": 2.119701106480816, "grad_norm": 0.3168948460799242, "learning_rate": 5.643570166930485e-06, "loss": 0.4267, "step": 12907 }, { "epoch": 2.119865333689159, "grad_norm": 0.41299478080841584, "learning_rate": 5.643116584440665e-06, "loss": 0.4389, "step": 12908 }, { "epoch": 2.1200295608975015, "grad_norm": 0.285914803678043, "learning_rate": 5.642662989535472e-06, "loss": 0.4204, "step": 12909 }, { "epoch": 2.1201937881058446, "grad_norm": 0.3225596692159442, "learning_rate": 5.642209382219836e-06, "loss": 0.4421, "step": 12910 }, { "epoch": 2.120358015314187, "grad_norm": 0.3499417079033318, "learning_rate": 5.6417557624986815e-06, "loss": 0.4402, "step": 12911 }, { "epoch": 2.12052224252253, "grad_norm": 0.29267360443334167, "learning_rate": 5.641302130376935e-06, "loss": 0.4348, "step": 12912 }, { "epoch": 2.1206864697308725, "grad_norm": 0.32426656914720464, "learning_rate": 5.640848485859526e-06, "loss": 0.4419, "step": 12913 }, { "epoch": 2.1208506969392156, "grad_norm": 0.3281027482639434, "learning_rate": 5.6403948289513795e-06, "loss": 0.4561, "step": 12914 }, { "epoch": 2.121014924147558, "grad_norm": 0.3220048109109444, "learning_rate": 5.6399411596574245e-06, "loss": 0.4575, "step": 12915 }, { "epoch": 2.121179151355901, "grad_norm": 0.5050677501725908, "learning_rate": 5.639487477982585e-06, "loss": 0.4443, "step": 12916 }, { "epoch": 2.1213433785642435, "grad_norm": 0.33091551603894176, "learning_rate": 5.639033783931792e-06, "loss": 0.438, "step": 12917 }, { "epoch": 2.1215076057725866, "grad_norm": 0.5370997172267648, "learning_rate": 5.63858007750997e-06, "loss": 0.442, "step": 12918 }, { "epoch": 2.121671832980929, "grad_norm": 0.3195961149692968, "learning_rate": 5.638126358722049e-06, "loss": 0.4289, "step": 12919 }, { "epoch": 2.121836060189272, "grad_norm": 0.29324147405114565, "learning_rate": 5.637672627572955e-06, "loss": 0.4474, "step": 12920 }, { "epoch": 2.1220002873976145, "grad_norm": 0.2773735461045715, "learning_rate": 5.637218884067618e-06, "loss": 0.4613, "step": 12921 }, { "epoch": 2.1221645146059576, "grad_norm": 0.3540915443199416, "learning_rate": 5.636765128210965e-06, "loss": 0.4309, "step": 12922 }, { "epoch": 2.1223287418143, "grad_norm": 0.3887095467543331, "learning_rate": 5.636311360007924e-06, "loss": 0.4282, "step": 12923 }, { "epoch": 2.122492969022643, "grad_norm": 0.3299693991544904, "learning_rate": 5.635857579463423e-06, "loss": 0.4328, "step": 12924 }, { "epoch": 2.1226571962309855, "grad_norm": 0.37227913141766533, "learning_rate": 5.635403786582392e-06, "loss": 0.4406, "step": 12925 }, { "epoch": 2.122821423439328, "grad_norm": 0.3635459484822922, "learning_rate": 5.634949981369758e-06, "loss": 0.4633, "step": 12926 }, { "epoch": 2.122985650647671, "grad_norm": 0.34839992097364925, "learning_rate": 5.634496163830452e-06, "loss": 0.4488, "step": 12927 }, { "epoch": 2.123149877856014, "grad_norm": 0.333284343465869, "learning_rate": 5.634042333969401e-06, "loss": 0.4568, "step": 12928 }, { "epoch": 2.1233141050643565, "grad_norm": 0.3805554693548058, "learning_rate": 5.633588491791533e-06, "loss": 0.4514, "step": 12929 }, { "epoch": 2.123478332272699, "grad_norm": 0.33782011930290967, "learning_rate": 5.63313463730178e-06, "loss": 0.4413, "step": 12930 }, { "epoch": 2.123642559481042, "grad_norm": 0.3804235495657651, "learning_rate": 5.63268077050507e-06, "loss": 0.4547, "step": 12931 }, { "epoch": 2.123806786689385, "grad_norm": 0.34878407931263294, "learning_rate": 5.632226891406332e-06, "loss": 0.4447, "step": 12932 }, { "epoch": 2.1239710138977275, "grad_norm": 0.2980431569749951, "learning_rate": 5.631773000010497e-06, "loss": 0.4501, "step": 12933 }, { "epoch": 2.12413524110607, "grad_norm": 0.2758512424001063, "learning_rate": 5.631319096322493e-06, "loss": 0.4375, "step": 12934 }, { "epoch": 2.124299468314413, "grad_norm": 0.31581503124840343, "learning_rate": 5.6308651803472505e-06, "loss": 0.4398, "step": 12935 }, { "epoch": 2.124463695522756, "grad_norm": 0.4350563326476109, "learning_rate": 5.630411252089699e-06, "loss": 0.4385, "step": 12936 }, { "epoch": 2.1246279227310985, "grad_norm": 0.32043068856204204, "learning_rate": 5.62995731155477e-06, "loss": 0.4418, "step": 12937 }, { "epoch": 2.124792149939441, "grad_norm": 0.3878698223491185, "learning_rate": 5.629503358747392e-06, "loss": 0.4548, "step": 12938 }, { "epoch": 2.124956377147784, "grad_norm": 0.395039731182331, "learning_rate": 5.6290493936724965e-06, "loss": 0.4387, "step": 12939 }, { "epoch": 2.125120604356127, "grad_norm": 0.3315098767767503, "learning_rate": 5.628595416335014e-06, "loss": 0.4409, "step": 12940 }, { "epoch": 2.1252848315644695, "grad_norm": 0.3856410406992659, "learning_rate": 5.628141426739875e-06, "loss": 0.4578, "step": 12941 }, { "epoch": 2.125449058772812, "grad_norm": 0.2973728241039122, "learning_rate": 5.627687424892011e-06, "loss": 0.4412, "step": 12942 }, { "epoch": 2.1256132859811547, "grad_norm": 0.7612320769649644, "learning_rate": 5.62723341079635e-06, "loss": 0.4356, "step": 12943 }, { "epoch": 2.125777513189498, "grad_norm": 0.34729546541020695, "learning_rate": 5.626779384457826e-06, "loss": 0.4264, "step": 12944 }, { "epoch": 2.1259417403978405, "grad_norm": 0.3043427111694188, "learning_rate": 5.6263253458813706e-06, "loss": 0.4497, "step": 12945 }, { "epoch": 2.126105967606183, "grad_norm": 0.32606508097107895, "learning_rate": 5.625871295071912e-06, "loss": 0.4416, "step": 12946 }, { "epoch": 2.1262701948145257, "grad_norm": 0.4127021304590552, "learning_rate": 5.625417232034384e-06, "loss": 0.45, "step": 12947 }, { "epoch": 2.126434422022869, "grad_norm": 0.41019044971934865, "learning_rate": 5.624963156773718e-06, "loss": 0.4496, "step": 12948 }, { "epoch": 2.1265986492312114, "grad_norm": 0.31596254214135494, "learning_rate": 5.624509069294845e-06, "loss": 0.4331, "step": 12949 }, { "epoch": 2.126762876439554, "grad_norm": 0.5525985814660054, "learning_rate": 5.6240549696026975e-06, "loss": 0.4287, "step": 12950 }, { "epoch": 2.1269271036478967, "grad_norm": 0.34763719641785923, "learning_rate": 5.623600857702207e-06, "loss": 0.4355, "step": 12951 }, { "epoch": 2.12709133085624, "grad_norm": 0.3335035146638821, "learning_rate": 5.6231467335983055e-06, "loss": 0.4353, "step": 12952 }, { "epoch": 2.1272555580645824, "grad_norm": 0.3217134828394556, "learning_rate": 5.622692597295925e-06, "loss": 0.4277, "step": 12953 }, { "epoch": 2.127419785272925, "grad_norm": 0.3820768103292228, "learning_rate": 5.622238448799999e-06, "loss": 0.4487, "step": 12954 }, { "epoch": 2.1275840124812677, "grad_norm": 0.4124972463916858, "learning_rate": 5.621784288115459e-06, "loss": 0.4457, "step": 12955 }, { "epoch": 2.127748239689611, "grad_norm": 0.41009248794214526, "learning_rate": 5.621330115247238e-06, "loss": 0.4431, "step": 12956 }, { "epoch": 2.1279124668979534, "grad_norm": 0.324419514380881, "learning_rate": 5.620875930200269e-06, "loss": 0.4171, "step": 12957 }, { "epoch": 2.128076694106296, "grad_norm": 0.44635639840875285, "learning_rate": 5.620421732979484e-06, "loss": 0.4335, "step": 12958 }, { "epoch": 2.1282409213146387, "grad_norm": 0.3072615539433608, "learning_rate": 5.619967523589817e-06, "loss": 0.427, "step": 12959 }, { "epoch": 2.1284051485229813, "grad_norm": 0.4381451304638284, "learning_rate": 5.619513302036201e-06, "loss": 0.4552, "step": 12960 }, { "epoch": 2.1285693757313244, "grad_norm": 0.6381434192285261, "learning_rate": 5.6190590683235686e-06, "loss": 0.4299, "step": 12961 }, { "epoch": 2.128733602939667, "grad_norm": 0.45459376598759865, "learning_rate": 5.618604822456854e-06, "loss": 0.4414, "step": 12962 }, { "epoch": 2.1288978301480097, "grad_norm": 0.49530451943785425, "learning_rate": 5.6181505644409904e-06, "loss": 0.462, "step": 12963 }, { "epoch": 2.1290620573563523, "grad_norm": 0.3506050848447432, "learning_rate": 5.617696294280911e-06, "loss": 0.4587, "step": 12964 }, { "epoch": 2.1292262845646954, "grad_norm": 0.3191382725265907, "learning_rate": 5.617242011981551e-06, "loss": 0.4386, "step": 12965 }, { "epoch": 2.129390511773038, "grad_norm": 0.3174610775036302, "learning_rate": 5.616787717547844e-06, "loss": 0.4326, "step": 12966 }, { "epoch": 2.1295547389813807, "grad_norm": 0.537093039177515, "learning_rate": 5.616333410984723e-06, "loss": 0.4605, "step": 12967 }, { "epoch": 2.1297189661897233, "grad_norm": 0.31637109473600866, "learning_rate": 5.615879092297121e-06, "loss": 0.4557, "step": 12968 }, { "epoch": 2.1298831933980664, "grad_norm": 0.3918704384575548, "learning_rate": 5.615424761489978e-06, "loss": 0.4314, "step": 12969 }, { "epoch": 2.130047420606409, "grad_norm": 0.35561557929773774, "learning_rate": 5.6149704185682215e-06, "loss": 0.4345, "step": 12970 }, { "epoch": 2.1302116478147517, "grad_norm": 0.4669581383106852, "learning_rate": 5.614516063536791e-06, "loss": 0.4602, "step": 12971 }, { "epoch": 2.1303758750230943, "grad_norm": 0.398092381638841, "learning_rate": 5.614061696400619e-06, "loss": 0.4342, "step": 12972 }, { "epoch": 2.1305401022314374, "grad_norm": 0.5613327187613648, "learning_rate": 5.6136073171646404e-06, "loss": 0.4566, "step": 12973 }, { "epoch": 2.13070432943978, "grad_norm": 0.34100163249184556, "learning_rate": 5.6131529258337906e-06, "loss": 0.444, "step": 12974 }, { "epoch": 2.1308685566481227, "grad_norm": 0.4985210923911517, "learning_rate": 5.612698522413005e-06, "loss": 0.4402, "step": 12975 }, { "epoch": 2.1310327838564653, "grad_norm": 0.3421448979356347, "learning_rate": 5.61224410690722e-06, "loss": 0.4419, "step": 12976 }, { "epoch": 2.131197011064808, "grad_norm": 0.34251735534570665, "learning_rate": 5.611789679321369e-06, "loss": 0.4548, "step": 12977 }, { "epoch": 2.131361238273151, "grad_norm": 0.3535445694441817, "learning_rate": 5.611335239660387e-06, "loss": 0.4449, "step": 12978 }, { "epoch": 2.1315254654814937, "grad_norm": 0.32032005716195605, "learning_rate": 5.610880787929211e-06, "loss": 0.454, "step": 12979 }, { "epoch": 2.1316896926898363, "grad_norm": 0.32004352432492167, "learning_rate": 5.610426324132778e-06, "loss": 0.4466, "step": 12980 }, { "epoch": 2.131853919898179, "grad_norm": 0.29014953643068214, "learning_rate": 5.6099718482760235e-06, "loss": 0.4714, "step": 12981 }, { "epoch": 2.132018147106522, "grad_norm": 0.34390516599209603, "learning_rate": 5.609517360363881e-06, "loss": 0.4304, "step": 12982 }, { "epoch": 2.1321823743148647, "grad_norm": 0.3213270673623128, "learning_rate": 5.6090628604012875e-06, "loss": 0.4484, "step": 12983 }, { "epoch": 2.1323466015232073, "grad_norm": 0.3058927295010632, "learning_rate": 5.608608348393181e-06, "loss": 0.4505, "step": 12984 }, { "epoch": 2.13251082873155, "grad_norm": 0.4679978012777797, "learning_rate": 5.608153824344498e-06, "loss": 0.4449, "step": 12985 }, { "epoch": 2.132675055939893, "grad_norm": 0.3197622088020438, "learning_rate": 5.607699288260174e-06, "loss": 0.4454, "step": 12986 }, { "epoch": 2.1328392831482357, "grad_norm": 0.27578517030301564, "learning_rate": 5.607244740145145e-06, "loss": 0.4387, "step": 12987 }, { "epoch": 2.1330035103565783, "grad_norm": 0.395659876071687, "learning_rate": 5.606790180004349e-06, "loss": 0.4415, "step": 12988 }, { "epoch": 2.133167737564921, "grad_norm": 0.3075443442701608, "learning_rate": 5.6063356078427225e-06, "loss": 0.4536, "step": 12989 }, { "epoch": 2.133331964773264, "grad_norm": 0.41313177332432993, "learning_rate": 5.605881023665203e-06, "loss": 0.4289, "step": 12990 }, { "epoch": 2.1334961919816067, "grad_norm": 0.3571092549071176, "learning_rate": 5.605426427476729e-06, "loss": 0.4664, "step": 12991 }, { "epoch": 2.1336604191899493, "grad_norm": 0.39056888922382665, "learning_rate": 5.604971819282235e-06, "loss": 0.4258, "step": 12992 }, { "epoch": 2.133824646398292, "grad_norm": 0.46537769479849345, "learning_rate": 5.60451719908666e-06, "loss": 0.449, "step": 12993 }, { "epoch": 2.1339888736066346, "grad_norm": 0.32186461409923345, "learning_rate": 5.604062566894941e-06, "loss": 0.4595, "step": 12994 }, { "epoch": 2.1341531008149777, "grad_norm": 0.31168989365248334, "learning_rate": 5.603607922712017e-06, "loss": 0.4561, "step": 12995 }, { "epoch": 2.1343173280233203, "grad_norm": 0.31585319111399035, "learning_rate": 5.603153266542826e-06, "loss": 0.431, "step": 12996 }, { "epoch": 2.134481555231663, "grad_norm": 0.30053858074909556, "learning_rate": 5.602698598392304e-06, "loss": 0.4168, "step": 12997 }, { "epoch": 2.1346457824400056, "grad_norm": 0.36234676368672447, "learning_rate": 5.602243918265391e-06, "loss": 0.4358, "step": 12998 }, { "epoch": 2.1348100096483487, "grad_norm": 0.33428480744979727, "learning_rate": 5.601789226167023e-06, "loss": 0.4185, "step": 12999 }, { "epoch": 2.1349742368566913, "grad_norm": 0.3244302536241494, "learning_rate": 5.601334522102142e-06, "loss": 0.4707, "step": 13000 }, { "epoch": 2.135138464065034, "grad_norm": 0.3502757192894509, "learning_rate": 5.600879806075683e-06, "loss": 0.4303, "step": 13001 }, { "epoch": 2.1353026912733766, "grad_norm": 0.3685548422888394, "learning_rate": 5.600425078092588e-06, "loss": 0.436, "step": 13002 }, { "epoch": 2.1354669184817197, "grad_norm": 0.2929653807287389, "learning_rate": 5.599970338157792e-06, "loss": 0.4328, "step": 13003 }, { "epoch": 2.1356311456900623, "grad_norm": 0.28914285602883866, "learning_rate": 5.599515586276236e-06, "loss": 0.4346, "step": 13004 }, { "epoch": 2.135795372898405, "grad_norm": 0.3748436051569583, "learning_rate": 5.59906082245286e-06, "loss": 0.4497, "step": 13005 }, { "epoch": 2.1359596001067476, "grad_norm": 0.4301403569066467, "learning_rate": 5.598606046692603e-06, "loss": 0.449, "step": 13006 }, { "epoch": 2.1361238273150907, "grad_norm": 0.2750459024303177, "learning_rate": 5.598151259000401e-06, "loss": 0.437, "step": 13007 }, { "epoch": 2.1362880545234333, "grad_norm": 0.32575515399845445, "learning_rate": 5.597696459381197e-06, "loss": 0.4617, "step": 13008 }, { "epoch": 2.136452281731776, "grad_norm": 0.2996017078795059, "learning_rate": 5.597241647839928e-06, "loss": 0.4336, "step": 13009 }, { "epoch": 2.1366165089401186, "grad_norm": 0.3353749243950319, "learning_rate": 5.596786824381538e-06, "loss": 0.4374, "step": 13010 }, { "epoch": 2.136780736148461, "grad_norm": 0.3015536802983604, "learning_rate": 5.596331989010964e-06, "loss": 0.4537, "step": 13011 }, { "epoch": 2.1369449633568043, "grad_norm": 0.7102526684523022, "learning_rate": 5.595877141733144e-06, "loss": 0.4264, "step": 13012 }, { "epoch": 2.137109190565147, "grad_norm": 1.0802129528539435, "learning_rate": 5.595422282553021e-06, "loss": 0.4365, "step": 13013 }, { "epoch": 2.1372734177734896, "grad_norm": 0.3765904066273181, "learning_rate": 5.594967411475532e-06, "loss": 0.4365, "step": 13014 }, { "epoch": 2.137437644981832, "grad_norm": 0.3296267997367895, "learning_rate": 5.594512528505624e-06, "loss": 0.452, "step": 13015 }, { "epoch": 2.1376018721901753, "grad_norm": 0.32635813808393965, "learning_rate": 5.59405763364823e-06, "loss": 0.4335, "step": 13016 }, { "epoch": 2.137766099398518, "grad_norm": 0.3054992350198616, "learning_rate": 5.593602726908295e-06, "loss": 0.441, "step": 13017 }, { "epoch": 2.1379303266068606, "grad_norm": 0.3160707553495685, "learning_rate": 5.593147808290756e-06, "loss": 0.4384, "step": 13018 }, { "epoch": 2.138094553815203, "grad_norm": 0.28928587834988345, "learning_rate": 5.592692877800559e-06, "loss": 0.4279, "step": 13019 }, { "epoch": 2.1382587810235463, "grad_norm": 0.401047698366232, "learning_rate": 5.592237935442642e-06, "loss": 0.4183, "step": 13020 }, { "epoch": 2.138423008231889, "grad_norm": 0.3045454687637298, "learning_rate": 5.591782981221946e-06, "loss": 0.4626, "step": 13021 }, { "epoch": 2.1385872354402315, "grad_norm": 0.2681046998791144, "learning_rate": 5.591328015143411e-06, "loss": 0.44, "step": 13022 }, { "epoch": 2.138751462648574, "grad_norm": 0.4218919748405687, "learning_rate": 5.590873037211982e-06, "loss": 0.4417, "step": 13023 }, { "epoch": 2.1389156898569173, "grad_norm": 0.2793908490461997, "learning_rate": 5.590418047432597e-06, "loss": 0.4348, "step": 13024 }, { "epoch": 2.13907991706526, "grad_norm": 0.3172246138905424, "learning_rate": 5.589963045810202e-06, "loss": 0.446, "step": 13025 }, { "epoch": 2.1392441442736025, "grad_norm": 0.312775867484758, "learning_rate": 5.589508032349734e-06, "loss": 0.4463, "step": 13026 }, { "epoch": 2.139408371481945, "grad_norm": 0.3568040949452443, "learning_rate": 5.589053007056136e-06, "loss": 0.4454, "step": 13027 }, { "epoch": 2.139572598690288, "grad_norm": 0.3577210247324337, "learning_rate": 5.588597969934353e-06, "loss": 0.4448, "step": 13028 }, { "epoch": 2.139736825898631, "grad_norm": 0.33210112769047656, "learning_rate": 5.588142920989323e-06, "loss": 0.443, "step": 13029 }, { "epoch": 2.1399010531069735, "grad_norm": 0.3524122026419944, "learning_rate": 5.587687860225991e-06, "loss": 0.4571, "step": 13030 }, { "epoch": 2.140065280315316, "grad_norm": 0.5054906330538123, "learning_rate": 5.5872327876493e-06, "loss": 0.4429, "step": 13031 }, { "epoch": 2.140229507523659, "grad_norm": 0.32656193120293153, "learning_rate": 5.58677770326419e-06, "loss": 0.4489, "step": 13032 }, { "epoch": 2.140393734732002, "grad_norm": 0.3670053673951288, "learning_rate": 5.586322607075604e-06, "loss": 0.4258, "step": 13033 }, { "epoch": 2.1405579619403445, "grad_norm": 0.3701069054773225, "learning_rate": 5.585867499088488e-06, "loss": 0.4419, "step": 13034 }, { "epoch": 2.140722189148687, "grad_norm": 0.34285488173179895, "learning_rate": 5.5854123793077805e-06, "loss": 0.4371, "step": 13035 }, { "epoch": 2.14088641635703, "grad_norm": 0.2893085117521016, "learning_rate": 5.5849572477384276e-06, "loss": 0.4504, "step": 13036 }, { "epoch": 2.141050643565373, "grad_norm": 0.4135270238157861, "learning_rate": 5.584502104385371e-06, "loss": 0.4536, "step": 13037 }, { "epoch": 2.1412148707737155, "grad_norm": 0.9039359209112584, "learning_rate": 5.584046949253554e-06, "loss": 0.4541, "step": 13038 }, { "epoch": 2.141379097982058, "grad_norm": 0.4604628288870824, "learning_rate": 5.583591782347923e-06, "loss": 0.4627, "step": 13039 }, { "epoch": 2.141543325190401, "grad_norm": 0.361287982585956, "learning_rate": 5.583136603673417e-06, "loss": 0.4319, "step": 13040 }, { "epoch": 2.141707552398744, "grad_norm": 0.28686311031123324, "learning_rate": 5.582681413234982e-06, "loss": 0.4349, "step": 13041 }, { "epoch": 2.1418717796070865, "grad_norm": 0.35422337043200885, "learning_rate": 5.582226211037562e-06, "loss": 0.4418, "step": 13042 }, { "epoch": 2.142036006815429, "grad_norm": 0.3200420784217416, "learning_rate": 5.5817709970861e-06, "loss": 0.4364, "step": 13043 }, { "epoch": 2.142200234023772, "grad_norm": 0.35940330185959896, "learning_rate": 5.581315771385542e-06, "loss": 0.4302, "step": 13044 }, { "epoch": 2.1423644612321144, "grad_norm": 0.288818054045889, "learning_rate": 5.580860533940831e-06, "loss": 0.4481, "step": 13045 }, { "epoch": 2.1425286884404575, "grad_norm": 0.34310898496235703, "learning_rate": 5.5804052847569096e-06, "loss": 0.4445, "step": 13046 }, { "epoch": 2.1426929156488, "grad_norm": 0.3507973417433449, "learning_rate": 5.579950023838725e-06, "loss": 0.4413, "step": 13047 }, { "epoch": 2.142857142857143, "grad_norm": 0.40522847529414024, "learning_rate": 5.57949475119122e-06, "loss": 0.4522, "step": 13048 }, { "epoch": 2.1430213700654854, "grad_norm": 0.3351266446909605, "learning_rate": 5.579039466819341e-06, "loss": 0.4465, "step": 13049 }, { "epoch": 2.1431855972738285, "grad_norm": 0.3245795390439118, "learning_rate": 5.578584170728031e-06, "loss": 0.4415, "step": 13050 }, { "epoch": 2.143349824482171, "grad_norm": 0.2817301746950823, "learning_rate": 5.578128862922235e-06, "loss": 0.443, "step": 13051 }, { "epoch": 2.143514051690514, "grad_norm": 0.3772109800765201, "learning_rate": 5.5776735434069e-06, "loss": 0.452, "step": 13052 }, { "epoch": 2.1436782788988564, "grad_norm": 0.41514420047183903, "learning_rate": 5.577218212186968e-06, "loss": 0.4514, "step": 13053 }, { "epoch": 2.1438425061071995, "grad_norm": 0.34853574492762074, "learning_rate": 5.576762869267388e-06, "loss": 0.4421, "step": 13054 }, { "epoch": 2.144006733315542, "grad_norm": 0.37626637398815654, "learning_rate": 5.576307514653103e-06, "loss": 0.4602, "step": 13055 }, { "epoch": 2.144170960523885, "grad_norm": 0.3908924960128689, "learning_rate": 5.5758521483490605e-06, "loss": 0.4251, "step": 13056 }, { "epoch": 2.1443351877322274, "grad_norm": 0.3037037346436557, "learning_rate": 5.575396770360205e-06, "loss": 0.4376, "step": 13057 }, { "epoch": 2.1444994149405705, "grad_norm": 0.39166416556086264, "learning_rate": 5.5749413806914825e-06, "loss": 0.4551, "step": 13058 }, { "epoch": 2.144663642148913, "grad_norm": 0.2758285721291899, "learning_rate": 5.57448597934784e-06, "loss": 0.4538, "step": 13059 }, { "epoch": 2.144827869357256, "grad_norm": 0.30518189424114334, "learning_rate": 5.57403056633422e-06, "loss": 0.4432, "step": 13060 }, { "epoch": 2.1449920965655984, "grad_norm": 0.3021189985917047, "learning_rate": 5.573575141655574e-06, "loss": 0.4256, "step": 13061 }, { "epoch": 2.145156323773941, "grad_norm": 0.34734420320468, "learning_rate": 5.573119705316844e-06, "loss": 0.4478, "step": 13062 }, { "epoch": 2.145320550982284, "grad_norm": 0.48479298084851663, "learning_rate": 5.572664257322978e-06, "loss": 0.4456, "step": 13063 }, { "epoch": 2.1454847781906268, "grad_norm": 0.9737469501992383, "learning_rate": 5.572208797678923e-06, "loss": 0.4519, "step": 13064 }, { "epoch": 2.1456490053989694, "grad_norm": 0.373845968709997, "learning_rate": 5.571753326389628e-06, "loss": 0.4225, "step": 13065 }, { "epoch": 2.145813232607312, "grad_norm": 0.32952687545014325, "learning_rate": 5.571297843460035e-06, "loss": 0.4551, "step": 13066 }, { "epoch": 2.145977459815655, "grad_norm": 0.28185179231913293, "learning_rate": 5.570842348895093e-06, "loss": 0.4522, "step": 13067 }, { "epoch": 2.1461416870239978, "grad_norm": 0.3750226506331507, "learning_rate": 5.570386842699751e-06, "loss": 0.4388, "step": 13068 }, { "epoch": 2.1463059142323404, "grad_norm": 0.363792534235299, "learning_rate": 5.569931324878955e-06, "loss": 0.4566, "step": 13069 }, { "epoch": 2.146470141440683, "grad_norm": 0.4173978857711169, "learning_rate": 5.56947579543765e-06, "loss": 0.4456, "step": 13070 }, { "epoch": 2.146634368649026, "grad_norm": 0.3617383410012619, "learning_rate": 5.5690202543807866e-06, "loss": 0.4536, "step": 13071 }, { "epoch": 2.1467985958573688, "grad_norm": 0.31683670414164145, "learning_rate": 5.568564701713312e-06, "loss": 0.4339, "step": 13072 }, { "epoch": 2.1469628230657114, "grad_norm": 0.6722978519914278, "learning_rate": 5.568109137440174e-06, "loss": 0.4453, "step": 13073 }, { "epoch": 2.147127050274054, "grad_norm": 0.31359460500682335, "learning_rate": 5.567653561566319e-06, "loss": 0.4406, "step": 13074 }, { "epoch": 2.147291277482397, "grad_norm": 0.3756857719735049, "learning_rate": 5.567197974096695e-06, "loss": 0.4512, "step": 13075 }, { "epoch": 2.1474555046907398, "grad_norm": 0.28206587584500525, "learning_rate": 5.566742375036252e-06, "loss": 0.4431, "step": 13076 }, { "epoch": 2.1476197318990824, "grad_norm": 0.3034882846676163, "learning_rate": 5.566286764389937e-06, "loss": 0.4387, "step": 13077 }, { "epoch": 2.147783959107425, "grad_norm": 0.33707031344715366, "learning_rate": 5.5658311421627e-06, "loss": 0.4486, "step": 13078 }, { "epoch": 2.1479481863157677, "grad_norm": 0.43035245643608405, "learning_rate": 5.5653755083594865e-06, "loss": 0.4515, "step": 13079 }, { "epoch": 2.1481124135241108, "grad_norm": 0.3180318636784757, "learning_rate": 5.564919862985248e-06, "loss": 0.4323, "step": 13080 }, { "epoch": 2.1482766407324534, "grad_norm": 0.7311184003595356, "learning_rate": 5.564464206044931e-06, "loss": 0.462, "step": 13081 }, { "epoch": 2.148440867940796, "grad_norm": 0.3498635077663437, "learning_rate": 5.5640085375434855e-06, "loss": 0.4308, "step": 13082 }, { "epoch": 2.1486050951491387, "grad_norm": 0.9829592808567664, "learning_rate": 5.5635528574858614e-06, "loss": 0.4572, "step": 13083 }, { "epoch": 2.1487693223574817, "grad_norm": 0.3950510374950475, "learning_rate": 5.563097165877006e-06, "loss": 0.4555, "step": 13084 }, { "epoch": 2.1489335495658244, "grad_norm": 1.013249951682912, "learning_rate": 5.562641462721869e-06, "loss": 0.4385, "step": 13085 }, { "epoch": 2.149097776774167, "grad_norm": 0.3005075649363099, "learning_rate": 5.562185748025402e-06, "loss": 0.4302, "step": 13086 }, { "epoch": 2.1492620039825097, "grad_norm": 0.7128528813599668, "learning_rate": 5.561730021792551e-06, "loss": 0.4462, "step": 13087 }, { "epoch": 2.1494262311908527, "grad_norm": 0.34082831138362135, "learning_rate": 5.561274284028269e-06, "loss": 0.4103, "step": 13088 }, { "epoch": 2.1495904583991954, "grad_norm": 0.27912374758491476, "learning_rate": 5.560818534737502e-06, "loss": 0.4734, "step": 13089 }, { "epoch": 2.149754685607538, "grad_norm": 0.41419567612684344, "learning_rate": 5.560362773925204e-06, "loss": 0.4371, "step": 13090 }, { "epoch": 2.1499189128158807, "grad_norm": 0.533304143338659, "learning_rate": 5.559907001596322e-06, "loss": 0.4488, "step": 13091 }, { "epoch": 2.1500831400242237, "grad_norm": 0.3287216931610325, "learning_rate": 5.559451217755807e-06, "loss": 0.423, "step": 13092 }, { "epoch": 2.1502473672325664, "grad_norm": 0.2987444710279113, "learning_rate": 5.55899542240861e-06, "loss": 0.4284, "step": 13093 }, { "epoch": 2.150411594440909, "grad_norm": 0.3915364216911103, "learning_rate": 5.558539615559681e-06, "loss": 0.435, "step": 13094 }, { "epoch": 2.1505758216492517, "grad_norm": 0.34923159544476823, "learning_rate": 5.55808379721397e-06, "loss": 0.4442, "step": 13095 }, { "epoch": 2.1507400488575943, "grad_norm": 0.3603631249694652, "learning_rate": 5.557627967376427e-06, "loss": 0.4224, "step": 13096 }, { "epoch": 2.1509042760659374, "grad_norm": 0.47614439422632243, "learning_rate": 5.557172126052005e-06, "loss": 0.4412, "step": 13097 }, { "epoch": 2.15106850327428, "grad_norm": 0.3048139485131453, "learning_rate": 5.556716273245654e-06, "loss": 0.4444, "step": 13098 }, { "epoch": 2.1512327304826226, "grad_norm": 0.3342919021364949, "learning_rate": 5.556260408962323e-06, "loss": 0.4726, "step": 13099 }, { "epoch": 2.1513969576909653, "grad_norm": 0.29465856880907676, "learning_rate": 5.5558045332069645e-06, "loss": 0.4508, "step": 13100 }, { "epoch": 2.1515611848993084, "grad_norm": 0.2961227755518122, "learning_rate": 5.555348645984531e-06, "loss": 0.4494, "step": 13101 }, { "epoch": 2.151725412107651, "grad_norm": 0.27934800568638346, "learning_rate": 5.554892747299973e-06, "loss": 0.4391, "step": 13102 }, { "epoch": 2.1518896393159936, "grad_norm": 0.33111901066285837, "learning_rate": 5.554436837158242e-06, "loss": 0.4348, "step": 13103 }, { "epoch": 2.1520538665243363, "grad_norm": 0.3684615911145497, "learning_rate": 5.553980915564289e-06, "loss": 0.4521, "step": 13104 }, { "epoch": 2.1522180937326794, "grad_norm": 0.4292233032330315, "learning_rate": 5.553524982523065e-06, "loss": 0.4528, "step": 13105 }, { "epoch": 2.152382320941022, "grad_norm": 0.43574829372998947, "learning_rate": 5.553069038039525e-06, "loss": 0.4537, "step": 13106 }, { "epoch": 2.1525465481493646, "grad_norm": 0.294165085934928, "learning_rate": 5.55261308211862e-06, "loss": 0.4509, "step": 13107 }, { "epoch": 2.1527107753577073, "grad_norm": 0.31222154571936955, "learning_rate": 5.5521571147653e-06, "loss": 0.4509, "step": 13108 }, { "epoch": 2.1528750025660504, "grad_norm": 0.27790905680021066, "learning_rate": 5.551701135984519e-06, "loss": 0.4542, "step": 13109 }, { "epoch": 2.153039229774393, "grad_norm": 0.4239281111220191, "learning_rate": 5.551245145781228e-06, "loss": 0.4555, "step": 13110 }, { "epoch": 2.1532034569827356, "grad_norm": 0.33405406926044195, "learning_rate": 5.550789144160381e-06, "loss": 0.4255, "step": 13111 }, { "epoch": 2.1533676841910783, "grad_norm": 0.3045577111345963, "learning_rate": 5.550333131126931e-06, "loss": 0.4337, "step": 13112 }, { "epoch": 2.153531911399421, "grad_norm": 0.3667448605274694, "learning_rate": 5.549877106685829e-06, "loss": 0.4398, "step": 13113 }, { "epoch": 2.153696138607764, "grad_norm": 0.3587313899783721, "learning_rate": 5.549421070842028e-06, "loss": 0.4337, "step": 13114 }, { "epoch": 2.1538603658161066, "grad_norm": 0.31611137498570135, "learning_rate": 5.548965023600482e-06, "loss": 0.43, "step": 13115 }, { "epoch": 2.1540245930244493, "grad_norm": 0.41761280354962393, "learning_rate": 5.548508964966144e-06, "loss": 0.4389, "step": 13116 }, { "epoch": 2.154188820232792, "grad_norm": 0.3276668465800213, "learning_rate": 5.548052894943968e-06, "loss": 0.4386, "step": 13117 }, { "epoch": 2.154353047441135, "grad_norm": 0.3058632740019484, "learning_rate": 5.547596813538905e-06, "loss": 0.4275, "step": 13118 }, { "epoch": 2.1545172746494776, "grad_norm": 0.3134859102629066, "learning_rate": 5.547140720755911e-06, "loss": 0.4368, "step": 13119 }, { "epoch": 2.1546815018578203, "grad_norm": 1.9136277979976022, "learning_rate": 5.546684616599937e-06, "loss": 0.4645, "step": 13120 }, { "epoch": 2.154845729066163, "grad_norm": 0.3081164038829003, "learning_rate": 5.5462285010759385e-06, "loss": 0.4404, "step": 13121 }, { "epoch": 2.155009956274506, "grad_norm": 0.7771526801527265, "learning_rate": 5.545772374188871e-06, "loss": 0.4397, "step": 13122 }, { "epoch": 2.1551741834828486, "grad_norm": 0.2715873990185136, "learning_rate": 5.545316235943686e-06, "loss": 0.4275, "step": 13123 }, { "epoch": 2.1553384106911913, "grad_norm": 0.33280980643281594, "learning_rate": 5.544860086345337e-06, "loss": 0.4512, "step": 13124 }, { "epoch": 2.155502637899534, "grad_norm": 0.3851568549443537, "learning_rate": 5.54440392539878e-06, "loss": 0.4319, "step": 13125 }, { "epoch": 2.155666865107877, "grad_norm": 0.3063502288189881, "learning_rate": 5.5439477531089685e-06, "loss": 0.4368, "step": 13126 }, { "epoch": 2.1558310923162196, "grad_norm": 0.32417940549514135, "learning_rate": 5.543491569480859e-06, "loss": 0.4548, "step": 13127 }, { "epoch": 2.1559953195245622, "grad_norm": 0.2819287851853112, "learning_rate": 5.543035374519403e-06, "loss": 0.4314, "step": 13128 }, { "epoch": 2.156159546732905, "grad_norm": 0.3343017480828349, "learning_rate": 5.542579168229557e-06, "loss": 0.4413, "step": 13129 }, { "epoch": 2.1563237739412475, "grad_norm": 0.30548747472069426, "learning_rate": 5.542122950616274e-06, "loss": 0.443, "step": 13130 }, { "epoch": 2.1564880011495906, "grad_norm": 0.27993439186124225, "learning_rate": 5.5416667216845124e-06, "loss": 0.4386, "step": 13131 }, { "epoch": 2.1566522283579332, "grad_norm": 0.32577795398226056, "learning_rate": 5.541210481439225e-06, "loss": 0.4435, "step": 13132 }, { "epoch": 2.156816455566276, "grad_norm": 0.32823780085029325, "learning_rate": 5.540754229885367e-06, "loss": 0.4507, "step": 13133 }, { "epoch": 2.1569806827746185, "grad_norm": 0.3047427313061152, "learning_rate": 5.5402979670278946e-06, "loss": 0.43, "step": 13134 }, { "epoch": 2.1571449099829616, "grad_norm": 0.340813084378149, "learning_rate": 5.539841692871761e-06, "loss": 0.4279, "step": 13135 }, { "epoch": 2.1573091371913042, "grad_norm": 0.46663307065196113, "learning_rate": 5.539385407421925e-06, "loss": 0.4634, "step": 13136 }, { "epoch": 2.157473364399647, "grad_norm": 0.37566933247851336, "learning_rate": 5.538929110683342e-06, "loss": 0.4472, "step": 13137 }, { "epoch": 2.1576375916079895, "grad_norm": 0.30868472477797043, "learning_rate": 5.538472802660965e-06, "loss": 0.4586, "step": 13138 }, { "epoch": 2.1578018188163326, "grad_norm": 0.34066658358138063, "learning_rate": 5.538016483359751e-06, "loss": 0.4448, "step": 13139 }, { "epoch": 2.1579660460246752, "grad_norm": 0.4073911081151271, "learning_rate": 5.537560152784659e-06, "loss": 0.4563, "step": 13140 }, { "epoch": 2.158130273233018, "grad_norm": 0.29822675903780166, "learning_rate": 5.537103810940641e-06, "loss": 0.4621, "step": 13141 }, { "epoch": 2.1582945004413605, "grad_norm": 0.31971502442009747, "learning_rate": 5.536647457832656e-06, "loss": 0.4553, "step": 13142 }, { "epoch": 2.1584587276497036, "grad_norm": 0.3426810765747357, "learning_rate": 5.53619109346566e-06, "loss": 0.4477, "step": 13143 }, { "epoch": 2.1586229548580462, "grad_norm": 0.945124020682834, "learning_rate": 5.5357347178446086e-06, "loss": 0.4475, "step": 13144 }, { "epoch": 2.158787182066389, "grad_norm": 0.4918830844122075, "learning_rate": 5.535278330974459e-06, "loss": 0.4576, "step": 13145 }, { "epoch": 2.1589514092747315, "grad_norm": 0.3058756489796773, "learning_rate": 5.534821932860169e-06, "loss": 0.4337, "step": 13146 }, { "epoch": 2.159115636483074, "grad_norm": 0.3120707299763528, "learning_rate": 5.534365523506694e-06, "loss": 0.4735, "step": 13147 }, { "epoch": 2.1592798636914172, "grad_norm": 0.2846250730751281, "learning_rate": 5.5339091029189925e-06, "loss": 0.437, "step": 13148 }, { "epoch": 2.15944409089976, "grad_norm": 0.3201064753442044, "learning_rate": 5.53345267110202e-06, "loss": 0.469, "step": 13149 }, { "epoch": 2.1596083181081025, "grad_norm": 0.29766138661669544, "learning_rate": 5.532996228060735e-06, "loss": 0.429, "step": 13150 }, { "epoch": 2.159772545316445, "grad_norm": 0.2846032186777436, "learning_rate": 5.532539773800095e-06, "loss": 0.4401, "step": 13151 }, { "epoch": 2.159936772524788, "grad_norm": 0.3129947601427393, "learning_rate": 5.5320833083250565e-06, "loss": 0.446, "step": 13152 }, { "epoch": 2.160100999733131, "grad_norm": 0.3144839696313304, "learning_rate": 5.531626831640578e-06, "loss": 0.452, "step": 13153 }, { "epoch": 2.1602652269414735, "grad_norm": 0.2959377033395708, "learning_rate": 5.531170343751617e-06, "loss": 0.4373, "step": 13154 }, { "epoch": 2.160429454149816, "grad_norm": 0.40203476934860716, "learning_rate": 5.530713844663132e-06, "loss": 0.4332, "step": 13155 }, { "epoch": 2.160593681358159, "grad_norm": 0.34219461068539536, "learning_rate": 5.530257334380081e-06, "loss": 0.4348, "step": 13156 }, { "epoch": 2.160757908566502, "grad_norm": 0.33771282218080734, "learning_rate": 5.529800812907421e-06, "loss": 0.4639, "step": 13157 }, { "epoch": 2.1609221357748445, "grad_norm": 0.31856610527953794, "learning_rate": 5.529344280250111e-06, "loss": 0.4474, "step": 13158 }, { "epoch": 2.161086362983187, "grad_norm": 0.3171282603923975, "learning_rate": 5.528887736413109e-06, "loss": 0.457, "step": 13159 }, { "epoch": 2.16125059019153, "grad_norm": 0.35217117196778364, "learning_rate": 5.528431181401375e-06, "loss": 0.4619, "step": 13160 }, { "epoch": 2.161414817399873, "grad_norm": 0.6060833146178379, "learning_rate": 5.527974615219866e-06, "loss": 0.4392, "step": 13161 }, { "epoch": 2.1615790446082155, "grad_norm": 0.589550773106566, "learning_rate": 5.527518037873542e-06, "loss": 0.4371, "step": 13162 }, { "epoch": 2.161743271816558, "grad_norm": 0.3145128961142026, "learning_rate": 5.527061449367359e-06, "loss": 0.4506, "step": 13163 }, { "epoch": 2.1619074990249008, "grad_norm": 0.28483661690939105, "learning_rate": 5.52660484970628e-06, "loss": 0.4407, "step": 13164 }, { "epoch": 2.162071726233244, "grad_norm": 0.31160106147746397, "learning_rate": 5.526148238895262e-06, "loss": 0.4325, "step": 13165 }, { "epoch": 2.1622359534415865, "grad_norm": 0.8214018457380634, "learning_rate": 5.525691616939266e-06, "loss": 0.449, "step": 13166 }, { "epoch": 2.162400180649929, "grad_norm": 0.42579552944763127, "learning_rate": 5.525234983843247e-06, "loss": 0.4375, "step": 13167 }, { "epoch": 2.1625644078582718, "grad_norm": 0.3172437304419483, "learning_rate": 5.524778339612168e-06, "loss": 0.4406, "step": 13168 }, { "epoch": 2.162728635066615, "grad_norm": 0.3581226513019574, "learning_rate": 5.5243216842509895e-06, "loss": 0.4547, "step": 13169 }, { "epoch": 2.1628928622749575, "grad_norm": 1.0570246497495848, "learning_rate": 5.523865017764668e-06, "loss": 0.4544, "step": 13170 }, { "epoch": 2.1630570894833, "grad_norm": 0.3390380057126517, "learning_rate": 5.523408340158167e-06, "loss": 0.4214, "step": 13171 }, { "epoch": 2.1632213166916427, "grad_norm": 0.36776919352984283, "learning_rate": 5.5229516514364426e-06, "loss": 0.4381, "step": 13172 }, { "epoch": 2.163385543899986, "grad_norm": 0.30982836563580607, "learning_rate": 5.522494951604457e-06, "loss": 0.4489, "step": 13173 }, { "epoch": 2.1635497711083285, "grad_norm": 0.44372920909803376, "learning_rate": 5.522038240667172e-06, "loss": 0.455, "step": 13174 }, { "epoch": 2.163713998316671, "grad_norm": 0.3466451063504817, "learning_rate": 5.521581518629544e-06, "loss": 0.4525, "step": 13175 }, { "epoch": 2.1638782255250137, "grad_norm": 0.3106973226661059, "learning_rate": 5.521124785496538e-06, "loss": 0.4483, "step": 13176 }, { "epoch": 2.164042452733357, "grad_norm": 0.28579331113902645, "learning_rate": 5.52066804127311e-06, "loss": 0.445, "step": 13177 }, { "epoch": 2.1642066799416995, "grad_norm": 0.26832241525859035, "learning_rate": 5.5202112859642245e-06, "loss": 0.4222, "step": 13178 }, { "epoch": 2.164370907150042, "grad_norm": 0.32216668813831933, "learning_rate": 5.51975451957484e-06, "loss": 0.4538, "step": 13179 }, { "epoch": 2.1645351343583847, "grad_norm": 0.34484591491438377, "learning_rate": 5.519297742109918e-06, "loss": 0.4371, "step": 13180 }, { "epoch": 2.1646993615667274, "grad_norm": 0.328862402254495, "learning_rate": 5.518840953574418e-06, "loss": 0.4318, "step": 13181 }, { "epoch": 2.1648635887750705, "grad_norm": 0.27757721400634844, "learning_rate": 5.518384153973306e-06, "loss": 0.4412, "step": 13182 }, { "epoch": 2.165027815983413, "grad_norm": 0.32631979517754256, "learning_rate": 5.517927343311538e-06, "loss": 0.4317, "step": 13183 }, { "epoch": 2.1651920431917557, "grad_norm": 0.27432476169664666, "learning_rate": 5.517470521594078e-06, "loss": 0.4293, "step": 13184 }, { "epoch": 2.1653562704000984, "grad_norm": 0.28966912344757134, "learning_rate": 5.517013688825888e-06, "loss": 0.4549, "step": 13185 }, { "epoch": 2.1655204976084415, "grad_norm": 0.6730067083934345, "learning_rate": 5.516556845011929e-06, "loss": 0.4474, "step": 13186 }, { "epoch": 2.165684724816784, "grad_norm": 0.29913936874253383, "learning_rate": 5.516099990157161e-06, "loss": 0.4675, "step": 13187 }, { "epoch": 2.1658489520251267, "grad_norm": 0.3424981637374919, "learning_rate": 5.515643124266546e-06, "loss": 0.4373, "step": 13188 }, { "epoch": 2.1660131792334694, "grad_norm": 0.30630459205370697, "learning_rate": 5.51518624734505e-06, "loss": 0.4465, "step": 13189 }, { "epoch": 2.1661774064418124, "grad_norm": 0.34883759198472747, "learning_rate": 5.514729359397632e-06, "loss": 0.4404, "step": 13190 }, { "epoch": 2.166341633650155, "grad_norm": 0.302842026651347, "learning_rate": 5.5142724604292555e-06, "loss": 0.4529, "step": 13191 }, { "epoch": 2.1665058608584977, "grad_norm": 0.3322886428250413, "learning_rate": 5.513815550444881e-06, "loss": 0.4315, "step": 13192 }, { "epoch": 2.1666700880668404, "grad_norm": 0.3008715144635831, "learning_rate": 5.513358629449472e-06, "loss": 0.4503, "step": 13193 }, { "epoch": 2.1668343152751834, "grad_norm": 0.31224920669877515, "learning_rate": 5.512901697447992e-06, "loss": 0.4351, "step": 13194 }, { "epoch": 2.166998542483526, "grad_norm": 0.28463358286996604, "learning_rate": 5.512444754445403e-06, "loss": 0.4423, "step": 13195 }, { "epoch": 2.1671627696918687, "grad_norm": 0.4034444644429901, "learning_rate": 5.511987800446668e-06, "loss": 0.4404, "step": 13196 }, { "epoch": 2.1673269969002114, "grad_norm": 0.38665482565892567, "learning_rate": 5.511530835456749e-06, "loss": 0.4366, "step": 13197 }, { "epoch": 2.167491224108554, "grad_norm": 0.3494804160464828, "learning_rate": 5.51107385948061e-06, "loss": 0.4525, "step": 13198 }, { "epoch": 2.167655451316897, "grad_norm": 0.3841231205093899, "learning_rate": 5.510616872523214e-06, "loss": 0.4509, "step": 13199 }, { "epoch": 2.1678196785252397, "grad_norm": 0.28514366579375827, "learning_rate": 5.510159874589527e-06, "loss": 0.4426, "step": 13200 }, { "epoch": 2.1679839057335824, "grad_norm": 0.3620023664594434, "learning_rate": 5.5097028656845065e-06, "loss": 0.4358, "step": 13201 }, { "epoch": 2.168148132941925, "grad_norm": 0.32201321607882394, "learning_rate": 5.509245845813121e-06, "loss": 0.4403, "step": 13202 }, { "epoch": 2.168312360150268, "grad_norm": 0.3210763284964427, "learning_rate": 5.508788814980333e-06, "loss": 0.4568, "step": 13203 }, { "epoch": 2.1684765873586107, "grad_norm": 0.3317228382026454, "learning_rate": 5.508331773191104e-06, "loss": 0.432, "step": 13204 }, { "epoch": 2.1686408145669533, "grad_norm": 0.30859052454192315, "learning_rate": 5.507874720450403e-06, "loss": 0.4565, "step": 13205 }, { "epoch": 2.168805041775296, "grad_norm": 0.27373704128356613, "learning_rate": 5.507417656763189e-06, "loss": 0.4408, "step": 13206 }, { "epoch": 2.168969268983639, "grad_norm": 0.390830204231793, "learning_rate": 5.506960582134428e-06, "loss": 0.4389, "step": 13207 }, { "epoch": 2.1691334961919817, "grad_norm": 0.316536079441099, "learning_rate": 5.506503496569085e-06, "loss": 0.4676, "step": 13208 }, { "epoch": 2.1692977234003243, "grad_norm": 0.30981856257732066, "learning_rate": 5.506046400072122e-06, "loss": 0.4464, "step": 13209 }, { "epoch": 2.169461950608667, "grad_norm": 0.8040089066478628, "learning_rate": 5.505589292648508e-06, "loss": 0.4571, "step": 13210 }, { "epoch": 2.16962617781701, "grad_norm": 0.29944619080582335, "learning_rate": 5.505132174303204e-06, "loss": 0.4429, "step": 13211 }, { "epoch": 2.1697904050253527, "grad_norm": 0.2931380112774569, "learning_rate": 5.504675045041174e-06, "loss": 0.432, "step": 13212 }, { "epoch": 2.1699546322336953, "grad_norm": 0.32723713071086563, "learning_rate": 5.504217904867386e-06, "loss": 0.4673, "step": 13213 }, { "epoch": 2.170118859442038, "grad_norm": 0.2680010066570768, "learning_rate": 5.503760753786804e-06, "loss": 0.4677, "step": 13214 }, { "epoch": 2.1702830866503806, "grad_norm": 0.29538207235909225, "learning_rate": 5.503303591804392e-06, "loss": 0.4396, "step": 13215 }, { "epoch": 2.1704473138587237, "grad_norm": 0.3072993424577965, "learning_rate": 5.5028464189251155e-06, "loss": 0.4505, "step": 13216 }, { "epoch": 2.1706115410670663, "grad_norm": 0.311688439811001, "learning_rate": 5.502389235153941e-06, "loss": 0.4313, "step": 13217 }, { "epoch": 2.170775768275409, "grad_norm": 0.35361331964980885, "learning_rate": 5.501932040495832e-06, "loss": 0.4646, "step": 13218 }, { "epoch": 2.1709399954837516, "grad_norm": 0.339514386407374, "learning_rate": 5.501474834955756e-06, "loss": 0.4427, "step": 13219 }, { "epoch": 2.1711042226920947, "grad_norm": 0.3359321760848037, "learning_rate": 5.501017618538679e-06, "loss": 0.4365, "step": 13220 }, { "epoch": 2.1712684499004373, "grad_norm": 0.29013968578222354, "learning_rate": 5.500560391249565e-06, "loss": 0.4308, "step": 13221 }, { "epoch": 2.17143267710878, "grad_norm": 0.33718799981468217, "learning_rate": 5.5001031530933794e-06, "loss": 0.4393, "step": 13222 }, { "epoch": 2.1715969043171226, "grad_norm": 0.3303170855751071, "learning_rate": 5.499645904075091e-06, "loss": 0.4309, "step": 13223 }, { "epoch": 2.1717611315254657, "grad_norm": 0.3150015557314923, "learning_rate": 5.499188644199664e-06, "loss": 0.4339, "step": 13224 }, { "epoch": 2.1719253587338083, "grad_norm": 0.3075108331038909, "learning_rate": 5.4987313734720665e-06, "loss": 0.451, "step": 13225 }, { "epoch": 2.172089585942151, "grad_norm": 0.33633278099740577, "learning_rate": 5.4982740918972625e-06, "loss": 0.431, "step": 13226 }, { "epoch": 2.1722538131504936, "grad_norm": 0.2829229596881379, "learning_rate": 5.497816799480219e-06, "loss": 0.4352, "step": 13227 }, { "epoch": 2.1724180403588367, "grad_norm": 0.2738157895536735, "learning_rate": 5.497359496225905e-06, "loss": 0.4403, "step": 13228 }, { "epoch": 2.1725822675671793, "grad_norm": 0.3134244993684418, "learning_rate": 5.496902182139286e-06, "loss": 0.4459, "step": 13229 }, { "epoch": 2.172746494775522, "grad_norm": 0.3357126734986133, "learning_rate": 5.496444857225326e-06, "loss": 0.4443, "step": 13230 }, { "epoch": 2.1729107219838646, "grad_norm": 0.3924307054131407, "learning_rate": 5.495987521488996e-06, "loss": 0.437, "step": 13231 }, { "epoch": 2.1730749491922072, "grad_norm": 0.3104051211706088, "learning_rate": 5.495530174935261e-06, "loss": 0.446, "step": 13232 }, { "epoch": 2.1732391764005503, "grad_norm": 0.38846412274853226, "learning_rate": 5.49507281756909e-06, "loss": 0.4555, "step": 13233 }, { "epoch": 2.173403403608893, "grad_norm": 0.3313264905531258, "learning_rate": 5.4946154493954495e-06, "loss": 0.4344, "step": 13234 }, { "epoch": 2.1735676308172356, "grad_norm": 0.3243910241314963, "learning_rate": 5.494158070419304e-06, "loss": 0.4423, "step": 13235 }, { "epoch": 2.1737318580255782, "grad_norm": 0.5319929603313633, "learning_rate": 5.493700680645626e-06, "loss": 0.4391, "step": 13236 }, { "epoch": 2.1738960852339213, "grad_norm": 0.26307427359923147, "learning_rate": 5.49324328007938e-06, "loss": 0.4479, "step": 13237 }, { "epoch": 2.174060312442264, "grad_norm": 0.3178794271208854, "learning_rate": 5.492785868725535e-06, "loss": 0.4577, "step": 13238 }, { "epoch": 2.1742245396506066, "grad_norm": 0.2809221309088171, "learning_rate": 5.49232844658906e-06, "loss": 0.469, "step": 13239 }, { "epoch": 2.174388766858949, "grad_norm": 0.3735418362737004, "learning_rate": 5.491871013674921e-06, "loss": 0.4233, "step": 13240 }, { "epoch": 2.1745529940672923, "grad_norm": 0.2725105093139469, "learning_rate": 5.491413569988085e-06, "loss": 0.4543, "step": 13241 }, { "epoch": 2.174717221275635, "grad_norm": 0.3151526891984052, "learning_rate": 5.490956115533523e-06, "loss": 0.4607, "step": 13242 }, { "epoch": 2.1748814484839776, "grad_norm": 0.36699330490616217, "learning_rate": 5.4904986503162035e-06, "loss": 0.4554, "step": 13243 }, { "epoch": 2.17504567569232, "grad_norm": 0.3380410448393794, "learning_rate": 5.490041174341094e-06, "loss": 0.4351, "step": 13244 }, { "epoch": 2.1752099029006633, "grad_norm": 0.3373211564238718, "learning_rate": 5.489583687613164e-06, "loss": 0.4429, "step": 13245 }, { "epoch": 2.175374130109006, "grad_norm": 0.3425286568997985, "learning_rate": 5.48912619013738e-06, "loss": 0.4482, "step": 13246 }, { "epoch": 2.1755383573173486, "grad_norm": 0.30984839575858375, "learning_rate": 5.488668681918712e-06, "loss": 0.4354, "step": 13247 }, { "epoch": 2.175702584525691, "grad_norm": 0.2957698078978961, "learning_rate": 5.488211162962132e-06, "loss": 0.4426, "step": 13248 }, { "epoch": 2.175866811734034, "grad_norm": 0.3545053186690607, "learning_rate": 5.487753633272605e-06, "loss": 0.4529, "step": 13249 }, { "epoch": 2.176031038942377, "grad_norm": 0.345155396866791, "learning_rate": 5.4872960928551015e-06, "loss": 0.4319, "step": 13250 }, { "epoch": 2.1761952661507196, "grad_norm": 0.3096685753746912, "learning_rate": 5.4868385417145905e-06, "loss": 0.4251, "step": 13251 }, { "epoch": 2.176359493359062, "grad_norm": 0.34819121870325165, "learning_rate": 5.486380979856042e-06, "loss": 0.439, "step": 13252 }, { "epoch": 2.176523720567405, "grad_norm": 0.365145070586753, "learning_rate": 5.485923407284428e-06, "loss": 0.4552, "step": 13253 }, { "epoch": 2.176687947775748, "grad_norm": 0.302409582339075, "learning_rate": 5.4854658240047145e-06, "loss": 0.4576, "step": 13254 }, { "epoch": 2.1768521749840906, "grad_norm": 0.3099827305656743, "learning_rate": 5.4850082300218725e-06, "loss": 0.4182, "step": 13255 }, { "epoch": 2.177016402192433, "grad_norm": 0.3303933405482676, "learning_rate": 5.4845506253408705e-06, "loss": 0.4438, "step": 13256 }, { "epoch": 2.177180629400776, "grad_norm": 0.32728970434710136, "learning_rate": 5.484093009966682e-06, "loss": 0.4346, "step": 13257 }, { "epoch": 2.177344856609119, "grad_norm": 0.32484025588411, "learning_rate": 5.483635383904273e-06, "loss": 0.4507, "step": 13258 }, { "epoch": 2.1775090838174616, "grad_norm": 0.3801502598555356, "learning_rate": 5.483177747158619e-06, "loss": 0.4434, "step": 13259 }, { "epoch": 2.177673311025804, "grad_norm": 0.2890146137698474, "learning_rate": 5.482720099734686e-06, "loss": 0.4617, "step": 13260 }, { "epoch": 2.177837538234147, "grad_norm": 0.3036280477508943, "learning_rate": 5.482262441637445e-06, "loss": 0.4692, "step": 13261 }, { "epoch": 2.17800176544249, "grad_norm": 0.2767654696097153, "learning_rate": 5.481804772871868e-06, "loss": 0.4392, "step": 13262 }, { "epoch": 2.1781659926508325, "grad_norm": 0.3103322912436335, "learning_rate": 5.481347093442926e-06, "loss": 0.4351, "step": 13263 }, { "epoch": 2.178330219859175, "grad_norm": 0.3730970128678796, "learning_rate": 5.480889403355589e-06, "loss": 0.446, "step": 13264 }, { "epoch": 2.178494447067518, "grad_norm": 0.4406758847281105, "learning_rate": 5.4804317026148274e-06, "loss": 0.449, "step": 13265 }, { "epoch": 2.1786586742758605, "grad_norm": 0.31286994077533276, "learning_rate": 5.4799739912256126e-06, "loss": 0.4337, "step": 13266 }, { "epoch": 2.1788229014842035, "grad_norm": 0.31232970447969627, "learning_rate": 5.479516269192915e-06, "loss": 0.4538, "step": 13267 }, { "epoch": 2.178987128692546, "grad_norm": 0.39424467153664117, "learning_rate": 5.47905853652171e-06, "loss": 0.4461, "step": 13268 }, { "epoch": 2.179151355900889, "grad_norm": 0.2763369646076082, "learning_rate": 5.4786007932169634e-06, "loss": 0.4412, "step": 13269 }, { "epoch": 2.1793155831092315, "grad_norm": 0.3549322006277845, "learning_rate": 5.478143039283651e-06, "loss": 0.4395, "step": 13270 }, { "epoch": 2.1794798103175745, "grad_norm": 0.3806948554055574, "learning_rate": 5.477685274726741e-06, "loss": 0.4377, "step": 13271 }, { "epoch": 2.179644037525917, "grad_norm": 0.36432326690094835, "learning_rate": 5.477227499551208e-06, "loss": 0.4354, "step": 13272 }, { "epoch": 2.17980826473426, "grad_norm": 0.4599377738684147, "learning_rate": 5.476769713762024e-06, "loss": 0.4674, "step": 13273 }, { "epoch": 2.1799724919426025, "grad_norm": 0.3250357559341009, "learning_rate": 5.47631191736416e-06, "loss": 0.4174, "step": 13274 }, { "epoch": 2.1801367191509455, "grad_norm": 0.32868619559098194, "learning_rate": 5.475854110362586e-06, "loss": 0.4413, "step": 13275 }, { "epoch": 2.180300946359288, "grad_norm": 0.34100178065605913, "learning_rate": 5.475396292762278e-06, "loss": 0.4561, "step": 13276 }, { "epoch": 2.180465173567631, "grad_norm": 0.5793751621914814, "learning_rate": 5.4749384645682054e-06, "loss": 0.4586, "step": 13277 }, { "epoch": 2.1806294007759734, "grad_norm": 0.4069008528073191, "learning_rate": 5.474480625785343e-06, "loss": 0.4164, "step": 13278 }, { "epoch": 2.1807936279843165, "grad_norm": 0.3734986648935865, "learning_rate": 5.474022776418661e-06, "loss": 0.4493, "step": 13279 }, { "epoch": 2.180957855192659, "grad_norm": 0.3441849653741825, "learning_rate": 5.473564916473134e-06, "loss": 0.4106, "step": 13280 }, { "epoch": 2.181122082401002, "grad_norm": 0.29951787954088177, "learning_rate": 5.473107045953734e-06, "loss": 0.4416, "step": 13281 }, { "epoch": 2.1812863096093444, "grad_norm": 0.4575444456948254, "learning_rate": 5.472649164865434e-06, "loss": 0.4501, "step": 13282 }, { "epoch": 2.181450536817687, "grad_norm": 0.3720739585342384, "learning_rate": 5.472191273213208e-06, "loss": 0.433, "step": 13283 }, { "epoch": 2.18161476402603, "grad_norm": 0.3481388997776785, "learning_rate": 5.471733371002027e-06, "loss": 0.4431, "step": 13284 }, { "epoch": 2.181778991234373, "grad_norm": 0.31569298160947046, "learning_rate": 5.471275458236865e-06, "loss": 0.4709, "step": 13285 }, { "epoch": 2.1819432184427154, "grad_norm": 0.3675943375619008, "learning_rate": 5.470817534922698e-06, "loss": 0.4618, "step": 13286 }, { "epoch": 2.182107445651058, "grad_norm": 0.2889262086312248, "learning_rate": 5.470359601064495e-06, "loss": 0.4383, "step": 13287 }, { "epoch": 2.182271672859401, "grad_norm": 0.38329425216994023, "learning_rate": 5.469901656667235e-06, "loss": 0.437, "step": 13288 }, { "epoch": 2.182435900067744, "grad_norm": 0.3052468220993611, "learning_rate": 5.469443701735887e-06, "loss": 0.4634, "step": 13289 }, { "epoch": 2.1826001272760864, "grad_norm": 0.3603417523141972, "learning_rate": 5.468985736275426e-06, "loss": 0.4242, "step": 13290 }, { "epoch": 2.182764354484429, "grad_norm": 0.27351967521952214, "learning_rate": 5.468527760290828e-06, "loss": 0.4479, "step": 13291 }, { "epoch": 2.182928581692772, "grad_norm": 0.7082922791523382, "learning_rate": 5.468069773787066e-06, "loss": 0.4306, "step": 13292 }, { "epoch": 2.183092808901115, "grad_norm": 0.3798345765622769, "learning_rate": 5.467611776769112e-06, "loss": 0.4247, "step": 13293 }, { "epoch": 2.1832570361094574, "grad_norm": 0.532693372970332, "learning_rate": 5.467153769241942e-06, "loss": 0.4451, "step": 13294 }, { "epoch": 2.1834212633178, "grad_norm": 0.37091825250503574, "learning_rate": 5.466695751210532e-06, "loss": 0.4563, "step": 13295 }, { "epoch": 2.183585490526143, "grad_norm": 0.3210104071626388, "learning_rate": 5.466237722679854e-06, "loss": 0.435, "step": 13296 }, { "epoch": 2.183749717734486, "grad_norm": 0.280191494056349, "learning_rate": 5.465779683654884e-06, "loss": 0.4162, "step": 13297 }, { "epoch": 2.1839139449428284, "grad_norm": 0.3062510831224376, "learning_rate": 5.465321634140597e-06, "loss": 0.4377, "step": 13298 }, { "epoch": 2.184078172151171, "grad_norm": 0.5031795325550483, "learning_rate": 5.464863574141968e-06, "loss": 0.4578, "step": 13299 }, { "epoch": 2.1842423993595137, "grad_norm": 0.3211714977323895, "learning_rate": 5.464405503663969e-06, "loss": 0.436, "step": 13300 }, { "epoch": 2.184406626567857, "grad_norm": 0.3563243829030005, "learning_rate": 5.463947422711578e-06, "loss": 0.4407, "step": 13301 }, { "epoch": 2.1845708537761994, "grad_norm": 0.5806060878516683, "learning_rate": 5.46348933128977e-06, "loss": 0.4444, "step": 13302 }, { "epoch": 2.184735080984542, "grad_norm": 0.3560099487190159, "learning_rate": 5.463031229403521e-06, "loss": 0.4552, "step": 13303 }, { "epoch": 2.1848993081928847, "grad_norm": 0.27206041929266034, "learning_rate": 5.462573117057804e-06, "loss": 0.4567, "step": 13304 }, { "epoch": 2.1850635354012278, "grad_norm": 0.3331834230070897, "learning_rate": 5.462114994257596e-06, "loss": 0.4334, "step": 13305 }, { "epoch": 2.1852277626095704, "grad_norm": 0.31765346174580844, "learning_rate": 5.461656861007872e-06, "loss": 0.4502, "step": 13306 }, { "epoch": 2.185391989817913, "grad_norm": 0.38588394716998564, "learning_rate": 5.461198717313611e-06, "loss": 0.4341, "step": 13307 }, { "epoch": 2.1855562170262557, "grad_norm": 0.37336262040504764, "learning_rate": 5.460740563179784e-06, "loss": 0.4542, "step": 13308 }, { "epoch": 2.1857204442345988, "grad_norm": 0.30693822439100865, "learning_rate": 5.46028239861137e-06, "loss": 0.4285, "step": 13309 }, { "epoch": 2.1858846714429414, "grad_norm": 0.35159606342901895, "learning_rate": 5.4598242236133434e-06, "loss": 0.439, "step": 13310 }, { "epoch": 2.186048898651284, "grad_norm": 0.5059138852284416, "learning_rate": 5.459366038190682e-06, "loss": 0.4463, "step": 13311 }, { "epoch": 2.1862131258596267, "grad_norm": 0.42008476646526727, "learning_rate": 5.458907842348362e-06, "loss": 0.4468, "step": 13312 }, { "epoch": 2.1863773530679698, "grad_norm": 0.4795580884704337, "learning_rate": 5.458449636091359e-06, "loss": 0.4393, "step": 13313 }, { "epoch": 2.1865415802763124, "grad_norm": 0.2752909964148785, "learning_rate": 5.457991419424649e-06, "loss": 0.4263, "step": 13314 }, { "epoch": 2.186705807484655, "grad_norm": 0.2727469759578638, "learning_rate": 5.45753319235321e-06, "loss": 0.4538, "step": 13315 }, { "epoch": 2.1868700346929977, "grad_norm": 0.4963736171783304, "learning_rate": 5.45707495488202e-06, "loss": 0.4378, "step": 13316 }, { "epoch": 2.1870342619013403, "grad_norm": 0.3257932535539851, "learning_rate": 5.456616707016054e-06, "loss": 0.46, "step": 13317 }, { "epoch": 2.1871984891096834, "grad_norm": 0.30857930741888084, "learning_rate": 5.456158448760289e-06, "loss": 0.4387, "step": 13318 }, { "epoch": 2.187362716318026, "grad_norm": 0.3837863937799594, "learning_rate": 5.455700180119701e-06, "loss": 0.4317, "step": 13319 }, { "epoch": 2.1875269435263687, "grad_norm": 0.46225930270367893, "learning_rate": 5.45524190109927e-06, "loss": 0.4528, "step": 13320 }, { "epoch": 2.1876911707347113, "grad_norm": 0.302233763714421, "learning_rate": 5.454783611703972e-06, "loss": 0.4556, "step": 13321 }, { "epoch": 2.1878553979430544, "grad_norm": 0.34696738770294017, "learning_rate": 5.454325311938786e-06, "loss": 0.4173, "step": 13322 }, { "epoch": 2.188019625151397, "grad_norm": 0.28421677391663946, "learning_rate": 5.453867001808686e-06, "loss": 0.4347, "step": 13323 }, { "epoch": 2.1881838523597397, "grad_norm": 0.2761714620499444, "learning_rate": 5.453408681318653e-06, "loss": 0.4571, "step": 13324 }, { "epoch": 2.1883480795680823, "grad_norm": 0.28369005370351114, "learning_rate": 5.452950350473663e-06, "loss": 0.4234, "step": 13325 }, { "epoch": 2.1885123067764254, "grad_norm": 0.3172399933133409, "learning_rate": 5.452492009278697e-06, "loss": 0.4257, "step": 13326 }, { "epoch": 2.188676533984768, "grad_norm": 0.38036218554406415, "learning_rate": 5.452033657738727e-06, "loss": 0.4449, "step": 13327 }, { "epoch": 2.1888407611931107, "grad_norm": 0.2983968064999166, "learning_rate": 5.4515752958587376e-06, "loss": 0.4528, "step": 13328 }, { "epoch": 2.1890049884014533, "grad_norm": 0.4085831022949406, "learning_rate": 5.4511169236437026e-06, "loss": 0.4323, "step": 13329 }, { "epoch": 2.1891692156097964, "grad_norm": 0.3073148198837579, "learning_rate": 5.450658541098603e-06, "loss": 0.4251, "step": 13330 }, { "epoch": 2.189333442818139, "grad_norm": 0.3552304549244225, "learning_rate": 5.450200148228416e-06, "loss": 0.4332, "step": 13331 }, { "epoch": 2.1894976700264817, "grad_norm": 0.4129208416334564, "learning_rate": 5.449741745038121e-06, "loss": 0.4513, "step": 13332 }, { "epoch": 2.1896618972348243, "grad_norm": 0.45681481402253127, "learning_rate": 5.449283331532696e-06, "loss": 0.4273, "step": 13333 }, { "epoch": 2.189826124443167, "grad_norm": 0.28285821802334854, "learning_rate": 5.4488249077171185e-06, "loss": 0.4364, "step": 13334 }, { "epoch": 2.18999035165151, "grad_norm": 0.3035145508717711, "learning_rate": 5.44836647359637e-06, "loss": 0.4488, "step": 13335 }, { "epoch": 2.1901545788598527, "grad_norm": 0.44449304721783334, "learning_rate": 5.447908029175429e-06, "loss": 0.437, "step": 13336 }, { "epoch": 2.1903188060681953, "grad_norm": 0.3076759098785597, "learning_rate": 5.447449574459275e-06, "loss": 0.4598, "step": 13337 }, { "epoch": 2.190483033276538, "grad_norm": 0.35226795756882684, "learning_rate": 5.446991109452884e-06, "loss": 0.4451, "step": 13338 }, { "epoch": 2.190647260484881, "grad_norm": 0.3400446784915832, "learning_rate": 5.44653263416124e-06, "loss": 0.4341, "step": 13339 }, { "epoch": 2.1908114876932236, "grad_norm": 0.3271085565370917, "learning_rate": 5.446074148589319e-06, "loss": 0.4204, "step": 13340 }, { "epoch": 2.1909757149015663, "grad_norm": 0.31458599529445347, "learning_rate": 5.445615652742105e-06, "loss": 0.4514, "step": 13341 }, { "epoch": 2.191139942109909, "grad_norm": 0.3009846308571748, "learning_rate": 5.445157146624571e-06, "loss": 0.4541, "step": 13342 }, { "epoch": 2.191304169318252, "grad_norm": 0.31103393177917293, "learning_rate": 5.444698630241701e-06, "loss": 0.4652, "step": 13343 }, { "epoch": 2.1914683965265946, "grad_norm": 0.283057430214247, "learning_rate": 5.444240103598475e-06, "loss": 0.4532, "step": 13344 }, { "epoch": 2.1916326237349373, "grad_norm": 0.29297163882420746, "learning_rate": 5.4437815666998725e-06, "loss": 0.4457, "step": 13345 }, { "epoch": 2.19179685094328, "grad_norm": 0.3287114100302679, "learning_rate": 5.4433230195508744e-06, "loss": 0.4414, "step": 13346 }, { "epoch": 2.191961078151623, "grad_norm": 0.33619588189444394, "learning_rate": 5.442864462156459e-06, "loss": 0.4293, "step": 13347 }, { "epoch": 2.1921253053599656, "grad_norm": 0.30645737669849693, "learning_rate": 5.442405894521608e-06, "loss": 0.4648, "step": 13348 }, { "epoch": 2.1922895325683083, "grad_norm": 0.29798132461815596, "learning_rate": 5.441947316651303e-06, "loss": 0.4462, "step": 13349 }, { "epoch": 2.192453759776651, "grad_norm": 0.3376869196756182, "learning_rate": 5.441488728550522e-06, "loss": 0.4192, "step": 13350 }, { "epoch": 2.1926179869849935, "grad_norm": 0.3381138888087342, "learning_rate": 5.4410301302242485e-06, "loss": 0.4398, "step": 13351 }, { "epoch": 2.1927822141933366, "grad_norm": 0.3274713175485761, "learning_rate": 5.440571521677461e-06, "loss": 0.4326, "step": 13352 }, { "epoch": 2.1929464414016793, "grad_norm": 0.3378498619220711, "learning_rate": 5.440112902915141e-06, "loss": 0.4504, "step": 13353 }, { "epoch": 2.193110668610022, "grad_norm": 0.3158528798529663, "learning_rate": 5.439654273942271e-06, "loss": 0.4214, "step": 13354 }, { "epoch": 2.1932748958183645, "grad_norm": 0.3505857824282715, "learning_rate": 5.439195634763829e-06, "loss": 0.4612, "step": 13355 }, { "epoch": 2.1934391230267076, "grad_norm": 0.28799065331510204, "learning_rate": 5.438736985384801e-06, "loss": 0.4298, "step": 13356 }, { "epoch": 2.1936033502350503, "grad_norm": 0.6200882777286091, "learning_rate": 5.438278325810165e-06, "loss": 0.4536, "step": 13357 }, { "epoch": 2.193767577443393, "grad_norm": 0.270771121171262, "learning_rate": 5.437819656044903e-06, "loss": 0.4481, "step": 13358 }, { "epoch": 2.1939318046517355, "grad_norm": 0.42757081634472144, "learning_rate": 5.437360976093996e-06, "loss": 0.4518, "step": 13359 }, { "epoch": 2.1940960318600786, "grad_norm": 0.365960276967426, "learning_rate": 5.436902285962429e-06, "loss": 0.4292, "step": 13360 }, { "epoch": 2.1942602590684213, "grad_norm": 0.3204066220765556, "learning_rate": 5.436443585655178e-06, "loss": 0.4467, "step": 13361 }, { "epoch": 2.194424486276764, "grad_norm": 0.623873347080686, "learning_rate": 5.435984875177231e-06, "loss": 0.4261, "step": 13362 }, { "epoch": 2.1945887134851065, "grad_norm": 0.338318483075795, "learning_rate": 5.435526154533565e-06, "loss": 0.4425, "step": 13363 }, { "epoch": 2.1947529406934496, "grad_norm": 0.2930560099271347, "learning_rate": 5.4350674237291666e-06, "loss": 0.4622, "step": 13364 }, { "epoch": 2.1949171679017923, "grad_norm": 0.4904425915483045, "learning_rate": 5.434608682769016e-06, "loss": 0.4576, "step": 13365 }, { "epoch": 2.195081395110135, "grad_norm": 0.36543303457059684, "learning_rate": 5.434149931658095e-06, "loss": 0.4477, "step": 13366 }, { "epoch": 2.1952456223184775, "grad_norm": 0.373200542196656, "learning_rate": 5.433691170401385e-06, "loss": 0.4362, "step": 13367 }, { "epoch": 2.19540984952682, "grad_norm": 0.3640609524375743, "learning_rate": 5.433232399003872e-06, "loss": 0.424, "step": 13368 }, { "epoch": 2.1955740767351632, "grad_norm": 0.31082790107927044, "learning_rate": 5.4327736174705355e-06, "loss": 0.4329, "step": 13369 }, { "epoch": 2.195738303943506, "grad_norm": 0.5255387391693609, "learning_rate": 5.432314825806362e-06, "loss": 0.4538, "step": 13370 }, { "epoch": 2.1959025311518485, "grad_norm": 0.3212060850181198, "learning_rate": 5.431856024016333e-06, "loss": 0.4353, "step": 13371 }, { "epoch": 2.196066758360191, "grad_norm": 0.313522628120879, "learning_rate": 5.4313972121054275e-06, "loss": 0.4384, "step": 13372 }, { "epoch": 2.1962309855685342, "grad_norm": 0.3749338646774257, "learning_rate": 5.430938390078634e-06, "loss": 0.4467, "step": 13373 }, { "epoch": 2.196395212776877, "grad_norm": 0.2977863779812921, "learning_rate": 5.430479557940933e-06, "loss": 0.4495, "step": 13374 }, { "epoch": 2.1965594399852195, "grad_norm": 0.28315765122651704, "learning_rate": 5.430020715697309e-06, "loss": 0.4448, "step": 13375 }, { "epoch": 2.196723667193562, "grad_norm": 0.30598231606697035, "learning_rate": 5.429561863352744e-06, "loss": 0.4422, "step": 13376 }, { "epoch": 2.1968878944019052, "grad_norm": 0.35302107028478596, "learning_rate": 5.429103000912222e-06, "loss": 0.4544, "step": 13377 }, { "epoch": 2.197052121610248, "grad_norm": 0.8169332006027017, "learning_rate": 5.42864412838073e-06, "loss": 0.4185, "step": 13378 }, { "epoch": 2.1972163488185905, "grad_norm": 0.3353800209447468, "learning_rate": 5.4281852457632475e-06, "loss": 0.4456, "step": 13379 }, { "epoch": 2.197380576026933, "grad_norm": 0.29430933701926504, "learning_rate": 5.42772635306476e-06, "loss": 0.4505, "step": 13380 }, { "epoch": 2.1975448032352762, "grad_norm": 0.448133699112051, "learning_rate": 5.427267450290251e-06, "loss": 0.4529, "step": 13381 }, { "epoch": 2.197709030443619, "grad_norm": 0.32960691354841337, "learning_rate": 5.426808537444707e-06, "loss": 0.4342, "step": 13382 }, { "epoch": 2.1978732576519615, "grad_norm": 0.33870856955694467, "learning_rate": 5.42634961453311e-06, "loss": 0.4419, "step": 13383 }, { "epoch": 2.198037484860304, "grad_norm": 0.34599685380848416, "learning_rate": 5.425890681560443e-06, "loss": 0.4368, "step": 13384 }, { "epoch": 2.198201712068647, "grad_norm": 0.5129549940134407, "learning_rate": 5.425431738531693e-06, "loss": 0.4605, "step": 13385 }, { "epoch": 2.19836593927699, "grad_norm": 0.3833834807966303, "learning_rate": 5.424972785451844e-06, "loss": 0.4247, "step": 13386 }, { "epoch": 2.1985301664853325, "grad_norm": 0.3204893316070029, "learning_rate": 5.424513822325881e-06, "loss": 0.4422, "step": 13387 }, { "epoch": 2.198694393693675, "grad_norm": 0.36698443042704193, "learning_rate": 5.4240548491587885e-06, "loss": 0.413, "step": 13388 }, { "epoch": 2.198858620902018, "grad_norm": 0.3811017115206296, "learning_rate": 5.4235958659555495e-06, "loss": 0.4781, "step": 13389 }, { "epoch": 2.199022848110361, "grad_norm": 0.32255326893783004, "learning_rate": 5.4231368727211526e-06, "loss": 0.4523, "step": 13390 }, { "epoch": 2.1991870753187035, "grad_norm": 0.572634516628206, "learning_rate": 5.422677869460581e-06, "loss": 0.4524, "step": 13391 }, { "epoch": 2.199351302527046, "grad_norm": 0.3633323388041515, "learning_rate": 5.422218856178818e-06, "loss": 0.422, "step": 13392 }, { "epoch": 2.1995155297353888, "grad_norm": 0.34242897491608737, "learning_rate": 5.421759832880852e-06, "loss": 0.4436, "step": 13393 }, { "epoch": 2.199679756943732, "grad_norm": 0.36406147142191303, "learning_rate": 5.421300799571668e-06, "loss": 0.4312, "step": 13394 }, { "epoch": 2.1998439841520745, "grad_norm": 0.2844107960510927, "learning_rate": 5.420841756256251e-06, "loss": 0.4572, "step": 13395 }, { "epoch": 2.200008211360417, "grad_norm": 0.33568246893123604, "learning_rate": 5.420382702939585e-06, "loss": 0.4307, "step": 13396 }, { "epoch": 2.2001724385687598, "grad_norm": 0.310811054875232, "learning_rate": 5.419923639626657e-06, "loss": 0.4317, "step": 13397 }, { "epoch": 2.200336665777103, "grad_norm": 0.3290758135242331, "learning_rate": 5.419464566322454e-06, "loss": 0.4512, "step": 13398 }, { "epoch": 2.2005008929854455, "grad_norm": 0.30047132711866265, "learning_rate": 5.419005483031963e-06, "loss": 0.4323, "step": 13399 }, { "epoch": 2.200665120193788, "grad_norm": 0.2852061143153248, "learning_rate": 5.4185463897601675e-06, "loss": 0.4317, "step": 13400 }, { "epoch": 2.2008293474021308, "grad_norm": 0.3378154655198677, "learning_rate": 5.418087286512053e-06, "loss": 0.4562, "step": 13401 }, { "epoch": 2.2009935746104734, "grad_norm": 0.3812354025796814, "learning_rate": 5.4176281732926076e-06, "loss": 0.4594, "step": 13402 }, { "epoch": 2.2011578018188165, "grad_norm": 0.41651687708085594, "learning_rate": 5.417169050106818e-06, "loss": 0.4339, "step": 13403 }, { "epoch": 2.201322029027159, "grad_norm": 0.2878243822875417, "learning_rate": 5.41670991695967e-06, "loss": 0.4233, "step": 13404 }, { "epoch": 2.2014862562355018, "grad_norm": 0.32510137346731693, "learning_rate": 5.416250773856151e-06, "loss": 0.4496, "step": 13405 }, { "epoch": 2.2016504834438444, "grad_norm": 0.2746385162503503, "learning_rate": 5.415791620801245e-06, "loss": 0.4438, "step": 13406 }, { "epoch": 2.2018147106521875, "grad_norm": 0.34944524205436805, "learning_rate": 5.415332457799944e-06, "loss": 0.45, "step": 13407 }, { "epoch": 2.20197893786053, "grad_norm": 0.35040689222504817, "learning_rate": 5.41487328485723e-06, "loss": 0.4436, "step": 13408 }, { "epoch": 2.2021431650688728, "grad_norm": 0.2910862319373962, "learning_rate": 5.414414101978092e-06, "loss": 0.4252, "step": 13409 }, { "epoch": 2.2023073922772154, "grad_norm": 0.28937276374288046, "learning_rate": 5.413954909167518e-06, "loss": 0.4517, "step": 13410 }, { "epoch": 2.2024716194855585, "grad_norm": 0.5219373842745609, "learning_rate": 5.413495706430494e-06, "loss": 0.4383, "step": 13411 }, { "epoch": 2.202635846693901, "grad_norm": 8.93823507260522, "learning_rate": 5.4130364937720085e-06, "loss": 0.4431, "step": 13412 }, { "epoch": 2.2028000739022437, "grad_norm": 0.31732089025480203, "learning_rate": 5.412577271197047e-06, "loss": 0.4258, "step": 13413 }, { "epoch": 2.2029643011105864, "grad_norm": 0.3292152186596089, "learning_rate": 5.4121180387105995e-06, "loss": 0.4336, "step": 13414 }, { "epoch": 2.2031285283189295, "grad_norm": 0.3087280941473258, "learning_rate": 5.411658796317653e-06, "loss": 0.4587, "step": 13415 }, { "epoch": 2.203292755527272, "grad_norm": 0.3653836232230834, "learning_rate": 5.411199544023195e-06, "loss": 0.4342, "step": 13416 }, { "epoch": 2.2034569827356147, "grad_norm": 0.27311329098667947, "learning_rate": 5.410740281832212e-06, "loss": 0.4418, "step": 13417 }, { "epoch": 2.2036212099439574, "grad_norm": 0.49616160219727334, "learning_rate": 5.410281009749694e-06, "loss": 0.4256, "step": 13418 }, { "epoch": 2.2037854371523, "grad_norm": 0.4126504742938762, "learning_rate": 5.40982172778063e-06, "loss": 0.4317, "step": 13419 }, { "epoch": 2.203949664360643, "grad_norm": 0.5683373155278401, "learning_rate": 5.409362435930006e-06, "loss": 0.446, "step": 13420 }, { "epoch": 2.2041138915689857, "grad_norm": 0.5772482685621865, "learning_rate": 5.408903134202812e-06, "loss": 0.4537, "step": 13421 }, { "epoch": 2.2042781187773284, "grad_norm": 0.9024243798707035, "learning_rate": 5.408443822604033e-06, "loss": 0.4324, "step": 13422 }, { "epoch": 2.204442345985671, "grad_norm": 0.432771176343393, "learning_rate": 5.407984501138664e-06, "loss": 0.4397, "step": 13423 }, { "epoch": 2.204606573194014, "grad_norm": 0.6234869815136771, "learning_rate": 5.407525169811689e-06, "loss": 0.4374, "step": 13424 }, { "epoch": 2.2047708004023567, "grad_norm": 0.5594566730793743, "learning_rate": 5.4070658286280965e-06, "loss": 0.4396, "step": 13425 }, { "epoch": 2.2049350276106994, "grad_norm": 0.5564798666728442, "learning_rate": 5.406606477592876e-06, "loss": 0.4682, "step": 13426 }, { "epoch": 2.205099254819042, "grad_norm": 0.4843442564867565, "learning_rate": 5.406147116711019e-06, "loss": 0.437, "step": 13427 }, { "epoch": 2.205263482027385, "grad_norm": 0.36792018386639114, "learning_rate": 5.405687745987512e-06, "loss": 0.4345, "step": 13428 }, { "epoch": 2.2054277092357277, "grad_norm": 0.3879908470046889, "learning_rate": 5.405228365427346e-06, "loss": 0.4384, "step": 13429 }, { "epoch": 2.2055919364440704, "grad_norm": 0.45374078326582784, "learning_rate": 5.404768975035508e-06, "loss": 0.4356, "step": 13430 }, { "epoch": 2.205756163652413, "grad_norm": 0.3407757604959021, "learning_rate": 5.404309574816988e-06, "loss": 0.4366, "step": 13431 }, { "epoch": 2.205920390860756, "grad_norm": 0.3141407896315137, "learning_rate": 5.4038501647767785e-06, "loss": 0.4357, "step": 13432 }, { "epoch": 2.2060846180690987, "grad_norm": 0.3267762123383814, "learning_rate": 5.403390744919866e-06, "loss": 0.4614, "step": 13433 }, { "epoch": 2.2062488452774414, "grad_norm": 0.37945900480305744, "learning_rate": 5.4029313152512424e-06, "loss": 0.4551, "step": 13434 }, { "epoch": 2.206413072485784, "grad_norm": 0.4124331195609138, "learning_rate": 5.402471875775894e-06, "loss": 0.4538, "step": 13435 }, { "epoch": 2.2065772996941266, "grad_norm": 0.6909163990609498, "learning_rate": 5.402012426498814e-06, "loss": 0.4542, "step": 13436 }, { "epoch": 2.2067415269024697, "grad_norm": 0.4938727070001304, "learning_rate": 5.401552967424993e-06, "loss": 0.4599, "step": 13437 }, { "epoch": 2.2069057541108124, "grad_norm": 0.4031025804524951, "learning_rate": 5.401093498559418e-06, "loss": 0.4301, "step": 13438 }, { "epoch": 2.207069981319155, "grad_norm": 0.3031333079133587, "learning_rate": 5.400634019907082e-06, "loss": 0.429, "step": 13439 }, { "epoch": 2.2072342085274976, "grad_norm": 0.4760746129352209, "learning_rate": 5.400174531472973e-06, "loss": 0.4522, "step": 13440 }, { "epoch": 2.2073984357358407, "grad_norm": 0.37261624908788993, "learning_rate": 5.3997150332620855e-06, "loss": 0.448, "step": 13441 }, { "epoch": 2.2075626629441834, "grad_norm": 0.3805111962488991, "learning_rate": 5.399255525279405e-06, "loss": 0.449, "step": 13442 }, { "epoch": 2.207726890152526, "grad_norm": 0.36182810691107475, "learning_rate": 5.398796007529926e-06, "loss": 0.4328, "step": 13443 }, { "epoch": 2.2078911173608686, "grad_norm": 0.34366558370047795, "learning_rate": 5.398336480018638e-06, "loss": 0.4285, "step": 13444 }, { "epoch": 2.2080553445692117, "grad_norm": 0.3058709969084702, "learning_rate": 5.397876942750532e-06, "loss": 0.4481, "step": 13445 }, { "epoch": 2.2082195717775543, "grad_norm": 0.33400468632077346, "learning_rate": 5.397417395730599e-06, "loss": 0.449, "step": 13446 }, { "epoch": 2.208383798985897, "grad_norm": 0.4026208309464146, "learning_rate": 5.396957838963829e-06, "loss": 0.4412, "step": 13447 }, { "epoch": 2.2085480261942396, "grad_norm": 0.35216660485768597, "learning_rate": 5.396498272455217e-06, "loss": 0.4534, "step": 13448 }, { "epoch": 2.2087122534025827, "grad_norm": 0.4944410426286778, "learning_rate": 5.39603869620975e-06, "loss": 0.451, "step": 13449 }, { "epoch": 2.2088764806109253, "grad_norm": 0.35289336830677026, "learning_rate": 5.395579110232421e-06, "loss": 0.443, "step": 13450 }, { "epoch": 2.209040707819268, "grad_norm": 0.3493194840023415, "learning_rate": 5.395119514528222e-06, "loss": 0.4473, "step": 13451 }, { "epoch": 2.2092049350276106, "grad_norm": 0.2907115504572143, "learning_rate": 5.394659909102144e-06, "loss": 0.4417, "step": 13452 }, { "epoch": 2.2093691622359533, "grad_norm": 0.3184738041292627, "learning_rate": 5.39420029395918e-06, "loss": 0.4483, "step": 13453 }, { "epoch": 2.2095333894442963, "grad_norm": 0.3785358669834699, "learning_rate": 5.393740669104321e-06, "loss": 0.4491, "step": 13454 }, { "epoch": 2.209697616652639, "grad_norm": 0.4155139242934717, "learning_rate": 5.393281034542559e-06, "loss": 0.4555, "step": 13455 }, { "epoch": 2.2098618438609816, "grad_norm": 0.3500073461421561, "learning_rate": 5.392821390278885e-06, "loss": 0.4247, "step": 13456 }, { "epoch": 2.2100260710693242, "grad_norm": 0.293089681541331, "learning_rate": 5.392361736318293e-06, "loss": 0.4445, "step": 13457 }, { "epoch": 2.2101902982776673, "grad_norm": 0.37913419132543585, "learning_rate": 5.3919020726657746e-06, "loss": 0.4511, "step": 13458 }, { "epoch": 2.21035452548601, "grad_norm": 0.3325337362826304, "learning_rate": 5.391442399326323e-06, "loss": 0.4466, "step": 13459 }, { "epoch": 2.2105187526943526, "grad_norm": 0.30312378625958575, "learning_rate": 5.390982716304928e-06, "loss": 0.4444, "step": 13460 }, { "epoch": 2.2106829799026952, "grad_norm": 0.45363941132765107, "learning_rate": 5.3905230236065845e-06, "loss": 0.4339, "step": 13461 }, { "epoch": 2.2108472071110383, "grad_norm": 0.35081016919396296, "learning_rate": 5.390063321236287e-06, "loss": 0.4468, "step": 13462 }, { "epoch": 2.211011434319381, "grad_norm": 0.4229490886794608, "learning_rate": 5.389603609199025e-06, "loss": 0.4377, "step": 13463 }, { "epoch": 2.2111756615277236, "grad_norm": 0.5187989672492346, "learning_rate": 5.389143887499791e-06, "loss": 0.4276, "step": 13464 }, { "epoch": 2.2113398887360662, "grad_norm": 0.349141035977585, "learning_rate": 5.38868415614358e-06, "loss": 0.4585, "step": 13465 }, { "epoch": 2.2115041159444093, "grad_norm": 0.38330880903839587, "learning_rate": 5.388224415135387e-06, "loss": 0.4477, "step": 13466 }, { "epoch": 2.211668343152752, "grad_norm": 0.3643086489980097, "learning_rate": 5.387764664480201e-06, "loss": 0.435, "step": 13467 }, { "epoch": 2.2118325703610946, "grad_norm": 1.2701962885891152, "learning_rate": 5.387304904183018e-06, "loss": 0.4448, "step": 13468 }, { "epoch": 2.2119967975694372, "grad_norm": 0.30489125033902276, "learning_rate": 5.38684513424883e-06, "loss": 0.4342, "step": 13469 }, { "epoch": 2.21216102477778, "grad_norm": 0.31767007262386243, "learning_rate": 5.386385354682632e-06, "loss": 0.4261, "step": 13470 }, { "epoch": 2.212325251986123, "grad_norm": 0.4085825381441104, "learning_rate": 5.385925565489416e-06, "loss": 0.4199, "step": 13471 }, { "epoch": 2.2124894791944656, "grad_norm": 0.33488337192858, "learning_rate": 5.385465766674178e-06, "loss": 0.4192, "step": 13472 }, { "epoch": 2.2126537064028082, "grad_norm": 0.3389412489135619, "learning_rate": 5.385005958241911e-06, "loss": 0.441, "step": 13473 }, { "epoch": 2.212817933611151, "grad_norm": 0.3722364049933887, "learning_rate": 5.384546140197607e-06, "loss": 0.4543, "step": 13474 }, { "epoch": 2.212982160819494, "grad_norm": 0.38592443024169204, "learning_rate": 5.384086312546262e-06, "loss": 0.4365, "step": 13475 }, { "epoch": 2.2131463880278366, "grad_norm": 0.3146545371530525, "learning_rate": 5.383626475292869e-06, "loss": 0.4475, "step": 13476 }, { "epoch": 2.2133106152361792, "grad_norm": 0.27347639969455967, "learning_rate": 5.383166628442426e-06, "loss": 0.4403, "step": 13477 }, { "epoch": 2.213474842444522, "grad_norm": 0.3374210661622069, "learning_rate": 5.382706771999923e-06, "loss": 0.4394, "step": 13478 }, { "epoch": 2.213639069652865, "grad_norm": 0.45448892335658087, "learning_rate": 5.382246905970355e-06, "loss": 0.4253, "step": 13479 }, { "epoch": 2.2138032968612076, "grad_norm": 0.28749697984270106, "learning_rate": 5.381787030358718e-06, "loss": 0.4429, "step": 13480 }, { "epoch": 2.21396752406955, "grad_norm": 0.36495583828274214, "learning_rate": 5.3813271451700064e-06, "loss": 0.4351, "step": 13481 }, { "epoch": 2.214131751277893, "grad_norm": 0.4129797086459647, "learning_rate": 5.380867250409214e-06, "loss": 0.4359, "step": 13482 }, { "epoch": 2.214295978486236, "grad_norm": 0.3320847636462863, "learning_rate": 5.380407346081338e-06, "loss": 0.4417, "step": 13483 }, { "epoch": 2.2144602056945786, "grad_norm": 0.33595000981170686, "learning_rate": 5.379947432191372e-06, "loss": 0.4466, "step": 13484 }, { "epoch": 2.214624432902921, "grad_norm": 0.3459516900532747, "learning_rate": 5.379487508744311e-06, "loss": 0.4249, "step": 13485 }, { "epoch": 2.214788660111264, "grad_norm": 0.289936742009504, "learning_rate": 5.3790275757451496e-06, "loss": 0.4356, "step": 13486 }, { "epoch": 2.2149528873196065, "grad_norm": 0.33670869144840854, "learning_rate": 5.378567633198885e-06, "loss": 0.4436, "step": 13487 }, { "epoch": 2.2151171145279496, "grad_norm": 0.36515225989280037, "learning_rate": 5.378107681110511e-06, "loss": 0.4227, "step": 13488 }, { "epoch": 2.215281341736292, "grad_norm": 0.3082061760174833, "learning_rate": 5.377647719485024e-06, "loss": 0.4461, "step": 13489 }, { "epoch": 2.215445568944635, "grad_norm": 0.3461068541761683, "learning_rate": 5.377187748327418e-06, "loss": 0.4383, "step": 13490 }, { "epoch": 2.2156097961529775, "grad_norm": 0.3146220360337419, "learning_rate": 5.3767277676426915e-06, "loss": 0.4579, "step": 13491 }, { "epoch": 2.2157740233613206, "grad_norm": 0.2933583327660923, "learning_rate": 5.376267777435838e-06, "loss": 0.4662, "step": 13492 }, { "epoch": 2.215938250569663, "grad_norm": 0.36036615968876007, "learning_rate": 5.375807777711855e-06, "loss": 0.4282, "step": 13493 }, { "epoch": 2.216102477778006, "grad_norm": 0.3082561878071788, "learning_rate": 5.3753477684757366e-06, "loss": 0.446, "step": 13494 }, { "epoch": 2.2162667049863485, "grad_norm": 0.2860483461505643, "learning_rate": 5.374887749732482e-06, "loss": 0.4432, "step": 13495 }, { "epoch": 2.2164309321946916, "grad_norm": 0.30065915277707855, "learning_rate": 5.3744277214870836e-06, "loss": 0.4567, "step": 13496 }, { "epoch": 2.216595159403034, "grad_norm": 0.28339697722762697, "learning_rate": 5.3739676837445414e-06, "loss": 0.4367, "step": 13497 }, { "epoch": 2.216759386611377, "grad_norm": 0.374467941980816, "learning_rate": 5.37350763650985e-06, "loss": 0.4438, "step": 13498 }, { "epoch": 2.2169236138197195, "grad_norm": 0.35613700649582364, "learning_rate": 5.3730475797880066e-06, "loss": 0.4382, "step": 13499 }, { "epoch": 2.2170878410280626, "grad_norm": 0.40460989848519385, "learning_rate": 5.372587513584006e-06, "loss": 0.4322, "step": 13500 }, { "epoch": 2.217252068236405, "grad_norm": 0.276120695564519, "learning_rate": 5.372127437902847e-06, "loss": 0.4412, "step": 13501 }, { "epoch": 2.217416295444748, "grad_norm": 0.2981034665655008, "learning_rate": 5.3716673527495275e-06, "loss": 0.4262, "step": 13502 }, { "epoch": 2.2175805226530905, "grad_norm": 0.3595831031378883, "learning_rate": 5.371207258129041e-06, "loss": 0.4305, "step": 13503 }, { "epoch": 2.217744749861433, "grad_norm": 0.35041234040561664, "learning_rate": 5.3707471540463884e-06, "loss": 0.457, "step": 13504 }, { "epoch": 2.217908977069776, "grad_norm": 0.3376497856717965, "learning_rate": 5.370287040506563e-06, "loss": 0.4454, "step": 13505 }, { "epoch": 2.218073204278119, "grad_norm": 0.3492901426130223, "learning_rate": 5.369826917514565e-06, "loss": 0.4239, "step": 13506 }, { "epoch": 2.2182374314864615, "grad_norm": 0.3528616323045615, "learning_rate": 5.369366785075392e-06, "loss": 0.4418, "step": 13507 }, { "epoch": 2.218401658694804, "grad_norm": 0.5411887062016001, "learning_rate": 5.368906643194039e-06, "loss": 0.4467, "step": 13508 }, { "epoch": 2.218565885903147, "grad_norm": 0.281710369398488, "learning_rate": 5.3684464918755054e-06, "loss": 0.4289, "step": 13509 }, { "epoch": 2.21873011311149, "grad_norm": 0.31953154742558165, "learning_rate": 5.367986331124787e-06, "loss": 0.4363, "step": 13510 }, { "epoch": 2.2188943403198325, "grad_norm": 0.27411536805995634, "learning_rate": 5.367526160946885e-06, "loss": 0.4382, "step": 13511 }, { "epoch": 2.219058567528175, "grad_norm": 0.28347580644466774, "learning_rate": 5.367065981346796e-06, "loss": 0.4318, "step": 13512 }, { "epoch": 2.219222794736518, "grad_norm": 0.35565024009430696, "learning_rate": 5.366605792329516e-06, "loss": 0.4408, "step": 13513 }, { "epoch": 2.219387021944861, "grad_norm": 0.3173430366533922, "learning_rate": 5.366145593900044e-06, "loss": 0.4578, "step": 13514 }, { "epoch": 2.2195512491532035, "grad_norm": 0.35393030124900166, "learning_rate": 5.365685386063378e-06, "loss": 0.4452, "step": 13515 }, { "epoch": 2.219715476361546, "grad_norm": 0.27084181578202887, "learning_rate": 5.365225168824519e-06, "loss": 0.4474, "step": 13516 }, { "epoch": 2.219879703569889, "grad_norm": 0.6068977121317608, "learning_rate": 5.364764942188463e-06, "loss": 0.4433, "step": 13517 }, { "epoch": 2.220043930778232, "grad_norm": 0.2827573163324981, "learning_rate": 5.364304706160208e-06, "loss": 0.4488, "step": 13518 }, { "epoch": 2.2202081579865744, "grad_norm": 0.3787643336445636, "learning_rate": 5.363844460744755e-06, "loss": 0.4316, "step": 13519 }, { "epoch": 2.220372385194917, "grad_norm": 0.30874648437768004, "learning_rate": 5.3633842059471e-06, "loss": 0.4378, "step": 13520 }, { "epoch": 2.2205366124032597, "grad_norm": 0.336792254197232, "learning_rate": 5.3629239417722444e-06, "loss": 0.4524, "step": 13521 }, { "epoch": 2.220700839611603, "grad_norm": 0.3227196910096211, "learning_rate": 5.362463668225184e-06, "loss": 0.4412, "step": 13522 }, { "epoch": 2.2208650668199454, "grad_norm": 0.32507962705134064, "learning_rate": 5.362003385310921e-06, "loss": 0.4536, "step": 13523 }, { "epoch": 2.221029294028288, "grad_norm": 0.2788200062922952, "learning_rate": 5.361543093034452e-06, "loss": 0.4432, "step": 13524 }, { "epoch": 2.2211935212366307, "grad_norm": 0.31509433182889934, "learning_rate": 5.361082791400778e-06, "loss": 0.4341, "step": 13525 }, { "epoch": 2.221357748444974, "grad_norm": 0.3591196718424824, "learning_rate": 5.360622480414898e-06, "loss": 0.4329, "step": 13526 }, { "epoch": 2.2215219756533164, "grad_norm": 0.30416959370332597, "learning_rate": 5.360162160081811e-06, "loss": 0.4394, "step": 13527 }, { "epoch": 2.221686202861659, "grad_norm": 0.6191722097673911, "learning_rate": 5.359701830406516e-06, "loss": 0.4481, "step": 13528 }, { "epoch": 2.2218504300700017, "grad_norm": 0.7503304964039875, "learning_rate": 5.359241491394014e-06, "loss": 0.4292, "step": 13529 }, { "epoch": 2.222014657278345, "grad_norm": 0.42825504406683945, "learning_rate": 5.358781143049304e-06, "loss": 0.4342, "step": 13530 }, { "epoch": 2.2221788844866874, "grad_norm": 0.29088118407198305, "learning_rate": 5.358320785377386e-06, "loss": 0.4375, "step": 13531 }, { "epoch": 2.22234311169503, "grad_norm": 0.33448670629242305, "learning_rate": 5.357860418383259e-06, "loss": 0.432, "step": 13532 }, { "epoch": 2.2225073389033727, "grad_norm": 0.28810069927283627, "learning_rate": 5.357400042071925e-06, "loss": 0.4489, "step": 13533 }, { "epoch": 2.222671566111716, "grad_norm": 0.39361019961728927, "learning_rate": 5.356939656448381e-06, "loss": 0.4466, "step": 13534 }, { "epoch": 2.2228357933200584, "grad_norm": 0.3077810757273589, "learning_rate": 5.356479261517631e-06, "loss": 0.4344, "step": 13535 }, { "epoch": 2.223000020528401, "grad_norm": 0.44608262878290833, "learning_rate": 5.356018857284672e-06, "loss": 0.4395, "step": 13536 }, { "epoch": 2.2231642477367437, "grad_norm": 0.45678308542254015, "learning_rate": 5.355558443754508e-06, "loss": 0.4452, "step": 13537 }, { "epoch": 2.2233284749450863, "grad_norm": 0.3910797651862828, "learning_rate": 5.355098020932136e-06, "loss": 0.4339, "step": 13538 }, { "epoch": 2.2234927021534294, "grad_norm": 1.0072820509471674, "learning_rate": 5.354637588822559e-06, "loss": 0.4631, "step": 13539 }, { "epoch": 2.223656929361772, "grad_norm": 0.3495902620317632, "learning_rate": 5.354177147430777e-06, "loss": 0.4667, "step": 13540 }, { "epoch": 2.2238211565701147, "grad_norm": 0.35133521693691183, "learning_rate": 5.353716696761791e-06, "loss": 0.4499, "step": 13541 }, { "epoch": 2.2239853837784573, "grad_norm": 0.40684209571486746, "learning_rate": 5.3532562368206006e-06, "loss": 0.4511, "step": 13542 }, { "epoch": 2.2241496109868004, "grad_norm": 0.652470986013063, "learning_rate": 5.3527957676122085e-06, "loss": 0.451, "step": 13543 }, { "epoch": 2.224313838195143, "grad_norm": 0.32350572923745236, "learning_rate": 5.352335289141614e-06, "loss": 0.4437, "step": 13544 }, { "epoch": 2.2244780654034857, "grad_norm": 0.28213502193799567, "learning_rate": 5.351874801413822e-06, "loss": 0.4281, "step": 13545 }, { "epoch": 2.2246422926118283, "grad_norm": 0.34122318350658276, "learning_rate": 5.351414304433832e-06, "loss": 0.4425, "step": 13546 }, { "epoch": 2.2248065198201714, "grad_norm": 0.34055176377424634, "learning_rate": 5.350953798206643e-06, "loss": 0.4641, "step": 13547 }, { "epoch": 2.224970747028514, "grad_norm": 0.3056182901324451, "learning_rate": 5.350493282737258e-06, "loss": 0.4472, "step": 13548 }, { "epoch": 2.2251349742368567, "grad_norm": 0.4295171215648557, "learning_rate": 5.3500327580306804e-06, "loss": 0.4411, "step": 13549 }, { "epoch": 2.2252992014451993, "grad_norm": 0.31663958292820704, "learning_rate": 5.349572224091911e-06, "loss": 0.4465, "step": 13550 }, { "epoch": 2.2254634286535424, "grad_norm": 0.3231332831732216, "learning_rate": 5.34911168092595e-06, "loss": 0.4488, "step": 13551 }, { "epoch": 2.225627655861885, "grad_norm": 0.3500335217632181, "learning_rate": 5.348651128537802e-06, "loss": 0.4341, "step": 13552 }, { "epoch": 2.2257918830702277, "grad_norm": 0.282708868398423, "learning_rate": 5.348190566932467e-06, "loss": 0.4248, "step": 13553 }, { "epoch": 2.2259561102785703, "grad_norm": 0.3214462040558992, "learning_rate": 5.347729996114949e-06, "loss": 0.4301, "step": 13554 }, { "epoch": 2.226120337486913, "grad_norm": 0.31372033167420005, "learning_rate": 5.347269416090249e-06, "loss": 0.4259, "step": 13555 }, { "epoch": 2.226284564695256, "grad_norm": 0.3327297401900336, "learning_rate": 5.3468088268633695e-06, "loss": 0.4441, "step": 13556 }, { "epoch": 2.2264487919035987, "grad_norm": 0.3294442381478894, "learning_rate": 5.346348228439312e-06, "loss": 0.4471, "step": 13557 }, { "epoch": 2.2266130191119413, "grad_norm": 0.3929457500037696, "learning_rate": 5.345887620823081e-06, "loss": 0.446, "step": 13558 }, { "epoch": 2.226777246320284, "grad_norm": 0.3631153626815294, "learning_rate": 5.345427004019677e-06, "loss": 0.4353, "step": 13559 }, { "epoch": 2.226941473528627, "grad_norm": 0.2869505335422441, "learning_rate": 5.344966378034106e-06, "loss": 0.4031, "step": 13560 }, { "epoch": 2.2271057007369697, "grad_norm": 1.0574105052879286, "learning_rate": 5.3445057428713675e-06, "loss": 0.4474, "step": 13561 }, { "epoch": 2.2272699279453123, "grad_norm": 0.3579479567718406, "learning_rate": 5.344045098536466e-06, "loss": 0.4628, "step": 13562 }, { "epoch": 2.227434155153655, "grad_norm": 0.3108652105716026, "learning_rate": 5.343584445034403e-06, "loss": 0.4389, "step": 13563 }, { "epoch": 2.227598382361998, "grad_norm": 0.31838087198749193, "learning_rate": 5.343123782370185e-06, "loss": 0.4672, "step": 13564 }, { "epoch": 2.2277626095703407, "grad_norm": 0.348928364328581, "learning_rate": 5.342663110548813e-06, "loss": 0.4227, "step": 13565 }, { "epoch": 2.2279268367786833, "grad_norm": 0.375223157962436, "learning_rate": 5.34220242957529e-06, "loss": 0.4516, "step": 13566 }, { "epoch": 2.228091063987026, "grad_norm": 0.36572646774424566, "learning_rate": 5.3417417394546195e-06, "loss": 0.4651, "step": 13567 }, { "epoch": 2.228255291195369, "grad_norm": 0.3584240750256261, "learning_rate": 5.341281040191805e-06, "loss": 0.4305, "step": 13568 }, { "epoch": 2.2284195184037117, "grad_norm": 0.3472023415751713, "learning_rate": 5.340820331791852e-06, "loss": 0.4582, "step": 13569 }, { "epoch": 2.2285837456120543, "grad_norm": 0.36169487795111643, "learning_rate": 5.340359614259764e-06, "loss": 0.4476, "step": 13570 }, { "epoch": 2.228747972820397, "grad_norm": 0.4990169623639773, "learning_rate": 5.3398988876005415e-06, "loss": 0.448, "step": 13571 }, { "epoch": 2.2289122000287396, "grad_norm": 0.3237664127605256, "learning_rate": 5.339438151819192e-06, "loss": 0.44, "step": 13572 }, { "epoch": 2.2290764272370827, "grad_norm": 0.3970367551739623, "learning_rate": 5.338977406920717e-06, "loss": 0.434, "step": 13573 }, { "epoch": 2.2292406544454253, "grad_norm": 0.35942224426552793, "learning_rate": 5.338516652910123e-06, "loss": 0.4468, "step": 13574 }, { "epoch": 2.229404881653768, "grad_norm": 0.27415087723762693, "learning_rate": 5.338055889792414e-06, "loss": 0.4583, "step": 13575 }, { "epoch": 2.2295691088621106, "grad_norm": 0.30038302397071287, "learning_rate": 5.337595117572591e-06, "loss": 0.4448, "step": 13576 }, { "epoch": 2.2297333360704537, "grad_norm": 0.6853846292788964, "learning_rate": 5.337134336255663e-06, "loss": 0.4408, "step": 13577 }, { "epoch": 2.2298975632787963, "grad_norm": 0.5243657367788079, "learning_rate": 5.336673545846631e-06, "loss": 0.4418, "step": 13578 }, { "epoch": 2.230061790487139, "grad_norm": 0.3239461966116936, "learning_rate": 5.336212746350502e-06, "loss": 0.4591, "step": 13579 }, { "epoch": 2.2302260176954816, "grad_norm": 0.380531154323319, "learning_rate": 5.33575193777228e-06, "loss": 0.4608, "step": 13580 }, { "epoch": 2.2303902449038246, "grad_norm": 0.30471466475214776, "learning_rate": 5.33529112011697e-06, "loss": 0.4535, "step": 13581 }, { "epoch": 2.2305544721121673, "grad_norm": 0.42057761873332117, "learning_rate": 5.334830293389576e-06, "loss": 0.4575, "step": 13582 }, { "epoch": 2.23071869932051, "grad_norm": 0.30684393851680114, "learning_rate": 5.334369457595104e-06, "loss": 0.4538, "step": 13583 }, { "epoch": 2.2308829265288526, "grad_norm": 0.4505269932130537, "learning_rate": 5.333908612738558e-06, "loss": 0.4441, "step": 13584 }, { "epoch": 2.2310471537371956, "grad_norm": 0.3104451390730639, "learning_rate": 5.333447758824945e-06, "loss": 0.4415, "step": 13585 }, { "epoch": 2.2312113809455383, "grad_norm": 0.3042509785408218, "learning_rate": 5.332986895859269e-06, "loss": 0.4442, "step": 13586 }, { "epoch": 2.231375608153881, "grad_norm": 0.33087608068349694, "learning_rate": 5.332526023846536e-06, "loss": 0.4431, "step": 13587 }, { "epoch": 2.2315398353622236, "grad_norm": 0.3253474702931535, "learning_rate": 5.332065142791751e-06, "loss": 0.4242, "step": 13588 }, { "epoch": 2.231704062570566, "grad_norm": 0.3869258011453058, "learning_rate": 5.33160425269992e-06, "loss": 0.4458, "step": 13589 }, { "epoch": 2.2318682897789093, "grad_norm": 0.4440672206002762, "learning_rate": 5.331143353576048e-06, "loss": 0.4346, "step": 13590 }, { "epoch": 2.232032516987252, "grad_norm": 0.36785815536200994, "learning_rate": 5.330682445425143e-06, "loss": 0.4452, "step": 13591 }, { "epoch": 2.2321967441955946, "grad_norm": 0.44275564494307584, "learning_rate": 5.330221528252207e-06, "loss": 0.4502, "step": 13592 }, { "epoch": 2.232360971403937, "grad_norm": 0.343543553199038, "learning_rate": 5.3297606020622495e-06, "loss": 0.4496, "step": 13593 }, { "epoch": 2.2325251986122803, "grad_norm": 0.37180636084536056, "learning_rate": 5.3292996668602765e-06, "loss": 0.4493, "step": 13594 }, { "epoch": 2.232689425820623, "grad_norm": 0.29019902675559567, "learning_rate": 5.328838722651292e-06, "loss": 0.441, "step": 13595 }, { "epoch": 2.2328536530289655, "grad_norm": 0.45008975878901003, "learning_rate": 5.3283777694403045e-06, "loss": 0.4499, "step": 13596 }, { "epoch": 2.233017880237308, "grad_norm": 0.34751380757528144, "learning_rate": 5.327916807232318e-06, "loss": 0.4276, "step": 13597 }, { "epoch": 2.2331821074456513, "grad_norm": 0.4329884068791182, "learning_rate": 5.32745583603234e-06, "loss": 0.4444, "step": 13598 }, { "epoch": 2.233346334653994, "grad_norm": 0.34680911321201413, "learning_rate": 5.32699485584538e-06, "loss": 0.4257, "step": 13599 }, { "epoch": 2.2335105618623365, "grad_norm": 0.4622230443995473, "learning_rate": 5.326533866676443e-06, "loss": 0.4285, "step": 13600 }, { "epoch": 2.233674789070679, "grad_norm": 0.5131870445694399, "learning_rate": 5.3260728685305314e-06, "loss": 0.4491, "step": 13601 }, { "epoch": 2.2338390162790223, "grad_norm": 0.40076083507511995, "learning_rate": 5.325611861412656e-06, "loss": 0.4386, "step": 13602 }, { "epoch": 2.234003243487365, "grad_norm": 0.27194120329673643, "learning_rate": 5.325150845327826e-06, "loss": 0.4402, "step": 13603 }, { "epoch": 2.2341674706957075, "grad_norm": 0.3627492697365076, "learning_rate": 5.3246898202810455e-06, "loss": 0.4561, "step": 13604 }, { "epoch": 2.23433169790405, "grad_norm": 0.36811125848416526, "learning_rate": 5.324228786277321e-06, "loss": 0.4442, "step": 13605 }, { "epoch": 2.234495925112393, "grad_norm": 0.32740568146167454, "learning_rate": 5.323767743321661e-06, "loss": 0.4416, "step": 13606 }, { "epoch": 2.234660152320736, "grad_norm": 0.2928833678123084, "learning_rate": 5.323306691419072e-06, "loss": 0.4526, "step": 13607 }, { "epoch": 2.2348243795290785, "grad_norm": 0.5940056253711327, "learning_rate": 5.322845630574564e-06, "loss": 0.4452, "step": 13608 }, { "epoch": 2.234988606737421, "grad_norm": 0.42021729424037163, "learning_rate": 5.322384560793143e-06, "loss": 0.4168, "step": 13609 }, { "epoch": 2.235152833945764, "grad_norm": 0.3073837421956184, "learning_rate": 5.321923482079815e-06, "loss": 0.4486, "step": 13610 }, { "epoch": 2.235317061154107, "grad_norm": 0.3522817208717473, "learning_rate": 5.321462394439591e-06, "loss": 0.4284, "step": 13611 }, { "epoch": 2.2354812883624495, "grad_norm": 0.3598916939940968, "learning_rate": 5.3210012978774765e-06, "loss": 0.4464, "step": 13612 }, { "epoch": 2.235645515570792, "grad_norm": 0.2617858004173523, "learning_rate": 5.320540192398479e-06, "loss": 0.4511, "step": 13613 }, { "epoch": 2.235809742779135, "grad_norm": 0.31940286486217156, "learning_rate": 5.3200790780076096e-06, "loss": 0.4472, "step": 13614 }, { "epoch": 2.235973969987478, "grad_norm": 0.2959868358849483, "learning_rate": 5.319617954709873e-06, "loss": 0.4249, "step": 13615 }, { "epoch": 2.2361381971958205, "grad_norm": 0.3030407022951804, "learning_rate": 5.319156822510281e-06, "loss": 0.448, "step": 13616 }, { "epoch": 2.236302424404163, "grad_norm": 0.34246789595360566, "learning_rate": 5.318695681413839e-06, "loss": 0.4412, "step": 13617 }, { "epoch": 2.236466651612506, "grad_norm": 0.3426164905586456, "learning_rate": 5.318234531425555e-06, "loss": 0.4696, "step": 13618 }, { "epoch": 2.236630878820849, "grad_norm": 0.32189875802648454, "learning_rate": 5.317773372550441e-06, "loss": 0.4516, "step": 13619 }, { "epoch": 2.2367951060291915, "grad_norm": 0.29844986670895207, "learning_rate": 5.317312204793502e-06, "loss": 0.4394, "step": 13620 }, { "epoch": 2.236959333237534, "grad_norm": 0.30412717030286257, "learning_rate": 5.316851028159749e-06, "loss": 0.4352, "step": 13621 }, { "epoch": 2.237123560445877, "grad_norm": 0.3608272137967386, "learning_rate": 5.3163898426541905e-06, "loss": 0.4342, "step": 13622 }, { "epoch": 2.2372877876542194, "grad_norm": 0.2920497088883626, "learning_rate": 5.315928648281834e-06, "loss": 0.445, "step": 13623 }, { "epoch": 2.2374520148625625, "grad_norm": 0.3006645750815423, "learning_rate": 5.315467445047691e-06, "loss": 0.4137, "step": 13624 }, { "epoch": 2.237616242070905, "grad_norm": 0.28330615816664806, "learning_rate": 5.315006232956768e-06, "loss": 0.4441, "step": 13625 }, { "epoch": 2.237780469279248, "grad_norm": 0.4482961606066074, "learning_rate": 5.314545012014077e-06, "loss": 0.4477, "step": 13626 }, { "epoch": 2.2379446964875904, "grad_norm": 0.5955992178112777, "learning_rate": 5.314083782224624e-06, "loss": 0.4501, "step": 13627 }, { "epoch": 2.2381089236959335, "grad_norm": 0.33806720327208106, "learning_rate": 5.3136225435934215e-06, "loss": 0.449, "step": 13628 }, { "epoch": 2.238273150904276, "grad_norm": 0.3012297840190392, "learning_rate": 5.313161296125477e-06, "loss": 0.4313, "step": 13629 }, { "epoch": 2.238437378112619, "grad_norm": 0.34520611961056774, "learning_rate": 5.312700039825801e-06, "loss": 0.4413, "step": 13630 }, { "epoch": 2.2386016053209614, "grad_norm": 0.3207154044252607, "learning_rate": 5.312238774699403e-06, "loss": 0.4581, "step": 13631 }, { "epoch": 2.2387658325293045, "grad_norm": 0.3156908540345743, "learning_rate": 5.311777500751293e-06, "loss": 0.4465, "step": 13632 }, { "epoch": 2.238930059737647, "grad_norm": 0.5587797292971918, "learning_rate": 5.31131621798648e-06, "loss": 0.4327, "step": 13633 }, { "epoch": 2.2390942869459898, "grad_norm": 0.3836716634336738, "learning_rate": 5.310854926409975e-06, "loss": 0.4342, "step": 13634 }, { "epoch": 2.2392585141543324, "grad_norm": 0.39705515751436415, "learning_rate": 5.310393626026789e-06, "loss": 0.4496, "step": 13635 }, { "epoch": 2.2394227413626755, "grad_norm": 0.4116210776291705, "learning_rate": 5.3099323168419276e-06, "loss": 0.4644, "step": 13636 }, { "epoch": 2.239586968571018, "grad_norm": 0.48621166225056617, "learning_rate": 5.309470998860407e-06, "loss": 0.428, "step": 13637 }, { "epoch": 2.2397511957793608, "grad_norm": 0.29530363881665117, "learning_rate": 5.309009672087236e-06, "loss": 0.4504, "step": 13638 }, { "epoch": 2.2399154229877034, "grad_norm": 0.3208941816493528, "learning_rate": 5.308548336527421e-06, "loss": 0.4145, "step": 13639 }, { "epoch": 2.240079650196046, "grad_norm": 0.34304395384197434, "learning_rate": 5.3080869921859765e-06, "loss": 0.4554, "step": 13640 }, { "epoch": 2.240243877404389, "grad_norm": 0.2997489296447226, "learning_rate": 5.3076256390679136e-06, "loss": 0.4306, "step": 13641 }, { "epoch": 2.2404081046127318, "grad_norm": 0.3598332489608276, "learning_rate": 5.30716427717824e-06, "loss": 0.4744, "step": 13642 }, { "epoch": 2.2405723318210744, "grad_norm": 0.3640856566145552, "learning_rate": 5.306702906521969e-06, "loss": 0.4426, "step": 13643 }, { "epoch": 2.240736559029417, "grad_norm": 0.25957988968195783, "learning_rate": 5.30624152710411e-06, "loss": 0.4347, "step": 13644 }, { "epoch": 2.24090078623776, "grad_norm": 0.3488760731941589, "learning_rate": 5.305780138929676e-06, "loss": 0.4667, "step": 13645 }, { "epoch": 2.2410650134461028, "grad_norm": 0.4165200065998101, "learning_rate": 5.305318742003677e-06, "loss": 0.4525, "step": 13646 }, { "epoch": 2.2412292406544454, "grad_norm": 0.31530256152853486, "learning_rate": 5.304857336331123e-06, "loss": 0.4622, "step": 13647 }, { "epoch": 2.241393467862788, "grad_norm": 0.4446666704947999, "learning_rate": 5.304395921917027e-06, "loss": 0.4301, "step": 13648 }, { "epoch": 2.241557695071131, "grad_norm": 0.4053189269354466, "learning_rate": 5.303934498766399e-06, "loss": 0.4412, "step": 13649 }, { "epoch": 2.2417219222794738, "grad_norm": 0.3625864205313529, "learning_rate": 5.303473066884254e-06, "loss": 0.4487, "step": 13650 }, { "epoch": 2.2418861494878164, "grad_norm": 0.3885493040306324, "learning_rate": 5.303011626275599e-06, "loss": 0.4359, "step": 13651 }, { "epoch": 2.242050376696159, "grad_norm": 0.318432682101398, "learning_rate": 5.3025501769454475e-06, "loss": 0.4322, "step": 13652 }, { "epoch": 2.242214603904502, "grad_norm": 0.3007696684784351, "learning_rate": 5.302088718898812e-06, "loss": 0.4489, "step": 13653 }, { "epoch": 2.2423788311128448, "grad_norm": 0.3887145358064661, "learning_rate": 5.3016272521407036e-06, "loss": 0.4483, "step": 13654 }, { "epoch": 2.2425430583211874, "grad_norm": 0.3409044775291692, "learning_rate": 5.301165776676134e-06, "loss": 0.4433, "step": 13655 }, { "epoch": 2.24270728552953, "grad_norm": 0.3670942744271576, "learning_rate": 5.300704292510116e-06, "loss": 0.4526, "step": 13656 }, { "epoch": 2.2428715127378727, "grad_norm": 0.29240835654932273, "learning_rate": 5.300242799647662e-06, "loss": 0.4306, "step": 13657 }, { "epoch": 2.2430357399462157, "grad_norm": 0.39224385150203395, "learning_rate": 5.299781298093785e-06, "loss": 0.4398, "step": 13658 }, { "epoch": 2.2431999671545584, "grad_norm": 0.3124724267924632, "learning_rate": 5.299319787853495e-06, "loss": 0.4426, "step": 13659 }, { "epoch": 2.243364194362901, "grad_norm": 0.3118206048528069, "learning_rate": 5.298858268931805e-06, "loss": 0.4599, "step": 13660 }, { "epoch": 2.2435284215712437, "grad_norm": 0.3472209576784062, "learning_rate": 5.298396741333729e-06, "loss": 0.4427, "step": 13661 }, { "epoch": 2.2436926487795867, "grad_norm": 0.28816801110315016, "learning_rate": 5.297935205064279e-06, "loss": 0.4155, "step": 13662 }, { "epoch": 2.2438568759879294, "grad_norm": 0.3280013408771476, "learning_rate": 5.297473660128469e-06, "loss": 0.4351, "step": 13663 }, { "epoch": 2.244021103196272, "grad_norm": 0.3013157024739304, "learning_rate": 5.297012106531308e-06, "loss": 0.4542, "step": 13664 }, { "epoch": 2.2441853304046147, "grad_norm": 0.2698186431923782, "learning_rate": 5.296550544277813e-06, "loss": 0.4358, "step": 13665 }, { "epoch": 2.2443495576129573, "grad_norm": 0.3152377394168143, "learning_rate": 5.296088973372994e-06, "loss": 0.4271, "step": 13666 }, { "epoch": 2.2445137848213004, "grad_norm": 0.332333820287266, "learning_rate": 5.295627393821867e-06, "loss": 0.4439, "step": 13667 }, { "epoch": 2.244678012029643, "grad_norm": 0.3540494011551216, "learning_rate": 5.295165805629443e-06, "loss": 0.427, "step": 13668 }, { "epoch": 2.2448422392379856, "grad_norm": 0.27766510632642816, "learning_rate": 5.294704208800735e-06, "loss": 0.4408, "step": 13669 }, { "epoch": 2.2450064664463287, "grad_norm": 0.32517347498764476, "learning_rate": 5.2942426033407585e-06, "loss": 0.4429, "step": 13670 }, { "epoch": 2.2451706936546714, "grad_norm": 0.3013033368759928, "learning_rate": 5.293780989254526e-06, "loss": 0.4392, "step": 13671 }, { "epoch": 2.245334920863014, "grad_norm": 0.282745086702727, "learning_rate": 5.293319366547051e-06, "loss": 0.4381, "step": 13672 }, { "epoch": 2.2454991480713566, "grad_norm": 0.3523847887648517, "learning_rate": 5.292857735223346e-06, "loss": 0.4364, "step": 13673 }, { "epoch": 2.2456633752796993, "grad_norm": 0.3750636418052584, "learning_rate": 5.292396095288428e-06, "loss": 0.4701, "step": 13674 }, { "epoch": 2.2458276024880424, "grad_norm": 0.6454444354941448, "learning_rate": 5.2919344467473074e-06, "loss": 0.4425, "step": 13675 }, { "epoch": 2.245991829696385, "grad_norm": 0.32847604128266267, "learning_rate": 5.2914727896049996e-06, "loss": 0.4349, "step": 13676 }, { "epoch": 2.2461560569047276, "grad_norm": 0.27768628742635093, "learning_rate": 5.291011123866519e-06, "loss": 0.4275, "step": 13677 }, { "epoch": 2.2463202841130703, "grad_norm": 0.29142901921932585, "learning_rate": 5.290549449536879e-06, "loss": 0.4339, "step": 13678 }, { "epoch": 2.2464845113214134, "grad_norm": 0.5690887619651148, "learning_rate": 5.290087766621095e-06, "loss": 0.429, "step": 13679 }, { "epoch": 2.246648738529756, "grad_norm": 0.3532823427162118, "learning_rate": 5.289626075124179e-06, "loss": 0.444, "step": 13680 }, { "epoch": 2.2468129657380986, "grad_norm": 0.2671035256621192, "learning_rate": 5.289164375051147e-06, "loss": 0.46, "step": 13681 }, { "epoch": 2.2469771929464413, "grad_norm": 0.34637078136166244, "learning_rate": 5.2887026664070154e-06, "loss": 0.4498, "step": 13682 }, { "epoch": 2.247141420154784, "grad_norm": 0.3572211705234191, "learning_rate": 5.288240949196796e-06, "loss": 0.4573, "step": 13683 }, { "epoch": 2.247305647363127, "grad_norm": 0.3579960088428089, "learning_rate": 5.287779223425504e-06, "loss": 0.4302, "step": 13684 }, { "epoch": 2.2474698745714696, "grad_norm": 0.34066301797241494, "learning_rate": 5.287317489098154e-06, "loss": 0.443, "step": 13685 }, { "epoch": 2.2476341017798123, "grad_norm": 0.32676299112996604, "learning_rate": 5.286855746219762e-06, "loss": 0.4399, "step": 13686 }, { "epoch": 2.2477983289881553, "grad_norm": 0.36409542955257634, "learning_rate": 5.2863939947953435e-06, "loss": 0.4546, "step": 13687 }, { "epoch": 2.247962556196498, "grad_norm": 0.4273691550545424, "learning_rate": 5.285932234829911e-06, "loss": 0.4636, "step": 13688 }, { "epoch": 2.2481267834048406, "grad_norm": 0.36166863699389046, "learning_rate": 5.285470466328482e-06, "loss": 0.4517, "step": 13689 }, { "epoch": 2.2482910106131833, "grad_norm": 0.3815787960529812, "learning_rate": 5.285008689296069e-06, "loss": 0.4625, "step": 13690 }, { "epoch": 2.248455237821526, "grad_norm": 0.26063558836316875, "learning_rate": 5.284546903737692e-06, "loss": 0.4617, "step": 13691 }, { "epoch": 2.248619465029869, "grad_norm": 0.4614932545774242, "learning_rate": 5.284085109658363e-06, "loss": 0.4235, "step": 13692 }, { "epoch": 2.2487836922382116, "grad_norm": 0.32931830311298993, "learning_rate": 5.283623307063098e-06, "loss": 0.427, "step": 13693 }, { "epoch": 2.2489479194465543, "grad_norm": 2.310641384368478, "learning_rate": 5.283161495956912e-06, "loss": 0.4346, "step": 13694 }, { "epoch": 2.249112146654897, "grad_norm": 0.5378084523050332, "learning_rate": 5.282699676344822e-06, "loss": 0.4452, "step": 13695 }, { "epoch": 2.24927637386324, "grad_norm": 0.34680472456278405, "learning_rate": 5.282237848231844e-06, "loss": 0.4457, "step": 13696 }, { "epoch": 2.2494406010715826, "grad_norm": 0.3445890634659044, "learning_rate": 5.281776011622994e-06, "loss": 0.4428, "step": 13697 }, { "epoch": 2.2496048282799253, "grad_norm": 0.2858278812975166, "learning_rate": 5.2813141665232865e-06, "loss": 0.4467, "step": 13698 }, { "epoch": 2.249769055488268, "grad_norm": 0.3145020721528898, "learning_rate": 5.280852312937738e-06, "loss": 0.433, "step": 13699 }, { "epoch": 2.2499332826966105, "grad_norm": 0.3817643009363534, "learning_rate": 5.280390450871367e-06, "loss": 0.446, "step": 13700 }, { "epoch": 2.2500975099049536, "grad_norm": 0.4631720137309753, "learning_rate": 5.279928580329187e-06, "loss": 0.4365, "step": 13701 }, { "epoch": 2.2502617371132962, "grad_norm": 0.3874126600272311, "learning_rate": 5.279466701316214e-06, "loss": 0.4451, "step": 13702 }, { "epoch": 2.250425964321639, "grad_norm": 0.7340837113996945, "learning_rate": 5.279004813837466e-06, "loss": 0.454, "step": 13703 }, { "epoch": 2.250590191529982, "grad_norm": 0.32826893954558084, "learning_rate": 5.278542917897961e-06, "loss": 0.4456, "step": 13704 }, { "epoch": 2.2507544187383246, "grad_norm": 0.30313075634072284, "learning_rate": 5.2780810135027115e-06, "loss": 0.467, "step": 13705 }, { "epoch": 2.2509186459466672, "grad_norm": 0.2936344465324073, "learning_rate": 5.277619100656739e-06, "loss": 0.4437, "step": 13706 }, { "epoch": 2.25108287315501, "grad_norm": 0.32540102092866996, "learning_rate": 5.277157179365056e-06, "loss": 0.4375, "step": 13707 }, { "epoch": 2.2512471003633525, "grad_norm": 0.5418154782829453, "learning_rate": 5.276695249632683e-06, "loss": 0.428, "step": 13708 }, { "epoch": 2.2514113275716956, "grad_norm": 0.4711780155828875, "learning_rate": 5.276233311464635e-06, "loss": 0.4598, "step": 13709 }, { "epoch": 2.2515755547800382, "grad_norm": 0.4652902743526495, "learning_rate": 5.275771364865929e-06, "loss": 0.4705, "step": 13710 }, { "epoch": 2.251739781988381, "grad_norm": 0.38330167114251695, "learning_rate": 5.275309409841583e-06, "loss": 0.4371, "step": 13711 }, { "epoch": 2.2519040091967235, "grad_norm": 0.34378733317073845, "learning_rate": 5.274847446396615e-06, "loss": 0.4462, "step": 13712 }, { "epoch": 2.2520682364050666, "grad_norm": 0.3504403391095108, "learning_rate": 5.27438547453604e-06, "loss": 0.4499, "step": 13713 }, { "epoch": 2.2522324636134092, "grad_norm": 0.3535033845955811, "learning_rate": 5.273923494264877e-06, "loss": 0.4429, "step": 13714 }, { "epoch": 2.252396690821752, "grad_norm": 0.33591679853041095, "learning_rate": 5.273461505588142e-06, "loss": 0.4497, "step": 13715 }, { "epoch": 2.2525609180300945, "grad_norm": 0.3383253978626295, "learning_rate": 5.272999508510857e-06, "loss": 0.4391, "step": 13716 }, { "epoch": 2.252725145238437, "grad_norm": 0.32385647020423586, "learning_rate": 5.272537503038036e-06, "loss": 0.4337, "step": 13717 }, { "epoch": 2.2528893724467802, "grad_norm": 0.3396561220455788, "learning_rate": 5.2720754891746965e-06, "loss": 0.4611, "step": 13718 }, { "epoch": 2.253053599655123, "grad_norm": 0.38140665585784617, "learning_rate": 5.271613466925859e-06, "loss": 0.4453, "step": 13719 }, { "epoch": 2.2532178268634655, "grad_norm": 0.6540812656662718, "learning_rate": 5.2711514362965395e-06, "loss": 0.443, "step": 13720 }, { "epoch": 2.2533820540718086, "grad_norm": 0.28094182270807777, "learning_rate": 5.270689397291757e-06, "loss": 0.4211, "step": 13721 }, { "epoch": 2.253546281280151, "grad_norm": 0.425522068083951, "learning_rate": 5.2702273499165286e-06, "loss": 0.4424, "step": 13722 }, { "epoch": 2.253710508488494, "grad_norm": 0.41584840792055144, "learning_rate": 5.2697652941758724e-06, "loss": 0.4358, "step": 13723 }, { "epoch": 2.2538747356968365, "grad_norm": 0.3177024211408362, "learning_rate": 5.26930323007481e-06, "loss": 0.434, "step": 13724 }, { "epoch": 2.254038962905179, "grad_norm": 0.3330303286606085, "learning_rate": 5.2688411576183565e-06, "loss": 0.4462, "step": 13725 }, { "epoch": 2.254203190113522, "grad_norm": 0.294027547485759, "learning_rate": 5.268379076811532e-06, "loss": 0.4463, "step": 13726 }, { "epoch": 2.254367417321865, "grad_norm": 0.3264056370297087, "learning_rate": 5.267916987659355e-06, "loss": 0.4393, "step": 13727 }, { "epoch": 2.2545316445302075, "grad_norm": 0.29553128037110177, "learning_rate": 5.267454890166842e-06, "loss": 0.4719, "step": 13728 }, { "epoch": 2.25469587173855, "grad_norm": 0.331978086815522, "learning_rate": 5.266992784339016e-06, "loss": 0.4409, "step": 13729 }, { "epoch": 2.254860098946893, "grad_norm": 0.2809751050096038, "learning_rate": 5.266530670180894e-06, "loss": 0.4285, "step": 13730 }, { "epoch": 2.255024326155236, "grad_norm": 0.3182632580568042, "learning_rate": 5.266068547697493e-06, "loss": 0.4454, "step": 13731 }, { "epoch": 2.2551885533635785, "grad_norm": 0.32836916473275135, "learning_rate": 5.265606416893835e-06, "loss": 0.4307, "step": 13732 }, { "epoch": 2.255352780571921, "grad_norm": 0.2853170249483263, "learning_rate": 5.265144277774938e-06, "loss": 0.4344, "step": 13733 }, { "epoch": 2.2555170077802638, "grad_norm": 0.5178439532805238, "learning_rate": 5.2646821303458215e-06, "loss": 0.4399, "step": 13734 }, { "epoch": 2.255681234988607, "grad_norm": 0.5507828085958464, "learning_rate": 5.264219974611505e-06, "loss": 0.4655, "step": 13735 }, { "epoch": 2.2558454621969495, "grad_norm": 0.3382196997965826, "learning_rate": 5.263757810577006e-06, "loss": 0.4631, "step": 13736 }, { "epoch": 2.256009689405292, "grad_norm": 0.28330362320821995, "learning_rate": 5.263295638247347e-06, "loss": 0.4392, "step": 13737 }, { "epoch": 2.256173916613635, "grad_norm": 0.35874744100148126, "learning_rate": 5.262833457627546e-06, "loss": 0.4491, "step": 13738 }, { "epoch": 2.256338143821978, "grad_norm": 0.267510615311155, "learning_rate": 5.262371268722623e-06, "loss": 0.4176, "step": 13739 }, { "epoch": 2.2565023710303205, "grad_norm": 0.5092229768402633, "learning_rate": 5.261909071537598e-06, "loss": 0.4427, "step": 13740 }, { "epoch": 2.256666598238663, "grad_norm": 0.2804393487003495, "learning_rate": 5.261446866077491e-06, "loss": 0.433, "step": 13741 }, { "epoch": 2.2568308254470058, "grad_norm": 0.3899613005160599, "learning_rate": 5.260984652347323e-06, "loss": 0.4338, "step": 13742 }, { "epoch": 2.256995052655349, "grad_norm": 0.3052554308900633, "learning_rate": 5.260522430352111e-06, "loss": 0.418, "step": 13743 }, { "epoch": 2.2571592798636915, "grad_norm": 0.3072133071489857, "learning_rate": 5.260060200096877e-06, "loss": 0.4692, "step": 13744 }, { "epoch": 2.257323507072034, "grad_norm": 0.2834940365753804, "learning_rate": 5.259597961586644e-06, "loss": 0.4443, "step": 13745 }, { "epoch": 2.2574877342803767, "grad_norm": 0.34360106449572925, "learning_rate": 5.259135714826429e-06, "loss": 0.4458, "step": 13746 }, { "epoch": 2.25765196148872, "grad_norm": 0.34200371546354, "learning_rate": 5.2586734598212515e-06, "loss": 0.4496, "step": 13747 }, { "epoch": 2.2578161886970625, "grad_norm": 0.32078529673268424, "learning_rate": 5.258211196576134e-06, "loss": 0.4541, "step": 13748 }, { "epoch": 2.257980415905405, "grad_norm": 0.7634754044032883, "learning_rate": 5.257748925096098e-06, "loss": 0.4563, "step": 13749 }, { "epoch": 2.2581446431137477, "grad_norm": 0.341481961545268, "learning_rate": 5.257286645386164e-06, "loss": 0.4456, "step": 13750 }, { "epoch": 2.2583088703220904, "grad_norm": 0.2911188279442169, "learning_rate": 5.256824357451351e-06, "loss": 0.4547, "step": 13751 }, { "epoch": 2.2584730975304335, "grad_norm": 0.37741366724925646, "learning_rate": 5.256362061296681e-06, "loss": 0.4595, "step": 13752 }, { "epoch": 2.258637324738776, "grad_norm": 0.3611787880484088, "learning_rate": 5.255899756927174e-06, "loss": 0.4582, "step": 13753 }, { "epoch": 2.2588015519471187, "grad_norm": 0.913157147165417, "learning_rate": 5.255437444347852e-06, "loss": 0.4317, "step": 13754 }, { "epoch": 2.258965779155462, "grad_norm": 0.7085119346882607, "learning_rate": 5.254975123563737e-06, "loss": 0.4393, "step": 13755 }, { "epoch": 2.2591300063638045, "grad_norm": 0.4123409307715226, "learning_rate": 5.25451279457985e-06, "loss": 0.4388, "step": 13756 }, { "epoch": 2.259294233572147, "grad_norm": 0.3130367328214307, "learning_rate": 5.25405045740121e-06, "loss": 0.461, "step": 13757 }, { "epoch": 2.2594584607804897, "grad_norm": 0.4009373961373831, "learning_rate": 5.253588112032841e-06, "loss": 0.4546, "step": 13758 }, { "epoch": 2.2596226879888324, "grad_norm": 0.3549456349719573, "learning_rate": 5.253125758479763e-06, "loss": 0.4515, "step": 13759 }, { "epoch": 2.2597869151971754, "grad_norm": 0.3200803902850578, "learning_rate": 5.2526633967469995e-06, "loss": 0.4402, "step": 13760 }, { "epoch": 2.259951142405518, "grad_norm": 0.3332251927527049, "learning_rate": 5.25220102683957e-06, "loss": 0.4173, "step": 13761 }, { "epoch": 2.2601153696138607, "grad_norm": 0.3379245162929123, "learning_rate": 5.251738648762497e-06, "loss": 0.4477, "step": 13762 }, { "epoch": 2.2602795968222034, "grad_norm": 0.4068887227418874, "learning_rate": 5.251276262520804e-06, "loss": 0.4476, "step": 13763 }, { "epoch": 2.2604438240305464, "grad_norm": 0.35292379389392914, "learning_rate": 5.25081386811951e-06, "loss": 0.4647, "step": 13764 }, { "epoch": 2.260608051238889, "grad_norm": 0.31261175083494874, "learning_rate": 5.2503514655636405e-06, "loss": 0.456, "step": 13765 }, { "epoch": 2.2607722784472317, "grad_norm": 0.3023902044167681, "learning_rate": 5.249889054858214e-06, "loss": 0.4486, "step": 13766 }, { "epoch": 2.2609365056555744, "grad_norm": 0.3510764596785331, "learning_rate": 5.249426636008257e-06, "loss": 0.4648, "step": 13767 }, { "epoch": 2.261100732863917, "grad_norm": 0.29579015381658, "learning_rate": 5.248964209018787e-06, "loss": 0.4333, "step": 13768 }, { "epoch": 2.26126496007226, "grad_norm": 0.3517170589499364, "learning_rate": 5.24850177389483e-06, "loss": 0.4315, "step": 13769 }, { "epoch": 2.2614291872806027, "grad_norm": 0.30858214290887587, "learning_rate": 5.248039330641407e-06, "loss": 0.4664, "step": 13770 }, { "epoch": 2.2615934144889454, "grad_norm": 0.6687128463622946, "learning_rate": 5.247576879263542e-06, "loss": 0.4483, "step": 13771 }, { "epoch": 2.2617576416972884, "grad_norm": 0.3588713029547824, "learning_rate": 5.247114419766255e-06, "loss": 0.426, "step": 13772 }, { "epoch": 2.261921868905631, "grad_norm": 0.3184520396061027, "learning_rate": 5.246651952154569e-06, "loss": 0.4415, "step": 13773 }, { "epoch": 2.2620860961139737, "grad_norm": 0.3182348076386226, "learning_rate": 5.246189476433512e-06, "loss": 0.4154, "step": 13774 }, { "epoch": 2.2622503233223163, "grad_norm": 0.3472059557046651, "learning_rate": 5.245726992608101e-06, "loss": 0.4297, "step": 13775 }, { "epoch": 2.262414550530659, "grad_norm": 0.4028328577668929, "learning_rate": 5.245264500683361e-06, "loss": 0.4502, "step": 13776 }, { "epoch": 2.262578777739002, "grad_norm": 0.30761601943596584, "learning_rate": 5.244802000664314e-06, "loss": 0.4434, "step": 13777 }, { "epoch": 2.2627430049473447, "grad_norm": 0.388515157470138, "learning_rate": 5.244339492555987e-06, "loss": 0.4364, "step": 13778 }, { "epoch": 2.2629072321556873, "grad_norm": 0.35450795824264086, "learning_rate": 5.2438769763634e-06, "loss": 0.4367, "step": 13779 }, { "epoch": 2.26307145936403, "grad_norm": 0.3294525187120768, "learning_rate": 5.243414452091578e-06, "loss": 0.4377, "step": 13780 }, { "epoch": 2.263235686572373, "grad_norm": 0.3722129129869963, "learning_rate": 5.242951919745542e-06, "loss": 0.4388, "step": 13781 }, { "epoch": 2.2633999137807157, "grad_norm": 0.4507616586121919, "learning_rate": 5.242489379330317e-06, "loss": 0.4447, "step": 13782 }, { "epoch": 2.2635641409890583, "grad_norm": 0.3598737035818175, "learning_rate": 5.242026830850928e-06, "loss": 0.4402, "step": 13783 }, { "epoch": 2.263728368197401, "grad_norm": 0.3324765686675211, "learning_rate": 5.241564274312398e-06, "loss": 0.4466, "step": 13784 }, { "epoch": 2.2638925954057436, "grad_norm": 0.4083224933688828, "learning_rate": 5.241101709719749e-06, "loss": 0.4498, "step": 13785 }, { "epoch": 2.2640568226140867, "grad_norm": 0.2933933784871219, "learning_rate": 5.240639137078007e-06, "loss": 0.4265, "step": 13786 }, { "epoch": 2.2642210498224293, "grad_norm": 0.27710009965559146, "learning_rate": 5.240176556392194e-06, "loss": 0.4314, "step": 13787 }, { "epoch": 2.264385277030772, "grad_norm": 0.29841634796489386, "learning_rate": 5.239713967667338e-06, "loss": 0.4508, "step": 13788 }, { "epoch": 2.264549504239115, "grad_norm": 0.5230942248164453, "learning_rate": 5.23925137090846e-06, "loss": 0.4558, "step": 13789 }, { "epoch": 2.2647137314474577, "grad_norm": 0.5844725375386487, "learning_rate": 5.238788766120583e-06, "loss": 0.4491, "step": 13790 }, { "epoch": 2.2648779586558003, "grad_norm": 0.35880413792086935, "learning_rate": 5.238326153308733e-06, "loss": 0.4613, "step": 13791 }, { "epoch": 2.265042185864143, "grad_norm": 0.27176190824021323, "learning_rate": 5.237863532477936e-06, "loss": 0.4297, "step": 13792 }, { "epoch": 2.2652064130724856, "grad_norm": 0.31010986715177674, "learning_rate": 5.237400903633215e-06, "loss": 0.4429, "step": 13793 }, { "epoch": 2.2653706402808287, "grad_norm": 0.35101079799031804, "learning_rate": 5.236938266779595e-06, "loss": 0.4443, "step": 13794 }, { "epoch": 2.2655348674891713, "grad_norm": 0.2913660569347403, "learning_rate": 5.236475621922099e-06, "loss": 0.4161, "step": 13795 }, { "epoch": 2.265699094697514, "grad_norm": 0.37533689834149453, "learning_rate": 5.236012969065754e-06, "loss": 0.434, "step": 13796 }, { "epoch": 2.2658633219058566, "grad_norm": 0.312108000748107, "learning_rate": 5.2355503082155835e-06, "loss": 0.4432, "step": 13797 }, { "epoch": 2.2660275491141997, "grad_norm": 0.33505756371900425, "learning_rate": 5.2350876393766135e-06, "loss": 0.4509, "step": 13798 }, { "epoch": 2.2661917763225423, "grad_norm": 0.2958642981997617, "learning_rate": 5.2346249625538684e-06, "loss": 0.4313, "step": 13799 }, { "epoch": 2.266356003530885, "grad_norm": 0.3161571807919032, "learning_rate": 5.234162277752374e-06, "loss": 0.4637, "step": 13800 }, { "epoch": 2.2665202307392276, "grad_norm": 0.2850364464418512, "learning_rate": 5.233699584977154e-06, "loss": 0.4434, "step": 13801 }, { "epoch": 2.2666844579475702, "grad_norm": 0.44128845128357014, "learning_rate": 5.233236884233234e-06, "loss": 0.4314, "step": 13802 }, { "epoch": 2.2668486851559133, "grad_norm": 0.2709781947969362, "learning_rate": 5.232774175525642e-06, "loss": 0.4356, "step": 13803 }, { "epoch": 2.267012912364256, "grad_norm": 0.3143610695517449, "learning_rate": 5.232311458859401e-06, "loss": 0.452, "step": 13804 }, { "epoch": 2.2671771395725986, "grad_norm": 0.2811302211053564, "learning_rate": 5.231848734239536e-06, "loss": 0.4454, "step": 13805 }, { "epoch": 2.2673413667809417, "grad_norm": 0.29524919404369326, "learning_rate": 5.231386001671074e-06, "loss": 0.4252, "step": 13806 }, { "epoch": 2.2675055939892843, "grad_norm": 0.3036902739174187, "learning_rate": 5.23092326115904e-06, "loss": 0.4454, "step": 13807 }, { "epoch": 2.267669821197627, "grad_norm": 0.3115339770671733, "learning_rate": 5.230460512708461e-06, "loss": 0.4635, "step": 13808 }, { "epoch": 2.2678340484059696, "grad_norm": 0.29688931339668434, "learning_rate": 5.2299977563243635e-06, "loss": 0.4468, "step": 13809 }, { "epoch": 2.267998275614312, "grad_norm": 0.3085715958936347, "learning_rate": 5.229534992011769e-06, "loss": 0.4583, "step": 13810 }, { "epoch": 2.2681625028226553, "grad_norm": 0.3095940496568009, "learning_rate": 5.229072219775708e-06, "loss": 0.4407, "step": 13811 }, { "epoch": 2.268326730030998, "grad_norm": 0.4908325226525298, "learning_rate": 5.228609439621206e-06, "loss": 0.4401, "step": 13812 }, { "epoch": 2.2684909572393406, "grad_norm": 0.44589233555087854, "learning_rate": 5.2281466515532894e-06, "loss": 0.4209, "step": 13813 }, { "epoch": 2.268655184447683, "grad_norm": 0.2802820328979476, "learning_rate": 5.227683855576983e-06, "loss": 0.4287, "step": 13814 }, { "epoch": 2.2688194116560263, "grad_norm": 0.4046303814982037, "learning_rate": 5.227221051697313e-06, "loss": 0.4517, "step": 13815 }, { "epoch": 2.268983638864369, "grad_norm": 0.33943476882464263, "learning_rate": 5.226758239919308e-06, "loss": 0.4378, "step": 13816 }, { "epoch": 2.2691478660727116, "grad_norm": 0.3936793154256377, "learning_rate": 5.226295420247994e-06, "loss": 0.459, "step": 13817 }, { "epoch": 2.269312093281054, "grad_norm": 0.2697716266058795, "learning_rate": 5.225832592688397e-06, "loss": 0.4345, "step": 13818 }, { "epoch": 2.269476320489397, "grad_norm": 0.3475167102931843, "learning_rate": 5.2253697572455424e-06, "loss": 0.4517, "step": 13819 }, { "epoch": 2.26964054769774, "grad_norm": 0.3057864002599708, "learning_rate": 5.224906913924459e-06, "loss": 0.4186, "step": 13820 }, { "epoch": 2.2698047749060826, "grad_norm": 0.3029753715957564, "learning_rate": 5.224444062730174e-06, "loss": 0.4541, "step": 13821 }, { "epoch": 2.269969002114425, "grad_norm": 0.307432523810183, "learning_rate": 5.2239812036677135e-06, "loss": 0.4453, "step": 13822 }, { "epoch": 2.2701332293227683, "grad_norm": 0.3060059931511143, "learning_rate": 5.2235183367421055e-06, "loss": 0.4426, "step": 13823 }, { "epoch": 2.270297456531111, "grad_norm": 0.36256508614149663, "learning_rate": 5.2230554619583756e-06, "loss": 0.4367, "step": 13824 }, { "epoch": 2.2704616837394536, "grad_norm": 0.3647724136370156, "learning_rate": 5.222592579321552e-06, "loss": 0.4479, "step": 13825 }, { "epoch": 2.270625910947796, "grad_norm": 0.31823959283917014, "learning_rate": 5.2221296888366615e-06, "loss": 0.442, "step": 13826 }, { "epoch": 2.270790138156139, "grad_norm": 0.30892461908438634, "learning_rate": 5.221666790508733e-06, "loss": 0.4305, "step": 13827 }, { "epoch": 2.270954365364482, "grad_norm": 0.5498779110061026, "learning_rate": 5.221203884342793e-06, "loss": 0.4374, "step": 13828 }, { "epoch": 2.2711185925728246, "grad_norm": 0.3437254626690269, "learning_rate": 5.22074097034387e-06, "loss": 0.4173, "step": 13829 }, { "epoch": 2.271282819781167, "grad_norm": 0.39163066483566433, "learning_rate": 5.22027804851699e-06, "loss": 0.4317, "step": 13830 }, { "epoch": 2.27144704698951, "grad_norm": 0.31973453986137745, "learning_rate": 5.219815118867182e-06, "loss": 0.417, "step": 13831 }, { "epoch": 2.271611274197853, "grad_norm": 0.5012814391620765, "learning_rate": 5.219352181399474e-06, "loss": 0.4577, "step": 13832 }, { "epoch": 2.2717755014061956, "grad_norm": 0.3326116761911855, "learning_rate": 5.218889236118892e-06, "loss": 0.4128, "step": 13833 }, { "epoch": 2.271939728614538, "grad_norm": 0.34504752744659983, "learning_rate": 5.218426283030467e-06, "loss": 0.4525, "step": 13834 }, { "epoch": 2.272103955822881, "grad_norm": 0.3250596236621934, "learning_rate": 5.2179633221392255e-06, "loss": 0.4545, "step": 13835 }, { "epoch": 2.2722681830312235, "grad_norm": 0.28065194575482644, "learning_rate": 5.217500353450196e-06, "loss": 0.4345, "step": 13836 }, { "epoch": 2.2724324102395665, "grad_norm": 0.7255066479196198, "learning_rate": 5.217037376968406e-06, "loss": 0.4438, "step": 13837 }, { "epoch": 2.272596637447909, "grad_norm": 0.4017089397573672, "learning_rate": 5.216574392698886e-06, "loss": 0.4591, "step": 13838 }, { "epoch": 2.272760864656252, "grad_norm": 0.5304928041774932, "learning_rate": 5.216111400646662e-06, "loss": 0.4441, "step": 13839 }, { "epoch": 2.272925091864595, "grad_norm": 0.2609976185282584, "learning_rate": 5.215648400816763e-06, "loss": 0.4351, "step": 13840 }, { "epoch": 2.2730893190729375, "grad_norm": 0.358583283765098, "learning_rate": 5.215185393214219e-06, "loss": 0.4385, "step": 13841 }, { "epoch": 2.27325354628128, "grad_norm": 0.3020230812447199, "learning_rate": 5.2147223778440586e-06, "loss": 0.4399, "step": 13842 }, { "epoch": 2.273417773489623, "grad_norm": 0.3367916444539525, "learning_rate": 5.21425935471131e-06, "loss": 0.4235, "step": 13843 }, { "epoch": 2.2735820006979655, "grad_norm": 0.2941663693437456, "learning_rate": 5.213796323821002e-06, "loss": 0.4502, "step": 13844 }, { "epoch": 2.2737462279063085, "grad_norm": 0.4697417001010032, "learning_rate": 5.2133332851781624e-06, "loss": 0.4267, "step": 13845 }, { "epoch": 2.273910455114651, "grad_norm": 0.3388810367942243, "learning_rate": 5.212870238787823e-06, "loss": 0.4564, "step": 13846 }, { "epoch": 2.274074682322994, "grad_norm": 0.2844059312289679, "learning_rate": 5.2124071846550115e-06, "loss": 0.4527, "step": 13847 }, { "epoch": 2.2742389095313364, "grad_norm": 0.4828332149280781, "learning_rate": 5.2119441227847555e-06, "loss": 0.4426, "step": 13848 }, { "epoch": 2.2744031367396795, "grad_norm": 0.33234453953277954, "learning_rate": 5.2114810531820864e-06, "loss": 0.456, "step": 13849 }, { "epoch": 2.274567363948022, "grad_norm": 0.29849007460004134, "learning_rate": 5.211017975852035e-06, "loss": 0.4232, "step": 13850 }, { "epoch": 2.274731591156365, "grad_norm": 0.31706913669563674, "learning_rate": 5.210554890799626e-06, "loss": 0.4273, "step": 13851 }, { "epoch": 2.2748958183647074, "grad_norm": 0.4073711965734478, "learning_rate": 5.210091798029894e-06, "loss": 0.4376, "step": 13852 }, { "epoch": 2.27506004557305, "grad_norm": 0.5229393755986401, "learning_rate": 5.209628697547865e-06, "loss": 0.4459, "step": 13853 }, { "epoch": 2.275224272781393, "grad_norm": 0.3537211489217148, "learning_rate": 5.209165589358571e-06, "loss": 0.4747, "step": 13854 }, { "epoch": 2.275388499989736, "grad_norm": 0.40748720524206644, "learning_rate": 5.20870247346704e-06, "loss": 0.4416, "step": 13855 }, { "epoch": 2.2755527271980784, "grad_norm": 0.3216274797784098, "learning_rate": 5.2082393498783036e-06, "loss": 0.4572, "step": 13856 }, { "epoch": 2.2757169544064215, "grad_norm": 0.35278286290399846, "learning_rate": 5.207776218597391e-06, "loss": 0.4452, "step": 13857 }, { "epoch": 2.275881181614764, "grad_norm": 0.4098238080258137, "learning_rate": 5.207313079629331e-06, "loss": 0.432, "step": 13858 }, { "epoch": 2.276045408823107, "grad_norm": 0.4580092831691726, "learning_rate": 5.2068499329791575e-06, "loss": 0.4349, "step": 13859 }, { "epoch": 2.2762096360314494, "grad_norm": 0.38826013386276875, "learning_rate": 5.206386778651896e-06, "loss": 0.4141, "step": 13860 }, { "epoch": 2.276373863239792, "grad_norm": 0.2941958339679804, "learning_rate": 5.20592361665258e-06, "loss": 0.4247, "step": 13861 }, { "epoch": 2.276538090448135, "grad_norm": 0.3003495984838666, "learning_rate": 5.20546044698624e-06, "loss": 0.4258, "step": 13862 }, { "epoch": 2.276702317656478, "grad_norm": 0.33772284519341567, "learning_rate": 5.204997269657905e-06, "loss": 0.439, "step": 13863 }, { "epoch": 2.2768665448648204, "grad_norm": 0.35383257962280384, "learning_rate": 5.2045340846726054e-06, "loss": 0.4494, "step": 13864 }, { "epoch": 2.277030772073163, "grad_norm": 0.3434125913276977, "learning_rate": 5.204070892035372e-06, "loss": 0.4356, "step": 13865 }, { "epoch": 2.277194999281506, "grad_norm": 0.30071978455766885, "learning_rate": 5.203607691751237e-06, "loss": 0.4401, "step": 13866 }, { "epoch": 2.277359226489849, "grad_norm": 0.29424246947418803, "learning_rate": 5.203144483825231e-06, "loss": 0.4518, "step": 13867 }, { "epoch": 2.2775234536981914, "grad_norm": 0.7205369293800823, "learning_rate": 5.202681268262382e-06, "loss": 0.433, "step": 13868 }, { "epoch": 2.277687680906534, "grad_norm": 0.4550218086842287, "learning_rate": 5.202218045067724e-06, "loss": 0.4227, "step": 13869 }, { "epoch": 2.2778519081148767, "grad_norm": 0.5522939727022292, "learning_rate": 5.201754814246286e-06, "loss": 0.4329, "step": 13870 }, { "epoch": 2.27801613532322, "grad_norm": 0.34802550224546164, "learning_rate": 5.201291575803103e-06, "loss": 0.4396, "step": 13871 }, { "epoch": 2.2781803625315624, "grad_norm": 0.29412082409797263, "learning_rate": 5.200828329743202e-06, "loss": 0.4474, "step": 13872 }, { "epoch": 2.278344589739905, "grad_norm": 0.3387381717897766, "learning_rate": 5.200365076071616e-06, "loss": 0.4344, "step": 13873 }, { "epoch": 2.278508816948248, "grad_norm": 0.3520361563968808, "learning_rate": 5.199901814793376e-06, "loss": 0.4562, "step": 13874 }, { "epoch": 2.2786730441565908, "grad_norm": 0.3288366800434973, "learning_rate": 5.199438545913514e-06, "loss": 0.4355, "step": 13875 }, { "epoch": 2.2788372713649334, "grad_norm": 0.29865338535780817, "learning_rate": 5.19897526943706e-06, "loss": 0.4518, "step": 13876 }, { "epoch": 2.279001498573276, "grad_norm": 0.3404207815125893, "learning_rate": 5.198511985369049e-06, "loss": 0.4322, "step": 13877 }, { "epoch": 2.2791657257816187, "grad_norm": 0.31980604254782763, "learning_rate": 5.198048693714509e-06, "loss": 0.4281, "step": 13878 }, { "epoch": 2.2793299529899618, "grad_norm": 0.2872471598384732, "learning_rate": 5.197585394478474e-06, "loss": 0.4482, "step": 13879 }, { "epoch": 2.2794941801983044, "grad_norm": 0.3154232003073397, "learning_rate": 5.1971220876659745e-06, "loss": 0.4364, "step": 13880 }, { "epoch": 2.279658407406647, "grad_norm": 0.6348972132812276, "learning_rate": 5.196658773282044e-06, "loss": 0.4298, "step": 13881 }, { "epoch": 2.2798226346149897, "grad_norm": 0.28308749448615994, "learning_rate": 5.196195451331715e-06, "loss": 0.4315, "step": 13882 }, { "epoch": 2.2799868618233328, "grad_norm": 0.37848023486848437, "learning_rate": 5.195732121820016e-06, "loss": 0.4639, "step": 13883 }, { "epoch": 2.2801510890316754, "grad_norm": 0.40271339721462446, "learning_rate": 5.1952687847519835e-06, "loss": 0.4328, "step": 13884 }, { "epoch": 2.280315316240018, "grad_norm": 0.3014707376437167, "learning_rate": 5.194805440132647e-06, "loss": 0.4214, "step": 13885 }, { "epoch": 2.2804795434483607, "grad_norm": 0.3373711819342816, "learning_rate": 5.19434208796704e-06, "loss": 0.4473, "step": 13886 }, { "epoch": 2.2806437706567033, "grad_norm": 0.38535680109940185, "learning_rate": 5.193878728260194e-06, "loss": 0.423, "step": 13887 }, { "epoch": 2.2808079978650464, "grad_norm": 0.39323458035612957, "learning_rate": 5.193415361017145e-06, "loss": 0.4376, "step": 13888 }, { "epoch": 2.280972225073389, "grad_norm": 0.40926189382068584, "learning_rate": 5.19295198624292e-06, "loss": 0.4268, "step": 13889 }, { "epoch": 2.2811364522817317, "grad_norm": 0.39605642816296704, "learning_rate": 5.192488603942555e-06, "loss": 0.441, "step": 13890 }, { "epoch": 2.2813006794900748, "grad_norm": 0.3321036515817321, "learning_rate": 5.192025214121084e-06, "loss": 0.4469, "step": 13891 }, { "epoch": 2.2814649066984174, "grad_norm": 0.3084896280179283, "learning_rate": 5.191561816783538e-06, "loss": 0.4458, "step": 13892 }, { "epoch": 2.28162913390676, "grad_norm": 0.4768889527534172, "learning_rate": 5.1910984119349495e-06, "loss": 0.4617, "step": 13893 }, { "epoch": 2.2817933611151027, "grad_norm": 0.479253467678791, "learning_rate": 5.190634999580352e-06, "loss": 0.4393, "step": 13894 }, { "epoch": 2.2819575883234453, "grad_norm": 0.34832956361346284, "learning_rate": 5.190171579724779e-06, "loss": 0.4353, "step": 13895 }, { "epoch": 2.2821218155317884, "grad_norm": 0.3443708744422341, "learning_rate": 5.189708152373266e-06, "loss": 0.4213, "step": 13896 }, { "epoch": 2.282286042740131, "grad_norm": 0.5197437799964241, "learning_rate": 5.189244717530841e-06, "loss": 0.4352, "step": 13897 }, { "epoch": 2.2824502699484737, "grad_norm": 0.3786963383411827, "learning_rate": 5.188781275202542e-06, "loss": 0.4144, "step": 13898 }, { "epoch": 2.2826144971568163, "grad_norm": 0.31878254563006886, "learning_rate": 5.188317825393398e-06, "loss": 0.4375, "step": 13899 }, { "epoch": 2.2827787243651594, "grad_norm": 0.32371021620229423, "learning_rate": 5.187854368108448e-06, "loss": 0.467, "step": 13900 }, { "epoch": 2.282942951573502, "grad_norm": 0.27065989521911527, "learning_rate": 5.1873909033527225e-06, "loss": 0.4494, "step": 13901 }, { "epoch": 2.2831071787818447, "grad_norm": 0.36088684689567835, "learning_rate": 5.186927431131254e-06, "loss": 0.4493, "step": 13902 }, { "epoch": 2.2832714059901873, "grad_norm": 0.3054866896938407, "learning_rate": 5.186463951449079e-06, "loss": 0.444, "step": 13903 }, { "epoch": 2.28343563319853, "grad_norm": 0.9427890070184183, "learning_rate": 5.1860004643112295e-06, "loss": 0.4566, "step": 13904 }, { "epoch": 2.283599860406873, "grad_norm": 0.36980738576340033, "learning_rate": 5.1855369697227405e-06, "loss": 0.4283, "step": 13905 }, { "epoch": 2.2837640876152157, "grad_norm": 0.3261290341141939, "learning_rate": 5.185073467688646e-06, "loss": 0.4368, "step": 13906 }, { "epoch": 2.2839283148235583, "grad_norm": 0.33320968485632513, "learning_rate": 5.184609958213978e-06, "loss": 0.4469, "step": 13907 }, { "epoch": 2.2840925420319014, "grad_norm": 0.2856889531244492, "learning_rate": 5.184146441303773e-06, "loss": 0.4525, "step": 13908 }, { "epoch": 2.284256769240244, "grad_norm": 0.3942354513878941, "learning_rate": 5.183682916963066e-06, "loss": 0.4316, "step": 13909 }, { "epoch": 2.2844209964485866, "grad_norm": 0.3732289311915922, "learning_rate": 5.183219385196887e-06, "loss": 0.4307, "step": 13910 }, { "epoch": 2.2845852236569293, "grad_norm": 0.33045744667353805, "learning_rate": 5.182755846010276e-06, "loss": 0.4419, "step": 13911 }, { "epoch": 2.284749450865272, "grad_norm": 0.3139712346377227, "learning_rate": 5.1822922994082636e-06, "loss": 0.4438, "step": 13912 }, { "epoch": 2.284913678073615, "grad_norm": 0.4955002940911547, "learning_rate": 5.181828745395886e-06, "loss": 0.4432, "step": 13913 }, { "epoch": 2.2850779052819576, "grad_norm": 0.353124834913557, "learning_rate": 5.181365183978176e-06, "loss": 0.4328, "step": 13914 }, { "epoch": 2.2852421324903003, "grad_norm": 0.33109773121135133, "learning_rate": 5.180901615160172e-06, "loss": 0.4238, "step": 13915 }, { "epoch": 2.285406359698643, "grad_norm": 0.3091236114028852, "learning_rate": 5.180438038946905e-06, "loss": 0.4395, "step": 13916 }, { "epoch": 2.285570586906986, "grad_norm": 0.3072253498064372, "learning_rate": 5.179974455343412e-06, "loss": 0.4536, "step": 13917 }, { "epoch": 2.2857348141153286, "grad_norm": 0.36653777780275587, "learning_rate": 5.179510864354727e-06, "loss": 0.4406, "step": 13918 }, { "epoch": 2.2858990413236713, "grad_norm": 0.3347389257699555, "learning_rate": 5.179047265985885e-06, "loss": 0.4269, "step": 13919 }, { "epoch": 2.286063268532014, "grad_norm": 0.30147606850707453, "learning_rate": 5.178583660241923e-06, "loss": 0.4446, "step": 13920 }, { "epoch": 2.2862274957403566, "grad_norm": 0.37986592730870994, "learning_rate": 5.178120047127874e-06, "loss": 0.4437, "step": 13921 }, { "epoch": 2.2863917229486996, "grad_norm": 0.4007300546013079, "learning_rate": 5.177656426648774e-06, "loss": 0.4545, "step": 13922 }, { "epoch": 2.2865559501570423, "grad_norm": 0.32051943336442223, "learning_rate": 5.177192798809658e-06, "loss": 0.4105, "step": 13923 }, { "epoch": 2.286720177365385, "grad_norm": 0.38011102453141177, "learning_rate": 5.1767291636155625e-06, "loss": 0.4485, "step": 13924 }, { "epoch": 2.286884404573728, "grad_norm": 0.35926434734994367, "learning_rate": 5.176265521071523e-06, "loss": 0.4362, "step": 13925 }, { "epoch": 2.2870486317820706, "grad_norm": 0.3731633735917125, "learning_rate": 5.175801871182575e-06, "loss": 0.4455, "step": 13926 }, { "epoch": 2.2872128589904133, "grad_norm": 0.29885224871406696, "learning_rate": 5.175338213953752e-06, "loss": 0.457, "step": 13927 }, { "epoch": 2.287377086198756, "grad_norm": 0.3104186968740682, "learning_rate": 5.174874549390092e-06, "loss": 0.4183, "step": 13928 }, { "epoch": 2.2875413134070985, "grad_norm": 0.34129434291138006, "learning_rate": 5.17441087749663e-06, "loss": 0.4402, "step": 13929 }, { "epoch": 2.2877055406154416, "grad_norm": 0.31123742064990884, "learning_rate": 5.173947198278405e-06, "loss": 0.4332, "step": 13930 }, { "epoch": 2.2878697678237843, "grad_norm": 0.29266834175594275, "learning_rate": 5.173483511740448e-06, "loss": 0.444, "step": 13931 }, { "epoch": 2.288033995032127, "grad_norm": 0.3725237989488707, "learning_rate": 5.173019817887798e-06, "loss": 0.4677, "step": 13932 }, { "epoch": 2.2881982222404695, "grad_norm": 0.31685516806527486, "learning_rate": 5.1725561167254895e-06, "loss": 0.4456, "step": 13933 }, { "epoch": 2.2883624494488126, "grad_norm": 0.39825508760706196, "learning_rate": 5.172092408258562e-06, "loss": 0.4505, "step": 13934 }, { "epoch": 2.2885266766571553, "grad_norm": 0.3634662074018649, "learning_rate": 5.171628692492049e-06, "loss": 0.4398, "step": 13935 }, { "epoch": 2.288690903865498, "grad_norm": 0.4549972944347865, "learning_rate": 5.171164969430987e-06, "loss": 0.4439, "step": 13936 }, { "epoch": 2.2888551310738405, "grad_norm": 0.40498915261239143, "learning_rate": 5.1707012390804125e-06, "loss": 0.4444, "step": 13937 }, { "epoch": 2.289019358282183, "grad_norm": 0.4218955172962593, "learning_rate": 5.1702375014453645e-06, "loss": 0.4519, "step": 13938 }, { "epoch": 2.2891835854905263, "grad_norm": 0.3348184465306751, "learning_rate": 5.1697737565308755e-06, "loss": 0.4556, "step": 13939 }, { "epoch": 2.289347812698869, "grad_norm": 0.3186255812768499, "learning_rate": 5.169310004341987e-06, "loss": 0.4308, "step": 13940 }, { "epoch": 2.2895120399072115, "grad_norm": 0.38078161937545435, "learning_rate": 5.1688462448837315e-06, "loss": 0.4242, "step": 13941 }, { "epoch": 2.2896762671155546, "grad_norm": 0.2922132896482114, "learning_rate": 5.168382478161149e-06, "loss": 0.4328, "step": 13942 }, { "epoch": 2.2898404943238972, "grad_norm": 0.34740487598494213, "learning_rate": 5.167918704179275e-06, "loss": 0.4308, "step": 13943 }, { "epoch": 2.29000472153224, "grad_norm": 0.3537760593250267, "learning_rate": 5.167454922943146e-06, "loss": 0.4307, "step": 13944 }, { "epoch": 2.2901689487405825, "grad_norm": 0.2928523746459882, "learning_rate": 5.1669911344578e-06, "loss": 0.4521, "step": 13945 }, { "epoch": 2.290333175948925, "grad_norm": 0.47139678614977115, "learning_rate": 5.166527338728275e-06, "loss": 0.4305, "step": 13946 }, { "epoch": 2.2904974031572682, "grad_norm": 0.2985657027832332, "learning_rate": 5.166063535759606e-06, "loss": 0.4491, "step": 13947 }, { "epoch": 2.290661630365611, "grad_norm": 0.3731853926343701, "learning_rate": 5.165599725556832e-06, "loss": 0.4415, "step": 13948 }, { "epoch": 2.2908258575739535, "grad_norm": 0.31532136412340483, "learning_rate": 5.165135908124991e-06, "loss": 0.4377, "step": 13949 }, { "epoch": 2.290990084782296, "grad_norm": 1.126183260087053, "learning_rate": 5.1646720834691185e-06, "loss": 0.433, "step": 13950 }, { "epoch": 2.2911543119906392, "grad_norm": 0.2954449428221947, "learning_rate": 5.164208251594255e-06, "loss": 0.4184, "step": 13951 }, { "epoch": 2.291318539198982, "grad_norm": 0.3198516287960699, "learning_rate": 5.163744412505434e-06, "loss": 0.4518, "step": 13952 }, { "epoch": 2.2914827664073245, "grad_norm": 0.417184169928783, "learning_rate": 5.163280566207697e-06, "loss": 0.4285, "step": 13953 }, { "epoch": 2.291646993615667, "grad_norm": 0.3120139219686837, "learning_rate": 5.162816712706081e-06, "loss": 0.4569, "step": 13954 }, { "epoch": 2.29181122082401, "grad_norm": 0.33460930796334654, "learning_rate": 5.162352852005622e-06, "loss": 0.4061, "step": 13955 }, { "epoch": 2.291975448032353, "grad_norm": 0.5502547933745806, "learning_rate": 5.16188898411136e-06, "loss": 0.4349, "step": 13956 }, { "epoch": 2.2921396752406955, "grad_norm": 0.3768704001174054, "learning_rate": 5.161425109028332e-06, "loss": 0.4496, "step": 13957 }, { "epoch": 2.292303902449038, "grad_norm": 0.36340695623770736, "learning_rate": 5.160961226761576e-06, "loss": 0.4414, "step": 13958 }, { "epoch": 2.2924681296573812, "grad_norm": 0.40539103376015434, "learning_rate": 5.160497337316133e-06, "loss": 0.4497, "step": 13959 }, { "epoch": 2.292632356865724, "grad_norm": 0.3861065338987038, "learning_rate": 5.160033440697038e-06, "loss": 0.4512, "step": 13960 }, { "epoch": 2.2927965840740665, "grad_norm": 0.2884253200044417, "learning_rate": 5.15956953690933e-06, "loss": 0.4278, "step": 13961 }, { "epoch": 2.292960811282409, "grad_norm": 0.30611232750057144, "learning_rate": 5.159105625958048e-06, "loss": 0.4382, "step": 13962 }, { "epoch": 2.2931250384907518, "grad_norm": 0.34316335987687846, "learning_rate": 5.158641707848231e-06, "loss": 0.4671, "step": 13963 }, { "epoch": 2.293289265699095, "grad_norm": 0.3970977754941982, "learning_rate": 5.158177782584917e-06, "loss": 0.4288, "step": 13964 }, { "epoch": 2.2934534929074375, "grad_norm": 0.3403187216231823, "learning_rate": 5.1577138501731435e-06, "loss": 0.4418, "step": 13965 }, { "epoch": 2.29361772011578, "grad_norm": 0.5190818158430706, "learning_rate": 5.1572499106179515e-06, "loss": 0.4337, "step": 13966 }, { "epoch": 2.2937819473241228, "grad_norm": 0.3232592487442511, "learning_rate": 5.156785963924378e-06, "loss": 0.4505, "step": 13967 }, { "epoch": 2.293946174532466, "grad_norm": 0.2813019946585075, "learning_rate": 5.156322010097464e-06, "loss": 0.4342, "step": 13968 }, { "epoch": 2.2941104017408085, "grad_norm": 0.3356722636096625, "learning_rate": 5.155858049142247e-06, "loss": 0.4353, "step": 13969 }, { "epoch": 2.294274628949151, "grad_norm": 0.2909214828377573, "learning_rate": 5.155394081063766e-06, "loss": 0.4321, "step": 13970 }, { "epoch": 2.2944388561574938, "grad_norm": 0.3733982655008716, "learning_rate": 5.154930105867061e-06, "loss": 0.472, "step": 13971 }, { "epoch": 2.2946030833658364, "grad_norm": 0.4654045248653441, "learning_rate": 5.154466123557169e-06, "loss": 0.4429, "step": 13972 }, { "epoch": 2.2947673105741795, "grad_norm": 0.3637665009212365, "learning_rate": 5.154002134139132e-06, "loss": 0.4238, "step": 13973 }, { "epoch": 2.294931537782522, "grad_norm": 0.2909657126305831, "learning_rate": 5.15353813761799e-06, "loss": 0.4607, "step": 13974 }, { "epoch": 2.2950957649908648, "grad_norm": 0.42400086471079623, "learning_rate": 5.153074133998778e-06, "loss": 0.4594, "step": 13975 }, { "epoch": 2.295259992199208, "grad_norm": 0.36972331015652127, "learning_rate": 5.15261012328654e-06, "loss": 0.4262, "step": 13976 }, { "epoch": 2.2954242194075505, "grad_norm": 0.4389775606377693, "learning_rate": 5.152146105486313e-06, "loss": 0.4249, "step": 13977 }, { "epoch": 2.295588446615893, "grad_norm": 0.33415273938695683, "learning_rate": 5.1516820806031395e-06, "loss": 0.4354, "step": 13978 }, { "epoch": 2.2957526738242358, "grad_norm": 0.4278522839527496, "learning_rate": 5.151218048642055e-06, "loss": 0.4476, "step": 13979 }, { "epoch": 2.2959169010325784, "grad_norm": 0.3741320360256869, "learning_rate": 5.150754009608105e-06, "loss": 0.4484, "step": 13980 }, { "epoch": 2.2960811282409215, "grad_norm": 0.311783369735317, "learning_rate": 5.150289963506323e-06, "loss": 0.4509, "step": 13981 }, { "epoch": 2.296245355449264, "grad_norm": 0.5405852343668192, "learning_rate": 5.149825910341753e-06, "loss": 0.4455, "step": 13982 }, { "epoch": 2.2964095826576068, "grad_norm": 0.3519118169672042, "learning_rate": 5.1493618501194355e-06, "loss": 0.4337, "step": 13983 }, { "epoch": 2.2965738098659494, "grad_norm": 0.28809793111921655, "learning_rate": 5.148897782844409e-06, "loss": 0.4485, "step": 13984 }, { "epoch": 2.2967380370742925, "grad_norm": 0.37024335296106775, "learning_rate": 5.1484337085217134e-06, "loss": 0.4317, "step": 13985 }, { "epoch": 2.296902264282635, "grad_norm": 0.38276051392136357, "learning_rate": 5.14796962715639e-06, "loss": 0.4423, "step": 13986 }, { "epoch": 2.2970664914909777, "grad_norm": 0.29217104055044624, "learning_rate": 5.147505538753478e-06, "loss": 0.4473, "step": 13987 }, { "epoch": 2.2972307186993204, "grad_norm": 0.47709697511449556, "learning_rate": 5.147041443318021e-06, "loss": 0.4508, "step": 13988 }, { "epoch": 2.297394945907663, "grad_norm": 0.30567178053755234, "learning_rate": 5.146577340855056e-06, "loss": 0.4429, "step": 13989 }, { "epoch": 2.297559173116006, "grad_norm": 0.3211430439562128, "learning_rate": 5.146113231369625e-06, "loss": 0.4527, "step": 13990 }, { "epoch": 2.2977234003243487, "grad_norm": 0.30219116042974015, "learning_rate": 5.145649114866768e-06, "loss": 0.457, "step": 13991 }, { "epoch": 2.2978876275326914, "grad_norm": 0.2898625848815241, "learning_rate": 5.145184991351529e-06, "loss": 0.4148, "step": 13992 }, { "epoch": 2.2980518547410345, "grad_norm": 1.1202762278001295, "learning_rate": 5.144720860828944e-06, "loss": 0.4187, "step": 13993 }, { "epoch": 2.298216081949377, "grad_norm": 0.5445625866550631, "learning_rate": 5.144256723304056e-06, "loss": 0.4431, "step": 13994 }, { "epoch": 2.2983803091577197, "grad_norm": 0.35936423083423646, "learning_rate": 5.143792578781906e-06, "loss": 0.4579, "step": 13995 }, { "epoch": 2.2985445363660624, "grad_norm": 0.3387039272937696, "learning_rate": 5.143328427267535e-06, "loss": 0.4532, "step": 13996 }, { "epoch": 2.298708763574405, "grad_norm": 0.35295839414607105, "learning_rate": 5.142864268765985e-06, "loss": 0.4416, "step": 13997 }, { "epoch": 2.298872990782748, "grad_norm": 0.28125146920926414, "learning_rate": 5.142400103282298e-06, "loss": 0.4393, "step": 13998 }, { "epoch": 2.2990372179910907, "grad_norm": 0.4012659500929637, "learning_rate": 5.141935930821512e-06, "loss": 0.4453, "step": 13999 }, { "epoch": 2.2992014451994334, "grad_norm": 0.3300866074431469, "learning_rate": 5.14147175138867e-06, "loss": 0.4331, "step": 14000 }, { "epoch": 2.299365672407776, "grad_norm": 0.4325211113663812, "learning_rate": 5.1410075649888156e-06, "loss": 0.4416, "step": 14001 }, { "epoch": 2.299529899616119, "grad_norm": 0.32184158336621926, "learning_rate": 5.1405433716269865e-06, "loss": 0.4513, "step": 14002 }, { "epoch": 2.2996941268244617, "grad_norm": 0.35911228396405964, "learning_rate": 5.140079171308228e-06, "loss": 0.4395, "step": 14003 }, { "epoch": 2.2998583540328044, "grad_norm": 0.3489742010041331, "learning_rate": 5.139614964037577e-06, "loss": 0.4434, "step": 14004 }, { "epoch": 2.300022581241147, "grad_norm": 0.37801129331195343, "learning_rate": 5.139150749820081e-06, "loss": 0.4453, "step": 14005 }, { "epoch": 2.3001868084494896, "grad_norm": 0.3762648304082034, "learning_rate": 5.138686528660778e-06, "loss": 0.4245, "step": 14006 }, { "epoch": 2.3003510356578327, "grad_norm": 0.33802117667366877, "learning_rate": 5.1382223005647094e-06, "loss": 0.4454, "step": 14007 }, { "epoch": 2.3005152628661754, "grad_norm": 0.4124698432484217, "learning_rate": 5.137758065536921e-06, "loss": 0.4344, "step": 14008 }, { "epoch": 2.300679490074518, "grad_norm": 0.4053973640415479, "learning_rate": 5.137293823582452e-06, "loss": 0.4372, "step": 14009 }, { "epoch": 2.300843717282861, "grad_norm": 0.32205027479758536, "learning_rate": 5.1368295747063455e-06, "loss": 0.4201, "step": 14010 }, { "epoch": 2.3010079444912037, "grad_norm": 0.3158597315788733, "learning_rate": 5.136365318913641e-06, "loss": 0.4489, "step": 14011 }, { "epoch": 2.3011721716995464, "grad_norm": 0.37379440368033845, "learning_rate": 5.1359010562093855e-06, "loss": 0.4481, "step": 14012 }, { "epoch": 2.301336398907889, "grad_norm": 0.3371250590690597, "learning_rate": 5.135436786598619e-06, "loss": 0.4418, "step": 14013 }, { "epoch": 2.3015006261162316, "grad_norm": 0.3469445965971112, "learning_rate": 5.134972510086382e-06, "loss": 0.4428, "step": 14014 }, { "epoch": 2.3016648533245747, "grad_norm": 0.35999734715615955, "learning_rate": 5.134508226677719e-06, "loss": 0.4387, "step": 14015 }, { "epoch": 2.3018290805329173, "grad_norm": 0.3987865712873361, "learning_rate": 5.134043936377672e-06, "loss": 0.4541, "step": 14016 }, { "epoch": 2.30199330774126, "grad_norm": 0.30289366918869837, "learning_rate": 5.133579639191286e-06, "loss": 0.4473, "step": 14017 }, { "epoch": 2.3021575349496026, "grad_norm": 0.403215916390885, "learning_rate": 5.133115335123601e-06, "loss": 0.4545, "step": 14018 }, { "epoch": 2.3023217621579457, "grad_norm": 0.34769404599655834, "learning_rate": 5.13265102417966e-06, "loss": 0.4373, "step": 14019 }, { "epoch": 2.3024859893662883, "grad_norm": 0.4296539613249186, "learning_rate": 5.132186706364507e-06, "loss": 0.4341, "step": 14020 }, { "epoch": 2.302650216574631, "grad_norm": 0.3154857891196642, "learning_rate": 5.131722381683183e-06, "loss": 0.4581, "step": 14021 }, { "epoch": 2.3028144437829736, "grad_norm": 0.3455431355471438, "learning_rate": 5.131258050140734e-06, "loss": 0.4238, "step": 14022 }, { "epoch": 2.3029786709913163, "grad_norm": 0.5078514368461862, "learning_rate": 5.130793711742201e-06, "loss": 0.4437, "step": 14023 }, { "epoch": 2.3031428981996593, "grad_norm": 0.34871399726253866, "learning_rate": 5.130329366492628e-06, "loss": 0.4279, "step": 14024 }, { "epoch": 2.303307125408002, "grad_norm": 0.4938279579093215, "learning_rate": 5.129865014397057e-06, "loss": 0.4359, "step": 14025 }, { "epoch": 2.3034713526163446, "grad_norm": 0.4455533239162793, "learning_rate": 5.129400655460533e-06, "loss": 0.4302, "step": 14026 }, { "epoch": 2.3036355798246877, "grad_norm": 0.317398104299277, "learning_rate": 5.1289362896881e-06, "loss": 0.4441, "step": 14027 }, { "epoch": 2.3037998070330303, "grad_norm": 0.3335278163989015, "learning_rate": 5.128471917084798e-06, "loss": 0.4409, "step": 14028 }, { "epoch": 2.303964034241373, "grad_norm": 0.3116408451208168, "learning_rate": 5.128007537655673e-06, "loss": 0.4565, "step": 14029 }, { "epoch": 2.3041282614497156, "grad_norm": 0.3616035436306267, "learning_rate": 5.1275431514057686e-06, "loss": 0.4402, "step": 14030 }, { "epoch": 2.3042924886580582, "grad_norm": 0.3156254049719913, "learning_rate": 5.127078758340128e-06, "loss": 0.4342, "step": 14031 }, { "epoch": 2.3044567158664013, "grad_norm": 0.37717368073785584, "learning_rate": 5.126614358463795e-06, "loss": 0.4355, "step": 14032 }, { "epoch": 2.304620943074744, "grad_norm": 0.6232263943964357, "learning_rate": 5.126149951781814e-06, "loss": 0.4383, "step": 14033 }, { "epoch": 2.3047851702830866, "grad_norm": 0.3038821240087618, "learning_rate": 5.1256855382992285e-06, "loss": 0.4616, "step": 14034 }, { "epoch": 2.3049493974914292, "grad_norm": 0.3415423211697857, "learning_rate": 5.125221118021082e-06, "loss": 0.437, "step": 14035 }, { "epoch": 2.3051136246997723, "grad_norm": 0.273495773934862, "learning_rate": 5.124756690952418e-06, "loss": 0.4553, "step": 14036 }, { "epoch": 2.305277851908115, "grad_norm": 0.32609384140101105, "learning_rate": 5.124292257098284e-06, "loss": 0.4358, "step": 14037 }, { "epoch": 2.3054420791164576, "grad_norm": 0.29790082821128433, "learning_rate": 5.123827816463722e-06, "loss": 0.4455, "step": 14038 }, { "epoch": 2.3056063063248002, "grad_norm": 0.4301738879695801, "learning_rate": 5.123363369053774e-06, "loss": 0.4351, "step": 14039 }, { "epoch": 2.305770533533143, "grad_norm": 0.4560846085606717, "learning_rate": 5.122898914873487e-06, "loss": 0.4325, "step": 14040 }, { "epoch": 2.305934760741486, "grad_norm": 0.3587378842032936, "learning_rate": 5.122434453927905e-06, "loss": 0.4325, "step": 14041 }, { "epoch": 2.3060989879498286, "grad_norm": 0.4712292651053053, "learning_rate": 5.121969986222074e-06, "loss": 0.4323, "step": 14042 }, { "epoch": 2.3062632151581712, "grad_norm": 0.40531640859089124, "learning_rate": 5.121505511761036e-06, "loss": 0.4324, "step": 14043 }, { "epoch": 2.3064274423665143, "grad_norm": 0.28030142930937785, "learning_rate": 5.121041030549835e-06, "loss": 0.4453, "step": 14044 }, { "epoch": 2.306591669574857, "grad_norm": 0.3595351679875266, "learning_rate": 5.120576542593519e-06, "loss": 0.4409, "step": 14045 }, { "epoch": 2.3067558967831996, "grad_norm": 0.30426305503665396, "learning_rate": 5.120112047897132e-06, "loss": 0.4528, "step": 14046 }, { "epoch": 2.3069201239915422, "grad_norm": 0.5848941502153416, "learning_rate": 5.119647546465717e-06, "loss": 0.4311, "step": 14047 }, { "epoch": 2.307084351199885, "grad_norm": 0.32612350991860606, "learning_rate": 5.11918303830432e-06, "loss": 0.4377, "step": 14048 }, { "epoch": 2.307248578408228, "grad_norm": 0.2763098121789989, "learning_rate": 5.118718523417985e-06, "loss": 0.4235, "step": 14049 }, { "epoch": 2.3074128056165706, "grad_norm": 0.3295502056022296, "learning_rate": 5.118254001811759e-06, "loss": 0.4374, "step": 14050 }, { "epoch": 2.307577032824913, "grad_norm": 0.3994712281531475, "learning_rate": 5.117789473490688e-06, "loss": 0.4412, "step": 14051 }, { "epoch": 2.307741260033256, "grad_norm": 0.3115856036129265, "learning_rate": 5.117324938459813e-06, "loss": 0.4284, "step": 14052 }, { "epoch": 2.307905487241599, "grad_norm": 0.9618678183501287, "learning_rate": 5.116860396724183e-06, "loss": 0.451, "step": 14053 }, { "epoch": 2.3080697144499416, "grad_norm": 0.3948070348891769, "learning_rate": 5.116395848288842e-06, "loss": 0.4334, "step": 14054 }, { "epoch": 2.308233941658284, "grad_norm": 0.3034880244011292, "learning_rate": 5.115931293158835e-06, "loss": 0.4449, "step": 14055 }, { "epoch": 2.308398168866627, "grad_norm": 0.31571765021660086, "learning_rate": 5.11546673133921e-06, "loss": 0.4207, "step": 14056 }, { "epoch": 2.3085623960749695, "grad_norm": 0.29877701295733133, "learning_rate": 5.1150021628350095e-06, "loss": 0.4526, "step": 14057 }, { "epoch": 2.3087266232833126, "grad_norm": 0.34530395459750013, "learning_rate": 5.114537587651279e-06, "loss": 0.4274, "step": 14058 }, { "epoch": 2.308890850491655, "grad_norm": 0.369050788922719, "learning_rate": 5.114073005793068e-06, "loss": 0.4483, "step": 14059 }, { "epoch": 2.309055077699998, "grad_norm": 0.28538396109289, "learning_rate": 5.113608417265419e-06, "loss": 0.4338, "step": 14060 }, { "epoch": 2.309219304908341, "grad_norm": 0.32589537320587636, "learning_rate": 5.113143822073379e-06, "loss": 0.4394, "step": 14061 }, { "epoch": 2.3093835321166836, "grad_norm": 0.32042454658452446, "learning_rate": 5.1126792202219936e-06, "loss": 0.4452, "step": 14062 }, { "epoch": 2.309547759325026, "grad_norm": 0.33793205302871954, "learning_rate": 5.11221461171631e-06, "loss": 0.4362, "step": 14063 }, { "epoch": 2.309711986533369, "grad_norm": 0.2879793855044109, "learning_rate": 5.111749996561371e-06, "loss": 0.4442, "step": 14064 }, { "epoch": 2.3098762137417115, "grad_norm": 0.3600649860400257, "learning_rate": 5.111285374762227e-06, "loss": 0.4214, "step": 14065 }, { "epoch": 2.3100404409500546, "grad_norm": 0.3584892274240055, "learning_rate": 5.110820746323924e-06, "loss": 0.4371, "step": 14066 }, { "epoch": 2.310204668158397, "grad_norm": 0.3154401212773581, "learning_rate": 5.1103561112515035e-06, "loss": 0.4408, "step": 14067 }, { "epoch": 2.31036889536674, "grad_norm": 0.42719130845995357, "learning_rate": 5.109891469550018e-06, "loss": 0.4538, "step": 14068 }, { "epoch": 2.3105331225750825, "grad_norm": 0.33674862607670436, "learning_rate": 5.109426821224509e-06, "loss": 0.4304, "step": 14069 }, { "epoch": 2.3106973497834256, "grad_norm": 0.33291473916346687, "learning_rate": 5.108962166280025e-06, "loss": 0.4515, "step": 14070 }, { "epoch": 2.310861576991768, "grad_norm": 0.34892866493515384, "learning_rate": 5.108497504721614e-06, "loss": 0.4331, "step": 14071 }, { "epoch": 2.311025804200111, "grad_norm": 0.300162383893827, "learning_rate": 5.108032836554321e-06, "loss": 0.4546, "step": 14072 }, { "epoch": 2.3111900314084535, "grad_norm": 0.28162989647215003, "learning_rate": 5.107568161783193e-06, "loss": 0.4533, "step": 14073 }, { "epoch": 2.311354258616796, "grad_norm": 0.38128347717669486, "learning_rate": 5.107103480413277e-06, "loss": 0.4519, "step": 14074 }, { "epoch": 2.311518485825139, "grad_norm": 0.29761447186213036, "learning_rate": 5.106638792449619e-06, "loss": 0.4547, "step": 14075 }, { "epoch": 2.311682713033482, "grad_norm": 0.40785614066545356, "learning_rate": 5.10617409789727e-06, "loss": 0.419, "step": 14076 }, { "epoch": 2.3118469402418245, "grad_norm": 0.35949262056041725, "learning_rate": 5.105709396761271e-06, "loss": 0.4461, "step": 14077 }, { "epoch": 2.3120111674501675, "grad_norm": 0.460989553636019, "learning_rate": 5.105244689046672e-06, "loss": 0.4462, "step": 14078 }, { "epoch": 2.31217539465851, "grad_norm": 0.2828943692652613, "learning_rate": 5.104779974758521e-06, "loss": 0.4257, "step": 14079 }, { "epoch": 2.312339621866853, "grad_norm": 0.3003531542996423, "learning_rate": 5.1043152539018645e-06, "loss": 0.4605, "step": 14080 }, { "epoch": 2.3125038490751955, "grad_norm": 0.3975382133409971, "learning_rate": 5.103850526481751e-06, "loss": 0.4359, "step": 14081 }, { "epoch": 2.312668076283538, "grad_norm": 0.32455473999813356, "learning_rate": 5.103385792503224e-06, "loss": 0.4535, "step": 14082 }, { "epoch": 2.312832303491881, "grad_norm": 0.3051514804701622, "learning_rate": 5.102921051971335e-06, "loss": 0.465, "step": 14083 }, { "epoch": 2.312996530700224, "grad_norm": 0.3355561230778918, "learning_rate": 5.102456304891131e-06, "loss": 0.4458, "step": 14084 }, { "epoch": 2.3131607579085665, "grad_norm": 0.37683145351901637, "learning_rate": 5.101991551267657e-06, "loss": 0.4505, "step": 14085 }, { "epoch": 2.313324985116909, "grad_norm": 0.37557984452306886, "learning_rate": 5.101526791105964e-06, "loss": 0.4602, "step": 14086 }, { "epoch": 2.313489212325252, "grad_norm": 0.3750722304582995, "learning_rate": 5.101062024411098e-06, "loss": 0.4527, "step": 14087 }, { "epoch": 2.313653439533595, "grad_norm": 0.27858798055563705, "learning_rate": 5.100597251188107e-06, "loss": 0.4474, "step": 14088 }, { "epoch": 2.3138176667419375, "grad_norm": 0.299630465893534, "learning_rate": 5.100132471442038e-06, "loss": 0.4342, "step": 14089 }, { "epoch": 2.31398189395028, "grad_norm": 0.3387134968545231, "learning_rate": 5.0996676851779405e-06, "loss": 0.4392, "step": 14090 }, { "epoch": 2.3141461211586227, "grad_norm": 0.4640134154275601, "learning_rate": 5.099202892400863e-06, "loss": 0.4295, "step": 14091 }, { "epoch": 2.314310348366966, "grad_norm": 0.33222568022393845, "learning_rate": 5.098738093115851e-06, "loss": 0.4542, "step": 14092 }, { "epoch": 2.3144745755753084, "grad_norm": 0.4566089439660268, "learning_rate": 5.098273287327954e-06, "loss": 0.4321, "step": 14093 }, { "epoch": 2.314638802783651, "grad_norm": 0.3340150445386211, "learning_rate": 5.097808475042221e-06, "loss": 0.4348, "step": 14094 }, { "epoch": 2.314803029991994, "grad_norm": 0.31587059691726316, "learning_rate": 5.097343656263701e-06, "loss": 0.44, "step": 14095 }, { "epoch": 2.314967257200337, "grad_norm": 0.29369202918284504, "learning_rate": 5.0968788309974405e-06, "loss": 0.4436, "step": 14096 }, { "epoch": 2.3151314844086794, "grad_norm": 0.3455351361430038, "learning_rate": 5.096413999248489e-06, "loss": 0.4494, "step": 14097 }, { "epoch": 2.315295711617022, "grad_norm": 0.29585478735310294, "learning_rate": 5.095949161021894e-06, "loss": 0.4273, "step": 14098 }, { "epoch": 2.3154599388253647, "grad_norm": 0.26622562749246065, "learning_rate": 5.0954843163227035e-06, "loss": 0.435, "step": 14099 }, { "epoch": 2.315624166033708, "grad_norm": 0.2716855705390083, "learning_rate": 5.09501946515597e-06, "loss": 0.4576, "step": 14100 }, { "epoch": 2.3157883932420504, "grad_norm": 0.38750734835384804, "learning_rate": 5.094554607526738e-06, "loss": 0.4542, "step": 14101 }, { "epoch": 2.315952620450393, "grad_norm": 0.39359876643785513, "learning_rate": 5.094089743440059e-06, "loss": 0.4345, "step": 14102 }, { "epoch": 2.3161168476587357, "grad_norm": 0.2808952967455288, "learning_rate": 5.09362487290098e-06, "loss": 0.4414, "step": 14103 }, { "epoch": 2.316281074867079, "grad_norm": 0.3194014628944141, "learning_rate": 5.093159995914551e-06, "loss": 0.4424, "step": 14104 }, { "epoch": 2.3164453020754214, "grad_norm": 0.4332979821627784, "learning_rate": 5.092695112485822e-06, "loss": 0.4527, "step": 14105 }, { "epoch": 2.316609529283764, "grad_norm": 0.5310827151398166, "learning_rate": 5.09223022261984e-06, "loss": 0.446, "step": 14106 }, { "epoch": 2.3167737564921067, "grad_norm": 0.2786846133670859, "learning_rate": 5.091765326321655e-06, "loss": 0.4418, "step": 14107 }, { "epoch": 2.3169379837004493, "grad_norm": 0.329155169416799, "learning_rate": 5.091300423596316e-06, "loss": 0.4484, "step": 14108 }, { "epoch": 2.3171022109087924, "grad_norm": 0.3980959735763232, "learning_rate": 5.0908355144488736e-06, "loss": 0.4526, "step": 14109 }, { "epoch": 2.317266438117135, "grad_norm": 0.32127663985472377, "learning_rate": 5.090370598884376e-06, "loss": 0.4498, "step": 14110 }, { "epoch": 2.3174306653254777, "grad_norm": 0.34683145841006363, "learning_rate": 5.089905676907873e-06, "loss": 0.4459, "step": 14111 }, { "epoch": 2.317594892533821, "grad_norm": 0.43147739958741066, "learning_rate": 5.0894407485244124e-06, "loss": 0.4443, "step": 14112 }, { "epoch": 2.3177591197421634, "grad_norm": 0.4648840760522442, "learning_rate": 5.0889758137390466e-06, "loss": 0.4456, "step": 14113 }, { "epoch": 2.317923346950506, "grad_norm": 0.31100822351587076, "learning_rate": 5.0885108725568235e-06, "loss": 0.4501, "step": 14114 }, { "epoch": 2.3180875741588487, "grad_norm": 0.31343052539393956, "learning_rate": 5.088045924982794e-06, "loss": 0.4409, "step": 14115 }, { "epoch": 2.3182518013671913, "grad_norm": 0.34227482246820434, "learning_rate": 5.0875809710220075e-06, "loss": 0.4308, "step": 14116 }, { "epoch": 2.3184160285755344, "grad_norm": 0.27676355153912735, "learning_rate": 5.087116010679511e-06, "loss": 0.4558, "step": 14117 }, { "epoch": 2.318580255783877, "grad_norm": 0.3073291035116375, "learning_rate": 5.08665104396036e-06, "loss": 0.4345, "step": 14118 }, { "epoch": 2.3187444829922197, "grad_norm": 0.32193629097359744, "learning_rate": 5.086186070869601e-06, "loss": 0.439, "step": 14119 }, { "epoch": 2.3189087102005623, "grad_norm": 0.281479804541597, "learning_rate": 5.085721091412284e-06, "loss": 0.4354, "step": 14120 }, { "epoch": 2.3190729374089054, "grad_norm": 0.35718102691551273, "learning_rate": 5.085256105593459e-06, "loss": 0.424, "step": 14121 }, { "epoch": 2.319237164617248, "grad_norm": 0.28737312989718716, "learning_rate": 5.084791113418178e-06, "loss": 0.4309, "step": 14122 }, { "epoch": 2.3194013918255907, "grad_norm": 0.26492875726681686, "learning_rate": 5.08432611489149e-06, "loss": 0.4475, "step": 14123 }, { "epoch": 2.3195656190339333, "grad_norm": 0.3803862478716756, "learning_rate": 5.083861110018444e-06, "loss": 0.4277, "step": 14124 }, { "epoch": 2.319729846242276, "grad_norm": 0.3684425492831474, "learning_rate": 5.083396098804093e-06, "loss": 0.4391, "step": 14125 }, { "epoch": 2.319894073450619, "grad_norm": 0.2797802642574338, "learning_rate": 5.082931081253487e-06, "loss": 0.4504, "step": 14126 }, { "epoch": 2.3200583006589617, "grad_norm": 0.31374455966117754, "learning_rate": 5.0824660573716756e-06, "loss": 0.432, "step": 14127 }, { "epoch": 2.3202225278673043, "grad_norm": 0.33890373708963784, "learning_rate": 5.082001027163708e-06, "loss": 0.4218, "step": 14128 }, { "epoch": 2.3203867550756474, "grad_norm": 0.3227194866548745, "learning_rate": 5.081535990634639e-06, "loss": 0.4614, "step": 14129 }, { "epoch": 2.32055098228399, "grad_norm": 0.4470375783159691, "learning_rate": 5.081070947789517e-06, "loss": 0.4233, "step": 14130 }, { "epoch": 2.3207152094923327, "grad_norm": 0.32100958956086817, "learning_rate": 5.080605898633392e-06, "loss": 0.4519, "step": 14131 }, { "epoch": 2.3208794367006753, "grad_norm": 0.3035182537145447, "learning_rate": 5.080140843171315e-06, "loss": 0.4415, "step": 14132 }, { "epoch": 2.321043663909018, "grad_norm": 0.38367426741253585, "learning_rate": 5.079675781408337e-06, "loss": 0.4198, "step": 14133 }, { "epoch": 2.321207891117361, "grad_norm": 0.28865208882220506, "learning_rate": 5.079210713349512e-06, "loss": 0.4461, "step": 14134 }, { "epoch": 2.3213721183257037, "grad_norm": 0.36621689632654064, "learning_rate": 5.078745638999888e-06, "loss": 0.4345, "step": 14135 }, { "epoch": 2.3215363455340463, "grad_norm": 0.4429051331999256, "learning_rate": 5.0782805583645165e-06, "loss": 0.4532, "step": 14136 }, { "epoch": 2.321700572742389, "grad_norm": 0.31515172267910496, "learning_rate": 5.077815471448449e-06, "loss": 0.4311, "step": 14137 }, { "epoch": 2.321864799950732, "grad_norm": 0.3330602157170481, "learning_rate": 5.077350378256737e-06, "loss": 0.4416, "step": 14138 }, { "epoch": 2.3220290271590747, "grad_norm": 0.31717514726939133, "learning_rate": 5.076885278794433e-06, "loss": 0.4261, "step": 14139 }, { "epoch": 2.3221932543674173, "grad_norm": 0.3302356919478289, "learning_rate": 5.0764201730665866e-06, "loss": 0.4234, "step": 14140 }, { "epoch": 2.32235748157576, "grad_norm": 0.33608792819627553, "learning_rate": 5.0759550610782494e-06, "loss": 0.439, "step": 14141 }, { "epoch": 2.3225217087841026, "grad_norm": 0.36175993010131097, "learning_rate": 5.075489942834474e-06, "loss": 0.4306, "step": 14142 }, { "epoch": 2.3226859359924457, "grad_norm": 0.2664133792406407, "learning_rate": 5.075024818340312e-06, "loss": 0.4309, "step": 14143 }, { "epoch": 2.3228501632007883, "grad_norm": 0.30003761719849853, "learning_rate": 5.0745596876008145e-06, "loss": 0.4443, "step": 14144 }, { "epoch": 2.323014390409131, "grad_norm": 0.3464197651882214, "learning_rate": 5.074094550621033e-06, "loss": 0.4303, "step": 14145 }, { "epoch": 2.323178617617474, "grad_norm": 0.304019749705254, "learning_rate": 5.07362940740602e-06, "loss": 0.4499, "step": 14146 }, { "epoch": 2.3233428448258167, "grad_norm": 0.3001217873675227, "learning_rate": 5.073164257960828e-06, "loss": 0.4441, "step": 14147 }, { "epoch": 2.3235070720341593, "grad_norm": 0.46064457005809356, "learning_rate": 5.072699102290509e-06, "loss": 0.4646, "step": 14148 }, { "epoch": 2.323671299242502, "grad_norm": 0.3779222203657981, "learning_rate": 5.072233940400112e-06, "loss": 0.4595, "step": 14149 }, { "epoch": 2.3238355264508446, "grad_norm": 0.41772714934031113, "learning_rate": 5.071768772294692e-06, "loss": 0.4357, "step": 14150 }, { "epoch": 2.3239997536591877, "grad_norm": 0.3716025171384937, "learning_rate": 5.0713035979793025e-06, "loss": 0.4315, "step": 14151 }, { "epoch": 2.3241639808675303, "grad_norm": 0.305323170356014, "learning_rate": 5.070838417458992e-06, "loss": 0.4386, "step": 14152 }, { "epoch": 2.324328208075873, "grad_norm": 0.39248809891274733, "learning_rate": 5.070373230738815e-06, "loss": 0.4287, "step": 14153 }, { "epoch": 2.3244924352842156, "grad_norm": 0.38032600364508357, "learning_rate": 5.069908037823823e-06, "loss": 0.4473, "step": 14154 }, { "epoch": 2.3246566624925586, "grad_norm": 0.33848005664175346, "learning_rate": 5.069442838719071e-06, "loss": 0.449, "step": 14155 }, { "epoch": 2.3248208897009013, "grad_norm": 0.3314219058532578, "learning_rate": 5.068977633429607e-06, "loss": 0.4281, "step": 14156 }, { "epoch": 2.324985116909244, "grad_norm": 0.33796106931331144, "learning_rate": 5.068512421960487e-06, "loss": 0.438, "step": 14157 }, { "epoch": 2.3251493441175866, "grad_norm": 0.3271594956649946, "learning_rate": 5.068047204316763e-06, "loss": 0.4406, "step": 14158 }, { "epoch": 2.325313571325929, "grad_norm": 0.4212547832783069, "learning_rate": 5.067581980503489e-06, "loss": 0.4389, "step": 14159 }, { "epoch": 2.3254777985342723, "grad_norm": 0.31153221402017683, "learning_rate": 5.067116750525714e-06, "loss": 0.4556, "step": 14160 }, { "epoch": 2.325642025742615, "grad_norm": 0.29817177042555487, "learning_rate": 5.066651514388493e-06, "loss": 0.411, "step": 14161 }, { "epoch": 2.3258062529509576, "grad_norm": 0.5029510933292733, "learning_rate": 5.06618627209688e-06, "loss": 0.4246, "step": 14162 }, { "epoch": 2.3259704801593006, "grad_norm": 0.3514442344266123, "learning_rate": 5.065721023655927e-06, "loss": 0.4491, "step": 14163 }, { "epoch": 2.3261347073676433, "grad_norm": 0.4915963134364145, "learning_rate": 5.065255769070687e-06, "loss": 0.4187, "step": 14164 }, { "epoch": 2.326298934575986, "grad_norm": 0.2865691070834308, "learning_rate": 5.064790508346213e-06, "loss": 0.4314, "step": 14165 }, { "epoch": 2.3264631617843285, "grad_norm": 0.3247883587286542, "learning_rate": 5.064325241487559e-06, "loss": 0.4283, "step": 14166 }, { "epoch": 2.326627388992671, "grad_norm": 0.35794421758596046, "learning_rate": 5.063859968499777e-06, "loss": 0.4566, "step": 14167 }, { "epoch": 2.3267916162010143, "grad_norm": 0.46948198967138616, "learning_rate": 5.063394689387921e-06, "loss": 0.4318, "step": 14168 }, { "epoch": 2.326955843409357, "grad_norm": 0.30464816469968276, "learning_rate": 5.062929404157046e-06, "loss": 0.4245, "step": 14169 }, { "epoch": 2.3271200706176995, "grad_norm": 0.4028945435079524, "learning_rate": 5.062464112812202e-06, "loss": 0.416, "step": 14170 }, { "epoch": 2.327284297826042, "grad_norm": 0.3313330258097169, "learning_rate": 5.061998815358444e-06, "loss": 0.4552, "step": 14171 }, { "epoch": 2.3274485250343853, "grad_norm": 0.3077777867179962, "learning_rate": 5.061533511800827e-06, "loss": 0.4567, "step": 14172 }, { "epoch": 2.327612752242728, "grad_norm": 0.29356991243950614, "learning_rate": 5.061068202144404e-06, "loss": 0.4314, "step": 14173 }, { "epoch": 2.3277769794510705, "grad_norm": 0.3028690657624948, "learning_rate": 5.060602886394227e-06, "loss": 0.4399, "step": 14174 }, { "epoch": 2.327941206659413, "grad_norm": 0.3859579578351086, "learning_rate": 5.060137564555352e-06, "loss": 0.4357, "step": 14175 }, { "epoch": 2.328105433867756, "grad_norm": 0.4652567023941955, "learning_rate": 5.0596722366328316e-06, "loss": 0.4401, "step": 14176 }, { "epoch": 2.328269661076099, "grad_norm": 0.3837296610403499, "learning_rate": 5.059206902631719e-06, "loss": 0.4507, "step": 14177 }, { "epoch": 2.3284338882844415, "grad_norm": 0.2869324518141024, "learning_rate": 5.0587415625570725e-06, "loss": 0.4319, "step": 14178 }, { "epoch": 2.328598115492784, "grad_norm": 0.3169258499686373, "learning_rate": 5.05827621641394e-06, "loss": 0.4254, "step": 14179 }, { "epoch": 2.3287623427011273, "grad_norm": 0.6569998287700313, "learning_rate": 5.057810864207379e-06, "loss": 0.4293, "step": 14180 }, { "epoch": 2.32892656990947, "grad_norm": 0.4219660136497421, "learning_rate": 5.057345505942444e-06, "loss": 0.4211, "step": 14181 }, { "epoch": 2.3290907971178125, "grad_norm": 0.3104954360549804, "learning_rate": 5.056880141624187e-06, "loss": 0.4445, "step": 14182 }, { "epoch": 2.329255024326155, "grad_norm": 0.27808873262914974, "learning_rate": 5.056414771257665e-06, "loss": 0.415, "step": 14183 }, { "epoch": 2.329419251534498, "grad_norm": 0.38868025370533194, "learning_rate": 5.055949394847932e-06, "loss": 0.4325, "step": 14184 }, { "epoch": 2.329583478742841, "grad_norm": 0.34073141928086287, "learning_rate": 5.05548401240004e-06, "loss": 0.4338, "step": 14185 }, { "epoch": 2.3297477059511835, "grad_norm": 0.3401716194953938, "learning_rate": 5.0550186239190445e-06, "loss": 0.4423, "step": 14186 }, { "epoch": 2.329911933159526, "grad_norm": 0.3208427891596354, "learning_rate": 5.0545532294100016e-06, "loss": 0.4463, "step": 14187 }, { "epoch": 2.330076160367869, "grad_norm": 0.36160948952617955, "learning_rate": 5.054087828877966e-06, "loss": 0.4596, "step": 14188 }, { "epoch": 2.330240387576212, "grad_norm": 0.31086819709984803, "learning_rate": 5.053622422327991e-06, "loss": 0.4191, "step": 14189 }, { "epoch": 2.3304046147845545, "grad_norm": 0.34567113619909157, "learning_rate": 5.053157009765131e-06, "loss": 0.4426, "step": 14190 }, { "epoch": 2.330568841992897, "grad_norm": 0.3963209834997856, "learning_rate": 5.052691591194442e-06, "loss": 0.4531, "step": 14191 }, { "epoch": 2.33073306920124, "grad_norm": 0.3103594508662998, "learning_rate": 5.052226166620978e-06, "loss": 0.4437, "step": 14192 }, { "epoch": 2.3308972964095824, "grad_norm": 0.6589932994119513, "learning_rate": 5.051760736049797e-06, "loss": 0.4587, "step": 14193 }, { "epoch": 2.3310615236179255, "grad_norm": 0.3181522531554116, "learning_rate": 5.051295299485949e-06, "loss": 0.4471, "step": 14194 }, { "epoch": 2.331225750826268, "grad_norm": 0.5168257697310911, "learning_rate": 5.0508298569344915e-06, "loss": 0.4496, "step": 14195 }, { "epoch": 2.331389978034611, "grad_norm": 0.39529469305292186, "learning_rate": 5.05036440840048e-06, "loss": 0.4473, "step": 14196 }, { "epoch": 2.331554205242954, "grad_norm": 0.9181809301881988, "learning_rate": 5.049898953888971e-06, "loss": 0.4445, "step": 14197 }, { "epoch": 2.3317184324512965, "grad_norm": 0.324874626375649, "learning_rate": 5.049433493405018e-06, "loss": 0.4317, "step": 14198 }, { "epoch": 2.331882659659639, "grad_norm": 0.3660678533136915, "learning_rate": 5.048968026953676e-06, "loss": 0.4453, "step": 14199 }, { "epoch": 2.332046886867982, "grad_norm": 0.29991457677255184, "learning_rate": 5.048502554540001e-06, "loss": 0.4349, "step": 14200 }, { "epoch": 2.3322111140763244, "grad_norm": 0.41932979749369553, "learning_rate": 5.048037076169049e-06, "loss": 0.4427, "step": 14201 }, { "epoch": 2.3323753412846675, "grad_norm": 0.3987618004851579, "learning_rate": 5.047571591845875e-06, "loss": 0.4478, "step": 14202 }, { "epoch": 2.33253956849301, "grad_norm": 0.3376552578233765, "learning_rate": 5.047106101575535e-06, "loss": 0.4224, "step": 14203 }, { "epoch": 2.332703795701353, "grad_norm": 0.3926530091665801, "learning_rate": 5.046640605363084e-06, "loss": 0.4436, "step": 14204 }, { "epoch": 2.3328680229096954, "grad_norm": 0.6240595384014374, "learning_rate": 5.046175103213579e-06, "loss": 0.4306, "step": 14205 }, { "epoch": 2.3330322501180385, "grad_norm": 0.4986024189108446, "learning_rate": 5.045709595132074e-06, "loss": 0.4649, "step": 14206 }, { "epoch": 2.333196477326381, "grad_norm": 0.2973310236981245, "learning_rate": 5.045244081123627e-06, "loss": 0.4206, "step": 14207 }, { "epoch": 2.3333607045347238, "grad_norm": 0.5038718583325013, "learning_rate": 5.044778561193291e-06, "loss": 0.425, "step": 14208 }, { "epoch": 2.3335249317430664, "grad_norm": 0.32869318551080595, "learning_rate": 5.044313035346126e-06, "loss": 0.4238, "step": 14209 }, { "epoch": 2.333689158951409, "grad_norm": 0.7844142557053408, "learning_rate": 5.043847503587184e-06, "loss": 0.4586, "step": 14210 }, { "epoch": 2.333853386159752, "grad_norm": 0.39747748879408473, "learning_rate": 5.043381965921524e-06, "loss": 0.425, "step": 14211 }, { "epoch": 2.3340176133680948, "grad_norm": 0.3712658915797102, "learning_rate": 5.042916422354202e-06, "loss": 0.4311, "step": 14212 }, { "epoch": 2.3341818405764374, "grad_norm": 0.35805916644196095, "learning_rate": 5.042450872890272e-06, "loss": 0.4497, "step": 14213 }, { "epoch": 2.3343460677847805, "grad_norm": 0.3489653733566996, "learning_rate": 5.041985317534793e-06, "loss": 0.4386, "step": 14214 }, { "epoch": 2.334510294993123, "grad_norm": 1.552344521578485, "learning_rate": 5.04151975629282e-06, "loss": 0.4399, "step": 14215 }, { "epoch": 2.3346745222014658, "grad_norm": 0.4330518043111234, "learning_rate": 5.041054189169409e-06, "loss": 0.4358, "step": 14216 }, { "epoch": 2.3348387494098084, "grad_norm": 0.32002591932723284, "learning_rate": 5.040588616169618e-06, "loss": 0.419, "step": 14217 }, { "epoch": 2.335002976618151, "grad_norm": 0.31767776648707025, "learning_rate": 5.040123037298503e-06, "loss": 0.4407, "step": 14218 }, { "epoch": 2.335167203826494, "grad_norm": 0.366803435400509, "learning_rate": 5.039657452561119e-06, "loss": 0.4371, "step": 14219 }, { "epoch": 2.3353314310348368, "grad_norm": 0.3296447167812067, "learning_rate": 5.039191861962524e-06, "loss": 0.4202, "step": 14220 }, { "epoch": 2.3354956582431794, "grad_norm": 0.3349276761092498, "learning_rate": 5.0387262655077755e-06, "loss": 0.4325, "step": 14221 }, { "epoch": 2.335659885451522, "grad_norm": 0.3980378072812971, "learning_rate": 5.0382606632019325e-06, "loss": 0.4229, "step": 14222 }, { "epoch": 2.335824112659865, "grad_norm": 0.3202898785116211, "learning_rate": 5.037795055050046e-06, "loss": 0.4743, "step": 14223 }, { "epoch": 2.3359883398682078, "grad_norm": 0.31778100820349403, "learning_rate": 5.037329441057176e-06, "loss": 0.4217, "step": 14224 }, { "epoch": 2.3361525670765504, "grad_norm": 0.32080705990736375, "learning_rate": 5.0368638212283795e-06, "loss": 0.4228, "step": 14225 }, { "epoch": 2.336316794284893, "grad_norm": 0.3102344359741497, "learning_rate": 5.036398195568716e-06, "loss": 0.4358, "step": 14226 }, { "epoch": 2.3364810214932357, "grad_norm": 0.29092912120353454, "learning_rate": 5.035932564083238e-06, "loss": 0.4297, "step": 14227 }, { "epoch": 2.3366452487015787, "grad_norm": 0.27798039965725674, "learning_rate": 5.035466926777007e-06, "loss": 0.4143, "step": 14228 }, { "epoch": 2.3368094759099214, "grad_norm": 0.44537914816795954, "learning_rate": 5.035001283655076e-06, "loss": 0.4425, "step": 14229 }, { "epoch": 2.336973703118264, "grad_norm": 0.3381259638195042, "learning_rate": 5.0345356347225065e-06, "loss": 0.4383, "step": 14230 }, { "epoch": 2.337137930326607, "grad_norm": 0.36687226092077213, "learning_rate": 5.034069979984353e-06, "loss": 0.4437, "step": 14231 }, { "epoch": 2.3373021575349497, "grad_norm": 0.46311466060791356, "learning_rate": 5.033604319445676e-06, "loss": 0.4397, "step": 14232 }, { "epoch": 2.3374663847432924, "grad_norm": 0.3617763925092038, "learning_rate": 5.0331386531115285e-06, "loss": 0.4663, "step": 14233 }, { "epoch": 2.337630611951635, "grad_norm": 0.44891382965557786, "learning_rate": 5.032672980986972e-06, "loss": 0.4556, "step": 14234 }, { "epoch": 2.3377948391599777, "grad_norm": 0.36788493675452716, "learning_rate": 5.032207303077063e-06, "loss": 0.4325, "step": 14235 }, { "epoch": 2.3379590663683207, "grad_norm": 0.3044382854335584, "learning_rate": 5.031741619386858e-06, "loss": 0.4566, "step": 14236 }, { "epoch": 2.3381232935766634, "grad_norm": 0.3440239656824285, "learning_rate": 5.0312759299214175e-06, "loss": 0.4456, "step": 14237 }, { "epoch": 2.338287520785006, "grad_norm": 0.3696861896699254, "learning_rate": 5.030810234685796e-06, "loss": 0.4308, "step": 14238 }, { "epoch": 2.3384517479933487, "grad_norm": 0.4248817510969493, "learning_rate": 5.030344533685054e-06, "loss": 0.4313, "step": 14239 }, { "epoch": 2.3386159752016917, "grad_norm": 0.3077063680986963, "learning_rate": 5.02987882692425e-06, "loss": 0.4457, "step": 14240 }, { "epoch": 2.3387802024100344, "grad_norm": 0.337582640428433, "learning_rate": 5.02941311440844e-06, "loss": 0.4135, "step": 14241 }, { "epoch": 2.338944429618377, "grad_norm": 0.3671872733120523, "learning_rate": 5.028947396142681e-06, "loss": 0.4155, "step": 14242 }, { "epoch": 2.3391086568267196, "grad_norm": 0.3805787562666042, "learning_rate": 5.028481672132034e-06, "loss": 0.4486, "step": 14243 }, { "epoch": 2.3392728840350623, "grad_norm": 0.5836936584296454, "learning_rate": 5.028015942381555e-06, "loss": 0.4465, "step": 14244 }, { "epoch": 2.3394371112434054, "grad_norm": 0.36682613928154184, "learning_rate": 5.027550206896304e-06, "loss": 0.4313, "step": 14245 }, { "epoch": 2.339601338451748, "grad_norm": 0.3211669947248319, "learning_rate": 5.027084465681339e-06, "loss": 0.4331, "step": 14246 }, { "epoch": 2.3397655656600906, "grad_norm": 0.33075234650746704, "learning_rate": 5.026618718741719e-06, "loss": 0.4381, "step": 14247 }, { "epoch": 2.3399297928684337, "grad_norm": 0.3224871130791294, "learning_rate": 5.0261529660824994e-06, "loss": 0.441, "step": 14248 }, { "epoch": 2.3400940200767764, "grad_norm": 0.5106742651433831, "learning_rate": 5.025687207708743e-06, "loss": 0.4349, "step": 14249 }, { "epoch": 2.340258247285119, "grad_norm": 0.3670987534944953, "learning_rate": 5.025221443625504e-06, "loss": 0.43, "step": 14250 }, { "epoch": 2.3404224744934616, "grad_norm": 0.3152223583268576, "learning_rate": 5.024755673837845e-06, "loss": 0.4203, "step": 14251 }, { "epoch": 2.3405867017018043, "grad_norm": 0.3104109942911494, "learning_rate": 5.024289898350825e-06, "loss": 0.4453, "step": 14252 }, { "epoch": 2.3407509289101474, "grad_norm": 0.3354252677596092, "learning_rate": 5.0238241171694974e-06, "loss": 0.426, "step": 14253 }, { "epoch": 2.34091515611849, "grad_norm": 0.3096400471568136, "learning_rate": 5.023358330298925e-06, "loss": 0.4414, "step": 14254 }, { "epoch": 2.3410793833268326, "grad_norm": 0.4627023481999118, "learning_rate": 5.022892537744167e-06, "loss": 0.4296, "step": 14255 }, { "epoch": 2.3412436105351753, "grad_norm": 0.47554609569492284, "learning_rate": 5.022426739510283e-06, "loss": 0.4319, "step": 14256 }, { "epoch": 2.3414078377435183, "grad_norm": 0.33244191992096805, "learning_rate": 5.021960935602329e-06, "loss": 0.4346, "step": 14257 }, { "epoch": 2.341572064951861, "grad_norm": 0.3671414791598005, "learning_rate": 5.021495126025366e-06, "loss": 0.4197, "step": 14258 }, { "epoch": 2.3417362921602036, "grad_norm": 0.3001605538598186, "learning_rate": 5.021029310784453e-06, "loss": 0.435, "step": 14259 }, { "epoch": 2.3419005193685463, "grad_norm": 0.41049676168709537, "learning_rate": 5.020563489884649e-06, "loss": 0.4452, "step": 14260 }, { "epoch": 2.342064746576889, "grad_norm": 0.39958421223807394, "learning_rate": 5.020097663331014e-06, "loss": 0.4378, "step": 14261 }, { "epoch": 2.342228973785232, "grad_norm": 0.3526260350919074, "learning_rate": 5.019631831128605e-06, "loss": 0.4508, "step": 14262 }, { "epoch": 2.3423932009935746, "grad_norm": 0.36019790514805233, "learning_rate": 5.019165993282483e-06, "loss": 0.4508, "step": 14263 }, { "epoch": 2.3425574282019173, "grad_norm": 0.39219392948370696, "learning_rate": 5.018700149797709e-06, "loss": 0.4285, "step": 14264 }, { "epoch": 2.3427216554102603, "grad_norm": 0.34781631591555273, "learning_rate": 5.018234300679341e-06, "loss": 0.4335, "step": 14265 }, { "epoch": 2.342885882618603, "grad_norm": 0.33544889426458624, "learning_rate": 5.017768445932438e-06, "loss": 0.4483, "step": 14266 }, { "epoch": 2.3430501098269456, "grad_norm": 0.3154253538619397, "learning_rate": 5.01730258556206e-06, "loss": 0.4427, "step": 14267 }, { "epoch": 2.3432143370352883, "grad_norm": 0.33192751785748736, "learning_rate": 5.016836719573268e-06, "loss": 0.4133, "step": 14268 }, { "epoch": 2.343378564243631, "grad_norm": 0.313437034515309, "learning_rate": 5.01637084797112e-06, "loss": 0.4341, "step": 14269 }, { "epoch": 2.343542791451974, "grad_norm": 0.31116640811452134, "learning_rate": 5.015904970760677e-06, "loss": 0.4455, "step": 14270 }, { "epoch": 2.3437070186603166, "grad_norm": 0.44806362836241137, "learning_rate": 5.015439087946998e-06, "loss": 0.4522, "step": 14271 }, { "epoch": 2.3438712458686592, "grad_norm": 0.34147456503692797, "learning_rate": 5.0149731995351445e-06, "loss": 0.4347, "step": 14272 }, { "epoch": 2.344035473077002, "grad_norm": 0.31152642014698967, "learning_rate": 5.014507305530173e-06, "loss": 0.4205, "step": 14273 }, { "epoch": 2.344199700285345, "grad_norm": 0.34475814543629774, "learning_rate": 5.014041405937147e-06, "loss": 0.4342, "step": 14274 }, { "epoch": 2.3443639274936876, "grad_norm": 0.4015184961878321, "learning_rate": 5.013575500761127e-06, "loss": 0.4257, "step": 14275 }, { "epoch": 2.3445281547020302, "grad_norm": 0.3033020240845556, "learning_rate": 5.013109590007172e-06, "loss": 0.4382, "step": 14276 }, { "epoch": 2.344692381910373, "grad_norm": 0.338000873883839, "learning_rate": 5.012643673680339e-06, "loss": 0.4404, "step": 14277 }, { "epoch": 2.3448566091187155, "grad_norm": 0.31550507098643704, "learning_rate": 5.012177751785694e-06, "loss": 0.4527, "step": 14278 }, { "epoch": 2.3450208363270586, "grad_norm": 0.2923251975366749, "learning_rate": 5.0117118243282925e-06, "loss": 0.4509, "step": 14279 }, { "epoch": 2.3451850635354012, "grad_norm": 0.4025916966292508, "learning_rate": 5.011245891313199e-06, "loss": 0.4418, "step": 14280 }, { "epoch": 2.345349290743744, "grad_norm": 0.3495439251380644, "learning_rate": 5.010779952745472e-06, "loss": 0.4466, "step": 14281 }, { "epoch": 2.345513517952087, "grad_norm": 0.3030842299624673, "learning_rate": 5.010314008630171e-06, "loss": 0.451, "step": 14282 }, { "epoch": 2.3456777451604296, "grad_norm": 0.25459738553951666, "learning_rate": 5.009848058972359e-06, "loss": 0.4288, "step": 14283 }, { "epoch": 2.3458419723687722, "grad_norm": 0.34130323628216985, "learning_rate": 5.009382103777093e-06, "loss": 0.4415, "step": 14284 }, { "epoch": 2.346006199577115, "grad_norm": 0.31855931505260354, "learning_rate": 5.008916143049439e-06, "loss": 0.4409, "step": 14285 }, { "epoch": 2.3461704267854575, "grad_norm": 0.529587397164376, "learning_rate": 5.008450176794455e-06, "loss": 0.4339, "step": 14286 }, { "epoch": 2.3463346539938006, "grad_norm": 0.30583306940272903, "learning_rate": 5.0079842050172e-06, "loss": 0.4358, "step": 14287 }, { "epoch": 2.3464988812021432, "grad_norm": 0.3376306795943431, "learning_rate": 5.007518227722738e-06, "loss": 0.4744, "step": 14288 }, { "epoch": 2.346663108410486, "grad_norm": 0.3799763220578188, "learning_rate": 5.007052244916129e-06, "loss": 0.4431, "step": 14289 }, { "epoch": 2.3468273356188285, "grad_norm": 0.2766437623030712, "learning_rate": 5.006586256602433e-06, "loss": 0.4062, "step": 14290 }, { "epoch": 2.3469915628271716, "grad_norm": 0.3172427330468487, "learning_rate": 5.006120262786712e-06, "loss": 0.4506, "step": 14291 }, { "epoch": 2.3471557900355142, "grad_norm": 0.370368306164017, "learning_rate": 5.005654263474026e-06, "loss": 0.4444, "step": 14292 }, { "epoch": 2.347320017243857, "grad_norm": 0.5424344754056988, "learning_rate": 5.0051882586694384e-06, "loss": 0.4411, "step": 14293 }, { "epoch": 2.3474842444521995, "grad_norm": 0.37539884285858194, "learning_rate": 5.004722248378009e-06, "loss": 0.4245, "step": 14294 }, { "epoch": 2.347648471660542, "grad_norm": 0.317396319175305, "learning_rate": 5.0042562326048e-06, "loss": 0.4444, "step": 14295 }, { "epoch": 2.347812698868885, "grad_norm": 0.28224187924061256, "learning_rate": 5.003790211354872e-06, "loss": 0.4551, "step": 14296 }, { "epoch": 2.347976926077228, "grad_norm": 0.3263896166098868, "learning_rate": 5.003324184633286e-06, "loss": 0.4499, "step": 14297 }, { "epoch": 2.3481411532855705, "grad_norm": 0.3283510564766459, "learning_rate": 5.002858152445104e-06, "loss": 0.439, "step": 14298 }, { "epoch": 2.3483053804939136, "grad_norm": 0.34528668929370976, "learning_rate": 5.002392114795388e-06, "loss": 0.4474, "step": 14299 }, { "epoch": 2.348469607702256, "grad_norm": 0.3675402895714062, "learning_rate": 5.0019260716892e-06, "loss": 0.466, "step": 14300 }, { "epoch": 2.348633834910599, "grad_norm": 0.32888646075463274, "learning_rate": 5.001460023131601e-06, "loss": 0.4308, "step": 14301 }, { "epoch": 2.3487980621189415, "grad_norm": 0.34194545071114996, "learning_rate": 5.000993969127652e-06, "loss": 0.4381, "step": 14302 }, { "epoch": 2.348962289327284, "grad_norm": 0.3481120091692677, "learning_rate": 5.000527909682415e-06, "loss": 0.4281, "step": 14303 }, { "epoch": 2.349126516535627, "grad_norm": 0.30426600918544544, "learning_rate": 5.000061844800953e-06, "loss": 0.4242, "step": 14304 }, { "epoch": 2.34929074374397, "grad_norm": 0.33042610605754225, "learning_rate": 4.9995957744883286e-06, "loss": 0.4362, "step": 14305 }, { "epoch": 2.3494549709523125, "grad_norm": 0.6139370838276929, "learning_rate": 4.999129698749602e-06, "loss": 0.4475, "step": 14306 }, { "epoch": 2.349619198160655, "grad_norm": 0.3091523983404541, "learning_rate": 4.998663617589835e-06, "loss": 0.4586, "step": 14307 }, { "epoch": 2.349783425368998, "grad_norm": 0.3671883433413459, "learning_rate": 4.998197531014091e-06, "loss": 0.4205, "step": 14308 }, { "epoch": 2.349947652577341, "grad_norm": 0.28508625616699385, "learning_rate": 4.997731439027432e-06, "loss": 0.4259, "step": 14309 }, { "epoch": 2.3501118797856835, "grad_norm": 0.33190313156947665, "learning_rate": 4.99726534163492e-06, "loss": 0.4272, "step": 14310 }, { "epoch": 2.350276106994026, "grad_norm": 0.3684075426938841, "learning_rate": 4.996799238841616e-06, "loss": 0.4357, "step": 14311 }, { "epoch": 2.3504403342023688, "grad_norm": 0.29104654696695337, "learning_rate": 4.9963331306525834e-06, "loss": 0.4507, "step": 14312 }, { "epoch": 2.350604561410712, "grad_norm": 0.45342829692727055, "learning_rate": 4.995867017072885e-06, "loss": 0.4348, "step": 14313 }, { "epoch": 2.3507687886190545, "grad_norm": 0.28462488186154083, "learning_rate": 4.995400898107584e-06, "loss": 0.4408, "step": 14314 }, { "epoch": 2.350933015827397, "grad_norm": 0.32996492095223623, "learning_rate": 4.994934773761742e-06, "loss": 0.455, "step": 14315 }, { "epoch": 2.35109724303574, "grad_norm": 0.4413132936257363, "learning_rate": 4.994468644040419e-06, "loss": 0.4453, "step": 14316 }, { "epoch": 2.351261470244083, "grad_norm": 0.287439219083887, "learning_rate": 4.994002508948682e-06, "loss": 0.45, "step": 14317 }, { "epoch": 2.3514256974524255, "grad_norm": 0.6687016558497426, "learning_rate": 4.993536368491592e-06, "loss": 0.4322, "step": 14318 }, { "epoch": 2.351589924660768, "grad_norm": 0.31833933331295167, "learning_rate": 4.99307022267421e-06, "loss": 0.4233, "step": 14319 }, { "epoch": 2.3517541518691107, "grad_norm": 0.2936523952712435, "learning_rate": 4.992604071501601e-06, "loss": 0.424, "step": 14320 }, { "epoch": 2.351918379077454, "grad_norm": 0.29199165743680827, "learning_rate": 4.992137914978827e-06, "loss": 0.4367, "step": 14321 }, { "epoch": 2.3520826062857965, "grad_norm": 0.38049347369291886, "learning_rate": 4.991671753110952e-06, "loss": 0.4562, "step": 14322 }, { "epoch": 2.352246833494139, "grad_norm": 0.28514980201093715, "learning_rate": 4.991205585903037e-06, "loss": 0.4403, "step": 14323 }, { "epoch": 2.3524110607024817, "grad_norm": 0.29063242226173736, "learning_rate": 4.990739413360147e-06, "loss": 0.4173, "step": 14324 }, { "epoch": 2.352575287910825, "grad_norm": 0.33956667259069695, "learning_rate": 4.990273235487343e-06, "loss": 0.4395, "step": 14325 }, { "epoch": 2.3527395151191675, "grad_norm": 0.29213608629516113, "learning_rate": 4.9898070522896885e-06, "loss": 0.436, "step": 14326 }, { "epoch": 2.35290374232751, "grad_norm": 0.3184200244453, "learning_rate": 4.9893408637722504e-06, "loss": 0.4421, "step": 14327 }, { "epoch": 2.3530679695358527, "grad_norm": 0.9054970444536435, "learning_rate": 4.988874669940086e-06, "loss": 0.4359, "step": 14328 }, { "epoch": 2.3532321967441954, "grad_norm": 0.8009512337096192, "learning_rate": 4.988408470798264e-06, "loss": 0.4652, "step": 14329 }, { "epoch": 2.3533964239525385, "grad_norm": 0.3000048361872114, "learning_rate": 4.987942266351845e-06, "loss": 0.4389, "step": 14330 }, { "epoch": 2.353560651160881, "grad_norm": 0.4435834381481093, "learning_rate": 4.987476056605892e-06, "loss": 0.4462, "step": 14331 }, { "epoch": 2.3537248783692237, "grad_norm": 0.31409223317024465, "learning_rate": 4.987009841565469e-06, "loss": 0.4288, "step": 14332 }, { "epoch": 2.353889105577567, "grad_norm": 0.3775543340666963, "learning_rate": 4.986543621235641e-06, "loss": 0.4687, "step": 14333 }, { "epoch": 2.3540533327859094, "grad_norm": 0.3154684698397416, "learning_rate": 4.986077395621471e-06, "loss": 0.4511, "step": 14334 }, { "epoch": 2.354217559994252, "grad_norm": 0.33377923891010136, "learning_rate": 4.985611164728022e-06, "loss": 0.4495, "step": 14335 }, { "epoch": 2.3543817872025947, "grad_norm": 0.38798308412223753, "learning_rate": 4.9851449285603575e-06, "loss": 0.4527, "step": 14336 }, { "epoch": 2.3545460144109374, "grad_norm": 0.3595666147784764, "learning_rate": 4.984678687123542e-06, "loss": 0.4423, "step": 14337 }, { "epoch": 2.3547102416192804, "grad_norm": 0.35797627990956515, "learning_rate": 4.984212440422639e-06, "loss": 0.4325, "step": 14338 }, { "epoch": 2.354874468827623, "grad_norm": 0.29402622902463466, "learning_rate": 4.9837461884627134e-06, "loss": 0.4498, "step": 14339 }, { "epoch": 2.3550386960359657, "grad_norm": 0.3619177067733234, "learning_rate": 4.983279931248827e-06, "loss": 0.4209, "step": 14340 }, { "epoch": 2.3552029232443084, "grad_norm": 0.3469217107065268, "learning_rate": 4.982813668786045e-06, "loss": 0.4301, "step": 14341 }, { "epoch": 2.3553671504526514, "grad_norm": 0.2869461199173222, "learning_rate": 4.982347401079432e-06, "loss": 0.4353, "step": 14342 }, { "epoch": 2.355531377660994, "grad_norm": 0.3310294155726151, "learning_rate": 4.981881128134052e-06, "loss": 0.4306, "step": 14343 }, { "epoch": 2.3556956048693367, "grad_norm": 0.44544791162175035, "learning_rate": 4.981414849954969e-06, "loss": 0.4457, "step": 14344 }, { "epoch": 2.3558598320776793, "grad_norm": 0.2821848497782474, "learning_rate": 4.980948566547246e-06, "loss": 0.4283, "step": 14345 }, { "epoch": 2.356024059286022, "grad_norm": 0.2899346904371948, "learning_rate": 4.980482277915948e-06, "loss": 0.4384, "step": 14346 }, { "epoch": 2.356188286494365, "grad_norm": 0.28895582908267986, "learning_rate": 4.9800159840661416e-06, "loss": 0.4425, "step": 14347 }, { "epoch": 2.3563525137027077, "grad_norm": 0.32261166832480315, "learning_rate": 4.979549685002888e-06, "loss": 0.4473, "step": 14348 }, { "epoch": 2.3565167409110503, "grad_norm": 0.41278684584248443, "learning_rate": 4.979083380731254e-06, "loss": 0.4489, "step": 14349 }, { "epoch": 2.3566809681193934, "grad_norm": 0.36717664735562605, "learning_rate": 4.978617071256302e-06, "loss": 0.4333, "step": 14350 }, { "epoch": 2.356845195327736, "grad_norm": 0.2871702500281055, "learning_rate": 4.978150756583098e-06, "loss": 0.4314, "step": 14351 }, { "epoch": 2.3570094225360787, "grad_norm": 0.296831622491759, "learning_rate": 4.977684436716707e-06, "loss": 0.4459, "step": 14352 }, { "epoch": 2.3571736497444213, "grad_norm": 0.3152784713613363, "learning_rate": 4.977218111662193e-06, "loss": 0.4313, "step": 14353 }, { "epoch": 2.357337876952764, "grad_norm": 0.3179123907686836, "learning_rate": 4.97675178142462e-06, "loss": 0.4383, "step": 14354 }, { "epoch": 2.357502104161107, "grad_norm": 0.33097652269284605, "learning_rate": 4.976285446009053e-06, "loss": 0.4608, "step": 14355 }, { "epoch": 2.3576663313694497, "grad_norm": 0.26370984506069994, "learning_rate": 4.9758191054205595e-06, "loss": 0.4194, "step": 14356 }, { "epoch": 2.3578305585777923, "grad_norm": 0.3503675341705753, "learning_rate": 4.975352759664201e-06, "loss": 0.4318, "step": 14357 }, { "epoch": 2.357994785786135, "grad_norm": 0.26075769429226026, "learning_rate": 4.974886408745045e-06, "loss": 0.4337, "step": 14358 }, { "epoch": 2.358159012994478, "grad_norm": 0.3167737749473058, "learning_rate": 4.9744200526681545e-06, "loss": 0.4472, "step": 14359 }, { "epoch": 2.3583232402028207, "grad_norm": 0.3267289682222068, "learning_rate": 4.973953691438595e-06, "loss": 0.4489, "step": 14360 }, { "epoch": 2.3584874674111633, "grad_norm": 0.48223262088179, "learning_rate": 4.973487325061433e-06, "loss": 0.4413, "step": 14361 }, { "epoch": 2.358651694619506, "grad_norm": 0.29004525308874013, "learning_rate": 4.973020953541732e-06, "loss": 0.4247, "step": 14362 }, { "epoch": 2.3588159218278486, "grad_norm": 0.30160171868034047, "learning_rate": 4.972554576884559e-06, "loss": 0.448, "step": 14363 }, { "epoch": 2.3589801490361917, "grad_norm": 0.41944901184902755, "learning_rate": 4.972088195094978e-06, "loss": 0.4512, "step": 14364 }, { "epoch": 2.3591443762445343, "grad_norm": 0.28947942760816986, "learning_rate": 4.971621808178054e-06, "loss": 0.4422, "step": 14365 }, { "epoch": 2.359308603452877, "grad_norm": 0.3238543328135968, "learning_rate": 4.971155416138853e-06, "loss": 0.4397, "step": 14366 }, { "epoch": 2.35947283066122, "grad_norm": 0.3703052518219256, "learning_rate": 4.970689018982442e-06, "loss": 0.4297, "step": 14367 }, { "epoch": 2.3596370578695627, "grad_norm": 0.32595968322039665, "learning_rate": 4.9702226167138855e-06, "loss": 0.4132, "step": 14368 }, { "epoch": 2.3598012850779053, "grad_norm": 0.36003670628504997, "learning_rate": 4.9697562093382475e-06, "loss": 0.4519, "step": 14369 }, { "epoch": 2.359965512286248, "grad_norm": 0.35066351988470934, "learning_rate": 4.969289796860595e-06, "loss": 0.4557, "step": 14370 }, { "epoch": 2.3601297394945906, "grad_norm": 0.28461795043565574, "learning_rate": 4.968823379285993e-06, "loss": 0.4465, "step": 14371 }, { "epoch": 2.3602939667029337, "grad_norm": 0.3349794950571723, "learning_rate": 4.96835695661951e-06, "loss": 0.4298, "step": 14372 }, { "epoch": 2.3604581939112763, "grad_norm": 0.4623462596162397, "learning_rate": 4.967890528866209e-06, "loss": 0.4376, "step": 14373 }, { "epoch": 2.360622421119619, "grad_norm": 0.3045711511063072, "learning_rate": 4.967424096031155e-06, "loss": 0.4281, "step": 14374 }, { "epoch": 2.3607866483279616, "grad_norm": 0.28236555126313645, "learning_rate": 4.966957658119415e-06, "loss": 0.4293, "step": 14375 }, { "epoch": 2.3609508755363047, "grad_norm": 0.329845338092337, "learning_rate": 4.966491215136056e-06, "loss": 0.4448, "step": 14376 }, { "epoch": 2.3611151027446473, "grad_norm": 0.2780889848515211, "learning_rate": 4.966024767086145e-06, "loss": 0.4521, "step": 14377 }, { "epoch": 2.36127932995299, "grad_norm": 0.7223244736692684, "learning_rate": 4.965558313974746e-06, "loss": 0.4267, "step": 14378 }, { "epoch": 2.3614435571613326, "grad_norm": 0.29452548364476305, "learning_rate": 4.965091855806925e-06, "loss": 0.4378, "step": 14379 }, { "epoch": 2.3616077843696752, "grad_norm": 0.34535556397656414, "learning_rate": 4.964625392587749e-06, "loss": 0.4374, "step": 14380 }, { "epoch": 2.3617720115780183, "grad_norm": 0.28214696649413246, "learning_rate": 4.9641589243222845e-06, "loss": 0.4398, "step": 14381 }, { "epoch": 2.361936238786361, "grad_norm": 0.9998617351395512, "learning_rate": 4.963692451015597e-06, "loss": 0.4421, "step": 14382 }, { "epoch": 2.3621004659947036, "grad_norm": 0.31297145796805065, "learning_rate": 4.963225972672753e-06, "loss": 0.4216, "step": 14383 }, { "epoch": 2.3622646932030467, "grad_norm": 0.2939100139357004, "learning_rate": 4.96275948929882e-06, "loss": 0.4648, "step": 14384 }, { "epoch": 2.3624289204113893, "grad_norm": 0.530158610934547, "learning_rate": 4.962293000898864e-06, "loss": 0.4314, "step": 14385 }, { "epoch": 2.362593147619732, "grad_norm": 0.40901577717616516, "learning_rate": 4.9618265074779496e-06, "loss": 0.428, "step": 14386 }, { "epoch": 2.3627573748280746, "grad_norm": 0.27364877261831405, "learning_rate": 4.961360009041146e-06, "loss": 0.44, "step": 14387 }, { "epoch": 2.362921602036417, "grad_norm": 0.30209721956900093, "learning_rate": 4.9608935055935175e-06, "loss": 0.4211, "step": 14388 }, { "epoch": 2.3630858292447603, "grad_norm": 0.39013537477111404, "learning_rate": 4.960426997140134e-06, "loss": 0.4411, "step": 14389 }, { "epoch": 2.363250056453103, "grad_norm": 0.5139156781012, "learning_rate": 4.959960483686059e-06, "loss": 0.4373, "step": 14390 }, { "epoch": 2.3634142836614456, "grad_norm": 0.3278145517121621, "learning_rate": 4.959493965236361e-06, "loss": 0.4433, "step": 14391 }, { "epoch": 2.363578510869788, "grad_norm": 0.3005983703147699, "learning_rate": 4.959027441796107e-06, "loss": 0.4535, "step": 14392 }, { "epoch": 2.3637427380781313, "grad_norm": 0.4589025460002175, "learning_rate": 4.958560913370363e-06, "loss": 0.4363, "step": 14393 }, { "epoch": 2.363906965286474, "grad_norm": 0.504401185692525, "learning_rate": 4.958094379964196e-06, "loss": 0.4448, "step": 14394 }, { "epoch": 2.3640711924948166, "grad_norm": 0.29769659114726227, "learning_rate": 4.9576278415826725e-06, "loss": 0.4592, "step": 14395 }, { "epoch": 2.364235419703159, "grad_norm": 0.29314450078630233, "learning_rate": 4.957161298230861e-06, "loss": 0.4562, "step": 14396 }, { "epoch": 2.364399646911502, "grad_norm": 0.33090458937336886, "learning_rate": 4.956694749913829e-06, "loss": 0.4701, "step": 14397 }, { "epoch": 2.364563874119845, "grad_norm": 0.2934499635019847, "learning_rate": 4.956228196636643e-06, "loss": 0.4375, "step": 14398 }, { "epoch": 2.3647281013281876, "grad_norm": 0.3996609817040107, "learning_rate": 4.9557616384043685e-06, "loss": 0.4257, "step": 14399 }, { "epoch": 2.36489232853653, "grad_norm": 0.32228752842073993, "learning_rate": 4.955295075222074e-06, "loss": 0.4337, "step": 14400 }, { "epoch": 2.3650565557448733, "grad_norm": 0.3656720599456349, "learning_rate": 4.954828507094828e-06, "loss": 0.4355, "step": 14401 }, { "epoch": 2.365220782953216, "grad_norm": 0.31683870295473615, "learning_rate": 4.954361934027697e-06, "loss": 0.4442, "step": 14402 }, { "epoch": 2.3653850101615586, "grad_norm": 0.3107408972347142, "learning_rate": 4.953895356025748e-06, "loss": 0.4561, "step": 14403 }, { "epoch": 2.365549237369901, "grad_norm": 0.32621837651394164, "learning_rate": 4.9534287730940486e-06, "loss": 0.4338, "step": 14404 }, { "epoch": 2.365713464578244, "grad_norm": 0.29890532466434366, "learning_rate": 4.952962185237667e-06, "loss": 0.4527, "step": 14405 }, { "epoch": 2.365877691786587, "grad_norm": 0.5040022006465182, "learning_rate": 4.952495592461671e-06, "loss": 0.4354, "step": 14406 }, { "epoch": 2.3660419189949295, "grad_norm": 0.31470278545685143, "learning_rate": 4.952028994771127e-06, "loss": 0.4347, "step": 14407 }, { "epoch": 2.366206146203272, "grad_norm": 0.3173507245055464, "learning_rate": 4.951562392171103e-06, "loss": 0.4437, "step": 14408 }, { "epoch": 2.366370373411615, "grad_norm": 0.3154547169516024, "learning_rate": 4.9510957846666665e-06, "loss": 0.4601, "step": 14409 }, { "epoch": 2.366534600619958, "grad_norm": 0.38068921310440645, "learning_rate": 4.950629172262888e-06, "loss": 0.4304, "step": 14410 }, { "epoch": 2.3666988278283005, "grad_norm": 0.4921138013809312, "learning_rate": 4.9501625549648315e-06, "loss": 0.4571, "step": 14411 }, { "epoch": 2.366863055036643, "grad_norm": 0.3367350539856582, "learning_rate": 4.949695932777568e-06, "loss": 0.4411, "step": 14412 }, { "epoch": 2.367027282244986, "grad_norm": 0.2890413534719377, "learning_rate": 4.949229305706163e-06, "loss": 0.4387, "step": 14413 }, { "epoch": 2.3671915094533285, "grad_norm": 0.29384243153256306, "learning_rate": 4.948762673755688e-06, "loss": 0.4309, "step": 14414 }, { "epoch": 2.3673557366616715, "grad_norm": 0.30973836082355344, "learning_rate": 4.948296036931206e-06, "loss": 0.4332, "step": 14415 }, { "epoch": 2.367519963870014, "grad_norm": 0.3154947522688878, "learning_rate": 4.947829395237789e-06, "loss": 0.4325, "step": 14416 }, { "epoch": 2.367684191078357, "grad_norm": 0.3468663049157948, "learning_rate": 4.947362748680506e-06, "loss": 0.4446, "step": 14417 }, { "epoch": 2.3678484182867, "grad_norm": 0.6625064405457736, "learning_rate": 4.946896097264421e-06, "loss": 0.4359, "step": 14418 }, { "epoch": 2.3680126454950425, "grad_norm": 0.36755003431616734, "learning_rate": 4.946429440994606e-06, "loss": 0.4501, "step": 14419 }, { "epoch": 2.368176872703385, "grad_norm": 0.3180908616830341, "learning_rate": 4.945962779876127e-06, "loss": 0.4367, "step": 14420 }, { "epoch": 2.368341099911728, "grad_norm": 0.2969125348286323, "learning_rate": 4.945496113914055e-06, "loss": 0.4397, "step": 14421 }, { "epoch": 2.3685053271200704, "grad_norm": 0.3484937349702599, "learning_rate": 4.945029443113455e-06, "loss": 0.4477, "step": 14422 }, { "epoch": 2.3686695543284135, "grad_norm": 0.3364151217982416, "learning_rate": 4.9445627674794e-06, "loss": 0.4408, "step": 14423 }, { "epoch": 2.368833781536756, "grad_norm": 0.27632261732359437, "learning_rate": 4.944096087016953e-06, "loss": 0.4327, "step": 14424 }, { "epoch": 2.368998008745099, "grad_norm": 0.33643617461694814, "learning_rate": 4.943629401731187e-06, "loss": 0.4327, "step": 14425 }, { "epoch": 2.3691622359534414, "grad_norm": 0.251371038402946, "learning_rate": 4.94316271162717e-06, "loss": 0.4295, "step": 14426 }, { "epoch": 2.3693264631617845, "grad_norm": 0.273291389563221, "learning_rate": 4.942696016709969e-06, "loss": 0.4571, "step": 14427 }, { "epoch": 2.369490690370127, "grad_norm": 0.3027602849916757, "learning_rate": 4.942229316984654e-06, "loss": 0.4303, "step": 14428 }, { "epoch": 2.36965491757847, "grad_norm": 0.29359944961044193, "learning_rate": 4.941762612456292e-06, "loss": 0.447, "step": 14429 }, { "epoch": 2.3698191447868124, "grad_norm": 0.30796589557839593, "learning_rate": 4.941295903129954e-06, "loss": 0.4462, "step": 14430 }, { "epoch": 2.369983371995155, "grad_norm": 0.3321031226177786, "learning_rate": 4.94082918901071e-06, "loss": 0.442, "step": 14431 }, { "epoch": 2.370147599203498, "grad_norm": 0.28573543373725724, "learning_rate": 4.940362470103627e-06, "loss": 0.4416, "step": 14432 }, { "epoch": 2.370311826411841, "grad_norm": 0.34980867774696045, "learning_rate": 4.939895746413773e-06, "loss": 0.4405, "step": 14433 }, { "epoch": 2.3704760536201834, "grad_norm": 0.38809888263301406, "learning_rate": 4.939429017946218e-06, "loss": 0.4441, "step": 14434 }, { "epoch": 2.3706402808285265, "grad_norm": 0.2776636961242803, "learning_rate": 4.938962284706034e-06, "loss": 0.4449, "step": 14435 }, { "epoch": 2.370804508036869, "grad_norm": 0.31336049708314334, "learning_rate": 4.9384955466982855e-06, "loss": 0.4198, "step": 14436 }, { "epoch": 2.370968735245212, "grad_norm": 0.2897266594367285, "learning_rate": 4.938028803928044e-06, "loss": 0.4437, "step": 14437 }, { "epoch": 2.3711329624535544, "grad_norm": 0.3676946069702938, "learning_rate": 4.93756205640038e-06, "loss": 0.4314, "step": 14438 }, { "epoch": 2.371297189661897, "grad_norm": 0.29396140174936525, "learning_rate": 4.937095304120362e-06, "loss": 0.4426, "step": 14439 }, { "epoch": 2.37146141687024, "grad_norm": 0.3154282077143417, "learning_rate": 4.936628547093057e-06, "loss": 0.4423, "step": 14440 }, { "epoch": 2.371625644078583, "grad_norm": 0.5470785694493393, "learning_rate": 4.936161785323538e-06, "loss": 0.4369, "step": 14441 }, { "epoch": 2.3717898712869254, "grad_norm": 0.2680266468948304, "learning_rate": 4.935695018816872e-06, "loss": 0.4333, "step": 14442 }, { "epoch": 2.371954098495268, "grad_norm": 0.310046436229158, "learning_rate": 4.935228247578129e-06, "loss": 0.4326, "step": 14443 }, { "epoch": 2.372118325703611, "grad_norm": 0.4633103707884507, "learning_rate": 4.93476147161238e-06, "loss": 0.4542, "step": 14444 }, { "epoch": 2.372282552911954, "grad_norm": 0.4442241811118408, "learning_rate": 4.9342946909246935e-06, "loss": 0.439, "step": 14445 }, { "epoch": 2.3724467801202964, "grad_norm": 0.3754637769732102, "learning_rate": 4.933827905520139e-06, "loss": 0.4583, "step": 14446 }, { "epoch": 2.372611007328639, "grad_norm": 0.2746592828941314, "learning_rate": 4.933361115403787e-06, "loss": 0.4348, "step": 14447 }, { "epoch": 2.3727752345369817, "grad_norm": 0.48162906768990915, "learning_rate": 4.932894320580707e-06, "loss": 0.4588, "step": 14448 }, { "epoch": 2.3729394617453248, "grad_norm": 0.27825006803186564, "learning_rate": 4.9324275210559675e-06, "loss": 0.4456, "step": 14449 }, { "epoch": 2.3731036889536674, "grad_norm": 0.6205579010136238, "learning_rate": 4.931960716834641e-06, "loss": 0.4453, "step": 14450 }, { "epoch": 2.37326791616201, "grad_norm": 0.3124084196903969, "learning_rate": 4.931493907921796e-06, "loss": 0.4307, "step": 14451 }, { "epoch": 2.373432143370353, "grad_norm": 0.3028093135665966, "learning_rate": 4.931027094322503e-06, "loss": 0.436, "step": 14452 }, { "epoch": 2.3735963705786958, "grad_norm": 0.3348730023292204, "learning_rate": 4.93056027604183e-06, "loss": 0.4355, "step": 14453 }, { "epoch": 2.3737605977870384, "grad_norm": 0.33042531193006286, "learning_rate": 4.93009345308485e-06, "loss": 0.4485, "step": 14454 }, { "epoch": 2.373924824995381, "grad_norm": 0.28904118773468906, "learning_rate": 4.929626625456633e-06, "loss": 0.437, "step": 14455 }, { "epoch": 2.3740890522037237, "grad_norm": 0.5665194365436337, "learning_rate": 4.929159793162247e-06, "loss": 0.4356, "step": 14456 }, { "epoch": 2.3742532794120668, "grad_norm": 0.3139144028595357, "learning_rate": 4.9286929562067635e-06, "loss": 0.4295, "step": 14457 }, { "epoch": 2.3744175066204094, "grad_norm": 0.3337755379913973, "learning_rate": 4.928226114595252e-06, "loss": 0.4425, "step": 14458 }, { "epoch": 2.374581733828752, "grad_norm": 0.6960305232843772, "learning_rate": 4.927759268332783e-06, "loss": 0.4401, "step": 14459 }, { "epoch": 2.3747459610370947, "grad_norm": 0.3636555112169848, "learning_rate": 4.927292417424429e-06, "loss": 0.4307, "step": 14460 }, { "epoch": 2.3749101882454378, "grad_norm": 0.4151227793689786, "learning_rate": 4.92682556187526e-06, "loss": 0.4312, "step": 14461 }, { "epoch": 2.3750744154537804, "grad_norm": 0.3043352830410617, "learning_rate": 4.926358701690343e-06, "loss": 0.4435, "step": 14462 }, { "epoch": 2.375238642662123, "grad_norm": 0.41972681854749694, "learning_rate": 4.925891836874751e-06, "loss": 0.4381, "step": 14463 }, { "epoch": 2.3754028698704657, "grad_norm": 0.3183437051748933, "learning_rate": 4.925424967433557e-06, "loss": 0.4492, "step": 14464 }, { "epoch": 2.3755670970788083, "grad_norm": 0.3825905561550247, "learning_rate": 4.924958093371828e-06, "loss": 0.4457, "step": 14465 }, { "epoch": 2.3757313242871514, "grad_norm": 0.3127106608442882, "learning_rate": 4.924491214694636e-06, "loss": 0.4438, "step": 14466 }, { "epoch": 2.375895551495494, "grad_norm": 0.30704550035120054, "learning_rate": 4.924024331407051e-06, "loss": 0.4409, "step": 14467 }, { "epoch": 2.3760597787038367, "grad_norm": 0.5557837596183262, "learning_rate": 4.923557443514145e-06, "loss": 0.4268, "step": 14468 }, { "epoch": 2.3762240059121797, "grad_norm": 0.31056296034006503, "learning_rate": 4.92309055102099e-06, "loss": 0.4226, "step": 14469 }, { "epoch": 2.3763882331205224, "grad_norm": 0.3614259977141636, "learning_rate": 4.922623653932655e-06, "loss": 0.437, "step": 14470 }, { "epoch": 2.376552460328865, "grad_norm": 0.32688326087771474, "learning_rate": 4.92215675225421e-06, "loss": 0.4247, "step": 14471 }, { "epoch": 2.3767166875372077, "grad_norm": 0.29109959383026757, "learning_rate": 4.921689845990726e-06, "loss": 0.4389, "step": 14472 }, { "epoch": 2.3768809147455503, "grad_norm": 0.3201231594951292, "learning_rate": 4.921222935147279e-06, "loss": 0.4474, "step": 14473 }, { "epoch": 2.3770451419538934, "grad_norm": 0.30953462437114765, "learning_rate": 4.920756019728934e-06, "loss": 0.4296, "step": 14474 }, { "epoch": 2.377209369162236, "grad_norm": 0.33172813843029597, "learning_rate": 4.9202890997407656e-06, "loss": 0.4333, "step": 14475 }, { "epoch": 2.3773735963705787, "grad_norm": 0.2837245728085035, "learning_rate": 4.9198221751878435e-06, "loss": 0.4304, "step": 14476 }, { "epoch": 2.3775378235789213, "grad_norm": 0.36359957078420707, "learning_rate": 4.919355246075241e-06, "loss": 0.4302, "step": 14477 }, { "epoch": 2.3777020507872644, "grad_norm": 0.3465557420820277, "learning_rate": 4.918888312408026e-06, "loss": 0.4385, "step": 14478 }, { "epoch": 2.377866277995607, "grad_norm": 0.28285182301127754, "learning_rate": 4.918421374191272e-06, "loss": 0.4361, "step": 14479 }, { "epoch": 2.3780305052039497, "grad_norm": 0.2794892656742042, "learning_rate": 4.917954431430051e-06, "loss": 0.4353, "step": 14480 }, { "epoch": 2.3781947324122923, "grad_norm": 0.9462095011637344, "learning_rate": 4.917487484129434e-06, "loss": 0.442, "step": 14481 }, { "epoch": 2.378358959620635, "grad_norm": 0.34605196349121137, "learning_rate": 4.917020532294491e-06, "loss": 0.4431, "step": 14482 }, { "epoch": 2.378523186828978, "grad_norm": 0.29326419873734, "learning_rate": 4.916553575930295e-06, "loss": 0.429, "step": 14483 }, { "epoch": 2.3786874140373206, "grad_norm": 0.32590839117850195, "learning_rate": 4.9160866150419185e-06, "loss": 0.4518, "step": 14484 }, { "epoch": 2.3788516412456633, "grad_norm": 0.3073546322282407, "learning_rate": 4.9156196496344315e-06, "loss": 0.4276, "step": 14485 }, { "epoch": 2.3790158684540064, "grad_norm": 0.36654859711759064, "learning_rate": 4.915152679712905e-06, "loss": 0.4439, "step": 14486 }, { "epoch": 2.379180095662349, "grad_norm": 0.3038320472398642, "learning_rate": 4.914685705282413e-06, "loss": 0.4282, "step": 14487 }, { "epoch": 2.3793443228706916, "grad_norm": 0.34343378873985786, "learning_rate": 4.914218726348026e-06, "loss": 0.4538, "step": 14488 }, { "epoch": 2.3795085500790343, "grad_norm": 0.30341262594748736, "learning_rate": 4.913751742914817e-06, "loss": 0.4309, "step": 14489 }, { "epoch": 2.379672777287377, "grad_norm": 0.29250488553478204, "learning_rate": 4.913284754987856e-06, "loss": 0.4135, "step": 14490 }, { "epoch": 2.37983700449572, "grad_norm": 0.27498243268008893, "learning_rate": 4.912817762572216e-06, "loss": 0.4461, "step": 14491 }, { "epoch": 2.3800012317040626, "grad_norm": 0.34557233743510063, "learning_rate": 4.912350765672968e-06, "loss": 0.419, "step": 14492 }, { "epoch": 2.3801654589124053, "grad_norm": 0.2647786611846677, "learning_rate": 4.911883764295186e-06, "loss": 0.4275, "step": 14493 }, { "epoch": 2.380329686120748, "grad_norm": 0.3375971662260146, "learning_rate": 4.911416758443941e-06, "loss": 0.4321, "step": 14494 }, { "epoch": 2.380493913329091, "grad_norm": 0.29484030285466734, "learning_rate": 4.910949748124306e-06, "loss": 0.4441, "step": 14495 }, { "epoch": 2.3806581405374336, "grad_norm": 0.3515144629906051, "learning_rate": 4.9104827333413515e-06, "loss": 0.4307, "step": 14496 }, { "epoch": 2.3808223677457763, "grad_norm": 0.2737921671106952, "learning_rate": 4.91001571410015e-06, "loss": 0.4262, "step": 14497 }, { "epoch": 2.380986594954119, "grad_norm": 0.3125825530773969, "learning_rate": 4.909548690405777e-06, "loss": 0.4474, "step": 14498 }, { "epoch": 2.3811508221624615, "grad_norm": 0.3069349138335879, "learning_rate": 4.909081662263299e-06, "loss": 0.4183, "step": 14499 }, { "epoch": 2.3813150493708046, "grad_norm": 0.31701743882648536, "learning_rate": 4.908614629677794e-06, "loss": 0.4235, "step": 14500 }, { "epoch": 2.3814792765791473, "grad_norm": 0.34108602787338077, "learning_rate": 4.908147592654332e-06, "loss": 0.4231, "step": 14501 }, { "epoch": 2.38164350378749, "grad_norm": 0.3935501178826395, "learning_rate": 4.9076805511979845e-06, "loss": 0.4539, "step": 14502 }, { "epoch": 2.381807730995833, "grad_norm": 0.3131609275378316, "learning_rate": 4.907213505313825e-06, "loss": 0.4473, "step": 14503 }, { "epoch": 2.3819719582041756, "grad_norm": 0.3002432702582786, "learning_rate": 4.9067464550069275e-06, "loss": 0.442, "step": 14504 }, { "epoch": 2.3821361854125183, "grad_norm": 0.3096378279868265, "learning_rate": 4.906279400282362e-06, "loss": 0.4402, "step": 14505 }, { "epoch": 2.382300412620861, "grad_norm": 0.2763473957324622, "learning_rate": 4.905812341145204e-06, "loss": 0.4298, "step": 14506 }, { "epoch": 2.3824646398292035, "grad_norm": 0.26518791364843874, "learning_rate": 4.905345277600524e-06, "loss": 0.4342, "step": 14507 }, { "epoch": 2.3826288670375466, "grad_norm": 0.2906595974916181, "learning_rate": 4.904878209653394e-06, "loss": 0.4429, "step": 14508 }, { "epoch": 2.3827930942458893, "grad_norm": 0.3391787092663081, "learning_rate": 4.904411137308889e-06, "loss": 0.4355, "step": 14509 }, { "epoch": 2.382957321454232, "grad_norm": 0.300445441947826, "learning_rate": 4.9039440605720834e-06, "loss": 0.4338, "step": 14510 }, { "epoch": 2.3831215486625745, "grad_norm": 0.26723588598079895, "learning_rate": 4.903476979448045e-06, "loss": 0.4346, "step": 14511 }, { "epoch": 2.3832857758709176, "grad_norm": 0.2838691958214916, "learning_rate": 4.903009893941851e-06, "loss": 0.4504, "step": 14512 }, { "epoch": 2.3834500030792602, "grad_norm": 0.36319610087457616, "learning_rate": 4.902542804058573e-06, "loss": 0.4413, "step": 14513 }, { "epoch": 2.383614230287603, "grad_norm": 0.4271662136062928, "learning_rate": 4.902075709803284e-06, "loss": 0.4456, "step": 14514 }, { "epoch": 2.3837784574959455, "grad_norm": 0.5264160203486998, "learning_rate": 4.901608611181057e-06, "loss": 0.4405, "step": 14515 }, { "epoch": 2.383942684704288, "grad_norm": 0.28902641518996575, "learning_rate": 4.901141508196965e-06, "loss": 0.4243, "step": 14516 }, { "epoch": 2.3841069119126312, "grad_norm": 0.28998258912696195, "learning_rate": 4.900674400856082e-06, "loss": 0.4389, "step": 14517 }, { "epoch": 2.384271139120974, "grad_norm": 0.297485830160264, "learning_rate": 4.900207289163482e-06, "loss": 0.4473, "step": 14518 }, { "epoch": 2.3844353663293165, "grad_norm": 0.32857178706650425, "learning_rate": 4.899740173124236e-06, "loss": 0.437, "step": 14519 }, { "epoch": 2.3845995935376596, "grad_norm": 0.33976806970867174, "learning_rate": 4.899273052743418e-06, "loss": 0.439, "step": 14520 }, { "epoch": 2.3847638207460022, "grad_norm": 0.2981178461359611, "learning_rate": 4.898805928026102e-06, "loss": 0.4224, "step": 14521 }, { "epoch": 2.384928047954345, "grad_norm": 0.40828126605470355, "learning_rate": 4.8983387989773605e-06, "loss": 0.4499, "step": 14522 }, { "epoch": 2.3850922751626875, "grad_norm": 0.28611327371513295, "learning_rate": 4.8978716656022686e-06, "loss": 0.4221, "step": 14523 }, { "epoch": 2.38525650237103, "grad_norm": 0.39824063682909056, "learning_rate": 4.8974045279059e-06, "loss": 0.4388, "step": 14524 }, { "epoch": 2.3854207295793732, "grad_norm": 0.5609924737582214, "learning_rate": 4.896937385893327e-06, "loss": 0.4526, "step": 14525 }, { "epoch": 2.385584956787716, "grad_norm": 0.31889911235677965, "learning_rate": 4.896470239569622e-06, "loss": 0.4425, "step": 14526 }, { "epoch": 2.3857491839960585, "grad_norm": 0.302289456244296, "learning_rate": 4.8960030889398605e-06, "loss": 0.4016, "step": 14527 }, { "epoch": 2.385913411204401, "grad_norm": 0.3105265703326025, "learning_rate": 4.895535934009116e-06, "loss": 0.4446, "step": 14528 }, { "epoch": 2.3860776384127442, "grad_norm": 0.29037798400142933, "learning_rate": 4.895068774782463e-06, "loss": 0.4385, "step": 14529 }, { "epoch": 2.386241865621087, "grad_norm": 0.3254765727629465, "learning_rate": 4.894601611264973e-06, "loss": 0.4645, "step": 14530 }, { "epoch": 2.3864060928294295, "grad_norm": 0.28695733641227344, "learning_rate": 4.894134443461723e-06, "loss": 0.4496, "step": 14531 }, { "epoch": 2.386570320037772, "grad_norm": 0.31091703634070156, "learning_rate": 4.893667271377783e-06, "loss": 0.4481, "step": 14532 }, { "epoch": 2.386734547246115, "grad_norm": 0.3169028926886554, "learning_rate": 4.8932000950182316e-06, "loss": 0.4358, "step": 14533 }, { "epoch": 2.386898774454458, "grad_norm": 0.32378659995173825, "learning_rate": 4.892732914388138e-06, "loss": 0.4216, "step": 14534 }, { "epoch": 2.3870630016628005, "grad_norm": 0.3424600596134925, "learning_rate": 4.89226572949258e-06, "loss": 0.4347, "step": 14535 }, { "epoch": 2.387227228871143, "grad_norm": 0.4083489609261254, "learning_rate": 4.891798540336628e-06, "loss": 0.4335, "step": 14536 }, { "epoch": 2.387391456079486, "grad_norm": 0.31418734811658605, "learning_rate": 4.891331346925361e-06, "loss": 0.4284, "step": 14537 }, { "epoch": 2.387555683287829, "grad_norm": 0.3118148361256724, "learning_rate": 4.89086414926385e-06, "loss": 0.4376, "step": 14538 }, { "epoch": 2.3877199104961715, "grad_norm": 0.3205728376077856, "learning_rate": 4.890396947357169e-06, "loss": 0.4489, "step": 14539 }, { "epoch": 2.387884137704514, "grad_norm": 0.3037510041274979, "learning_rate": 4.889929741210394e-06, "loss": 0.4459, "step": 14540 }, { "epoch": 2.3880483649128568, "grad_norm": 0.29716183603278645, "learning_rate": 4.889462530828597e-06, "loss": 0.4372, "step": 14541 }, { "epoch": 2.3882125921212, "grad_norm": 0.29372871219067426, "learning_rate": 4.888995316216855e-06, "loss": 0.4338, "step": 14542 }, { "epoch": 2.3883768193295425, "grad_norm": 0.31376207464816375, "learning_rate": 4.888528097380241e-06, "loss": 0.4438, "step": 14543 }, { "epoch": 2.388541046537885, "grad_norm": 0.3638794588880243, "learning_rate": 4.88806087432383e-06, "loss": 0.4374, "step": 14544 }, { "epoch": 2.3887052737462278, "grad_norm": 0.4157192070799767, "learning_rate": 4.8875936470526956e-06, "loss": 0.4471, "step": 14545 }, { "epoch": 2.388869500954571, "grad_norm": 0.4629312247969477, "learning_rate": 4.887126415571912e-06, "loss": 0.4498, "step": 14546 }, { "epoch": 2.3890337281629135, "grad_norm": 0.35641292496732957, "learning_rate": 4.886659179886555e-06, "loss": 0.4136, "step": 14547 }, { "epoch": 2.389197955371256, "grad_norm": 0.29823085621147055, "learning_rate": 4.886191940001701e-06, "loss": 0.4372, "step": 14548 }, { "epoch": 2.3893621825795988, "grad_norm": 0.3648444197028791, "learning_rate": 4.88572469592242e-06, "loss": 0.4408, "step": 14549 }, { "epoch": 2.3895264097879414, "grad_norm": 0.3131996192986303, "learning_rate": 4.88525744765379e-06, "loss": 0.4271, "step": 14550 }, { "epoch": 2.3896906369962845, "grad_norm": 0.9393732568333811, "learning_rate": 4.884790195200884e-06, "loss": 0.4277, "step": 14551 }, { "epoch": 2.389854864204627, "grad_norm": 0.49807294959646065, "learning_rate": 4.88432293856878e-06, "loss": 0.4137, "step": 14552 }, { "epoch": 2.3900190914129698, "grad_norm": 0.2795714012023261, "learning_rate": 4.88385567776255e-06, "loss": 0.426, "step": 14553 }, { "epoch": 2.390183318621313, "grad_norm": 0.3680107524133055, "learning_rate": 4.883388412787269e-06, "loss": 0.4374, "step": 14554 }, { "epoch": 2.3903475458296555, "grad_norm": 0.2576176856027463, "learning_rate": 4.882921143648013e-06, "loss": 0.4269, "step": 14555 }, { "epoch": 2.390511773037998, "grad_norm": 0.33334827288723884, "learning_rate": 4.882453870349858e-06, "loss": 0.4424, "step": 14556 }, { "epoch": 2.3906760002463407, "grad_norm": 0.32855118523344423, "learning_rate": 4.881986592897875e-06, "loss": 0.4258, "step": 14557 }, { "epoch": 2.3908402274546834, "grad_norm": 0.2641498222991579, "learning_rate": 4.881519311297145e-06, "loss": 0.4335, "step": 14558 }, { "epoch": 2.3910044546630265, "grad_norm": 0.30007217089842214, "learning_rate": 4.881052025552737e-06, "loss": 0.4448, "step": 14559 }, { "epoch": 2.391168681871369, "grad_norm": 0.2652220391149274, "learning_rate": 4.880584735669731e-06, "loss": 0.4102, "step": 14560 }, { "epoch": 2.3913329090797117, "grad_norm": 0.7023406690762278, "learning_rate": 4.880117441653199e-06, "loss": 0.4472, "step": 14561 }, { "epoch": 2.3914971362880544, "grad_norm": 0.3832103540577619, "learning_rate": 4.879650143508217e-06, "loss": 0.443, "step": 14562 }, { "epoch": 2.3916613634963975, "grad_norm": 0.3768401551082294, "learning_rate": 4.879182841239863e-06, "loss": 0.4443, "step": 14563 }, { "epoch": 2.39182559070474, "grad_norm": 0.529999664933227, "learning_rate": 4.87871553485321e-06, "loss": 0.4524, "step": 14564 }, { "epoch": 2.3919898179130827, "grad_norm": 0.5284450836711834, "learning_rate": 4.878248224353334e-06, "loss": 0.4441, "step": 14565 }, { "epoch": 2.3921540451214254, "grad_norm": 0.32265336090808, "learning_rate": 4.877780909745308e-06, "loss": 0.4271, "step": 14566 }, { "epoch": 2.392318272329768, "grad_norm": 0.30649852459360966, "learning_rate": 4.8773135910342105e-06, "loss": 0.4307, "step": 14567 }, { "epoch": 2.392482499538111, "grad_norm": 0.34600620366711066, "learning_rate": 4.876846268225117e-06, "loss": 0.426, "step": 14568 }, { "epoch": 2.3926467267464537, "grad_norm": 0.2898991988895646, "learning_rate": 4.876378941323102e-06, "loss": 0.4361, "step": 14569 }, { "epoch": 2.3928109539547964, "grad_norm": 0.31657080709066865, "learning_rate": 4.87591161033324e-06, "loss": 0.4468, "step": 14570 }, { "epoch": 2.392975181163139, "grad_norm": 0.3682597438220124, "learning_rate": 4.875444275260609e-06, "loss": 0.4413, "step": 14571 }, { "epoch": 2.393139408371482, "grad_norm": 0.2818996138587926, "learning_rate": 4.8749769361102855e-06, "loss": 0.4281, "step": 14572 }, { "epoch": 2.3933036355798247, "grad_norm": 0.2760481393412478, "learning_rate": 4.874509592887342e-06, "loss": 0.4633, "step": 14573 }, { "epoch": 2.3934678627881674, "grad_norm": 0.3090199962692466, "learning_rate": 4.874042245596856e-06, "loss": 0.4345, "step": 14574 }, { "epoch": 2.39363208999651, "grad_norm": 0.3753236126182619, "learning_rate": 4.873574894243902e-06, "loss": 0.4334, "step": 14575 }, { "epoch": 2.393796317204853, "grad_norm": 0.49666749402603405, "learning_rate": 4.873107538833558e-06, "loss": 0.4138, "step": 14576 }, { "epoch": 2.3939605444131957, "grad_norm": 0.44873693999064757, "learning_rate": 4.8726401793709e-06, "loss": 0.4385, "step": 14577 }, { "epoch": 2.3941247716215384, "grad_norm": 0.5624188771958173, "learning_rate": 4.872172815861003e-06, "loss": 0.4535, "step": 14578 }, { "epoch": 2.394288998829881, "grad_norm": 0.36776524540715916, "learning_rate": 4.871705448308942e-06, "loss": 0.4431, "step": 14579 }, { "epoch": 2.394453226038224, "grad_norm": 0.40112504999237575, "learning_rate": 4.871238076719794e-06, "loss": 0.4004, "step": 14580 }, { "epoch": 2.3946174532465667, "grad_norm": 0.38816955768952405, "learning_rate": 4.8707707010986365e-06, "loss": 0.444, "step": 14581 }, { "epoch": 2.3947816804549094, "grad_norm": 0.40890090055450345, "learning_rate": 4.870303321450544e-06, "loss": 0.4244, "step": 14582 }, { "epoch": 2.394945907663252, "grad_norm": 0.347335496686221, "learning_rate": 4.869835937780592e-06, "loss": 0.4351, "step": 14583 }, { "epoch": 2.3951101348715946, "grad_norm": 0.37816795611763476, "learning_rate": 4.869368550093859e-06, "loss": 0.4281, "step": 14584 }, { "epoch": 2.3952743620799377, "grad_norm": 0.31915528359769213, "learning_rate": 4.868901158395418e-06, "loss": 0.4397, "step": 14585 }, { "epoch": 2.3954385892882804, "grad_norm": 0.33747799228179265, "learning_rate": 4.86843376269035e-06, "loss": 0.4453, "step": 14586 }, { "epoch": 2.395602816496623, "grad_norm": 0.3983997690577079, "learning_rate": 4.867966362983728e-06, "loss": 0.4484, "step": 14587 }, { "epoch": 2.3957670437049656, "grad_norm": 0.475030543908353, "learning_rate": 4.86749895928063e-06, "loss": 0.428, "step": 14588 }, { "epoch": 2.3959312709133087, "grad_norm": 0.3346654030306633, "learning_rate": 4.86703155158613e-06, "loss": 0.4342, "step": 14589 }, { "epoch": 2.3960954981216513, "grad_norm": 0.286620871577916, "learning_rate": 4.866564139905308e-06, "loss": 0.4243, "step": 14590 }, { "epoch": 2.396259725329994, "grad_norm": 0.3188961526696404, "learning_rate": 4.866096724243238e-06, "loss": 0.4302, "step": 14591 }, { "epoch": 2.3964239525383366, "grad_norm": 0.4410066533907783, "learning_rate": 4.8656293046049976e-06, "loss": 0.4246, "step": 14592 }, { "epoch": 2.3965881797466797, "grad_norm": 0.30105566284819324, "learning_rate": 4.865161880995663e-06, "loss": 0.4263, "step": 14593 }, { "epoch": 2.3967524069550223, "grad_norm": 0.30798326084830907, "learning_rate": 4.864694453420312e-06, "loss": 0.447, "step": 14594 }, { "epoch": 2.396916634163365, "grad_norm": 0.32150354987246904, "learning_rate": 4.864227021884018e-06, "loss": 0.4431, "step": 14595 }, { "epoch": 2.3970808613717076, "grad_norm": 0.42606120355256905, "learning_rate": 4.863759586391862e-06, "loss": 0.4228, "step": 14596 }, { "epoch": 2.3972450885800507, "grad_norm": 0.3043593996027058, "learning_rate": 4.863292146948919e-06, "loss": 0.4576, "step": 14597 }, { "epoch": 2.3974093157883933, "grad_norm": 0.24487691882867033, "learning_rate": 4.862824703560266e-06, "loss": 0.4333, "step": 14598 }, { "epoch": 2.397573542996736, "grad_norm": 0.4344466671080413, "learning_rate": 4.862357256230979e-06, "loss": 0.4571, "step": 14599 }, { "epoch": 2.3977377702050786, "grad_norm": 0.4551198744757983, "learning_rate": 4.861889804966136e-06, "loss": 0.4434, "step": 14600 }, { "epoch": 2.3979019974134212, "grad_norm": 0.3458326007684332, "learning_rate": 4.861422349770814e-06, "loss": 0.413, "step": 14601 }, { "epoch": 2.3980662246217643, "grad_norm": 0.28749146184798846, "learning_rate": 4.8609548906500895e-06, "loss": 0.4352, "step": 14602 }, { "epoch": 2.398230451830107, "grad_norm": 0.3190568605296688, "learning_rate": 4.860487427609039e-06, "loss": 0.412, "step": 14603 }, { "epoch": 2.3983946790384496, "grad_norm": 0.30187148550272713, "learning_rate": 4.860019960652741e-06, "loss": 0.438, "step": 14604 }, { "epoch": 2.3985589062467922, "grad_norm": 0.3017561582254539, "learning_rate": 4.859552489786272e-06, "loss": 0.4505, "step": 14605 }, { "epoch": 2.3987231334551353, "grad_norm": 1.293891871086484, "learning_rate": 4.85908501501471e-06, "loss": 0.469, "step": 14606 }, { "epoch": 2.398887360663478, "grad_norm": 0.5743909815937656, "learning_rate": 4.858617536343131e-06, "loss": 0.4301, "step": 14607 }, { "epoch": 2.3990515878718206, "grad_norm": 0.30495791593097715, "learning_rate": 4.858150053776612e-06, "loss": 0.4623, "step": 14608 }, { "epoch": 2.3992158150801632, "grad_norm": 0.34780932489427824, "learning_rate": 4.857682567320231e-06, "loss": 0.4429, "step": 14609 }, { "epoch": 2.3993800422885063, "grad_norm": 0.3017423370784942, "learning_rate": 4.857215076979065e-06, "loss": 0.4366, "step": 14610 }, { "epoch": 2.399544269496849, "grad_norm": 0.49134203031797363, "learning_rate": 4.856747582758193e-06, "loss": 0.4387, "step": 14611 }, { "epoch": 2.3997084967051916, "grad_norm": 0.36562730185281356, "learning_rate": 4.856280084662692e-06, "loss": 0.4644, "step": 14612 }, { "epoch": 2.3998727239135342, "grad_norm": 0.3097029681533732, "learning_rate": 4.855812582697637e-06, "loss": 0.4408, "step": 14613 }, { "epoch": 2.4000369511218773, "grad_norm": 0.38873300419101187, "learning_rate": 4.8553450768681075e-06, "loss": 0.4431, "step": 14614 }, { "epoch": 2.40020117833022, "grad_norm": 0.31761409622079567, "learning_rate": 4.854877567179182e-06, "loss": 0.4001, "step": 14615 }, { "epoch": 2.4003654055385626, "grad_norm": 0.2777570280076414, "learning_rate": 4.8544100536359375e-06, "loss": 0.4245, "step": 14616 }, { "epoch": 2.4005296327469052, "grad_norm": 0.31920044070247267, "learning_rate": 4.853942536243449e-06, "loss": 0.4412, "step": 14617 }, { "epoch": 2.400693859955248, "grad_norm": 0.37106495638871273, "learning_rate": 4.8534750150067965e-06, "loss": 0.4423, "step": 14618 }, { "epoch": 2.400858087163591, "grad_norm": 0.30136802472197904, "learning_rate": 4.853007489931059e-06, "loss": 0.4284, "step": 14619 }, { "epoch": 2.4010223143719336, "grad_norm": 0.34743551515882315, "learning_rate": 4.8525399610213115e-06, "loss": 0.4357, "step": 14620 }, { "epoch": 2.4011865415802762, "grad_norm": 0.4630662332483221, "learning_rate": 4.852072428282635e-06, "loss": 0.4507, "step": 14621 }, { "epoch": 2.401350768788619, "grad_norm": 0.34421273907801864, "learning_rate": 4.851604891720104e-06, "loss": 0.4261, "step": 14622 }, { "epoch": 2.401514995996962, "grad_norm": 0.30429866073920575, "learning_rate": 4.851137351338798e-06, "loss": 0.4375, "step": 14623 }, { "epoch": 2.4016792232053046, "grad_norm": 0.30710285649408753, "learning_rate": 4.850669807143795e-06, "loss": 0.4438, "step": 14624 }, { "epoch": 2.401843450413647, "grad_norm": 0.3111504370306694, "learning_rate": 4.850202259140173e-06, "loss": 0.4482, "step": 14625 }, { "epoch": 2.40200767762199, "grad_norm": 0.2772359140975464, "learning_rate": 4.84973470733301e-06, "loss": 0.4459, "step": 14626 }, { "epoch": 2.402171904830333, "grad_norm": 0.34639818656776106, "learning_rate": 4.849267151727385e-06, "loss": 0.4418, "step": 14627 }, { "epoch": 2.4023361320386756, "grad_norm": 0.2964347781930256, "learning_rate": 4.848799592328374e-06, "loss": 0.4443, "step": 14628 }, { "epoch": 2.402500359247018, "grad_norm": 0.33595929206815733, "learning_rate": 4.848332029141055e-06, "loss": 0.4301, "step": 14629 }, { "epoch": 2.402664586455361, "grad_norm": 0.4015804960132361, "learning_rate": 4.847864462170509e-06, "loss": 0.4376, "step": 14630 }, { "epoch": 2.402828813663704, "grad_norm": 0.36583852333102235, "learning_rate": 4.847396891421814e-06, "loss": 0.4408, "step": 14631 }, { "epoch": 2.4029930408720466, "grad_norm": 0.3243934301895329, "learning_rate": 4.8469293169000455e-06, "loss": 0.432, "step": 14632 }, { "epoch": 2.403157268080389, "grad_norm": 1.1242199526297239, "learning_rate": 4.846461738610282e-06, "loss": 0.4221, "step": 14633 }, { "epoch": 2.403321495288732, "grad_norm": 0.6124803657632618, "learning_rate": 4.845994156557604e-06, "loss": 0.4373, "step": 14634 }, { "epoch": 2.4034857224970745, "grad_norm": 0.31726548703950835, "learning_rate": 4.84552657074709e-06, "loss": 0.4681, "step": 14635 }, { "epoch": 2.4036499497054176, "grad_norm": 0.3402830482709174, "learning_rate": 4.845058981183817e-06, "loss": 0.4427, "step": 14636 }, { "epoch": 2.40381417691376, "grad_norm": 0.41415711718620096, "learning_rate": 4.8445913878728644e-06, "loss": 0.4509, "step": 14637 }, { "epoch": 2.403978404122103, "grad_norm": 0.30062268597490205, "learning_rate": 4.844123790819309e-06, "loss": 0.4277, "step": 14638 }, { "epoch": 2.4041426313304455, "grad_norm": 0.3777055667239158, "learning_rate": 4.84365619002823e-06, "loss": 0.4244, "step": 14639 }, { "epoch": 2.4043068585387886, "grad_norm": 0.35823953526949825, "learning_rate": 4.84318858550471e-06, "loss": 0.4382, "step": 14640 }, { "epoch": 2.404471085747131, "grad_norm": 0.29672629622495195, "learning_rate": 4.842720977253822e-06, "loss": 0.4321, "step": 14641 }, { "epoch": 2.404635312955474, "grad_norm": 0.4199981609070766, "learning_rate": 4.842253365280647e-06, "loss": 0.4396, "step": 14642 }, { "epoch": 2.4047995401638165, "grad_norm": 0.3244624973865936, "learning_rate": 4.841785749590264e-06, "loss": 0.4583, "step": 14643 }, { "epoch": 2.4049637673721596, "grad_norm": 0.30771806534999885, "learning_rate": 4.841318130187752e-06, "loss": 0.4359, "step": 14644 }, { "epoch": 2.405127994580502, "grad_norm": 0.29862090519592716, "learning_rate": 4.840850507078189e-06, "loss": 0.4414, "step": 14645 }, { "epoch": 2.405292221788845, "grad_norm": 0.32810428176479883, "learning_rate": 4.840382880266654e-06, "loss": 0.4249, "step": 14646 }, { "epoch": 2.4054564489971875, "grad_norm": 0.36740941744035893, "learning_rate": 4.8399152497582255e-06, "loss": 0.4569, "step": 14647 }, { "epoch": 2.4056206762055306, "grad_norm": 0.2634679213019516, "learning_rate": 4.839447615557984e-06, "loss": 0.4461, "step": 14648 }, { "epoch": 2.405784903413873, "grad_norm": 0.29881940701178017, "learning_rate": 4.838979977671007e-06, "loss": 0.4378, "step": 14649 }, { "epoch": 2.405949130622216, "grad_norm": 0.29574009739461327, "learning_rate": 4.838512336102374e-06, "loss": 0.4135, "step": 14650 }, { "epoch": 2.4061133578305585, "grad_norm": 0.30183737594772203, "learning_rate": 4.838044690857163e-06, "loss": 0.4418, "step": 14651 }, { "epoch": 2.406277585038901, "grad_norm": 0.3247926430668289, "learning_rate": 4.8375770419404566e-06, "loss": 0.4605, "step": 14652 }, { "epoch": 2.406441812247244, "grad_norm": 0.2958133357958267, "learning_rate": 4.837109389357329e-06, "loss": 0.4294, "step": 14653 }, { "epoch": 2.406606039455587, "grad_norm": 0.36391652836693766, "learning_rate": 4.836641733112861e-06, "loss": 0.4277, "step": 14654 }, { "epoch": 2.4067702666639295, "grad_norm": 0.332486098628157, "learning_rate": 4.836174073212136e-06, "loss": 0.4281, "step": 14655 }, { "epoch": 2.406934493872272, "grad_norm": 0.31970043225608835, "learning_rate": 4.835706409660227e-06, "loss": 0.4152, "step": 14656 }, { "epoch": 2.407098721080615, "grad_norm": 0.38834894755127186, "learning_rate": 4.8352387424622174e-06, "loss": 0.4234, "step": 14657 }, { "epoch": 2.407262948288958, "grad_norm": 0.4903854226706532, "learning_rate": 4.834771071623184e-06, "loss": 0.4418, "step": 14658 }, { "epoch": 2.4074271754973005, "grad_norm": 0.36363960384397537, "learning_rate": 4.834303397148208e-06, "loss": 0.4549, "step": 14659 }, { "epoch": 2.407591402705643, "grad_norm": 0.29685954160347816, "learning_rate": 4.8338357190423684e-06, "loss": 0.4276, "step": 14660 }, { "epoch": 2.407755629913986, "grad_norm": 0.29239727092336887, "learning_rate": 4.833368037310746e-06, "loss": 0.4477, "step": 14661 }, { "epoch": 2.407919857122329, "grad_norm": 0.25791649206692546, "learning_rate": 4.832900351958416e-06, "loss": 0.4322, "step": 14662 }, { "epoch": 2.4080840843306714, "grad_norm": 0.3088996194131839, "learning_rate": 4.832432662990462e-06, "loss": 0.4291, "step": 14663 }, { "epoch": 2.408248311539014, "grad_norm": 0.41234587740782985, "learning_rate": 4.831964970411962e-06, "loss": 0.4531, "step": 14664 }, { "epoch": 2.408412538747357, "grad_norm": 0.28658592688301243, "learning_rate": 4.831497274227996e-06, "loss": 0.4164, "step": 14665 }, { "epoch": 2.4085767659557, "grad_norm": 0.340510853960804, "learning_rate": 4.831029574443644e-06, "loss": 0.4475, "step": 14666 }, { "epoch": 2.4087409931640424, "grad_norm": 0.28144928044547207, "learning_rate": 4.830561871063983e-06, "loss": 0.4171, "step": 14667 }, { "epoch": 2.408905220372385, "grad_norm": 0.28529132479570535, "learning_rate": 4.830094164094096e-06, "loss": 0.4457, "step": 14668 }, { "epoch": 2.4090694475807277, "grad_norm": 0.3332582267058933, "learning_rate": 4.829626453539062e-06, "loss": 0.4128, "step": 14669 }, { "epoch": 2.409233674789071, "grad_norm": 0.3058023319027211, "learning_rate": 4.829158739403962e-06, "loss": 0.4351, "step": 14670 }, { "epoch": 2.4093979019974134, "grad_norm": 0.294349833850524, "learning_rate": 4.828691021693872e-06, "loss": 0.4613, "step": 14671 }, { "epoch": 2.409562129205756, "grad_norm": 0.45108829225684566, "learning_rate": 4.828223300413873e-06, "loss": 0.4235, "step": 14672 }, { "epoch": 2.4097263564140987, "grad_norm": 0.29688811430073514, "learning_rate": 4.8277555755690495e-06, "loss": 0.45, "step": 14673 }, { "epoch": 2.409890583622442, "grad_norm": 0.3621360599742629, "learning_rate": 4.827287847164475e-06, "loss": 0.4473, "step": 14674 }, { "epoch": 2.4100548108307844, "grad_norm": 0.27637854458418865, "learning_rate": 4.826820115205234e-06, "loss": 0.4417, "step": 14675 }, { "epoch": 2.410219038039127, "grad_norm": 0.5931964755976143, "learning_rate": 4.8263523796964045e-06, "loss": 0.4292, "step": 14676 }, { "epoch": 2.4103832652474697, "grad_norm": 0.27390405093577597, "learning_rate": 4.825884640643067e-06, "loss": 0.4545, "step": 14677 }, { "epoch": 2.410547492455813, "grad_norm": 0.397743748353218, "learning_rate": 4.825416898050302e-06, "loss": 0.4375, "step": 14678 }, { "epoch": 2.4107117196641554, "grad_norm": 0.31255784691419425, "learning_rate": 4.82494915192319e-06, "loss": 0.4449, "step": 14679 }, { "epoch": 2.410875946872498, "grad_norm": 1.4403416299574465, "learning_rate": 4.824481402266809e-06, "loss": 0.442, "step": 14680 }, { "epoch": 2.4110401740808407, "grad_norm": 0.33922366749113997, "learning_rate": 4.82401364908624e-06, "loss": 0.4288, "step": 14681 }, { "epoch": 2.411204401289184, "grad_norm": 0.31757509275868, "learning_rate": 4.823545892386567e-06, "loss": 0.4422, "step": 14682 }, { "epoch": 2.4113686284975264, "grad_norm": 0.304788723585256, "learning_rate": 4.823078132172865e-06, "loss": 0.4255, "step": 14683 }, { "epoch": 2.411532855705869, "grad_norm": 0.27335390554437755, "learning_rate": 4.822610368450218e-06, "loss": 0.4283, "step": 14684 }, { "epoch": 2.4116970829142117, "grad_norm": 0.3469198590453585, "learning_rate": 4.822142601223704e-06, "loss": 0.4274, "step": 14685 }, { "epoch": 2.4118613101225543, "grad_norm": 0.3807364827960831, "learning_rate": 4.8216748304984055e-06, "loss": 0.4338, "step": 14686 }, { "epoch": 2.4120255373308974, "grad_norm": 0.3371271371858553, "learning_rate": 4.8212070562794e-06, "loss": 0.435, "step": 14687 }, { "epoch": 2.41218976453924, "grad_norm": 0.43622776633348503, "learning_rate": 4.820739278571771e-06, "loss": 0.4447, "step": 14688 }, { "epoch": 2.4123539917475827, "grad_norm": 0.334119379770926, "learning_rate": 4.820271497380598e-06, "loss": 0.4353, "step": 14689 }, { "epoch": 2.4125182189559253, "grad_norm": 0.41180417575499706, "learning_rate": 4.819803712710961e-06, "loss": 0.4331, "step": 14690 }, { "epoch": 2.4126824461642684, "grad_norm": 0.31571247354022247, "learning_rate": 4.819335924567942e-06, "loss": 0.4396, "step": 14691 }, { "epoch": 2.412846673372611, "grad_norm": 0.34680247785096713, "learning_rate": 4.818868132956619e-06, "loss": 0.4331, "step": 14692 }, { "epoch": 2.4130109005809537, "grad_norm": 0.3080440473620233, "learning_rate": 4.818400337882075e-06, "loss": 0.43, "step": 14693 }, { "epoch": 2.4131751277892963, "grad_norm": 0.2817006501283312, "learning_rate": 4.8179325393493906e-06, "loss": 0.449, "step": 14694 }, { "epoch": 2.4133393549976394, "grad_norm": 0.3112562541286652, "learning_rate": 4.817464737363646e-06, "loss": 0.4373, "step": 14695 }, { "epoch": 2.413503582205982, "grad_norm": 0.3130396013840511, "learning_rate": 4.816996931929922e-06, "loss": 0.4451, "step": 14696 }, { "epoch": 2.4136678094143247, "grad_norm": 0.32321218750431113, "learning_rate": 4.816529123053298e-06, "loss": 0.4302, "step": 14697 }, { "epoch": 2.4138320366226673, "grad_norm": 0.3698459892322004, "learning_rate": 4.816061310738859e-06, "loss": 0.4359, "step": 14698 }, { "epoch": 2.4139962638310104, "grad_norm": 0.42413402293263597, "learning_rate": 4.815593494991681e-06, "loss": 0.4245, "step": 14699 }, { "epoch": 2.414160491039353, "grad_norm": 0.30761605557733, "learning_rate": 4.815125675816848e-06, "loss": 0.4256, "step": 14700 }, { "epoch": 2.4143247182476957, "grad_norm": 0.24302570900623616, "learning_rate": 4.81465785321944e-06, "loss": 0.4306, "step": 14701 }, { "epoch": 2.4144889454560383, "grad_norm": 0.2927736876723968, "learning_rate": 4.814190027204536e-06, "loss": 0.4504, "step": 14702 }, { "epoch": 2.414653172664381, "grad_norm": 0.38682221463914745, "learning_rate": 4.813722197777223e-06, "loss": 0.4254, "step": 14703 }, { "epoch": 2.414817399872724, "grad_norm": 0.29190540997776415, "learning_rate": 4.813254364942578e-06, "loss": 0.4435, "step": 14704 }, { "epoch": 2.4149816270810667, "grad_norm": 0.300206389002567, "learning_rate": 4.81278652870568e-06, "loss": 0.4327, "step": 14705 }, { "epoch": 2.4151458542894093, "grad_norm": 0.39121257578811, "learning_rate": 4.812318689071613e-06, "loss": 0.4254, "step": 14706 }, { "epoch": 2.415310081497752, "grad_norm": 0.2755867218892501, "learning_rate": 4.811850846045459e-06, "loss": 0.4484, "step": 14707 }, { "epoch": 2.415474308706095, "grad_norm": 0.3882210233624939, "learning_rate": 4.811382999632297e-06, "loss": 0.4283, "step": 14708 }, { "epoch": 2.4156385359144377, "grad_norm": 0.33194414181235643, "learning_rate": 4.81091514983721e-06, "loss": 0.4329, "step": 14709 }, { "epoch": 2.4158027631227803, "grad_norm": 0.5485778450887616, "learning_rate": 4.810447296665278e-06, "loss": 0.4398, "step": 14710 }, { "epoch": 2.415966990331123, "grad_norm": 0.2714483130812466, "learning_rate": 4.809979440121583e-06, "loss": 0.4438, "step": 14711 }, { "epoch": 2.416131217539466, "grad_norm": 0.442827826944641, "learning_rate": 4.8095115802112055e-06, "loss": 0.4292, "step": 14712 }, { "epoch": 2.4162954447478087, "grad_norm": 0.2997106074840441, "learning_rate": 4.809043716939229e-06, "loss": 0.43, "step": 14713 }, { "epoch": 2.4164596719561513, "grad_norm": 0.3101144702754857, "learning_rate": 4.8085758503107335e-06, "loss": 0.4637, "step": 14714 }, { "epoch": 2.416623899164494, "grad_norm": 0.4345488387599415, "learning_rate": 4.8081079803308006e-06, "loss": 0.441, "step": 14715 }, { "epoch": 2.416788126372837, "grad_norm": 0.31524736528031577, "learning_rate": 4.807640107004511e-06, "loss": 0.4385, "step": 14716 }, { "epoch": 2.4169523535811797, "grad_norm": 0.9910231052912541, "learning_rate": 4.807172230336947e-06, "loss": 0.451, "step": 14717 }, { "epoch": 2.4171165807895223, "grad_norm": 0.353751030084181, "learning_rate": 4.806704350333191e-06, "loss": 0.4298, "step": 14718 }, { "epoch": 2.417280807997865, "grad_norm": 0.2960552918752241, "learning_rate": 4.8062364669983255e-06, "loss": 0.4353, "step": 14719 }, { "epoch": 2.4174450352062076, "grad_norm": 0.44033970780659726, "learning_rate": 4.80576858033743e-06, "loss": 0.4103, "step": 14720 }, { "epoch": 2.4176092624145507, "grad_norm": 0.37219487130298756, "learning_rate": 4.8053006903555846e-06, "loss": 0.4263, "step": 14721 }, { "epoch": 2.4177734896228933, "grad_norm": 0.30714372461167905, "learning_rate": 4.804832797057875e-06, "loss": 0.4256, "step": 14722 }, { "epoch": 2.417937716831236, "grad_norm": 0.31272508721749476, "learning_rate": 4.804364900449382e-06, "loss": 0.4506, "step": 14723 }, { "epoch": 2.4181019440395786, "grad_norm": 0.4030508319923359, "learning_rate": 4.803897000535186e-06, "loss": 0.4464, "step": 14724 }, { "epoch": 2.4182661712479216, "grad_norm": 0.3141823854058452, "learning_rate": 4.8034290973203695e-06, "loss": 0.4415, "step": 14725 }, { "epoch": 2.4184303984562643, "grad_norm": 0.33834956229051, "learning_rate": 4.802961190810014e-06, "loss": 0.4135, "step": 14726 }, { "epoch": 2.418594625664607, "grad_norm": 0.33520231537538203, "learning_rate": 4.802493281009202e-06, "loss": 0.4389, "step": 14727 }, { "epoch": 2.4187588528729496, "grad_norm": 0.289427403704886, "learning_rate": 4.802025367923017e-06, "loss": 0.4206, "step": 14728 }, { "epoch": 2.4189230800812926, "grad_norm": 0.31902463019262406, "learning_rate": 4.801557451556538e-06, "loss": 0.4171, "step": 14729 }, { "epoch": 2.4190873072896353, "grad_norm": 0.34344005937323363, "learning_rate": 4.801089531914847e-06, "loss": 0.4319, "step": 14730 }, { "epoch": 2.419251534497978, "grad_norm": 0.3326226745260404, "learning_rate": 4.80062160900303e-06, "loss": 0.4359, "step": 14731 }, { "epoch": 2.4194157617063206, "grad_norm": 0.34417344186097304, "learning_rate": 4.800153682826166e-06, "loss": 0.4375, "step": 14732 }, { "epoch": 2.4195799889146636, "grad_norm": 0.2824968263701154, "learning_rate": 4.799685753389338e-06, "loss": 0.4317, "step": 14733 }, { "epoch": 2.4197442161230063, "grad_norm": 0.4136191893113875, "learning_rate": 4.799217820697626e-06, "loss": 0.4159, "step": 14734 }, { "epoch": 2.419908443331349, "grad_norm": 0.31603338795216673, "learning_rate": 4.7987498847561146e-06, "loss": 0.4325, "step": 14735 }, { "epoch": 2.4200726705396916, "grad_norm": 0.34601978001013006, "learning_rate": 4.798281945569887e-06, "loss": 0.4193, "step": 14736 }, { "epoch": 2.420236897748034, "grad_norm": 0.31763167069909876, "learning_rate": 4.797814003144023e-06, "loss": 0.4432, "step": 14737 }, { "epoch": 2.4204011249563773, "grad_norm": 0.47743166016080013, "learning_rate": 4.797346057483606e-06, "loss": 0.4225, "step": 14738 }, { "epoch": 2.42056535216472, "grad_norm": 0.36547044513866017, "learning_rate": 4.796878108593718e-06, "loss": 0.4388, "step": 14739 }, { "epoch": 2.4207295793730625, "grad_norm": 0.3777551355717173, "learning_rate": 4.796410156479443e-06, "loss": 0.4363, "step": 14740 }, { "epoch": 2.420893806581405, "grad_norm": 0.35697617648628965, "learning_rate": 4.79594220114586e-06, "loss": 0.4224, "step": 14741 }, { "epoch": 2.4210580337897483, "grad_norm": 0.3862378017848256, "learning_rate": 4.795474242598054e-06, "loss": 0.4375, "step": 14742 }, { "epoch": 2.421222260998091, "grad_norm": 0.30417337392006183, "learning_rate": 4.7950062808411085e-06, "loss": 0.4333, "step": 14743 }, { "epoch": 2.4213864882064335, "grad_norm": 0.42732171860737683, "learning_rate": 4.794538315880103e-06, "loss": 0.4333, "step": 14744 }, { "epoch": 2.421550715414776, "grad_norm": 0.31445632031102666, "learning_rate": 4.7940703477201225e-06, "loss": 0.4304, "step": 14745 }, { "epoch": 2.4217149426231193, "grad_norm": 0.6221104220450133, "learning_rate": 4.793602376366248e-06, "loss": 0.4356, "step": 14746 }, { "epoch": 2.421879169831462, "grad_norm": 0.32177085663706345, "learning_rate": 4.793134401823564e-06, "loss": 0.4323, "step": 14747 }, { "epoch": 2.4220433970398045, "grad_norm": 0.410211944772348, "learning_rate": 4.792666424097151e-06, "loss": 0.4331, "step": 14748 }, { "epoch": 2.422207624248147, "grad_norm": 0.3075597976329248, "learning_rate": 4.7921984431920935e-06, "loss": 0.43, "step": 14749 }, { "epoch": 2.4223718514564903, "grad_norm": 0.35261253135593046, "learning_rate": 4.791730459113472e-06, "loss": 0.4492, "step": 14750 }, { "epoch": 2.422536078664833, "grad_norm": 0.36899376715600435, "learning_rate": 4.791262471866372e-06, "loss": 0.4108, "step": 14751 }, { "epoch": 2.4227003058731755, "grad_norm": 0.37434397764969857, "learning_rate": 4.790794481455874e-06, "loss": 0.4432, "step": 14752 }, { "epoch": 2.422864533081518, "grad_norm": 0.31972500121132164, "learning_rate": 4.7903264878870635e-06, "loss": 0.4082, "step": 14753 }, { "epoch": 2.423028760289861, "grad_norm": 0.3123339025336729, "learning_rate": 4.78985849116502e-06, "loss": 0.4427, "step": 14754 }, { "epoch": 2.423192987498204, "grad_norm": 0.2795028420999253, "learning_rate": 4.789390491294827e-06, "loss": 0.433, "step": 14755 }, { "epoch": 2.4233572147065465, "grad_norm": 0.328024548668862, "learning_rate": 4.78892248828157e-06, "loss": 0.4298, "step": 14756 }, { "epoch": 2.423521441914889, "grad_norm": 0.31231298140414665, "learning_rate": 4.788454482130332e-06, "loss": 0.4279, "step": 14757 }, { "epoch": 2.423685669123232, "grad_norm": 0.3172952986153162, "learning_rate": 4.787986472846193e-06, "loss": 0.4454, "step": 14758 }, { "epoch": 2.423849896331575, "grad_norm": 0.35895962136883086, "learning_rate": 4.787518460434238e-06, "loss": 0.4469, "step": 14759 }, { "epoch": 2.4240141235399175, "grad_norm": 0.25783531582906527, "learning_rate": 4.7870504448995495e-06, "loss": 0.4376, "step": 14760 }, { "epoch": 2.42417835074826, "grad_norm": 0.31120025248246125, "learning_rate": 4.78658242624721e-06, "loss": 0.4412, "step": 14761 }, { "epoch": 2.424342577956603, "grad_norm": 0.28799008577143, "learning_rate": 4.786114404482305e-06, "loss": 0.4224, "step": 14762 }, { "epoch": 2.424506805164946, "grad_norm": 0.32125630055880444, "learning_rate": 4.7856463796099156e-06, "loss": 0.4404, "step": 14763 }, { "epoch": 2.4246710323732885, "grad_norm": 0.32524375153824536, "learning_rate": 4.785178351635124e-06, "loss": 0.4425, "step": 14764 }, { "epoch": 2.424835259581631, "grad_norm": 0.43420083943177323, "learning_rate": 4.784710320563016e-06, "loss": 0.4355, "step": 14765 }, { "epoch": 2.424999486789974, "grad_norm": 0.34655407456659604, "learning_rate": 4.784242286398674e-06, "loss": 0.4326, "step": 14766 }, { "epoch": 2.425163713998317, "grad_norm": 0.2747114594008047, "learning_rate": 4.78377424914718e-06, "loss": 0.426, "step": 14767 }, { "epoch": 2.4253279412066595, "grad_norm": 0.34923290616290603, "learning_rate": 4.78330620881362e-06, "loss": 0.4456, "step": 14768 }, { "epoch": 2.425492168415002, "grad_norm": 0.26275802173147667, "learning_rate": 4.782838165403076e-06, "loss": 0.4401, "step": 14769 }, { "epoch": 2.425656395623345, "grad_norm": 0.3533306405127362, "learning_rate": 4.7823701189206295e-06, "loss": 0.4389, "step": 14770 }, { "epoch": 2.4258206228316874, "grad_norm": 0.3336264849677881, "learning_rate": 4.781902069371367e-06, "loss": 0.4475, "step": 14771 }, { "epoch": 2.4259848500400305, "grad_norm": 0.496776075438404, "learning_rate": 4.78143401676037e-06, "loss": 0.4364, "step": 14772 }, { "epoch": 2.426149077248373, "grad_norm": 0.3384395539167162, "learning_rate": 4.780965961092722e-06, "loss": 0.4439, "step": 14773 }, { "epoch": 2.426313304456716, "grad_norm": 0.33369923606988255, "learning_rate": 4.78049790237351e-06, "loss": 0.4317, "step": 14774 }, { "epoch": 2.4264775316650584, "grad_norm": 0.29929761882085637, "learning_rate": 4.780029840607812e-06, "loss": 0.4413, "step": 14775 }, { "epoch": 2.4266417588734015, "grad_norm": 0.30729134782795475, "learning_rate": 4.7795617758007145e-06, "loss": 0.429, "step": 14776 }, { "epoch": 2.426805986081744, "grad_norm": 0.28668811524646165, "learning_rate": 4.779093707957303e-06, "loss": 0.4459, "step": 14777 }, { "epoch": 2.4269702132900868, "grad_norm": 0.9480953824797685, "learning_rate": 4.7786256370826586e-06, "loss": 0.4403, "step": 14778 }, { "epoch": 2.4271344404984294, "grad_norm": 0.2905434139198909, "learning_rate": 4.7781575631818645e-06, "loss": 0.44, "step": 14779 }, { "epoch": 2.4272986677067725, "grad_norm": 0.36488714133691247, "learning_rate": 4.777689486260006e-06, "loss": 0.4242, "step": 14780 }, { "epoch": 2.427462894915115, "grad_norm": 0.5288134862615336, "learning_rate": 4.777221406322168e-06, "loss": 0.4292, "step": 14781 }, { "epoch": 2.4276271221234578, "grad_norm": 0.2905834476410685, "learning_rate": 4.776753323373432e-06, "loss": 0.4505, "step": 14782 }, { "epoch": 2.4277913493318004, "grad_norm": 0.33826065646395237, "learning_rate": 4.7762852374188815e-06, "loss": 0.4299, "step": 14783 }, { "epoch": 2.4279555765401435, "grad_norm": 0.40177622398477075, "learning_rate": 4.7758171484636015e-06, "loss": 0.4552, "step": 14784 }, { "epoch": 2.428119803748486, "grad_norm": 0.27650968021480343, "learning_rate": 4.775349056512676e-06, "loss": 0.431, "step": 14785 }, { "epoch": 2.4282840309568288, "grad_norm": 0.4170100154683804, "learning_rate": 4.77488096157119e-06, "loss": 0.4399, "step": 14786 }, { "epoch": 2.4284482581651714, "grad_norm": 0.3064746958240771, "learning_rate": 4.774412863644227e-06, "loss": 0.4325, "step": 14787 }, { "epoch": 2.428612485373514, "grad_norm": 0.6095324612409638, "learning_rate": 4.773944762736868e-06, "loss": 0.4377, "step": 14788 }, { "epoch": 2.428776712581857, "grad_norm": 0.2843106785607604, "learning_rate": 4.7734766588542005e-06, "loss": 0.4241, "step": 14789 }, { "epoch": 2.4289409397901998, "grad_norm": 0.3693598879437371, "learning_rate": 4.773008552001308e-06, "loss": 0.4398, "step": 14790 }, { "epoch": 2.4291051669985424, "grad_norm": 0.2911478294752035, "learning_rate": 4.772540442183273e-06, "loss": 0.431, "step": 14791 }, { "epoch": 2.429269394206885, "grad_norm": 0.27983158639217526, "learning_rate": 4.772072329405182e-06, "loss": 0.432, "step": 14792 }, { "epoch": 2.429433621415228, "grad_norm": 0.35636279705662716, "learning_rate": 4.771604213672116e-06, "loss": 0.4412, "step": 14793 }, { "epoch": 2.4295978486235708, "grad_norm": 0.3399555636216479, "learning_rate": 4.771136094989162e-06, "loss": 0.435, "step": 14794 }, { "epoch": 2.4297620758319134, "grad_norm": 0.34618491205707147, "learning_rate": 4.770667973361403e-06, "loss": 0.4566, "step": 14795 }, { "epoch": 2.429926303040256, "grad_norm": 0.28967649101482, "learning_rate": 4.770199848793924e-06, "loss": 0.4202, "step": 14796 }, { "epoch": 2.430090530248599, "grad_norm": 0.2779166934004358, "learning_rate": 4.7697317212918075e-06, "loss": 0.4289, "step": 14797 }, { "epoch": 2.4302547574569417, "grad_norm": 0.2812164799099954, "learning_rate": 4.76926359086014e-06, "loss": 0.4519, "step": 14798 }, { "epoch": 2.4304189846652844, "grad_norm": 0.32263573432382836, "learning_rate": 4.768795457504005e-06, "loss": 0.4667, "step": 14799 }, { "epoch": 2.430583211873627, "grad_norm": 0.28947225570522306, "learning_rate": 4.768327321228487e-06, "loss": 0.4348, "step": 14800 }, { "epoch": 2.43074743908197, "grad_norm": 0.2764048458026499, "learning_rate": 4.7678591820386705e-06, "loss": 0.4371, "step": 14801 }, { "epoch": 2.4309116662903127, "grad_norm": 0.2854607261745386, "learning_rate": 4.7673910399396396e-06, "loss": 0.4262, "step": 14802 }, { "epoch": 2.4310758934986554, "grad_norm": 0.26922749726743767, "learning_rate": 4.766922894936479e-06, "loss": 0.4454, "step": 14803 }, { "epoch": 2.431240120706998, "grad_norm": 0.2646681350023495, "learning_rate": 4.766454747034273e-06, "loss": 0.4247, "step": 14804 }, { "epoch": 2.4314043479153407, "grad_norm": 0.34550230812725563, "learning_rate": 4.765986596238106e-06, "loss": 0.4341, "step": 14805 }, { "epoch": 2.4315685751236837, "grad_norm": 0.2869108230808502, "learning_rate": 4.765518442553063e-06, "loss": 0.417, "step": 14806 }, { "epoch": 2.4317328023320264, "grad_norm": 0.2849337599012945, "learning_rate": 4.765050285984229e-06, "loss": 0.4495, "step": 14807 }, { "epoch": 2.431897029540369, "grad_norm": 0.4105925836148604, "learning_rate": 4.7645821265366875e-06, "loss": 0.425, "step": 14808 }, { "epoch": 2.4320612567487117, "grad_norm": 0.3121318395626099, "learning_rate": 4.764113964215523e-06, "loss": 0.4407, "step": 14809 }, { "epoch": 2.4322254839570547, "grad_norm": 0.28767812785841906, "learning_rate": 4.763645799025822e-06, "loss": 0.4481, "step": 14810 }, { "epoch": 2.4323897111653974, "grad_norm": 0.3154937609679195, "learning_rate": 4.763177630972669e-06, "loss": 0.4548, "step": 14811 }, { "epoch": 2.43255393837374, "grad_norm": 0.3992594414402425, "learning_rate": 4.762709460061147e-06, "loss": 0.4427, "step": 14812 }, { "epoch": 2.4327181655820826, "grad_norm": 0.3513545926571055, "learning_rate": 4.762241286296342e-06, "loss": 0.47, "step": 14813 }, { "epoch": 2.4328823927904257, "grad_norm": 0.29704178929318303, "learning_rate": 4.761773109683338e-06, "loss": 0.4328, "step": 14814 }, { "epoch": 2.4330466199987684, "grad_norm": 0.31782862402263434, "learning_rate": 4.761304930227222e-06, "loss": 0.4252, "step": 14815 }, { "epoch": 2.433210847207111, "grad_norm": 0.28176998645123574, "learning_rate": 4.760836747933077e-06, "loss": 0.4255, "step": 14816 }, { "epoch": 2.4333750744154536, "grad_norm": 0.3323179661972225, "learning_rate": 4.760368562805988e-06, "loss": 0.4447, "step": 14817 }, { "epoch": 2.4335393016237967, "grad_norm": 0.3144062587438668, "learning_rate": 4.759900374851038e-06, "loss": 0.431, "step": 14818 }, { "epoch": 2.4337035288321394, "grad_norm": 0.31873388546233744, "learning_rate": 4.759432184073317e-06, "loss": 0.43, "step": 14819 }, { "epoch": 2.433867756040482, "grad_norm": 0.35986862702714584, "learning_rate": 4.758963990477906e-06, "loss": 0.4175, "step": 14820 }, { "epoch": 2.4340319832488246, "grad_norm": 0.4216160669476413, "learning_rate": 4.758495794069893e-06, "loss": 0.4186, "step": 14821 }, { "epoch": 2.4341962104571673, "grad_norm": 0.2656357025434144, "learning_rate": 4.758027594854359e-06, "loss": 0.4245, "step": 14822 }, { "epoch": 2.4343604376655104, "grad_norm": 0.27275238122045936, "learning_rate": 4.757559392836393e-06, "loss": 0.4171, "step": 14823 }, { "epoch": 2.434524664873853, "grad_norm": 0.4642214461339467, "learning_rate": 4.757091188021078e-06, "loss": 0.4447, "step": 14824 }, { "epoch": 2.4346888920821956, "grad_norm": 0.32736316829012824, "learning_rate": 4.7566229804135e-06, "loss": 0.4379, "step": 14825 }, { "epoch": 2.4348531192905383, "grad_norm": 0.3461060149654547, "learning_rate": 4.7561547700187435e-06, "loss": 0.4493, "step": 14826 }, { "epoch": 2.4350173464988814, "grad_norm": 0.3414152378529399, "learning_rate": 4.755686556841894e-06, "loss": 0.4467, "step": 14827 }, { "epoch": 2.435181573707224, "grad_norm": 0.3408110403504788, "learning_rate": 4.755218340888038e-06, "loss": 0.436, "step": 14828 }, { "epoch": 2.4353458009155666, "grad_norm": 0.2892055813816026, "learning_rate": 4.754750122162258e-06, "loss": 0.4512, "step": 14829 }, { "epoch": 2.4355100281239093, "grad_norm": 0.6586018155691247, "learning_rate": 4.754281900669644e-06, "loss": 0.4342, "step": 14830 }, { "epoch": 2.4356742553322523, "grad_norm": 0.3309761857325862, "learning_rate": 4.753813676415275e-06, "loss": 0.435, "step": 14831 }, { "epoch": 2.435838482540595, "grad_norm": 0.47816462488938366, "learning_rate": 4.753345449404242e-06, "loss": 0.4482, "step": 14832 }, { "epoch": 2.4360027097489376, "grad_norm": 0.4051812130197788, "learning_rate": 4.752877219641628e-06, "loss": 0.4585, "step": 14833 }, { "epoch": 2.4361669369572803, "grad_norm": 0.3283393580462784, "learning_rate": 4.752408987132517e-06, "loss": 0.4471, "step": 14834 }, { "epoch": 2.4363311641656233, "grad_norm": 0.29286177768403737, "learning_rate": 4.751940751881998e-06, "loss": 0.4231, "step": 14835 }, { "epoch": 2.436495391373966, "grad_norm": 0.2906969999573708, "learning_rate": 4.751472513895154e-06, "loss": 0.4326, "step": 14836 }, { "epoch": 2.4366596185823086, "grad_norm": 0.39150745671413895, "learning_rate": 4.75100427317707e-06, "loss": 0.4434, "step": 14837 }, { "epoch": 2.4368238457906513, "grad_norm": 0.27745309394386025, "learning_rate": 4.750536029732834e-06, "loss": 0.4194, "step": 14838 }, { "epoch": 2.436988072998994, "grad_norm": 0.7360407236143521, "learning_rate": 4.750067783567528e-06, "loss": 0.4238, "step": 14839 }, { "epoch": 2.437152300207337, "grad_norm": 0.4518500230554605, "learning_rate": 4.749599534686242e-06, "loss": 0.4224, "step": 14840 }, { "epoch": 2.4373165274156796, "grad_norm": 0.34881458565898915, "learning_rate": 4.74913128309406e-06, "loss": 0.4303, "step": 14841 }, { "epoch": 2.4374807546240222, "grad_norm": 0.2615419271331008, "learning_rate": 4.748663028796065e-06, "loss": 0.4413, "step": 14842 }, { "epoch": 2.437644981832365, "grad_norm": 0.31938658995749575, "learning_rate": 4.748194771797346e-06, "loss": 0.4383, "step": 14843 }, { "epoch": 2.437809209040708, "grad_norm": 0.34806719512201467, "learning_rate": 4.747726512102988e-06, "loss": 0.4449, "step": 14844 }, { "epoch": 2.4379734362490506, "grad_norm": 0.2952998266610244, "learning_rate": 4.747258249718077e-06, "loss": 0.4133, "step": 14845 }, { "epoch": 2.4381376634573932, "grad_norm": 0.4080042462948629, "learning_rate": 4.746789984647696e-06, "loss": 0.4174, "step": 14846 }, { "epoch": 2.438301890665736, "grad_norm": 0.2617233550855106, "learning_rate": 4.7463217168969335e-06, "loss": 0.4516, "step": 14847 }, { "epoch": 2.438466117874079, "grad_norm": 0.3167837063772311, "learning_rate": 4.7458534464708746e-06, "loss": 0.4207, "step": 14848 }, { "epoch": 2.4386303450824216, "grad_norm": 0.34189588475116073, "learning_rate": 4.745385173374608e-06, "loss": 0.4362, "step": 14849 }, { "epoch": 2.4387945722907642, "grad_norm": 0.37358037512844017, "learning_rate": 4.7449168976132145e-06, "loss": 0.4224, "step": 14850 }, { "epoch": 2.438958799499107, "grad_norm": 0.3431725832259083, "learning_rate": 4.744448619191783e-06, "loss": 0.4262, "step": 14851 }, { "epoch": 2.43912302670745, "grad_norm": 0.314791986594665, "learning_rate": 4.743980338115398e-06, "loss": 0.4557, "step": 14852 }, { "epoch": 2.4392872539157926, "grad_norm": 0.5013463893524989, "learning_rate": 4.743512054389148e-06, "loss": 0.4394, "step": 14853 }, { "epoch": 2.4394514811241352, "grad_norm": 0.2725217761544163, "learning_rate": 4.743043768018117e-06, "loss": 0.4114, "step": 14854 }, { "epoch": 2.439615708332478, "grad_norm": 0.37105193624079863, "learning_rate": 4.742575479007393e-06, "loss": 0.4172, "step": 14855 }, { "epoch": 2.4397799355408205, "grad_norm": 0.4803472831557481, "learning_rate": 4.742107187362058e-06, "loss": 0.4385, "step": 14856 }, { "epoch": 2.4399441627491636, "grad_norm": 0.2943878838182107, "learning_rate": 4.741638893087203e-06, "loss": 0.4354, "step": 14857 }, { "epoch": 2.4401083899575062, "grad_norm": 0.2980819471686045, "learning_rate": 4.74117059618791e-06, "loss": 0.4249, "step": 14858 }, { "epoch": 2.440272617165849, "grad_norm": 0.7392078808610509, "learning_rate": 4.740702296669269e-06, "loss": 0.4452, "step": 14859 }, { "epoch": 2.4404368443741915, "grad_norm": 0.335397733043894, "learning_rate": 4.740233994536363e-06, "loss": 0.4384, "step": 14860 }, { "epoch": 2.4406010715825346, "grad_norm": 0.3189866960758374, "learning_rate": 4.73976568979428e-06, "loss": 0.4306, "step": 14861 }, { "epoch": 2.4407652987908772, "grad_norm": 0.2752736569035358, "learning_rate": 4.739297382448105e-06, "loss": 0.4227, "step": 14862 }, { "epoch": 2.44092952599922, "grad_norm": 0.37664773038821614, "learning_rate": 4.738829072502925e-06, "loss": 0.4311, "step": 14863 }, { "epoch": 2.4410937532075625, "grad_norm": 0.3309783756164121, "learning_rate": 4.738360759963827e-06, "loss": 0.4323, "step": 14864 }, { "epoch": 2.4412579804159056, "grad_norm": 0.3763551459442455, "learning_rate": 4.737892444835896e-06, "loss": 0.4403, "step": 14865 }, { "epoch": 2.441422207624248, "grad_norm": 0.35097168113421806, "learning_rate": 4.737424127124219e-06, "loss": 0.4442, "step": 14866 }, { "epoch": 2.441586434832591, "grad_norm": 0.3712543226418984, "learning_rate": 4.7369558068338825e-06, "loss": 0.4372, "step": 14867 }, { "epoch": 2.4417506620409335, "grad_norm": 0.32506729393422107, "learning_rate": 4.736487483969972e-06, "loss": 0.4395, "step": 14868 }, { "epoch": 2.4419148892492766, "grad_norm": 0.3064457747338616, "learning_rate": 4.7360191585375765e-06, "loss": 0.4452, "step": 14869 }, { "epoch": 2.442079116457619, "grad_norm": 0.44702514644854086, "learning_rate": 4.735550830541781e-06, "loss": 0.4159, "step": 14870 }, { "epoch": 2.442243343665962, "grad_norm": 0.3616030848631678, "learning_rate": 4.73508249998767e-06, "loss": 0.4256, "step": 14871 }, { "epoch": 2.4424075708743045, "grad_norm": 0.28232285830936266, "learning_rate": 4.734614166880332e-06, "loss": 0.4406, "step": 14872 }, { "epoch": 2.442571798082647, "grad_norm": 0.2805074286554534, "learning_rate": 4.734145831224853e-06, "loss": 0.4344, "step": 14873 }, { "epoch": 2.44273602529099, "grad_norm": 0.2956435605595372, "learning_rate": 4.733677493026323e-06, "loss": 0.429, "step": 14874 }, { "epoch": 2.442900252499333, "grad_norm": 0.2989704174696099, "learning_rate": 4.7332091522898225e-06, "loss": 0.4158, "step": 14875 }, { "epoch": 2.4430644797076755, "grad_norm": 0.26915929922268195, "learning_rate": 4.73274080902044e-06, "loss": 0.4651, "step": 14876 }, { "epoch": 2.443228706916018, "grad_norm": 0.3133310818964482, "learning_rate": 4.732272463223265e-06, "loss": 0.4208, "step": 14877 }, { "epoch": 2.443392934124361, "grad_norm": 0.2808808633684279, "learning_rate": 4.731804114903384e-06, "loss": 0.4278, "step": 14878 }, { "epoch": 2.443557161332704, "grad_norm": 0.9563998333839273, "learning_rate": 4.731335764065881e-06, "loss": 0.4451, "step": 14879 }, { "epoch": 2.4437213885410465, "grad_norm": 0.30893292698775016, "learning_rate": 4.730867410715844e-06, "loss": 0.4448, "step": 14880 }, { "epoch": 2.443885615749389, "grad_norm": 0.2708141085292231, "learning_rate": 4.7303990548583584e-06, "loss": 0.4341, "step": 14881 }, { "epoch": 2.444049842957732, "grad_norm": 0.43372099717902635, "learning_rate": 4.729930696498515e-06, "loss": 0.441, "step": 14882 }, { "epoch": 2.444214070166075, "grad_norm": 0.2895325076784331, "learning_rate": 4.729462335641396e-06, "loss": 0.426, "step": 14883 }, { "epoch": 2.4443782973744175, "grad_norm": 0.33145935474776, "learning_rate": 4.728993972292091e-06, "loss": 0.437, "step": 14884 }, { "epoch": 2.44454252458276, "grad_norm": 0.3935456848771356, "learning_rate": 4.728525606455686e-06, "loss": 0.4555, "step": 14885 }, { "epoch": 2.444706751791103, "grad_norm": 0.2952308962107992, "learning_rate": 4.728057238137269e-06, "loss": 0.438, "step": 14886 }, { "epoch": 2.444870978999446, "grad_norm": 0.3062234555252227, "learning_rate": 4.727588867341925e-06, "loss": 0.4143, "step": 14887 }, { "epoch": 2.4450352062077885, "grad_norm": 0.32180242990583197, "learning_rate": 4.727120494074741e-06, "loss": 0.4416, "step": 14888 }, { "epoch": 2.445199433416131, "grad_norm": 0.32843161134064053, "learning_rate": 4.726652118340808e-06, "loss": 0.418, "step": 14889 }, { "epoch": 2.4453636606244737, "grad_norm": 0.3390374896278502, "learning_rate": 4.726183740145208e-06, "loss": 0.4518, "step": 14890 }, { "epoch": 2.445527887832817, "grad_norm": 0.28347479023720434, "learning_rate": 4.725715359493031e-06, "loss": 0.4379, "step": 14891 }, { "epoch": 2.4456921150411595, "grad_norm": 0.30168778550602693, "learning_rate": 4.725246976389361e-06, "loss": 0.4447, "step": 14892 }, { "epoch": 2.445856342249502, "grad_norm": 0.3182228860270116, "learning_rate": 4.724778590839289e-06, "loss": 0.4436, "step": 14893 }, { "epoch": 2.4460205694578447, "grad_norm": 0.26929248249368787, "learning_rate": 4.7243102028479e-06, "loss": 0.4283, "step": 14894 }, { "epoch": 2.446184796666188, "grad_norm": 0.3502740325919489, "learning_rate": 4.723841812420282e-06, "loss": 0.441, "step": 14895 }, { "epoch": 2.4463490238745305, "grad_norm": 0.4357248159824678, "learning_rate": 4.723373419561521e-06, "loss": 0.449, "step": 14896 }, { "epoch": 2.446513251082873, "grad_norm": 0.2956091582918993, "learning_rate": 4.722905024276704e-06, "loss": 0.4464, "step": 14897 }, { "epoch": 2.4466774782912157, "grad_norm": 0.9827935727181517, "learning_rate": 4.72243662657092e-06, "loss": 0.4575, "step": 14898 }, { "epoch": 2.446841705499559, "grad_norm": 0.8911376698958159, "learning_rate": 4.7219682264492565e-06, "loss": 0.4177, "step": 14899 }, { "epoch": 2.4470059327079015, "grad_norm": 0.4204288106361778, "learning_rate": 4.721499823916798e-06, "loss": 0.4407, "step": 14900 }, { "epoch": 2.447170159916244, "grad_norm": 0.4214831462365801, "learning_rate": 4.7210314189786335e-06, "loss": 0.4385, "step": 14901 }, { "epoch": 2.4473343871245867, "grad_norm": 0.2856166432697078, "learning_rate": 4.72056301163985e-06, "loss": 0.4083, "step": 14902 }, { "epoch": 2.44749861433293, "grad_norm": 0.39927665904922716, "learning_rate": 4.720094601905538e-06, "loss": 0.4393, "step": 14903 }, { "epoch": 2.4476628415412724, "grad_norm": 0.27247089834361105, "learning_rate": 4.719626189780781e-06, "loss": 0.4343, "step": 14904 }, { "epoch": 2.447827068749615, "grad_norm": 0.3379655021876495, "learning_rate": 4.719157775270666e-06, "loss": 0.4368, "step": 14905 }, { "epoch": 2.4479912959579577, "grad_norm": 0.5812707202881656, "learning_rate": 4.718689358380282e-06, "loss": 0.427, "step": 14906 }, { "epoch": 2.4481555231663004, "grad_norm": 0.288175714574787, "learning_rate": 4.718220939114718e-06, "loss": 0.4393, "step": 14907 }, { "epoch": 2.4483197503746434, "grad_norm": 0.31558613912668265, "learning_rate": 4.717752517479059e-06, "loss": 0.4263, "step": 14908 }, { "epoch": 2.448483977582986, "grad_norm": 0.3122999644429297, "learning_rate": 4.7172840934783935e-06, "loss": 0.4497, "step": 14909 }, { "epoch": 2.4486482047913287, "grad_norm": 0.3856354602008668, "learning_rate": 4.716815667117808e-06, "loss": 0.4209, "step": 14910 }, { "epoch": 2.4488124319996714, "grad_norm": 0.3183549391768059, "learning_rate": 4.716347238402393e-06, "loss": 0.4361, "step": 14911 }, { "epoch": 2.4489766592080144, "grad_norm": 0.27665517262562817, "learning_rate": 4.715878807337233e-06, "loss": 0.407, "step": 14912 }, { "epoch": 2.449140886416357, "grad_norm": 0.5377088495858262, "learning_rate": 4.715410373927417e-06, "loss": 0.4374, "step": 14913 }, { "epoch": 2.4493051136246997, "grad_norm": 0.30183317730928727, "learning_rate": 4.714941938178032e-06, "loss": 0.4332, "step": 14914 }, { "epoch": 2.4494693408330424, "grad_norm": 0.3264467535903471, "learning_rate": 4.714473500094166e-06, "loss": 0.4374, "step": 14915 }, { "epoch": 2.4496335680413854, "grad_norm": 0.33462011748221, "learning_rate": 4.714005059680908e-06, "loss": 0.4584, "step": 14916 }, { "epoch": 2.449797795249728, "grad_norm": 0.3117046569026179, "learning_rate": 4.713536616943342e-06, "loss": 0.446, "step": 14917 }, { "epoch": 2.4499620224580707, "grad_norm": 0.3103767396060371, "learning_rate": 4.71306817188656e-06, "loss": 0.4251, "step": 14918 }, { "epoch": 2.4501262496664133, "grad_norm": 0.37468063922930944, "learning_rate": 4.712599724515649e-06, "loss": 0.4316, "step": 14919 }, { "epoch": 2.4502904768747564, "grad_norm": 0.2992503309515945, "learning_rate": 4.712131274835694e-06, "loss": 0.4332, "step": 14920 }, { "epoch": 2.450454704083099, "grad_norm": 0.2914771504239795, "learning_rate": 4.711662822851785e-06, "loss": 0.4394, "step": 14921 }, { "epoch": 2.4506189312914417, "grad_norm": 0.4249734365242678, "learning_rate": 4.711194368569009e-06, "loss": 0.448, "step": 14922 }, { "epoch": 2.4507831584997843, "grad_norm": 0.2915814818062091, "learning_rate": 4.710725911992456e-06, "loss": 0.4271, "step": 14923 }, { "epoch": 2.450947385708127, "grad_norm": 0.5198274597992836, "learning_rate": 4.710257453127212e-06, "loss": 0.4353, "step": 14924 }, { "epoch": 2.45111161291647, "grad_norm": 0.38566805754977934, "learning_rate": 4.709788991978364e-06, "loss": 0.4318, "step": 14925 }, { "epoch": 2.4512758401248127, "grad_norm": 0.33962611959497585, "learning_rate": 4.7093205285510024e-06, "loss": 0.4317, "step": 14926 }, { "epoch": 2.4514400673331553, "grad_norm": 0.3431469761470781, "learning_rate": 4.7088520628502134e-06, "loss": 0.4209, "step": 14927 }, { "epoch": 2.451604294541498, "grad_norm": 0.2557889173861677, "learning_rate": 4.708383594881086e-06, "loss": 0.444, "step": 14928 }, { "epoch": 2.451768521749841, "grad_norm": 0.3034543392005885, "learning_rate": 4.707915124648707e-06, "loss": 0.4489, "step": 14929 }, { "epoch": 2.4519327489581837, "grad_norm": 0.3355927507316989, "learning_rate": 4.707446652158164e-06, "loss": 0.4363, "step": 14930 }, { "epoch": 2.4520969761665263, "grad_norm": 0.43993410862279914, "learning_rate": 4.706978177414548e-06, "loss": 0.4439, "step": 14931 }, { "epoch": 2.452261203374869, "grad_norm": 0.309306723684051, "learning_rate": 4.7065097004229445e-06, "loss": 0.459, "step": 14932 }, { "epoch": 2.452425430583212, "grad_norm": 0.36210017974478553, "learning_rate": 4.706041221188444e-06, "loss": 0.458, "step": 14933 }, { "epoch": 2.4525896577915547, "grad_norm": 0.44914379194192844, "learning_rate": 4.705572739716132e-06, "loss": 0.4297, "step": 14934 }, { "epoch": 2.4527538849998973, "grad_norm": 0.4199867608706427, "learning_rate": 4.705104256011097e-06, "loss": 0.4251, "step": 14935 }, { "epoch": 2.45291811220824, "grad_norm": 0.2960066711495447, "learning_rate": 4.70463577007843e-06, "loss": 0.4098, "step": 14936 }, { "epoch": 2.453082339416583, "grad_norm": 0.29052926852285893, "learning_rate": 4.704167281923215e-06, "loss": 0.4391, "step": 14937 }, { "epoch": 2.4532465666249257, "grad_norm": 0.3345783118974667, "learning_rate": 4.703698791550544e-06, "loss": 0.4445, "step": 14938 }, { "epoch": 2.4534107938332683, "grad_norm": 0.31411562694600764, "learning_rate": 4.703230298965503e-06, "loss": 0.4374, "step": 14939 }, { "epoch": 2.453575021041611, "grad_norm": 0.39657920815497005, "learning_rate": 4.702761804173181e-06, "loss": 0.4297, "step": 14940 }, { "epoch": 2.4537392482499536, "grad_norm": 0.26347298494219035, "learning_rate": 4.7022933071786674e-06, "loss": 0.4503, "step": 14941 }, { "epoch": 2.4539034754582967, "grad_norm": 0.4001769996258484, "learning_rate": 4.701824807987049e-06, "loss": 0.4404, "step": 14942 }, { "epoch": 2.4540677026666393, "grad_norm": 0.3490323283984835, "learning_rate": 4.701356306603414e-06, "loss": 0.4108, "step": 14943 }, { "epoch": 2.454231929874982, "grad_norm": 0.32937590189847393, "learning_rate": 4.700887803032851e-06, "loss": 0.4331, "step": 14944 }, { "epoch": 2.4543961570833246, "grad_norm": 0.3413660682534813, "learning_rate": 4.70041929728045e-06, "loss": 0.4379, "step": 14945 }, { "epoch": 2.4545603842916677, "grad_norm": 0.5002206756837491, "learning_rate": 4.699950789351297e-06, "loss": 0.4483, "step": 14946 }, { "epoch": 2.4547246115000103, "grad_norm": 0.3417412630888464, "learning_rate": 4.699482279250482e-06, "loss": 0.4233, "step": 14947 }, { "epoch": 2.454888838708353, "grad_norm": 0.2913854518469454, "learning_rate": 4.699013766983093e-06, "loss": 0.4401, "step": 14948 }, { "epoch": 2.4550530659166956, "grad_norm": 0.28411465528321456, "learning_rate": 4.698545252554221e-06, "loss": 0.4299, "step": 14949 }, { "epoch": 2.4552172931250387, "grad_norm": 0.33716547921552187, "learning_rate": 4.69807673596895e-06, "loss": 0.4505, "step": 14950 }, { "epoch": 2.4553815203333813, "grad_norm": 0.2993655726406468, "learning_rate": 4.69760821723237e-06, "loss": 0.4476, "step": 14951 }, { "epoch": 2.455545747541724, "grad_norm": 0.30247931387572397, "learning_rate": 4.6971396963495725e-06, "loss": 0.4362, "step": 14952 }, { "epoch": 2.4557099747500666, "grad_norm": 0.2859025796264969, "learning_rate": 4.696671173325643e-06, "loss": 0.4473, "step": 14953 }, { "epoch": 2.4558742019584097, "grad_norm": 0.31087008805358723, "learning_rate": 4.6962026481656715e-06, "loss": 0.4323, "step": 14954 }, { "epoch": 2.4560384291667523, "grad_norm": 0.31108806233509195, "learning_rate": 4.695734120874745e-06, "loss": 0.4363, "step": 14955 }, { "epoch": 2.456202656375095, "grad_norm": 0.3713261946065773, "learning_rate": 4.6952655914579526e-06, "loss": 0.4582, "step": 14956 }, { "epoch": 2.4563668835834376, "grad_norm": 0.3066338892215127, "learning_rate": 4.694797059920388e-06, "loss": 0.4296, "step": 14957 }, { "epoch": 2.45653111079178, "grad_norm": 0.2810188832318168, "learning_rate": 4.694328526267133e-06, "loss": 0.4197, "step": 14958 }, { "epoch": 2.4566953380001233, "grad_norm": 0.29244327846057233, "learning_rate": 4.693859990503277e-06, "loss": 0.4436, "step": 14959 }, { "epoch": 2.456859565208466, "grad_norm": 0.3156738030477161, "learning_rate": 4.693391452633913e-06, "loss": 0.4539, "step": 14960 }, { "epoch": 2.4570237924168086, "grad_norm": 0.3456900198117342, "learning_rate": 4.692922912664128e-06, "loss": 0.4107, "step": 14961 }, { "epoch": 2.457188019625151, "grad_norm": 0.3750918202689833, "learning_rate": 4.69245437059901e-06, "loss": 0.4304, "step": 14962 }, { "epoch": 2.4573522468334943, "grad_norm": 0.42389798412654855, "learning_rate": 4.691985826443647e-06, "loss": 0.4171, "step": 14963 }, { "epoch": 2.457516474041837, "grad_norm": 0.28392710882294936, "learning_rate": 4.69151728020313e-06, "loss": 0.4347, "step": 14964 }, { "epoch": 2.4576807012501796, "grad_norm": 0.28639762426572174, "learning_rate": 4.691048731882546e-06, "loss": 0.4443, "step": 14965 }, { "epoch": 2.457844928458522, "grad_norm": 0.25684977537668713, "learning_rate": 4.690580181486986e-06, "loss": 0.4359, "step": 14966 }, { "epoch": 2.4580091556668653, "grad_norm": 0.36644612008467237, "learning_rate": 4.690111629021538e-06, "loss": 0.4653, "step": 14967 }, { "epoch": 2.458173382875208, "grad_norm": 0.3646384093642947, "learning_rate": 4.68964307449129e-06, "loss": 0.4397, "step": 14968 }, { "epoch": 2.4583376100835506, "grad_norm": 0.3382367193847496, "learning_rate": 4.689174517901331e-06, "loss": 0.4406, "step": 14969 }, { "epoch": 2.458501837291893, "grad_norm": 0.3140018837661301, "learning_rate": 4.688705959256752e-06, "loss": 0.4469, "step": 14970 }, { "epoch": 2.4586660645002363, "grad_norm": 0.35977465400020864, "learning_rate": 4.688237398562639e-06, "loss": 0.4258, "step": 14971 }, { "epoch": 2.458830291708579, "grad_norm": 0.3943690831857817, "learning_rate": 4.687768835824083e-06, "loss": 0.4461, "step": 14972 }, { "epoch": 2.4589945189169216, "grad_norm": 0.5226466463828919, "learning_rate": 4.687300271046173e-06, "loss": 0.4528, "step": 14973 }, { "epoch": 2.459158746125264, "grad_norm": 0.2765055483214686, "learning_rate": 4.686831704233999e-06, "loss": 0.4521, "step": 14974 }, { "epoch": 2.459322973333607, "grad_norm": 0.29749033942911196, "learning_rate": 4.6863631353926476e-06, "loss": 0.4468, "step": 14975 }, { "epoch": 2.45948720054195, "grad_norm": 0.2902094231503712, "learning_rate": 4.68589456452721e-06, "loss": 0.4278, "step": 14976 }, { "epoch": 2.4596514277502926, "grad_norm": 0.3078796587727116, "learning_rate": 4.6854259916427735e-06, "loss": 0.4159, "step": 14977 }, { "epoch": 2.459815654958635, "grad_norm": 0.2985730494760598, "learning_rate": 4.684957416744429e-06, "loss": 0.4503, "step": 14978 }, { "epoch": 2.459979882166978, "grad_norm": 0.3189836021672542, "learning_rate": 4.684488839837265e-06, "loss": 0.442, "step": 14979 }, { "epoch": 2.460144109375321, "grad_norm": 0.4842946241587704, "learning_rate": 4.684020260926369e-06, "loss": 0.4377, "step": 14980 }, { "epoch": 2.4603083365836635, "grad_norm": 0.373334795378634, "learning_rate": 4.683551680016834e-06, "loss": 0.4456, "step": 14981 }, { "epoch": 2.460472563792006, "grad_norm": 0.44875805856112194, "learning_rate": 4.683083097113748e-06, "loss": 0.445, "step": 14982 }, { "epoch": 2.460636791000349, "grad_norm": 0.37397571261106627, "learning_rate": 4.682614512222197e-06, "loss": 0.4335, "step": 14983 }, { "epoch": 2.460801018208692, "grad_norm": 0.3465551968819545, "learning_rate": 4.682145925347273e-06, "loss": 0.4215, "step": 14984 }, { "epoch": 2.4609652454170345, "grad_norm": 0.29430252055096734, "learning_rate": 4.681677336494065e-06, "loss": 0.4457, "step": 14985 }, { "epoch": 2.461129472625377, "grad_norm": 0.32286831624943985, "learning_rate": 4.6812087456676644e-06, "loss": 0.4154, "step": 14986 }, { "epoch": 2.46129369983372, "grad_norm": 0.3793808917437682, "learning_rate": 4.680740152873157e-06, "loss": 0.4303, "step": 14987 }, { "epoch": 2.461457927042063, "grad_norm": 0.34371076774506265, "learning_rate": 4.680271558115635e-06, "loss": 0.4444, "step": 14988 }, { "epoch": 2.4616221542504055, "grad_norm": 0.2800293307566269, "learning_rate": 4.679802961400184e-06, "loss": 0.4263, "step": 14989 }, { "epoch": 2.461786381458748, "grad_norm": 0.4105776277011872, "learning_rate": 4.679334362731898e-06, "loss": 0.442, "step": 14990 }, { "epoch": 2.461950608667091, "grad_norm": 0.29117182265697783, "learning_rate": 4.678865762115864e-06, "loss": 0.4155, "step": 14991 }, { "epoch": 2.4621148358754334, "grad_norm": 0.30870836843997373, "learning_rate": 4.678397159557172e-06, "loss": 0.4156, "step": 14992 }, { "epoch": 2.4622790630837765, "grad_norm": 0.2643732894140815, "learning_rate": 4.67792855506091e-06, "loss": 0.4354, "step": 14993 }, { "epoch": 2.462443290292119, "grad_norm": 0.2758373427519078, "learning_rate": 4.67745994863217e-06, "loss": 0.4397, "step": 14994 }, { "epoch": 2.462607517500462, "grad_norm": 0.2879361585632679, "learning_rate": 4.676991340276039e-06, "loss": 0.4245, "step": 14995 }, { "epoch": 2.4627717447088044, "grad_norm": 0.3407813698263583, "learning_rate": 4.676522729997611e-06, "loss": 0.424, "step": 14996 }, { "epoch": 2.4629359719171475, "grad_norm": 0.3317369355754718, "learning_rate": 4.676054117801969e-06, "loss": 0.445, "step": 14997 }, { "epoch": 2.46310019912549, "grad_norm": 0.6803115759149154, "learning_rate": 4.675585503694208e-06, "loss": 0.4384, "step": 14998 }, { "epoch": 2.463264426333833, "grad_norm": 0.3831499501518102, "learning_rate": 4.6751168876794164e-06, "loss": 0.447, "step": 14999 }, { "epoch": 2.4634286535421754, "grad_norm": 0.8359767289627413, "learning_rate": 4.674648269762681e-06, "loss": 0.4318, "step": 15000 }, { "epoch": 2.4635928807505185, "grad_norm": 0.2966619920680067, "learning_rate": 4.674179649949095e-06, "loss": 0.4424, "step": 15001 }, { "epoch": 2.463757107958861, "grad_norm": 0.2732458183573598, "learning_rate": 4.673711028243746e-06, "loss": 0.4509, "step": 15002 }, { "epoch": 2.463921335167204, "grad_norm": 0.29770838372139535, "learning_rate": 4.673242404651725e-06, "loss": 0.4195, "step": 15003 }, { "epoch": 2.4640855623755464, "grad_norm": 0.2502454178475141, "learning_rate": 4.67277377917812e-06, "loss": 0.4325, "step": 15004 }, { "epoch": 2.4642497895838895, "grad_norm": 0.27202883419282115, "learning_rate": 4.672305151828022e-06, "loss": 0.4389, "step": 15005 }, { "epoch": 2.464414016792232, "grad_norm": 0.316940999995647, "learning_rate": 4.671836522606521e-06, "loss": 0.4244, "step": 15006 }, { "epoch": 2.464578244000575, "grad_norm": 0.3077409686336453, "learning_rate": 4.671367891518707e-06, "loss": 0.4431, "step": 15007 }, { "epoch": 2.4647424712089174, "grad_norm": 0.2991331964822248, "learning_rate": 4.670899258569668e-06, "loss": 0.4186, "step": 15008 }, { "epoch": 2.46490669841726, "grad_norm": 0.30160160597116736, "learning_rate": 4.670430623764495e-06, "loss": 0.4209, "step": 15009 }, { "epoch": 2.465070925625603, "grad_norm": 1.2057903308954052, "learning_rate": 4.6699619871082775e-06, "loss": 0.4507, "step": 15010 }, { "epoch": 2.465235152833946, "grad_norm": 0.36722155486617347, "learning_rate": 4.669493348606105e-06, "loss": 0.4246, "step": 15011 }, { "epoch": 2.4653993800422884, "grad_norm": 0.2732815069756437, "learning_rate": 4.669024708263071e-06, "loss": 0.4399, "step": 15012 }, { "epoch": 2.465563607250631, "grad_norm": 0.3759797860911508, "learning_rate": 4.668556066084259e-06, "loss": 0.4429, "step": 15013 }, { "epoch": 2.465727834458974, "grad_norm": 0.29556366049368965, "learning_rate": 4.668087422074763e-06, "loss": 0.4399, "step": 15014 }, { "epoch": 2.465892061667317, "grad_norm": 0.33310926302971894, "learning_rate": 4.667618776239674e-06, "loss": 0.4282, "step": 15015 }, { "epoch": 2.4660562888756594, "grad_norm": 0.28561588310789915, "learning_rate": 4.6671501285840795e-06, "loss": 0.4207, "step": 15016 }, { "epoch": 2.466220516084002, "grad_norm": 0.31983473101658594, "learning_rate": 4.666681479113069e-06, "loss": 0.4324, "step": 15017 }, { "epoch": 2.466384743292345, "grad_norm": 0.3606193430776502, "learning_rate": 4.666212827831733e-06, "loss": 0.4311, "step": 15018 }, { "epoch": 2.4665489705006878, "grad_norm": 0.3478030075169639, "learning_rate": 4.665744174745164e-06, "loss": 0.4234, "step": 15019 }, { "epoch": 2.4667131977090304, "grad_norm": 0.2944069325615738, "learning_rate": 4.66527551985845e-06, "loss": 0.4405, "step": 15020 }, { "epoch": 2.466877424917373, "grad_norm": 0.4024844019253324, "learning_rate": 4.6648068631766816e-06, "loss": 0.4406, "step": 15021 }, { "epoch": 2.467041652125716, "grad_norm": 0.2907487570514868, "learning_rate": 4.664338204704947e-06, "loss": 0.4362, "step": 15022 }, { "epoch": 2.4672058793340588, "grad_norm": 0.3599279459135872, "learning_rate": 4.663869544448338e-06, "loss": 0.4249, "step": 15023 }, { "epoch": 2.4673701065424014, "grad_norm": 0.4021341046173282, "learning_rate": 4.663400882411946e-06, "loss": 0.4577, "step": 15024 }, { "epoch": 2.467534333750744, "grad_norm": 0.29310096697738297, "learning_rate": 4.662932218600859e-06, "loss": 0.4242, "step": 15025 }, { "epoch": 2.4676985609590867, "grad_norm": 0.3008874854202207, "learning_rate": 4.662463553020167e-06, "loss": 0.4483, "step": 15026 }, { "epoch": 2.4678627881674298, "grad_norm": 0.31891265100092614, "learning_rate": 4.661994885674962e-06, "loss": 0.4422, "step": 15027 }, { "epoch": 2.4680270153757724, "grad_norm": 0.4460526086221128, "learning_rate": 4.661526216570332e-06, "loss": 0.4276, "step": 15028 }, { "epoch": 2.468191242584115, "grad_norm": 0.3133604060641469, "learning_rate": 4.661057545711369e-06, "loss": 0.4528, "step": 15029 }, { "epoch": 2.4683554697924577, "grad_norm": 0.28146024737318615, "learning_rate": 4.660588873103164e-06, "loss": 0.4345, "step": 15030 }, { "epoch": 2.4685196970008008, "grad_norm": 0.528866055449549, "learning_rate": 4.6601201987508035e-06, "loss": 0.4364, "step": 15031 }, { "epoch": 2.4686839242091434, "grad_norm": 0.2872816059039179, "learning_rate": 4.659651522659382e-06, "loss": 0.4372, "step": 15032 }, { "epoch": 2.468848151417486, "grad_norm": 0.34378625498393467, "learning_rate": 4.659182844833987e-06, "loss": 0.4253, "step": 15033 }, { "epoch": 2.4690123786258287, "grad_norm": 0.3196887697710157, "learning_rate": 4.6587141652797085e-06, "loss": 0.44, "step": 15034 }, { "epoch": 2.4691766058341718, "grad_norm": 0.2937573874985924, "learning_rate": 4.658245484001641e-06, "loss": 0.4304, "step": 15035 }, { "epoch": 2.4693408330425144, "grad_norm": 0.30747920832229947, "learning_rate": 4.6577768010048685e-06, "loss": 0.434, "step": 15036 }, { "epoch": 2.469505060250857, "grad_norm": 0.3798041029245597, "learning_rate": 4.657308116294488e-06, "loss": 0.4494, "step": 15037 }, { "epoch": 2.4696692874591997, "grad_norm": 0.30374441085047227, "learning_rate": 4.656839429875584e-06, "loss": 0.4268, "step": 15038 }, { "epoch": 2.4698335146675428, "grad_norm": 0.3892278187656976, "learning_rate": 4.656370741753251e-06, "loss": 0.4299, "step": 15039 }, { "epoch": 2.4699977418758854, "grad_norm": 0.2855739563377692, "learning_rate": 4.655902051932576e-06, "loss": 0.433, "step": 15040 }, { "epoch": 2.470161969084228, "grad_norm": 0.3673913461509335, "learning_rate": 4.655433360418654e-06, "loss": 0.4347, "step": 15041 }, { "epoch": 2.4703261962925707, "grad_norm": 0.6614279208112778, "learning_rate": 4.65496466721657e-06, "loss": 0.4454, "step": 15042 }, { "epoch": 2.4704904235009133, "grad_norm": 0.27867910155523756, "learning_rate": 4.654495972331418e-06, "loss": 0.4296, "step": 15043 }, { "epoch": 2.4706546507092564, "grad_norm": 0.41239281456362475, "learning_rate": 4.65402727576829e-06, "loss": 0.436, "step": 15044 }, { "epoch": 2.470818877917599, "grad_norm": 0.3045078974339018, "learning_rate": 4.653558577532274e-06, "loss": 0.431, "step": 15045 }, { "epoch": 2.4709831051259417, "grad_norm": 0.3162849530056356, "learning_rate": 4.653089877628458e-06, "loss": 0.4205, "step": 15046 }, { "epoch": 2.4711473323342843, "grad_norm": 0.29902768192013507, "learning_rate": 4.652621176061936e-06, "loss": 0.4433, "step": 15047 }, { "epoch": 2.4713115595426274, "grad_norm": 0.31846117954541137, "learning_rate": 4.652152472837798e-06, "loss": 0.4314, "step": 15048 }, { "epoch": 2.47147578675097, "grad_norm": 0.2710159655321698, "learning_rate": 4.651683767961136e-06, "loss": 0.4532, "step": 15049 }, { "epoch": 2.4716400139593127, "grad_norm": 0.3420916679093024, "learning_rate": 4.651215061437038e-06, "loss": 0.4279, "step": 15050 }, { "epoch": 2.4718042411676553, "grad_norm": 0.27750671662652043, "learning_rate": 4.650746353270595e-06, "loss": 0.4153, "step": 15051 }, { "epoch": 2.4719684683759984, "grad_norm": 0.3031546521062631, "learning_rate": 4.650277643466899e-06, "loss": 0.42, "step": 15052 }, { "epoch": 2.472132695584341, "grad_norm": 0.3337669951335687, "learning_rate": 4.64980893203104e-06, "loss": 0.4325, "step": 15053 }, { "epoch": 2.4722969227926836, "grad_norm": 0.2946419765296823, "learning_rate": 4.64934021896811e-06, "loss": 0.4389, "step": 15054 }, { "epoch": 2.4724611500010263, "grad_norm": 0.6566714479694089, "learning_rate": 4.648871504283196e-06, "loss": 0.4273, "step": 15055 }, { "epoch": 2.4726253772093694, "grad_norm": 0.4074657555880319, "learning_rate": 4.6484027879813905e-06, "loss": 0.4383, "step": 15056 }, { "epoch": 2.472789604417712, "grad_norm": 0.27201573725819184, "learning_rate": 4.6479340700677865e-06, "loss": 0.4318, "step": 15057 }, { "epoch": 2.4729538316260546, "grad_norm": 0.26289819417609633, "learning_rate": 4.647465350547473e-06, "loss": 0.4327, "step": 15058 }, { "epoch": 2.4731180588343973, "grad_norm": 0.3174931715785728, "learning_rate": 4.64699662942554e-06, "loss": 0.4239, "step": 15059 }, { "epoch": 2.47328228604274, "grad_norm": 0.2919191884008475, "learning_rate": 4.64652790670708e-06, "loss": 0.457, "step": 15060 }, { "epoch": 2.473446513251083, "grad_norm": 0.31143441804325606, "learning_rate": 4.646059182397181e-06, "loss": 0.4143, "step": 15061 }, { "epoch": 2.4736107404594256, "grad_norm": 0.3528192752492953, "learning_rate": 4.645590456500937e-06, "loss": 0.4373, "step": 15062 }, { "epoch": 2.4737749676677683, "grad_norm": 0.334288162317713, "learning_rate": 4.645121729023436e-06, "loss": 0.4261, "step": 15063 }, { "epoch": 2.473939194876111, "grad_norm": 0.49576018154807827, "learning_rate": 4.644652999969772e-06, "loss": 0.4409, "step": 15064 }, { "epoch": 2.474103422084454, "grad_norm": 0.2739197448585547, "learning_rate": 4.644184269345033e-06, "loss": 0.4462, "step": 15065 }, { "epoch": 2.4742676492927966, "grad_norm": 0.3100637983651266, "learning_rate": 4.643715537154312e-06, "loss": 0.4469, "step": 15066 }, { "epoch": 2.4744318765011393, "grad_norm": 0.3359245159861427, "learning_rate": 4.643246803402698e-06, "loss": 0.4606, "step": 15067 }, { "epoch": 2.474596103709482, "grad_norm": 0.384617259495692, "learning_rate": 4.642778068095282e-06, "loss": 0.4261, "step": 15068 }, { "epoch": 2.474760330917825, "grad_norm": 0.29517197642369547, "learning_rate": 4.642309331237157e-06, "loss": 0.429, "step": 15069 }, { "epoch": 2.4749245581261676, "grad_norm": 0.32753736877117223, "learning_rate": 4.641840592833413e-06, "loss": 0.4431, "step": 15070 }, { "epoch": 2.4750887853345103, "grad_norm": 0.2791454602503895, "learning_rate": 4.641371852889139e-06, "loss": 0.4071, "step": 15071 }, { "epoch": 2.475253012542853, "grad_norm": 0.37888047503031486, "learning_rate": 4.640903111409428e-06, "loss": 0.4417, "step": 15072 }, { "epoch": 2.475417239751196, "grad_norm": 0.366354730985903, "learning_rate": 4.640434368399371e-06, "loss": 0.4554, "step": 15073 }, { "epoch": 2.4755814669595386, "grad_norm": 0.31104150908583245, "learning_rate": 4.63996562386406e-06, "loss": 0.4289, "step": 15074 }, { "epoch": 2.4757456941678813, "grad_norm": 0.37956424179769677, "learning_rate": 4.6394968778085825e-06, "loss": 0.4193, "step": 15075 }, { "epoch": 2.475909921376224, "grad_norm": 0.335801656073786, "learning_rate": 4.6390281302380325e-06, "loss": 0.4467, "step": 15076 }, { "epoch": 2.4760741485845665, "grad_norm": 0.2643865236365438, "learning_rate": 4.6385593811575e-06, "loss": 0.4419, "step": 15077 }, { "epoch": 2.4762383757929096, "grad_norm": 0.30764530125320155, "learning_rate": 4.638090630572076e-06, "loss": 0.4202, "step": 15078 }, { "epoch": 2.4764026030012523, "grad_norm": 0.32420305420903595, "learning_rate": 4.637621878486853e-06, "loss": 0.4522, "step": 15079 }, { "epoch": 2.476566830209595, "grad_norm": 0.3363197644758546, "learning_rate": 4.63715312490692e-06, "loss": 0.4223, "step": 15080 }, { "epoch": 2.4767310574179375, "grad_norm": 0.3091369151927837, "learning_rate": 4.636684369837368e-06, "loss": 0.4365, "step": 15081 }, { "epoch": 2.4768952846262806, "grad_norm": 0.29132422243990463, "learning_rate": 4.636215613283291e-06, "loss": 0.4294, "step": 15082 }, { "epoch": 2.4770595118346233, "grad_norm": 0.32071462618152996, "learning_rate": 4.635746855249779e-06, "loss": 0.4581, "step": 15083 }, { "epoch": 2.477223739042966, "grad_norm": 0.39568575870858036, "learning_rate": 4.635278095741922e-06, "loss": 0.4408, "step": 15084 }, { "epoch": 2.4773879662513085, "grad_norm": 0.2705314149096496, "learning_rate": 4.634809334764811e-06, "loss": 0.4407, "step": 15085 }, { "epoch": 2.4775521934596516, "grad_norm": 0.4562209852816167, "learning_rate": 4.634340572323538e-06, "loss": 0.4238, "step": 15086 }, { "epoch": 2.4777164206679942, "grad_norm": 0.35692718672318324, "learning_rate": 4.633871808423195e-06, "loss": 0.4366, "step": 15087 }, { "epoch": 2.477880647876337, "grad_norm": 0.353983621673839, "learning_rate": 4.633403043068873e-06, "loss": 0.4546, "step": 15088 }, { "epoch": 2.4780448750846795, "grad_norm": 0.42116045761854776, "learning_rate": 4.632934276265661e-06, "loss": 0.4482, "step": 15089 }, { "epoch": 2.4782091022930226, "grad_norm": 0.3206255592467727, "learning_rate": 4.6324655080186524e-06, "loss": 0.4248, "step": 15090 }, { "epoch": 2.4783733295013652, "grad_norm": 0.2792993323261359, "learning_rate": 4.6319967383329395e-06, "loss": 0.425, "step": 15091 }, { "epoch": 2.478537556709708, "grad_norm": 0.2843568794714767, "learning_rate": 4.631527967213611e-06, "loss": 0.4416, "step": 15092 }, { "epoch": 2.4787017839180505, "grad_norm": 0.27163007119109167, "learning_rate": 4.631059194665759e-06, "loss": 0.4263, "step": 15093 }, { "epoch": 2.478866011126393, "grad_norm": 0.285132682533487, "learning_rate": 4.630590420694475e-06, "loss": 0.451, "step": 15094 }, { "epoch": 2.4790302383347362, "grad_norm": 0.2962666477282085, "learning_rate": 4.630121645304853e-06, "loss": 0.4149, "step": 15095 }, { "epoch": 2.479194465543079, "grad_norm": 0.3436820673601864, "learning_rate": 4.629652868501979e-06, "loss": 0.4354, "step": 15096 }, { "epoch": 2.4793586927514215, "grad_norm": 0.30779119066240046, "learning_rate": 4.629184090290948e-06, "loss": 0.4039, "step": 15097 }, { "epoch": 2.479522919959764, "grad_norm": 0.30109729627360404, "learning_rate": 4.628715310676851e-06, "loss": 0.4364, "step": 15098 }, { "epoch": 2.4796871471681072, "grad_norm": 0.34213472664573985, "learning_rate": 4.62824652966478e-06, "loss": 0.4024, "step": 15099 }, { "epoch": 2.47985137437645, "grad_norm": 0.3312074651939912, "learning_rate": 4.627777747259825e-06, "loss": 0.4494, "step": 15100 }, { "epoch": 2.4800156015847925, "grad_norm": 0.3918133083512165, "learning_rate": 4.627308963467076e-06, "loss": 0.4383, "step": 15101 }, { "epoch": 2.480179828793135, "grad_norm": 0.40037398856188533, "learning_rate": 4.6268401782916285e-06, "loss": 0.4496, "step": 15102 }, { "epoch": 2.4803440560014782, "grad_norm": 0.29368723208387454, "learning_rate": 4.6263713917385725e-06, "loss": 0.4546, "step": 15103 }, { "epoch": 2.480508283209821, "grad_norm": 0.39442925356351577, "learning_rate": 4.625902603812998e-06, "loss": 0.4396, "step": 15104 }, { "epoch": 2.4806725104181635, "grad_norm": 0.49391288466102, "learning_rate": 4.625433814519997e-06, "loss": 0.4461, "step": 15105 }, { "epoch": 2.480836737626506, "grad_norm": 0.6507872211415915, "learning_rate": 4.624965023864661e-06, "loss": 0.4257, "step": 15106 }, { "epoch": 2.481000964834849, "grad_norm": 0.3170404081913076, "learning_rate": 4.624496231852082e-06, "loss": 0.4303, "step": 15107 }, { "epoch": 2.481165192043192, "grad_norm": 0.8010381914422225, "learning_rate": 4.624027438487352e-06, "loss": 0.4287, "step": 15108 }, { "epoch": 2.4813294192515345, "grad_norm": 0.3175103487015811, "learning_rate": 4.623558643775561e-06, "loss": 0.4396, "step": 15109 }, { "epoch": 2.481493646459877, "grad_norm": 0.2780316156388087, "learning_rate": 4.623089847721803e-06, "loss": 0.4386, "step": 15110 }, { "epoch": 2.4816578736682198, "grad_norm": 0.9957906981950299, "learning_rate": 4.622621050331167e-06, "loss": 0.4382, "step": 15111 }, { "epoch": 2.481822100876563, "grad_norm": 0.3119134050343224, "learning_rate": 4.622152251608747e-06, "loss": 0.41, "step": 15112 }, { "epoch": 2.4819863280849055, "grad_norm": 0.3165098283105381, "learning_rate": 4.621683451559633e-06, "loss": 0.4261, "step": 15113 }, { "epoch": 2.482150555293248, "grad_norm": 0.3120583440078844, "learning_rate": 4.621214650188916e-06, "loss": 0.4349, "step": 15114 }, { "epoch": 2.4823147825015908, "grad_norm": 0.2738497164372407, "learning_rate": 4.620745847501689e-06, "loss": 0.4321, "step": 15115 }, { "epoch": 2.482479009709934, "grad_norm": 0.36008005991934383, "learning_rate": 4.620277043503044e-06, "loss": 0.4417, "step": 15116 }, { "epoch": 2.4826432369182765, "grad_norm": 0.31989619467996416, "learning_rate": 4.619808238198072e-06, "loss": 0.446, "step": 15117 }, { "epoch": 2.482807464126619, "grad_norm": 0.6824453394065131, "learning_rate": 4.619339431591864e-06, "loss": 0.4383, "step": 15118 }, { "epoch": 2.4829716913349618, "grad_norm": 0.37095029373258626, "learning_rate": 4.618870623689512e-06, "loss": 0.4583, "step": 15119 }, { "epoch": 2.483135918543305, "grad_norm": 0.33314170061608483, "learning_rate": 4.618401814496109e-06, "loss": 0.4348, "step": 15120 }, { "epoch": 2.4833001457516475, "grad_norm": 0.33636320733646125, "learning_rate": 4.617933004016744e-06, "loss": 0.4554, "step": 15121 }, { "epoch": 2.48346437295999, "grad_norm": 1.0196784103267829, "learning_rate": 4.617464192256513e-06, "loss": 0.4434, "step": 15122 }, { "epoch": 2.4836286001683328, "grad_norm": 0.3193716312140261, "learning_rate": 4.616995379220504e-06, "loss": 0.43, "step": 15123 }, { "epoch": 2.483792827376676, "grad_norm": 0.288516069215822, "learning_rate": 4.616526564913811e-06, "loss": 0.434, "step": 15124 }, { "epoch": 2.4839570545850185, "grad_norm": 0.29912771431081314, "learning_rate": 4.616057749341524e-06, "loss": 0.4475, "step": 15125 }, { "epoch": 2.484121281793361, "grad_norm": 0.2733627283094918, "learning_rate": 4.615588932508735e-06, "loss": 0.4382, "step": 15126 }, { "epoch": 2.4842855090017038, "grad_norm": 0.3603048759548791, "learning_rate": 4.615120114420538e-06, "loss": 0.4283, "step": 15127 }, { "epoch": 2.4844497362100464, "grad_norm": 0.3743687907784263, "learning_rate": 4.6146512950820225e-06, "loss": 0.448, "step": 15128 }, { "epoch": 2.4846139634183895, "grad_norm": 0.3108138141975132, "learning_rate": 4.614182474498282e-06, "loss": 0.4537, "step": 15129 }, { "epoch": 2.484778190626732, "grad_norm": 0.3197077167691703, "learning_rate": 4.613713652674406e-06, "loss": 0.438, "step": 15130 }, { "epoch": 2.4849424178350747, "grad_norm": 0.4391802841487158, "learning_rate": 4.613244829615488e-06, "loss": 0.4311, "step": 15131 }, { "epoch": 2.4851066450434174, "grad_norm": 0.2920981537029958, "learning_rate": 4.612776005326621e-06, "loss": 0.4237, "step": 15132 }, { "epoch": 2.4852708722517605, "grad_norm": 0.29646873635485227, "learning_rate": 4.612307179812896e-06, "loss": 0.4341, "step": 15133 }, { "epoch": 2.485435099460103, "grad_norm": 0.29841283343957575, "learning_rate": 4.611838353079403e-06, "loss": 0.4303, "step": 15134 }, { "epoch": 2.4855993266684457, "grad_norm": 0.3250609621900752, "learning_rate": 4.611369525131235e-06, "loss": 0.4257, "step": 15135 }, { "epoch": 2.4857635538767884, "grad_norm": 0.35478496086877537, "learning_rate": 4.610900695973485e-06, "loss": 0.4379, "step": 15136 }, { "epoch": 2.4859277810851315, "grad_norm": 0.4670594900199308, "learning_rate": 4.610431865611247e-06, "loss": 0.4517, "step": 15137 }, { "epoch": 2.486092008293474, "grad_norm": 0.31415101552518854, "learning_rate": 4.609963034049607e-06, "loss": 0.4298, "step": 15138 }, { "epoch": 2.4862562355018167, "grad_norm": 0.38284691709415497, "learning_rate": 4.609494201293661e-06, "loss": 0.43, "step": 15139 }, { "epoch": 2.4864204627101594, "grad_norm": 0.3092641007937, "learning_rate": 4.6090253673484995e-06, "loss": 0.421, "step": 15140 }, { "epoch": 2.4865846899185025, "grad_norm": 0.35972316348470473, "learning_rate": 4.608556532219216e-06, "loss": 0.4597, "step": 15141 }, { "epoch": 2.486748917126845, "grad_norm": 0.368530624076118, "learning_rate": 4.608087695910903e-06, "loss": 0.4258, "step": 15142 }, { "epoch": 2.4869131443351877, "grad_norm": 0.303106049447321, "learning_rate": 4.6076188584286505e-06, "loss": 0.4249, "step": 15143 }, { "epoch": 2.4870773715435304, "grad_norm": 0.48973722250178475, "learning_rate": 4.607150019777551e-06, "loss": 0.4328, "step": 15144 }, { "epoch": 2.487241598751873, "grad_norm": 0.314524236592061, "learning_rate": 4.606681179962697e-06, "loss": 0.4424, "step": 15145 }, { "epoch": 2.487405825960216, "grad_norm": 0.5027401543850439, "learning_rate": 4.60621233898918e-06, "loss": 0.4372, "step": 15146 }, { "epoch": 2.4875700531685587, "grad_norm": 0.29423642278198925, "learning_rate": 4.605743496862093e-06, "loss": 0.4217, "step": 15147 }, { "epoch": 2.4877342803769014, "grad_norm": 0.31679517006738683, "learning_rate": 4.605274653586526e-06, "loss": 0.4495, "step": 15148 }, { "epoch": 2.487898507585244, "grad_norm": 0.26487858401307063, "learning_rate": 4.604805809167574e-06, "loss": 0.4323, "step": 15149 }, { "epoch": 2.488062734793587, "grad_norm": 0.32293708597952736, "learning_rate": 4.604336963610328e-06, "loss": 0.4254, "step": 15150 }, { "epoch": 2.4882269620019297, "grad_norm": 0.28820496702549153, "learning_rate": 4.60386811691988e-06, "loss": 0.4368, "step": 15151 }, { "epoch": 2.4883911892102724, "grad_norm": 0.27518127880599397, "learning_rate": 4.6033992691013225e-06, "loss": 0.4453, "step": 15152 }, { "epoch": 2.488555416418615, "grad_norm": 0.3493994813337365, "learning_rate": 4.6029304201597456e-06, "loss": 0.4322, "step": 15153 }, { "epoch": 2.488719643626958, "grad_norm": 0.3251525156968644, "learning_rate": 4.602461570100246e-06, "loss": 0.4246, "step": 15154 }, { "epoch": 2.4888838708353007, "grad_norm": 0.4157430492921045, "learning_rate": 4.6019927189279096e-06, "loss": 0.4371, "step": 15155 }, { "epoch": 2.4890480980436434, "grad_norm": 0.30758658627784186, "learning_rate": 4.601523866647834e-06, "loss": 0.4267, "step": 15156 }, { "epoch": 2.489212325251986, "grad_norm": 0.33772718364386484, "learning_rate": 4.601055013265109e-06, "loss": 0.4342, "step": 15157 }, { "epoch": 2.489376552460329, "grad_norm": 0.36400549487339234, "learning_rate": 4.6005861587848264e-06, "loss": 0.4364, "step": 15158 }, { "epoch": 2.4895407796686717, "grad_norm": 0.4213966493331531, "learning_rate": 4.600117303212079e-06, "loss": 0.3879, "step": 15159 }, { "epoch": 2.4897050068770143, "grad_norm": 0.3207069643365226, "learning_rate": 4.5996484465519594e-06, "loss": 0.4352, "step": 15160 }, { "epoch": 2.489869234085357, "grad_norm": 0.30520069096946645, "learning_rate": 4.599179588809561e-06, "loss": 0.4323, "step": 15161 }, { "epoch": 2.4900334612936996, "grad_norm": 0.4020668168700281, "learning_rate": 4.598710729989974e-06, "loss": 0.4535, "step": 15162 }, { "epoch": 2.4901976885020427, "grad_norm": 0.42245364077029657, "learning_rate": 4.5982418700982905e-06, "loss": 0.4525, "step": 15163 }, { "epoch": 2.4903619157103853, "grad_norm": 0.8362459834605354, "learning_rate": 4.597773009139604e-06, "loss": 0.4362, "step": 15164 }, { "epoch": 2.490526142918728, "grad_norm": 0.3449582767880722, "learning_rate": 4.597304147119006e-06, "loss": 0.4339, "step": 15165 }, { "epoch": 2.4906903701270706, "grad_norm": 0.3066916301852268, "learning_rate": 4.5968352840415904e-06, "loss": 0.4309, "step": 15166 }, { "epoch": 2.4908545973354137, "grad_norm": 0.5106999824766285, "learning_rate": 4.596366419912448e-06, "loss": 0.4531, "step": 15167 }, { "epoch": 2.4910188245437563, "grad_norm": 0.34081483839278837, "learning_rate": 4.595897554736671e-06, "loss": 0.4411, "step": 15168 }, { "epoch": 2.491183051752099, "grad_norm": 0.3703165743951746, "learning_rate": 4.5954286885193514e-06, "loss": 0.4255, "step": 15169 }, { "epoch": 2.4913472789604416, "grad_norm": 0.2788333157420013, "learning_rate": 4.5949598212655845e-06, "loss": 0.4364, "step": 15170 }, { "epoch": 2.4915115061687847, "grad_norm": 0.35393728188332935, "learning_rate": 4.594490952980459e-06, "loss": 0.4183, "step": 15171 }, { "epoch": 2.4916757333771273, "grad_norm": 0.6014347064530057, "learning_rate": 4.594022083669069e-06, "loss": 0.4263, "step": 15172 }, { "epoch": 2.49183996058547, "grad_norm": 0.3243854776951079, "learning_rate": 4.593553213336507e-06, "loss": 0.4318, "step": 15173 }, { "epoch": 2.4920041877938126, "grad_norm": 0.392814323494369, "learning_rate": 4.593084341987864e-06, "loss": 0.4218, "step": 15174 }, { "epoch": 2.4921684150021557, "grad_norm": 0.34098247165368967, "learning_rate": 4.592615469628235e-06, "loss": 0.4641, "step": 15175 }, { "epoch": 2.4923326422104983, "grad_norm": 0.35972202448556473, "learning_rate": 4.59214659626271e-06, "loss": 0.4248, "step": 15176 }, { "epoch": 2.492496869418841, "grad_norm": 0.2892343843339758, "learning_rate": 4.591677721896382e-06, "loss": 0.415, "step": 15177 }, { "epoch": 2.4926610966271836, "grad_norm": 0.3099738318685391, "learning_rate": 4.591208846534344e-06, "loss": 0.4426, "step": 15178 }, { "epoch": 2.4928253238355262, "grad_norm": 0.3571184083633324, "learning_rate": 4.590739970181689e-06, "loss": 0.4301, "step": 15179 }, { "epoch": 2.4929895510438693, "grad_norm": 0.33853164613700226, "learning_rate": 4.590271092843507e-06, "loss": 0.449, "step": 15180 }, { "epoch": 2.493153778252212, "grad_norm": 0.30508015812979095, "learning_rate": 4.589802214524896e-06, "loss": 0.4246, "step": 15181 }, { "epoch": 2.4933180054605546, "grad_norm": 0.3546083751853678, "learning_rate": 4.58933333523094e-06, "loss": 0.4307, "step": 15182 }, { "epoch": 2.4934822326688972, "grad_norm": 0.34676750407362766, "learning_rate": 4.5888644549667384e-06, "loss": 0.4382, "step": 15183 }, { "epoch": 2.4936464598772403, "grad_norm": 0.33336637439983086, "learning_rate": 4.58839557373738e-06, "loss": 0.4363, "step": 15184 }, { "epoch": 2.493810687085583, "grad_norm": 0.3562136027603175, "learning_rate": 4.58792669154796e-06, "loss": 0.4254, "step": 15185 }, { "epoch": 2.4939749142939256, "grad_norm": 0.30767491608825387, "learning_rate": 4.587457808403569e-06, "loss": 0.4312, "step": 15186 }, { "epoch": 2.4941391415022682, "grad_norm": 0.3717157302565822, "learning_rate": 4.586988924309302e-06, "loss": 0.4342, "step": 15187 }, { "epoch": 2.4943033687106113, "grad_norm": 0.31073508635900254, "learning_rate": 4.586520039270247e-06, "loss": 0.4286, "step": 15188 }, { "epoch": 2.494467595918954, "grad_norm": 0.300935391463759, "learning_rate": 4.5860511532915e-06, "loss": 0.429, "step": 15189 }, { "epoch": 2.4946318231272966, "grad_norm": 0.2891538888734539, "learning_rate": 4.585582266378153e-06, "loss": 0.4273, "step": 15190 }, { "epoch": 2.4947960503356392, "grad_norm": 0.3636855265250306, "learning_rate": 4.5851133785353e-06, "loss": 0.4359, "step": 15191 }, { "epoch": 2.4949602775439823, "grad_norm": 0.31238609339074075, "learning_rate": 4.584644489768029e-06, "loss": 0.4496, "step": 15192 }, { "epoch": 2.495124504752325, "grad_norm": 0.3466335457330843, "learning_rate": 4.584175600081438e-06, "loss": 0.4251, "step": 15193 }, { "epoch": 2.4952887319606676, "grad_norm": 0.28513115942321365, "learning_rate": 4.583706709480615e-06, "loss": 0.4303, "step": 15194 }, { "epoch": 2.49545295916901, "grad_norm": 0.3313604508733959, "learning_rate": 4.5832378179706564e-06, "loss": 0.4239, "step": 15195 }, { "epoch": 2.495617186377353, "grad_norm": 0.2735952782125291, "learning_rate": 4.582768925556653e-06, "loss": 0.4314, "step": 15196 }, { "epoch": 2.495781413585696, "grad_norm": 0.3199061503551554, "learning_rate": 4.582300032243698e-06, "loss": 0.4437, "step": 15197 }, { "epoch": 2.4959456407940386, "grad_norm": 0.2941447415940853, "learning_rate": 4.581831138036882e-06, "loss": 0.4434, "step": 15198 }, { "epoch": 2.496109868002381, "grad_norm": 0.3355786824721521, "learning_rate": 4.5813622429413e-06, "loss": 0.4277, "step": 15199 }, { "epoch": 2.496274095210724, "grad_norm": 0.28103705919862076, "learning_rate": 4.580893346962045e-06, "loss": 0.4314, "step": 15200 }, { "epoch": 2.496438322419067, "grad_norm": 0.3227157508902223, "learning_rate": 4.5804244501042085e-06, "loss": 0.4493, "step": 15201 }, { "epoch": 2.4966025496274096, "grad_norm": 0.2970871193039058, "learning_rate": 4.5799555523728824e-06, "loss": 0.42, "step": 15202 }, { "epoch": 2.496766776835752, "grad_norm": 0.4342254727787912, "learning_rate": 4.57948665377316e-06, "loss": 0.4333, "step": 15203 }, { "epoch": 2.496931004044095, "grad_norm": 0.3422407517356136, "learning_rate": 4.579017754310136e-06, "loss": 0.4373, "step": 15204 }, { "epoch": 2.497095231252438, "grad_norm": 0.32443689801201475, "learning_rate": 4.578548853988901e-06, "loss": 0.4196, "step": 15205 }, { "epoch": 2.4972594584607806, "grad_norm": 0.3213615230691932, "learning_rate": 4.578079952814547e-06, "loss": 0.4284, "step": 15206 }, { "epoch": 2.497423685669123, "grad_norm": 0.30105959294998685, "learning_rate": 4.577611050792169e-06, "loss": 0.4037, "step": 15207 }, { "epoch": 2.497587912877466, "grad_norm": 0.29610221993478114, "learning_rate": 4.577142147926859e-06, "loss": 0.448, "step": 15208 }, { "epoch": 2.497752140085809, "grad_norm": 0.300053251348721, "learning_rate": 4.576673244223709e-06, "loss": 0.4284, "step": 15209 }, { "epoch": 2.4979163672941516, "grad_norm": 0.4108333131032182, "learning_rate": 4.576204339687812e-06, "loss": 0.4393, "step": 15210 }, { "epoch": 2.498080594502494, "grad_norm": 0.42413555863246477, "learning_rate": 4.57573543432426e-06, "loss": 0.4404, "step": 15211 }, { "epoch": 2.498244821710837, "grad_norm": 0.3744832314644596, "learning_rate": 4.5752665281381474e-06, "loss": 0.427, "step": 15212 }, { "epoch": 2.4984090489191795, "grad_norm": 0.5036243765745887, "learning_rate": 4.574797621134566e-06, "loss": 0.4456, "step": 15213 }, { "epoch": 2.4985732761275226, "grad_norm": 0.39270734558141457, "learning_rate": 4.574328713318609e-06, "loss": 0.4252, "step": 15214 }, { "epoch": 2.498737503335865, "grad_norm": 0.29570628940704496, "learning_rate": 4.57385980469537e-06, "loss": 0.4527, "step": 15215 }, { "epoch": 2.498901730544208, "grad_norm": 0.7676602433712431, "learning_rate": 4.573390895269941e-06, "loss": 0.44, "step": 15216 }, { "epoch": 2.4990659577525505, "grad_norm": 0.6201610602230498, "learning_rate": 4.572921985047413e-06, "loss": 0.4306, "step": 15217 }, { "epoch": 2.4992301849608936, "grad_norm": 1.715363184871039, "learning_rate": 4.572453074032881e-06, "loss": 0.4368, "step": 15218 }, { "epoch": 2.499394412169236, "grad_norm": 0.3603458735228863, "learning_rate": 4.571984162231437e-06, "loss": 0.4428, "step": 15219 }, { "epoch": 2.499558639377579, "grad_norm": 0.29670745837822965, "learning_rate": 4.571515249648174e-06, "loss": 0.408, "step": 15220 }, { "epoch": 2.4997228665859215, "grad_norm": 0.28863059782458106, "learning_rate": 4.571046336288186e-06, "loss": 0.4034, "step": 15221 }, { "epoch": 2.4998870937942645, "grad_norm": 0.3231624496999091, "learning_rate": 4.570577422156564e-06, "loss": 0.4262, "step": 15222 }, { "epoch": 2.500051321002607, "grad_norm": 0.28074500386612955, "learning_rate": 4.570108507258403e-06, "loss": 0.4477, "step": 15223 }, { "epoch": 2.50021554821095, "grad_norm": 0.2709352897613838, "learning_rate": 4.569639591598794e-06, "loss": 0.4358, "step": 15224 }, { "epoch": 2.5003797754192925, "grad_norm": 0.28514705073201974, "learning_rate": 4.569170675182831e-06, "loss": 0.4236, "step": 15225 }, { "epoch": 2.5005440026276355, "grad_norm": 0.2873450628834571, "learning_rate": 4.5687017580156055e-06, "loss": 0.4324, "step": 15226 }, { "epoch": 2.500708229835978, "grad_norm": 0.29247613976893794, "learning_rate": 4.568232840102211e-06, "loss": 0.4417, "step": 15227 }, { "epoch": 2.500872457044321, "grad_norm": 0.3039572928244555, "learning_rate": 4.567763921447741e-06, "loss": 0.424, "step": 15228 }, { "epoch": 2.5010366842526635, "grad_norm": 0.3498139299117389, "learning_rate": 4.56729500205729e-06, "loss": 0.4579, "step": 15229 }, { "epoch": 2.501200911461006, "grad_norm": 0.35502698847985975, "learning_rate": 4.566826081935947e-06, "loss": 0.4223, "step": 15230 }, { "epoch": 2.501365138669349, "grad_norm": 0.3317335064782728, "learning_rate": 4.566357161088808e-06, "loss": 0.4263, "step": 15231 }, { "epoch": 2.501529365877692, "grad_norm": 0.3393369552996207, "learning_rate": 4.565888239520963e-06, "loss": 0.4415, "step": 15232 }, { "epoch": 2.5016935930860345, "grad_norm": 0.40649164473995997, "learning_rate": 4.56541931723751e-06, "loss": 0.4196, "step": 15233 }, { "epoch": 2.5018578202943775, "grad_norm": 0.5651714899939198, "learning_rate": 4.564950394243538e-06, "loss": 0.43, "step": 15234 }, { "epoch": 2.50202204750272, "grad_norm": 0.2903708190570917, "learning_rate": 4.564481470544139e-06, "loss": 0.4101, "step": 15235 }, { "epoch": 2.502186274711063, "grad_norm": 0.407942080989367, "learning_rate": 4.564012546144409e-06, "loss": 0.4586, "step": 15236 }, { "epoch": 2.5023505019194054, "grad_norm": 0.30899819635242237, "learning_rate": 4.56354362104944e-06, "loss": 0.4397, "step": 15237 }, { "epoch": 2.502514729127748, "grad_norm": 0.3462923651598003, "learning_rate": 4.563074695264324e-06, "loss": 0.4532, "step": 15238 }, { "epoch": 2.5026789563360907, "grad_norm": 0.3220893341552208, "learning_rate": 4.562605768794156e-06, "loss": 0.4317, "step": 15239 }, { "epoch": 2.502843183544434, "grad_norm": 0.31180577802122095, "learning_rate": 4.562136841644027e-06, "loss": 0.4425, "step": 15240 }, { "epoch": 2.5030074107527764, "grad_norm": 0.2764546692709857, "learning_rate": 4.561667913819031e-06, "loss": 0.4437, "step": 15241 }, { "epoch": 2.503171637961119, "grad_norm": 0.31019314339615184, "learning_rate": 4.56119898532426e-06, "loss": 0.4204, "step": 15242 }, { "epoch": 2.503335865169462, "grad_norm": 0.3931365921905194, "learning_rate": 4.560730056164808e-06, "loss": 0.4276, "step": 15243 }, { "epoch": 2.503500092377805, "grad_norm": 0.3767687885431783, "learning_rate": 4.560261126345769e-06, "loss": 0.4344, "step": 15244 }, { "epoch": 2.5036643195861474, "grad_norm": 0.35056231533364646, "learning_rate": 4.5597921958722336e-06, "loss": 0.439, "step": 15245 }, { "epoch": 2.50382854679449, "grad_norm": 0.34796478913559775, "learning_rate": 4.559323264749297e-06, "loss": 0.4379, "step": 15246 }, { "epoch": 2.5039927740028327, "grad_norm": 0.364929157109696, "learning_rate": 4.5588543329820504e-06, "loss": 0.435, "step": 15247 }, { "epoch": 2.504157001211176, "grad_norm": 0.30504356991755455, "learning_rate": 4.5583854005755875e-06, "loss": 0.4206, "step": 15248 }, { "epoch": 2.5043212284195184, "grad_norm": 0.33326552576505075, "learning_rate": 4.5579164675350035e-06, "loss": 0.4366, "step": 15249 }, { "epoch": 2.504485455627861, "grad_norm": 0.30634993507320807, "learning_rate": 4.557447533865389e-06, "loss": 0.424, "step": 15250 }, { "epoch": 2.504649682836204, "grad_norm": 0.3015603465660569, "learning_rate": 4.556978599571838e-06, "loss": 0.4437, "step": 15251 }, { "epoch": 2.504813910044547, "grad_norm": 0.404288094881335, "learning_rate": 4.556509664659441e-06, "loss": 0.4333, "step": 15252 }, { "epoch": 2.5049781372528894, "grad_norm": 0.26762791113011236, "learning_rate": 4.556040729133297e-06, "loss": 0.4433, "step": 15253 }, { "epoch": 2.505142364461232, "grad_norm": 0.3917995761448291, "learning_rate": 4.5555717929984945e-06, "loss": 0.42, "step": 15254 }, { "epoch": 2.5053065916695747, "grad_norm": 0.43586340257632006, "learning_rate": 4.5551028562601255e-06, "loss": 0.441, "step": 15255 }, { "epoch": 2.5054708188779173, "grad_norm": 0.5715050248374042, "learning_rate": 4.554633918923287e-06, "loss": 0.4354, "step": 15256 }, { "epoch": 2.5056350460862604, "grad_norm": 0.363662712363827, "learning_rate": 4.554164980993069e-06, "loss": 0.4175, "step": 15257 }, { "epoch": 2.505799273294603, "grad_norm": 0.3369993889608383, "learning_rate": 4.553696042474569e-06, "loss": 0.4322, "step": 15258 }, { "epoch": 2.5059635005029457, "grad_norm": 0.3051343689476861, "learning_rate": 4.5532271033728745e-06, "loss": 0.4337, "step": 15259 }, { "epoch": 2.5061277277112888, "grad_norm": 0.34664385637290773, "learning_rate": 4.552758163693083e-06, "loss": 0.4378, "step": 15260 }, { "epoch": 2.5062919549196314, "grad_norm": 0.38369436861216527, "learning_rate": 4.552289223440284e-06, "loss": 0.425, "step": 15261 }, { "epoch": 2.506456182127974, "grad_norm": 0.31098976283094065, "learning_rate": 4.551820282619575e-06, "loss": 0.4392, "step": 15262 }, { "epoch": 2.5066204093363167, "grad_norm": 0.2924212913716923, "learning_rate": 4.551351341236044e-06, "loss": 0.4387, "step": 15263 }, { "epoch": 2.5067846365446593, "grad_norm": 0.3614952525688465, "learning_rate": 4.55088239929479e-06, "loss": 0.4477, "step": 15264 }, { "epoch": 2.5069488637530024, "grad_norm": 0.31854411737094895, "learning_rate": 4.5504134568009e-06, "loss": 0.447, "step": 15265 }, { "epoch": 2.507113090961345, "grad_norm": 0.33338819590292956, "learning_rate": 4.549944513759473e-06, "loss": 0.4543, "step": 15266 }, { "epoch": 2.5072773181696877, "grad_norm": 0.37005363688745724, "learning_rate": 4.549475570175597e-06, "loss": 0.4504, "step": 15267 }, { "epoch": 2.5074415453780308, "grad_norm": 0.31931176261796645, "learning_rate": 4.5490066260543694e-06, "loss": 0.4491, "step": 15268 }, { "epoch": 2.5076057725863734, "grad_norm": 0.4028200604365174, "learning_rate": 4.548537681400881e-06, "loss": 0.4354, "step": 15269 }, { "epoch": 2.507769999794716, "grad_norm": 0.34431494305601784, "learning_rate": 4.548068736220224e-06, "loss": 0.4327, "step": 15270 }, { "epoch": 2.5079342270030587, "grad_norm": 0.3359105652446257, "learning_rate": 4.547599790517496e-06, "loss": 0.4452, "step": 15271 }, { "epoch": 2.5080984542114013, "grad_norm": 0.36606184117765694, "learning_rate": 4.547130844297786e-06, "loss": 0.4168, "step": 15272 }, { "epoch": 2.508262681419744, "grad_norm": 0.46539092867000703, "learning_rate": 4.546661897566189e-06, "loss": 0.43, "step": 15273 }, { "epoch": 2.508426908628087, "grad_norm": 0.3260456914724407, "learning_rate": 4.546192950327797e-06, "loss": 0.4333, "step": 15274 }, { "epoch": 2.5085911358364297, "grad_norm": 0.431282609159347, "learning_rate": 4.545724002587706e-06, "loss": 0.4328, "step": 15275 }, { "epoch": 2.5087553630447723, "grad_norm": 0.31874399961113387, "learning_rate": 4.5452550543510055e-06, "loss": 0.4409, "step": 15276 }, { "epoch": 2.5089195902531154, "grad_norm": 0.36712748209494483, "learning_rate": 4.544786105622791e-06, "loss": 0.4079, "step": 15277 }, { "epoch": 2.509083817461458, "grad_norm": 0.2955054728370622, "learning_rate": 4.544317156408157e-06, "loss": 0.433, "step": 15278 }, { "epoch": 2.5092480446698007, "grad_norm": 0.31795398888195725, "learning_rate": 4.543848206712193e-06, "loss": 0.4335, "step": 15279 }, { "epoch": 2.5094122718781433, "grad_norm": 0.327166579049544, "learning_rate": 4.543379256539995e-06, "loss": 0.4405, "step": 15280 }, { "epoch": 2.509576499086486, "grad_norm": 0.29009950534624457, "learning_rate": 4.542910305896655e-06, "loss": 0.4215, "step": 15281 }, { "epoch": 2.509740726294829, "grad_norm": 0.5040405319999883, "learning_rate": 4.542441354787268e-06, "loss": 0.4146, "step": 15282 }, { "epoch": 2.5099049535031717, "grad_norm": 0.3522597372682301, "learning_rate": 4.541972403216927e-06, "loss": 0.4387, "step": 15283 }, { "epoch": 2.5100691807115143, "grad_norm": 0.2726311387047777, "learning_rate": 4.5415034511907226e-06, "loss": 0.4354, "step": 15284 }, { "epoch": 2.5102334079198574, "grad_norm": 0.302638717120387, "learning_rate": 4.5410344987137496e-06, "loss": 0.435, "step": 15285 }, { "epoch": 2.5103976351282, "grad_norm": 0.2967323251347567, "learning_rate": 4.5405655457911026e-06, "loss": 0.4407, "step": 15286 }, { "epoch": 2.5105618623365427, "grad_norm": 0.31331167849939856, "learning_rate": 4.540096592427874e-06, "loss": 0.4412, "step": 15287 }, { "epoch": 2.5107260895448853, "grad_norm": 0.2815771015808261, "learning_rate": 4.539627638629157e-06, "loss": 0.4126, "step": 15288 }, { "epoch": 2.510890316753228, "grad_norm": 0.4327857806293426, "learning_rate": 4.539158684400044e-06, "loss": 0.4141, "step": 15289 }, { "epoch": 2.5110545439615706, "grad_norm": 0.33791685760518037, "learning_rate": 4.538689729745629e-06, "loss": 0.4324, "step": 15290 }, { "epoch": 2.5112187711699137, "grad_norm": 0.3428719333544963, "learning_rate": 4.538220774671005e-06, "loss": 0.4483, "step": 15291 }, { "epoch": 2.5113829983782563, "grad_norm": 0.30672105224945806, "learning_rate": 4.537751819181268e-06, "loss": 0.4369, "step": 15292 }, { "epoch": 2.511547225586599, "grad_norm": 0.6634715199384176, "learning_rate": 4.537282863281509e-06, "loss": 0.428, "step": 15293 }, { "epoch": 2.511711452794942, "grad_norm": 0.31872231581495836, "learning_rate": 4.536813906976819e-06, "loss": 0.433, "step": 15294 }, { "epoch": 2.5118756800032846, "grad_norm": 0.3231534958659712, "learning_rate": 4.536344950272295e-06, "loss": 0.4355, "step": 15295 }, { "epoch": 2.5120399072116273, "grad_norm": 0.2757083960884838, "learning_rate": 4.535875993173029e-06, "loss": 0.424, "step": 15296 }, { "epoch": 2.51220413441997, "grad_norm": 0.3142120617565392, "learning_rate": 4.535407035684115e-06, "loss": 0.4487, "step": 15297 }, { "epoch": 2.5123683616283126, "grad_norm": 0.34568721423469645, "learning_rate": 4.534938077810646e-06, "loss": 0.4384, "step": 15298 }, { "epoch": 2.5125325888366556, "grad_norm": 0.3676976562163103, "learning_rate": 4.534469119557714e-06, "loss": 0.4517, "step": 15299 }, { "epoch": 2.5126968160449983, "grad_norm": 0.4372151567267689, "learning_rate": 4.534000160930414e-06, "loss": 0.4505, "step": 15300 }, { "epoch": 2.512861043253341, "grad_norm": 0.38459047394944434, "learning_rate": 4.5335312019338385e-06, "loss": 0.4281, "step": 15301 }, { "epoch": 2.513025270461684, "grad_norm": 0.2934325348012631, "learning_rate": 4.533062242573082e-06, "loss": 0.4287, "step": 15302 }, { "epoch": 2.5131894976700266, "grad_norm": 0.3307628319172223, "learning_rate": 4.532593282853236e-06, "loss": 0.4387, "step": 15303 }, { "epoch": 2.5133537248783693, "grad_norm": 0.5152030404954767, "learning_rate": 4.532124322779395e-06, "loss": 0.4185, "step": 15304 }, { "epoch": 2.513517952086712, "grad_norm": 0.29601355750979563, "learning_rate": 4.531655362356652e-06, "loss": 0.4424, "step": 15305 }, { "epoch": 2.5136821792950546, "grad_norm": 0.33875656452253916, "learning_rate": 4.531186401590102e-06, "loss": 0.4533, "step": 15306 }, { "epoch": 2.513846406503397, "grad_norm": 0.2905312012969298, "learning_rate": 4.530717440484836e-06, "loss": 0.4417, "step": 15307 }, { "epoch": 2.5140106337117403, "grad_norm": 0.45733470068768756, "learning_rate": 4.530248479045949e-06, "loss": 0.4504, "step": 15308 }, { "epoch": 2.514174860920083, "grad_norm": 0.3232055665647335, "learning_rate": 4.529779517278533e-06, "loss": 0.4336, "step": 15309 }, { "epoch": 2.5143390881284255, "grad_norm": 0.32595716015001, "learning_rate": 4.529310555187681e-06, "loss": 0.4239, "step": 15310 }, { "epoch": 2.5145033153367686, "grad_norm": 0.32457344849632785, "learning_rate": 4.528841592778489e-06, "loss": 0.4606, "step": 15311 }, { "epoch": 2.5146675425451113, "grad_norm": 0.4409322797051764, "learning_rate": 4.528372630056049e-06, "loss": 0.4443, "step": 15312 }, { "epoch": 2.514831769753454, "grad_norm": 0.3427318010419413, "learning_rate": 4.527903667025455e-06, "loss": 0.4069, "step": 15313 }, { "epoch": 2.5149959969617965, "grad_norm": 0.3094199668034755, "learning_rate": 4.527434703691799e-06, "loss": 0.4165, "step": 15314 }, { "epoch": 2.515160224170139, "grad_norm": 0.3129531860016772, "learning_rate": 4.526965740060174e-06, "loss": 0.4453, "step": 15315 }, { "epoch": 2.5153244513784823, "grad_norm": 0.36141679423305373, "learning_rate": 4.526496776135675e-06, "loss": 0.458, "step": 15316 }, { "epoch": 2.515488678586825, "grad_norm": 0.32103349571056183, "learning_rate": 4.526027811923398e-06, "loss": 0.4344, "step": 15317 }, { "epoch": 2.5156529057951675, "grad_norm": 0.35520418639316653, "learning_rate": 4.52555884742843e-06, "loss": 0.4542, "step": 15318 }, { "epoch": 2.5158171330035106, "grad_norm": 0.38140314268757286, "learning_rate": 4.525089882655868e-06, "loss": 0.4279, "step": 15319 }, { "epoch": 2.5159813602118533, "grad_norm": 0.3587596025920061, "learning_rate": 4.524620917610805e-06, "loss": 0.4487, "step": 15320 }, { "epoch": 2.516145587420196, "grad_norm": 0.31918676746353725, "learning_rate": 4.524151952298336e-06, "loss": 0.4269, "step": 15321 }, { "epoch": 2.5163098146285385, "grad_norm": 0.3006162767702321, "learning_rate": 4.523682986723553e-06, "loss": 0.4487, "step": 15322 }, { "epoch": 2.516474041836881, "grad_norm": 0.35412734411765195, "learning_rate": 4.523214020891549e-06, "loss": 0.4191, "step": 15323 }, { "epoch": 2.516638269045224, "grad_norm": 0.5091874206900988, "learning_rate": 4.5227450548074165e-06, "loss": 0.4503, "step": 15324 }, { "epoch": 2.516802496253567, "grad_norm": 0.2851578874900258, "learning_rate": 4.522276088476253e-06, "loss": 0.4352, "step": 15325 }, { "epoch": 2.5169667234619095, "grad_norm": 0.9997601491535643, "learning_rate": 4.5218071219031476e-06, "loss": 0.4468, "step": 15326 }, { "epoch": 2.517130950670252, "grad_norm": 0.29348842306959855, "learning_rate": 4.5213381550931955e-06, "loss": 0.4525, "step": 15327 }, { "epoch": 2.5172951778785952, "grad_norm": 0.2674552211461208, "learning_rate": 4.52086918805149e-06, "loss": 0.4416, "step": 15328 }, { "epoch": 2.517459405086938, "grad_norm": 0.2949016922161546, "learning_rate": 4.5204002207831255e-06, "loss": 0.4262, "step": 15329 }, { "epoch": 2.5176236322952805, "grad_norm": 0.32887885088298996, "learning_rate": 4.519931253293194e-06, "loss": 0.4439, "step": 15330 }, { "epoch": 2.517787859503623, "grad_norm": 0.44021383142814, "learning_rate": 4.519462285586789e-06, "loss": 0.444, "step": 15331 }, { "epoch": 2.517952086711966, "grad_norm": 0.3943727299935503, "learning_rate": 4.518993317669005e-06, "loss": 0.4211, "step": 15332 }, { "epoch": 2.518116313920309, "grad_norm": 0.46339240270561693, "learning_rate": 4.5185243495449346e-06, "loss": 0.4441, "step": 15333 }, { "epoch": 2.5182805411286515, "grad_norm": 0.2996537058649584, "learning_rate": 4.518055381219671e-06, "loss": 0.4177, "step": 15334 }, { "epoch": 2.518444768336994, "grad_norm": 0.38842685502754193, "learning_rate": 4.517586412698308e-06, "loss": 0.4356, "step": 15335 }, { "epoch": 2.5186089955453372, "grad_norm": 0.45335596462941113, "learning_rate": 4.517117443985942e-06, "loss": 0.4299, "step": 15336 }, { "epoch": 2.51877322275368, "grad_norm": 0.3111046050312757, "learning_rate": 4.516648475087662e-06, "loss": 0.4326, "step": 15337 }, { "epoch": 2.5189374499620225, "grad_norm": 0.3004520651914907, "learning_rate": 4.516179506008563e-06, "loss": 0.4291, "step": 15338 }, { "epoch": 2.519101677170365, "grad_norm": 0.30623244552605644, "learning_rate": 4.515710536753737e-06, "loss": 0.4452, "step": 15339 }, { "epoch": 2.519265904378708, "grad_norm": 0.29111646298893507, "learning_rate": 4.515241567328281e-06, "loss": 0.4212, "step": 15340 }, { "epoch": 2.5194301315870504, "grad_norm": 0.292066653111252, "learning_rate": 4.514772597737286e-06, "loss": 0.4405, "step": 15341 }, { "epoch": 2.5195943587953935, "grad_norm": 0.45941378411880673, "learning_rate": 4.514303627985848e-06, "loss": 0.4301, "step": 15342 }, { "epoch": 2.519758586003736, "grad_norm": 0.36884625842137847, "learning_rate": 4.513834658079056e-06, "loss": 0.4311, "step": 15343 }, { "epoch": 2.519922813212079, "grad_norm": 0.3267743781596869, "learning_rate": 4.513365688022006e-06, "loss": 0.4263, "step": 15344 }, { "epoch": 2.520087040420422, "grad_norm": 0.4746571514823164, "learning_rate": 4.512896717819792e-06, "loss": 0.4373, "step": 15345 }, { "epoch": 2.5202512676287645, "grad_norm": 0.3153602181102855, "learning_rate": 4.512427747477508e-06, "loss": 0.4146, "step": 15346 }, { "epoch": 2.520415494837107, "grad_norm": 0.46102400221620937, "learning_rate": 4.511958777000246e-06, "loss": 0.4219, "step": 15347 }, { "epoch": 2.5205797220454498, "grad_norm": 0.35545244939339854, "learning_rate": 4.5114898063931e-06, "loss": 0.4453, "step": 15348 }, { "epoch": 2.5207439492537924, "grad_norm": 0.3778700125325818, "learning_rate": 4.5110208356611625e-06, "loss": 0.4593, "step": 15349 }, { "epoch": 2.5209081764621355, "grad_norm": 0.43565446974417477, "learning_rate": 4.510551864809529e-06, "loss": 0.4214, "step": 15350 }, { "epoch": 2.521072403670478, "grad_norm": 0.3153237572414692, "learning_rate": 4.5100828938432915e-06, "loss": 0.4309, "step": 15351 }, { "epoch": 2.5212366308788208, "grad_norm": 0.3206707578049655, "learning_rate": 4.509613922767543e-06, "loss": 0.4325, "step": 15352 }, { "epoch": 2.521400858087164, "grad_norm": 0.2925015730081957, "learning_rate": 4.509144951587378e-06, "loss": 0.4275, "step": 15353 }, { "epoch": 2.5215650852955065, "grad_norm": 0.29391592396077754, "learning_rate": 4.508675980307891e-06, "loss": 0.4336, "step": 15354 }, { "epoch": 2.521729312503849, "grad_norm": 0.3576499616385502, "learning_rate": 4.508207008934173e-06, "loss": 0.4186, "step": 15355 }, { "epoch": 2.5218935397121918, "grad_norm": 0.2790704744061716, "learning_rate": 4.50773803747132e-06, "loss": 0.4364, "step": 15356 }, { "epoch": 2.5220577669205344, "grad_norm": 0.2741415789587045, "learning_rate": 4.507269065924424e-06, "loss": 0.419, "step": 15357 }, { "epoch": 2.522221994128877, "grad_norm": 0.31922742530373754, "learning_rate": 4.506800094298579e-06, "loss": 0.426, "step": 15358 }, { "epoch": 2.52238622133722, "grad_norm": 0.30382675560648514, "learning_rate": 4.506331122598877e-06, "loss": 0.4343, "step": 15359 }, { "epoch": 2.5225504485455628, "grad_norm": 0.37896547303248035, "learning_rate": 4.505862150830413e-06, "loss": 0.4466, "step": 15360 }, { "epoch": 2.5227146757539054, "grad_norm": 0.3183774699805361, "learning_rate": 4.505393178998282e-06, "loss": 0.4527, "step": 15361 }, { "epoch": 2.5228789029622485, "grad_norm": 0.2846751786737191, "learning_rate": 4.504924207107573e-06, "loss": 0.4336, "step": 15362 }, { "epoch": 2.523043130170591, "grad_norm": 0.3478034808054333, "learning_rate": 4.504455235163385e-06, "loss": 0.4453, "step": 15363 }, { "epoch": 2.5232073573789338, "grad_norm": 0.2946001247345606, "learning_rate": 4.503986263170807e-06, "loss": 0.4382, "step": 15364 }, { "epoch": 2.5233715845872764, "grad_norm": 0.28990216086748805, "learning_rate": 4.503517291134936e-06, "loss": 0.4231, "step": 15365 }, { "epoch": 2.523535811795619, "grad_norm": 0.3804646638287557, "learning_rate": 4.503048319060862e-06, "loss": 0.4286, "step": 15366 }, { "epoch": 2.523700039003962, "grad_norm": 2.0413952323858604, "learning_rate": 4.502579346953682e-06, "loss": 0.445, "step": 15367 }, { "epoch": 2.5238642662123048, "grad_norm": 0.5064912268763407, "learning_rate": 4.5021103748184865e-06, "loss": 0.4368, "step": 15368 }, { "epoch": 2.5240284934206474, "grad_norm": 0.3052118739719438, "learning_rate": 4.50164140266037e-06, "loss": 0.4349, "step": 15369 }, { "epoch": 2.5241927206289905, "grad_norm": 0.3198249052491719, "learning_rate": 4.501172430484426e-06, "loss": 0.4264, "step": 15370 }, { "epoch": 2.524356947837333, "grad_norm": 0.5187552386652747, "learning_rate": 4.5007034582957505e-06, "loss": 0.4362, "step": 15371 }, { "epoch": 2.5245211750456757, "grad_norm": 0.2570604115758852, "learning_rate": 4.500234486099433e-06, "loss": 0.4435, "step": 15372 }, { "epoch": 2.5246854022540184, "grad_norm": 0.27968517151321504, "learning_rate": 4.499765513900568e-06, "loss": 0.4406, "step": 15373 }, { "epoch": 2.524849629462361, "grad_norm": 0.33267787718052455, "learning_rate": 4.499296541704251e-06, "loss": 0.4247, "step": 15374 }, { "epoch": 2.5250138566707037, "grad_norm": 0.3596710570956569, "learning_rate": 4.498827569515574e-06, "loss": 0.4202, "step": 15375 }, { "epoch": 2.5251780838790467, "grad_norm": 0.3257269029831687, "learning_rate": 4.49835859733963e-06, "loss": 0.4417, "step": 15376 }, { "epoch": 2.5253423110873894, "grad_norm": 0.4355993030836781, "learning_rate": 4.4978896251815155e-06, "loss": 0.4288, "step": 15377 }, { "epoch": 2.525506538295732, "grad_norm": 0.32559613958151995, "learning_rate": 4.49742065304632e-06, "loss": 0.4051, "step": 15378 }, { "epoch": 2.525670765504075, "grad_norm": 0.35226942817405693, "learning_rate": 4.496951680939139e-06, "loss": 0.4098, "step": 15379 }, { "epoch": 2.5258349927124177, "grad_norm": 0.31375777585851555, "learning_rate": 4.496482708865065e-06, "loss": 0.4481, "step": 15380 }, { "epoch": 2.5259992199207604, "grad_norm": 0.27628561903807153, "learning_rate": 4.496013736829193e-06, "loss": 0.4393, "step": 15381 }, { "epoch": 2.526163447129103, "grad_norm": 0.3308393282880393, "learning_rate": 4.495544764836616e-06, "loss": 0.4384, "step": 15382 }, { "epoch": 2.5263276743374457, "grad_norm": 0.3388520225022834, "learning_rate": 4.495075792892426e-06, "loss": 0.4407, "step": 15383 }, { "epoch": 2.5264919015457887, "grad_norm": 0.2992347879039684, "learning_rate": 4.494606821001719e-06, "loss": 0.4266, "step": 15384 }, { "epoch": 2.5266561287541314, "grad_norm": 0.3280214871252539, "learning_rate": 4.4941378491695864e-06, "loss": 0.4408, "step": 15385 }, { "epoch": 2.526820355962474, "grad_norm": 0.31297700953904556, "learning_rate": 4.493668877401125e-06, "loss": 0.4237, "step": 15386 }, { "epoch": 2.526984583170817, "grad_norm": 0.3584916739969418, "learning_rate": 4.493199905701423e-06, "loss": 0.4183, "step": 15387 }, { "epoch": 2.5271488103791597, "grad_norm": 0.26870273735724437, "learning_rate": 4.492730934075577e-06, "loss": 0.4187, "step": 15388 }, { "epoch": 2.5273130375875024, "grad_norm": 0.3480358281774671, "learning_rate": 4.492261962528681e-06, "loss": 0.4448, "step": 15389 }, { "epoch": 2.527477264795845, "grad_norm": 0.3162081780616614, "learning_rate": 4.491792991065828e-06, "loss": 0.4443, "step": 15390 }, { "epoch": 2.5276414920041876, "grad_norm": 0.2870245734500193, "learning_rate": 4.49132401969211e-06, "loss": 0.4417, "step": 15391 }, { "epoch": 2.5278057192125303, "grad_norm": 0.2879612790355667, "learning_rate": 4.490855048412621e-06, "loss": 0.4515, "step": 15392 }, { "epoch": 2.5279699464208734, "grad_norm": 0.3525720241438474, "learning_rate": 4.490386077232457e-06, "loss": 0.4362, "step": 15393 }, { "epoch": 2.528134173629216, "grad_norm": 0.34886617957552496, "learning_rate": 4.48991710615671e-06, "loss": 0.4293, "step": 15394 }, { "epoch": 2.5282984008375586, "grad_norm": 0.33907897924004743, "learning_rate": 4.489448135190472e-06, "loss": 0.4473, "step": 15395 }, { "epoch": 2.5284626280459017, "grad_norm": 0.3035129464459638, "learning_rate": 4.4889791643388385e-06, "loss": 0.4349, "step": 15396 }, { "epoch": 2.5286268552542444, "grad_norm": 0.30303534555734757, "learning_rate": 4.488510193606901e-06, "loss": 0.4357, "step": 15397 }, { "epoch": 2.528791082462587, "grad_norm": 0.790153063740717, "learning_rate": 4.4880412229997546e-06, "loss": 0.427, "step": 15398 }, { "epoch": 2.5289553096709296, "grad_norm": 0.3511038371128071, "learning_rate": 4.487572252522493e-06, "loss": 0.4357, "step": 15399 }, { "epoch": 2.5291195368792723, "grad_norm": 0.41092611732691453, "learning_rate": 4.4871032821802076e-06, "loss": 0.438, "step": 15400 }, { "epoch": 2.5292837640876153, "grad_norm": 0.3450527596392091, "learning_rate": 4.4866343119779936e-06, "loss": 0.456, "step": 15401 }, { "epoch": 2.529447991295958, "grad_norm": 0.31325138329002983, "learning_rate": 4.486165341920946e-06, "loss": 0.4486, "step": 15402 }, { "epoch": 2.5296122185043006, "grad_norm": 0.4049086399645973, "learning_rate": 4.485696372014154e-06, "loss": 0.4273, "step": 15403 }, { "epoch": 2.5297764457126437, "grad_norm": 0.3570244695616439, "learning_rate": 4.485227402262715e-06, "loss": 0.4435, "step": 15404 }, { "epoch": 2.5299406729209863, "grad_norm": 0.26325068167244875, "learning_rate": 4.48475843267172e-06, "loss": 0.4536, "step": 15405 }, { "epoch": 2.530104900129329, "grad_norm": 0.4771733843902166, "learning_rate": 4.484289463246263e-06, "loss": 0.4402, "step": 15406 }, { "epoch": 2.5302691273376716, "grad_norm": 0.5682448574100989, "learning_rate": 4.483820493991438e-06, "loss": 0.4431, "step": 15407 }, { "epoch": 2.5304333545460143, "grad_norm": 0.4079954803323253, "learning_rate": 4.483351524912339e-06, "loss": 0.4283, "step": 15408 }, { "epoch": 2.530597581754357, "grad_norm": 0.3742329415968252, "learning_rate": 4.48288255601406e-06, "loss": 0.4355, "step": 15409 }, { "epoch": 2.5307618089627, "grad_norm": 0.31517860625091276, "learning_rate": 4.482413587301691e-06, "loss": 0.4219, "step": 15410 }, { "epoch": 2.5309260361710426, "grad_norm": 0.32624001883317155, "learning_rate": 4.48194461878033e-06, "loss": 0.4355, "step": 15411 }, { "epoch": 2.5310902633793853, "grad_norm": 0.4844251612004818, "learning_rate": 4.481475650455067e-06, "loss": 0.4177, "step": 15412 }, { "epoch": 2.5312544905877283, "grad_norm": 0.41499405644931575, "learning_rate": 4.481006682330996e-06, "loss": 0.4381, "step": 15413 }, { "epoch": 2.531418717796071, "grad_norm": 0.3254658251219828, "learning_rate": 4.480537714413212e-06, "loss": 0.4417, "step": 15414 }, { "epoch": 2.5315829450044136, "grad_norm": 0.28061612293188354, "learning_rate": 4.480068746706807e-06, "loss": 0.4429, "step": 15415 }, { "epoch": 2.5317471722127562, "grad_norm": 0.29524776058023783, "learning_rate": 4.479599779216875e-06, "loss": 0.436, "step": 15416 }, { "epoch": 2.531911399421099, "grad_norm": 0.275370031459336, "learning_rate": 4.47913081194851e-06, "loss": 0.4402, "step": 15417 }, { "epoch": 2.532075626629442, "grad_norm": 0.382911694822015, "learning_rate": 4.478661844906805e-06, "loss": 0.4273, "step": 15418 }, { "epoch": 2.5322398538377846, "grad_norm": 0.2856386139248631, "learning_rate": 4.4781928780968535e-06, "loss": 0.4315, "step": 15419 }, { "epoch": 2.5324040810461272, "grad_norm": 0.3662003334417495, "learning_rate": 4.477723911523749e-06, "loss": 0.4377, "step": 15420 }, { "epoch": 2.5325683082544703, "grad_norm": 0.40571562617701123, "learning_rate": 4.477254945192584e-06, "loss": 0.4411, "step": 15421 }, { "epoch": 2.532732535462813, "grad_norm": 0.33072718734849793, "learning_rate": 4.4767859791084525e-06, "loss": 0.4123, "step": 15422 }, { "epoch": 2.5328967626711556, "grad_norm": 0.31030981069280167, "learning_rate": 4.4763170132764474e-06, "loss": 0.4415, "step": 15423 }, { "epoch": 2.5330609898794982, "grad_norm": 0.3393961988335576, "learning_rate": 4.475848047701664e-06, "loss": 0.4484, "step": 15424 }, { "epoch": 2.533225217087841, "grad_norm": 0.3355243579966591, "learning_rate": 4.475379082389194e-06, "loss": 0.4345, "step": 15425 }, { "epoch": 2.5333894442961835, "grad_norm": 0.2739009867205716, "learning_rate": 4.474910117344132e-06, "loss": 0.4299, "step": 15426 }, { "epoch": 2.5335536715045266, "grad_norm": 0.30376055594266194, "learning_rate": 4.474441152571572e-06, "loss": 0.4146, "step": 15427 }, { "epoch": 2.5337178987128692, "grad_norm": 0.3275017172400074, "learning_rate": 4.473972188076604e-06, "loss": 0.421, "step": 15428 }, { "epoch": 2.533882125921212, "grad_norm": 0.32453479680479597, "learning_rate": 4.473503223864325e-06, "loss": 0.4396, "step": 15429 }, { "epoch": 2.534046353129555, "grad_norm": 0.48443152017160745, "learning_rate": 4.473034259939826e-06, "loss": 0.4342, "step": 15430 }, { "epoch": 2.5342105803378976, "grad_norm": 0.30446815466227956, "learning_rate": 4.472565296308202e-06, "loss": 0.4375, "step": 15431 }, { "epoch": 2.5343748075462402, "grad_norm": 0.28200424035613053, "learning_rate": 4.472096332974545e-06, "loss": 0.4148, "step": 15432 }, { "epoch": 2.534539034754583, "grad_norm": 0.27325398958681874, "learning_rate": 4.4716273699439515e-06, "loss": 0.4212, "step": 15433 }, { "epoch": 2.5347032619629255, "grad_norm": 0.2784740527403517, "learning_rate": 4.471158407221511e-06, "loss": 0.4329, "step": 15434 }, { "epoch": 2.5348674891712686, "grad_norm": 0.33152751718058054, "learning_rate": 4.470689444812318e-06, "loss": 0.4278, "step": 15435 }, { "epoch": 2.535031716379611, "grad_norm": 0.3668173661236447, "learning_rate": 4.470220482721469e-06, "loss": 0.4222, "step": 15436 }, { "epoch": 2.535195943587954, "grad_norm": 0.30285348260361367, "learning_rate": 4.469751520954053e-06, "loss": 0.4429, "step": 15437 }, { "epoch": 2.535360170796297, "grad_norm": 0.3455367713469772, "learning_rate": 4.469282559515165e-06, "loss": 0.4236, "step": 15438 }, { "epoch": 2.5355243980046396, "grad_norm": 0.2989934245140012, "learning_rate": 4.4688135984098994e-06, "loss": 0.4492, "step": 15439 }, { "epoch": 2.535688625212982, "grad_norm": 0.5199522243515112, "learning_rate": 4.468344637643349e-06, "loss": 0.4461, "step": 15440 }, { "epoch": 2.535852852421325, "grad_norm": 0.3047139296928439, "learning_rate": 4.467875677220605e-06, "loss": 0.456, "step": 15441 }, { "epoch": 2.5360170796296675, "grad_norm": 0.4383952993691737, "learning_rate": 4.467406717146764e-06, "loss": 0.4231, "step": 15442 }, { "epoch": 2.53618130683801, "grad_norm": 0.29397560831634006, "learning_rate": 4.466937757426919e-06, "loss": 0.4269, "step": 15443 }, { "epoch": 2.536345534046353, "grad_norm": 0.38538600677786994, "learning_rate": 4.4664687980661635e-06, "loss": 0.4527, "step": 15444 }, { "epoch": 2.536509761254696, "grad_norm": 0.374965751531902, "learning_rate": 4.465999839069588e-06, "loss": 0.4519, "step": 15445 }, { "epoch": 2.5366739884630385, "grad_norm": 0.3716477346667442, "learning_rate": 4.465530880442287e-06, "loss": 0.4409, "step": 15446 }, { "epoch": 2.5368382156713816, "grad_norm": 1.7589956201460324, "learning_rate": 4.4650619221893555e-06, "loss": 0.431, "step": 15447 }, { "epoch": 2.537002442879724, "grad_norm": 0.36684972649658787, "learning_rate": 4.464592964315886e-06, "loss": 0.4427, "step": 15448 }, { "epoch": 2.537166670088067, "grad_norm": 0.33423643193368346, "learning_rate": 4.464124006826971e-06, "loss": 0.4296, "step": 15449 }, { "epoch": 2.5373308972964095, "grad_norm": 0.3044931364167006, "learning_rate": 4.463655049727705e-06, "loss": 0.4352, "step": 15450 }, { "epoch": 2.537495124504752, "grad_norm": 0.30416152178632283, "learning_rate": 4.463186093023181e-06, "loss": 0.437, "step": 15451 }, { "epoch": 2.537659351713095, "grad_norm": 0.4942313735733957, "learning_rate": 4.462717136718494e-06, "loss": 0.4555, "step": 15452 }, { "epoch": 2.537823578921438, "grad_norm": 0.3510260293567135, "learning_rate": 4.462248180818733e-06, "loss": 0.4535, "step": 15453 }, { "epoch": 2.5379878061297805, "grad_norm": 0.3444097092072025, "learning_rate": 4.461779225328995e-06, "loss": 0.4235, "step": 15454 }, { "epoch": 2.5381520333381236, "grad_norm": 0.3186023690214023, "learning_rate": 4.461310270254372e-06, "loss": 0.4269, "step": 15455 }, { "epoch": 2.538316260546466, "grad_norm": 0.28283435452405786, "learning_rate": 4.4608413155999574e-06, "loss": 0.4461, "step": 15456 }, { "epoch": 2.538480487754809, "grad_norm": 0.2835693384007139, "learning_rate": 4.460372361370844e-06, "loss": 0.4179, "step": 15457 }, { "epoch": 2.5386447149631515, "grad_norm": 0.469588775510588, "learning_rate": 4.459903407572127e-06, "loss": 0.4278, "step": 15458 }, { "epoch": 2.538808942171494, "grad_norm": 0.31968267198673356, "learning_rate": 4.459434454208898e-06, "loss": 0.4319, "step": 15459 }, { "epoch": 2.5389731693798367, "grad_norm": 0.3139217810294942, "learning_rate": 4.45896550128625e-06, "loss": 0.4486, "step": 15460 }, { "epoch": 2.53913739658818, "grad_norm": 0.6289982528503534, "learning_rate": 4.458496548809279e-06, "loss": 0.4383, "step": 15461 }, { "epoch": 2.5393016237965225, "grad_norm": 0.43248669042514953, "learning_rate": 4.458027596783075e-06, "loss": 0.4298, "step": 15462 }, { "epoch": 2.539465851004865, "grad_norm": 0.32175232275524884, "learning_rate": 4.457558645212733e-06, "loss": 0.4305, "step": 15463 }, { "epoch": 2.539630078213208, "grad_norm": 0.33913465217808964, "learning_rate": 4.457089694103345e-06, "loss": 0.4423, "step": 15464 }, { "epoch": 2.539794305421551, "grad_norm": 0.4217029864914242, "learning_rate": 4.456620743460005e-06, "loss": 0.4327, "step": 15465 }, { "epoch": 2.5399585326298935, "grad_norm": 0.32107328028587867, "learning_rate": 4.456151793287807e-06, "loss": 0.4595, "step": 15466 }, { "epoch": 2.540122759838236, "grad_norm": 0.4056818357852612, "learning_rate": 4.455682843591845e-06, "loss": 0.4314, "step": 15467 }, { "epoch": 2.5402869870465787, "grad_norm": 0.3285580483437489, "learning_rate": 4.455213894377208e-06, "loss": 0.4394, "step": 15468 }, { "epoch": 2.540451214254922, "grad_norm": 0.4463962845057985, "learning_rate": 4.454744945648996e-06, "loss": 0.4365, "step": 15469 }, { "epoch": 2.5406154414632645, "grad_norm": 0.5759978564993642, "learning_rate": 4.454275997412296e-06, "loss": 0.4314, "step": 15470 }, { "epoch": 2.540779668671607, "grad_norm": 0.30311744414915, "learning_rate": 4.453807049672203e-06, "loss": 0.4443, "step": 15471 }, { "epoch": 2.54094389587995, "grad_norm": 0.48319619600914393, "learning_rate": 4.453338102433812e-06, "loss": 0.4312, "step": 15472 }, { "epoch": 2.541108123088293, "grad_norm": 0.27961971412030734, "learning_rate": 4.452869155702215e-06, "loss": 0.439, "step": 15473 }, { "epoch": 2.5412723502966355, "grad_norm": 0.33938593418946583, "learning_rate": 4.452400209482505e-06, "loss": 0.419, "step": 15474 }, { "epoch": 2.541436577504978, "grad_norm": 0.31723679723348247, "learning_rate": 4.451931263779775e-06, "loss": 0.42, "step": 15475 }, { "epoch": 2.5416008047133207, "grad_norm": 0.3900582478708769, "learning_rate": 4.4514623185991195e-06, "loss": 0.4435, "step": 15476 }, { "epoch": 2.5417650319216634, "grad_norm": 0.3161165257957074, "learning_rate": 4.4509933739456325e-06, "loss": 0.4175, "step": 15477 }, { "epoch": 2.5419292591300064, "grad_norm": 0.3988098521207848, "learning_rate": 4.450524429824405e-06, "loss": 0.4453, "step": 15478 }, { "epoch": 2.542093486338349, "grad_norm": 0.32174868195541384, "learning_rate": 4.45005548624053e-06, "loss": 0.4372, "step": 15479 }, { "epoch": 2.5422577135466917, "grad_norm": 0.3045389565255562, "learning_rate": 4.449586543199101e-06, "loss": 0.4247, "step": 15480 }, { "epoch": 2.542421940755035, "grad_norm": 0.2908792710329188, "learning_rate": 4.4491176007052115e-06, "loss": 0.4271, "step": 15481 }, { "epoch": 2.5425861679633774, "grad_norm": 0.29144215488845143, "learning_rate": 4.448648658763957e-06, "loss": 0.4374, "step": 15482 }, { "epoch": 2.54275039517172, "grad_norm": 0.3977821028455875, "learning_rate": 4.448179717380426e-06, "loss": 0.4574, "step": 15483 }, { "epoch": 2.5429146223800627, "grad_norm": 0.33925302540685487, "learning_rate": 4.447710776559716e-06, "loss": 0.4403, "step": 15484 }, { "epoch": 2.5430788495884054, "grad_norm": 0.38977415792187947, "learning_rate": 4.447241836306917e-06, "loss": 0.4484, "step": 15485 }, { "epoch": 2.5432430767967484, "grad_norm": 0.3444752824234094, "learning_rate": 4.4467728966271265e-06, "loss": 0.4188, "step": 15486 }, { "epoch": 2.543407304005091, "grad_norm": 0.29387542378527076, "learning_rate": 4.446303957525432e-06, "loss": 0.4551, "step": 15487 }, { "epoch": 2.5435715312134337, "grad_norm": 0.3891893197024633, "learning_rate": 4.445835019006931e-06, "loss": 0.4435, "step": 15488 }, { "epoch": 2.543735758421777, "grad_norm": 0.5432113733163281, "learning_rate": 4.445366081076714e-06, "loss": 0.4286, "step": 15489 }, { "epoch": 2.5438999856301194, "grad_norm": 0.30458472444959955, "learning_rate": 4.444897143739875e-06, "loss": 0.4291, "step": 15490 }, { "epoch": 2.544064212838462, "grad_norm": 0.33335443953907484, "learning_rate": 4.444428207001507e-06, "loss": 0.4427, "step": 15491 }, { "epoch": 2.5442284400468047, "grad_norm": 0.3114285780540954, "learning_rate": 4.4439592708667044e-06, "loss": 0.4315, "step": 15492 }, { "epoch": 2.5443926672551473, "grad_norm": 0.33859893153267717, "learning_rate": 4.443490335340558e-06, "loss": 0.4292, "step": 15493 }, { "epoch": 2.54455689446349, "grad_norm": 0.34687445804031664, "learning_rate": 4.443021400428164e-06, "loss": 0.4318, "step": 15494 }, { "epoch": 2.544721121671833, "grad_norm": 0.2773802250144603, "learning_rate": 4.442552466134613e-06, "loss": 0.4369, "step": 15495 }, { "epoch": 2.5448853488801757, "grad_norm": 0.29535434743502614, "learning_rate": 4.4420835324649976e-06, "loss": 0.4346, "step": 15496 }, { "epoch": 2.5450495760885183, "grad_norm": 0.9360580914134653, "learning_rate": 4.441614599424413e-06, "loss": 0.4517, "step": 15497 }, { "epoch": 2.5452138032968614, "grad_norm": 0.7513840159695082, "learning_rate": 4.441145667017951e-06, "loss": 0.4466, "step": 15498 }, { "epoch": 2.545378030505204, "grad_norm": 0.34400842175373336, "learning_rate": 4.4406767352507045e-06, "loss": 0.4268, "step": 15499 }, { "epoch": 2.5455422577135467, "grad_norm": 0.36609020722625096, "learning_rate": 4.440207804127767e-06, "loss": 0.4322, "step": 15500 }, { "epoch": 2.5457064849218893, "grad_norm": 0.30316434981498774, "learning_rate": 4.439738873654232e-06, "loss": 0.4371, "step": 15501 }, { "epoch": 2.545870712130232, "grad_norm": 0.2875496740502021, "learning_rate": 4.439269943835192e-06, "loss": 0.4177, "step": 15502 }, { "epoch": 2.546034939338575, "grad_norm": 0.3142325124623637, "learning_rate": 4.438801014675742e-06, "loss": 0.4359, "step": 15503 }, { "epoch": 2.5461991665469177, "grad_norm": 0.3137356864008128, "learning_rate": 4.43833208618097e-06, "loss": 0.4371, "step": 15504 }, { "epoch": 2.5463633937552603, "grad_norm": 0.32170461352243107, "learning_rate": 4.437863158355975e-06, "loss": 0.4447, "step": 15505 }, { "epoch": 2.5465276209636034, "grad_norm": 0.3802915803414638, "learning_rate": 4.437394231205845e-06, "loss": 0.4345, "step": 15506 }, { "epoch": 2.546691848171946, "grad_norm": 0.31857101888559824, "learning_rate": 4.436925304735677e-06, "loss": 0.4309, "step": 15507 }, { "epoch": 2.5468560753802887, "grad_norm": 0.2584963933988361, "learning_rate": 4.4364563789505604e-06, "loss": 0.4443, "step": 15508 }, { "epoch": 2.5470203025886313, "grad_norm": 0.4038773729499395, "learning_rate": 4.435987453855591e-06, "loss": 0.4472, "step": 15509 }, { "epoch": 2.547184529796974, "grad_norm": 0.31802899292435854, "learning_rate": 4.43551852945586e-06, "loss": 0.4401, "step": 15510 }, { "epoch": 2.5473487570053166, "grad_norm": 0.3242892323716624, "learning_rate": 4.435049605756464e-06, "loss": 0.4332, "step": 15511 }, { "epoch": 2.5475129842136597, "grad_norm": 0.26797913518694794, "learning_rate": 4.434580682762491e-06, "loss": 0.4278, "step": 15512 }, { "epoch": 2.5476772114220023, "grad_norm": 0.44582865863931953, "learning_rate": 4.434111760479037e-06, "loss": 0.4533, "step": 15513 }, { "epoch": 2.547841438630345, "grad_norm": 0.3055931723597788, "learning_rate": 4.433642838911193e-06, "loss": 0.4502, "step": 15514 }, { "epoch": 2.548005665838688, "grad_norm": 0.3085446507289279, "learning_rate": 4.433173918064053e-06, "loss": 0.4381, "step": 15515 }, { "epoch": 2.5481698930470307, "grad_norm": 0.2799455160677543, "learning_rate": 4.432704997942711e-06, "loss": 0.4293, "step": 15516 }, { "epoch": 2.5483341202553733, "grad_norm": 0.3072559176696745, "learning_rate": 4.432236078552259e-06, "loss": 0.4313, "step": 15517 }, { "epoch": 2.548498347463716, "grad_norm": 0.2794966100213563, "learning_rate": 4.4317671598977885e-06, "loss": 0.4265, "step": 15518 }, { "epoch": 2.5486625746720586, "grad_norm": 0.28997197483099796, "learning_rate": 4.431298241984396e-06, "loss": 0.4536, "step": 15519 }, { "epoch": 2.5488268018804017, "grad_norm": 0.4573616100826798, "learning_rate": 4.430829324817171e-06, "loss": 0.4291, "step": 15520 }, { "epoch": 2.5489910290887443, "grad_norm": 0.4438472979662701, "learning_rate": 4.430360408401207e-06, "loss": 0.4267, "step": 15521 }, { "epoch": 2.549155256297087, "grad_norm": 0.2577708578255245, "learning_rate": 4.429891492741598e-06, "loss": 0.448, "step": 15522 }, { "epoch": 2.54931948350543, "grad_norm": 0.32911337426606396, "learning_rate": 4.429422577843436e-06, "loss": 0.4351, "step": 15523 }, { "epoch": 2.5494837107137727, "grad_norm": 0.3524767093716376, "learning_rate": 4.428953663711814e-06, "loss": 0.4165, "step": 15524 }, { "epoch": 2.5496479379221153, "grad_norm": 0.29196546007234, "learning_rate": 4.428484750351825e-06, "loss": 0.4344, "step": 15525 }, { "epoch": 2.549812165130458, "grad_norm": 0.298449530861429, "learning_rate": 4.428015837768563e-06, "loss": 0.4259, "step": 15526 }, { "epoch": 2.5499763923388006, "grad_norm": 0.6148962359432069, "learning_rate": 4.42754692596712e-06, "loss": 0.4403, "step": 15527 }, { "epoch": 2.550140619547143, "grad_norm": 0.3280661317620682, "learning_rate": 4.42707801495259e-06, "loss": 0.4275, "step": 15528 }, { "epoch": 2.5503048467554863, "grad_norm": 0.2837424072375426, "learning_rate": 4.426609104730062e-06, "loss": 0.4427, "step": 15529 }, { "epoch": 2.550469073963829, "grad_norm": 0.28041871680203856, "learning_rate": 4.426140195304631e-06, "loss": 0.4635, "step": 15530 }, { "epoch": 2.5506333011721716, "grad_norm": 0.350200009979362, "learning_rate": 4.425671286681392e-06, "loss": 0.4343, "step": 15531 }, { "epoch": 2.5507975283805147, "grad_norm": 0.32454220273887197, "learning_rate": 4.425202378865435e-06, "loss": 0.4614, "step": 15532 }, { "epoch": 2.5509617555888573, "grad_norm": 0.5367213364709559, "learning_rate": 4.424733471861853e-06, "loss": 0.4101, "step": 15533 }, { "epoch": 2.5511259827972, "grad_norm": 0.28676733476987987, "learning_rate": 4.42426456567574e-06, "loss": 0.4259, "step": 15534 }, { "epoch": 2.5512902100055426, "grad_norm": 0.2975809840115441, "learning_rate": 4.423795660312189e-06, "loss": 0.4426, "step": 15535 }, { "epoch": 2.551454437213885, "grad_norm": 0.3249328928419927, "learning_rate": 4.423326755776292e-06, "loss": 0.4473, "step": 15536 }, { "epoch": 2.5516186644222283, "grad_norm": 0.31184122620432747, "learning_rate": 4.422857852073143e-06, "loss": 0.4104, "step": 15537 }, { "epoch": 2.551782891630571, "grad_norm": 0.30635029912032957, "learning_rate": 4.422388949207831e-06, "loss": 0.4345, "step": 15538 }, { "epoch": 2.5519471188389136, "grad_norm": 0.33025971138949717, "learning_rate": 4.421920047185453e-06, "loss": 0.4492, "step": 15539 }, { "epoch": 2.5521113460472566, "grad_norm": 0.3181232184610691, "learning_rate": 4.421451146011099e-06, "loss": 0.4229, "step": 15540 }, { "epoch": 2.5522755732555993, "grad_norm": 0.28306483682463085, "learning_rate": 4.420982245689865e-06, "loss": 0.4365, "step": 15541 }, { "epoch": 2.552439800463942, "grad_norm": 0.34612517738918935, "learning_rate": 4.420513346226839e-06, "loss": 0.432, "step": 15542 }, { "epoch": 2.5526040276722846, "grad_norm": 0.30498092076014316, "learning_rate": 4.420044447627118e-06, "loss": 0.4353, "step": 15543 }, { "epoch": 2.552768254880627, "grad_norm": 0.3009496686032841, "learning_rate": 4.419575549895793e-06, "loss": 0.4316, "step": 15544 }, { "epoch": 2.55293248208897, "grad_norm": 0.29409546874362075, "learning_rate": 4.419106653037956e-06, "loss": 0.4305, "step": 15545 }, { "epoch": 2.553096709297313, "grad_norm": 0.30258945834332523, "learning_rate": 4.418637757058701e-06, "loss": 0.4276, "step": 15546 }, { "epoch": 2.5532609365056556, "grad_norm": 0.3330461796000069, "learning_rate": 4.418168861963119e-06, "loss": 0.4454, "step": 15547 }, { "epoch": 2.553425163713998, "grad_norm": 0.3405066873911021, "learning_rate": 4.4176999677563035e-06, "loss": 0.4283, "step": 15548 }, { "epoch": 2.5535893909223413, "grad_norm": 0.39288368153964764, "learning_rate": 4.417231074443347e-06, "loss": 0.4311, "step": 15549 }, { "epoch": 2.553753618130684, "grad_norm": 0.37034601558345265, "learning_rate": 4.416762182029344e-06, "loss": 0.432, "step": 15550 }, { "epoch": 2.5539178453390265, "grad_norm": 0.38462584820097667, "learning_rate": 4.416293290519385e-06, "loss": 0.452, "step": 15551 }, { "epoch": 2.554082072547369, "grad_norm": 0.42051310281220755, "learning_rate": 4.415824399918563e-06, "loss": 0.435, "step": 15552 }, { "epoch": 2.554246299755712, "grad_norm": 0.3166037955740742, "learning_rate": 4.4153555102319725e-06, "loss": 0.413, "step": 15553 }, { "epoch": 2.554410526964055, "grad_norm": 0.31043044492793076, "learning_rate": 4.414886621464702e-06, "loss": 0.4347, "step": 15554 }, { "epoch": 2.5545747541723975, "grad_norm": 0.34700798655471937, "learning_rate": 4.414417733621847e-06, "loss": 0.4421, "step": 15555 }, { "epoch": 2.55473898138074, "grad_norm": 0.2892813037690508, "learning_rate": 4.4139488467085004e-06, "loss": 0.4446, "step": 15556 }, { "epoch": 2.5549032085890833, "grad_norm": 0.3175899138027976, "learning_rate": 4.413479960729754e-06, "loss": 0.4314, "step": 15557 }, { "epoch": 2.555067435797426, "grad_norm": 0.3149807022034962, "learning_rate": 4.413011075690699e-06, "loss": 0.4423, "step": 15558 }, { "epoch": 2.5552316630057685, "grad_norm": 0.5347116578772787, "learning_rate": 4.41254219159643e-06, "loss": 0.4459, "step": 15559 }, { "epoch": 2.555395890214111, "grad_norm": 0.37843617764625054, "learning_rate": 4.41207330845204e-06, "loss": 0.4341, "step": 15560 }, { "epoch": 2.555560117422454, "grad_norm": 0.2762679102433087, "learning_rate": 4.411604426262621e-06, "loss": 0.4058, "step": 15561 }, { "epoch": 2.5557243446307965, "grad_norm": 0.30060861946870354, "learning_rate": 4.411135545033263e-06, "loss": 0.4443, "step": 15562 }, { "epoch": 2.5558885718391395, "grad_norm": 0.3800865842812888, "learning_rate": 4.41066666476906e-06, "loss": 0.43, "step": 15563 }, { "epoch": 2.556052799047482, "grad_norm": 0.25946722154162527, "learning_rate": 4.4101977854751055e-06, "loss": 0.4419, "step": 15564 }, { "epoch": 2.556217026255825, "grad_norm": 0.6759836164016616, "learning_rate": 4.409728907156493e-06, "loss": 0.4274, "step": 15565 }, { "epoch": 2.556381253464168, "grad_norm": 0.6234744045352152, "learning_rate": 4.409260029818312e-06, "loss": 0.4346, "step": 15566 }, { "epoch": 2.5565454806725105, "grad_norm": 0.38581918418779537, "learning_rate": 4.408791153465655e-06, "loss": 0.4232, "step": 15567 }, { "epoch": 2.556709707880853, "grad_norm": 0.29573823645262287, "learning_rate": 4.408322278103617e-06, "loss": 0.4193, "step": 15568 }, { "epoch": 2.556873935089196, "grad_norm": 0.3016718414824482, "learning_rate": 4.407853403737291e-06, "loss": 0.4545, "step": 15569 }, { "epoch": 2.5570381622975384, "grad_norm": 0.3891485274119814, "learning_rate": 4.407384530371766e-06, "loss": 0.4365, "step": 15570 }, { "epoch": 2.5572023895058815, "grad_norm": 0.2532739379279318, "learning_rate": 4.406915658012136e-06, "loss": 0.4317, "step": 15571 }, { "epoch": 2.557366616714224, "grad_norm": 0.29871548761299727, "learning_rate": 4.406446786663494e-06, "loss": 0.4403, "step": 15572 }, { "epoch": 2.557530843922567, "grad_norm": 0.35194308191729523, "learning_rate": 4.405977916330931e-06, "loss": 0.4441, "step": 15573 }, { "epoch": 2.55769507113091, "grad_norm": 0.3031216695491342, "learning_rate": 4.405509047019541e-06, "loss": 0.4154, "step": 15574 }, { "epoch": 2.5578592983392525, "grad_norm": 0.31930845237851674, "learning_rate": 4.4050401787344165e-06, "loss": 0.4454, "step": 15575 }, { "epoch": 2.558023525547595, "grad_norm": 0.33002408879010403, "learning_rate": 4.404571311480648e-06, "loss": 0.4434, "step": 15576 }, { "epoch": 2.558187752755938, "grad_norm": 0.30275352248274623, "learning_rate": 4.404102445263329e-06, "loss": 0.4509, "step": 15577 }, { "epoch": 2.5583519799642804, "grad_norm": 0.35335386445372424, "learning_rate": 4.4036335800875535e-06, "loss": 0.448, "step": 15578 }, { "epoch": 2.558516207172623, "grad_norm": 0.2800989999633153, "learning_rate": 4.403164715958411e-06, "loss": 0.425, "step": 15579 }, { "epoch": 2.558680434380966, "grad_norm": 0.3127782022895306, "learning_rate": 4.402695852880995e-06, "loss": 0.4137, "step": 15580 }, { "epoch": 2.558844661589309, "grad_norm": 0.3799146716087815, "learning_rate": 4.402226990860397e-06, "loss": 0.4266, "step": 15581 }, { "epoch": 2.5590088887976514, "grad_norm": 0.3673831939441783, "learning_rate": 4.401758129901711e-06, "loss": 0.4064, "step": 15582 }, { "epoch": 2.5591731160059945, "grad_norm": 0.3376274379342438, "learning_rate": 4.401289270010027e-06, "loss": 0.4233, "step": 15583 }, { "epoch": 2.559337343214337, "grad_norm": 0.3998414262198734, "learning_rate": 4.40082041119044e-06, "loss": 0.4275, "step": 15584 }, { "epoch": 2.55950157042268, "grad_norm": 0.3520853575209431, "learning_rate": 4.40035155344804e-06, "loss": 0.4616, "step": 15585 }, { "epoch": 2.5596657976310224, "grad_norm": 0.37561224309961333, "learning_rate": 4.399882696787922e-06, "loss": 0.4394, "step": 15586 }, { "epoch": 2.559830024839365, "grad_norm": 0.36754339045918716, "learning_rate": 4.399413841215175e-06, "loss": 0.4575, "step": 15587 }, { "epoch": 2.559994252047708, "grad_norm": 0.33783166500512124, "learning_rate": 4.398944986734892e-06, "loss": 0.4164, "step": 15588 }, { "epoch": 2.560158479256051, "grad_norm": 0.31452477254199035, "learning_rate": 4.398476133352167e-06, "loss": 0.4528, "step": 15589 }, { "epoch": 2.5603227064643934, "grad_norm": 0.2787399568684325, "learning_rate": 4.398007281072091e-06, "loss": 0.4209, "step": 15590 }, { "epoch": 2.5604869336727365, "grad_norm": 0.2767623218217474, "learning_rate": 4.397538429899756e-06, "loss": 0.414, "step": 15591 }, { "epoch": 2.560651160881079, "grad_norm": 0.28888844049323786, "learning_rate": 4.397069579840253e-06, "loss": 0.4542, "step": 15592 }, { "epoch": 2.5608153880894218, "grad_norm": 0.5864957714875929, "learning_rate": 4.396600730898677e-06, "loss": 0.4243, "step": 15593 }, { "epoch": 2.5609796152977644, "grad_norm": 0.38962154546595434, "learning_rate": 4.396131883080121e-06, "loss": 0.4387, "step": 15594 }, { "epoch": 2.561143842506107, "grad_norm": 0.27331680844407696, "learning_rate": 4.395663036389673e-06, "loss": 0.4464, "step": 15595 }, { "epoch": 2.5613080697144497, "grad_norm": 0.3069416039408074, "learning_rate": 4.395194190832426e-06, "loss": 0.4388, "step": 15596 }, { "epoch": 2.5614722969227928, "grad_norm": 0.3252988244608723, "learning_rate": 4.394725346413474e-06, "loss": 0.4312, "step": 15597 }, { "epoch": 2.5616365241311354, "grad_norm": 0.556385918534084, "learning_rate": 4.394256503137908e-06, "loss": 0.4547, "step": 15598 }, { "epoch": 2.561800751339478, "grad_norm": 0.3021824955470384, "learning_rate": 4.393787661010821e-06, "loss": 0.4296, "step": 15599 }, { "epoch": 2.561964978547821, "grad_norm": 0.2910937592004871, "learning_rate": 4.393318820037304e-06, "loss": 0.4649, "step": 15600 }, { "epoch": 2.5621292057561638, "grad_norm": 0.31504368710656216, "learning_rate": 4.39284998022245e-06, "loss": 0.4244, "step": 15601 }, { "epoch": 2.5622934329645064, "grad_norm": 0.3230087906700632, "learning_rate": 4.39238114157135e-06, "loss": 0.4366, "step": 15602 }, { "epoch": 2.562457660172849, "grad_norm": 0.33819297778333945, "learning_rate": 4.3919123040890985e-06, "loss": 0.4313, "step": 15603 }, { "epoch": 2.5626218873811917, "grad_norm": 0.28026153833271106, "learning_rate": 4.391443467780784e-06, "loss": 0.4233, "step": 15604 }, { "epoch": 2.5627861145895348, "grad_norm": 0.313441369830849, "learning_rate": 4.390974632651502e-06, "loss": 0.4295, "step": 15605 }, { "epoch": 2.5629503417978774, "grad_norm": 0.8718709717848913, "learning_rate": 4.3905057987063406e-06, "loss": 0.4238, "step": 15606 }, { "epoch": 2.56311456900622, "grad_norm": 0.26352651969380825, "learning_rate": 4.390036965950394e-06, "loss": 0.4334, "step": 15607 }, { "epoch": 2.563278796214563, "grad_norm": 0.2994463931598903, "learning_rate": 4.389568134388754e-06, "loss": 0.4317, "step": 15608 }, { "epoch": 2.5634430234229058, "grad_norm": 0.3646830270622841, "learning_rate": 4.389099304026515e-06, "loss": 0.4337, "step": 15609 }, { "epoch": 2.5636072506312484, "grad_norm": 0.5393460161367439, "learning_rate": 4.3886304748687644e-06, "loss": 0.4356, "step": 15610 }, { "epoch": 2.563771477839591, "grad_norm": 0.30025939172178034, "learning_rate": 4.388161646920599e-06, "loss": 0.4251, "step": 15611 }, { "epoch": 2.5639357050479337, "grad_norm": 0.31959158175100244, "learning_rate": 4.387692820187106e-06, "loss": 0.436, "step": 15612 }, { "epoch": 2.5640999322562763, "grad_norm": 0.29317675299532536, "learning_rate": 4.38722399467338e-06, "loss": 0.4383, "step": 15613 }, { "epoch": 2.5642641594646194, "grad_norm": 0.34975265052513155, "learning_rate": 4.386755170384513e-06, "loss": 0.4398, "step": 15614 }, { "epoch": 2.564428386672962, "grad_norm": 0.3412204875657687, "learning_rate": 4.386286347325595e-06, "loss": 0.4538, "step": 15615 }, { "epoch": 2.5645926138813047, "grad_norm": 0.31861125143751867, "learning_rate": 4.385817525501719e-06, "loss": 0.4384, "step": 15616 }, { "epoch": 2.5647568410896477, "grad_norm": 0.2895893168578073, "learning_rate": 4.385348704917978e-06, "loss": 0.4292, "step": 15617 }, { "epoch": 2.5649210682979904, "grad_norm": 0.3212807387575279, "learning_rate": 4.384879885579462e-06, "loss": 0.4217, "step": 15618 }, { "epoch": 2.565085295506333, "grad_norm": 0.37869276897825244, "learning_rate": 4.384411067491265e-06, "loss": 0.4226, "step": 15619 }, { "epoch": 2.5652495227146757, "grad_norm": 0.32545970108276556, "learning_rate": 4.383942250658478e-06, "loss": 0.4182, "step": 15620 }, { "epoch": 2.5654137499230183, "grad_norm": 0.5821317175586268, "learning_rate": 4.383473435086191e-06, "loss": 0.4307, "step": 15621 }, { "epoch": 2.5655779771313614, "grad_norm": 0.3737727497282395, "learning_rate": 4.383004620779497e-06, "loss": 0.4254, "step": 15622 }, { "epoch": 2.565742204339704, "grad_norm": 0.3252778528469744, "learning_rate": 4.382535807743487e-06, "loss": 0.4444, "step": 15623 }, { "epoch": 2.5659064315480467, "grad_norm": 0.40545934497853325, "learning_rate": 4.382066995983256e-06, "loss": 0.4465, "step": 15624 }, { "epoch": 2.5660706587563897, "grad_norm": 0.3786830883990562, "learning_rate": 4.381598185503892e-06, "loss": 0.4253, "step": 15625 }, { "epoch": 2.5662348859647324, "grad_norm": 0.33683926885493226, "learning_rate": 4.381129376310488e-06, "loss": 0.4349, "step": 15626 }, { "epoch": 2.566399113173075, "grad_norm": 0.9430359938687994, "learning_rate": 4.380660568408136e-06, "loss": 0.4134, "step": 15627 }, { "epoch": 2.5665633403814176, "grad_norm": 0.4029380612559527, "learning_rate": 4.38019176180193e-06, "loss": 0.4342, "step": 15628 }, { "epoch": 2.5667275675897603, "grad_norm": 0.3329457909755624, "learning_rate": 4.379722956496958e-06, "loss": 0.4387, "step": 15629 }, { "epoch": 2.566891794798103, "grad_norm": 0.35796644332871863, "learning_rate": 4.379254152498312e-06, "loss": 0.4571, "step": 15630 }, { "epoch": 2.567056022006446, "grad_norm": 0.34985740849790825, "learning_rate": 4.378785349811085e-06, "loss": 0.4408, "step": 15631 }, { "epoch": 2.5672202492147886, "grad_norm": 0.3545240868647146, "learning_rate": 4.378316548440369e-06, "loss": 0.4361, "step": 15632 }, { "epoch": 2.5673844764231313, "grad_norm": 0.297635939595374, "learning_rate": 4.3778477483912545e-06, "loss": 0.4233, "step": 15633 }, { "epoch": 2.5675487036314744, "grad_norm": 0.2947778083461371, "learning_rate": 4.377378949668833e-06, "loss": 0.433, "step": 15634 }, { "epoch": 2.567712930839817, "grad_norm": 0.395361891954945, "learning_rate": 4.376910152278197e-06, "loss": 0.4374, "step": 15635 }, { "epoch": 2.5678771580481596, "grad_norm": 0.26747379087473455, "learning_rate": 4.37644135622444e-06, "loss": 0.4452, "step": 15636 }, { "epoch": 2.5680413852565023, "grad_norm": 0.3537882378113586, "learning_rate": 4.37597256151265e-06, "loss": 0.4309, "step": 15637 }, { "epoch": 2.568205612464845, "grad_norm": 0.3126543583966415, "learning_rate": 4.375503768147918e-06, "loss": 0.4475, "step": 15638 }, { "epoch": 2.568369839673188, "grad_norm": 0.310994438653193, "learning_rate": 4.375034976135341e-06, "loss": 0.4532, "step": 15639 }, { "epoch": 2.5685340668815306, "grad_norm": 0.2757312848098248, "learning_rate": 4.374566185480005e-06, "loss": 0.4254, "step": 15640 }, { "epoch": 2.5686982940898733, "grad_norm": 0.27164222845895303, "learning_rate": 4.374097396187003e-06, "loss": 0.4184, "step": 15641 }, { "epoch": 2.5688625212982164, "grad_norm": 0.27266040054382534, "learning_rate": 4.373628608261428e-06, "loss": 0.422, "step": 15642 }, { "epoch": 2.569026748506559, "grad_norm": 0.29807414547875344, "learning_rate": 4.373159821708372e-06, "loss": 0.4379, "step": 15643 }, { "epoch": 2.5691909757149016, "grad_norm": 0.4276170738492218, "learning_rate": 4.372691036532923e-06, "loss": 0.4514, "step": 15644 }, { "epoch": 2.5693552029232443, "grad_norm": 0.30498004090935377, "learning_rate": 4.372222252740177e-06, "loss": 0.4365, "step": 15645 }, { "epoch": 2.569519430131587, "grad_norm": 0.3839879430641964, "learning_rate": 4.371753470335221e-06, "loss": 0.4384, "step": 15646 }, { "epoch": 2.5696836573399295, "grad_norm": 0.4011106422419418, "learning_rate": 4.37128468932315e-06, "loss": 0.4383, "step": 15647 }, { "epoch": 2.5698478845482726, "grad_norm": 0.35855065427808513, "learning_rate": 4.3708159097090536e-06, "loss": 0.4398, "step": 15648 }, { "epoch": 2.5700121117566153, "grad_norm": 0.3644533904290332, "learning_rate": 4.370347131498022e-06, "loss": 0.4257, "step": 15649 }, { "epoch": 2.570176338964958, "grad_norm": 0.33253435890955046, "learning_rate": 4.369878354695148e-06, "loss": 0.4282, "step": 15650 }, { "epoch": 2.570340566173301, "grad_norm": 0.26752244259463775, "learning_rate": 4.369409579305525e-06, "loss": 0.4401, "step": 15651 }, { "epoch": 2.5705047933816436, "grad_norm": 0.27764725847093746, "learning_rate": 4.368940805334241e-06, "loss": 0.4348, "step": 15652 }, { "epoch": 2.5706690205899863, "grad_norm": 0.46418971042458596, "learning_rate": 4.3684720327863904e-06, "loss": 0.411, "step": 15653 }, { "epoch": 2.570833247798329, "grad_norm": 0.29686142198669646, "learning_rate": 4.368003261667062e-06, "loss": 0.4427, "step": 15654 }, { "epoch": 2.5709974750066715, "grad_norm": 0.27340688193424023, "learning_rate": 4.367534491981349e-06, "loss": 0.4459, "step": 15655 }, { "epoch": 2.5711617022150146, "grad_norm": 0.29452955602163794, "learning_rate": 4.36706572373434e-06, "loss": 0.4615, "step": 15656 }, { "epoch": 2.5713259294233572, "grad_norm": 0.2902678707411273, "learning_rate": 4.366596956931128e-06, "loss": 0.4221, "step": 15657 }, { "epoch": 2.5714901566317, "grad_norm": 0.47928414230720084, "learning_rate": 4.366128191576806e-06, "loss": 0.4417, "step": 15658 }, { "epoch": 2.571654383840043, "grad_norm": 0.30012861208394687, "learning_rate": 4.3656594276764616e-06, "loss": 0.4254, "step": 15659 }, { "epoch": 2.5718186110483856, "grad_norm": 0.3130155646681567, "learning_rate": 4.365190665235189e-06, "loss": 0.4422, "step": 15660 }, { "epoch": 2.5719828382567282, "grad_norm": 0.3455326963780102, "learning_rate": 4.36472190425808e-06, "loss": 0.4318, "step": 15661 }, { "epoch": 2.572147065465071, "grad_norm": 0.3041176120109691, "learning_rate": 4.364253144750222e-06, "loss": 0.4313, "step": 15662 }, { "epoch": 2.5723112926734135, "grad_norm": 0.28055282368032514, "learning_rate": 4.36378438671671e-06, "loss": 0.4413, "step": 15663 }, { "epoch": 2.572475519881756, "grad_norm": 0.5147635636054625, "learning_rate": 4.363315630162632e-06, "loss": 0.4397, "step": 15664 }, { "epoch": 2.5726397470900992, "grad_norm": 0.2849554341120117, "learning_rate": 4.362846875093081e-06, "loss": 0.4344, "step": 15665 }, { "epoch": 2.572803974298442, "grad_norm": 0.2846440291685015, "learning_rate": 4.3623781215131475e-06, "loss": 0.4277, "step": 15666 }, { "epoch": 2.5729682015067845, "grad_norm": 0.3305141222386102, "learning_rate": 4.3619093694279245e-06, "loss": 0.4405, "step": 15667 }, { "epoch": 2.5731324287151276, "grad_norm": 0.3596809632535296, "learning_rate": 4.3614406188425005e-06, "loss": 0.4677, "step": 15668 }, { "epoch": 2.5732966559234702, "grad_norm": 0.4530597862843398, "learning_rate": 4.360971869761968e-06, "loss": 0.4387, "step": 15669 }, { "epoch": 2.573460883131813, "grad_norm": 0.3251876593883486, "learning_rate": 4.360503122191419e-06, "loss": 0.4108, "step": 15670 }, { "epoch": 2.5736251103401555, "grad_norm": 0.2803538758207167, "learning_rate": 4.360034376135942e-06, "loss": 0.4446, "step": 15671 }, { "epoch": 2.573789337548498, "grad_norm": 0.28182246452077286, "learning_rate": 4.3595656316006295e-06, "loss": 0.4468, "step": 15672 }, { "epoch": 2.5739535647568412, "grad_norm": 0.34577943342234163, "learning_rate": 4.359096888590573e-06, "loss": 0.4308, "step": 15673 }, { "epoch": 2.574117791965184, "grad_norm": 0.38565717225066504, "learning_rate": 4.358628147110862e-06, "loss": 0.432, "step": 15674 }, { "epoch": 2.5742820191735265, "grad_norm": 0.30459225924397265, "learning_rate": 4.358159407166588e-06, "loss": 0.4202, "step": 15675 }, { "epoch": 2.5744462463818696, "grad_norm": 0.3196455829723776, "learning_rate": 4.357690668762844e-06, "loss": 0.4338, "step": 15676 }, { "epoch": 2.5746104735902122, "grad_norm": 0.32731457911173506, "learning_rate": 4.357221931904718e-06, "loss": 0.4271, "step": 15677 }, { "epoch": 2.574774700798555, "grad_norm": 0.3219906580343855, "learning_rate": 4.356753196597304e-06, "loss": 0.4437, "step": 15678 }, { "epoch": 2.5749389280068975, "grad_norm": 0.29879707891187424, "learning_rate": 4.35628446284569e-06, "loss": 0.4442, "step": 15679 }, { "epoch": 2.57510315521524, "grad_norm": 0.3494962037579369, "learning_rate": 4.355815730654968e-06, "loss": 0.4262, "step": 15680 }, { "epoch": 2.5752673824235828, "grad_norm": 0.25523753461883125, "learning_rate": 4.355347000030229e-06, "loss": 0.426, "step": 15681 }, { "epoch": 2.575431609631926, "grad_norm": 0.45019078924757433, "learning_rate": 4.354878270976564e-06, "loss": 0.441, "step": 15682 }, { "epoch": 2.5755958368402685, "grad_norm": 0.32355129633184126, "learning_rate": 4.354409543499064e-06, "loss": 0.4299, "step": 15683 }, { "epoch": 2.575760064048611, "grad_norm": 0.3285679353712852, "learning_rate": 4.353940817602819e-06, "loss": 0.4186, "step": 15684 }, { "epoch": 2.575924291256954, "grad_norm": 0.27666075631853193, "learning_rate": 4.35347209329292e-06, "loss": 0.4338, "step": 15685 }, { "epoch": 2.576088518465297, "grad_norm": 0.49437240302558794, "learning_rate": 4.353003370574461e-06, "loss": 0.4409, "step": 15686 }, { "epoch": 2.5762527456736395, "grad_norm": 0.30204622384409346, "learning_rate": 4.352534649452527e-06, "loss": 0.4422, "step": 15687 }, { "epoch": 2.576416972881982, "grad_norm": 0.273434931240618, "learning_rate": 4.352065929932215e-06, "loss": 0.4177, "step": 15688 }, { "epoch": 2.5765812000903248, "grad_norm": 0.35155939309416256, "learning_rate": 4.35159721201861e-06, "loss": 0.4432, "step": 15689 }, { "epoch": 2.576745427298668, "grad_norm": 0.3920495165730994, "learning_rate": 4.351128495716805e-06, "loss": 0.4235, "step": 15690 }, { "epoch": 2.5769096545070105, "grad_norm": 0.30913151759937846, "learning_rate": 4.350659781031891e-06, "loss": 0.4201, "step": 15691 }, { "epoch": 2.577073881715353, "grad_norm": 0.3076565225368957, "learning_rate": 4.35019106796896e-06, "loss": 0.4235, "step": 15692 }, { "epoch": 2.577238108923696, "grad_norm": 0.29655321776974725, "learning_rate": 4.349722356533101e-06, "loss": 0.4459, "step": 15693 }, { "epoch": 2.577402336132039, "grad_norm": 0.28892151775120634, "learning_rate": 4.3492536467294044e-06, "loss": 0.4179, "step": 15694 }, { "epoch": 2.5775665633403815, "grad_norm": 0.41459237885226863, "learning_rate": 4.3487849385629635e-06, "loss": 0.4146, "step": 15695 }, { "epoch": 2.577730790548724, "grad_norm": 0.2902977896042206, "learning_rate": 4.348316232038865e-06, "loss": 0.437, "step": 15696 }, { "epoch": 2.5778950177570668, "grad_norm": 0.4496536761679526, "learning_rate": 4.347847527162203e-06, "loss": 0.4415, "step": 15697 }, { "epoch": 2.5780592449654094, "grad_norm": 0.36641641828421784, "learning_rate": 4.3473788239380645e-06, "loss": 0.4223, "step": 15698 }, { "epoch": 2.5782234721737525, "grad_norm": 0.36549153174167476, "learning_rate": 4.3469101223715425e-06, "loss": 0.4306, "step": 15699 }, { "epoch": 2.578387699382095, "grad_norm": 0.3015497487218498, "learning_rate": 4.3464414224677275e-06, "loss": 0.419, "step": 15700 }, { "epoch": 2.5785519265904377, "grad_norm": 0.27625426522960334, "learning_rate": 4.345972724231711e-06, "loss": 0.4503, "step": 15701 }, { "epoch": 2.578716153798781, "grad_norm": 0.30417803932476767, "learning_rate": 4.3455040276685805e-06, "loss": 0.4298, "step": 15702 }, { "epoch": 2.5788803810071235, "grad_norm": 0.31044685598214067, "learning_rate": 4.345035332783431e-06, "loss": 0.4572, "step": 15703 }, { "epoch": 2.579044608215466, "grad_norm": 0.34029110494135095, "learning_rate": 4.344566639581348e-06, "loss": 0.4286, "step": 15704 }, { "epoch": 2.5792088354238087, "grad_norm": 0.4220228863169494, "learning_rate": 4.344097948067424e-06, "loss": 0.4313, "step": 15705 }, { "epoch": 2.5793730626321514, "grad_norm": 0.29592190663583623, "learning_rate": 4.34362925824675e-06, "loss": 0.4506, "step": 15706 }, { "epoch": 2.5795372898404945, "grad_norm": 0.29158058048698404, "learning_rate": 4.343160570124417e-06, "loss": 0.4402, "step": 15707 }, { "epoch": 2.579701517048837, "grad_norm": 0.3708781139752633, "learning_rate": 4.3426918837055135e-06, "loss": 0.4506, "step": 15708 }, { "epoch": 2.5798657442571797, "grad_norm": 0.3318455295905244, "learning_rate": 4.342223198995131e-06, "loss": 0.4468, "step": 15709 }, { "epoch": 2.580029971465523, "grad_norm": 0.4317230371408357, "learning_rate": 4.34175451599836e-06, "loss": 0.4466, "step": 15710 }, { "epoch": 2.5801941986738655, "grad_norm": 0.3369867194655041, "learning_rate": 4.341285834720292e-06, "loss": 0.4115, "step": 15711 }, { "epoch": 2.580358425882208, "grad_norm": 0.31912585151917744, "learning_rate": 4.340817155166015e-06, "loss": 0.4482, "step": 15712 }, { "epoch": 2.5805226530905507, "grad_norm": 0.25662968380350065, "learning_rate": 4.340348477340619e-06, "loss": 0.4425, "step": 15713 }, { "epoch": 2.5806868802988934, "grad_norm": 0.3063092669941935, "learning_rate": 4.339879801249197e-06, "loss": 0.4489, "step": 15714 }, { "epoch": 2.580851107507236, "grad_norm": 0.2630713688895437, "learning_rate": 4.339411126896836e-06, "loss": 0.4542, "step": 15715 }, { "epoch": 2.581015334715579, "grad_norm": 0.31208575964151564, "learning_rate": 4.338942454288631e-06, "loss": 0.4318, "step": 15716 }, { "epoch": 2.5811795619239217, "grad_norm": 0.3594129107656559, "learning_rate": 4.338473783429668e-06, "loss": 0.4353, "step": 15717 }, { "epoch": 2.5813437891322644, "grad_norm": 0.3693658716118475, "learning_rate": 4.338005114325038e-06, "loss": 0.4337, "step": 15718 }, { "epoch": 2.5815080163406074, "grad_norm": 0.40035589840951313, "learning_rate": 4.3375364469798315e-06, "loss": 0.4231, "step": 15719 }, { "epoch": 2.58167224354895, "grad_norm": 0.2942822598845297, "learning_rate": 4.3370677813991425e-06, "loss": 0.4365, "step": 15720 }, { "epoch": 2.5818364707572927, "grad_norm": 0.2598983203441498, "learning_rate": 4.3365991175880545e-06, "loss": 0.43, "step": 15721 }, { "epoch": 2.5820006979656354, "grad_norm": 0.3306450186865602, "learning_rate": 4.336130455551662e-06, "loss": 0.4488, "step": 15722 }, { "epoch": 2.582164925173978, "grad_norm": 0.4172832201505027, "learning_rate": 4.335661795295053e-06, "loss": 0.4493, "step": 15723 }, { "epoch": 2.582329152382321, "grad_norm": 0.4443225994832997, "learning_rate": 4.3351931368233195e-06, "loss": 0.4485, "step": 15724 }, { "epoch": 2.5824933795906637, "grad_norm": 0.36022320421626164, "learning_rate": 4.334724480141551e-06, "loss": 0.4535, "step": 15725 }, { "epoch": 2.5826576067990064, "grad_norm": 0.28358389028441683, "learning_rate": 4.334255825254836e-06, "loss": 0.4391, "step": 15726 }, { "epoch": 2.5828218340073494, "grad_norm": 0.33976043099099573, "learning_rate": 4.333787172168266e-06, "loss": 0.4492, "step": 15727 }, { "epoch": 2.582986061215692, "grad_norm": 0.2803123022258049, "learning_rate": 4.333318520886932e-06, "loss": 0.4182, "step": 15728 }, { "epoch": 2.5831502884240347, "grad_norm": 0.2642610620741509, "learning_rate": 4.332849871415922e-06, "loss": 0.4329, "step": 15729 }, { "epoch": 2.5833145156323774, "grad_norm": 0.2714676626725195, "learning_rate": 4.332381223760327e-06, "loss": 0.4358, "step": 15730 }, { "epoch": 2.58347874284072, "grad_norm": 0.29556803031452, "learning_rate": 4.331912577925237e-06, "loss": 0.4179, "step": 15731 }, { "epoch": 2.5836429700490626, "grad_norm": 0.3645808737464659, "learning_rate": 4.3314439339157415e-06, "loss": 0.4348, "step": 15732 }, { "epoch": 2.5838071972574057, "grad_norm": 0.3772393709280388, "learning_rate": 4.3309752917369305e-06, "loss": 0.4406, "step": 15733 }, { "epoch": 2.5839714244657483, "grad_norm": 0.27548331977469753, "learning_rate": 4.330506651393894e-06, "loss": 0.4471, "step": 15734 }, { "epoch": 2.584135651674091, "grad_norm": 0.2826736816359112, "learning_rate": 4.330038012891723e-06, "loss": 0.4483, "step": 15735 }, { "epoch": 2.584299878882434, "grad_norm": 0.4133515073580495, "learning_rate": 4.329569376235506e-06, "loss": 0.4419, "step": 15736 }, { "epoch": 2.5844641060907767, "grad_norm": 0.3225643113998406, "learning_rate": 4.329100741430334e-06, "loss": 0.4471, "step": 15737 }, { "epoch": 2.5846283332991193, "grad_norm": 0.3407019919418374, "learning_rate": 4.3286321084812955e-06, "loss": 0.4533, "step": 15738 }, { "epoch": 2.584792560507462, "grad_norm": 0.3428604509352793, "learning_rate": 4.32816347739348e-06, "loss": 0.4755, "step": 15739 }, { "epoch": 2.5849567877158046, "grad_norm": 0.32503432507541574, "learning_rate": 4.327694848171979e-06, "loss": 0.4341, "step": 15740 }, { "epoch": 2.5851210149241477, "grad_norm": 0.36169010504914045, "learning_rate": 4.327226220821881e-06, "loss": 0.4603, "step": 15741 }, { "epoch": 2.5852852421324903, "grad_norm": 0.2994689117279142, "learning_rate": 4.326757595348276e-06, "loss": 0.4367, "step": 15742 }, { "epoch": 2.585449469340833, "grad_norm": 0.313911921117997, "learning_rate": 4.326288971756254e-06, "loss": 0.4324, "step": 15743 }, { "epoch": 2.585613696549176, "grad_norm": 0.2735236587767174, "learning_rate": 4.3258203500509055e-06, "loss": 0.4339, "step": 15744 }, { "epoch": 2.5857779237575187, "grad_norm": 0.3583785949380112, "learning_rate": 4.32535173023732e-06, "loss": 0.4228, "step": 15745 }, { "epoch": 2.5859421509658613, "grad_norm": 0.3579906842061974, "learning_rate": 4.324883112320586e-06, "loss": 0.4374, "step": 15746 }, { "epoch": 2.586106378174204, "grad_norm": 0.2920349175148648, "learning_rate": 4.324414496305793e-06, "loss": 0.448, "step": 15747 }, { "epoch": 2.5862706053825466, "grad_norm": 0.3551419529001769, "learning_rate": 4.323945882198031e-06, "loss": 0.4296, "step": 15748 }, { "epoch": 2.5864348325908892, "grad_norm": 0.38050189174987475, "learning_rate": 4.3234772700023904e-06, "loss": 0.4367, "step": 15749 }, { "epoch": 2.5865990597992323, "grad_norm": 0.27693520786110415, "learning_rate": 4.323008659723961e-06, "loss": 0.4273, "step": 15750 }, { "epoch": 2.586763287007575, "grad_norm": 0.2946787998493088, "learning_rate": 4.32254005136783e-06, "loss": 0.4506, "step": 15751 }, { "epoch": 2.5869275142159176, "grad_norm": 0.300489672148383, "learning_rate": 4.32207144493909e-06, "loss": 0.4254, "step": 15752 }, { "epoch": 2.5870917414242607, "grad_norm": 0.33785313952343987, "learning_rate": 4.32160284044283e-06, "loss": 0.4356, "step": 15753 }, { "epoch": 2.5872559686326033, "grad_norm": 0.517376636026884, "learning_rate": 4.321134237884138e-06, "loss": 0.4211, "step": 15754 }, { "epoch": 2.587420195840946, "grad_norm": 0.2898400184440624, "learning_rate": 4.320665637268103e-06, "loss": 0.4221, "step": 15755 }, { "epoch": 2.5875844230492886, "grad_norm": 0.42197824385946514, "learning_rate": 4.3201970385998164e-06, "loss": 0.4449, "step": 15756 }, { "epoch": 2.5877486502576312, "grad_norm": 0.31677534657011097, "learning_rate": 4.319728441884366e-06, "loss": 0.4403, "step": 15757 }, { "epoch": 2.5879128774659743, "grad_norm": 0.2832694609001604, "learning_rate": 4.319259847126843e-06, "loss": 0.4378, "step": 15758 }, { "epoch": 2.588077104674317, "grad_norm": 0.39187976296966304, "learning_rate": 4.318791254332337e-06, "loss": 0.4537, "step": 15759 }, { "epoch": 2.5882413318826596, "grad_norm": 0.296304090284739, "learning_rate": 4.318322663505934e-06, "loss": 0.4382, "step": 15760 }, { "epoch": 2.5884055590910027, "grad_norm": 0.2989425161690862, "learning_rate": 4.317854074652727e-06, "loss": 0.4451, "step": 15761 }, { "epoch": 2.5885697862993453, "grad_norm": 0.32956500842867725, "learning_rate": 4.317385487777805e-06, "loss": 0.4252, "step": 15762 }, { "epoch": 2.588734013507688, "grad_norm": 0.3554805266347347, "learning_rate": 4.316916902886255e-06, "loss": 0.4785, "step": 15763 }, { "epoch": 2.5888982407160306, "grad_norm": 0.29784661781396565, "learning_rate": 4.316448319983166e-06, "loss": 0.449, "step": 15764 }, { "epoch": 2.5890624679243732, "grad_norm": 0.3403796996552582, "learning_rate": 4.315979739073631e-06, "loss": 0.42, "step": 15765 }, { "epoch": 2.589226695132716, "grad_norm": 0.2820204314916899, "learning_rate": 4.315511160162736e-06, "loss": 0.42, "step": 15766 }, { "epoch": 2.589390922341059, "grad_norm": 0.34694290008039924, "learning_rate": 4.315042583255571e-06, "loss": 0.4405, "step": 15767 }, { "epoch": 2.5895551495494016, "grad_norm": 0.285712751776153, "learning_rate": 4.314574008357227e-06, "loss": 0.4387, "step": 15768 }, { "epoch": 2.589719376757744, "grad_norm": 0.2886681436122029, "learning_rate": 4.31410543547279e-06, "loss": 0.4346, "step": 15769 }, { "epoch": 2.5898836039660873, "grad_norm": 0.26426338895128476, "learning_rate": 4.3136368646073535e-06, "loss": 0.465, "step": 15770 }, { "epoch": 2.59004783117443, "grad_norm": 0.46212153735161743, "learning_rate": 4.313168295766003e-06, "loss": 0.423, "step": 15771 }, { "epoch": 2.5902120583827726, "grad_norm": 0.27173188730977343, "learning_rate": 4.312699728953827e-06, "loss": 0.4151, "step": 15772 }, { "epoch": 2.590376285591115, "grad_norm": 0.44061969942428547, "learning_rate": 4.312231164175917e-06, "loss": 0.437, "step": 15773 }, { "epoch": 2.590540512799458, "grad_norm": 0.2805299615077187, "learning_rate": 4.311762601437362e-06, "loss": 0.4249, "step": 15774 }, { "epoch": 2.590704740007801, "grad_norm": 0.3082668237862245, "learning_rate": 4.3112940407432495e-06, "loss": 0.4388, "step": 15775 }, { "epoch": 2.5908689672161436, "grad_norm": 0.2950023105650061, "learning_rate": 4.3108254820986685e-06, "loss": 0.4588, "step": 15776 }, { "epoch": 2.591033194424486, "grad_norm": 0.3201705793393197, "learning_rate": 4.31035692550871e-06, "loss": 0.4311, "step": 15777 }, { "epoch": 2.5911974216328293, "grad_norm": 0.3640366984477735, "learning_rate": 4.309888370978464e-06, "loss": 0.4126, "step": 15778 }, { "epoch": 2.591361648841172, "grad_norm": 0.2999324534104021, "learning_rate": 4.309419818513014e-06, "loss": 0.445, "step": 15779 }, { "epoch": 2.5915258760495146, "grad_norm": 0.35312819820987595, "learning_rate": 4.308951268117454e-06, "loss": 0.4531, "step": 15780 }, { "epoch": 2.591690103257857, "grad_norm": 0.3104622059310826, "learning_rate": 4.308482719796871e-06, "loss": 0.419, "step": 15781 }, { "epoch": 2.5918543304662, "grad_norm": 0.40858664531530114, "learning_rate": 4.308014173556353e-06, "loss": 0.4378, "step": 15782 }, { "epoch": 2.5920185576745425, "grad_norm": 0.3084003255005991, "learning_rate": 4.3075456294009906e-06, "loss": 0.4262, "step": 15783 }, { "epoch": 2.5921827848828856, "grad_norm": 0.37756243501180997, "learning_rate": 4.3070770873358725e-06, "loss": 0.4435, "step": 15784 }, { "epoch": 2.592347012091228, "grad_norm": 0.3318405143473803, "learning_rate": 4.306608547366087e-06, "loss": 0.4372, "step": 15785 }, { "epoch": 2.592511239299571, "grad_norm": 0.31825262182092245, "learning_rate": 4.306140009496722e-06, "loss": 0.4355, "step": 15786 }, { "epoch": 2.592675466507914, "grad_norm": 0.26950774595672156, "learning_rate": 4.30567147373287e-06, "loss": 0.4384, "step": 15787 }, { "epoch": 2.5928396937162566, "grad_norm": 0.2773574713214042, "learning_rate": 4.305202940079614e-06, "loss": 0.4168, "step": 15788 }, { "epoch": 2.593003920924599, "grad_norm": 0.27968928473718313, "learning_rate": 4.304734408542048e-06, "loss": 0.4126, "step": 15789 }, { "epoch": 2.593168148132942, "grad_norm": 0.339631272148747, "learning_rate": 4.304265879125256e-06, "loss": 0.4252, "step": 15790 }, { "epoch": 2.5933323753412845, "grad_norm": 0.4405429762051981, "learning_rate": 4.30379735183433e-06, "loss": 0.4386, "step": 15791 }, { "epoch": 2.5934966025496275, "grad_norm": 0.33307488107275973, "learning_rate": 4.303328826674358e-06, "loss": 0.4191, "step": 15792 }, { "epoch": 2.59366082975797, "grad_norm": 0.31976140803255354, "learning_rate": 4.3028603036504286e-06, "loss": 0.4373, "step": 15793 }, { "epoch": 2.593825056966313, "grad_norm": 0.29829335474835267, "learning_rate": 4.302391782767629e-06, "loss": 0.4349, "step": 15794 }, { "epoch": 2.593989284174656, "grad_norm": 0.3889999203353962, "learning_rate": 4.301923264031052e-06, "loss": 0.434, "step": 15795 }, { "epoch": 2.5941535113829985, "grad_norm": 0.30571572031790084, "learning_rate": 4.301454747445781e-06, "loss": 0.4201, "step": 15796 }, { "epoch": 2.594317738591341, "grad_norm": 0.3459232731873862, "learning_rate": 4.300986233016907e-06, "loss": 0.4514, "step": 15797 }, { "epoch": 2.594481965799684, "grad_norm": 0.30146836874846944, "learning_rate": 4.300517720749518e-06, "loss": 0.4502, "step": 15798 }, { "epoch": 2.5946461930080265, "grad_norm": 0.36184226392840596, "learning_rate": 4.3000492106487035e-06, "loss": 0.431, "step": 15799 }, { "epoch": 2.594810420216369, "grad_norm": 0.31671853093359414, "learning_rate": 4.299580702719551e-06, "loss": 0.4438, "step": 15800 }, { "epoch": 2.594974647424712, "grad_norm": 0.3050096609024856, "learning_rate": 4.299112196967149e-06, "loss": 0.4642, "step": 15801 }, { "epoch": 2.595138874633055, "grad_norm": 0.41602891522432706, "learning_rate": 4.2986436933965866e-06, "loss": 0.4327, "step": 15802 }, { "epoch": 2.5953031018413975, "grad_norm": 0.24868676436677487, "learning_rate": 4.298175192012953e-06, "loss": 0.4176, "step": 15803 }, { "epoch": 2.5954673290497405, "grad_norm": 0.36640826122549386, "learning_rate": 4.2977066928213345e-06, "loss": 0.4296, "step": 15804 }, { "epoch": 2.595631556258083, "grad_norm": 0.30540983951859807, "learning_rate": 4.29723819582682e-06, "loss": 0.4366, "step": 15805 }, { "epoch": 2.595795783466426, "grad_norm": 0.3490310182612695, "learning_rate": 4.296769701034497e-06, "loss": 0.4336, "step": 15806 }, { "epoch": 2.5959600106747684, "grad_norm": 0.2680422378112935, "learning_rate": 4.296301208449456e-06, "loss": 0.4255, "step": 15807 }, { "epoch": 2.596124237883111, "grad_norm": 0.28315344448721086, "learning_rate": 4.295832718076785e-06, "loss": 0.4163, "step": 15808 }, { "epoch": 2.596288465091454, "grad_norm": 0.2923550723051912, "learning_rate": 4.295364229921571e-06, "loss": 0.416, "step": 15809 }, { "epoch": 2.596452692299797, "grad_norm": 0.28055270604204857, "learning_rate": 4.294895743988902e-06, "loss": 0.4469, "step": 15810 }, { "epoch": 2.5966169195081394, "grad_norm": 0.2879848375564051, "learning_rate": 4.294427260283868e-06, "loss": 0.4399, "step": 15811 }, { "epoch": 2.5967811467164825, "grad_norm": 0.5644459781722265, "learning_rate": 4.293958778811558e-06, "loss": 0.4669, "step": 15812 }, { "epoch": 2.596945373924825, "grad_norm": 0.2848883445687235, "learning_rate": 4.293490299577056e-06, "loss": 0.421, "step": 15813 }, { "epoch": 2.597109601133168, "grad_norm": 0.3855917851665769, "learning_rate": 4.293021822585452e-06, "loss": 0.4571, "step": 15814 }, { "epoch": 2.5972738283415104, "grad_norm": 0.37697902958562113, "learning_rate": 4.292553347841836e-06, "loss": 0.4241, "step": 15815 }, { "epoch": 2.597438055549853, "grad_norm": 0.3082437220187043, "learning_rate": 4.2920848753512945e-06, "loss": 0.4571, "step": 15816 }, { "epoch": 2.5976022827581957, "grad_norm": 0.4806961940115057, "learning_rate": 4.291616405118915e-06, "loss": 0.4438, "step": 15817 }, { "epoch": 2.597766509966539, "grad_norm": 0.33571584868959514, "learning_rate": 4.291147937149787e-06, "loss": 0.4355, "step": 15818 }, { "epoch": 2.5979307371748814, "grad_norm": 0.8390119913907225, "learning_rate": 4.290679471448998e-06, "loss": 0.4369, "step": 15819 }, { "epoch": 2.598094964383224, "grad_norm": 0.24477381682903024, "learning_rate": 4.290211008021638e-06, "loss": 0.4346, "step": 15820 }, { "epoch": 2.598259191591567, "grad_norm": 0.36475684221611265, "learning_rate": 4.289742546872789e-06, "loss": 0.4485, "step": 15821 }, { "epoch": 2.59842341879991, "grad_norm": 0.3489466071086147, "learning_rate": 4.289274088007544e-06, "loss": 0.4417, "step": 15822 }, { "epoch": 2.5985876460082524, "grad_norm": 0.27971414760095065, "learning_rate": 4.288805631430991e-06, "loss": 0.441, "step": 15823 }, { "epoch": 2.598751873216595, "grad_norm": 0.26586588593970034, "learning_rate": 4.288337177148215e-06, "loss": 0.4563, "step": 15824 }, { "epoch": 2.5989161004249377, "grad_norm": 0.30542948972381184, "learning_rate": 4.287868725164307e-06, "loss": 0.4266, "step": 15825 }, { "epoch": 2.599080327633281, "grad_norm": 0.31955052815461044, "learning_rate": 4.287400275484351e-06, "loss": 0.4498, "step": 15826 }, { "epoch": 2.5992445548416234, "grad_norm": 0.31979104627134625, "learning_rate": 4.28693182811344e-06, "loss": 0.4317, "step": 15827 }, { "epoch": 2.599408782049966, "grad_norm": 0.7336338061730221, "learning_rate": 4.286463383056658e-06, "loss": 0.4161, "step": 15828 }, { "epoch": 2.599573009258309, "grad_norm": 0.26448341402895187, "learning_rate": 4.285994940319094e-06, "loss": 0.4546, "step": 15829 }, { "epoch": 2.599737236466652, "grad_norm": 0.2814894747781621, "learning_rate": 4.285526499905835e-06, "loss": 0.4408, "step": 15830 }, { "epoch": 2.5999014636749944, "grad_norm": 0.32304929685665024, "learning_rate": 4.2850580618219685e-06, "loss": 0.4495, "step": 15831 }, { "epoch": 2.600065690883337, "grad_norm": 0.3003407465523042, "learning_rate": 4.284589626072584e-06, "loss": 0.4211, "step": 15832 }, { "epoch": 2.6002299180916797, "grad_norm": 0.3126022106522328, "learning_rate": 4.2841211926627685e-06, "loss": 0.4459, "step": 15833 }, { "epoch": 2.6003941453000223, "grad_norm": 0.3202251903747605, "learning_rate": 4.283652761597607e-06, "loss": 0.4123, "step": 15834 }, { "epoch": 2.6005583725083654, "grad_norm": 0.3193033038388879, "learning_rate": 4.283184332882192e-06, "loss": 0.43, "step": 15835 }, { "epoch": 2.600722599716708, "grad_norm": 0.4759091609533221, "learning_rate": 4.282715906521607e-06, "loss": 0.4351, "step": 15836 }, { "epoch": 2.6008868269250507, "grad_norm": 0.34048263468535467, "learning_rate": 4.2822474825209426e-06, "loss": 0.4542, "step": 15837 }, { "epoch": 2.6010510541333938, "grad_norm": 0.2964018813124492, "learning_rate": 4.281779060885284e-06, "loss": 0.4524, "step": 15838 }, { "epoch": 2.6012152813417364, "grad_norm": 0.272121429844428, "learning_rate": 4.281310641619719e-06, "loss": 0.4389, "step": 15839 }, { "epoch": 2.601379508550079, "grad_norm": 0.2930384076277509, "learning_rate": 4.280842224729335e-06, "loss": 0.4205, "step": 15840 }, { "epoch": 2.6015437357584217, "grad_norm": 0.3932004190719817, "learning_rate": 4.28037381021922e-06, "loss": 0.4208, "step": 15841 }, { "epoch": 2.6017079629667643, "grad_norm": 0.2916958143650516, "learning_rate": 4.279905398094463e-06, "loss": 0.4442, "step": 15842 }, { "epoch": 2.6018721901751074, "grad_norm": 0.38320540985005647, "learning_rate": 4.279436988360148e-06, "loss": 0.4371, "step": 15843 }, { "epoch": 2.60203641738345, "grad_norm": 0.3442761883932953, "learning_rate": 4.278968581021366e-06, "loss": 0.4267, "step": 15844 }, { "epoch": 2.6022006445917927, "grad_norm": 0.3311876856923415, "learning_rate": 4.278500176083204e-06, "loss": 0.4491, "step": 15845 }, { "epoch": 2.6023648718001358, "grad_norm": 0.271081882936446, "learning_rate": 4.278031773550745e-06, "loss": 0.443, "step": 15846 }, { "epoch": 2.6025290990084784, "grad_norm": 0.27360586704891615, "learning_rate": 4.27756337342908e-06, "loss": 0.4234, "step": 15847 }, { "epoch": 2.602693326216821, "grad_norm": 0.3028946730931956, "learning_rate": 4.277094975723297e-06, "loss": 0.4178, "step": 15848 }, { "epoch": 2.6028575534251637, "grad_norm": 0.4211902298436584, "learning_rate": 4.27662658043848e-06, "loss": 0.4279, "step": 15849 }, { "epoch": 2.6030217806335063, "grad_norm": 0.29390415171983797, "learning_rate": 4.276158187579719e-06, "loss": 0.4311, "step": 15850 }, { "epoch": 2.603186007841849, "grad_norm": 0.2893391661327366, "learning_rate": 4.275689797152101e-06, "loss": 0.4442, "step": 15851 }, { "epoch": 2.603350235050192, "grad_norm": 0.30309257216350804, "learning_rate": 4.275221409160711e-06, "loss": 0.4148, "step": 15852 }, { "epoch": 2.6035144622585347, "grad_norm": 0.3694770688838367, "learning_rate": 4.274753023610641e-06, "loss": 0.4402, "step": 15853 }, { "epoch": 2.6036786894668773, "grad_norm": 0.31224351318683174, "learning_rate": 4.274284640506972e-06, "loss": 0.4209, "step": 15854 }, { "epoch": 2.6038429166752204, "grad_norm": 0.26476569410972106, "learning_rate": 4.273816259854794e-06, "loss": 0.4353, "step": 15855 }, { "epoch": 2.604007143883563, "grad_norm": 0.2524068878432348, "learning_rate": 4.273347881659193e-06, "loss": 0.4341, "step": 15856 }, { "epoch": 2.6041713710919057, "grad_norm": 0.3836716245953561, "learning_rate": 4.272879505925259e-06, "loss": 0.4405, "step": 15857 }, { "epoch": 2.6043355983002483, "grad_norm": 0.31367484844860166, "learning_rate": 4.272411132658076e-06, "loss": 0.437, "step": 15858 }, { "epoch": 2.604499825508591, "grad_norm": 0.24073228613682615, "learning_rate": 4.271942761862731e-06, "loss": 0.4263, "step": 15859 }, { "epoch": 2.604664052716934, "grad_norm": 0.31112665903222925, "learning_rate": 4.271474393544313e-06, "loss": 0.4367, "step": 15860 }, { "epoch": 2.6048282799252767, "grad_norm": 0.39513586710108894, "learning_rate": 4.271006027707909e-06, "loss": 0.4421, "step": 15861 }, { "epoch": 2.6049925071336193, "grad_norm": 0.37016694695642754, "learning_rate": 4.270537664358604e-06, "loss": 0.4234, "step": 15862 }, { "epoch": 2.6051567343419624, "grad_norm": 0.3234392124628261, "learning_rate": 4.270069303501487e-06, "loss": 0.4366, "step": 15863 }, { "epoch": 2.605320961550305, "grad_norm": 0.3211553226048717, "learning_rate": 4.269600945141642e-06, "loss": 0.4139, "step": 15864 }, { "epoch": 2.6054851887586477, "grad_norm": 0.297381168760795, "learning_rate": 4.269132589284157e-06, "loss": 0.4222, "step": 15865 }, { "epoch": 2.6056494159669903, "grad_norm": 0.2684853314660215, "learning_rate": 4.268664235934119e-06, "loss": 0.4226, "step": 15866 }, { "epoch": 2.605813643175333, "grad_norm": 0.24428461191927797, "learning_rate": 4.268195885096617e-06, "loss": 0.4055, "step": 15867 }, { "epoch": 2.6059778703836756, "grad_norm": 0.30690138353801594, "learning_rate": 4.267727536776734e-06, "loss": 0.4327, "step": 15868 }, { "epoch": 2.6061420975920186, "grad_norm": 0.309580731291792, "learning_rate": 4.267259190979558e-06, "loss": 0.4277, "step": 15869 }, { "epoch": 2.6063063248003613, "grad_norm": 0.3521082341168475, "learning_rate": 4.2667908477101794e-06, "loss": 0.4369, "step": 15870 }, { "epoch": 2.606470552008704, "grad_norm": 0.27827993463951545, "learning_rate": 4.266322506973679e-06, "loss": 0.4153, "step": 15871 }, { "epoch": 2.606634779217047, "grad_norm": 0.29314152987116177, "learning_rate": 4.265854168775148e-06, "loss": 0.4383, "step": 15872 }, { "epoch": 2.6067990064253896, "grad_norm": 0.4175886384913535, "learning_rate": 4.265385833119668e-06, "loss": 0.4374, "step": 15873 }, { "epoch": 2.6069632336337323, "grad_norm": 0.3215341260226248, "learning_rate": 4.264917500012331e-06, "loss": 0.4481, "step": 15874 }, { "epoch": 2.607127460842075, "grad_norm": 0.28916901461660643, "learning_rate": 4.2644491694582196e-06, "loss": 0.4291, "step": 15875 }, { "epoch": 2.6072916880504176, "grad_norm": 0.5564639233489209, "learning_rate": 4.263980841462424e-06, "loss": 0.4434, "step": 15876 }, { "epoch": 2.6074559152587606, "grad_norm": 0.4252859915961404, "learning_rate": 4.263512516030027e-06, "loss": 0.433, "step": 15877 }, { "epoch": 2.6076201424671033, "grad_norm": 0.3004118104642622, "learning_rate": 4.2630441931661195e-06, "loss": 0.4334, "step": 15878 }, { "epoch": 2.607784369675446, "grad_norm": 0.3107784239472434, "learning_rate": 4.262575872875782e-06, "loss": 0.429, "step": 15879 }, { "epoch": 2.607948596883789, "grad_norm": 0.3734813433262263, "learning_rate": 4.262107555164105e-06, "loss": 0.4144, "step": 15880 }, { "epoch": 2.6081128240921316, "grad_norm": 0.3201747720302578, "learning_rate": 4.261639240036174e-06, "loss": 0.4365, "step": 15881 }, { "epoch": 2.6082770513004743, "grad_norm": 0.30335678042097514, "learning_rate": 4.261170927497076e-06, "loss": 0.4369, "step": 15882 }, { "epoch": 2.608441278508817, "grad_norm": 0.30057906830959097, "learning_rate": 4.260702617551896e-06, "loss": 0.4274, "step": 15883 }, { "epoch": 2.6086055057171595, "grad_norm": 0.6371499207725939, "learning_rate": 4.260234310205721e-06, "loss": 0.4348, "step": 15884 }, { "epoch": 2.608769732925502, "grad_norm": 0.33918844373338264, "learning_rate": 4.2597660054636376e-06, "loss": 0.4335, "step": 15885 }, { "epoch": 2.6089339601338453, "grad_norm": 0.3361159771638176, "learning_rate": 4.259297703330732e-06, "loss": 0.4276, "step": 15886 }, { "epoch": 2.609098187342188, "grad_norm": 0.3028877725278089, "learning_rate": 4.258829403812091e-06, "loss": 0.4339, "step": 15887 }, { "epoch": 2.6092624145505305, "grad_norm": 0.2717049246569981, "learning_rate": 4.258361106912799e-06, "loss": 0.422, "step": 15888 }, { "epoch": 2.6094266417588736, "grad_norm": 0.29662739630039475, "learning_rate": 4.257892812637942e-06, "loss": 0.4485, "step": 15889 }, { "epoch": 2.6095908689672163, "grad_norm": 0.32580420835705043, "learning_rate": 4.257424520992608e-06, "loss": 0.4304, "step": 15890 }, { "epoch": 2.609755096175559, "grad_norm": 0.2789367086644554, "learning_rate": 4.256956231981883e-06, "loss": 0.4312, "step": 15891 }, { "epoch": 2.6099193233839015, "grad_norm": 0.27050105126913654, "learning_rate": 4.256487945610853e-06, "loss": 0.4363, "step": 15892 }, { "epoch": 2.610083550592244, "grad_norm": 0.32473112425656697, "learning_rate": 4.256019661884601e-06, "loss": 0.4514, "step": 15893 }, { "epoch": 2.6102477778005873, "grad_norm": 1.2224814949738927, "learning_rate": 4.255551380808217e-06, "loss": 0.4475, "step": 15894 }, { "epoch": 2.61041200500893, "grad_norm": 0.2899718094249228, "learning_rate": 4.255083102386787e-06, "loss": 0.4432, "step": 15895 }, { "epoch": 2.6105762322172725, "grad_norm": 0.31770282494513413, "learning_rate": 4.254614826625393e-06, "loss": 0.4164, "step": 15896 }, { "epoch": 2.6107404594256156, "grad_norm": 0.31746036994094073, "learning_rate": 4.254146553529126e-06, "loss": 0.4343, "step": 15897 }, { "epoch": 2.6109046866339582, "grad_norm": 0.32599628063434466, "learning_rate": 4.253678283103067e-06, "loss": 0.4484, "step": 15898 }, { "epoch": 2.611068913842301, "grad_norm": 0.29199535549958605, "learning_rate": 4.2532100153523045e-06, "loss": 0.424, "step": 15899 }, { "epoch": 2.6112331410506435, "grad_norm": 0.30444024787236657, "learning_rate": 4.252741750281925e-06, "loss": 0.4227, "step": 15900 }, { "epoch": 2.611397368258986, "grad_norm": 0.4412199564586546, "learning_rate": 4.252273487897013e-06, "loss": 0.4427, "step": 15901 }, { "epoch": 2.611561595467329, "grad_norm": 0.5410335689281915, "learning_rate": 4.251805228202654e-06, "loss": 0.4292, "step": 15902 }, { "epoch": 2.611725822675672, "grad_norm": 0.41213691877007574, "learning_rate": 4.251336971203934e-06, "loss": 0.4487, "step": 15903 }, { "epoch": 2.6118900498840145, "grad_norm": 0.3330652790042675, "learning_rate": 4.250868716905941e-06, "loss": 0.4392, "step": 15904 }, { "epoch": 2.612054277092357, "grad_norm": 0.3481140986138393, "learning_rate": 4.250400465313758e-06, "loss": 0.422, "step": 15905 }, { "epoch": 2.6122185043007002, "grad_norm": 0.31634923265199455, "learning_rate": 4.249932216432472e-06, "loss": 0.4314, "step": 15906 }, { "epoch": 2.612382731509043, "grad_norm": 0.2918750725823665, "learning_rate": 4.249463970267168e-06, "loss": 0.4341, "step": 15907 }, { "epoch": 2.6125469587173855, "grad_norm": 1.0899497941084635, "learning_rate": 4.24899572682293e-06, "loss": 0.4252, "step": 15908 }, { "epoch": 2.612711185925728, "grad_norm": 0.32946172413991354, "learning_rate": 4.248527486104847e-06, "loss": 0.4436, "step": 15909 }, { "epoch": 2.612875413134071, "grad_norm": 0.28083680309894277, "learning_rate": 4.248059248118003e-06, "loss": 0.4234, "step": 15910 }, { "epoch": 2.613039640342414, "grad_norm": 0.2696569450276403, "learning_rate": 4.247591012867483e-06, "loss": 0.4317, "step": 15911 }, { "epoch": 2.6132038675507565, "grad_norm": 0.32797454999406345, "learning_rate": 4.247122780358374e-06, "loss": 0.4485, "step": 15912 }, { "epoch": 2.613368094759099, "grad_norm": 0.2978012132300891, "learning_rate": 4.246654550595759e-06, "loss": 0.4444, "step": 15913 }, { "epoch": 2.6135323219674422, "grad_norm": 0.3347378118305014, "learning_rate": 4.246186323584725e-06, "loss": 0.4491, "step": 15914 }, { "epoch": 2.613696549175785, "grad_norm": 0.30672663933298117, "learning_rate": 4.245718099330358e-06, "loss": 0.4711, "step": 15915 }, { "epoch": 2.6138607763841275, "grad_norm": 0.2780680965058183, "learning_rate": 4.245249877837742e-06, "loss": 0.4234, "step": 15916 }, { "epoch": 2.61402500359247, "grad_norm": 0.31100650951866027, "learning_rate": 4.2447816591119626e-06, "loss": 0.4395, "step": 15917 }, { "epoch": 2.614189230800813, "grad_norm": 0.3165837170282832, "learning_rate": 4.244313443158106e-06, "loss": 0.4534, "step": 15918 }, { "epoch": 2.6143534580091554, "grad_norm": 0.2945574433533434, "learning_rate": 4.243845229981256e-06, "loss": 0.4322, "step": 15919 }, { "epoch": 2.6145176852174985, "grad_norm": 0.2854056769828595, "learning_rate": 4.243377019586501e-06, "loss": 0.4216, "step": 15920 }, { "epoch": 2.614681912425841, "grad_norm": 0.3449269045558118, "learning_rate": 4.242908811978924e-06, "loss": 0.4335, "step": 15921 }, { "epoch": 2.6148461396341838, "grad_norm": 0.29707638186807517, "learning_rate": 4.2424406071636085e-06, "loss": 0.4307, "step": 15922 }, { "epoch": 2.615010366842527, "grad_norm": 0.31842893735652705, "learning_rate": 4.241972405145641e-06, "loss": 0.4462, "step": 15923 }, { "epoch": 2.6151745940508695, "grad_norm": 0.2784297794765863, "learning_rate": 4.241504205930108e-06, "loss": 0.4393, "step": 15924 }, { "epoch": 2.615338821259212, "grad_norm": 0.637609011183369, "learning_rate": 4.241036009522094e-06, "loss": 0.4164, "step": 15925 }, { "epoch": 2.6155030484675548, "grad_norm": 0.30614294622529886, "learning_rate": 4.240567815926683e-06, "loss": 0.4284, "step": 15926 }, { "epoch": 2.6156672756758974, "grad_norm": 0.3447127775105285, "learning_rate": 4.240099625148961e-06, "loss": 0.4537, "step": 15927 }, { "epoch": 2.6158315028842405, "grad_norm": 0.4970599090971395, "learning_rate": 4.2396314371940125e-06, "loss": 0.4371, "step": 15928 }, { "epoch": 2.615995730092583, "grad_norm": 0.43555955002393065, "learning_rate": 4.239163252066925e-06, "loss": 0.4361, "step": 15929 }, { "epoch": 2.6161599573009258, "grad_norm": 0.32903375832698184, "learning_rate": 4.238695069772779e-06, "loss": 0.4384, "step": 15930 }, { "epoch": 2.616324184509269, "grad_norm": 0.33202092298407887, "learning_rate": 4.238226890316663e-06, "loss": 0.4481, "step": 15931 }, { "epoch": 2.6164884117176115, "grad_norm": 0.45140111170335284, "learning_rate": 4.237758713703659e-06, "loss": 0.4361, "step": 15932 }, { "epoch": 2.616652638925954, "grad_norm": 0.33571375004655346, "learning_rate": 4.237290539938854e-06, "loss": 0.4146, "step": 15933 }, { "epoch": 2.6168168661342968, "grad_norm": 0.5085529476929775, "learning_rate": 4.236822369027331e-06, "loss": 0.4412, "step": 15934 }, { "epoch": 2.6169810933426394, "grad_norm": 0.26527704034784494, "learning_rate": 4.236354200974178e-06, "loss": 0.4344, "step": 15935 }, { "epoch": 2.617145320550982, "grad_norm": 0.2653581981143029, "learning_rate": 4.235886035784477e-06, "loss": 0.4255, "step": 15936 }, { "epoch": 2.617309547759325, "grad_norm": 0.26429214414880825, "learning_rate": 4.235417873463314e-06, "loss": 0.4164, "step": 15937 }, { "epoch": 2.6174737749676678, "grad_norm": 0.451089379274073, "learning_rate": 4.234949714015772e-06, "loss": 0.4303, "step": 15938 }, { "epoch": 2.6176380021760104, "grad_norm": 0.613858816212356, "learning_rate": 4.2344815574469376e-06, "loss": 0.4503, "step": 15939 }, { "epoch": 2.6178022293843535, "grad_norm": 0.42073771656410275, "learning_rate": 4.234013403761895e-06, "loss": 0.4332, "step": 15940 }, { "epoch": 2.617966456592696, "grad_norm": 0.2764531610938458, "learning_rate": 4.233545252965728e-06, "loss": 0.4246, "step": 15941 }, { "epoch": 2.6181306838010387, "grad_norm": 0.3069746988437526, "learning_rate": 4.233077105063521e-06, "loss": 0.44, "step": 15942 }, { "epoch": 2.6182949110093814, "grad_norm": 0.34945204460664414, "learning_rate": 4.232608960060361e-06, "loss": 0.4338, "step": 15943 }, { "epoch": 2.618459138217724, "grad_norm": 0.3304580150854837, "learning_rate": 4.23214081796133e-06, "loss": 0.4513, "step": 15944 }, { "epoch": 2.618623365426067, "grad_norm": 0.4045233492426899, "learning_rate": 4.231672678771513e-06, "loss": 0.4383, "step": 15945 }, { "epoch": 2.6187875926344097, "grad_norm": 0.3027881257346491, "learning_rate": 4.231204542495996e-06, "loss": 0.4323, "step": 15946 }, { "epoch": 2.6189518198427524, "grad_norm": 0.3566048787535595, "learning_rate": 4.230736409139861e-06, "loss": 0.4433, "step": 15947 }, { "epoch": 2.6191160470510955, "grad_norm": 0.28909857225231095, "learning_rate": 4.230268278708193e-06, "loss": 0.4371, "step": 15948 }, { "epoch": 2.619280274259438, "grad_norm": 0.30328281123785367, "learning_rate": 4.229800151206077e-06, "loss": 0.4616, "step": 15949 }, { "epoch": 2.6194445014677807, "grad_norm": 0.28871293493755906, "learning_rate": 4.229332026638598e-06, "loss": 0.4094, "step": 15950 }, { "epoch": 2.6196087286761234, "grad_norm": 0.3015937440164125, "learning_rate": 4.228863905010839e-06, "loss": 0.4405, "step": 15951 }, { "epoch": 2.619772955884466, "grad_norm": 0.33214719515041785, "learning_rate": 4.2283957863278845e-06, "loss": 0.4394, "step": 15952 }, { "epoch": 2.6199371830928087, "grad_norm": 0.3584487107355222, "learning_rate": 4.227927670594818e-06, "loss": 0.4484, "step": 15953 }, { "epoch": 2.6201014103011517, "grad_norm": 0.3666964485124772, "learning_rate": 4.227459557816728e-06, "loss": 0.4186, "step": 15954 }, { "epoch": 2.6202656375094944, "grad_norm": 0.27626251940460655, "learning_rate": 4.226991447998694e-06, "loss": 0.4184, "step": 15955 }, { "epoch": 2.620429864717837, "grad_norm": 0.3010169911353582, "learning_rate": 4.226523341145801e-06, "loss": 0.4435, "step": 15956 }, { "epoch": 2.62059409192618, "grad_norm": 0.3042075192771724, "learning_rate": 4.226055237263132e-06, "loss": 0.4275, "step": 15957 }, { "epoch": 2.6207583191345227, "grad_norm": 0.28337020528828455, "learning_rate": 4.225587136355774e-06, "loss": 0.4321, "step": 15958 }, { "epoch": 2.6209225463428654, "grad_norm": 0.275572951052758, "learning_rate": 4.22511903842881e-06, "loss": 0.4293, "step": 15959 }, { "epoch": 2.621086773551208, "grad_norm": 0.3765406983641153, "learning_rate": 4.2246509434873235e-06, "loss": 0.4488, "step": 15960 }, { "epoch": 2.6212510007595506, "grad_norm": 0.3500604250595598, "learning_rate": 4.224182851536398e-06, "loss": 0.4265, "step": 15961 }, { "epoch": 2.6214152279678937, "grad_norm": 0.3161889341013727, "learning_rate": 4.22371476258112e-06, "loss": 0.4507, "step": 15962 }, { "epoch": 2.6215794551762364, "grad_norm": 0.2995552223403898, "learning_rate": 4.223246676626571e-06, "loss": 0.4271, "step": 15963 }, { "epoch": 2.621743682384579, "grad_norm": 0.40480995619927845, "learning_rate": 4.222778593677833e-06, "loss": 0.4236, "step": 15964 }, { "epoch": 2.621907909592922, "grad_norm": 0.3988167086614991, "learning_rate": 4.222310513739995e-06, "loss": 0.4419, "step": 15965 }, { "epoch": 2.6220721368012647, "grad_norm": 0.32298607418392483, "learning_rate": 4.221842436818136e-06, "loss": 0.4397, "step": 15966 }, { "epoch": 2.6222363640096074, "grad_norm": 0.29969031931968826, "learning_rate": 4.2213743629173425e-06, "loss": 0.4201, "step": 15967 }, { "epoch": 2.62240059121795, "grad_norm": 0.27714271714246214, "learning_rate": 4.220906292042697e-06, "loss": 0.4431, "step": 15968 }, { "epoch": 2.6225648184262926, "grad_norm": 0.2942175988231403, "learning_rate": 4.220438224199285e-06, "loss": 0.4351, "step": 15969 }, { "epoch": 2.6227290456346353, "grad_norm": 0.27700577987436237, "learning_rate": 4.21997015939219e-06, "loss": 0.4394, "step": 15970 }, { "epoch": 2.6228932728429784, "grad_norm": 0.31847153155102015, "learning_rate": 4.219502097626492e-06, "loss": 0.428, "step": 15971 }, { "epoch": 2.623057500051321, "grad_norm": 0.2929640968271118, "learning_rate": 4.219034038907278e-06, "loss": 0.4342, "step": 15972 }, { "epoch": 2.6232217272596636, "grad_norm": 0.36743081273337336, "learning_rate": 4.21856598323963e-06, "loss": 0.434, "step": 15973 }, { "epoch": 2.6233859544680067, "grad_norm": 0.251752898125371, "learning_rate": 4.218097930628634e-06, "loss": 0.4176, "step": 15974 }, { "epoch": 2.6235501816763493, "grad_norm": 0.26781368785221105, "learning_rate": 4.217629881079372e-06, "loss": 0.4441, "step": 15975 }, { "epoch": 2.623714408884692, "grad_norm": 0.29308075035555686, "learning_rate": 4.217161834596925e-06, "loss": 0.4396, "step": 15976 }, { "epoch": 2.6238786360930346, "grad_norm": 0.3126222728890951, "learning_rate": 4.21669379118638e-06, "loss": 0.4339, "step": 15977 }, { "epoch": 2.6240428633013773, "grad_norm": 0.42222963655284285, "learning_rate": 4.21622575085282e-06, "loss": 0.4497, "step": 15978 }, { "epoch": 2.6242070905097203, "grad_norm": 0.2680911148735966, "learning_rate": 4.215757713601327e-06, "loss": 0.4505, "step": 15979 }, { "epoch": 2.624371317718063, "grad_norm": 0.26722916634092214, "learning_rate": 4.215289679436986e-06, "loss": 0.4328, "step": 15980 }, { "epoch": 2.6245355449264056, "grad_norm": 0.28522394018656005, "learning_rate": 4.214821648364877e-06, "loss": 0.4208, "step": 15981 }, { "epoch": 2.6246997721347487, "grad_norm": 0.3730121107654761, "learning_rate": 4.214353620390086e-06, "loss": 0.4349, "step": 15982 }, { "epoch": 2.6248639993430913, "grad_norm": 0.34376190480566104, "learning_rate": 4.2138855955176955e-06, "loss": 0.4477, "step": 15983 }, { "epoch": 2.625028226551434, "grad_norm": 0.35628550422099475, "learning_rate": 4.21341757375279e-06, "loss": 0.4342, "step": 15984 }, { "epoch": 2.6251924537597766, "grad_norm": 0.3953333670118324, "learning_rate": 4.212949555100451e-06, "loss": 0.4378, "step": 15985 }, { "epoch": 2.6253566809681192, "grad_norm": 0.3403985295123224, "learning_rate": 4.212481539565762e-06, "loss": 0.4206, "step": 15986 }, { "epoch": 2.625520908176462, "grad_norm": 0.3700412707613751, "learning_rate": 4.212013527153809e-06, "loss": 0.4462, "step": 15987 }, { "epoch": 2.625685135384805, "grad_norm": 0.3124818850646178, "learning_rate": 4.211545517869669e-06, "loss": 0.4382, "step": 15988 }, { "epoch": 2.6258493625931476, "grad_norm": 0.37030254675822366, "learning_rate": 4.21107751171843e-06, "loss": 0.4218, "step": 15989 }, { "epoch": 2.6260135898014902, "grad_norm": 0.3251159171177923, "learning_rate": 4.210609508705173e-06, "loss": 0.4342, "step": 15990 }, { "epoch": 2.6261778170098333, "grad_norm": 0.28275761112001435, "learning_rate": 4.210141508834981e-06, "loss": 0.4313, "step": 15991 }, { "epoch": 2.626342044218176, "grad_norm": 0.3372588684132263, "learning_rate": 4.2096735121129385e-06, "loss": 0.4427, "step": 15992 }, { "epoch": 2.6265062714265186, "grad_norm": 0.35546215576563495, "learning_rate": 4.2092055185441265e-06, "loss": 0.4253, "step": 15993 }, { "epoch": 2.6266704986348612, "grad_norm": 0.28538441490936256, "learning_rate": 4.2087375281336286e-06, "loss": 0.4459, "step": 15994 }, { "epoch": 2.626834725843204, "grad_norm": 0.3705717411953057, "learning_rate": 4.208269540886529e-06, "loss": 0.4422, "step": 15995 }, { "epoch": 2.626998953051547, "grad_norm": 0.26329011035550054, "learning_rate": 4.207801556807909e-06, "loss": 0.4442, "step": 15996 }, { "epoch": 2.6271631802598896, "grad_norm": 0.28256898887020987, "learning_rate": 4.20733357590285e-06, "loss": 0.4422, "step": 15997 }, { "epoch": 2.6273274074682322, "grad_norm": 0.31725685114307306, "learning_rate": 4.206865598176437e-06, "loss": 0.4472, "step": 15998 }, { "epoch": 2.6274916346765753, "grad_norm": 1.181339168715183, "learning_rate": 4.206397623633753e-06, "loss": 0.4324, "step": 15999 }, { "epoch": 2.627655861884918, "grad_norm": 0.5335499658081728, "learning_rate": 4.2059296522798786e-06, "loss": 0.4335, "step": 16000 }, { "epoch": 2.633813827817476, "grad_norm": 0.3458413336772324, "learning_rate": 4.188362348384085e-06, "loss": 0.4379, "step": 16001 }, { "epoch": 2.6339784120365923, "grad_norm": 0.5975672929918889, "learning_rate": 4.1878934149027705e-06, "loss": 0.4447, "step": 16002 }, { "epoch": 2.634142996255709, "grad_norm": 0.31596954107529784, "learning_rate": 4.18742448482704e-06, "loss": 0.43, "step": 16003 }, { "epoch": 2.6343075804748253, "grad_norm": 0.29705223444507267, "learning_rate": 4.186955558162006e-06, "loss": 0.4309, "step": 16004 }, { "epoch": 2.6344721646939417, "grad_norm": 0.3042036993024349, "learning_rate": 4.186486634912789e-06, "loss": 0.4447, "step": 16005 }, { "epoch": 2.634636748913058, "grad_norm": 0.38759716050242937, "learning_rate": 4.1860177150845045e-06, "loss": 0.4292, "step": 16006 }, { "epoch": 2.6348013331321747, "grad_norm": 0.31390985601371674, "learning_rate": 4.18554879868227e-06, "loss": 0.4378, "step": 16007 }, { "epoch": 2.634965917351291, "grad_norm": 0.5655176381737559, "learning_rate": 4.1850798857112e-06, "loss": 0.4382, "step": 16008 }, { "epoch": 2.6351305015704076, "grad_norm": 0.3223395703472826, "learning_rate": 4.184610976176414e-06, "loss": 0.438, "step": 16009 }, { "epoch": 2.635295085789524, "grad_norm": 0.35687197663501297, "learning_rate": 4.184142070083026e-06, "loss": 0.4237, "step": 16010 }, { "epoch": 2.6354596700086406, "grad_norm": 0.44101539197692596, "learning_rate": 4.183673167436155e-06, "loss": 0.4341, "step": 16011 }, { "epoch": 2.635624254227757, "grad_norm": 0.3007372895231009, "learning_rate": 4.183204268240916e-06, "loss": 0.4288, "step": 16012 }, { "epoch": 2.6357888384468735, "grad_norm": 0.29204721833278224, "learning_rate": 4.182735372502424e-06, "loss": 0.4309, "step": 16013 }, { "epoch": 2.63595342266599, "grad_norm": 0.31142547055058784, "learning_rate": 4.1822664802258e-06, "loss": 0.4211, "step": 16014 }, { "epoch": 2.6361180068851064, "grad_norm": 0.4347133243104596, "learning_rate": 4.181797591416155e-06, "loss": 0.4404, "step": 16015 }, { "epoch": 2.636282591104223, "grad_norm": 0.5475754911371223, "learning_rate": 4.181328706078609e-06, "loss": 0.427, "step": 16016 }, { "epoch": 2.6364471753233394, "grad_norm": 0.33465708743300165, "learning_rate": 4.180859824218275e-06, "loss": 0.441, "step": 16017 }, { "epoch": 2.636611759542456, "grad_norm": 0.4214285639676177, "learning_rate": 4.180390945840273e-06, "loss": 0.4257, "step": 16018 }, { "epoch": 2.6367763437615723, "grad_norm": 0.380826150533155, "learning_rate": 4.179922070949717e-06, "loss": 0.4514, "step": 16019 }, { "epoch": 2.636940927980689, "grad_norm": 0.2875791514989647, "learning_rate": 4.179453199551723e-06, "loss": 0.4438, "step": 16020 }, { "epoch": 2.6371055121998053, "grad_norm": 0.308429485865092, "learning_rate": 4.178984331651408e-06, "loss": 0.4246, "step": 16021 }, { "epoch": 2.6372700964189217, "grad_norm": 0.346006434138322, "learning_rate": 4.178515467253888e-06, "loss": 0.4488, "step": 16022 }, { "epoch": 2.637434680638038, "grad_norm": 0.2868870426572875, "learning_rate": 4.1780466063642804e-06, "loss": 0.4168, "step": 16023 }, { "epoch": 2.6375992648571547, "grad_norm": 0.3518715966594303, "learning_rate": 4.177577748987697e-06, "loss": 0.4306, "step": 16024 }, { "epoch": 2.637763849076271, "grad_norm": 0.33694302289305983, "learning_rate": 4.177108895129259e-06, "loss": 0.4469, "step": 16025 }, { "epoch": 2.6379284332953876, "grad_norm": 0.2835088136502032, "learning_rate": 4.176640044794079e-06, "loss": 0.4265, "step": 16026 }, { "epoch": 2.638093017514504, "grad_norm": 0.3304200094771688, "learning_rate": 4.176171197987273e-06, "loss": 0.4282, "step": 16027 }, { "epoch": 2.6382576017336206, "grad_norm": 0.29655360915312234, "learning_rate": 4.1757023547139585e-06, "loss": 0.4258, "step": 16028 }, { "epoch": 2.638422185952737, "grad_norm": 0.34291231739564487, "learning_rate": 4.175233514979249e-06, "loss": 0.4276, "step": 16029 }, { "epoch": 2.6385867701718535, "grad_norm": 0.32533256752976253, "learning_rate": 4.174764678788264e-06, "loss": 0.4225, "step": 16030 }, { "epoch": 2.63875135439097, "grad_norm": 0.27444339256916256, "learning_rate": 4.174295846146115e-06, "loss": 0.4422, "step": 16031 }, { "epoch": 2.6389159386100864, "grad_norm": 0.3594351017234265, "learning_rate": 4.173827017057922e-06, "loss": 0.4176, "step": 16032 }, { "epoch": 2.639080522829203, "grad_norm": 0.3259773673795588, "learning_rate": 4.173358191528798e-06, "loss": 0.4315, "step": 16033 }, { "epoch": 2.6392451070483194, "grad_norm": 0.33864295381147363, "learning_rate": 4.172889369563857e-06, "loss": 0.4419, "step": 16034 }, { "epoch": 2.639409691267436, "grad_norm": 0.29615433686097753, "learning_rate": 4.172420551168219e-06, "loss": 0.4399, "step": 16035 }, { "epoch": 2.6395742754865523, "grad_norm": 0.47785437472432807, "learning_rate": 4.171951736346995e-06, "loss": 0.4334, "step": 16036 }, { "epoch": 2.639738859705669, "grad_norm": 0.4004573990222095, "learning_rate": 4.171482925105304e-06, "loss": 0.4471, "step": 16037 }, { "epoch": 2.6399034439247853, "grad_norm": 0.3143272720035237, "learning_rate": 4.17101411744826e-06, "loss": 0.4294, "step": 16038 }, { "epoch": 2.6400680281439017, "grad_norm": 0.35269234949253736, "learning_rate": 4.170545313380978e-06, "loss": 0.4045, "step": 16039 }, { "epoch": 2.6402326123630178, "grad_norm": 0.340001284874301, "learning_rate": 4.1700765129085754e-06, "loss": 0.4191, "step": 16040 }, { "epoch": 2.6403971965821342, "grad_norm": 0.41295947541357814, "learning_rate": 4.169607716036165e-06, "loss": 0.4088, "step": 16041 }, { "epoch": 2.6405617808012507, "grad_norm": 0.31813500109467485, "learning_rate": 4.169138922768865e-06, "loss": 0.4433, "step": 16042 }, { "epoch": 2.640726365020367, "grad_norm": 0.31553983739624203, "learning_rate": 4.168670133111787e-06, "loss": 0.4269, "step": 16043 }, { "epoch": 2.6408909492394836, "grad_norm": 0.28908417796027375, "learning_rate": 4.16820134707005e-06, "loss": 0.4391, "step": 16044 }, { "epoch": 2.6410555334586, "grad_norm": 0.25498288846337946, "learning_rate": 4.1677325646487644e-06, "loss": 0.4344, "step": 16045 }, { "epoch": 2.6412201176777166, "grad_norm": 0.9148740161576957, "learning_rate": 4.167263785853052e-06, "loss": 0.4397, "step": 16046 }, { "epoch": 2.641384701896833, "grad_norm": 0.3654250205826132, "learning_rate": 4.166795010688024e-06, "loss": 0.437, "step": 16047 }, { "epoch": 2.6415492861159495, "grad_norm": 0.2839790292083236, "learning_rate": 4.166326239158794e-06, "loss": 0.4405, "step": 16048 }, { "epoch": 2.641713870335066, "grad_norm": 0.2566802127035541, "learning_rate": 4.16585747127048e-06, "loss": 0.4223, "step": 16049 }, { "epoch": 2.6418784545541825, "grad_norm": 0.3592073497185888, "learning_rate": 4.165388707028195e-06, "loss": 0.4197, "step": 16050 }, { "epoch": 2.642043038773299, "grad_norm": 0.296672179833109, "learning_rate": 4.164919946437056e-06, "loss": 0.4494, "step": 16051 }, { "epoch": 2.6422076229924154, "grad_norm": 0.26656374733013644, "learning_rate": 4.164451189502176e-06, "loss": 0.4348, "step": 16052 }, { "epoch": 2.642372207211532, "grad_norm": 0.8647309996086165, "learning_rate": 4.163982436228672e-06, "loss": 0.424, "step": 16053 }, { "epoch": 2.6425367914306483, "grad_norm": 0.2867680259115217, "learning_rate": 4.163513686621655e-06, "loss": 0.4553, "step": 16054 }, { "epoch": 2.642701375649765, "grad_norm": 0.2975334360287579, "learning_rate": 4.163044940686246e-06, "loss": 0.4421, "step": 16055 }, { "epoch": 2.6428659598688813, "grad_norm": 0.4094481182342718, "learning_rate": 4.162576198427554e-06, "loss": 0.4242, "step": 16056 }, { "epoch": 2.6430305440879978, "grad_norm": 0.319781031677507, "learning_rate": 4.162107459850696e-06, "loss": 0.4294, "step": 16057 }, { "epoch": 2.6431951283071142, "grad_norm": 0.29254963141456264, "learning_rate": 4.1616387249607865e-06, "loss": 0.4415, "step": 16058 }, { "epoch": 2.6433597125262307, "grad_norm": 0.3342593584953292, "learning_rate": 4.1611699937629395e-06, "loss": 0.4362, "step": 16059 }, { "epoch": 2.643524296745347, "grad_norm": 0.29622533476076573, "learning_rate": 4.160701266262272e-06, "loss": 0.4041, "step": 16060 }, { "epoch": 2.6436888809644636, "grad_norm": 0.29560361745977387, "learning_rate": 4.160232542463895e-06, "loss": 0.4316, "step": 16061 }, { "epoch": 2.64385346518358, "grad_norm": 0.34481303444827943, "learning_rate": 4.159763822372926e-06, "loss": 0.4565, "step": 16062 }, { "epoch": 2.6440180494026966, "grad_norm": 0.44701790743094155, "learning_rate": 4.15929510599448e-06, "loss": 0.4322, "step": 16063 }, { "epoch": 2.644182633621813, "grad_norm": 0.38441840228187624, "learning_rate": 4.158826393333666e-06, "loss": 0.4308, "step": 16064 }, { "epoch": 2.644347217840929, "grad_norm": 0.27856025851449084, "learning_rate": 4.158357684395606e-06, "loss": 0.4557, "step": 16065 }, { "epoch": 2.6445118020600455, "grad_norm": 0.346035538895123, "learning_rate": 4.1578889791854074e-06, "loss": 0.4371, "step": 16066 }, { "epoch": 2.644676386279162, "grad_norm": 0.29522255544530657, "learning_rate": 4.15742027770819e-06, "loss": 0.4429, "step": 16067 }, { "epoch": 2.6448409704982785, "grad_norm": 0.2568408699069682, "learning_rate": 4.156951579969064e-06, "loss": 0.4511, "step": 16068 }, { "epoch": 2.645005554717395, "grad_norm": 0.2795226041069509, "learning_rate": 4.156482885973147e-06, "loss": 0.4317, "step": 16069 }, { "epoch": 2.6451701389365114, "grad_norm": 0.41437289939438915, "learning_rate": 4.156014195725552e-06, "loss": 0.4471, "step": 16070 }, { "epoch": 2.645334723155628, "grad_norm": 0.37249505618644185, "learning_rate": 4.155545509231391e-06, "loss": 0.4286, "step": 16071 }, { "epoch": 2.6454993073747444, "grad_norm": 0.36242228653315584, "learning_rate": 4.155076826495783e-06, "loss": 0.439, "step": 16072 }, { "epoch": 2.645663891593861, "grad_norm": 0.31756610529430523, "learning_rate": 4.154608147523834e-06, "loss": 0.4273, "step": 16073 }, { "epoch": 2.6458284758129773, "grad_norm": 0.32159274867073534, "learning_rate": 4.154139472320668e-06, "loss": 0.4537, "step": 16074 }, { "epoch": 2.645993060032094, "grad_norm": 0.335271183359619, "learning_rate": 4.153670800891389e-06, "loss": 0.4422, "step": 16075 }, { "epoch": 2.6461576442512103, "grad_norm": 0.3287060062641479, "learning_rate": 4.153202133241121e-06, "loss": 0.4472, "step": 16076 }, { "epoch": 2.6463222284703267, "grad_norm": 0.3426461142941525, "learning_rate": 4.1527334693749716e-06, "loss": 0.4241, "step": 16077 }, { "epoch": 2.646486812689443, "grad_norm": 0.29177189773366047, "learning_rate": 4.1522648092980546e-06, "loss": 0.4293, "step": 16078 }, { "epoch": 2.6466513969085597, "grad_norm": 0.35057317490047024, "learning_rate": 4.151796153015486e-06, "loss": 0.4336, "step": 16079 }, { "epoch": 2.646815981127676, "grad_norm": 0.3733394546466849, "learning_rate": 4.151327500532379e-06, "loss": 0.4339, "step": 16080 }, { "epoch": 2.6469805653467926, "grad_norm": 0.3274339998882645, "learning_rate": 4.150858851853847e-06, "loss": 0.4353, "step": 16081 }, { "epoch": 2.647145149565909, "grad_norm": 0.39048201119481457, "learning_rate": 4.150390206985002e-06, "loss": 0.4288, "step": 16082 }, { "epoch": 2.6473097337850255, "grad_norm": 0.2890017641382168, "learning_rate": 4.149921565930962e-06, "loss": 0.4285, "step": 16083 }, { "epoch": 2.647474318004142, "grad_norm": 0.3689520037974736, "learning_rate": 4.149452928696839e-06, "loss": 0.447, "step": 16084 }, { "epoch": 2.6476389022232585, "grad_norm": 0.32192762270808284, "learning_rate": 4.148984295287743e-06, "loss": 0.4378, "step": 16085 }, { "epoch": 2.647803486442375, "grad_norm": 0.34372293910344526, "learning_rate": 4.148515665708792e-06, "loss": 0.4493, "step": 16086 }, { "epoch": 2.6479680706614914, "grad_norm": 0.3676412358462734, "learning_rate": 4.148047039965097e-06, "loss": 0.4115, "step": 16087 }, { "epoch": 2.648132654880608, "grad_norm": 0.32314400463920195, "learning_rate": 4.147578418061772e-06, "loss": 0.4302, "step": 16088 }, { "epoch": 2.6482972390997244, "grad_norm": 0.32951777235338797, "learning_rate": 4.147109800003931e-06, "loss": 0.432, "step": 16089 }, { "epoch": 2.648461823318841, "grad_norm": 0.4144155570458211, "learning_rate": 4.146641185796687e-06, "loss": 0.4526, "step": 16090 }, { "epoch": 2.6486264075379573, "grad_norm": 0.32492391828837874, "learning_rate": 4.146172575445153e-06, "loss": 0.4171, "step": 16091 }, { "epoch": 2.6487909917570738, "grad_norm": 0.32096250114278707, "learning_rate": 4.1457039689544435e-06, "loss": 0.4263, "step": 16092 }, { "epoch": 2.6489555759761902, "grad_norm": 0.2931596596890427, "learning_rate": 4.1452353663296715e-06, "loss": 0.4177, "step": 16093 }, { "epoch": 2.6491201601953067, "grad_norm": 0.42602171281467677, "learning_rate": 4.144766767575947e-06, "loss": 0.4397, "step": 16094 }, { "epoch": 2.649284744414423, "grad_norm": 0.31803480047796434, "learning_rate": 4.144298172698389e-06, "loss": 0.4048, "step": 16095 }, { "epoch": 2.6494493286335397, "grad_norm": 0.3114085359245422, "learning_rate": 4.143829581702105e-06, "loss": 0.4518, "step": 16096 }, { "epoch": 2.649613912852656, "grad_norm": 0.27338831216461773, "learning_rate": 4.143360994592211e-06, "loss": 0.4443, "step": 16097 }, { "epoch": 2.6497784970717726, "grad_norm": 0.3096832549208866, "learning_rate": 4.14289241137382e-06, "loss": 0.431, "step": 16098 }, { "epoch": 2.649943081290889, "grad_norm": 0.570367929625992, "learning_rate": 4.142423832052045e-06, "loss": 0.433, "step": 16099 }, { "epoch": 2.6501076655100055, "grad_norm": 0.3231277758544926, "learning_rate": 4.141955256631997e-06, "loss": 0.4274, "step": 16100 }, { "epoch": 2.650272249729122, "grad_norm": 0.31080107211903213, "learning_rate": 4.14148668511879e-06, "loss": 0.4327, "step": 16101 }, { "epoch": 2.6504368339482385, "grad_norm": 0.4623297273803156, "learning_rate": 4.141018117517539e-06, "loss": 0.4279, "step": 16102 }, { "epoch": 2.650601418167355, "grad_norm": 0.26247998589542876, "learning_rate": 4.1405495538333534e-06, "loss": 0.4313, "step": 16103 }, { "epoch": 2.6507660023864714, "grad_norm": 0.29532370622536497, "learning_rate": 4.14008099407135e-06, "loss": 0.4159, "step": 16104 }, { "epoch": 2.650930586605588, "grad_norm": 0.41691707860407345, "learning_rate": 4.139612438236636e-06, "loss": 0.4391, "step": 16105 }, { "epoch": 2.6510951708247044, "grad_norm": 0.4613190154024909, "learning_rate": 4.13914388633433e-06, "loss": 0.4255, "step": 16106 }, { "epoch": 2.6512597550438204, "grad_norm": 0.2797038181019288, "learning_rate": 4.138675338369541e-06, "loss": 0.4285, "step": 16107 }, { "epoch": 2.651424339262937, "grad_norm": 0.2871785210367013, "learning_rate": 4.138206794347381e-06, "loss": 0.4277, "step": 16108 }, { "epoch": 2.6515889234820533, "grad_norm": 0.2833375328201389, "learning_rate": 4.137738254272966e-06, "loss": 0.4306, "step": 16109 }, { "epoch": 2.65175350770117, "grad_norm": 0.2791742154687719, "learning_rate": 4.137269718151405e-06, "loss": 0.4182, "step": 16110 }, { "epoch": 2.6519180919202863, "grad_norm": 0.2977754878777086, "learning_rate": 4.1368011859878135e-06, "loss": 0.4469, "step": 16111 }, { "epoch": 2.6520826761394027, "grad_norm": 0.37579617600776255, "learning_rate": 4.136332657787302e-06, "loss": 0.4436, "step": 16112 }, { "epoch": 2.652247260358519, "grad_norm": 0.2968274348201199, "learning_rate": 4.135864133554983e-06, "loss": 0.4178, "step": 16113 }, { "epoch": 2.6524118445776357, "grad_norm": 0.2964357275876666, "learning_rate": 4.135395613295971e-06, "loss": 0.4285, "step": 16114 }, { "epoch": 2.652576428796752, "grad_norm": 0.26860658490969397, "learning_rate": 4.134927097015373e-06, "loss": 0.4483, "step": 16115 }, { "epoch": 2.6527410130158686, "grad_norm": 0.34189626645070365, "learning_rate": 4.134458584718309e-06, "loss": 0.4368, "step": 16116 }, { "epoch": 2.652905597234985, "grad_norm": 0.2779968278720606, "learning_rate": 4.133990076409884e-06, "loss": 0.4249, "step": 16117 }, { "epoch": 2.6530701814541016, "grad_norm": 0.30593249193352895, "learning_rate": 4.133521572095214e-06, "loss": 0.4443, "step": 16118 }, { "epoch": 2.653234765673218, "grad_norm": 0.2854793229772029, "learning_rate": 4.133053071779411e-06, "loss": 0.4286, "step": 16119 }, { "epoch": 2.6533993498923345, "grad_norm": 0.29006974883980136, "learning_rate": 4.1325845754675856e-06, "loss": 0.4163, "step": 16120 }, { "epoch": 2.653563934111451, "grad_norm": 0.3077942187428335, "learning_rate": 4.132116083164851e-06, "loss": 0.45, "step": 16121 }, { "epoch": 2.6537285183305674, "grad_norm": 0.3408742741710642, "learning_rate": 4.131647594876319e-06, "loss": 0.4492, "step": 16122 }, { "epoch": 2.653893102549684, "grad_norm": 0.2926827150992984, "learning_rate": 4.1311791106071026e-06, "loss": 0.4253, "step": 16123 }, { "epoch": 2.6540576867688004, "grad_norm": 0.3155775845434203, "learning_rate": 4.13071063036231e-06, "loss": 0.4121, "step": 16124 }, { "epoch": 2.654222270987917, "grad_norm": 0.2646331437379279, "learning_rate": 4.130242154147058e-06, "loss": 0.4217, "step": 16125 }, { "epoch": 2.6543868552070333, "grad_norm": 0.26323728951390457, "learning_rate": 4.129773681966453e-06, "loss": 0.4286, "step": 16126 }, { "epoch": 2.65455143942615, "grad_norm": 0.4258820615094784, "learning_rate": 4.129305213825614e-06, "loss": 0.4105, "step": 16127 }, { "epoch": 2.6547160236452663, "grad_norm": 0.2896312674933501, "learning_rate": 4.128836749729646e-06, "loss": 0.4373, "step": 16128 }, { "epoch": 2.6548806078643827, "grad_norm": 0.31894235570171264, "learning_rate": 4.128368289683663e-06, "loss": 0.4397, "step": 16129 }, { "epoch": 2.655045192083499, "grad_norm": 0.31359654504208, "learning_rate": 4.127899833692778e-06, "loss": 0.442, "step": 16130 }, { "epoch": 2.6552097763026157, "grad_norm": 0.3494845795600111, "learning_rate": 4.1274313817621e-06, "loss": 0.4355, "step": 16131 }, { "epoch": 2.6553743605217317, "grad_norm": 0.3489366047251084, "learning_rate": 4.126962933896744e-06, "loss": 0.4342, "step": 16132 }, { "epoch": 2.655538944740848, "grad_norm": 0.34060977168449236, "learning_rate": 4.126494490101818e-06, "loss": 0.4374, "step": 16133 }, { "epoch": 2.6557035289599646, "grad_norm": 0.2742126576753534, "learning_rate": 4.126026050382436e-06, "loss": 0.4451, "step": 16134 }, { "epoch": 2.655868113179081, "grad_norm": 0.2992707000967294, "learning_rate": 4.125557614743707e-06, "loss": 0.4185, "step": 16135 }, { "epoch": 2.6560326973981976, "grad_norm": 0.2877101811641106, "learning_rate": 4.125089183190747e-06, "loss": 0.4328, "step": 16136 }, { "epoch": 2.656197281617314, "grad_norm": 0.4447724590169303, "learning_rate": 4.124620755728663e-06, "loss": 0.4457, "step": 16137 }, { "epoch": 2.6563618658364305, "grad_norm": 0.28823615255434304, "learning_rate": 4.124152332362565e-06, "loss": 0.4357, "step": 16138 }, { "epoch": 2.656526450055547, "grad_norm": 0.2877010704372036, "learning_rate": 4.12368391309757e-06, "loss": 0.4209, "step": 16139 }, { "epoch": 2.6566910342746635, "grad_norm": 1.2348588948682877, "learning_rate": 4.123215497938783e-06, "loss": 0.434, "step": 16140 }, { "epoch": 2.65685561849378, "grad_norm": 0.3176384519860354, "learning_rate": 4.12274708689132e-06, "loss": 0.4248, "step": 16141 }, { "epoch": 2.6570202027128964, "grad_norm": 0.34314293250836697, "learning_rate": 4.1222786799602895e-06, "loss": 0.4255, "step": 16142 }, { "epoch": 2.657184786932013, "grad_norm": 0.32065602089046813, "learning_rate": 4.121810277150804e-06, "loss": 0.4327, "step": 16143 }, { "epoch": 2.6573493711511293, "grad_norm": 0.31641949004858255, "learning_rate": 4.121341878467975e-06, "loss": 0.4359, "step": 16144 }, { "epoch": 2.657513955370246, "grad_norm": 0.31985622093017013, "learning_rate": 4.120873483916909e-06, "loss": 0.4469, "step": 16145 }, { "epoch": 2.6576785395893623, "grad_norm": 0.4740004344214797, "learning_rate": 4.1204050935027235e-06, "loss": 0.4051, "step": 16146 }, { "epoch": 2.6578431238084788, "grad_norm": 0.3047611687392972, "learning_rate": 4.119936707230524e-06, "loss": 0.4228, "step": 16147 }, { "epoch": 2.6580077080275952, "grad_norm": 0.35193746954809046, "learning_rate": 4.1194683251054246e-06, "loss": 0.4341, "step": 16148 }, { "epoch": 2.6581722922467117, "grad_norm": 0.4636378951303919, "learning_rate": 4.118999947132533e-06, "loss": 0.4312, "step": 16149 }, { "epoch": 2.658336876465828, "grad_norm": 0.32009910134700315, "learning_rate": 4.1185315733169645e-06, "loss": 0.4396, "step": 16150 }, { "epoch": 2.6585014606849446, "grad_norm": 0.32073795597582255, "learning_rate": 4.1180632036638265e-06, "loss": 0.4309, "step": 16151 }, { "epoch": 2.658666044904061, "grad_norm": 0.4289503976591517, "learning_rate": 4.117594838178229e-06, "loss": 0.4286, "step": 16152 }, { "epoch": 2.6588306291231776, "grad_norm": 0.3596741397311703, "learning_rate": 4.1171264768652855e-06, "loss": 0.4439, "step": 16153 }, { "epoch": 2.658995213342294, "grad_norm": 0.35541758817558256, "learning_rate": 4.1166581197301036e-06, "loss": 0.4418, "step": 16154 }, { "epoch": 2.6591597975614105, "grad_norm": 0.32675505943073413, "learning_rate": 4.116189766777797e-06, "loss": 0.4353, "step": 16155 }, { "epoch": 2.659324381780527, "grad_norm": 0.3698642182004654, "learning_rate": 4.115721418013473e-06, "loss": 0.4203, "step": 16156 }, { "epoch": 2.6594889659996435, "grad_norm": 0.6317514981473367, "learning_rate": 4.115253073442245e-06, "loss": 0.4277, "step": 16157 }, { "epoch": 2.65965355021876, "grad_norm": 0.3042933450838122, "learning_rate": 4.114784733069222e-06, "loss": 0.4547, "step": 16158 }, { "epoch": 2.6598181344378764, "grad_norm": 0.5519804724689971, "learning_rate": 4.114316396899513e-06, "loss": 0.4268, "step": 16159 }, { "epoch": 2.659982718656993, "grad_norm": 0.3026651330178726, "learning_rate": 4.11384806493823e-06, "loss": 0.4267, "step": 16160 }, { "epoch": 2.6601473028761093, "grad_norm": 0.2896548638142162, "learning_rate": 4.113379737190482e-06, "loss": 0.434, "step": 16161 }, { "epoch": 2.660311887095226, "grad_norm": 0.3162773316354961, "learning_rate": 4.112911413661382e-06, "loss": 0.4183, "step": 16162 }, { "epoch": 2.6604764713143423, "grad_norm": 0.30438307843708645, "learning_rate": 4.112443094356036e-06, "loss": 0.4222, "step": 16163 }, { "epoch": 2.6606410555334588, "grad_norm": 0.325295396745359, "learning_rate": 4.111974779279558e-06, "loss": 0.4417, "step": 16164 }, { "epoch": 2.6608056397525752, "grad_norm": 0.289759415015586, "learning_rate": 4.111506468437057e-06, "loss": 0.4367, "step": 16165 }, { "epoch": 2.6609702239716917, "grad_norm": 0.2611358156919582, "learning_rate": 4.11103816183364e-06, "loss": 0.4349, "step": 16166 }, { "epoch": 2.661134808190808, "grad_norm": 0.30681190236477174, "learning_rate": 4.110569859474421e-06, "loss": 0.4151, "step": 16167 }, { "epoch": 2.6612993924099246, "grad_norm": 0.3179901230899138, "learning_rate": 4.110101561364506e-06, "loss": 0.4361, "step": 16168 }, { "epoch": 2.661463976629041, "grad_norm": 0.3743547215570952, "learning_rate": 4.109633267509009e-06, "loss": 0.4457, "step": 16169 }, { "epoch": 2.6616285608481576, "grad_norm": 0.3155628139691341, "learning_rate": 4.109164977913037e-06, "loss": 0.4396, "step": 16170 }, { "epoch": 2.661793145067274, "grad_norm": 0.27701818933684635, "learning_rate": 4.108696692581702e-06, "loss": 0.4359, "step": 16171 }, { "epoch": 2.6619577292863905, "grad_norm": 0.3668374170702665, "learning_rate": 4.108228411520113e-06, "loss": 0.4368, "step": 16172 }, { "epoch": 2.662122313505507, "grad_norm": 0.3149660241315818, "learning_rate": 4.1077601347333775e-06, "loss": 0.4477, "step": 16173 }, { "epoch": 2.662286897724623, "grad_norm": 0.36031363805868644, "learning_rate": 4.107291862226608e-06, "loss": 0.4386, "step": 16174 }, { "epoch": 2.6624514819437395, "grad_norm": 0.3752687445537029, "learning_rate": 4.106823594004912e-06, "loss": 0.4326, "step": 16175 }, { "epoch": 2.662616066162856, "grad_norm": 0.33165849121813185, "learning_rate": 4.106355330073402e-06, "loss": 0.4155, "step": 16176 }, { "epoch": 2.6627806503819724, "grad_norm": 0.2996559896507021, "learning_rate": 4.105887070437182e-06, "loss": 0.4407, "step": 16177 }, { "epoch": 2.662945234601089, "grad_norm": 0.3691651742015506, "learning_rate": 4.1054188151013685e-06, "loss": 0.4311, "step": 16178 }, { "epoch": 2.6631098188202054, "grad_norm": 0.37706408757539167, "learning_rate": 4.1049505640710655e-06, "loss": 0.4414, "step": 16179 }, { "epoch": 2.663274403039322, "grad_norm": 0.4580061387375138, "learning_rate": 4.104482317351386e-06, "loss": 0.425, "step": 16180 }, { "epoch": 2.6634389872584383, "grad_norm": 0.3982468103731691, "learning_rate": 4.104014074947436e-06, "loss": 0.4176, "step": 16181 }, { "epoch": 2.6636035714775548, "grad_norm": 0.31071724356969327, "learning_rate": 4.103545836864326e-06, "loss": 0.4289, "step": 16182 }, { "epoch": 2.6637681556966712, "grad_norm": 0.3174287098929874, "learning_rate": 4.103077603107167e-06, "loss": 0.4249, "step": 16183 }, { "epoch": 2.6639327399157877, "grad_norm": 0.28455613214851727, "learning_rate": 4.102609373681066e-06, "loss": 0.4451, "step": 16184 }, { "epoch": 2.664097324134904, "grad_norm": 0.3196335886895791, "learning_rate": 4.102141148591134e-06, "loss": 0.4226, "step": 16185 }, { "epoch": 2.6642619083540207, "grad_norm": 0.6233557455696098, "learning_rate": 4.1016729278424765e-06, "loss": 0.4241, "step": 16186 }, { "epoch": 2.664426492573137, "grad_norm": 0.33200426377694864, "learning_rate": 4.101204711440209e-06, "loss": 0.4439, "step": 16187 }, { "epoch": 2.6645910767922536, "grad_norm": 0.3156950309931716, "learning_rate": 4.100736499389434e-06, "loss": 0.4652, "step": 16188 }, { "epoch": 2.66475566101137, "grad_norm": 0.29051265138452365, "learning_rate": 4.100268291695262e-06, "loss": 0.4058, "step": 16189 }, { "epoch": 2.6649202452304865, "grad_norm": 0.3876607898144007, "learning_rate": 4.099800088362805e-06, "loss": 0.4336, "step": 16190 }, { "epoch": 2.665084829449603, "grad_norm": 0.2801680526667986, "learning_rate": 4.099331889397168e-06, "loss": 0.4409, "step": 16191 }, { "epoch": 2.6652494136687195, "grad_norm": 0.3928005012139743, "learning_rate": 4.098863694803462e-06, "loss": 0.4452, "step": 16192 }, { "epoch": 2.665413997887836, "grad_norm": 0.31667327209953494, "learning_rate": 4.098395504586795e-06, "loss": 0.4314, "step": 16193 }, { "epoch": 2.6655785821069524, "grad_norm": 0.4095252931530756, "learning_rate": 4.0979273187522775e-06, "loss": 0.4398, "step": 16194 }, { "epoch": 2.665743166326069, "grad_norm": 0.3039674024879263, "learning_rate": 4.097459137305016e-06, "loss": 0.4303, "step": 16195 }, { "epoch": 2.6659077505451854, "grad_norm": 0.3846388319882331, "learning_rate": 4.096990960250118e-06, "loss": 0.4087, "step": 16196 }, { "epoch": 2.666072334764302, "grad_norm": 0.36933999442641624, "learning_rate": 4.096522787592697e-06, "loss": 0.435, "step": 16197 }, { "epoch": 2.6662369189834183, "grad_norm": 0.26155978929311113, "learning_rate": 4.0960546193378555e-06, "loss": 0.4179, "step": 16198 }, { "epoch": 2.6664015032025343, "grad_norm": 0.2824416471224022, "learning_rate": 4.095586455490706e-06, "loss": 0.4447, "step": 16199 }, { "epoch": 2.666566087421651, "grad_norm": 0.3029193693644384, "learning_rate": 4.095118296056355e-06, "loss": 0.4314, "step": 16200 }, { "epoch": 2.6667306716407673, "grad_norm": 0.5897728845044921, "learning_rate": 4.0946501410399124e-06, "loss": 0.4221, "step": 16201 }, { "epoch": 2.6668952558598837, "grad_norm": 0.3111304963439345, "learning_rate": 4.094181990446486e-06, "loss": 0.4532, "step": 16202 }, { "epoch": 2.667059840079, "grad_norm": 0.3465409814026947, "learning_rate": 4.093713844281182e-06, "loss": 0.4544, "step": 16203 }, { "epoch": 2.6672244242981167, "grad_norm": 0.2985915021659544, "learning_rate": 4.093245702549113e-06, "loss": 0.4428, "step": 16204 }, { "epoch": 2.667389008517233, "grad_norm": 0.3219223204177759, "learning_rate": 4.092777565255381e-06, "loss": 0.443, "step": 16205 }, { "epoch": 2.6675535927363496, "grad_norm": 0.3190977768020272, "learning_rate": 4.092309432405101e-06, "loss": 0.4039, "step": 16206 }, { "epoch": 2.667718176955466, "grad_norm": 0.29823725676161056, "learning_rate": 4.091841304003376e-06, "loss": 0.4328, "step": 16207 }, { "epoch": 2.6678827611745826, "grad_norm": 0.3483582913118178, "learning_rate": 4.091373180055317e-06, "loss": 0.4572, "step": 16208 }, { "epoch": 2.668047345393699, "grad_norm": 0.2992140663455201, "learning_rate": 4.09090506056603e-06, "loss": 0.4164, "step": 16209 }, { "epoch": 2.6682119296128155, "grad_norm": 0.3210340795390415, "learning_rate": 4.090436945540623e-06, "loss": 0.4382, "step": 16210 }, { "epoch": 2.668376513831932, "grad_norm": 0.3029225777699273, "learning_rate": 4.089968834984206e-06, "loss": 0.4334, "step": 16211 }, { "epoch": 2.6685410980510484, "grad_norm": 0.29585427129308206, "learning_rate": 4.089500728901885e-06, "loss": 0.4171, "step": 16212 }, { "epoch": 2.668705682270165, "grad_norm": 0.30808751863312334, "learning_rate": 4.089032627298768e-06, "loss": 0.4282, "step": 16213 }, { "epoch": 2.6688702664892814, "grad_norm": 0.26945376664591575, "learning_rate": 4.0885645301799615e-06, "loss": 0.4313, "step": 16214 }, { "epoch": 2.669034850708398, "grad_norm": 0.3248008830813408, "learning_rate": 4.088096437550577e-06, "loss": 0.4383, "step": 16215 }, { "epoch": 2.6691994349275143, "grad_norm": 0.3257332301812428, "learning_rate": 4.0876283494157186e-06, "loss": 0.4334, "step": 16216 }, { "epoch": 2.669364019146631, "grad_norm": 0.4606309903111496, "learning_rate": 4.087160265780496e-06, "loss": 0.4325, "step": 16217 }, { "epoch": 2.6695286033657473, "grad_norm": 0.3205989020265071, "learning_rate": 4.086692186650016e-06, "loss": 0.4257, "step": 16218 }, { "epoch": 2.6696931875848637, "grad_norm": 0.333101561605051, "learning_rate": 4.086224112029385e-06, "loss": 0.4371, "step": 16219 }, { "epoch": 2.66985777180398, "grad_norm": 0.28771924507188223, "learning_rate": 4.085756041923711e-06, "loss": 0.438, "step": 16220 }, { "epoch": 2.6700223560230967, "grad_norm": 0.27046072482072303, "learning_rate": 4.085287976338102e-06, "loss": 0.4127, "step": 16221 }, { "epoch": 2.670186940242213, "grad_norm": 0.300922333424232, "learning_rate": 4.084819915277665e-06, "loss": 0.4319, "step": 16222 }, { "epoch": 2.6703515244613296, "grad_norm": 0.24299807154068132, "learning_rate": 4.0843518587475064e-06, "loss": 0.415, "step": 16223 }, { "epoch": 2.670516108680446, "grad_norm": 0.341735720442819, "learning_rate": 4.083883806752737e-06, "loss": 0.414, "step": 16224 }, { "epoch": 2.6706806928995626, "grad_norm": 0.3393067511916409, "learning_rate": 4.083415759298461e-06, "loss": 0.429, "step": 16225 }, { "epoch": 2.670845277118679, "grad_norm": 0.28674310194577535, "learning_rate": 4.082947716389784e-06, "loss": 0.429, "step": 16226 }, { "epoch": 2.6710098613377955, "grad_norm": 0.5185646111855251, "learning_rate": 4.082479678031817e-06, "loss": 0.4454, "step": 16227 }, { "epoch": 2.671174445556912, "grad_norm": 0.34007590718458663, "learning_rate": 4.082011644229663e-06, "loss": 0.4159, "step": 16228 }, { "epoch": 2.6713390297760284, "grad_norm": 0.29528169814285576, "learning_rate": 4.081543614988434e-06, "loss": 0.4363, "step": 16229 }, { "epoch": 2.671503613995145, "grad_norm": 0.3010034087122512, "learning_rate": 4.0810755903132315e-06, "loss": 0.4339, "step": 16230 }, { "epoch": 2.6716681982142614, "grad_norm": 0.3420782797293866, "learning_rate": 4.080607570209166e-06, "loss": 0.4532, "step": 16231 }, { "epoch": 2.671832782433378, "grad_norm": 0.3518435427751243, "learning_rate": 4.080139554681343e-06, "loss": 0.4374, "step": 16232 }, { "epoch": 2.6719973666524943, "grad_norm": 0.3853923992274283, "learning_rate": 4.0796715437348696e-06, "loss": 0.4189, "step": 16233 }, { "epoch": 2.672161950871611, "grad_norm": 0.39547593141325593, "learning_rate": 4.079203537374853e-06, "loss": 0.4322, "step": 16234 }, { "epoch": 2.6723265350907273, "grad_norm": 0.4567047029006985, "learning_rate": 4.078735535606399e-06, "loss": 0.4189, "step": 16235 }, { "epoch": 2.6724911193098437, "grad_norm": 0.30768753053570136, "learning_rate": 4.078267538434616e-06, "loss": 0.403, "step": 16236 }, { "epoch": 2.67265570352896, "grad_norm": 0.37096345709738054, "learning_rate": 4.077799545864607e-06, "loss": 0.4071, "step": 16237 }, { "epoch": 2.6728202877480767, "grad_norm": 0.2795107187902698, "learning_rate": 4.0773315579014835e-06, "loss": 0.4394, "step": 16238 }, { "epoch": 2.672984871967193, "grad_norm": 0.40934872094739266, "learning_rate": 4.076863574550349e-06, "loss": 0.4526, "step": 16239 }, { "epoch": 2.673149456186309, "grad_norm": 0.3011794778746138, "learning_rate": 4.076395595816308e-06, "loss": 0.4298, "step": 16240 }, { "epoch": 2.6733140404054256, "grad_norm": 0.4588930576874614, "learning_rate": 4.075927621704471e-06, "loss": 0.4395, "step": 16241 }, { "epoch": 2.673478624624542, "grad_norm": 0.2837479901390299, "learning_rate": 4.075459652219941e-06, "loss": 0.4207, "step": 16242 }, { "epoch": 2.6736432088436586, "grad_norm": 0.37951879970028074, "learning_rate": 4.074991687367827e-06, "loss": 0.4461, "step": 16243 }, { "epoch": 2.673807793062775, "grad_norm": 0.4577196555274362, "learning_rate": 4.074523727153234e-06, "loss": 0.4284, "step": 16244 }, { "epoch": 2.6739723772818915, "grad_norm": 0.3745132841651956, "learning_rate": 4.074055771581268e-06, "loss": 0.4428, "step": 16245 }, { "epoch": 2.674136961501008, "grad_norm": 0.5084261638812808, "learning_rate": 4.073587820657037e-06, "loss": 0.4531, "step": 16246 }, { "epoch": 2.6743015457201245, "grad_norm": 0.39509714110382965, "learning_rate": 4.073119874385644e-06, "loss": 0.4279, "step": 16247 }, { "epoch": 2.674466129939241, "grad_norm": 0.3186932320538688, "learning_rate": 4.072651932772197e-06, "loss": 0.4236, "step": 16248 }, { "epoch": 2.6746307141583574, "grad_norm": 0.49111200088983387, "learning_rate": 4.072183995821801e-06, "loss": 0.4293, "step": 16249 }, { "epoch": 2.674795298377474, "grad_norm": 0.349164641605376, "learning_rate": 4.071716063539563e-06, "loss": 0.4612, "step": 16250 }, { "epoch": 2.6749598825965903, "grad_norm": 0.4703838182432665, "learning_rate": 4.0712481359305876e-06, "loss": 0.4506, "step": 16251 }, { "epoch": 2.675124466815707, "grad_norm": 0.27681566344147845, "learning_rate": 4.070780212999982e-06, "loss": 0.4306, "step": 16252 }, { "epoch": 2.6752890510348233, "grad_norm": 0.2706256207107712, "learning_rate": 4.070312294752852e-06, "loss": 0.4132, "step": 16253 }, { "epoch": 2.6754536352539398, "grad_norm": 0.2942736546722622, "learning_rate": 4.069844381194301e-06, "loss": 0.4247, "step": 16254 }, { "epoch": 2.6756182194730562, "grad_norm": 0.2887730169859917, "learning_rate": 4.06937647232944e-06, "loss": 0.4282, "step": 16255 }, { "epoch": 2.6757828036921727, "grad_norm": 0.3249853712294799, "learning_rate": 4.068908568163367e-06, "loss": 0.4552, "step": 16256 }, { "epoch": 2.675947387911289, "grad_norm": 0.42233120294813575, "learning_rate": 4.068440668701195e-06, "loss": 0.4494, "step": 16257 }, { "epoch": 2.6761119721304056, "grad_norm": 0.39370678424915456, "learning_rate": 4.067972773948022e-06, "loss": 0.4425, "step": 16258 }, { "epoch": 2.676276556349522, "grad_norm": 0.3351960219169127, "learning_rate": 4.067504883908962e-06, "loss": 0.4287, "step": 16259 }, { "epoch": 2.6764411405686386, "grad_norm": 0.3646822270877999, "learning_rate": 4.067036998589114e-06, "loss": 0.4406, "step": 16260 }, { "epoch": 2.676605724787755, "grad_norm": 0.27525514075166774, "learning_rate": 4.066569117993586e-06, "loss": 0.4177, "step": 16261 }, { "epoch": 2.6767703090068715, "grad_norm": 0.33919213329865616, "learning_rate": 4.066101242127483e-06, "loss": 0.4459, "step": 16262 }, { "epoch": 2.676934893225988, "grad_norm": 0.35308673235114507, "learning_rate": 4.0656333709959085e-06, "loss": 0.4379, "step": 16263 }, { "epoch": 2.6770994774451045, "grad_norm": 0.30855438126161155, "learning_rate": 4.065165504603971e-06, "loss": 0.4431, "step": 16264 }, { "epoch": 2.677264061664221, "grad_norm": 0.34653493149245607, "learning_rate": 4.064697642956773e-06, "loss": 0.4355, "step": 16265 }, { "epoch": 2.677428645883337, "grad_norm": 0.29623996229396615, "learning_rate": 4.064229786059422e-06, "loss": 0.4255, "step": 16266 }, { "epoch": 2.6775932301024534, "grad_norm": 0.3025284374334076, "learning_rate": 4.06376193391702e-06, "loss": 0.4453, "step": 16267 }, { "epoch": 2.67775781432157, "grad_norm": 0.3830832942566304, "learning_rate": 4.063294086534675e-06, "loss": 0.4266, "step": 16268 }, { "epoch": 2.6779223985406864, "grad_norm": 0.3344091233245314, "learning_rate": 4.06282624391749e-06, "loss": 0.4222, "step": 16269 }, { "epoch": 2.678086982759803, "grad_norm": 0.3260377569625621, "learning_rate": 4.06235840607057e-06, "loss": 0.4139, "step": 16270 }, { "epoch": 2.6782515669789193, "grad_norm": 0.4072985148156493, "learning_rate": 4.061890572999021e-06, "loss": 0.4392, "step": 16271 }, { "epoch": 2.6784161511980358, "grad_norm": 0.3117617202916446, "learning_rate": 4.061422744707947e-06, "loss": 0.4456, "step": 16272 }, { "epoch": 2.6785807354171522, "grad_norm": 0.34202950905471813, "learning_rate": 4.060954921202454e-06, "loss": 0.4252, "step": 16273 }, { "epoch": 2.6787453196362687, "grad_norm": 0.3078055002171829, "learning_rate": 4.060487102487644e-06, "loss": 0.4427, "step": 16274 }, { "epoch": 2.678909903855385, "grad_norm": 0.34492177812050034, "learning_rate": 4.060019288568624e-06, "loss": 0.4127, "step": 16275 }, { "epoch": 2.6790744880745017, "grad_norm": 0.4479150830400735, "learning_rate": 4.0595514794505e-06, "loss": 0.41, "step": 16276 }, { "epoch": 2.679239072293618, "grad_norm": 0.3027447731401161, "learning_rate": 4.0590836751383704e-06, "loss": 0.4125, "step": 16277 }, { "epoch": 2.6794036565127346, "grad_norm": 0.3868369095960643, "learning_rate": 4.058615875637347e-06, "loss": 0.4229, "step": 16278 }, { "epoch": 2.679568240731851, "grad_norm": 0.31252996703349806, "learning_rate": 4.058148080952529e-06, "loss": 0.4387, "step": 16279 }, { "epoch": 2.6797328249509675, "grad_norm": 0.38131546502511204, "learning_rate": 4.057680291089025e-06, "loss": 0.4208, "step": 16280 }, { "epoch": 2.679897409170084, "grad_norm": 0.31437886402272847, "learning_rate": 4.057212506051935e-06, "loss": 0.442, "step": 16281 }, { "epoch": 2.6800619933892005, "grad_norm": 0.29705877207988296, "learning_rate": 4.056744725846366e-06, "loss": 0.4405, "step": 16282 }, { "epoch": 2.680226577608317, "grad_norm": 0.3287127232403905, "learning_rate": 4.0562769504774226e-06, "loss": 0.4369, "step": 16283 }, { "epoch": 2.6803911618274334, "grad_norm": 0.3461000338063579, "learning_rate": 4.055809179950207e-06, "loss": 0.4416, "step": 16284 }, { "epoch": 2.68055574604655, "grad_norm": 0.3162634482779627, "learning_rate": 4.055341414269825e-06, "loss": 0.4496, "step": 16285 }, { "epoch": 2.6807203302656664, "grad_norm": 0.36816052604616, "learning_rate": 4.0548736534413795e-06, "loss": 0.439, "step": 16286 }, { "epoch": 2.680884914484783, "grad_norm": 0.354552042915324, "learning_rate": 4.0544058974699764e-06, "loss": 0.4346, "step": 16287 }, { "epoch": 2.6810494987038993, "grad_norm": 0.38578018985135637, "learning_rate": 4.053938146360715e-06, "loss": 0.4294, "step": 16288 }, { "epoch": 2.6812140829230158, "grad_norm": 0.26869839291294056, "learning_rate": 4.053470400118707e-06, "loss": 0.4321, "step": 16289 }, { "epoch": 2.6813786671421322, "grad_norm": 0.5894318253041799, "learning_rate": 4.053002658749049e-06, "loss": 0.4472, "step": 16290 }, { "epoch": 2.6815432513612487, "grad_norm": 0.32389992131136, "learning_rate": 4.0525349222568475e-06, "loss": 0.4352, "step": 16291 }, { "epoch": 2.681707835580365, "grad_norm": 0.35372515325452797, "learning_rate": 4.052067190647208e-06, "loss": 0.4445, "step": 16292 }, { "epoch": 2.6818724197994817, "grad_norm": 0.3780722530944741, "learning_rate": 4.051599463925231e-06, "loss": 0.4342, "step": 16293 }, { "epoch": 2.682037004018598, "grad_norm": 0.3179532350010808, "learning_rate": 4.051131742096022e-06, "loss": 0.4373, "step": 16294 }, { "epoch": 2.6822015882377146, "grad_norm": 0.28235609038119763, "learning_rate": 4.050664025164684e-06, "loss": 0.4266, "step": 16295 }, { "epoch": 2.682366172456831, "grad_norm": 0.34937662996411334, "learning_rate": 4.050196313136322e-06, "loss": 0.4204, "step": 16296 }, { "epoch": 2.6825307566759475, "grad_norm": 0.29543097261117546, "learning_rate": 4.049728606016039e-06, "loss": 0.4281, "step": 16297 }, { "epoch": 2.682695340895064, "grad_norm": 0.2755549630399525, "learning_rate": 4.049260903808936e-06, "loss": 0.4285, "step": 16298 }, { "epoch": 2.6828599251141805, "grad_norm": 0.3197657919433483, "learning_rate": 4.048793206520118e-06, "loss": 0.4312, "step": 16299 }, { "epoch": 2.683024509333297, "grad_norm": 0.41965219409979476, "learning_rate": 4.048325514154688e-06, "loss": 0.4388, "step": 16300 }, { "epoch": 2.6831890935524134, "grad_norm": 0.27678180863462526, "learning_rate": 4.047857826717751e-06, "loss": 0.4242, "step": 16301 }, { "epoch": 2.68335367777153, "grad_norm": 0.347595027625458, "learning_rate": 4.047390144214409e-06, "loss": 0.4457, "step": 16302 }, { "epoch": 2.6835182619906464, "grad_norm": 0.27207774173030125, "learning_rate": 4.046922466649764e-06, "loss": 0.4235, "step": 16303 }, { "epoch": 2.683682846209763, "grad_norm": 0.33812820655438, "learning_rate": 4.0464547940289205e-06, "loss": 0.4282, "step": 16304 }, { "epoch": 2.6838474304288793, "grad_norm": 0.31855684501272835, "learning_rate": 4.045987126356982e-06, "loss": 0.4508, "step": 16305 }, { "epoch": 2.6840120146479958, "grad_norm": 0.2655956406794349, "learning_rate": 4.045519463639052e-06, "loss": 0.4403, "step": 16306 }, { "epoch": 2.684176598867112, "grad_norm": 0.3066415099568566, "learning_rate": 4.045051805880229e-06, "loss": 0.4123, "step": 16307 }, { "epoch": 2.6843411830862283, "grad_norm": 0.4262155359903081, "learning_rate": 4.044584153085623e-06, "loss": 0.4483, "step": 16308 }, { "epoch": 2.6845057673053447, "grad_norm": 0.30152830675260467, "learning_rate": 4.044116505260329e-06, "loss": 0.4397, "step": 16309 }, { "epoch": 2.684670351524461, "grad_norm": 0.3478555543350279, "learning_rate": 4.043648862409457e-06, "loss": 0.4255, "step": 16310 }, { "epoch": 2.6848349357435777, "grad_norm": 0.4643246693630661, "learning_rate": 4.043181224538104e-06, "loss": 0.4371, "step": 16311 }, { "epoch": 2.684999519962694, "grad_norm": 0.2839497220739598, "learning_rate": 4.042713591651377e-06, "loss": 0.4233, "step": 16312 }, { "epoch": 2.6851641041818106, "grad_norm": 0.2903961775110076, "learning_rate": 4.042245963754376e-06, "loss": 0.4377, "step": 16313 }, { "epoch": 2.685328688400927, "grad_norm": 0.2753754545659661, "learning_rate": 4.041778340852204e-06, "loss": 0.4058, "step": 16314 }, { "epoch": 2.6854932726200436, "grad_norm": 0.3303682132491632, "learning_rate": 4.041310722949964e-06, "loss": 0.4355, "step": 16315 }, { "epoch": 2.68565785683916, "grad_norm": 0.37255543523726975, "learning_rate": 4.040843110052758e-06, "loss": 0.4319, "step": 16316 }, { "epoch": 2.6858224410582765, "grad_norm": 0.3054564467132957, "learning_rate": 4.04037550216569e-06, "loss": 0.4291, "step": 16317 }, { "epoch": 2.685987025277393, "grad_norm": 0.28915295925285905, "learning_rate": 4.0399078992938595e-06, "loss": 0.463, "step": 16318 }, { "epoch": 2.6861516094965094, "grad_norm": 0.4022542124139846, "learning_rate": 4.0394403014423725e-06, "loss": 0.4405, "step": 16319 }, { "epoch": 2.686316193715626, "grad_norm": 0.4806554260121287, "learning_rate": 4.038972708616328e-06, "loss": 0.4386, "step": 16320 }, { "epoch": 2.6864807779347424, "grad_norm": 0.4025570728681745, "learning_rate": 4.038505120820829e-06, "loss": 0.4217, "step": 16321 }, { "epoch": 2.686645362153859, "grad_norm": 0.33411015406408345, "learning_rate": 4.0380375380609785e-06, "loss": 0.4164, "step": 16322 }, { "epoch": 2.6868099463729753, "grad_norm": 0.38337978563252073, "learning_rate": 4.037569960341877e-06, "loss": 0.442, "step": 16323 }, { "epoch": 2.686974530592092, "grad_norm": 0.3286192345753033, "learning_rate": 4.0371023876686285e-06, "loss": 0.4229, "step": 16324 }, { "epoch": 2.6871391148112083, "grad_norm": 0.2887883452897136, "learning_rate": 4.036634820046333e-06, "loss": 0.429, "step": 16325 }, { "epoch": 2.6873036990303247, "grad_norm": 0.6183909761308701, "learning_rate": 4.036167257480095e-06, "loss": 0.4306, "step": 16326 }, { "epoch": 2.687468283249441, "grad_norm": 0.294742555568296, "learning_rate": 4.0356996999750146e-06, "loss": 0.4292, "step": 16327 }, { "epoch": 2.6876328674685577, "grad_norm": 0.32687745293903564, "learning_rate": 4.035232147536191e-06, "loss": 0.421, "step": 16328 }, { "epoch": 2.687797451687674, "grad_norm": 0.3037644479826378, "learning_rate": 4.034764600168733e-06, "loss": 0.4258, "step": 16329 }, { "epoch": 2.6879620359067906, "grad_norm": 0.30225917798625956, "learning_rate": 4.034297057877735e-06, "loss": 0.4382, "step": 16330 }, { "epoch": 2.688126620125907, "grad_norm": 0.38987761613900895, "learning_rate": 4.033829520668302e-06, "loss": 0.4328, "step": 16331 }, { "epoch": 2.688291204345023, "grad_norm": 0.3210169110933282, "learning_rate": 4.033361988545535e-06, "loss": 0.4412, "step": 16332 }, { "epoch": 2.6884557885641396, "grad_norm": 0.31864691654286775, "learning_rate": 4.032894461514537e-06, "loss": 0.4282, "step": 16333 }, { "epoch": 2.688620372783256, "grad_norm": 0.4825974822411688, "learning_rate": 4.0324269395804074e-06, "loss": 0.4326, "step": 16334 }, { "epoch": 2.6887849570023725, "grad_norm": 0.32769347237972846, "learning_rate": 4.031959422748249e-06, "loss": 0.4267, "step": 16335 }, { "epoch": 2.688949541221489, "grad_norm": 0.38048815364433103, "learning_rate": 4.031491911023163e-06, "loss": 0.4389, "step": 16336 }, { "epoch": 2.6891141254406055, "grad_norm": 1.0058854085830922, "learning_rate": 4.031024404410248e-06, "loss": 0.4429, "step": 16337 }, { "epoch": 2.689278709659722, "grad_norm": 0.3754249272517528, "learning_rate": 4.03055690291461e-06, "loss": 0.4199, "step": 16338 }, { "epoch": 2.6894432938788384, "grad_norm": 0.2947882584863735, "learning_rate": 4.030089406541345e-06, "loss": 0.4281, "step": 16339 }, { "epoch": 2.689607878097955, "grad_norm": 0.3096317789054828, "learning_rate": 4.029621915295561e-06, "loss": 0.4298, "step": 16340 }, { "epoch": 2.6897724623170713, "grad_norm": 0.35530437347375415, "learning_rate": 4.029154429182352e-06, "loss": 0.4357, "step": 16341 }, { "epoch": 2.689937046536188, "grad_norm": 0.3012366191872031, "learning_rate": 4.0286869482068215e-06, "loss": 0.4405, "step": 16342 }, { "epoch": 2.6901016307553043, "grad_norm": 0.31298119816673015, "learning_rate": 4.028219472374073e-06, "loss": 0.4375, "step": 16343 }, { "epoch": 2.6902662149744208, "grad_norm": 0.32781224232259865, "learning_rate": 4.027752001689203e-06, "loss": 0.4406, "step": 16344 }, { "epoch": 2.6904307991935372, "grad_norm": 0.414384595744943, "learning_rate": 4.027284536157316e-06, "loss": 0.4224, "step": 16345 }, { "epoch": 2.6905953834126537, "grad_norm": 0.36499776322117494, "learning_rate": 4.026817075783511e-06, "loss": 0.4376, "step": 16346 }, { "epoch": 2.69075996763177, "grad_norm": 0.32248926422427887, "learning_rate": 4.02634962057289e-06, "loss": 0.4432, "step": 16347 }, { "epoch": 2.6909245518508866, "grad_norm": 0.338708489474019, "learning_rate": 4.025882170530552e-06, "loss": 0.459, "step": 16348 }, { "epoch": 2.691089136070003, "grad_norm": 0.294052606229598, "learning_rate": 4.025414725661601e-06, "loss": 0.4451, "step": 16349 }, { "epoch": 2.6912537202891196, "grad_norm": 0.2780252582537487, "learning_rate": 4.024947285971133e-06, "loss": 0.4453, "step": 16350 }, { "epoch": 2.691418304508236, "grad_norm": 0.33408514253873084, "learning_rate": 4.02447985146425e-06, "loss": 0.4298, "step": 16351 }, { "epoch": 2.6915828887273525, "grad_norm": 0.3270836296226998, "learning_rate": 4.024012422146054e-06, "loss": 0.4399, "step": 16352 }, { "epoch": 2.691747472946469, "grad_norm": 0.36075527590040307, "learning_rate": 4.023544998021644e-06, "loss": 0.4183, "step": 16353 }, { "epoch": 2.6919120571655855, "grad_norm": 0.3363764146010592, "learning_rate": 4.023077579096121e-06, "loss": 0.4425, "step": 16354 }, { "epoch": 2.692076641384702, "grad_norm": 0.31465186885890206, "learning_rate": 4.022610165374585e-06, "loss": 0.4223, "step": 16355 }, { "epoch": 2.6922412256038184, "grad_norm": 0.35284763955478143, "learning_rate": 4.022142756862137e-06, "loss": 0.4383, "step": 16356 }, { "epoch": 2.692405809822935, "grad_norm": 0.3134771967967619, "learning_rate": 4.021675353563877e-06, "loss": 0.44, "step": 16357 }, { "epoch": 2.6925703940420513, "grad_norm": 0.3802097153284701, "learning_rate": 4.0212079554849025e-06, "loss": 0.4319, "step": 16358 }, { "epoch": 2.692734978261168, "grad_norm": 0.34680413997601384, "learning_rate": 4.020740562630319e-06, "loss": 0.4456, "step": 16359 }, { "epoch": 2.6928995624802843, "grad_norm": 0.4307466925610047, "learning_rate": 4.02027317500522e-06, "loss": 0.4387, "step": 16360 }, { "epoch": 2.6930641466994008, "grad_norm": 0.3429686840682955, "learning_rate": 4.01980579261471e-06, "loss": 0.4355, "step": 16361 }, { "epoch": 2.693228730918517, "grad_norm": 0.2995725153651655, "learning_rate": 4.019338415463888e-06, "loss": 0.4388, "step": 16362 }, { "epoch": 2.6933933151376337, "grad_norm": 0.5330069986000292, "learning_rate": 4.018871043557852e-06, "loss": 0.4396, "step": 16363 }, { "epoch": 2.69355789935675, "grad_norm": 0.3288145812661788, "learning_rate": 4.018403676901704e-06, "loss": 0.4132, "step": 16364 }, { "epoch": 2.6937224835758666, "grad_norm": 0.29583734843271503, "learning_rate": 4.017936315500543e-06, "loss": 0.451, "step": 16365 }, { "epoch": 2.693887067794983, "grad_norm": 0.42497783015345564, "learning_rate": 4.017468959359469e-06, "loss": 0.4285, "step": 16366 }, { "epoch": 2.6940516520140996, "grad_norm": 0.2844809828785978, "learning_rate": 4.017001608483579e-06, "loss": 0.4358, "step": 16367 }, { "epoch": 2.694216236233216, "grad_norm": 0.3626814403818623, "learning_rate": 4.016534262877978e-06, "loss": 0.4234, "step": 16368 }, { "epoch": 2.6943808204523325, "grad_norm": 0.28919512072085424, "learning_rate": 4.016066922547759e-06, "loss": 0.4327, "step": 16369 }, { "epoch": 2.694545404671449, "grad_norm": 0.32003358765545853, "learning_rate": 4.015599587498026e-06, "loss": 0.4388, "step": 16370 }, { "epoch": 2.6947099888905655, "grad_norm": 0.36882718405394466, "learning_rate": 4.015132257733878e-06, "loss": 0.4623, "step": 16371 }, { "epoch": 2.694874573109682, "grad_norm": 0.27980427256449153, "learning_rate": 4.014664933260411e-06, "loss": 0.4161, "step": 16372 }, { "epoch": 2.6950391573287984, "grad_norm": 0.41433725457500464, "learning_rate": 4.0141976140827285e-06, "loss": 0.4193, "step": 16373 }, { "epoch": 2.6952037415479144, "grad_norm": 0.38174671828343304, "learning_rate": 4.013730300205927e-06, "loss": 0.4233, "step": 16374 }, { "epoch": 2.695368325767031, "grad_norm": 0.37448167649323577, "learning_rate": 4.0132629916351064e-06, "loss": 0.4348, "step": 16375 }, { "epoch": 2.6955329099861474, "grad_norm": 0.3227325820515218, "learning_rate": 4.012795688375366e-06, "loss": 0.4289, "step": 16376 }, { "epoch": 2.695697494205264, "grad_norm": 0.34668685475852096, "learning_rate": 4.012328390431804e-06, "loss": 0.4328, "step": 16377 }, { "epoch": 2.6958620784243803, "grad_norm": 0.27383474398604524, "learning_rate": 4.0118610978095216e-06, "loss": 0.4297, "step": 16378 }, { "epoch": 2.6960266626434968, "grad_norm": 0.42749690459672113, "learning_rate": 4.011393810513614e-06, "loss": 0.4651, "step": 16379 }, { "epoch": 2.6961912468626132, "grad_norm": 0.3957381987373052, "learning_rate": 4.010926528549184e-06, "loss": 0.4396, "step": 16380 }, { "epoch": 2.6963558310817297, "grad_norm": 0.4465267398360211, "learning_rate": 4.010459251921327e-06, "loss": 0.4428, "step": 16381 }, { "epoch": 2.696520415300846, "grad_norm": 0.33690374814706475, "learning_rate": 4.009991980635144e-06, "loss": 0.4245, "step": 16382 }, { "epoch": 2.6966849995199627, "grad_norm": 0.264495976809934, "learning_rate": 4.0095247146957325e-06, "loss": 0.418, "step": 16383 }, { "epoch": 2.696849583739079, "grad_norm": 0.28920170373751336, "learning_rate": 4.009057454108191e-06, "loss": 0.4269, "step": 16384 }, { "epoch": 2.6970141679581956, "grad_norm": 1.0817155635714626, "learning_rate": 4.00859019887762e-06, "loss": 0.4385, "step": 16385 }, { "epoch": 2.697178752177312, "grad_norm": 0.3838047671684187, "learning_rate": 4.008122949009116e-06, "loss": 0.4376, "step": 16386 }, { "epoch": 2.6973433363964285, "grad_norm": 0.2556844042050589, "learning_rate": 4.007655704507779e-06, "loss": 0.419, "step": 16387 }, { "epoch": 2.697507920615545, "grad_norm": 0.27940657185380346, "learning_rate": 4.007188465378704e-06, "loss": 0.4193, "step": 16388 }, { "epoch": 2.6976725048346615, "grad_norm": 0.3339629542859908, "learning_rate": 4.006721231626995e-06, "loss": 0.4421, "step": 16389 }, { "epoch": 2.697837089053778, "grad_norm": 0.3339466019931385, "learning_rate": 4.006254003257744e-06, "loss": 0.4305, "step": 16390 }, { "epoch": 2.6980016732728944, "grad_norm": 0.28327025098936864, "learning_rate": 4.0057867802760545e-06, "loss": 0.4298, "step": 16391 }, { "epoch": 2.698166257492011, "grad_norm": 0.33148361090437845, "learning_rate": 4.005319562687021e-06, "loss": 0.4252, "step": 16392 }, { "epoch": 2.6983308417111274, "grad_norm": 0.32939786900837537, "learning_rate": 4.004852350495744e-06, "loss": 0.4211, "step": 16393 }, { "epoch": 2.698495425930244, "grad_norm": 0.3725020408204437, "learning_rate": 4.004385143707321e-06, "loss": 0.4585, "step": 16394 }, { "epoch": 2.6986600101493603, "grad_norm": 0.2853854565429146, "learning_rate": 4.003917942326848e-06, "loss": 0.4301, "step": 16395 }, { "epoch": 2.6988245943684768, "grad_norm": 0.3735961596222982, "learning_rate": 4.0034507463594254e-06, "loss": 0.4314, "step": 16396 }, { "epoch": 2.6989891785875932, "grad_norm": 0.3555759193710977, "learning_rate": 4.002983555810149e-06, "loss": 0.4408, "step": 16397 }, { "epoch": 2.6991537628067097, "grad_norm": 0.3541206601416742, "learning_rate": 4.00251637068412e-06, "loss": 0.4236, "step": 16398 }, { "epoch": 2.6993183470258257, "grad_norm": 0.2697075924755978, "learning_rate": 4.00204919098643e-06, "loss": 0.4272, "step": 16399 }, { "epoch": 2.699482931244942, "grad_norm": 0.2698943695657363, "learning_rate": 4.0015820167221844e-06, "loss": 0.4351, "step": 16400 }, { "epoch": 2.6996475154640587, "grad_norm": 0.3023080111451641, "learning_rate": 4.001114847896476e-06, "loss": 0.4313, "step": 16401 }, { "epoch": 2.699812099683175, "grad_norm": 0.33379035571948806, "learning_rate": 4.0006476845144015e-06, "loss": 0.4331, "step": 16402 }, { "epoch": 2.6999766839022916, "grad_norm": 0.4000340059845679, "learning_rate": 4.000180526581062e-06, "loss": 0.4484, "step": 16403 }, { "epoch": 2.700141268121408, "grad_norm": 0.3095464294829794, "learning_rate": 3.999713374101551e-06, "loss": 0.4427, "step": 16404 }, { "epoch": 2.7003058523405246, "grad_norm": 0.4313563968877461, "learning_rate": 3.999246227080969e-06, "loss": 0.4514, "step": 16405 }, { "epoch": 2.700470436559641, "grad_norm": 0.3398078212298633, "learning_rate": 3.998779085524413e-06, "loss": 0.43, "step": 16406 }, { "epoch": 2.7006350207787575, "grad_norm": 0.3391341204498403, "learning_rate": 3.99831194943698e-06, "loss": 0.4186, "step": 16407 }, { "epoch": 2.700799604997874, "grad_norm": 0.39121165508808176, "learning_rate": 3.997844818823767e-06, "loss": 0.418, "step": 16408 }, { "epoch": 2.7009641892169904, "grad_norm": 0.2978515677764767, "learning_rate": 3.997377693689868e-06, "loss": 0.4307, "step": 16409 }, { "epoch": 2.701128773436107, "grad_norm": 0.269733507262398, "learning_rate": 3.996910574040387e-06, "loss": 0.4378, "step": 16410 }, { "epoch": 2.7012933576552234, "grad_norm": 0.293333395883526, "learning_rate": 3.996443459880414e-06, "loss": 0.4246, "step": 16411 }, { "epoch": 2.70145794187434, "grad_norm": 0.33065446535807685, "learning_rate": 3.9959763512150505e-06, "loss": 0.4373, "step": 16412 }, { "epoch": 2.7016225260934563, "grad_norm": 0.4420974144520157, "learning_rate": 3.99550924804939e-06, "loss": 0.4277, "step": 16413 }, { "epoch": 2.701787110312573, "grad_norm": 0.35927542344814284, "learning_rate": 3.995042150388534e-06, "loss": 0.4208, "step": 16414 }, { "epoch": 2.7019516945316893, "grad_norm": 0.32015570904244817, "learning_rate": 3.994575058237575e-06, "loss": 0.4468, "step": 16415 }, { "epoch": 2.7021162787508057, "grad_norm": 0.34575732730265396, "learning_rate": 3.994107971601611e-06, "loss": 0.4225, "step": 16416 }, { "epoch": 2.702280862969922, "grad_norm": 0.30017266073248694, "learning_rate": 3.9936408904857404e-06, "loss": 0.4345, "step": 16417 }, { "epoch": 2.7024454471890387, "grad_norm": 0.2841028088209639, "learning_rate": 3.993173814895056e-06, "loss": 0.4281, "step": 16418 }, { "epoch": 2.702610031408155, "grad_norm": 0.3727144194184109, "learning_rate": 3.9927067448346584e-06, "loss": 0.4396, "step": 16419 }, { "epoch": 2.7027746156272716, "grad_norm": 1.1118286108688196, "learning_rate": 3.992239680309641e-06, "loss": 0.4223, "step": 16420 }, { "epoch": 2.702939199846388, "grad_norm": 0.33739735935023996, "learning_rate": 3.991772621325103e-06, "loss": 0.4294, "step": 16421 }, { "epoch": 2.7031037840655046, "grad_norm": 0.3207517013717223, "learning_rate": 3.991305567886139e-06, "loss": 0.4354, "step": 16422 }, { "epoch": 2.703268368284621, "grad_norm": 0.34552698173846835, "learning_rate": 3.990838519997845e-06, "loss": 0.4524, "step": 16423 }, { "epoch": 2.7034329525037375, "grad_norm": 0.29631186943187254, "learning_rate": 3.990371477665319e-06, "loss": 0.432, "step": 16424 }, { "epoch": 2.703597536722854, "grad_norm": 0.3395910937313192, "learning_rate": 3.989904440893654e-06, "loss": 0.43, "step": 16425 }, { "epoch": 2.7037621209419704, "grad_norm": 0.30236926557935484, "learning_rate": 3.98943740968795e-06, "loss": 0.4171, "step": 16426 }, { "epoch": 2.703926705161087, "grad_norm": 0.45268445138775065, "learning_rate": 3.9889703840533e-06, "loss": 0.451, "step": 16427 }, { "epoch": 2.7040912893802034, "grad_norm": 0.2825909781347472, "learning_rate": 3.9885033639948025e-06, "loss": 0.4377, "step": 16428 }, { "epoch": 2.70425587359932, "grad_norm": 0.6358393266769788, "learning_rate": 3.988036349517551e-06, "loss": 0.4415, "step": 16429 }, { "epoch": 2.7044204578184363, "grad_norm": 0.3386341951236184, "learning_rate": 3.987569340626644e-06, "loss": 0.419, "step": 16430 }, { "epoch": 2.704585042037553, "grad_norm": 0.43150716763894154, "learning_rate": 3.987102337327176e-06, "loss": 0.4085, "step": 16431 }, { "epoch": 2.7047496262566693, "grad_norm": 0.33112369492597604, "learning_rate": 3.986635339624241e-06, "loss": 0.441, "step": 16432 }, { "epoch": 2.7049142104757857, "grad_norm": 0.7055101510823322, "learning_rate": 3.986168347522937e-06, "loss": 0.4415, "step": 16433 }, { "epoch": 2.705078794694902, "grad_norm": 0.3058808341878057, "learning_rate": 3.985701361028358e-06, "loss": 0.419, "step": 16434 }, { "epoch": 2.7052433789140187, "grad_norm": 0.5188963271133904, "learning_rate": 3.985234380145601e-06, "loss": 0.4249, "step": 16435 }, { "epoch": 2.705407963133135, "grad_norm": 0.36464984739773737, "learning_rate": 3.984767404879761e-06, "loss": 0.4299, "step": 16436 }, { "epoch": 2.7055725473522516, "grad_norm": 0.3289296636196006, "learning_rate": 3.9843004352359335e-06, "loss": 0.4267, "step": 16437 }, { "epoch": 2.705737131571368, "grad_norm": 0.33757350971555905, "learning_rate": 3.983833471219215e-06, "loss": 0.4125, "step": 16438 }, { "epoch": 2.7059017157904846, "grad_norm": 0.31527342354316334, "learning_rate": 3.983366512834697e-06, "loss": 0.4364, "step": 16439 }, { "epoch": 2.706066300009601, "grad_norm": 0.3019138030598178, "learning_rate": 3.982899560087481e-06, "loss": 0.4311, "step": 16440 }, { "epoch": 2.706230884228717, "grad_norm": 0.31228858196188747, "learning_rate": 3.982432612982656e-06, "loss": 0.4314, "step": 16441 }, { "epoch": 2.7063954684478335, "grad_norm": 0.3658219525437423, "learning_rate": 3.98196567152532e-06, "loss": 0.4518, "step": 16442 }, { "epoch": 2.70656005266695, "grad_norm": 0.24508146841699552, "learning_rate": 3.9814987357205685e-06, "loss": 0.4343, "step": 16443 }, { "epoch": 2.7067246368860665, "grad_norm": 0.31452290874256633, "learning_rate": 3.981031805573496e-06, "loss": 0.4207, "step": 16444 }, { "epoch": 2.706889221105183, "grad_norm": 0.2755412575819989, "learning_rate": 3.980564881089197e-06, "loss": 0.4472, "step": 16445 }, { "epoch": 2.7070538053242994, "grad_norm": 0.30094494464038807, "learning_rate": 3.980097962272766e-06, "loss": 0.4148, "step": 16446 }, { "epoch": 2.707218389543416, "grad_norm": 0.3007444972335924, "learning_rate": 3.9796310491293e-06, "loss": 0.4177, "step": 16447 }, { "epoch": 2.7073829737625323, "grad_norm": 0.3387548685818209, "learning_rate": 3.97916414166389e-06, "loss": 0.4534, "step": 16448 }, { "epoch": 2.707547557981649, "grad_norm": 0.26601239553772493, "learning_rate": 3.978697239881636e-06, "loss": 0.4238, "step": 16449 }, { "epoch": 2.7077121422007653, "grad_norm": 0.4986743261050538, "learning_rate": 3.978230343787627e-06, "loss": 0.4303, "step": 16450 }, { "epoch": 2.7078767264198818, "grad_norm": 0.3267574436279148, "learning_rate": 3.977763453386963e-06, "loss": 0.4246, "step": 16451 }, { "epoch": 2.7080413106389982, "grad_norm": 0.32715433919622156, "learning_rate": 3.977296568684735e-06, "loss": 0.4469, "step": 16452 }, { "epoch": 2.7082058948581147, "grad_norm": 0.326640328060673, "learning_rate": 3.976829689686037e-06, "loss": 0.4384, "step": 16453 }, { "epoch": 2.708370479077231, "grad_norm": 0.2731441484587725, "learning_rate": 3.9763628163959646e-06, "loss": 0.4244, "step": 16454 }, { "epoch": 2.7085350632963476, "grad_norm": 0.29145356682928636, "learning_rate": 3.975895948819612e-06, "loss": 0.4303, "step": 16455 }, { "epoch": 2.708699647515464, "grad_norm": 0.3826056619240171, "learning_rate": 3.975429086962075e-06, "loss": 0.4366, "step": 16456 }, { "epoch": 2.7088642317345806, "grad_norm": 0.2903672346142324, "learning_rate": 3.974962230828445e-06, "loss": 0.43, "step": 16457 }, { "epoch": 2.709028815953697, "grad_norm": 0.31055339133125603, "learning_rate": 3.9744953804238184e-06, "loss": 0.4243, "step": 16458 }, { "epoch": 2.7091934001728135, "grad_norm": 0.30907072386314455, "learning_rate": 3.97402853575329e-06, "loss": 0.4558, "step": 16459 }, { "epoch": 2.70935798439193, "grad_norm": 0.39217728225346443, "learning_rate": 3.973561696821949e-06, "loss": 0.4383, "step": 16460 }, { "epoch": 2.7095225686110465, "grad_norm": 0.2949136541812381, "learning_rate": 3.973094863634896e-06, "loss": 0.4406, "step": 16461 }, { "epoch": 2.709687152830163, "grad_norm": 0.2874774588586657, "learning_rate": 3.972628036197219e-06, "loss": 0.433, "step": 16462 }, { "epoch": 2.7098517370492794, "grad_norm": 0.37726555702971565, "learning_rate": 3.972161214514016e-06, "loss": 0.4182, "step": 16463 }, { "epoch": 2.710016321268396, "grad_norm": 0.3523421698654293, "learning_rate": 3.971694398590378e-06, "loss": 0.4313, "step": 16464 }, { "epoch": 2.7101809054875123, "grad_norm": 0.35601099773408845, "learning_rate": 3.9712275884314006e-06, "loss": 0.4233, "step": 16465 }, { "epoch": 2.7103454897066284, "grad_norm": 4.607132561798621, "learning_rate": 3.9707607840421765e-06, "loss": 0.4371, "step": 16466 }, { "epoch": 2.710510073925745, "grad_norm": 0.32006363388751585, "learning_rate": 3.9702939854277995e-06, "loss": 0.4252, "step": 16467 }, { "epoch": 2.7106746581448613, "grad_norm": 0.3703664092191253, "learning_rate": 3.969827192593364e-06, "loss": 0.4229, "step": 16468 }, { "epoch": 2.7108392423639778, "grad_norm": 0.3535642572338403, "learning_rate": 3.96936040554396e-06, "loss": 0.4352, "step": 16469 }, { "epoch": 2.7110038265830942, "grad_norm": 0.34953714902004956, "learning_rate": 3.968893624284687e-06, "loss": 0.4501, "step": 16470 }, { "epoch": 2.7111684108022107, "grad_norm": 0.3261552636191001, "learning_rate": 3.968426848820632e-06, "loss": 0.4519, "step": 16471 }, { "epoch": 2.711332995021327, "grad_norm": 0.4002886697589968, "learning_rate": 3.967960079156893e-06, "loss": 0.4319, "step": 16472 }, { "epoch": 2.7114975792404437, "grad_norm": 0.43804861055115035, "learning_rate": 3.96749331529856e-06, "loss": 0.4346, "step": 16473 }, { "epoch": 2.71166216345956, "grad_norm": 0.29537495233881805, "learning_rate": 3.967026557250728e-06, "loss": 0.4134, "step": 16474 }, { "epoch": 2.7118267476786766, "grad_norm": 0.41638120350919683, "learning_rate": 3.966559805018489e-06, "loss": 0.3992, "step": 16475 }, { "epoch": 2.711991331897793, "grad_norm": 0.3401077405746412, "learning_rate": 3.966093058606936e-06, "loss": 0.4158, "step": 16476 }, { "epoch": 2.7121559161169095, "grad_norm": 0.49076143533819405, "learning_rate": 3.9656263180211646e-06, "loss": 0.4334, "step": 16477 }, { "epoch": 2.712320500336026, "grad_norm": 0.35596719185727005, "learning_rate": 3.965159583266263e-06, "loss": 0.4405, "step": 16478 }, { "epoch": 2.7124850845551425, "grad_norm": 0.2811140700107037, "learning_rate": 3.9646928543473284e-06, "loss": 0.4281, "step": 16479 }, { "epoch": 2.712649668774259, "grad_norm": 0.36662815582599095, "learning_rate": 3.964226131269451e-06, "loss": 0.4426, "step": 16480 }, { "epoch": 2.7128142529933754, "grad_norm": 0.3329050753007927, "learning_rate": 3.963759414037725e-06, "loss": 0.4567, "step": 16481 }, { "epoch": 2.712978837212492, "grad_norm": 0.3967360921283001, "learning_rate": 3.9632927026572415e-06, "loss": 0.4515, "step": 16482 }, { "epoch": 2.7131434214316084, "grad_norm": 0.4789891785118059, "learning_rate": 3.962825997133094e-06, "loss": 0.4336, "step": 16483 }, { "epoch": 2.713308005650725, "grad_norm": 0.49288810478031103, "learning_rate": 3.962359297470375e-06, "loss": 0.433, "step": 16484 }, { "epoch": 2.7134725898698413, "grad_norm": 0.374289866565762, "learning_rate": 3.961892603674176e-06, "loss": 0.419, "step": 16485 }, { "epoch": 2.7136371740889578, "grad_norm": 0.339703176599694, "learning_rate": 3.96142591574959e-06, "loss": 0.4183, "step": 16486 }, { "epoch": 2.7138017583080742, "grad_norm": 0.3705111284263445, "learning_rate": 3.96095923370171e-06, "loss": 0.4245, "step": 16487 }, { "epoch": 2.7139663425271907, "grad_norm": 0.38620231082094414, "learning_rate": 3.9604925575356285e-06, "loss": 0.4077, "step": 16488 }, { "epoch": 2.714130926746307, "grad_norm": 0.36650262469548656, "learning_rate": 3.960025887256437e-06, "loss": 0.4306, "step": 16489 }, { "epoch": 2.7142955109654237, "grad_norm": 0.3655590240723453, "learning_rate": 3.959559222869226e-06, "loss": 0.4168, "step": 16490 }, { "epoch": 2.71446009518454, "grad_norm": 0.5492373426547444, "learning_rate": 3.959092564379091e-06, "loss": 0.426, "step": 16491 }, { "epoch": 2.7146246794036566, "grad_norm": 0.32705909412887524, "learning_rate": 3.9586259117911205e-06, "loss": 0.4088, "step": 16492 }, { "epoch": 2.714789263622773, "grad_norm": 0.28925992733428907, "learning_rate": 3.958159265110409e-06, "loss": 0.4242, "step": 16493 }, { "epoch": 2.7149538478418895, "grad_norm": 0.34237158057699313, "learning_rate": 3.957692624342046e-06, "loss": 0.4267, "step": 16494 }, { "epoch": 2.715118432061006, "grad_norm": 0.36573089990123203, "learning_rate": 3.957225989491125e-06, "loss": 0.448, "step": 16495 }, { "epoch": 2.7152830162801225, "grad_norm": 0.396777803147069, "learning_rate": 3.956759360562738e-06, "loss": 0.4295, "step": 16496 }, { "epoch": 2.715447600499239, "grad_norm": 0.31646470571881147, "learning_rate": 3.956292737561976e-06, "loss": 0.4159, "step": 16497 }, { "epoch": 2.7156121847183554, "grad_norm": 0.3603492209505748, "learning_rate": 3.9558261204939305e-06, "loss": 0.4007, "step": 16498 }, { "epoch": 2.715776768937472, "grad_norm": 0.33269287449159657, "learning_rate": 3.955359509363693e-06, "loss": 0.4473, "step": 16499 }, { "epoch": 2.7159413531565884, "grad_norm": 0.3743128484780302, "learning_rate": 3.954892904176356e-06, "loss": 0.4347, "step": 16500 }, { "epoch": 2.716105937375705, "grad_norm": 0.31514688689823894, "learning_rate": 3.954426304937008e-06, "loss": 0.4237, "step": 16501 }, { "epoch": 2.7162705215948213, "grad_norm": 1.7890578690514742, "learning_rate": 3.953959711650745e-06, "loss": 0.4232, "step": 16502 }, { "epoch": 2.7164351058139378, "grad_norm": 0.30094650376522986, "learning_rate": 3.953493124322655e-06, "loss": 0.4214, "step": 16503 }, { "epoch": 2.7165996900330542, "grad_norm": 0.3094817709581162, "learning_rate": 3.953026542957829e-06, "loss": 0.4319, "step": 16504 }, { "epoch": 2.7167642742521707, "grad_norm": 0.34239085799206986, "learning_rate": 3.9525599675613594e-06, "loss": 0.4326, "step": 16505 }, { "epoch": 2.716928858471287, "grad_norm": 0.3131738401297161, "learning_rate": 3.952093398138336e-06, "loss": 0.4406, "step": 16506 }, { "epoch": 2.717093442690403, "grad_norm": 0.32423565735021526, "learning_rate": 3.951626834693853e-06, "loss": 0.4283, "step": 16507 }, { "epoch": 2.7172580269095197, "grad_norm": 0.37070568272760424, "learning_rate": 3.951160277232997e-06, "loss": 0.4436, "step": 16508 }, { "epoch": 2.717422611128636, "grad_norm": 0.2891131256851677, "learning_rate": 3.950693725760863e-06, "loss": 0.4357, "step": 16509 }, { "epoch": 2.7175871953477526, "grad_norm": 0.28978087673201264, "learning_rate": 3.950227180282538e-06, "loss": 0.4372, "step": 16510 }, { "epoch": 2.717751779566869, "grad_norm": 0.5355941888768845, "learning_rate": 3.949760640803116e-06, "loss": 0.4186, "step": 16511 }, { "epoch": 2.7179163637859856, "grad_norm": 0.3526071429229731, "learning_rate": 3.949294107327686e-06, "loss": 0.4179, "step": 16512 }, { "epoch": 2.718080948005102, "grad_norm": 0.5767881584871625, "learning_rate": 3.948827579861338e-06, "loss": 0.4172, "step": 16513 }, { "epoch": 2.7182455322242185, "grad_norm": 0.7289027212814949, "learning_rate": 3.948361058409165e-06, "loss": 0.4297, "step": 16514 }, { "epoch": 2.718410116443335, "grad_norm": 0.30909871256741495, "learning_rate": 3.947894542976254e-06, "loss": 0.436, "step": 16515 }, { "epoch": 2.7185747006624514, "grad_norm": 0.3246466845888603, "learning_rate": 3.947428033567699e-06, "loss": 0.4501, "step": 16516 }, { "epoch": 2.718739284881568, "grad_norm": 0.2665993992574491, "learning_rate": 3.946961530188588e-06, "loss": 0.4145, "step": 16517 }, { "epoch": 2.7189038691006844, "grad_norm": 0.27713457592729585, "learning_rate": 3.9464950328440124e-06, "loss": 0.4148, "step": 16518 }, { "epoch": 2.719068453319801, "grad_norm": 0.43622092377899335, "learning_rate": 3.946028541539064e-06, "loss": 0.4334, "step": 16519 }, { "epoch": 2.7192330375389173, "grad_norm": 0.31195549073953505, "learning_rate": 3.9455620562788275e-06, "loss": 0.4259, "step": 16520 }, { "epoch": 2.719397621758034, "grad_norm": 0.3053428947657628, "learning_rate": 3.945095577068399e-06, "loss": 0.4174, "step": 16521 }, { "epoch": 2.7195622059771503, "grad_norm": 0.32858949379606484, "learning_rate": 3.944629103912863e-06, "loss": 0.44, "step": 16522 }, { "epoch": 2.7197267901962667, "grad_norm": 0.3173134537251033, "learning_rate": 3.944162636817316e-06, "loss": 0.4361, "step": 16523 }, { "epoch": 2.719891374415383, "grad_norm": 0.36359440024895223, "learning_rate": 3.943696175786843e-06, "loss": 0.4275, "step": 16524 }, { "epoch": 2.7200559586344997, "grad_norm": 0.3136275602179564, "learning_rate": 3.9432297208265365e-06, "loss": 0.4161, "step": 16525 }, { "epoch": 2.720220542853616, "grad_norm": 0.3560583689227809, "learning_rate": 3.942763271941484e-06, "loss": 0.4317, "step": 16526 }, { "epoch": 2.7203851270727326, "grad_norm": 0.352877130841058, "learning_rate": 3.942296829136776e-06, "loss": 0.4453, "step": 16527 }, { "epoch": 2.720549711291849, "grad_norm": 0.29121856925706796, "learning_rate": 3.941830392417503e-06, "loss": 0.4293, "step": 16528 }, { "epoch": 2.7207142955109656, "grad_norm": 0.31819722571074455, "learning_rate": 3.941363961788754e-06, "loss": 0.4381, "step": 16529 }, { "epoch": 2.720878879730082, "grad_norm": 0.27919041748825024, "learning_rate": 3.940897537255619e-06, "loss": 0.4351, "step": 16530 }, { "epoch": 2.7210434639491985, "grad_norm": 0.3181876486582578, "learning_rate": 3.940431118823185e-06, "loss": 0.4341, "step": 16531 }, { "epoch": 2.7212080481683145, "grad_norm": 0.4905460053945161, "learning_rate": 3.939964706496546e-06, "loss": 0.4485, "step": 16532 }, { "epoch": 2.721372632387431, "grad_norm": 0.36060063288372324, "learning_rate": 3.9394983002807875e-06, "loss": 0.4614, "step": 16533 }, { "epoch": 2.7215372166065475, "grad_norm": 0.2891799665529831, "learning_rate": 3.939031900180999e-06, "loss": 0.4301, "step": 16534 }, { "epoch": 2.721701800825664, "grad_norm": 0.31653000551268723, "learning_rate": 3.938565506202271e-06, "loss": 0.4368, "step": 16535 }, { "epoch": 2.7218663850447804, "grad_norm": 0.31446163576171354, "learning_rate": 3.938099118349692e-06, "loss": 0.4115, "step": 16536 }, { "epoch": 2.722030969263897, "grad_norm": 0.4224125133332122, "learning_rate": 3.9376327366283514e-06, "loss": 0.4275, "step": 16537 }, { "epoch": 2.7221955534830133, "grad_norm": 0.41937226295599567, "learning_rate": 3.937166361043337e-06, "loss": 0.4261, "step": 16538 }, { "epoch": 2.72236013770213, "grad_norm": 0.32751387604214655, "learning_rate": 3.93669999159974e-06, "loss": 0.4322, "step": 16539 }, { "epoch": 2.7225247219212463, "grad_norm": 0.31879617266630655, "learning_rate": 3.936233628302649e-06, "loss": 0.4266, "step": 16540 }, { "epoch": 2.7226893061403628, "grad_norm": 0.2738330940521524, "learning_rate": 3.935767271157148e-06, "loss": 0.4184, "step": 16541 }, { "epoch": 2.7228538903594792, "grad_norm": 0.33398045292163164, "learning_rate": 3.935300920168334e-06, "loss": 0.4391, "step": 16542 }, { "epoch": 2.7230184745785957, "grad_norm": 0.35001300209930836, "learning_rate": 3.934834575341287e-06, "loss": 0.4128, "step": 16543 }, { "epoch": 2.723183058797712, "grad_norm": 0.41861161950891773, "learning_rate": 3.934368236681102e-06, "loss": 0.4333, "step": 16544 }, { "epoch": 2.7233476430168286, "grad_norm": 0.42298701569274116, "learning_rate": 3.9339019041928625e-06, "loss": 0.399, "step": 16545 }, { "epoch": 2.723512227235945, "grad_norm": 0.30381499839029175, "learning_rate": 3.9334355778816614e-06, "loss": 0.4262, "step": 16546 }, { "epoch": 2.7236768114550616, "grad_norm": 0.4276762118814055, "learning_rate": 3.9329692577525854e-06, "loss": 0.4185, "step": 16547 }, { "epoch": 2.723841395674178, "grad_norm": 0.3489145625046093, "learning_rate": 3.932502943810722e-06, "loss": 0.4392, "step": 16548 }, { "epoch": 2.7240059798932945, "grad_norm": 0.31725816333011786, "learning_rate": 3.932036636061161e-06, "loss": 0.4141, "step": 16549 }, { "epoch": 2.724170564112411, "grad_norm": 0.6398241391454529, "learning_rate": 3.931570334508987e-06, "loss": 0.4138, "step": 16550 }, { "epoch": 2.7243351483315275, "grad_norm": 0.4477747233220181, "learning_rate": 3.931104039159293e-06, "loss": 0.4398, "step": 16551 }, { "epoch": 2.724499732550644, "grad_norm": 0.32522477304337494, "learning_rate": 3.930637750017162e-06, "loss": 0.4228, "step": 16552 }, { "epoch": 2.7246643167697604, "grad_norm": 0.26621290472588405, "learning_rate": 3.930171467087688e-06, "loss": 0.4412, "step": 16553 }, { "epoch": 2.724828900988877, "grad_norm": 0.31334048099794237, "learning_rate": 3.929705190375953e-06, "loss": 0.4332, "step": 16554 }, { "epoch": 2.7249934852079933, "grad_norm": 0.4502108996851825, "learning_rate": 3.929238919887049e-06, "loss": 0.4309, "step": 16555 }, { "epoch": 2.72515806942711, "grad_norm": 0.3080396753891864, "learning_rate": 3.9287726556260615e-06, "loss": 0.435, "step": 16556 }, { "epoch": 2.7253226536462263, "grad_norm": 0.2974240979985078, "learning_rate": 3.9283063975980785e-06, "loss": 0.4384, "step": 16557 }, { "epoch": 2.7254872378653427, "grad_norm": 0.3634519406751955, "learning_rate": 3.927840145808188e-06, "loss": 0.4346, "step": 16558 }, { "epoch": 2.725651822084459, "grad_norm": 0.2961719414650674, "learning_rate": 3.927373900261478e-06, "loss": 0.4359, "step": 16559 }, { "epoch": 2.7258164063035757, "grad_norm": 1.3632269864910096, "learning_rate": 3.926907660963035e-06, "loss": 0.4161, "step": 16560 }, { "epoch": 2.725980990522692, "grad_norm": 0.48981983726866346, "learning_rate": 3.926441427917946e-06, "loss": 0.4361, "step": 16561 }, { "epoch": 2.7261455747418086, "grad_norm": 0.3451352277184602, "learning_rate": 3.925975201131302e-06, "loss": 0.4428, "step": 16562 }, { "epoch": 2.726310158960925, "grad_norm": 0.3423792475728588, "learning_rate": 3.925508980608186e-06, "loss": 0.4071, "step": 16563 }, { "epoch": 2.7264747431800416, "grad_norm": 0.35651849165342947, "learning_rate": 3.925042766353686e-06, "loss": 0.4573, "step": 16564 }, { "epoch": 2.726639327399158, "grad_norm": 0.33468366739995337, "learning_rate": 3.9245765583728905e-06, "loss": 0.4134, "step": 16565 }, { "epoch": 2.7268039116182745, "grad_norm": 0.3264344724816696, "learning_rate": 3.924110356670885e-06, "loss": 0.4397, "step": 16566 }, { "epoch": 2.726968495837391, "grad_norm": 0.2621226652285822, "learning_rate": 3.923644161252759e-06, "loss": 0.4345, "step": 16567 }, { "epoch": 2.7271330800565075, "grad_norm": 0.38974860554227214, "learning_rate": 3.923177972123597e-06, "loss": 0.4474, "step": 16568 }, { "epoch": 2.727297664275624, "grad_norm": 0.3402501223880213, "learning_rate": 3.922711789288487e-06, "loss": 0.4651, "step": 16569 }, { "epoch": 2.7274622484947404, "grad_norm": 0.31851552409547534, "learning_rate": 3.922245612752517e-06, "loss": 0.432, "step": 16570 }, { "epoch": 2.727626832713857, "grad_norm": 0.4068273023864668, "learning_rate": 3.921779442520769e-06, "loss": 0.4339, "step": 16571 }, { "epoch": 2.7277914169329733, "grad_norm": 1.0664734260304998, "learning_rate": 3.921313278598336e-06, "loss": 0.4232, "step": 16572 }, { "epoch": 2.72795600115209, "grad_norm": 0.3619878185276546, "learning_rate": 3.920847120990299e-06, "loss": 0.4427, "step": 16573 }, { "epoch": 2.728120585371206, "grad_norm": 0.3263631633804949, "learning_rate": 3.920380969701749e-06, "loss": 0.4447, "step": 16574 }, { "epoch": 2.7282851695903223, "grad_norm": 0.6487706367050764, "learning_rate": 3.919914824737769e-06, "loss": 0.4403, "step": 16575 }, { "epoch": 2.7284497538094388, "grad_norm": 0.39630519759832494, "learning_rate": 3.919448686103448e-06, "loss": 0.4551, "step": 16576 }, { "epoch": 2.7286143380285552, "grad_norm": 0.3550421170640398, "learning_rate": 3.918982553803872e-06, "loss": 0.4432, "step": 16577 }, { "epoch": 2.7287789222476717, "grad_norm": 0.4224330935360739, "learning_rate": 3.9185164278441245e-06, "loss": 0.4201, "step": 16578 }, { "epoch": 2.728943506466788, "grad_norm": 0.3213326665891005, "learning_rate": 3.918050308229295e-06, "loss": 0.4252, "step": 16579 }, { "epoch": 2.7291080906859047, "grad_norm": 0.31491941534195894, "learning_rate": 3.917584194964467e-06, "loss": 0.4371, "step": 16580 }, { "epoch": 2.729272674905021, "grad_norm": 0.35263803623632617, "learning_rate": 3.917118088054731e-06, "loss": 0.4346, "step": 16581 }, { "epoch": 2.7294372591241376, "grad_norm": 0.33412122258213794, "learning_rate": 3.916651987505166e-06, "loss": 0.4307, "step": 16582 }, { "epoch": 2.729601843343254, "grad_norm": 0.3795048520762357, "learning_rate": 3.916185893320864e-06, "loss": 0.4262, "step": 16583 }, { "epoch": 2.7297664275623705, "grad_norm": 0.34055335812743365, "learning_rate": 3.915719805506909e-06, "loss": 0.4465, "step": 16584 }, { "epoch": 2.729931011781487, "grad_norm": 0.30848973764445026, "learning_rate": 3.915253724068384e-06, "loss": 0.4478, "step": 16585 }, { "epoch": 2.7300955960006035, "grad_norm": 0.3324198552438988, "learning_rate": 3.9147876490103784e-06, "loss": 0.4406, "step": 16586 }, { "epoch": 2.73026018021972, "grad_norm": 0.2761581425577677, "learning_rate": 3.914321580337975e-06, "loss": 0.4362, "step": 16587 }, { "epoch": 2.7304247644388364, "grad_norm": 0.2873771774415666, "learning_rate": 3.913855518056263e-06, "loss": 0.4433, "step": 16588 }, { "epoch": 2.730589348657953, "grad_norm": 0.3365185935551332, "learning_rate": 3.913389462170324e-06, "loss": 0.4265, "step": 16589 }, { "epoch": 2.7307539328770694, "grad_norm": 0.3202568834928568, "learning_rate": 3.9129234126852455e-06, "loss": 0.4205, "step": 16590 }, { "epoch": 2.730918517096186, "grad_norm": 0.31466464812505096, "learning_rate": 3.912457369606114e-06, "loss": 0.4354, "step": 16591 }, { "epoch": 2.7310831013153023, "grad_norm": 0.32197124336552835, "learning_rate": 3.9119913329380105e-06, "loss": 0.4313, "step": 16592 }, { "epoch": 2.7312476855344188, "grad_norm": 0.3262108831936048, "learning_rate": 3.911525302686025e-06, "loss": 0.4475, "step": 16593 }, { "epoch": 2.7314122697535352, "grad_norm": 0.3018341625636483, "learning_rate": 3.911059278855239e-06, "loss": 0.4259, "step": 16594 }, { "epoch": 2.7315768539726517, "grad_norm": 0.5083638087103188, "learning_rate": 3.9105932614507404e-06, "loss": 0.4358, "step": 16595 }, { "epoch": 2.731741438191768, "grad_norm": 0.39664415812454396, "learning_rate": 3.910127250477611e-06, "loss": 0.4399, "step": 16596 }, { "epoch": 2.7319060224108846, "grad_norm": 0.30830712980996544, "learning_rate": 3.90966124594094e-06, "loss": 0.4512, "step": 16597 }, { "epoch": 2.732070606630001, "grad_norm": 0.36449293175256364, "learning_rate": 3.909195247845808e-06, "loss": 0.4285, "step": 16598 }, { "epoch": 2.732235190849117, "grad_norm": 0.2824279588786678, "learning_rate": 3.9087292561973025e-06, "loss": 0.4244, "step": 16599 }, { "epoch": 2.7323997750682336, "grad_norm": 0.3732075192421565, "learning_rate": 3.908263271000509e-06, "loss": 0.4402, "step": 16600 }, { "epoch": 2.73256435928735, "grad_norm": 0.3843866741677414, "learning_rate": 3.907797292260507e-06, "loss": 0.4059, "step": 16601 }, { "epoch": 2.7327289435064666, "grad_norm": 0.5765114906730495, "learning_rate": 3.907331319982388e-06, "loss": 0.443, "step": 16602 }, { "epoch": 2.732893527725583, "grad_norm": 0.3048172546078079, "learning_rate": 3.9068653541712295e-06, "loss": 0.4256, "step": 16603 }, { "epoch": 2.7330581119446995, "grad_norm": 0.2729739798913893, "learning_rate": 3.906399394832123e-06, "loss": 0.4412, "step": 16604 }, { "epoch": 2.733222696163816, "grad_norm": 0.28601395344284813, "learning_rate": 3.905933441970147e-06, "loss": 0.428, "step": 16605 }, { "epoch": 2.7333872803829324, "grad_norm": 0.2899991559501144, "learning_rate": 3.90546749559039e-06, "loss": 0.4449, "step": 16606 }, { "epoch": 2.733551864602049, "grad_norm": 0.383461257118734, "learning_rate": 3.905001555697934e-06, "loss": 0.4328, "step": 16607 }, { "epoch": 2.7337164488211654, "grad_norm": 0.3344920575647727, "learning_rate": 3.904535622297862e-06, "loss": 0.4243, "step": 16608 }, { "epoch": 2.733881033040282, "grad_norm": 0.311637742348854, "learning_rate": 3.9040696953952615e-06, "loss": 0.4287, "step": 16609 }, { "epoch": 2.7340456172593983, "grad_norm": 0.39159452522404453, "learning_rate": 3.9036037749952134e-06, "loss": 0.4326, "step": 16610 }, { "epoch": 2.734210201478515, "grad_norm": 0.3379876446766164, "learning_rate": 3.903137861102804e-06, "loss": 0.4384, "step": 16611 }, { "epoch": 2.7343747856976313, "grad_norm": 0.32601922212061046, "learning_rate": 3.902671953723115e-06, "loss": 0.4263, "step": 16612 }, { "epoch": 2.7345393699167477, "grad_norm": 0.3037710053756364, "learning_rate": 3.902206052861233e-06, "loss": 0.4291, "step": 16613 }, { "epoch": 2.734703954135864, "grad_norm": 0.39634282364124573, "learning_rate": 3.901740158522239e-06, "loss": 0.4384, "step": 16614 }, { "epoch": 2.7348685383549807, "grad_norm": 0.3083150422178533, "learning_rate": 3.901274270711217e-06, "loss": 0.4434, "step": 16615 }, { "epoch": 2.735033122574097, "grad_norm": 0.2880021907018946, "learning_rate": 3.900808389433251e-06, "loss": 0.4219, "step": 16616 }, { "epoch": 2.7351977067932136, "grad_norm": 0.28308455496725854, "learning_rate": 3.900342514693425e-06, "loss": 0.4293, "step": 16617 }, { "epoch": 2.73536229101233, "grad_norm": 0.33136345258347305, "learning_rate": 3.899876646496823e-06, "loss": 0.42, "step": 16618 }, { "epoch": 2.7355268752314466, "grad_norm": 0.30752082102442413, "learning_rate": 3.899410784848526e-06, "loss": 0.4239, "step": 16619 }, { "epoch": 2.735691459450563, "grad_norm": 0.42132254252174034, "learning_rate": 3.89894492975362e-06, "loss": 0.4559, "step": 16620 }, { "epoch": 2.7358560436696795, "grad_norm": 0.3928050979267358, "learning_rate": 3.898479081217188e-06, "loss": 0.4629, "step": 16621 }, { "epoch": 2.736020627888796, "grad_norm": 0.4182456753045492, "learning_rate": 3.89801323924431e-06, "loss": 0.4231, "step": 16622 }, { "epoch": 2.7361852121079124, "grad_norm": 0.401752609400978, "learning_rate": 3.897547403840073e-06, "loss": 0.4278, "step": 16623 }, { "epoch": 2.736349796327029, "grad_norm": 0.3373797177237633, "learning_rate": 3.897081575009557e-06, "loss": 0.4372, "step": 16624 }, { "epoch": 2.7365143805461454, "grad_norm": 0.31456143649250423, "learning_rate": 3.896615752757847e-06, "loss": 0.4234, "step": 16625 }, { "epoch": 2.736678964765262, "grad_norm": 0.4505892755130963, "learning_rate": 3.896149937090024e-06, "loss": 0.4297, "step": 16626 }, { "epoch": 2.7368435489843783, "grad_norm": 0.3310531708308449, "learning_rate": 3.895684128011174e-06, "loss": 0.4197, "step": 16627 }, { "epoch": 2.737008133203495, "grad_norm": 0.32229938221905113, "learning_rate": 3.895218325526376e-06, "loss": 0.4275, "step": 16628 }, { "epoch": 2.7371727174226113, "grad_norm": 0.3673752839728046, "learning_rate": 3.894752529640714e-06, "loss": 0.445, "step": 16629 }, { "epoch": 2.7373373016417277, "grad_norm": 0.3024556838370898, "learning_rate": 3.894286740359272e-06, "loss": 0.4344, "step": 16630 }, { "epoch": 2.737501885860844, "grad_norm": 0.355569626536372, "learning_rate": 3.8938209576871305e-06, "loss": 0.4331, "step": 16631 }, { "epoch": 2.7376664700799607, "grad_norm": 0.3329281372530806, "learning_rate": 3.893355181629374e-06, "loss": 0.425, "step": 16632 }, { "epoch": 2.737831054299077, "grad_norm": 0.2996473940409809, "learning_rate": 3.892889412191082e-06, "loss": 0.4515, "step": 16633 }, { "epoch": 2.7379956385181936, "grad_norm": 0.31016466286381267, "learning_rate": 3.8924236493773395e-06, "loss": 0.431, "step": 16634 }, { "epoch": 2.73816022273731, "grad_norm": 0.3464979108227558, "learning_rate": 3.8919578931932275e-06, "loss": 0.4303, "step": 16635 }, { "epoch": 2.7383248069564265, "grad_norm": 0.31263854785111733, "learning_rate": 3.891492143643829e-06, "loss": 0.4384, "step": 16636 }, { "epoch": 2.738489391175543, "grad_norm": 0.32162531400133154, "learning_rate": 3.891026400734224e-06, "loss": 0.4422, "step": 16637 }, { "epoch": 2.7386539753946595, "grad_norm": 0.31048427444162374, "learning_rate": 3.890560664469496e-06, "loss": 0.4208, "step": 16638 }, { "epoch": 2.738818559613776, "grad_norm": 0.3184756943376093, "learning_rate": 3.890094934854727e-06, "loss": 0.4388, "step": 16639 }, { "epoch": 2.7389831438328924, "grad_norm": 0.32833610141557723, "learning_rate": 3.889629211894999e-06, "loss": 0.4252, "step": 16640 }, { "epoch": 2.7391477280520085, "grad_norm": 0.32894370976124304, "learning_rate": 3.889163495595393e-06, "loss": 0.4323, "step": 16641 }, { "epoch": 2.739312312271125, "grad_norm": 0.7708130390270826, "learning_rate": 3.888697785960991e-06, "loss": 0.4433, "step": 16642 }, { "epoch": 2.7394768964902414, "grad_norm": 0.2399325823005569, "learning_rate": 3.888232082996877e-06, "loss": 0.4197, "step": 16643 }, { "epoch": 2.739641480709358, "grad_norm": 0.700859157485469, "learning_rate": 3.887766386708129e-06, "loss": 0.4392, "step": 16644 }, { "epoch": 2.7398060649284743, "grad_norm": 0.4217182459833526, "learning_rate": 3.887300697099829e-06, "loss": 0.4433, "step": 16645 }, { "epoch": 2.739970649147591, "grad_norm": 0.46884546102603025, "learning_rate": 3.8868350141770595e-06, "loss": 0.4262, "step": 16646 }, { "epoch": 2.7401352333667073, "grad_norm": 0.3110915519154355, "learning_rate": 3.8863693379449015e-06, "loss": 0.4337, "step": 16647 }, { "epoch": 2.7402998175858237, "grad_norm": 0.3352084069388875, "learning_rate": 3.8859036684084364e-06, "loss": 0.4221, "step": 16648 }, { "epoch": 2.74046440180494, "grad_norm": 0.36378279540182057, "learning_rate": 3.885438005572746e-06, "loss": 0.4214, "step": 16649 }, { "epoch": 2.7406289860240567, "grad_norm": 0.3158512433778828, "learning_rate": 3.884972349442911e-06, "loss": 0.4154, "step": 16650 }, { "epoch": 2.740793570243173, "grad_norm": 0.307548477999172, "learning_rate": 3.8845067000240125e-06, "loss": 0.4315, "step": 16651 }, { "epoch": 2.7409581544622896, "grad_norm": 0.38812292332638837, "learning_rate": 3.8840410573211286e-06, "loss": 0.4481, "step": 16652 }, { "epoch": 2.741122738681406, "grad_norm": 0.2783823676112584, "learning_rate": 3.883575421339347e-06, "loss": 0.4244, "step": 16653 }, { "epoch": 2.7412873229005226, "grad_norm": 0.3557886541737026, "learning_rate": 3.8831097920837395e-06, "loss": 0.4439, "step": 16654 }, { "epoch": 2.741451907119639, "grad_norm": 0.32008413498538474, "learning_rate": 3.882644169559396e-06, "loss": 0.4327, "step": 16655 }, { "epoch": 2.7416164913387555, "grad_norm": 0.304571256600945, "learning_rate": 3.882178553771391e-06, "loss": 0.4448, "step": 16656 }, { "epoch": 2.741781075557872, "grad_norm": 0.27985675276001454, "learning_rate": 3.881712944724808e-06, "loss": 0.4445, "step": 16657 }, { "epoch": 2.7419456597769885, "grad_norm": 0.37539069739389663, "learning_rate": 3.881247342424726e-06, "loss": 0.4306, "step": 16658 }, { "epoch": 2.742110243996105, "grad_norm": 0.3604991298614318, "learning_rate": 3.880781746876226e-06, "loss": 0.4246, "step": 16659 }, { "epoch": 2.7422748282152214, "grad_norm": 0.34497239921676615, "learning_rate": 3.880316158084388e-06, "loss": 0.431, "step": 16660 }, { "epoch": 2.742439412434338, "grad_norm": 0.32782234520476394, "learning_rate": 3.879850576054293e-06, "loss": 0.4266, "step": 16661 }, { "epoch": 2.7426039966534543, "grad_norm": 0.355377901894709, "learning_rate": 3.879385000791022e-06, "loss": 0.4313, "step": 16662 }, { "epoch": 2.742768580872571, "grad_norm": 0.3622834351699398, "learning_rate": 3.878919432299652e-06, "loss": 0.4421, "step": 16663 }, { "epoch": 2.7429331650916873, "grad_norm": 0.3121404542685007, "learning_rate": 3.878453870585268e-06, "loss": 0.4405, "step": 16664 }, { "epoch": 2.7430977493108037, "grad_norm": 0.5422525704023888, "learning_rate": 3.877988315652946e-06, "loss": 0.4363, "step": 16665 }, { "epoch": 2.7432623335299198, "grad_norm": 0.33630247392790386, "learning_rate": 3.877522767507767e-06, "loss": 0.4226, "step": 16666 }, { "epoch": 2.7434269177490362, "grad_norm": 0.3488597023051155, "learning_rate": 3.877057226154812e-06, "loss": 0.4228, "step": 16667 }, { "epoch": 2.7435915019681527, "grad_norm": 0.5512200340488606, "learning_rate": 3.876591691599159e-06, "loss": 0.4241, "step": 16668 }, { "epoch": 2.743756086187269, "grad_norm": 0.2967852679199231, "learning_rate": 3.87612616384589e-06, "loss": 0.451, "step": 16669 }, { "epoch": 2.7439206704063857, "grad_norm": 0.329573071370927, "learning_rate": 3.875660642900081e-06, "loss": 0.4412, "step": 16670 }, { "epoch": 2.744085254625502, "grad_norm": 0.3357303574720884, "learning_rate": 3.875195128766815e-06, "loss": 0.4352, "step": 16671 }, { "epoch": 2.7442498388446186, "grad_norm": 0.3118697057421077, "learning_rate": 3.874729621451172e-06, "loss": 0.422, "step": 16672 }, { "epoch": 2.744414423063735, "grad_norm": 0.324592192857279, "learning_rate": 3.874264120958227e-06, "loss": 0.433, "step": 16673 }, { "epoch": 2.7445790072828515, "grad_norm": 0.356134316887983, "learning_rate": 3.873798627293065e-06, "loss": 0.4342, "step": 16674 }, { "epoch": 2.744743591501968, "grad_norm": 0.5216140005545202, "learning_rate": 3.873333140460761e-06, "loss": 0.4235, "step": 16675 }, { "epoch": 2.7449081757210845, "grad_norm": 0.5477538161873636, "learning_rate": 3.872867660466396e-06, "loss": 0.4384, "step": 16676 }, { "epoch": 2.745072759940201, "grad_norm": 0.29347997209182203, "learning_rate": 3.872402187315048e-06, "loss": 0.4281, "step": 16677 }, { "epoch": 2.7452373441593174, "grad_norm": 0.3099905198378355, "learning_rate": 3.871936721011797e-06, "loss": 0.4214, "step": 16678 }, { "epoch": 2.745401928378434, "grad_norm": 0.4122803175887726, "learning_rate": 3.8714712615617226e-06, "loss": 0.445, "step": 16679 }, { "epoch": 2.7455665125975504, "grad_norm": 0.9142629401607756, "learning_rate": 3.8710058089699025e-06, "loss": 0.4357, "step": 16680 }, { "epoch": 2.745731096816667, "grad_norm": 0.3001133513388538, "learning_rate": 3.870540363241417e-06, "loss": 0.4291, "step": 16681 }, { "epoch": 2.7458956810357833, "grad_norm": 0.4168866990618788, "learning_rate": 3.8700749243813415e-06, "loss": 0.4269, "step": 16682 }, { "epoch": 2.7460602652548998, "grad_norm": 0.30645876076786355, "learning_rate": 3.86960949239476e-06, "loss": 0.4262, "step": 16683 }, { "epoch": 2.7462248494740162, "grad_norm": 0.30916232701484836, "learning_rate": 3.869144067286745e-06, "loss": 0.4342, "step": 16684 }, { "epoch": 2.7463894336931327, "grad_norm": 0.3309646231801186, "learning_rate": 3.868678649062381e-06, "loss": 0.4199, "step": 16685 }, { "epoch": 2.746554017912249, "grad_norm": 0.36138802785749186, "learning_rate": 3.868213237726742e-06, "loss": 0.4446, "step": 16686 }, { "epoch": 2.7467186021313656, "grad_norm": 0.33220801329003613, "learning_rate": 3.8677478332849084e-06, "loss": 0.4171, "step": 16687 }, { "epoch": 2.746883186350482, "grad_norm": 0.30982777008522744, "learning_rate": 3.867282435741959e-06, "loss": 0.4282, "step": 16688 }, { "epoch": 2.7470477705695986, "grad_norm": 0.36053955933065407, "learning_rate": 3.8668170451029694e-06, "loss": 0.4546, "step": 16689 }, { "epoch": 2.747212354788715, "grad_norm": 0.3115291673408375, "learning_rate": 3.8663516613730204e-06, "loss": 0.4223, "step": 16690 }, { "epoch": 2.7473769390078315, "grad_norm": 0.36712438720724067, "learning_rate": 3.865886284557188e-06, "loss": 0.4214, "step": 16691 }, { "epoch": 2.747541523226948, "grad_norm": 0.4542084555280313, "learning_rate": 3.865420914660553e-06, "loss": 0.43, "step": 16692 }, { "epoch": 2.7477061074460645, "grad_norm": 0.31832298830736644, "learning_rate": 3.86495555168819e-06, "loss": 0.4374, "step": 16693 }, { "epoch": 2.747870691665181, "grad_norm": 0.3556434858838793, "learning_rate": 3.864490195645182e-06, "loss": 0.4368, "step": 16694 }, { "epoch": 2.7480352758842974, "grad_norm": 0.3578218709637887, "learning_rate": 3.864024846536602e-06, "loss": 0.4255, "step": 16695 }, { "epoch": 2.748199860103414, "grad_norm": 0.272539267466431, "learning_rate": 3.863559504367527e-06, "loss": 0.4247, "step": 16696 }, { "epoch": 2.7483644443225304, "grad_norm": 0.36092119887107127, "learning_rate": 3.863094169143038e-06, "loss": 0.4139, "step": 16697 }, { "epoch": 2.748529028541647, "grad_norm": 0.3138993620997971, "learning_rate": 3.862628840868211e-06, "loss": 0.4451, "step": 16698 }, { "epoch": 2.7486936127607633, "grad_norm": 0.36947159209977626, "learning_rate": 3.862163519548124e-06, "loss": 0.4372, "step": 16699 }, { "epoch": 2.7488581969798798, "grad_norm": 0.3174300936180199, "learning_rate": 3.861698205187853e-06, "loss": 0.4305, "step": 16700 }, { "epoch": 2.7490227811989962, "grad_norm": 0.290319003808263, "learning_rate": 3.861232897792478e-06, "loss": 0.4328, "step": 16701 }, { "epoch": 2.7491873654181127, "grad_norm": 0.3098957433837338, "learning_rate": 3.860767597367076e-06, "loss": 0.4244, "step": 16702 }, { "epoch": 2.749351949637229, "grad_norm": 0.32129061982609347, "learning_rate": 3.860302303916718e-06, "loss": 0.4378, "step": 16703 }, { "epoch": 2.7495165338563456, "grad_norm": 0.3653148104201589, "learning_rate": 3.85983701744649e-06, "loss": 0.4245, "step": 16704 }, { "epoch": 2.749681118075462, "grad_norm": 0.7149367519498637, "learning_rate": 3.859371737961464e-06, "loss": 0.4298, "step": 16705 }, { "epoch": 2.7498457022945786, "grad_norm": 0.36724382587372284, "learning_rate": 3.858906465466718e-06, "loss": 0.4278, "step": 16706 }, { "epoch": 2.7500102865136946, "grad_norm": 0.2925452421742609, "learning_rate": 3.858441199967328e-06, "loss": 0.4211, "step": 16707 }, { "epoch": 2.750174870732811, "grad_norm": 0.2683395976583217, "learning_rate": 3.857975941468373e-06, "loss": 0.4175, "step": 16708 }, { "epoch": 2.7503394549519276, "grad_norm": 0.3136844198419058, "learning_rate": 3.857510689974927e-06, "loss": 0.447, "step": 16709 }, { "epoch": 2.750504039171044, "grad_norm": 0.3030603614875939, "learning_rate": 3.857045445492068e-06, "loss": 0.4385, "step": 16710 }, { "epoch": 2.7506686233901605, "grad_norm": 0.283294155437198, "learning_rate": 3.856580208024873e-06, "loss": 0.4033, "step": 16711 }, { "epoch": 2.750833207609277, "grad_norm": 0.5659848655842254, "learning_rate": 3.8561149775784174e-06, "loss": 0.4346, "step": 16712 }, { "epoch": 2.7509977918283934, "grad_norm": 0.3151413366114171, "learning_rate": 3.85564975415778e-06, "loss": 0.4431, "step": 16713 }, { "epoch": 2.75116237604751, "grad_norm": 0.2758156307202911, "learning_rate": 3.855184537768033e-06, "loss": 0.4267, "step": 16714 }, { "epoch": 2.7513269602666264, "grad_norm": 0.3780966181832298, "learning_rate": 3.854719328414257e-06, "loss": 0.4239, "step": 16715 }, { "epoch": 2.751491544485743, "grad_norm": 0.2821931796559113, "learning_rate": 3.854254126101525e-06, "loss": 0.417, "step": 16716 }, { "epoch": 2.7516561287048593, "grad_norm": 0.3266347677830889, "learning_rate": 3.853788930834914e-06, "loss": 0.4276, "step": 16717 }, { "epoch": 2.751820712923976, "grad_norm": 0.312747836556435, "learning_rate": 3.853323742619501e-06, "loss": 0.4294, "step": 16718 }, { "epoch": 2.7519852971430923, "grad_norm": 0.35643769445508905, "learning_rate": 3.852858561460361e-06, "loss": 0.4214, "step": 16719 }, { "epoch": 2.7521498813622087, "grad_norm": 0.3929579469658608, "learning_rate": 3.8523933873625695e-06, "loss": 0.4401, "step": 16720 }, { "epoch": 2.752314465581325, "grad_norm": 0.32304296447919406, "learning_rate": 3.851928220331202e-06, "loss": 0.4291, "step": 16721 }, { "epoch": 2.7524790498004417, "grad_norm": 0.29631745968376494, "learning_rate": 3.851463060371337e-06, "loss": 0.4295, "step": 16722 }, { "epoch": 2.752643634019558, "grad_norm": 0.31666231721168825, "learning_rate": 3.850997907488048e-06, "loss": 0.4301, "step": 16723 }, { "epoch": 2.7528082182386746, "grad_norm": 0.3134924938039652, "learning_rate": 3.850532761686411e-06, "loss": 0.4151, "step": 16724 }, { "epoch": 2.752972802457791, "grad_norm": 0.3341055846699733, "learning_rate": 3.850067622971502e-06, "loss": 0.4158, "step": 16725 }, { "epoch": 2.7531373866769075, "grad_norm": 0.3944613390653974, "learning_rate": 3.849602491348394e-06, "loss": 0.4179, "step": 16726 }, { "epoch": 2.753301970896024, "grad_norm": 0.34549310637003094, "learning_rate": 3.849137366822165e-06, "loss": 0.4228, "step": 16727 }, { "epoch": 2.7534665551151405, "grad_norm": 0.48093379685876775, "learning_rate": 3.848672249397888e-06, "loss": 0.4329, "step": 16728 }, { "epoch": 2.753631139334257, "grad_norm": 0.2653898037593713, "learning_rate": 3.8482071390806405e-06, "loss": 0.4263, "step": 16729 }, { "epoch": 2.7537957235533734, "grad_norm": 0.3569534889161102, "learning_rate": 3.8477420358754955e-06, "loss": 0.4311, "step": 16730 }, { "epoch": 2.75396030777249, "grad_norm": 0.37801092775394957, "learning_rate": 3.847276939787531e-06, "loss": 0.4154, "step": 16731 }, { "epoch": 2.754124891991606, "grad_norm": 0.28751365365028175, "learning_rate": 3.84681185082182e-06, "loss": 0.4275, "step": 16732 }, { "epoch": 2.7542894762107224, "grad_norm": 0.35444783230134985, "learning_rate": 3.846346768983435e-06, "loss": 0.4274, "step": 16733 }, { "epoch": 2.754454060429839, "grad_norm": 0.35730086522911014, "learning_rate": 3.845881694277456e-06, "loss": 0.4165, "step": 16734 }, { "epoch": 2.7546186446489553, "grad_norm": 0.34307664484838024, "learning_rate": 3.845416626708952e-06, "loss": 0.4241, "step": 16735 }, { "epoch": 2.754783228868072, "grad_norm": 0.5066426180430484, "learning_rate": 3.844951566283003e-06, "loss": 0.4287, "step": 16736 }, { "epoch": 2.7549478130871883, "grad_norm": 0.2992610263698568, "learning_rate": 3.84448651300468e-06, "loss": 0.4282, "step": 16737 }, { "epoch": 2.7551123973063048, "grad_norm": 0.36298104637670003, "learning_rate": 3.844021466879059e-06, "loss": 0.4406, "step": 16738 }, { "epoch": 2.755276981525421, "grad_norm": 0.25724806667635997, "learning_rate": 3.843556427911214e-06, "loss": 0.4136, "step": 16739 }, { "epoch": 2.7554415657445377, "grad_norm": 0.29443173858665445, "learning_rate": 3.843091396106218e-06, "loss": 0.4388, "step": 16740 }, { "epoch": 2.755606149963654, "grad_norm": 0.525197192040113, "learning_rate": 3.842626371469148e-06, "loss": 0.4149, "step": 16741 }, { "epoch": 2.7557707341827706, "grad_norm": 0.31673011717278055, "learning_rate": 3.842161354005076e-06, "loss": 0.4333, "step": 16742 }, { "epoch": 2.755935318401887, "grad_norm": 0.3159479306854577, "learning_rate": 3.8416963437190776e-06, "loss": 0.4299, "step": 16743 }, { "epoch": 2.7560999026210036, "grad_norm": 0.3180567009797619, "learning_rate": 3.841231340616224e-06, "loss": 0.4415, "step": 16744 }, { "epoch": 2.75626448684012, "grad_norm": 0.3046299149431181, "learning_rate": 3.840766344701594e-06, "loss": 0.431, "step": 16745 }, { "epoch": 2.7564290710592365, "grad_norm": 0.32926788198908075, "learning_rate": 3.840301355980257e-06, "loss": 0.4296, "step": 16746 }, { "epoch": 2.756593655278353, "grad_norm": 0.3202232197890211, "learning_rate": 3.839836374457288e-06, "loss": 0.4212, "step": 16747 }, { "epoch": 2.7567582394974695, "grad_norm": 0.3292743896916637, "learning_rate": 3.839371400137761e-06, "loss": 0.4425, "step": 16748 }, { "epoch": 2.756922823716586, "grad_norm": 0.3225566149924342, "learning_rate": 3.838906433026749e-06, "loss": 0.4294, "step": 16749 }, { "epoch": 2.7570874079357024, "grad_norm": 0.43059850699838753, "learning_rate": 3.838441473129328e-06, "loss": 0.4374, "step": 16750 }, { "epoch": 2.757251992154819, "grad_norm": 0.3935972124510023, "learning_rate": 3.837976520450567e-06, "loss": 0.4468, "step": 16751 }, { "epoch": 2.7574165763739353, "grad_norm": 0.3989199727825013, "learning_rate": 3.837511574995545e-06, "loss": 0.4457, "step": 16752 }, { "epoch": 2.757581160593052, "grad_norm": 0.3078040887317582, "learning_rate": 3.837046636769331e-06, "loss": 0.4484, "step": 16753 }, { "epoch": 2.7577457448121683, "grad_norm": 0.4155164881385347, "learning_rate": 3.836581705776998e-06, "loss": 0.4235, "step": 16754 }, { "epoch": 2.7579103290312847, "grad_norm": 0.9218484543356673, "learning_rate": 3.836116782023624e-06, "loss": 0.427, "step": 16755 }, { "epoch": 2.758074913250401, "grad_norm": 0.4694404570897363, "learning_rate": 3.835651865514277e-06, "loss": 0.4346, "step": 16756 }, { "epoch": 2.7582394974695177, "grad_norm": 0.3609226084141087, "learning_rate": 3.835186956254031e-06, "loss": 0.4463, "step": 16757 }, { "epoch": 2.758404081688634, "grad_norm": 0.38001556363092925, "learning_rate": 3.834722054247959e-06, "loss": 0.4076, "step": 16758 }, { "epoch": 2.7585686659077506, "grad_norm": 0.3146518163215466, "learning_rate": 3.834257159501137e-06, "loss": 0.4341, "step": 16759 }, { "epoch": 2.758733250126867, "grad_norm": 0.3258618934792849, "learning_rate": 3.8337922720186326e-06, "loss": 0.4361, "step": 16760 }, { "epoch": 2.7588978343459836, "grad_norm": 0.41980482455061724, "learning_rate": 3.8333273918055226e-06, "loss": 0.4028, "step": 16761 }, { "epoch": 2.7590624185651, "grad_norm": 0.3035359627965373, "learning_rate": 3.832862518866879e-06, "loss": 0.4333, "step": 16762 }, { "epoch": 2.7592270027842165, "grad_norm": 0.2792010265807511, "learning_rate": 3.83239765320777e-06, "loss": 0.4239, "step": 16763 }, { "epoch": 2.759391587003333, "grad_norm": 0.3004060675697017, "learning_rate": 3.8319327948332744e-06, "loss": 0.4394, "step": 16764 }, { "epoch": 2.7595561712224494, "grad_norm": 0.46446135062881594, "learning_rate": 3.8314679437484594e-06, "loss": 0.4369, "step": 16765 }, { "epoch": 2.759720755441566, "grad_norm": 0.44409316368375035, "learning_rate": 3.8310030999584014e-06, "loss": 0.4264, "step": 16766 }, { "epoch": 2.7598853396606824, "grad_norm": 0.3156353402627495, "learning_rate": 3.830538263468169e-06, "loss": 0.404, "step": 16767 }, { "epoch": 2.760049923879799, "grad_norm": 0.5656328540429268, "learning_rate": 3.830073434282837e-06, "loss": 0.4191, "step": 16768 }, { "epoch": 2.7602145080989153, "grad_norm": 0.34012852273365457, "learning_rate": 3.829608612407476e-06, "loss": 0.415, "step": 16769 }, { "epoch": 2.760379092318032, "grad_norm": 0.3749393374332376, "learning_rate": 3.829143797847157e-06, "loss": 0.4375, "step": 16770 }, { "epoch": 2.7605436765371483, "grad_norm": 0.35255460757698165, "learning_rate": 3.828678990606955e-06, "loss": 0.4348, "step": 16771 }, { "epoch": 2.7607082607562647, "grad_norm": 0.26503899237119494, "learning_rate": 3.828214190691939e-06, "loss": 0.4285, "step": 16772 }, { "epoch": 2.760872844975381, "grad_norm": 0.2901410596208873, "learning_rate": 3.827749398107182e-06, "loss": 0.4175, "step": 16773 }, { "epoch": 2.7610374291944972, "grad_norm": 0.5798815384488164, "learning_rate": 3.827284612857754e-06, "loss": 0.4396, "step": 16774 }, { "epoch": 2.7612020134136137, "grad_norm": 0.3729746218682766, "learning_rate": 3.82681983494873e-06, "loss": 0.4217, "step": 16775 }, { "epoch": 2.76136659763273, "grad_norm": 0.3113239690103586, "learning_rate": 3.826355064385179e-06, "loss": 0.4367, "step": 16776 }, { "epoch": 2.7615311818518467, "grad_norm": 0.3720192886770401, "learning_rate": 3.825890301172171e-06, "loss": 0.435, "step": 16777 }, { "epoch": 2.761695766070963, "grad_norm": 0.29142275136633367, "learning_rate": 3.82542554531478e-06, "loss": 0.413, "step": 16778 }, { "epoch": 2.7618603502900796, "grad_norm": 0.3192271465070386, "learning_rate": 3.824960796818076e-06, "loss": 0.4261, "step": 16779 }, { "epoch": 2.762024934509196, "grad_norm": 0.3484695951419301, "learning_rate": 3.82449605568713e-06, "loss": 0.4347, "step": 16780 }, { "epoch": 2.7621895187283125, "grad_norm": 0.31107231719861583, "learning_rate": 3.824031321927014e-06, "loss": 0.4563, "step": 16781 }, { "epoch": 2.762354102947429, "grad_norm": 0.30327560992114966, "learning_rate": 3.823566595542798e-06, "loss": 0.4013, "step": 16782 }, { "epoch": 2.7625186871665455, "grad_norm": 0.392686760340269, "learning_rate": 3.823101876539556e-06, "loss": 0.4263, "step": 16783 }, { "epoch": 2.762683271385662, "grad_norm": 0.4008420205309258, "learning_rate": 3.822637164922352e-06, "loss": 0.4276, "step": 16784 }, { "epoch": 2.7628478556047784, "grad_norm": 0.32373652463314856, "learning_rate": 3.822172460696264e-06, "loss": 0.4248, "step": 16785 }, { "epoch": 2.763012439823895, "grad_norm": 0.32680014460348633, "learning_rate": 3.821707763866358e-06, "loss": 0.4492, "step": 16786 }, { "epoch": 2.7631770240430114, "grad_norm": 0.35168595861593227, "learning_rate": 3.821243074437706e-06, "loss": 0.443, "step": 16787 }, { "epoch": 2.763341608262128, "grad_norm": 0.3233521352290655, "learning_rate": 3.820778392415379e-06, "loss": 0.4318, "step": 16788 }, { "epoch": 2.7635061924812443, "grad_norm": 0.5374254233166486, "learning_rate": 3.820313717804448e-06, "loss": 0.4281, "step": 16789 }, { "epoch": 2.7636707767003608, "grad_norm": 0.35161183229666393, "learning_rate": 3.819849050609982e-06, "loss": 0.4144, "step": 16790 }, { "epoch": 2.7638353609194772, "grad_norm": 0.46119190096611545, "learning_rate": 3.81938439083705e-06, "loss": 0.4266, "step": 16791 }, { "epoch": 2.7639999451385937, "grad_norm": 0.2709732273695854, "learning_rate": 3.818919738490726e-06, "loss": 0.4288, "step": 16792 }, { "epoch": 2.76416452935771, "grad_norm": 0.2927553770775444, "learning_rate": 3.818455093576078e-06, "loss": 0.4487, "step": 16793 }, { "epoch": 2.7643291135768266, "grad_norm": 0.33049937178502276, "learning_rate": 3.817990456098176e-06, "loss": 0.4283, "step": 16794 }, { "epoch": 2.764493697795943, "grad_norm": 0.5151043041028656, "learning_rate": 3.817525826062088e-06, "loss": 0.4361, "step": 16795 }, { "epoch": 2.7646582820150596, "grad_norm": 0.44716660786155266, "learning_rate": 3.8170612034728886e-06, "loss": 0.4351, "step": 16796 }, { "epoch": 2.764822866234176, "grad_norm": 0.3042095373540588, "learning_rate": 3.8165965883356435e-06, "loss": 0.4206, "step": 16797 }, { "epoch": 2.7649874504532925, "grad_norm": 0.3198591642725671, "learning_rate": 3.816131980655422e-06, "loss": 0.446, "step": 16798 }, { "epoch": 2.7651520346724086, "grad_norm": 0.3194612663957039, "learning_rate": 3.815667380437298e-06, "loss": 0.417, "step": 16799 }, { "epoch": 2.765316618891525, "grad_norm": 0.5235493512889577, "learning_rate": 3.815202787686337e-06, "loss": 0.4101, "step": 16800 }, { "epoch": 2.7654812031106415, "grad_norm": 0.2875249223023592, "learning_rate": 3.8147382024076104e-06, "loss": 0.4281, "step": 16801 }, { "epoch": 2.765645787329758, "grad_norm": 0.4026276153596572, "learning_rate": 3.8142736246061864e-06, "loss": 0.4598, "step": 16802 }, { "epoch": 2.7658103715488744, "grad_norm": 0.27404729719891796, "learning_rate": 3.813809054287135e-06, "loss": 0.4086, "step": 16803 }, { "epoch": 2.765974955767991, "grad_norm": 0.3145397887623035, "learning_rate": 3.8133444914555255e-06, "loss": 0.4493, "step": 16804 }, { "epoch": 2.7661395399871074, "grad_norm": 0.3397846067693523, "learning_rate": 3.8128799361164277e-06, "loss": 0.4198, "step": 16805 }, { "epoch": 2.766304124206224, "grad_norm": 0.35722500103087296, "learning_rate": 3.81241538827491e-06, "loss": 0.4365, "step": 16806 }, { "epoch": 2.7664687084253403, "grad_norm": 0.30238379013409583, "learning_rate": 3.8119508479360402e-06, "loss": 0.4608, "step": 16807 }, { "epoch": 2.766633292644457, "grad_norm": 0.3440353515744379, "learning_rate": 3.811486315104889e-06, "loss": 0.4381, "step": 16808 }, { "epoch": 2.7667978768635733, "grad_norm": 0.35348389908653965, "learning_rate": 3.8110217897865228e-06, "loss": 0.4347, "step": 16809 }, { "epoch": 2.7669624610826897, "grad_norm": 0.5701370655291501, "learning_rate": 3.810557271986013e-06, "loss": 0.4434, "step": 16810 }, { "epoch": 2.767127045301806, "grad_norm": 0.3437702405996044, "learning_rate": 3.8100927617084262e-06, "loss": 0.4401, "step": 16811 }, { "epoch": 2.7672916295209227, "grad_norm": 0.3378670724201335, "learning_rate": 3.8096282589588325e-06, "loss": 0.4188, "step": 16812 }, { "epoch": 2.767456213740039, "grad_norm": 0.35544705652232955, "learning_rate": 3.8091637637423014e-06, "loss": 0.4518, "step": 16813 }, { "epoch": 2.7676207979591556, "grad_norm": 0.31464462787067055, "learning_rate": 3.808699276063896e-06, "loss": 0.4274, "step": 16814 }, { "epoch": 2.767785382178272, "grad_norm": 0.2459535141240698, "learning_rate": 3.808234795928692e-06, "loss": 0.425, "step": 16815 }, { "epoch": 2.7679499663973886, "grad_norm": 0.3086223489073096, "learning_rate": 3.80777032334175e-06, "loss": 0.4215, "step": 16816 }, { "epoch": 2.768114550616505, "grad_norm": 0.26936310532731195, "learning_rate": 3.807305858308145e-06, "loss": 0.405, "step": 16817 }, { "epoch": 2.7682791348356215, "grad_norm": 0.3025565113277676, "learning_rate": 3.80684140083294e-06, "loss": 0.437, "step": 16818 }, { "epoch": 2.768443719054738, "grad_norm": 0.2957806754182231, "learning_rate": 3.8063769509212065e-06, "loss": 0.4352, "step": 16819 }, { "epoch": 2.7686083032738544, "grad_norm": 0.33027851952704007, "learning_rate": 3.8059125085780105e-06, "loss": 0.4393, "step": 16820 }, { "epoch": 2.768772887492971, "grad_norm": 0.36777594773645805, "learning_rate": 3.8054480738084195e-06, "loss": 0.4286, "step": 16821 }, { "epoch": 2.7689374717120874, "grad_norm": 0.361574667511148, "learning_rate": 3.804983646617503e-06, "loss": 0.418, "step": 16822 }, { "epoch": 2.769102055931204, "grad_norm": 0.36971154574152404, "learning_rate": 3.804519227010326e-06, "loss": 0.4427, "step": 16823 }, { "epoch": 2.7692666401503203, "grad_norm": 0.32077205106904844, "learning_rate": 3.8040548149919594e-06, "loss": 0.4231, "step": 16824 }, { "epoch": 2.769431224369437, "grad_norm": 0.2944188371284831, "learning_rate": 3.8035904105674672e-06, "loss": 0.4175, "step": 16825 }, { "epoch": 2.7695958085885533, "grad_norm": 0.31903723649409144, "learning_rate": 3.8031260137419207e-06, "loss": 0.4325, "step": 16826 }, { "epoch": 2.7697603928076697, "grad_norm": 0.3292297061095488, "learning_rate": 3.802661624520384e-06, "loss": 0.4273, "step": 16827 }, { "epoch": 2.769924977026786, "grad_norm": 0.3541969818547337, "learning_rate": 3.802197242907924e-06, "loss": 0.4334, "step": 16828 }, { "epoch": 2.7700895612459027, "grad_norm": 0.3672254462240884, "learning_rate": 3.8017328689096106e-06, "loss": 0.4251, "step": 16829 }, { "epoch": 2.770254145465019, "grad_norm": 0.3457483782958529, "learning_rate": 3.801268502530509e-06, "loss": 0.4237, "step": 16830 }, { "epoch": 2.7704187296841356, "grad_norm": 0.28709007258308455, "learning_rate": 3.800804143775687e-06, "loss": 0.4207, "step": 16831 }, { "epoch": 2.770583313903252, "grad_norm": 0.2958632409294842, "learning_rate": 3.8003397926502106e-06, "loss": 0.4531, "step": 16832 }, { "epoch": 2.7707478981223685, "grad_norm": 0.35996068001301706, "learning_rate": 3.7998754491591478e-06, "loss": 0.4378, "step": 16833 }, { "epoch": 2.770912482341485, "grad_norm": 0.45725835910002477, "learning_rate": 3.7994111133075653e-06, "loss": 0.4312, "step": 16834 }, { "epoch": 2.7710770665606015, "grad_norm": 0.37072152053424307, "learning_rate": 3.7989467851005267e-06, "loss": 0.4404, "step": 16835 }, { "epoch": 2.771241650779718, "grad_norm": 0.38066903273037256, "learning_rate": 3.7984824645431036e-06, "loss": 0.4407, "step": 16836 }, { "epoch": 2.7714062349988344, "grad_norm": 0.33648006260136515, "learning_rate": 3.7980181516403584e-06, "loss": 0.4347, "step": 16837 }, { "epoch": 2.771570819217951, "grad_norm": 0.3286620878957204, "learning_rate": 3.797553846397359e-06, "loss": 0.4404, "step": 16838 }, { "epoch": 2.7717354034370674, "grad_norm": 0.3275340096220997, "learning_rate": 3.7970895488191717e-06, "loss": 0.4444, "step": 16839 }, { "epoch": 2.771899987656184, "grad_norm": 0.2860578424125304, "learning_rate": 3.7966252589108634e-06, "loss": 0.4433, "step": 16840 }, { "epoch": 2.7720645718753, "grad_norm": 0.7913351638025952, "learning_rate": 3.7961609766774994e-06, "loss": 0.4224, "step": 16841 }, { "epoch": 2.7722291560944163, "grad_norm": 0.4807048698131998, "learning_rate": 3.795696702124145e-06, "loss": 0.4401, "step": 16842 }, { "epoch": 2.772393740313533, "grad_norm": 0.37526450105030956, "learning_rate": 3.7952324352558676e-06, "loss": 0.4269, "step": 16843 }, { "epoch": 2.7725583245326493, "grad_norm": 0.2909990302337196, "learning_rate": 3.7947681760777325e-06, "loss": 0.4238, "step": 16844 }, { "epoch": 2.7727229087517657, "grad_norm": 0.3265977697998563, "learning_rate": 3.7943039245948074e-06, "loss": 0.4365, "step": 16845 }, { "epoch": 2.772887492970882, "grad_norm": 0.3940798235253495, "learning_rate": 3.7938396808121525e-06, "loss": 0.4415, "step": 16846 }, { "epoch": 2.7730520771899987, "grad_norm": 0.320725253909283, "learning_rate": 3.793375444734841e-06, "loss": 0.4166, "step": 16847 }, { "epoch": 2.773216661409115, "grad_norm": 0.3463614463862739, "learning_rate": 3.7929112163679314e-06, "loss": 0.4337, "step": 16848 }, { "epoch": 2.7733812456282316, "grad_norm": 0.5034829550088746, "learning_rate": 3.7924469957164952e-06, "loss": 0.4265, "step": 16849 }, { "epoch": 2.773545829847348, "grad_norm": 0.40248275758221413, "learning_rate": 3.791982782785594e-06, "loss": 0.4103, "step": 16850 }, { "epoch": 2.7737104140664646, "grad_norm": 0.3980328594843701, "learning_rate": 3.7915185775802934e-06, "loss": 0.424, "step": 16851 }, { "epoch": 2.773874998285581, "grad_norm": 0.3646532818983775, "learning_rate": 3.7910543801056603e-06, "loss": 0.4261, "step": 16852 }, { "epoch": 2.7740395825046975, "grad_norm": 0.3362183017590112, "learning_rate": 3.7905901903667576e-06, "loss": 0.4297, "step": 16853 }, { "epoch": 2.774204166723814, "grad_norm": 0.37016511030631144, "learning_rate": 3.7901260083686523e-06, "loss": 0.4294, "step": 16854 }, { "epoch": 2.7743687509429305, "grad_norm": 0.361155081565757, "learning_rate": 3.7896618341164087e-06, "loss": 0.4351, "step": 16855 }, { "epoch": 2.774533335162047, "grad_norm": 0.3273799523643519, "learning_rate": 3.789197667615093e-06, "loss": 0.4266, "step": 16856 }, { "epoch": 2.7746979193811634, "grad_norm": 0.33376176425607657, "learning_rate": 3.7887335088697676e-06, "loss": 0.4245, "step": 16857 }, { "epoch": 2.77486250360028, "grad_norm": 0.3355292741124985, "learning_rate": 3.7882693578854975e-06, "loss": 0.4354, "step": 16858 }, { "epoch": 2.7750270878193963, "grad_norm": 0.5419348669241673, "learning_rate": 3.787805214667349e-06, "loss": 0.4349, "step": 16859 }, { "epoch": 2.775191672038513, "grad_norm": 0.30543632162174206, "learning_rate": 3.787341079220385e-06, "loss": 0.4534, "step": 16860 }, { "epoch": 2.7753562562576293, "grad_norm": 0.42483169974023705, "learning_rate": 3.7868769515496715e-06, "loss": 0.4455, "step": 16861 }, { "epoch": 2.7755208404767457, "grad_norm": 0.28050471837453267, "learning_rate": 3.7864128316602714e-06, "loss": 0.4655, "step": 16862 }, { "epoch": 2.775685424695862, "grad_norm": 0.3282592417587267, "learning_rate": 3.78594871955725e-06, "loss": 0.4241, "step": 16863 }, { "epoch": 2.7758500089149787, "grad_norm": 0.2451870935424479, "learning_rate": 3.7854846152456732e-06, "loss": 0.4291, "step": 16864 }, { "epoch": 2.776014593134095, "grad_norm": 0.32971710114845926, "learning_rate": 3.7850205187305992e-06, "loss": 0.4254, "step": 16865 }, { "epoch": 2.776179177353211, "grad_norm": 0.3777895255987735, "learning_rate": 3.7845564300170998e-06, "loss": 0.4203, "step": 16866 }, { "epoch": 2.7763437615723277, "grad_norm": 0.5279816235551633, "learning_rate": 3.784092349110231e-06, "loss": 0.4436, "step": 16867 }, { "epoch": 2.776508345791444, "grad_norm": 0.2845430605160568, "learning_rate": 3.7836282760150646e-06, "loss": 0.4277, "step": 16868 }, { "epoch": 2.7766729300105606, "grad_norm": 0.4033960800609004, "learning_rate": 3.7831642107366587e-06, "loss": 0.4309, "step": 16869 }, { "epoch": 2.776837514229677, "grad_norm": 0.30830629475683824, "learning_rate": 3.7827001532800793e-06, "loss": 0.4338, "step": 16870 }, { "epoch": 2.7770020984487935, "grad_norm": 0.27374971523185726, "learning_rate": 3.7822361036503898e-06, "loss": 0.4401, "step": 16871 }, { "epoch": 2.77716668266791, "grad_norm": 0.36793554515865656, "learning_rate": 3.7817720618526523e-06, "loss": 0.4354, "step": 16872 }, { "epoch": 2.7773312668870265, "grad_norm": 0.3654739408901122, "learning_rate": 3.781308027891932e-06, "loss": 0.4197, "step": 16873 }, { "epoch": 2.777495851106143, "grad_norm": 0.2981515247509971, "learning_rate": 3.7808440017732917e-06, "loss": 0.4182, "step": 16874 }, { "epoch": 2.7776604353252594, "grad_norm": 0.2876842090046348, "learning_rate": 3.780379983501796e-06, "loss": 0.4387, "step": 16875 }, { "epoch": 2.777825019544376, "grad_norm": 0.5915380201347793, "learning_rate": 3.779915973082504e-06, "loss": 0.4339, "step": 16876 }, { "epoch": 2.7779896037634924, "grad_norm": 0.47905263011333626, "learning_rate": 3.779451970520484e-06, "loss": 0.4493, "step": 16877 }, { "epoch": 2.778154187982609, "grad_norm": 0.30270546075192667, "learning_rate": 3.778987975820796e-06, "loss": 0.4363, "step": 16878 }, { "epoch": 2.7783187722017253, "grad_norm": 0.29825553662753534, "learning_rate": 3.7785239889885024e-06, "loss": 0.4263, "step": 16879 }, { "epoch": 2.7784833564208418, "grad_norm": 0.33033442175320543, "learning_rate": 3.778060010028668e-06, "loss": 0.4245, "step": 16880 }, { "epoch": 2.7786479406399582, "grad_norm": 0.36814904182310326, "learning_rate": 3.7775960389463538e-06, "loss": 0.443, "step": 16881 }, { "epoch": 2.7788125248590747, "grad_norm": 0.43006987518328926, "learning_rate": 3.777132075746624e-06, "loss": 0.4367, "step": 16882 }, { "epoch": 2.778977109078191, "grad_norm": 0.8368175177155547, "learning_rate": 3.77666812043454e-06, "loss": 0.4217, "step": 16883 }, { "epoch": 2.7791416932973076, "grad_norm": 0.3555436805334529, "learning_rate": 3.7762041730151654e-06, "loss": 0.4227, "step": 16884 }, { "epoch": 2.779306277516424, "grad_norm": 0.2839381903773783, "learning_rate": 3.775740233493563e-06, "loss": 0.4171, "step": 16885 }, { "epoch": 2.7794708617355406, "grad_norm": 0.2696148806985721, "learning_rate": 3.7752763018747915e-06, "loss": 0.4255, "step": 16886 }, { "epoch": 2.779635445954657, "grad_norm": 0.3281658213205818, "learning_rate": 3.7748123781639183e-06, "loss": 0.4477, "step": 16887 }, { "epoch": 2.7798000301737735, "grad_norm": 0.38679247783092735, "learning_rate": 3.7743484623660007e-06, "loss": 0.4131, "step": 16888 }, { "epoch": 2.77996461439289, "grad_norm": 0.33188086006927525, "learning_rate": 3.7738845544861047e-06, "loss": 0.4346, "step": 16889 }, { "epoch": 2.7801291986120065, "grad_norm": 0.4873338195639668, "learning_rate": 3.7734206545292896e-06, "loss": 0.4251, "step": 16890 }, { "epoch": 2.780293782831123, "grad_norm": 0.2832226392994431, "learning_rate": 3.772956762500619e-06, "loss": 0.448, "step": 16891 }, { "epoch": 2.7804583670502394, "grad_norm": 0.3029700069785049, "learning_rate": 3.7724928784051533e-06, "loss": 0.4292, "step": 16892 }, { "epoch": 2.780622951269356, "grad_norm": 0.30564716249735363, "learning_rate": 3.772029002247956e-06, "loss": 0.4342, "step": 16893 }, { "epoch": 2.7807875354884724, "grad_norm": 0.3342439886152099, "learning_rate": 3.7715651340340884e-06, "loss": 0.4289, "step": 16894 }, { "epoch": 2.780952119707589, "grad_norm": 0.3117023301162004, "learning_rate": 3.7711012737686088e-06, "loss": 0.4428, "step": 16895 }, { "epoch": 2.7811167039267053, "grad_norm": 0.34039420971910117, "learning_rate": 3.770637421456584e-06, "loss": 0.4372, "step": 16896 }, { "epoch": 2.7812812881458218, "grad_norm": 0.337347976138304, "learning_rate": 3.7701735771030696e-06, "loss": 0.4603, "step": 16897 }, { "epoch": 2.7814458723649382, "grad_norm": 0.4024168661032073, "learning_rate": 3.7697097407131326e-06, "loss": 0.4501, "step": 16898 }, { "epoch": 2.7816104565840547, "grad_norm": 0.36673311040074835, "learning_rate": 3.76924591229183e-06, "loss": 0.4253, "step": 16899 }, { "epoch": 2.781775040803171, "grad_norm": 0.3466210120876975, "learning_rate": 3.7687820918442248e-06, "loss": 0.4126, "step": 16900 }, { "epoch": 2.7819396250222876, "grad_norm": 0.4994253757949831, "learning_rate": 3.768318279375378e-06, "loss": 0.4371, "step": 16901 }, { "epoch": 2.782104209241404, "grad_norm": 0.32390373399047606, "learning_rate": 3.7678544748903487e-06, "loss": 0.4394, "step": 16902 }, { "epoch": 2.7822687934605206, "grad_norm": 0.467440952888479, "learning_rate": 3.7673906783942002e-06, "loss": 0.4308, "step": 16903 }, { "epoch": 2.782433377679637, "grad_norm": 0.3679244911932761, "learning_rate": 3.7669268898919917e-06, "loss": 0.4397, "step": 16904 }, { "epoch": 2.7825979618987535, "grad_norm": 0.28117900107872473, "learning_rate": 3.7664631093887853e-06, "loss": 0.4439, "step": 16905 }, { "epoch": 2.78276254611787, "grad_norm": 0.3595577765149056, "learning_rate": 3.765999336889639e-06, "loss": 0.4472, "step": 16906 }, { "epoch": 2.7829271303369865, "grad_norm": 0.3413085463391491, "learning_rate": 3.7655355723996175e-06, "loss": 0.4487, "step": 16907 }, { "epoch": 2.7830917145561025, "grad_norm": 0.3171633221867142, "learning_rate": 3.765071815923778e-06, "loss": 0.4384, "step": 16908 }, { "epoch": 2.783256298775219, "grad_norm": 0.41788203100764226, "learning_rate": 3.76460806746718e-06, "loss": 0.4313, "step": 16909 }, { "epoch": 2.7834208829943354, "grad_norm": 0.33256616962038477, "learning_rate": 3.7641443270348864e-06, "loss": 0.4195, "step": 16910 }, { "epoch": 2.783585467213452, "grad_norm": 0.4438550838839359, "learning_rate": 3.763680594631956e-06, "loss": 0.4469, "step": 16911 }, { "epoch": 2.7837500514325684, "grad_norm": 0.3723740914529145, "learning_rate": 3.763216870263449e-06, "loss": 0.4182, "step": 16912 }, { "epoch": 2.783914635651685, "grad_norm": 0.38287134229195346, "learning_rate": 3.762753153934425e-06, "loss": 0.4364, "step": 16913 }, { "epoch": 2.7840792198708013, "grad_norm": 0.3048030213220403, "learning_rate": 3.762289445649945e-06, "loss": 0.4375, "step": 16914 }, { "epoch": 2.784243804089918, "grad_norm": 0.318188085140894, "learning_rate": 3.7618257454150693e-06, "loss": 0.4243, "step": 16915 }, { "epoch": 2.7844083883090343, "grad_norm": 0.34353950372326697, "learning_rate": 3.7613620532348534e-06, "loss": 0.4318, "step": 16916 }, { "epoch": 2.7845729725281507, "grad_norm": 0.30445659026707206, "learning_rate": 3.760898369114363e-06, "loss": 0.417, "step": 16917 }, { "epoch": 2.784737556747267, "grad_norm": 0.2983743769361913, "learning_rate": 3.7604346930586528e-06, "loss": 0.4367, "step": 16918 }, { "epoch": 2.7849021409663837, "grad_norm": 0.30771506829920947, "learning_rate": 3.7599710250727854e-06, "loss": 0.4376, "step": 16919 }, { "epoch": 2.7850667251855, "grad_norm": 0.35212283779278725, "learning_rate": 3.7595073651618173e-06, "loss": 0.4356, "step": 16920 }, { "epoch": 2.7852313094046166, "grad_norm": 0.4328481200989893, "learning_rate": 3.7590437133308104e-06, "loss": 0.4232, "step": 16921 }, { "epoch": 2.785395893623733, "grad_norm": 0.2879514901774684, "learning_rate": 3.758580069584823e-06, "loss": 0.4249, "step": 16922 }, { "epoch": 2.7855604778428495, "grad_norm": 0.3926481862322684, "learning_rate": 3.7581164339289125e-06, "loss": 0.4157, "step": 16923 }, { "epoch": 2.785725062061966, "grad_norm": 0.2779738532693352, "learning_rate": 3.757652806368141e-06, "loss": 0.42, "step": 16924 }, { "epoch": 2.7858896462810825, "grad_norm": 0.2873667962660394, "learning_rate": 3.7571891869075646e-06, "loss": 0.434, "step": 16925 }, { "epoch": 2.786054230500199, "grad_norm": 0.3209100044525901, "learning_rate": 3.7567255755522452e-06, "loss": 0.4236, "step": 16926 }, { "epoch": 2.7862188147193154, "grad_norm": 0.2933805601306257, "learning_rate": 3.7562619723072367e-06, "loss": 0.4281, "step": 16927 }, { "epoch": 2.786383398938432, "grad_norm": 0.3328129468806271, "learning_rate": 3.755798377177604e-06, "loss": 0.4346, "step": 16928 }, { "epoch": 2.7865479831575484, "grad_norm": 0.3088428740042805, "learning_rate": 3.7553347901683987e-06, "loss": 0.4294, "step": 16929 }, { "epoch": 2.786712567376665, "grad_norm": 0.35085409735244044, "learning_rate": 3.7548712112846866e-06, "loss": 0.4382, "step": 16930 }, { "epoch": 2.7868771515957813, "grad_norm": 0.3807144512865194, "learning_rate": 3.754407640531521e-06, "loss": 0.4311, "step": 16931 }, { "epoch": 2.787041735814898, "grad_norm": 0.48497774083128187, "learning_rate": 3.753944077913961e-06, "loss": 0.4377, "step": 16932 }, { "epoch": 2.787206320034014, "grad_norm": 0.35729850499027854, "learning_rate": 3.7534805234370668e-06, "loss": 0.4393, "step": 16933 }, { "epoch": 2.7873709042531303, "grad_norm": 0.5948625168020623, "learning_rate": 3.7530169771058935e-06, "loss": 0.4313, "step": 16934 }, { "epoch": 2.7875354884722467, "grad_norm": 0.36432472655003784, "learning_rate": 3.7525534389255022e-06, "loss": 0.4484, "step": 16935 }, { "epoch": 2.787700072691363, "grad_norm": 0.30984674335063966, "learning_rate": 3.7520899089009486e-06, "loss": 0.4332, "step": 16936 }, { "epoch": 2.7878646569104797, "grad_norm": 0.7437331781268104, "learning_rate": 3.7516263870372935e-06, "loss": 0.4521, "step": 16937 }, { "epoch": 2.788029241129596, "grad_norm": 0.4071443712642099, "learning_rate": 3.7511628733395913e-06, "loss": 0.4261, "step": 16938 }, { "epoch": 2.7881938253487126, "grad_norm": 0.3537902345034585, "learning_rate": 3.7506993678128995e-06, "loss": 0.4321, "step": 16939 }, { "epoch": 2.788358409567829, "grad_norm": 0.33455054322997646, "learning_rate": 3.7502358704622792e-06, "loss": 0.4215, "step": 16940 }, { "epoch": 2.7885229937869456, "grad_norm": 0.3280338772128488, "learning_rate": 3.7497723812927843e-06, "loss": 0.4377, "step": 16941 }, { "epoch": 2.788687578006062, "grad_norm": 0.34642732226049505, "learning_rate": 3.7493089003094744e-06, "loss": 0.4113, "step": 16942 }, { "epoch": 2.7888521622251785, "grad_norm": 0.34188860854341835, "learning_rate": 3.7488454275174056e-06, "loss": 0.4193, "step": 16943 }, { "epoch": 2.789016746444295, "grad_norm": 0.36554530240010263, "learning_rate": 3.748381962921636e-06, "loss": 0.4051, "step": 16944 }, { "epoch": 2.7891813306634115, "grad_norm": 0.30335350273377804, "learning_rate": 3.747918506527225e-06, "loss": 0.4373, "step": 16945 }, { "epoch": 2.789345914882528, "grad_norm": 0.34263542785101275, "learning_rate": 3.747455058339223e-06, "loss": 0.4121, "step": 16946 }, { "epoch": 2.7895104991016444, "grad_norm": 0.5502643653929845, "learning_rate": 3.7469916183626937e-06, "loss": 0.4322, "step": 16947 }, { "epoch": 2.789675083320761, "grad_norm": 0.3538609617300198, "learning_rate": 3.7465281866026898e-06, "loss": 0.4224, "step": 16948 }, { "epoch": 2.7898396675398773, "grad_norm": 0.290319516274371, "learning_rate": 3.7460647630642714e-06, "loss": 0.4507, "step": 16949 }, { "epoch": 2.790004251758994, "grad_norm": 0.3516323884400021, "learning_rate": 3.7456013477524915e-06, "loss": 0.4471, "step": 16950 }, { "epoch": 2.7901688359781103, "grad_norm": 0.39564626942861164, "learning_rate": 3.74513794067241e-06, "loss": 0.4133, "step": 16951 }, { "epoch": 2.7903334201972267, "grad_norm": 0.3694548077180987, "learning_rate": 3.7446745418290816e-06, "loss": 0.4272, "step": 16952 }, { "epoch": 2.790498004416343, "grad_norm": 0.43330598934010617, "learning_rate": 3.7442111512275626e-06, "loss": 0.4454, "step": 16953 }, { "epoch": 2.7906625886354597, "grad_norm": 0.29778710218876897, "learning_rate": 3.7437477688729105e-06, "loss": 0.4174, "step": 16954 }, { "epoch": 2.790827172854576, "grad_norm": 0.42565006211331385, "learning_rate": 3.7432843947701805e-06, "loss": 0.4039, "step": 16955 }, { "epoch": 2.7909917570736926, "grad_norm": 0.29606168440900626, "learning_rate": 3.7428210289244293e-06, "loss": 0.4128, "step": 16956 }, { "epoch": 2.791156341292809, "grad_norm": 0.3511754169964407, "learning_rate": 3.742357671340713e-06, "loss": 0.435, "step": 16957 }, { "epoch": 2.7913209255119256, "grad_norm": 0.4027590318484724, "learning_rate": 3.7418943220240884e-06, "loss": 0.4387, "step": 16958 }, { "epoch": 2.791485509731042, "grad_norm": 0.3527348623847974, "learning_rate": 3.7414309809796097e-06, "loss": 0.4414, "step": 16959 }, { "epoch": 2.7916500939501585, "grad_norm": 0.33095859367945063, "learning_rate": 3.7409676482123323e-06, "loss": 0.4443, "step": 16960 }, { "epoch": 2.791814678169275, "grad_norm": 0.29026368944239717, "learning_rate": 3.7405043237273143e-06, "loss": 0.4319, "step": 16961 }, { "epoch": 2.7919792623883914, "grad_norm": 0.32769594815474834, "learning_rate": 3.740041007529609e-06, "loss": 0.4364, "step": 16962 }, { "epoch": 2.792143846607508, "grad_norm": 0.38324519531876516, "learning_rate": 3.7395776996242737e-06, "loss": 0.449, "step": 16963 }, { "epoch": 2.7923084308266244, "grad_norm": 0.31201759520893474, "learning_rate": 3.739114400016362e-06, "loss": 0.4356, "step": 16964 }, { "epoch": 2.792473015045741, "grad_norm": 0.32916106873222084, "learning_rate": 3.7386511087109312e-06, "loss": 0.4334, "step": 16965 }, { "epoch": 2.7926375992648573, "grad_norm": 0.2519412393686649, "learning_rate": 3.738187825713037e-06, "loss": 0.4169, "step": 16966 }, { "epoch": 2.792802183483974, "grad_norm": 0.3511877128500632, "learning_rate": 3.7377245510277306e-06, "loss": 0.429, "step": 16967 }, { "epoch": 2.7929667677030903, "grad_norm": 0.2943120983708326, "learning_rate": 3.7372612846600715e-06, "loss": 0.4519, "step": 16968 }, { "epoch": 2.7931313519222067, "grad_norm": 0.31550500106456264, "learning_rate": 3.7367980266151117e-06, "loss": 0.4291, "step": 16969 }, { "epoch": 2.793295936141323, "grad_norm": 0.2594130714411502, "learning_rate": 3.7363347768979084e-06, "loss": 0.4216, "step": 16970 }, { "epoch": 2.7934605203604397, "grad_norm": 0.3066082610071582, "learning_rate": 3.7358715355135136e-06, "loss": 0.4528, "step": 16971 }, { "epoch": 2.793625104579556, "grad_norm": 0.2696462542228448, "learning_rate": 3.735408302466985e-06, "loss": 0.4357, "step": 16972 }, { "epoch": 2.7937896887986726, "grad_norm": 0.3063848075313442, "learning_rate": 3.734945077763375e-06, "loss": 0.4342, "step": 16973 }, { "epoch": 2.7939542730177886, "grad_norm": 0.2953028168617143, "learning_rate": 3.7344818614077394e-06, "loss": 0.427, "step": 16974 }, { "epoch": 2.794118857236905, "grad_norm": 0.4258592757205851, "learning_rate": 3.7340186534051324e-06, "loss": 0.4474, "step": 16975 }, { "epoch": 2.7942834414560216, "grad_norm": 0.27834239294905616, "learning_rate": 3.7335554537606074e-06, "loss": 0.442, "step": 16976 }, { "epoch": 2.794448025675138, "grad_norm": 0.3230209326259235, "learning_rate": 3.7330922624792216e-06, "loss": 0.4276, "step": 16977 }, { "epoch": 2.7946126098942545, "grad_norm": 0.3221333441755021, "learning_rate": 3.732629079566024e-06, "loss": 0.4458, "step": 16978 }, { "epoch": 2.794777194113371, "grad_norm": 0.3676011024234717, "learning_rate": 3.7321659050260737e-06, "loss": 0.4368, "step": 16979 }, { "epoch": 2.7949417783324875, "grad_norm": 0.2712662495086995, "learning_rate": 3.7317027388644214e-06, "loss": 0.4179, "step": 16980 }, { "epoch": 2.795106362551604, "grad_norm": 0.30231518602792223, "learning_rate": 3.731239581086123e-06, "loss": 0.4042, "step": 16981 }, { "epoch": 2.7952709467707204, "grad_norm": 1.6063668657993666, "learning_rate": 3.730776431696231e-06, "loss": 0.4352, "step": 16982 }, { "epoch": 2.795435530989837, "grad_norm": 0.3833217390368779, "learning_rate": 3.7303132906997994e-06, "loss": 0.4282, "step": 16983 }, { "epoch": 2.7956001152089534, "grad_norm": 0.3571528913706697, "learning_rate": 3.7298501581018827e-06, "loss": 0.4333, "step": 16984 }, { "epoch": 2.79576469942807, "grad_norm": 0.4181591768839053, "learning_rate": 3.729387033907533e-06, "loss": 0.4391, "step": 16985 }, { "epoch": 2.7959292836471863, "grad_norm": 0.3195424170629623, "learning_rate": 3.7289239181218048e-06, "loss": 0.4393, "step": 16986 }, { "epoch": 2.7960938678663028, "grad_norm": 0.29082044594392087, "learning_rate": 3.7284608107497503e-06, "loss": 0.4378, "step": 16987 }, { "epoch": 2.7962584520854192, "grad_norm": 0.3120472537805515, "learning_rate": 3.727997711796426e-06, "loss": 0.4211, "step": 16988 }, { "epoch": 2.7964230363045357, "grad_norm": 0.2821648618544632, "learning_rate": 3.7275346212668806e-06, "loss": 0.4056, "step": 16989 }, { "epoch": 2.796587620523652, "grad_norm": 0.46635738511994507, "learning_rate": 3.727071539166169e-06, "loss": 0.4369, "step": 16990 }, { "epoch": 2.7967522047427686, "grad_norm": 0.34837292018747074, "learning_rate": 3.7266084654993447e-06, "loss": 0.4262, "step": 16991 }, { "epoch": 2.796916788961885, "grad_norm": 0.3238602963355004, "learning_rate": 3.7261454002714594e-06, "loss": 0.4305, "step": 16992 }, { "epoch": 2.7970813731810016, "grad_norm": 0.2700471790603966, "learning_rate": 3.725682343487568e-06, "loss": 0.4366, "step": 16993 }, { "epoch": 2.797245957400118, "grad_norm": 0.32729950519911205, "learning_rate": 3.72521929515272e-06, "loss": 0.4444, "step": 16994 }, { "epoch": 2.7974105416192345, "grad_norm": 0.3845240801769013, "learning_rate": 3.7247562552719714e-06, "loss": 0.4285, "step": 16995 }, { "epoch": 2.797575125838351, "grad_norm": 0.303382662774618, "learning_rate": 3.7242932238503742e-06, "loss": 0.4372, "step": 16996 }, { "epoch": 2.7977397100574675, "grad_norm": 0.3544453013233295, "learning_rate": 3.7238302008929765e-06, "loss": 0.448, "step": 16997 }, { "epoch": 2.797904294276584, "grad_norm": 0.376369276483892, "learning_rate": 3.7233671864048373e-06, "loss": 0.452, "step": 16998 }, { "epoch": 2.7980688784957, "grad_norm": 0.28156389454081876, "learning_rate": 3.722904180391002e-06, "loss": 0.4315, "step": 16999 }, { "epoch": 2.7982334627148164, "grad_norm": 0.2676048084949686, "learning_rate": 3.7224411828565293e-06, "loss": 0.4146, "step": 17000 }, { "epoch": 2.798398046933933, "grad_norm": 0.28636233503036546, "learning_rate": 3.7219781938064657e-06, "loss": 0.4321, "step": 17001 }, { "epoch": 2.7985626311530494, "grad_norm": 0.28896095189299925, "learning_rate": 3.721515213245867e-06, "loss": 0.4383, "step": 17002 }, { "epoch": 2.798727215372166, "grad_norm": 0.3737462974148848, "learning_rate": 3.7210522411797837e-06, "loss": 0.4433, "step": 17003 }, { "epoch": 2.7988917995912823, "grad_norm": 0.33586658724596585, "learning_rate": 3.7205892776132666e-06, "loss": 0.4311, "step": 17004 }, { "epoch": 2.799056383810399, "grad_norm": 0.3544658458659146, "learning_rate": 3.720126322551368e-06, "loss": 0.4241, "step": 17005 }, { "epoch": 2.7992209680295153, "grad_norm": 0.3527511763637352, "learning_rate": 3.71966337599914e-06, "loss": 0.4472, "step": 17006 }, { "epoch": 2.7993855522486317, "grad_norm": 0.3095155768711578, "learning_rate": 3.7192004379616358e-06, "loss": 0.4237, "step": 17007 }, { "epoch": 2.799550136467748, "grad_norm": 1.0535976014865251, "learning_rate": 3.7187375084439014e-06, "loss": 0.4342, "step": 17008 }, { "epoch": 2.7997147206868647, "grad_norm": 0.3112590515808455, "learning_rate": 3.7182745874509945e-06, "loss": 0.4138, "step": 17009 }, { "epoch": 2.799879304905981, "grad_norm": 0.33246102620116796, "learning_rate": 3.717811674987962e-06, "loss": 0.4391, "step": 17010 }, { "epoch": 2.8000438891250976, "grad_norm": 0.36308513014633353, "learning_rate": 3.717348771059856e-06, "loss": 0.4302, "step": 17011 }, { "epoch": 2.800208473344214, "grad_norm": 0.43090264422151375, "learning_rate": 3.716885875671728e-06, "loss": 0.4362, "step": 17012 }, { "epoch": 2.8003730575633305, "grad_norm": 0.3120753273054859, "learning_rate": 3.7164229888286287e-06, "loss": 0.412, "step": 17013 }, { "epoch": 2.800537641782447, "grad_norm": 0.3137670501286763, "learning_rate": 3.71596011053561e-06, "loss": 0.4341, "step": 17014 }, { "epoch": 2.8007022260015635, "grad_norm": 0.31210905483561246, "learning_rate": 3.71549724079772e-06, "loss": 0.4248, "step": 17015 }, { "epoch": 2.80086681022068, "grad_norm": 0.3238692595647709, "learning_rate": 3.7150343796200123e-06, "loss": 0.427, "step": 17016 }, { "epoch": 2.8010313944397964, "grad_norm": 0.3889252924366496, "learning_rate": 3.714571527007535e-06, "loss": 0.4369, "step": 17017 }, { "epoch": 2.801195978658913, "grad_norm": 0.3874796764943105, "learning_rate": 3.714108682965341e-06, "loss": 0.4403, "step": 17018 }, { "epoch": 2.8013605628780294, "grad_norm": 0.602048942349635, "learning_rate": 3.7136458474984805e-06, "loss": 0.4419, "step": 17019 }, { "epoch": 2.801525147097146, "grad_norm": 0.4316973975224754, "learning_rate": 3.7131830206120005e-06, "loss": 0.4211, "step": 17020 }, { "epoch": 2.8016897313162623, "grad_norm": 0.3025552469638141, "learning_rate": 3.7127202023109545e-06, "loss": 0.4494, "step": 17021 }, { "epoch": 2.801854315535379, "grad_norm": 0.30613352483498824, "learning_rate": 3.71225739260039e-06, "loss": 0.4203, "step": 17022 }, { "epoch": 2.8020188997544953, "grad_norm": 0.3390169431145897, "learning_rate": 3.71179459148536e-06, "loss": 0.431, "step": 17023 }, { "epoch": 2.8021834839736117, "grad_norm": 0.32462568250008716, "learning_rate": 3.7113317989709114e-06, "loss": 0.4362, "step": 17024 }, { "epoch": 2.802348068192728, "grad_norm": 0.3769661778823802, "learning_rate": 3.710869015062097e-06, "loss": 0.44, "step": 17025 }, { "epoch": 2.8025126524118447, "grad_norm": 0.344819910321778, "learning_rate": 3.7104062397639652e-06, "loss": 0.4211, "step": 17026 }, { "epoch": 2.802677236630961, "grad_norm": 0.28936962174503733, "learning_rate": 3.709943473081563e-06, "loss": 0.4193, "step": 17027 }, { "epoch": 2.8028418208500776, "grad_norm": 0.3059987994253555, "learning_rate": 3.7094807150199453e-06, "loss": 0.4427, "step": 17028 }, { "epoch": 2.803006405069194, "grad_norm": 0.3496518395650141, "learning_rate": 3.7090179655841552e-06, "loss": 0.4169, "step": 17029 }, { "epoch": 2.8031709892883105, "grad_norm": 0.29300593577265654, "learning_rate": 3.7085552247792484e-06, "loss": 0.4374, "step": 17030 }, { "epoch": 2.803335573507427, "grad_norm": 0.3291116815151797, "learning_rate": 3.708092492610269e-06, "loss": 0.4292, "step": 17031 }, { "epoch": 2.8035001577265435, "grad_norm": 0.7757549504963241, "learning_rate": 3.70762976908227e-06, "loss": 0.4292, "step": 17032 }, { "epoch": 2.80366474194566, "grad_norm": 0.3413843417394545, "learning_rate": 3.707167054200298e-06, "loss": 0.4519, "step": 17033 }, { "epoch": 2.8038293261647764, "grad_norm": 0.3344160874415403, "learning_rate": 3.706704347969402e-06, "loss": 0.4169, "step": 17034 }, { "epoch": 2.803993910383893, "grad_norm": 0.3432974498324751, "learning_rate": 3.706241650394633e-06, "loss": 0.4291, "step": 17035 }, { "epoch": 2.8041584946030094, "grad_norm": 0.327641039036722, "learning_rate": 3.7057789614810363e-06, "loss": 0.4333, "step": 17036 }, { "epoch": 2.804323078822126, "grad_norm": 0.3558890376868826, "learning_rate": 3.7053162812336638e-06, "loss": 0.4206, "step": 17037 }, { "epoch": 2.8044876630412423, "grad_norm": 0.30115121607919093, "learning_rate": 3.7048536096575627e-06, "loss": 0.4199, "step": 17038 }, { "epoch": 2.8046522472603588, "grad_norm": 1.5654426664598493, "learning_rate": 3.704390946757783e-06, "loss": 0.436, "step": 17039 }, { "epoch": 2.8048168314794752, "grad_norm": 0.3369589627104296, "learning_rate": 3.7039282925393706e-06, "loss": 0.4327, "step": 17040 }, { "epoch": 2.8049814156985913, "grad_norm": 0.2708386930655964, "learning_rate": 3.7034656470073745e-06, "loss": 0.4139, "step": 17041 }, { "epoch": 2.8051459999177077, "grad_norm": 0.31847056505151394, "learning_rate": 3.7030030101668444e-06, "loss": 0.4155, "step": 17042 }, { "epoch": 2.805310584136824, "grad_norm": 0.36442981805925195, "learning_rate": 3.702540382022826e-06, "loss": 0.4248, "step": 17043 }, { "epoch": 2.8054751683559407, "grad_norm": 0.29620262128536895, "learning_rate": 3.70207776258037e-06, "loss": 0.4361, "step": 17044 }, { "epoch": 2.805639752575057, "grad_norm": 0.9831257028070958, "learning_rate": 3.7016151518445214e-06, "loss": 0.4267, "step": 17045 }, { "epoch": 2.8058043367941736, "grad_norm": 0.3674522650750558, "learning_rate": 3.7011525498203315e-06, "loss": 0.4439, "step": 17046 }, { "epoch": 2.80596892101329, "grad_norm": 0.44696920449614114, "learning_rate": 3.7006899565128467e-06, "loss": 0.4549, "step": 17047 }, { "epoch": 2.8061335052324066, "grad_norm": 0.48658489052050297, "learning_rate": 3.7002273719271106e-06, "loss": 0.4421, "step": 17048 }, { "epoch": 2.806298089451523, "grad_norm": 0.28648773556330664, "learning_rate": 3.699764796068178e-06, "loss": 0.4522, "step": 17049 }, { "epoch": 2.8064626736706395, "grad_norm": 0.6411059808424378, "learning_rate": 3.69930222894109e-06, "loss": 0.4365, "step": 17050 }, { "epoch": 2.806627257889756, "grad_norm": 0.33897861852767236, "learning_rate": 3.6988396705508977e-06, "loss": 0.4445, "step": 17051 }, { "epoch": 2.8067918421088724, "grad_norm": 0.3062163499760631, "learning_rate": 3.6983771209026466e-06, "loss": 0.4368, "step": 17052 }, { "epoch": 2.806956426327989, "grad_norm": 0.29910086203118597, "learning_rate": 3.6979145800013855e-06, "loss": 0.4156, "step": 17053 }, { "epoch": 2.8071210105471054, "grad_norm": 0.33507309223228005, "learning_rate": 3.6974520478521587e-06, "loss": 0.4207, "step": 17054 }, { "epoch": 2.807285594766222, "grad_norm": 0.35606412297007517, "learning_rate": 3.696989524460016e-06, "loss": 0.411, "step": 17055 }, { "epoch": 2.8074501789853383, "grad_norm": 0.4319969872987501, "learning_rate": 3.6965270098300035e-06, "loss": 0.4558, "step": 17056 }, { "epoch": 2.807614763204455, "grad_norm": 0.3174102870346743, "learning_rate": 3.6960645039671665e-06, "loss": 0.4363, "step": 17057 }, { "epoch": 2.8077793474235713, "grad_norm": 0.26966269521273795, "learning_rate": 3.6956020068765547e-06, "loss": 0.4319, "step": 17058 }, { "epoch": 2.8079439316426877, "grad_norm": 0.3015423359547079, "learning_rate": 3.6951395185632096e-06, "loss": 0.4293, "step": 17059 }, { "epoch": 2.808108515861804, "grad_norm": 0.39543701173947554, "learning_rate": 3.6946770390321837e-06, "loss": 0.4475, "step": 17060 }, { "epoch": 2.8082731000809207, "grad_norm": 0.29558063112252037, "learning_rate": 3.694214568288518e-06, "loss": 0.4326, "step": 17061 }, { "epoch": 2.808437684300037, "grad_norm": 0.29241953332047743, "learning_rate": 3.693752106337264e-06, "loss": 0.4463, "step": 17062 }, { "epoch": 2.8086022685191536, "grad_norm": 0.36922966517747435, "learning_rate": 3.6932896531834643e-06, "loss": 0.4424, "step": 17063 }, { "epoch": 2.80876685273827, "grad_norm": 0.34813899240461527, "learning_rate": 3.692827208832165e-06, "loss": 0.4427, "step": 17064 }, { "epoch": 2.8089314369573866, "grad_norm": 0.2882741202352136, "learning_rate": 3.6923647732884143e-06, "loss": 0.4429, "step": 17065 }, { "epoch": 2.8090960211765026, "grad_norm": 0.28096012817176935, "learning_rate": 3.6919023465572558e-06, "loss": 0.4275, "step": 17066 }, { "epoch": 2.809260605395619, "grad_norm": 0.3340488275443412, "learning_rate": 3.6914399286437373e-06, "loss": 0.4357, "step": 17067 }, { "epoch": 2.8094251896147355, "grad_norm": 0.3125108279274556, "learning_rate": 3.6909775195529026e-06, "loss": 0.4279, "step": 17068 }, { "epoch": 2.809589773833852, "grad_norm": 0.2881721468616627, "learning_rate": 3.6905151192898004e-06, "loss": 0.4254, "step": 17069 }, { "epoch": 2.8097543580529685, "grad_norm": 0.33957698439504513, "learning_rate": 3.690052727859473e-06, "loss": 0.4403, "step": 17070 }, { "epoch": 2.809918942272085, "grad_norm": 0.3042511204962381, "learning_rate": 3.6895903452669655e-06, "loss": 0.4062, "step": 17071 }, { "epoch": 2.8100835264912014, "grad_norm": 0.3131632403271567, "learning_rate": 3.689127971517327e-06, "loss": 0.4254, "step": 17072 }, { "epoch": 2.810248110710318, "grad_norm": 0.32292931932722746, "learning_rate": 3.6886656066155985e-06, "loss": 0.4297, "step": 17073 }, { "epoch": 2.8104126949294344, "grad_norm": 0.3258889677088053, "learning_rate": 3.6882032505668283e-06, "loss": 0.4245, "step": 17074 }, { "epoch": 2.810577279148551, "grad_norm": 0.3213719613226214, "learning_rate": 3.687740903376059e-06, "loss": 0.4184, "step": 17075 }, { "epoch": 2.8107418633676673, "grad_norm": 0.28843989368418793, "learning_rate": 3.687278565048338e-06, "loss": 0.423, "step": 17076 }, { "epoch": 2.8109064475867838, "grad_norm": 0.382218246035676, "learning_rate": 3.6868162355887096e-06, "loss": 0.4173, "step": 17077 }, { "epoch": 2.8110710318059002, "grad_norm": 0.3187230594758411, "learning_rate": 3.686353915002216e-06, "loss": 0.4292, "step": 17078 }, { "epoch": 2.8112356160250167, "grad_norm": 0.28468997295293, "learning_rate": 3.685891603293906e-06, "loss": 0.4286, "step": 17079 }, { "epoch": 2.811400200244133, "grad_norm": 0.325067665311539, "learning_rate": 3.685429300468819e-06, "loss": 0.4266, "step": 17080 }, { "epoch": 2.8115647844632496, "grad_norm": 0.5363543466171763, "learning_rate": 3.6849670065320047e-06, "loss": 0.4149, "step": 17081 }, { "epoch": 2.811729368682366, "grad_norm": 0.36713578219493537, "learning_rate": 3.684504721488504e-06, "loss": 0.4214, "step": 17082 }, { "epoch": 2.8118939529014826, "grad_norm": 0.3556618817554257, "learning_rate": 3.684042445343363e-06, "loss": 0.4279, "step": 17083 }, { "epoch": 2.812058537120599, "grad_norm": 0.3790316644542553, "learning_rate": 3.683580178101626e-06, "loss": 0.4204, "step": 17084 }, { "epoch": 2.8122231213397155, "grad_norm": 0.44054579314572406, "learning_rate": 3.6831179197683345e-06, "loss": 0.4296, "step": 17085 }, { "epoch": 2.812387705558832, "grad_norm": 0.3110667703692109, "learning_rate": 3.682655670348535e-06, "loss": 0.4293, "step": 17086 }, { "epoch": 2.8125522897779485, "grad_norm": 0.33293568691450487, "learning_rate": 3.68219342984727e-06, "loss": 0.4437, "step": 17087 }, { "epoch": 2.812716873997065, "grad_norm": 0.47825615572547775, "learning_rate": 3.6817311982695857e-06, "loss": 0.4353, "step": 17088 }, { "epoch": 2.8128814582161814, "grad_norm": 0.36050258840546906, "learning_rate": 3.6812689756205207e-06, "loss": 0.4256, "step": 17089 }, { "epoch": 2.813046042435298, "grad_norm": 0.2679257712308787, "learning_rate": 3.6808067619051253e-06, "loss": 0.4215, "step": 17090 }, { "epoch": 2.8132106266544143, "grad_norm": 0.3498276050713189, "learning_rate": 3.6803445571284385e-06, "loss": 0.4165, "step": 17091 }, { "epoch": 2.813375210873531, "grad_norm": 0.29415549838126503, "learning_rate": 3.679882361295504e-06, "loss": 0.425, "step": 17092 }, { "epoch": 2.8135397950926473, "grad_norm": 0.3274825930326004, "learning_rate": 3.679420174411367e-06, "loss": 0.4117, "step": 17093 }, { "epoch": 2.8137043793117638, "grad_norm": 0.380254456127662, "learning_rate": 3.678957996481068e-06, "loss": 0.4653, "step": 17094 }, { "epoch": 2.8138689635308802, "grad_norm": 0.38134174916262903, "learning_rate": 3.6784958275096528e-06, "loss": 0.414, "step": 17095 }, { "epoch": 2.8140335477499967, "grad_norm": 0.2777538832839576, "learning_rate": 3.6780336675021627e-06, "loss": 0.4113, "step": 17096 }, { "epoch": 2.814198131969113, "grad_norm": 0.39757878901837895, "learning_rate": 3.6775715164636414e-06, "loss": 0.4386, "step": 17097 }, { "epoch": 2.8143627161882296, "grad_norm": 0.2996085061697317, "learning_rate": 3.677109374399131e-06, "loss": 0.4326, "step": 17098 }, { "epoch": 2.814527300407346, "grad_norm": 0.3132830922240271, "learning_rate": 3.676647241313676e-06, "loss": 0.4243, "step": 17099 }, { "epoch": 2.8146918846264626, "grad_norm": 0.308012152473725, "learning_rate": 3.6761851172123183e-06, "loss": 0.4346, "step": 17100 }, { "epoch": 2.814856468845579, "grad_norm": 0.28324542469615294, "learning_rate": 3.6757230021000977e-06, "loss": 0.4333, "step": 17101 }, { "epoch": 2.8150210530646955, "grad_norm": 0.4152341168327462, "learning_rate": 3.6752608959820596e-06, "loss": 0.4325, "step": 17102 }, { "epoch": 2.815185637283812, "grad_norm": 0.30608123523114217, "learning_rate": 3.674798798863245e-06, "loss": 0.4336, "step": 17103 }, { "epoch": 2.8153502215029285, "grad_norm": 0.3394719081879419, "learning_rate": 3.674336710748697e-06, "loss": 0.4335, "step": 17104 }, { "epoch": 2.815514805722045, "grad_norm": 0.3288775694862542, "learning_rate": 3.6738746316434567e-06, "loss": 0.4478, "step": 17105 }, { "epoch": 2.8156793899411614, "grad_norm": 0.4548371543200483, "learning_rate": 3.673412561552568e-06, "loss": 0.4359, "step": 17106 }, { "epoch": 2.815843974160278, "grad_norm": 0.3064065707396901, "learning_rate": 3.672950500481072e-06, "loss": 0.4131, "step": 17107 }, { "epoch": 2.816008558379394, "grad_norm": 0.37563674605906305, "learning_rate": 3.6724884484340074e-06, "loss": 0.4072, "step": 17108 }, { "epoch": 2.8161731425985104, "grad_norm": 0.31215206312051025, "learning_rate": 3.6720264054164214e-06, "loss": 0.4345, "step": 17109 }, { "epoch": 2.816337726817627, "grad_norm": 0.29020191266348566, "learning_rate": 3.67156437143335e-06, "loss": 0.4291, "step": 17110 }, { "epoch": 2.8165023110367433, "grad_norm": 0.3353379042670644, "learning_rate": 3.6711023464898397e-06, "loss": 0.4314, "step": 17111 }, { "epoch": 2.81666689525586, "grad_norm": 0.31147570248059336, "learning_rate": 3.670640330590929e-06, "loss": 0.4281, "step": 17112 }, { "epoch": 2.8168314794749763, "grad_norm": 0.3117734222192966, "learning_rate": 3.67017832374166e-06, "loss": 0.4448, "step": 17113 }, { "epoch": 2.8169960636940927, "grad_norm": 0.4132043734099047, "learning_rate": 3.6697163259470743e-06, "loss": 0.4474, "step": 17114 }, { "epoch": 2.817160647913209, "grad_norm": 0.4119011796010191, "learning_rate": 3.6692543372122123e-06, "loss": 0.4319, "step": 17115 }, { "epoch": 2.8173252321323257, "grad_norm": 0.3155796783904074, "learning_rate": 3.668792357542116e-06, "loss": 0.4182, "step": 17116 }, { "epoch": 2.817489816351442, "grad_norm": 0.30699109023803706, "learning_rate": 3.6683303869418244e-06, "loss": 0.4266, "step": 17117 }, { "epoch": 2.8176544005705586, "grad_norm": 0.6887270606990578, "learning_rate": 3.667868425416381e-06, "loss": 0.4262, "step": 17118 }, { "epoch": 2.817818984789675, "grad_norm": 0.2751247994882229, "learning_rate": 3.6674064729708247e-06, "loss": 0.4238, "step": 17119 }, { "epoch": 2.8179835690087915, "grad_norm": 0.459838495793609, "learning_rate": 3.666944529610198e-06, "loss": 0.4364, "step": 17120 }, { "epoch": 2.818148153227908, "grad_norm": 0.31715489732174246, "learning_rate": 3.666482595339539e-06, "loss": 0.4115, "step": 17121 }, { "epoch": 2.8183127374470245, "grad_norm": 0.3810854234866949, "learning_rate": 3.6660206701638885e-06, "loss": 0.435, "step": 17122 }, { "epoch": 2.818477321666141, "grad_norm": 0.2980712904841457, "learning_rate": 3.665558754088289e-06, "loss": 0.4293, "step": 17123 }, { "epoch": 2.8186419058852574, "grad_norm": 0.31554529600333936, "learning_rate": 3.6650968471177778e-06, "loss": 0.4396, "step": 17124 }, { "epoch": 2.818806490104374, "grad_norm": 0.3526745457275579, "learning_rate": 3.664634949257398e-06, "loss": 0.4386, "step": 17125 }, { "epoch": 2.8189710743234904, "grad_norm": 0.26955385001081505, "learning_rate": 3.6641730605121874e-06, "loss": 0.4217, "step": 17126 }, { "epoch": 2.819135658542607, "grad_norm": 0.3302344739823321, "learning_rate": 3.663711180887187e-06, "loss": 0.4473, "step": 17127 }, { "epoch": 2.8193002427617233, "grad_norm": 0.38799946904704896, "learning_rate": 3.663249310387438e-06, "loss": 0.4147, "step": 17128 }, { "epoch": 2.8194648269808398, "grad_norm": 0.36345893830202847, "learning_rate": 3.6627874490179753e-06, "loss": 0.4215, "step": 17129 }, { "epoch": 2.8196294111999562, "grad_norm": 0.3921152616720411, "learning_rate": 3.6623255967838453e-06, "loss": 0.4397, "step": 17130 }, { "epoch": 2.8197939954190727, "grad_norm": 0.3370515743146148, "learning_rate": 3.6618637536900817e-06, "loss": 0.423, "step": 17131 }, { "epoch": 2.819958579638189, "grad_norm": 0.298672711834632, "learning_rate": 3.6614019197417274e-06, "loss": 0.4148, "step": 17132 }, { "epoch": 2.820123163857305, "grad_norm": 0.3674852867505034, "learning_rate": 3.6609400949438195e-06, "loss": 0.45, "step": 17133 }, { "epoch": 2.8202877480764217, "grad_norm": 0.4480024381728986, "learning_rate": 3.6604782793013998e-06, "loss": 0.422, "step": 17134 }, { "epoch": 2.820452332295538, "grad_norm": 0.43238289475601743, "learning_rate": 3.660016472819506e-06, "loss": 0.4339, "step": 17135 }, { "epoch": 2.8206169165146546, "grad_norm": 0.26018647672172557, "learning_rate": 3.6595546755031758e-06, "loss": 0.4051, "step": 17136 }, { "epoch": 2.820781500733771, "grad_norm": 0.27947140164193335, "learning_rate": 3.659092887357451e-06, "loss": 0.4224, "step": 17137 }, { "epoch": 2.8209460849528876, "grad_norm": 0.2779553942761288, "learning_rate": 3.6586311083873674e-06, "loss": 0.4294, "step": 17138 }, { "epoch": 2.821110669172004, "grad_norm": 0.3123836058428518, "learning_rate": 3.658169338597968e-06, "loss": 0.3996, "step": 17139 }, { "epoch": 2.8212752533911205, "grad_norm": 0.29982736494768003, "learning_rate": 3.657707577994286e-06, "loss": 0.435, "step": 17140 }, { "epoch": 2.821439837610237, "grad_norm": 0.2998633135173355, "learning_rate": 3.6572458265813648e-06, "loss": 0.4326, "step": 17141 }, { "epoch": 2.8216044218293534, "grad_norm": 1.2875260936074302, "learning_rate": 3.6567840843642383e-06, "loss": 0.4414, "step": 17142 }, { "epoch": 2.82176900604847, "grad_norm": 0.36380125041897954, "learning_rate": 3.6563223513479505e-06, "loss": 0.4419, "step": 17143 }, { "epoch": 2.8219335902675864, "grad_norm": 0.3031862883250551, "learning_rate": 3.6558606275375356e-06, "loss": 0.4327, "step": 17144 }, { "epoch": 2.822098174486703, "grad_norm": 0.31900799897958043, "learning_rate": 3.655398912938031e-06, "loss": 0.4384, "step": 17145 }, { "epoch": 2.8222627587058193, "grad_norm": 0.3025173566880341, "learning_rate": 3.654937207554478e-06, "loss": 0.4343, "step": 17146 }, { "epoch": 2.822427342924936, "grad_norm": 0.2663358722032339, "learning_rate": 3.6544755113919113e-06, "loss": 0.4463, "step": 17147 }, { "epoch": 2.8225919271440523, "grad_norm": 0.3513917982076639, "learning_rate": 3.654013824455372e-06, "loss": 0.4307, "step": 17148 }, { "epoch": 2.8227565113631687, "grad_norm": 0.5643742825543426, "learning_rate": 3.653552146749895e-06, "loss": 0.433, "step": 17149 }, { "epoch": 2.822921095582285, "grad_norm": 0.3137193911706826, "learning_rate": 3.65309047828052e-06, "loss": 0.4303, "step": 17150 }, { "epoch": 2.8230856798014017, "grad_norm": 0.846721645930326, "learning_rate": 3.652628819052285e-06, "loss": 0.4398, "step": 17151 }, { "epoch": 2.823250264020518, "grad_norm": 0.35462356857183863, "learning_rate": 3.652167169070224e-06, "loss": 0.4333, "step": 17152 }, { "epoch": 2.8234148482396346, "grad_norm": 0.2740207894520864, "learning_rate": 3.6517055283393776e-06, "loss": 0.4125, "step": 17153 }, { "epoch": 2.823579432458751, "grad_norm": 0.3036748323797673, "learning_rate": 3.6512438968647807e-06, "loss": 0.4514, "step": 17154 }, { "epoch": 2.8237440166778676, "grad_norm": 0.29306246669216246, "learning_rate": 3.6507822746514722e-06, "loss": 0.4343, "step": 17155 }, { "epoch": 2.823908600896984, "grad_norm": 0.2752629222420503, "learning_rate": 3.650320661704488e-06, "loss": 0.4429, "step": 17156 }, { "epoch": 2.8240731851161005, "grad_norm": 0.30854742164160615, "learning_rate": 3.6498590580288672e-06, "loss": 0.4468, "step": 17157 }, { "epoch": 2.824237769335217, "grad_norm": 0.31442516825330213, "learning_rate": 3.649397463629646e-06, "loss": 0.4469, "step": 17158 }, { "epoch": 2.8244023535543334, "grad_norm": 0.31816277090241346, "learning_rate": 3.6489358785118563e-06, "loss": 0.4317, "step": 17159 }, { "epoch": 2.82456693777345, "grad_norm": 0.3820945237326531, "learning_rate": 3.6484743026805423e-06, "loss": 0.4237, "step": 17160 }, { "epoch": 2.8247315219925664, "grad_norm": 0.3340528223521202, "learning_rate": 3.648012736140734e-06, "loss": 0.429, "step": 17161 }, { "epoch": 2.824896106211683, "grad_norm": 0.2914111218959242, "learning_rate": 3.6475511788974735e-06, "loss": 0.4571, "step": 17162 }, { "epoch": 2.8250606904307993, "grad_norm": 0.31147738717265716, "learning_rate": 3.6470896309557925e-06, "loss": 0.4219, "step": 17163 }, { "epoch": 2.825225274649916, "grad_norm": 0.31456881040370577, "learning_rate": 3.6466280923207297e-06, "loss": 0.4485, "step": 17164 }, { "epoch": 2.8253898588690323, "grad_norm": 0.2519660907558552, "learning_rate": 3.6461665629973207e-06, "loss": 0.43, "step": 17165 }, { "epoch": 2.8255544430881487, "grad_norm": 0.32504354549709374, "learning_rate": 3.6457050429906e-06, "loss": 0.4183, "step": 17166 }, { "epoch": 2.825719027307265, "grad_norm": 0.3063441324912973, "learning_rate": 3.6452435323056063e-06, "loss": 0.4233, "step": 17167 }, { "epoch": 2.8258836115263817, "grad_norm": 0.2825098525755312, "learning_rate": 3.644782030947373e-06, "loss": 0.4453, "step": 17168 }, { "epoch": 2.826048195745498, "grad_norm": 0.3196720940852849, "learning_rate": 3.6443205389209384e-06, "loss": 0.4341, "step": 17169 }, { "epoch": 2.8262127799646146, "grad_norm": 0.30787067603334795, "learning_rate": 3.6438590562313345e-06, "loss": 0.4246, "step": 17170 }, { "epoch": 2.826377364183731, "grad_norm": 0.3398306678988993, "learning_rate": 3.643397582883602e-06, "loss": 0.428, "step": 17171 }, { "epoch": 2.8265419484028476, "grad_norm": 0.6485214365786198, "learning_rate": 3.642936118882771e-06, "loss": 0.424, "step": 17172 }, { "epoch": 2.826706532621964, "grad_norm": 0.287575292203491, "learning_rate": 3.642474664233879e-06, "loss": 0.4514, "step": 17173 }, { "epoch": 2.82687111684108, "grad_norm": 0.3992671162860481, "learning_rate": 3.6420132189419624e-06, "loss": 0.4402, "step": 17174 }, { "epoch": 2.8270357010601965, "grad_norm": 0.29128519496400274, "learning_rate": 3.641551783012054e-06, "loss": 0.4127, "step": 17175 }, { "epoch": 2.827200285279313, "grad_norm": 0.33820307623461265, "learning_rate": 3.64109035644919e-06, "loss": 0.4298, "step": 17176 }, { "epoch": 2.8273648694984295, "grad_norm": 0.5258143016939788, "learning_rate": 3.640628939258405e-06, "loss": 0.4313, "step": 17177 }, { "epoch": 2.827529453717546, "grad_norm": 0.33421939150531105, "learning_rate": 3.640167531444735e-06, "loss": 0.4433, "step": 17178 }, { "epoch": 2.8276940379366624, "grad_norm": 0.3489542375657524, "learning_rate": 3.639706133013212e-06, "loss": 0.421, "step": 17179 }, { "epoch": 2.827858622155779, "grad_norm": 0.3365482386357486, "learning_rate": 3.6392447439688744e-06, "loss": 0.4159, "step": 17180 }, { "epoch": 2.8280232063748953, "grad_norm": 0.33791730236571293, "learning_rate": 3.638783364316755e-06, "loss": 0.4452, "step": 17181 }, { "epoch": 2.828187790594012, "grad_norm": 0.3057041855992184, "learning_rate": 3.6383219940618863e-06, "loss": 0.4048, "step": 17182 }, { "epoch": 2.8283523748131283, "grad_norm": 0.33764982152904666, "learning_rate": 3.6378606332093046e-06, "loss": 0.4338, "step": 17183 }, { "epoch": 2.8285169590322448, "grad_norm": 0.3030336775119331, "learning_rate": 3.6373992817640427e-06, "loss": 0.4195, "step": 17184 }, { "epoch": 2.8286815432513612, "grad_norm": 0.3335775815830645, "learning_rate": 3.636937939731137e-06, "loss": 0.4098, "step": 17185 }, { "epoch": 2.8288461274704777, "grad_norm": 0.2719457502171315, "learning_rate": 3.6364766071156185e-06, "loss": 0.4247, "step": 17186 }, { "epoch": 2.829010711689594, "grad_norm": 0.2997921615260896, "learning_rate": 3.636015283922524e-06, "loss": 0.444, "step": 17187 }, { "epoch": 2.8291752959087106, "grad_norm": 0.3194612110051689, "learning_rate": 3.635553970156886e-06, "loss": 0.445, "step": 17188 }, { "epoch": 2.829339880127827, "grad_norm": 0.313158008599718, "learning_rate": 3.6350926658237367e-06, "loss": 0.4327, "step": 17189 }, { "epoch": 2.8295044643469436, "grad_norm": 0.28656355456574967, "learning_rate": 3.634631370928113e-06, "loss": 0.4306, "step": 17190 }, { "epoch": 2.82966904856606, "grad_norm": 0.49093999604571625, "learning_rate": 3.6341700854750433e-06, "loss": 0.4238, "step": 17191 }, { "epoch": 2.8298336327851765, "grad_norm": 0.312583077459305, "learning_rate": 3.6337088094695677e-06, "loss": 0.4267, "step": 17192 }, { "epoch": 2.829998217004293, "grad_norm": 0.433786744476954, "learning_rate": 3.6332475429167125e-06, "loss": 0.4285, "step": 17193 }, { "epoch": 2.8301628012234095, "grad_norm": 0.3632006308997577, "learning_rate": 3.632786285821517e-06, "loss": 0.418, "step": 17194 }, { "epoch": 2.830327385442526, "grad_norm": 0.30953167349296806, "learning_rate": 3.6323250381890107e-06, "loss": 0.4368, "step": 17195 }, { "epoch": 2.8304919696616424, "grad_norm": 0.31743954151520987, "learning_rate": 3.6318638000242263e-06, "loss": 0.4372, "step": 17196 }, { "epoch": 2.830656553880759, "grad_norm": 0.30182454303424505, "learning_rate": 3.6314025713321985e-06, "loss": 0.4286, "step": 17197 }, { "epoch": 2.8308211380998753, "grad_norm": 0.38935321783769483, "learning_rate": 3.630941352117958e-06, "loss": 0.4297, "step": 17198 }, { "epoch": 2.8309857223189914, "grad_norm": 0.30084220698938574, "learning_rate": 3.63048014238654e-06, "loss": 0.4244, "step": 17199 }, { "epoch": 2.831150306538108, "grad_norm": 0.3576438574713621, "learning_rate": 3.6300189421429752e-06, "loss": 0.414, "step": 17200 }, { "epoch": 2.8313148907572243, "grad_norm": 0.8043356118524135, "learning_rate": 3.6295577513922975e-06, "loss": 0.4545, "step": 17201 }, { "epoch": 2.831479474976341, "grad_norm": 0.4242885589241204, "learning_rate": 3.6290965701395374e-06, "loss": 0.4416, "step": 17202 }, { "epoch": 2.8316440591954573, "grad_norm": 0.2974338529965238, "learning_rate": 3.6286353983897272e-06, "loss": 0.4296, "step": 17203 }, { "epoch": 2.8318086434145737, "grad_norm": 0.3484590649079067, "learning_rate": 3.6281742361479004e-06, "loss": 0.429, "step": 17204 }, { "epoch": 2.83197322763369, "grad_norm": 0.3206797393939164, "learning_rate": 3.627713083419088e-06, "loss": 0.4444, "step": 17205 }, { "epoch": 2.8321378118528067, "grad_norm": 0.32626885839653796, "learning_rate": 3.627251940208323e-06, "loss": 0.4112, "step": 17206 }, { "epoch": 2.832302396071923, "grad_norm": 0.31874147694777155, "learning_rate": 3.626790806520635e-06, "loss": 0.4244, "step": 17207 }, { "epoch": 2.8324669802910396, "grad_norm": 0.35748771708537413, "learning_rate": 3.6263296823610594e-06, "loss": 0.4601, "step": 17208 }, { "epoch": 2.832631564510156, "grad_norm": 0.2769410845748084, "learning_rate": 3.6258685677346256e-06, "loss": 0.4284, "step": 17209 }, { "epoch": 2.8327961487292725, "grad_norm": 0.34461088576620236, "learning_rate": 3.6254074626463626e-06, "loss": 0.4311, "step": 17210 }, { "epoch": 2.832960732948389, "grad_norm": 0.286261312823275, "learning_rate": 3.624946367101307e-06, "loss": 0.4372, "step": 17211 }, { "epoch": 2.8331253171675055, "grad_norm": 0.3474396602887607, "learning_rate": 3.624485281104485e-06, "loss": 0.4219, "step": 17212 }, { "epoch": 2.833289901386622, "grad_norm": 0.2918359905236362, "learning_rate": 3.6240242046609327e-06, "loss": 0.4382, "step": 17213 }, { "epoch": 2.8334544856057384, "grad_norm": 0.29510176438923397, "learning_rate": 3.6235631377756767e-06, "loss": 0.4456, "step": 17214 }, { "epoch": 2.833619069824855, "grad_norm": 0.3666142725303603, "learning_rate": 3.623102080453751e-06, "loss": 0.4147, "step": 17215 }, { "epoch": 2.8337836540439714, "grad_norm": 0.3449592834790887, "learning_rate": 3.6226410327001854e-06, "loss": 0.4254, "step": 17216 }, { "epoch": 2.833948238263088, "grad_norm": 0.474561524529995, "learning_rate": 3.62217999452001e-06, "loss": 0.4259, "step": 17217 }, { "epoch": 2.8341128224822043, "grad_norm": 0.3081507109268821, "learning_rate": 3.6217189659182566e-06, "loss": 0.4151, "step": 17218 }, { "epoch": 2.834277406701321, "grad_norm": 0.3440200959668479, "learning_rate": 3.621257946899955e-06, "loss": 0.4122, "step": 17219 }, { "epoch": 2.8344419909204372, "grad_norm": 0.292391216530612, "learning_rate": 3.6207969374701375e-06, "loss": 0.448, "step": 17220 }, { "epoch": 2.8346065751395537, "grad_norm": 0.26545459000328425, "learning_rate": 3.6203359376338298e-06, "loss": 0.4372, "step": 17221 }, { "epoch": 2.83477115935867, "grad_norm": 0.33534217289780477, "learning_rate": 3.6198749473960685e-06, "loss": 0.4374, "step": 17222 }, { "epoch": 2.8349357435777867, "grad_norm": 0.35976061171612134, "learning_rate": 3.6194139667618774e-06, "loss": 0.4318, "step": 17223 }, { "epoch": 2.835100327796903, "grad_norm": 0.5249155215893581, "learning_rate": 3.618952995736293e-06, "loss": 0.4324, "step": 17224 }, { "epoch": 2.8352649120160196, "grad_norm": 0.3894814558327808, "learning_rate": 3.6184920343243406e-06, "loss": 0.4212, "step": 17225 }, { "epoch": 2.835429496235136, "grad_norm": 0.2899878228648192, "learning_rate": 3.6180310825310505e-06, "loss": 0.4332, "step": 17226 }, { "epoch": 2.8355940804542525, "grad_norm": 0.2955486946134394, "learning_rate": 3.617570140361454e-06, "loss": 0.4232, "step": 17227 }, { "epoch": 2.835758664673369, "grad_norm": 0.4856743837521025, "learning_rate": 3.6171092078205794e-06, "loss": 0.4292, "step": 17228 }, { "epoch": 2.8359232488924855, "grad_norm": 0.2852401565044446, "learning_rate": 3.6166482849134573e-06, "loss": 0.4355, "step": 17229 }, { "epoch": 2.836087833111602, "grad_norm": 0.4321899265913684, "learning_rate": 3.6161873716451158e-06, "loss": 0.4199, "step": 17230 }, { "epoch": 2.8362524173307184, "grad_norm": 0.33996527413233607, "learning_rate": 3.6157264680205853e-06, "loss": 0.4311, "step": 17231 }, { "epoch": 2.836417001549835, "grad_norm": 0.3379685630381169, "learning_rate": 3.6152655740448963e-06, "loss": 0.437, "step": 17232 }, { "epoch": 2.8365815857689514, "grad_norm": 0.36981142619782814, "learning_rate": 3.6148046897230744e-06, "loss": 0.4246, "step": 17233 }, { "epoch": 2.836746169988068, "grad_norm": 0.3729699046682807, "learning_rate": 3.6143438150601513e-06, "loss": 0.4486, "step": 17234 }, { "epoch": 2.8369107542071843, "grad_norm": 0.4633154252788908, "learning_rate": 3.6138829500611537e-06, "loss": 0.4265, "step": 17235 }, { "epoch": 2.8370753384263008, "grad_norm": 0.44586719350424964, "learning_rate": 3.6134220947311134e-06, "loss": 0.4336, "step": 17236 }, { "epoch": 2.8372399226454172, "grad_norm": 0.2940424118350281, "learning_rate": 3.6129612490750555e-06, "loss": 0.4263, "step": 17237 }, { "epoch": 2.8374045068645337, "grad_norm": 0.25917279093973444, "learning_rate": 3.612500413098012e-06, "loss": 0.4268, "step": 17238 }, { "epoch": 2.83756909108365, "grad_norm": 0.31873135212679354, "learning_rate": 3.612039586805011e-06, "loss": 0.4382, "step": 17239 }, { "epoch": 2.8377336753027667, "grad_norm": 0.30085694031050964, "learning_rate": 3.611578770201076e-06, "loss": 0.4152, "step": 17240 }, { "epoch": 2.8378982595218827, "grad_norm": 0.3951723660102914, "learning_rate": 3.611117963291242e-06, "loss": 0.4378, "step": 17241 }, { "epoch": 2.838062843740999, "grad_norm": 0.36034714559106945, "learning_rate": 3.6106571660805315e-06, "loss": 0.4363, "step": 17242 }, { "epoch": 2.8382274279601156, "grad_norm": 0.34931684788064654, "learning_rate": 3.6101963785739774e-06, "loss": 0.417, "step": 17243 }, { "epoch": 2.838392012179232, "grad_norm": 0.36458261655129637, "learning_rate": 3.609735600776604e-06, "loss": 0.4313, "step": 17244 }, { "epoch": 2.8385565963983486, "grad_norm": 0.30291655119692484, "learning_rate": 3.609274832693441e-06, "loss": 0.432, "step": 17245 }, { "epoch": 2.838721180617465, "grad_norm": 0.25942457772664856, "learning_rate": 3.608814074329516e-06, "loss": 0.4488, "step": 17246 }, { "epoch": 2.8388857648365815, "grad_norm": 0.3107357539755343, "learning_rate": 3.6083533256898546e-06, "loss": 0.4363, "step": 17247 }, { "epoch": 2.839050349055698, "grad_norm": 0.38995430767614275, "learning_rate": 3.607892586779487e-06, "loss": 0.4233, "step": 17248 }, { "epoch": 2.8392149332748144, "grad_norm": 0.35307916284665136, "learning_rate": 3.6074318576034388e-06, "loss": 0.4271, "step": 17249 }, { "epoch": 2.839379517493931, "grad_norm": 0.3120446790305785, "learning_rate": 3.6069711381667385e-06, "loss": 0.4256, "step": 17250 }, { "epoch": 2.8395441017130474, "grad_norm": 0.4791097839923694, "learning_rate": 3.606510428474412e-06, "loss": 0.4365, "step": 17251 }, { "epoch": 2.839708685932164, "grad_norm": 0.3135030321580087, "learning_rate": 3.606049728531489e-06, "loss": 0.4154, "step": 17252 }, { "epoch": 2.8398732701512803, "grad_norm": 0.3228397935016283, "learning_rate": 3.6055890383429934e-06, "loss": 0.4498, "step": 17253 }, { "epoch": 2.840037854370397, "grad_norm": 0.5532350580857291, "learning_rate": 3.6051283579139522e-06, "loss": 0.4389, "step": 17254 }, { "epoch": 2.8402024385895133, "grad_norm": 0.43181483714512964, "learning_rate": 3.604667687249395e-06, "loss": 0.4346, "step": 17255 }, { "epoch": 2.8403670228086297, "grad_norm": 0.33784989362316176, "learning_rate": 3.604207026354345e-06, "loss": 0.4429, "step": 17256 }, { "epoch": 2.840531607027746, "grad_norm": 0.31093719233205325, "learning_rate": 3.603746375233831e-06, "loss": 0.4227, "step": 17257 }, { "epoch": 2.8406961912468627, "grad_norm": 0.25628383000837457, "learning_rate": 3.6032857338928787e-06, "loss": 0.4347, "step": 17258 }, { "epoch": 2.840860775465979, "grad_norm": 0.3291742395980252, "learning_rate": 3.602825102336515e-06, "loss": 0.4139, "step": 17259 }, { "epoch": 2.8410253596850956, "grad_norm": 0.286703019845901, "learning_rate": 3.602364480569767e-06, "loss": 0.4358, "step": 17260 }, { "epoch": 2.841189943904212, "grad_norm": 0.3262119607648993, "learning_rate": 3.601903868597657e-06, "loss": 0.4492, "step": 17261 }, { "epoch": 2.8413545281233286, "grad_norm": 0.2642906042273244, "learning_rate": 3.601443266425216e-06, "loss": 0.4361, "step": 17262 }, { "epoch": 2.841519112342445, "grad_norm": 0.30197138080336206, "learning_rate": 3.6009826740574656e-06, "loss": 0.4273, "step": 17263 }, { "epoch": 2.8416836965615615, "grad_norm": 0.3278386663356908, "learning_rate": 3.600522091499434e-06, "loss": 0.4491, "step": 17264 }, { "epoch": 2.841848280780678, "grad_norm": 0.366249872998856, "learning_rate": 3.600061518756146e-06, "loss": 0.4365, "step": 17265 }, { "epoch": 2.842012864999794, "grad_norm": 0.32644035623795087, "learning_rate": 3.5996009558326287e-06, "loss": 0.4083, "step": 17266 }, { "epoch": 2.8421774492189105, "grad_norm": 0.33766069885696454, "learning_rate": 3.5991404027339047e-06, "loss": 0.4114, "step": 17267 }, { "epoch": 2.842342033438027, "grad_norm": 0.34999581741863056, "learning_rate": 3.5986798594650023e-06, "loss": 0.4398, "step": 17268 }, { "epoch": 2.8425066176571434, "grad_norm": 0.29690638842098677, "learning_rate": 3.598219326030946e-06, "loss": 0.4379, "step": 17269 }, { "epoch": 2.84267120187626, "grad_norm": 0.5821848353060289, "learning_rate": 3.597758802436759e-06, "loss": 0.4272, "step": 17270 }, { "epoch": 2.8428357860953763, "grad_norm": 0.27318836340541974, "learning_rate": 3.5972982886874706e-06, "loss": 0.4364, "step": 17271 }, { "epoch": 2.843000370314493, "grad_norm": 0.2907001035804837, "learning_rate": 3.5968377847880992e-06, "loss": 0.4385, "step": 17272 }, { "epoch": 2.8431649545336093, "grad_norm": 0.35475419183949747, "learning_rate": 3.5963772907436767e-06, "loss": 0.4404, "step": 17273 }, { "epoch": 2.8433295387527258, "grad_norm": 0.2929670971585379, "learning_rate": 3.5959168065592217e-06, "loss": 0.4269, "step": 17274 }, { "epoch": 2.8434941229718422, "grad_norm": 0.31006705955902614, "learning_rate": 3.5954563322397647e-06, "loss": 0.419, "step": 17275 }, { "epoch": 2.8436587071909587, "grad_norm": 0.4804142954059058, "learning_rate": 3.594995867790326e-06, "loss": 0.431, "step": 17276 }, { "epoch": 2.843823291410075, "grad_norm": 0.38870693622529257, "learning_rate": 3.59453541321593e-06, "loss": 0.4359, "step": 17277 }, { "epoch": 2.8439878756291916, "grad_norm": 0.3286508395949563, "learning_rate": 3.5940749685216036e-06, "loss": 0.4262, "step": 17278 }, { "epoch": 2.844152459848308, "grad_norm": 0.3152080061760849, "learning_rate": 3.5936145337123686e-06, "loss": 0.4265, "step": 17279 }, { "epoch": 2.8443170440674246, "grad_norm": 0.3129194774671554, "learning_rate": 3.593154108793251e-06, "loss": 0.4297, "step": 17280 }, { "epoch": 2.844481628286541, "grad_norm": 0.38874431455021785, "learning_rate": 3.592693693769273e-06, "loss": 0.4383, "step": 17281 }, { "epoch": 2.8446462125056575, "grad_norm": 0.37803432983638346, "learning_rate": 3.592233288645461e-06, "loss": 0.4305, "step": 17282 }, { "epoch": 2.844810796724774, "grad_norm": 0.45048005972917665, "learning_rate": 3.591772893426836e-06, "loss": 0.426, "step": 17283 }, { "epoch": 2.8449753809438905, "grad_norm": 0.28005271787070796, "learning_rate": 3.591312508118422e-06, "loss": 0.4315, "step": 17284 }, { "epoch": 2.845139965163007, "grad_norm": 0.33550972302654886, "learning_rate": 3.590852132725244e-06, "loss": 0.4234, "step": 17285 }, { "epoch": 2.8453045493821234, "grad_norm": 0.32102823504171063, "learning_rate": 3.5903917672523238e-06, "loss": 0.442, "step": 17286 }, { "epoch": 2.84546913360124, "grad_norm": 0.35483966039125975, "learning_rate": 3.5899314117046864e-06, "loss": 0.4241, "step": 17287 }, { "epoch": 2.8456337178203563, "grad_norm": 0.31500137223962354, "learning_rate": 3.5894710660873536e-06, "loss": 0.4432, "step": 17288 }, { "epoch": 2.845798302039473, "grad_norm": 0.29601726038616466, "learning_rate": 3.5890107304053498e-06, "loss": 0.4144, "step": 17289 }, { "epoch": 2.8459628862585893, "grad_norm": 0.2838766819295395, "learning_rate": 3.588550404663699e-06, "loss": 0.4503, "step": 17290 }, { "epoch": 2.8461274704777058, "grad_norm": 0.32551607430476087, "learning_rate": 3.5880900888674185e-06, "loss": 0.4515, "step": 17291 }, { "epoch": 2.8462920546968222, "grad_norm": 0.303023119862285, "learning_rate": 3.5876297830215387e-06, "loss": 0.4282, "step": 17292 }, { "epoch": 2.8464566389159387, "grad_norm": 0.3677300499257935, "learning_rate": 3.5871694871310755e-06, "loss": 0.4279, "step": 17293 }, { "epoch": 2.846621223135055, "grad_norm": 0.2903798105398287, "learning_rate": 3.586709201201057e-06, "loss": 0.426, "step": 17294 }, { "epoch": 2.8467858073541716, "grad_norm": 0.2793437932527116, "learning_rate": 3.5862489252365017e-06, "loss": 0.4373, "step": 17295 }, { "epoch": 2.846950391573288, "grad_norm": 0.3094071114496996, "learning_rate": 3.585788659242434e-06, "loss": 0.4581, "step": 17296 }, { "epoch": 2.8471149757924046, "grad_norm": 0.3039670945668618, "learning_rate": 3.5853284032238763e-06, "loss": 0.4145, "step": 17297 }, { "epoch": 2.847279560011521, "grad_norm": 0.6203273417092992, "learning_rate": 3.584868157185849e-06, "loss": 0.4232, "step": 17298 }, { "epoch": 2.8474441442306375, "grad_norm": 0.43621089193038654, "learning_rate": 3.5844079211333753e-06, "loss": 0.4257, "step": 17299 }, { "epoch": 2.847608728449754, "grad_norm": 0.3902462502959947, "learning_rate": 3.5839476950714765e-06, "loss": 0.4344, "step": 17300 }, { "epoch": 2.8477733126688705, "grad_norm": 0.4501508527387159, "learning_rate": 3.5834874790051763e-06, "loss": 0.4309, "step": 17301 }, { "epoch": 2.847937896887987, "grad_norm": 0.37557686051108913, "learning_rate": 3.583027272939493e-06, "loss": 0.4415, "step": 17302 }, { "epoch": 2.8481024811071034, "grad_norm": 0.3013870558351485, "learning_rate": 3.5825670768794525e-06, "loss": 0.4305, "step": 17303 }, { "epoch": 2.84826706532622, "grad_norm": 0.3542117988239237, "learning_rate": 3.5821068908300713e-06, "loss": 0.4372, "step": 17304 }, { "epoch": 2.8484316495453363, "grad_norm": 0.31145913931400526, "learning_rate": 3.5816467147963757e-06, "loss": 0.4244, "step": 17305 }, { "epoch": 2.848596233764453, "grad_norm": 0.3143075586074146, "learning_rate": 3.5811865487833844e-06, "loss": 0.4287, "step": 17306 }, { "epoch": 2.8487608179835693, "grad_norm": 0.38184845853847327, "learning_rate": 3.5807263927961173e-06, "loss": 0.4578, "step": 17307 }, { "epoch": 2.8489254022026853, "grad_norm": 0.32585932374428855, "learning_rate": 3.580266246839598e-06, "loss": 0.4245, "step": 17308 }, { "epoch": 2.849089986421802, "grad_norm": 0.3651766852599073, "learning_rate": 3.579806110918845e-06, "loss": 0.4211, "step": 17309 }, { "epoch": 2.8492545706409182, "grad_norm": 0.4191280874392949, "learning_rate": 3.5793459850388822e-06, "loss": 0.4079, "step": 17310 }, { "epoch": 2.8494191548600347, "grad_norm": 0.32954077567316326, "learning_rate": 3.578885869204727e-06, "loss": 0.4324, "step": 17311 }, { "epoch": 2.849583739079151, "grad_norm": 0.3743297123453578, "learning_rate": 3.578425763421403e-06, "loss": 0.4265, "step": 17312 }, { "epoch": 2.8497483232982677, "grad_norm": 0.2850966383078337, "learning_rate": 3.57796566769393e-06, "loss": 0.4212, "step": 17313 }, { "epoch": 2.849912907517384, "grad_norm": 0.41129961107418034, "learning_rate": 3.5775055820273255e-06, "loss": 0.4365, "step": 17314 }, { "epoch": 2.8500774917365006, "grad_norm": 0.3414907552588458, "learning_rate": 3.5770455064266136e-06, "loss": 0.4338, "step": 17315 }, { "epoch": 2.850242075955617, "grad_norm": 0.29009696112826655, "learning_rate": 3.576585440896811e-06, "loss": 0.4174, "step": 17316 }, { "epoch": 2.8504066601747335, "grad_norm": 0.4310254529123287, "learning_rate": 3.5761253854429407e-06, "loss": 0.4204, "step": 17317 }, { "epoch": 2.85057124439385, "grad_norm": 0.31112114797040397, "learning_rate": 3.575665340070021e-06, "loss": 0.4341, "step": 17318 }, { "epoch": 2.8507358286129665, "grad_norm": 0.3279739330863808, "learning_rate": 3.5752053047830725e-06, "loss": 0.4322, "step": 17319 }, { "epoch": 2.850900412832083, "grad_norm": 0.3301961644714612, "learning_rate": 3.5747452795871153e-06, "loss": 0.4349, "step": 17320 }, { "epoch": 2.8510649970511994, "grad_norm": 0.277429509503672, "learning_rate": 3.574285264487167e-06, "loss": 0.4504, "step": 17321 }, { "epoch": 2.851229581270316, "grad_norm": 0.4076592298941959, "learning_rate": 3.57382525948825e-06, "loss": 0.4261, "step": 17322 }, { "epoch": 2.8513941654894324, "grad_norm": 0.2610069194071665, "learning_rate": 3.5733652645953796e-06, "loss": 0.4286, "step": 17323 }, { "epoch": 2.851558749708549, "grad_norm": 0.3170634069177754, "learning_rate": 3.5729052798135806e-06, "loss": 0.4292, "step": 17324 }, { "epoch": 2.8517233339276653, "grad_norm": 0.37389489628158157, "learning_rate": 3.572445305147866e-06, "loss": 0.4369, "step": 17325 }, { "epoch": 2.8518879181467818, "grad_norm": 0.30498221629286726, "learning_rate": 3.571985340603261e-06, "loss": 0.4343, "step": 17326 }, { "epoch": 2.8520525023658982, "grad_norm": 0.32470011381080516, "learning_rate": 3.571525386184781e-06, "loss": 0.4253, "step": 17327 }, { "epoch": 2.8522170865850147, "grad_norm": 0.3199015472926689, "learning_rate": 3.5710654418974444e-06, "loss": 0.4437, "step": 17328 }, { "epoch": 2.852381670804131, "grad_norm": 0.3304374071937174, "learning_rate": 3.5706055077462715e-06, "loss": 0.4349, "step": 17329 }, { "epoch": 2.8525462550232477, "grad_norm": 0.5648046194401402, "learning_rate": 3.5701455837362798e-06, "loss": 0.4487, "step": 17330 }, { "epoch": 2.852710839242364, "grad_norm": 0.3312659649653881, "learning_rate": 3.5696856698724896e-06, "loss": 0.4234, "step": 17331 }, { "epoch": 2.8528754234614806, "grad_norm": 0.2999730938000472, "learning_rate": 3.5692257661599166e-06, "loss": 0.4201, "step": 17332 }, { "epoch": 2.8530400076805966, "grad_norm": 0.3054685782593052, "learning_rate": 3.5687658726035825e-06, "loss": 0.4441, "step": 17333 }, { "epoch": 2.853204591899713, "grad_norm": 0.29541052668524603, "learning_rate": 3.5683059892085025e-06, "loss": 0.4456, "step": 17334 }, { "epoch": 2.8533691761188296, "grad_norm": 0.39264174913702915, "learning_rate": 3.5678461159796953e-06, "loss": 0.4203, "step": 17335 }, { "epoch": 2.853533760337946, "grad_norm": 0.2988594884479697, "learning_rate": 3.56738625292218e-06, "loss": 0.4346, "step": 17336 }, { "epoch": 2.8536983445570625, "grad_norm": 0.29792359684867575, "learning_rate": 3.5669264000409724e-06, "loss": 0.4224, "step": 17337 }, { "epoch": 2.853862928776179, "grad_norm": 0.31966824404577093, "learning_rate": 3.566466557341093e-06, "loss": 0.43, "step": 17338 }, { "epoch": 2.8540275129952954, "grad_norm": 0.3570823411675509, "learning_rate": 3.5660067248275568e-06, "loss": 0.4297, "step": 17339 }, { "epoch": 2.854192097214412, "grad_norm": 0.2703363496172826, "learning_rate": 3.565546902505383e-06, "loss": 0.4263, "step": 17340 }, { "epoch": 2.8543566814335284, "grad_norm": 0.3790065141753752, "learning_rate": 3.5650870903795897e-06, "loss": 0.437, "step": 17341 }, { "epoch": 2.854521265652645, "grad_norm": 0.3175430522401861, "learning_rate": 3.56462728845519e-06, "loss": 0.4308, "step": 17342 }, { "epoch": 2.8546858498717613, "grad_norm": 0.3151100167548812, "learning_rate": 3.5641674967372074e-06, "loss": 0.4393, "step": 17343 }, { "epoch": 2.854850434090878, "grad_norm": 0.33334304766233647, "learning_rate": 3.563707715230652e-06, "loss": 0.4384, "step": 17344 }, { "epoch": 2.8550150183099943, "grad_norm": 0.28834839600363854, "learning_rate": 3.5632479439405475e-06, "loss": 0.4298, "step": 17345 }, { "epoch": 2.8551796025291107, "grad_norm": 0.3013133808479478, "learning_rate": 3.562788182871906e-06, "loss": 0.4468, "step": 17346 }, { "epoch": 2.855344186748227, "grad_norm": 0.2896137490725572, "learning_rate": 3.5623284320297465e-06, "loss": 0.4125, "step": 17347 }, { "epoch": 2.8555087709673437, "grad_norm": 0.2920795502549955, "learning_rate": 3.561868691419084e-06, "loss": 0.4212, "step": 17348 }, { "epoch": 2.85567335518646, "grad_norm": 0.3502524102384909, "learning_rate": 3.561408961044937e-06, "loss": 0.4219, "step": 17349 }, { "epoch": 2.8558379394055766, "grad_norm": 0.2963465990646538, "learning_rate": 3.5609492409123205e-06, "loss": 0.4222, "step": 17350 }, { "epoch": 2.856002523624693, "grad_norm": 0.46442209356470837, "learning_rate": 3.5604895310262503e-06, "loss": 0.4449, "step": 17351 }, { "epoch": 2.8561671078438096, "grad_norm": 0.33772961723301825, "learning_rate": 3.5600298313917452e-06, "loss": 0.4438, "step": 17352 }, { "epoch": 2.856331692062926, "grad_norm": 0.2988364502504436, "learning_rate": 3.559570142013817e-06, "loss": 0.4336, "step": 17353 }, { "epoch": 2.8564962762820425, "grad_norm": 0.3835527090232755, "learning_rate": 3.5591104628974865e-06, "loss": 0.4489, "step": 17354 }, { "epoch": 2.856660860501159, "grad_norm": 0.31238237100554256, "learning_rate": 3.558650794047764e-06, "loss": 0.4256, "step": 17355 }, { "epoch": 2.8568254447202754, "grad_norm": 0.30854875596452236, "learning_rate": 3.558191135469672e-06, "loss": 0.4261, "step": 17356 }, { "epoch": 2.856990028939392, "grad_norm": 0.3281040543302565, "learning_rate": 3.5577314871682214e-06, "loss": 0.4355, "step": 17357 }, { "epoch": 2.8571546131585084, "grad_norm": 0.46861731742534457, "learning_rate": 3.557271849148427e-06, "loss": 0.4253, "step": 17358 }, { "epoch": 2.857319197377625, "grad_norm": 0.31972857803363086, "learning_rate": 3.556812221415307e-06, "loss": 0.4595, "step": 17359 }, { "epoch": 2.8574837815967413, "grad_norm": 0.34830650317616807, "learning_rate": 3.5563526039738758e-06, "loss": 0.4333, "step": 17360 }, { "epoch": 2.857648365815858, "grad_norm": 0.26551698406335, "learning_rate": 3.5558929968291484e-06, "loss": 0.4248, "step": 17361 }, { "epoch": 2.8578129500349743, "grad_norm": 0.3552027859489436, "learning_rate": 3.5554333999861386e-06, "loss": 0.4278, "step": 17362 }, { "epoch": 2.8579775342540907, "grad_norm": 0.49062427891614496, "learning_rate": 3.5549738134498643e-06, "loss": 0.4365, "step": 17363 }, { "epoch": 2.858142118473207, "grad_norm": 0.39124919194394747, "learning_rate": 3.554514237225339e-06, "loss": 0.4399, "step": 17364 }, { "epoch": 2.8583067026923237, "grad_norm": 2.118110390489228, "learning_rate": 3.554054671317576e-06, "loss": 0.4454, "step": 17365 }, { "epoch": 2.85847128691144, "grad_norm": 0.3123363002781529, "learning_rate": 3.5535951157315907e-06, "loss": 0.4008, "step": 17366 }, { "epoch": 2.8586358711305566, "grad_norm": 0.3400430711403663, "learning_rate": 3.5531355704723975e-06, "loss": 0.4138, "step": 17367 }, { "epoch": 2.858800455349673, "grad_norm": 0.34525379179719407, "learning_rate": 3.552676035545012e-06, "loss": 0.4349, "step": 17368 }, { "epoch": 2.8589650395687896, "grad_norm": 0.2987836071432275, "learning_rate": 3.552216510954446e-06, "loss": 0.4202, "step": 17369 }, { "epoch": 2.859129623787906, "grad_norm": 0.3772431570981102, "learning_rate": 3.551756996705717e-06, "loss": 0.4281, "step": 17370 }, { "epoch": 2.8592942080070225, "grad_norm": 0.27907664529479576, "learning_rate": 3.5512974928038372e-06, "loss": 0.4228, "step": 17371 }, { "epoch": 2.859458792226139, "grad_norm": 0.4616168933271382, "learning_rate": 3.5508379992538183e-06, "loss": 0.4043, "step": 17372 }, { "epoch": 2.8596233764452554, "grad_norm": 0.3168197813319331, "learning_rate": 3.5503785160606784e-06, "loss": 0.4307, "step": 17373 }, { "epoch": 2.859787960664372, "grad_norm": 0.31139488991072456, "learning_rate": 3.549919043229427e-06, "loss": 0.3988, "step": 17374 }, { "epoch": 2.859952544883488, "grad_norm": 0.34256371374820876, "learning_rate": 3.5494595807650833e-06, "loss": 0.4285, "step": 17375 }, { "epoch": 2.8601171291026044, "grad_norm": 0.32999590179031507, "learning_rate": 3.5490001286726546e-06, "loss": 0.4054, "step": 17376 }, { "epoch": 2.860281713321721, "grad_norm": 0.3151567022626986, "learning_rate": 3.5485406869571574e-06, "loss": 0.424, "step": 17377 }, { "epoch": 2.8604462975408373, "grad_norm": 0.2944405518922663, "learning_rate": 3.5480812556236054e-06, "loss": 0.404, "step": 17378 }, { "epoch": 2.860610881759954, "grad_norm": 0.31649209921217125, "learning_rate": 3.5476218346770096e-06, "loss": 0.4265, "step": 17379 }, { "epoch": 2.8607754659790703, "grad_norm": 0.32118599442886553, "learning_rate": 3.547162424122385e-06, "loss": 0.4076, "step": 17380 }, { "epoch": 2.8609400501981868, "grad_norm": 0.34538102601435855, "learning_rate": 3.546703023964743e-06, "loss": 0.4455, "step": 17381 }, { "epoch": 2.8611046344173032, "grad_norm": 0.5686932534326282, "learning_rate": 3.546243634209098e-06, "loss": 0.4399, "step": 17382 }, { "epoch": 2.8612692186364197, "grad_norm": 0.35543383109799415, "learning_rate": 3.545784254860461e-06, "loss": 0.4455, "step": 17383 }, { "epoch": 2.861433802855536, "grad_norm": 0.6460606174437876, "learning_rate": 3.545324885923847e-06, "loss": 0.4401, "step": 17384 }, { "epoch": 2.8615983870746526, "grad_norm": 0.30017636310835727, "learning_rate": 3.544865527404266e-06, "loss": 0.4167, "step": 17385 }, { "epoch": 2.861762971293769, "grad_norm": 0.31817270123624747, "learning_rate": 3.5444061793067305e-06, "loss": 0.4135, "step": 17386 }, { "epoch": 2.8619275555128856, "grad_norm": 2.0942530739338068, "learning_rate": 3.5439468416362544e-06, "loss": 0.424, "step": 17387 }, { "epoch": 2.862092139732002, "grad_norm": 0.3462842069028323, "learning_rate": 3.543487514397848e-06, "loss": 0.4154, "step": 17388 }, { "epoch": 2.8622567239511185, "grad_norm": 0.31711819493067556, "learning_rate": 3.543028197596524e-06, "loss": 0.4368, "step": 17389 }, { "epoch": 2.862421308170235, "grad_norm": 0.392098853077831, "learning_rate": 3.5425688912372943e-06, "loss": 0.4379, "step": 17390 }, { "epoch": 2.8625858923893515, "grad_norm": 0.566164161477723, "learning_rate": 3.5421095953251714e-06, "loss": 0.4067, "step": 17391 }, { "epoch": 2.862750476608468, "grad_norm": 0.6690822106221481, "learning_rate": 3.5416503098651653e-06, "loss": 0.4234, "step": 17392 }, { "epoch": 2.8629150608275844, "grad_norm": 0.302852719194461, "learning_rate": 3.5411910348622897e-06, "loss": 0.4129, "step": 17393 }, { "epoch": 2.863079645046701, "grad_norm": 0.2589054574663652, "learning_rate": 3.5407317703215562e-06, "loss": 0.4519, "step": 17394 }, { "epoch": 2.8632442292658173, "grad_norm": 0.4101958079789623, "learning_rate": 3.5402725162479723e-06, "loss": 0.4272, "step": 17395 }, { "epoch": 2.863408813484934, "grad_norm": 0.26525454642865537, "learning_rate": 3.5398132726465524e-06, "loss": 0.4278, "step": 17396 }, { "epoch": 2.8635733977040503, "grad_norm": 0.3779916268979564, "learning_rate": 3.539354039522306e-06, "loss": 0.451, "step": 17397 }, { "epoch": 2.8637379819231668, "grad_norm": 0.36607349629965114, "learning_rate": 3.5388948168802463e-06, "loss": 0.4443, "step": 17398 }, { "epoch": 2.8639025661422832, "grad_norm": 0.3140599194344739, "learning_rate": 3.538435604725382e-06, "loss": 0.4313, "step": 17399 }, { "epoch": 2.8640671503613992, "grad_norm": 0.2858037967176737, "learning_rate": 3.5379764030627252e-06, "loss": 0.4064, "step": 17400 }, { "epoch": 2.8642317345805157, "grad_norm": 0.4345367756236418, "learning_rate": 3.537517211897286e-06, "loss": 0.4254, "step": 17401 }, { "epoch": 2.864396318799632, "grad_norm": 0.5451837729821961, "learning_rate": 3.5370580312340736e-06, "loss": 0.4369, "step": 17402 }, { "epoch": 2.8645609030187487, "grad_norm": 0.37529945803443476, "learning_rate": 3.5365988610781017e-06, "loss": 0.4083, "step": 17403 }, { "epoch": 2.864725487237865, "grad_norm": 0.3037152828583387, "learning_rate": 3.5361397014343765e-06, "loss": 0.4587, "step": 17404 }, { "epoch": 2.8648900714569816, "grad_norm": 0.35187267786260457, "learning_rate": 3.5356805523079118e-06, "loss": 0.4292, "step": 17405 }, { "epoch": 2.865054655676098, "grad_norm": 0.3494197798213748, "learning_rate": 3.5352214137037142e-06, "loss": 0.435, "step": 17406 }, { "epoch": 2.8652192398952145, "grad_norm": 0.4412802947995881, "learning_rate": 3.534762285626798e-06, "loss": 0.4333, "step": 17407 }, { "epoch": 2.865383824114331, "grad_norm": 0.475036997514968, "learning_rate": 3.5343031680821692e-06, "loss": 0.4351, "step": 17408 }, { "epoch": 2.8655484083334475, "grad_norm": 0.3499265166691491, "learning_rate": 3.5338440610748383e-06, "loss": 0.4361, "step": 17409 }, { "epoch": 2.865712992552564, "grad_norm": 0.3023794348814161, "learning_rate": 3.5333849646098163e-06, "loss": 0.4304, "step": 17410 }, { "epoch": 2.8658775767716804, "grad_norm": 0.366119061707016, "learning_rate": 3.5329258786921108e-06, "loss": 0.42, "step": 17411 }, { "epoch": 2.866042160990797, "grad_norm": 0.3774323397580119, "learning_rate": 3.532466803326733e-06, "loss": 0.4231, "step": 17412 }, { "epoch": 2.8662067452099134, "grad_norm": 0.31430262520379176, "learning_rate": 3.5320077385186905e-06, "loss": 0.4533, "step": 17413 }, { "epoch": 2.86637132942903, "grad_norm": 0.3252067503048915, "learning_rate": 3.5315486842729953e-06, "loss": 0.4278, "step": 17414 }, { "epoch": 2.8665359136481463, "grad_norm": 0.33254081249056655, "learning_rate": 3.531089640594653e-06, "loss": 0.4292, "step": 17415 }, { "epoch": 2.8667004978672628, "grad_norm": 0.32644976125481445, "learning_rate": 3.5306306074886727e-06, "loss": 0.4156, "step": 17416 }, { "epoch": 2.8668650820863792, "grad_norm": 0.36852603099967607, "learning_rate": 3.5301715849600656e-06, "loss": 0.4215, "step": 17417 }, { "epoch": 2.8670296663054957, "grad_norm": 0.3235318335429592, "learning_rate": 3.5297125730138384e-06, "loss": 0.4174, "step": 17418 }, { "epoch": 2.867194250524612, "grad_norm": 0.28132456297739444, "learning_rate": 3.5292535716550013e-06, "loss": 0.4454, "step": 17419 }, { "epoch": 2.8673588347437287, "grad_norm": 0.3586742226160919, "learning_rate": 3.5287945808885602e-06, "loss": 0.4249, "step": 17420 }, { "epoch": 2.867523418962845, "grad_norm": 0.3134723949749536, "learning_rate": 3.5283356007195267e-06, "loss": 0.4231, "step": 17421 }, { "epoch": 2.8676880031819616, "grad_norm": 0.2842395584626507, "learning_rate": 3.5278766311529083e-06, "loss": 0.4224, "step": 17422 }, { "epoch": 2.867852587401078, "grad_norm": 0.5224422130616279, "learning_rate": 3.527417672193709e-06, "loss": 0.4343, "step": 17423 }, { "epoch": 2.8680171716201945, "grad_norm": 0.8431668915744212, "learning_rate": 3.526958723846942e-06, "loss": 0.4366, "step": 17424 }, { "epoch": 2.868181755839311, "grad_norm": 0.30047647153435625, "learning_rate": 3.5264997861176117e-06, "loss": 0.4433, "step": 17425 }, { "epoch": 2.8683463400584275, "grad_norm": 0.3399635986036373, "learning_rate": 3.526040859010729e-06, "loss": 0.426, "step": 17426 }, { "epoch": 2.868510924277544, "grad_norm": 0.34800450958763834, "learning_rate": 3.525581942531298e-06, "loss": 0.4311, "step": 17427 }, { "epoch": 2.8686755084966604, "grad_norm": 0.35944897049779106, "learning_rate": 3.5251230366843294e-06, "loss": 0.4309, "step": 17428 }, { "epoch": 2.868840092715777, "grad_norm": 0.36495359198570226, "learning_rate": 3.5246641414748273e-06, "loss": 0.4274, "step": 17429 }, { "epoch": 2.8690046769348934, "grad_norm": 0.34425709534357957, "learning_rate": 3.524205256907803e-06, "loss": 0.4272, "step": 17430 }, { "epoch": 2.86916926115401, "grad_norm": 0.621363614941926, "learning_rate": 3.52374638298826e-06, "loss": 0.4213, "step": 17431 }, { "epoch": 2.8693338453731263, "grad_norm": 0.33192276900525747, "learning_rate": 3.523287519721207e-06, "loss": 0.4098, "step": 17432 }, { "epoch": 2.8694984295922428, "grad_norm": 0.3357576294843523, "learning_rate": 3.522828667111653e-06, "loss": 0.4359, "step": 17433 }, { "epoch": 2.8696630138113592, "grad_norm": 0.3811141647375937, "learning_rate": 3.5223698251645993e-06, "loss": 0.4202, "step": 17434 }, { "epoch": 2.8698275980304757, "grad_norm": 0.2769062285422329, "learning_rate": 3.521910993885058e-06, "loss": 0.4435, "step": 17435 }, { "epoch": 2.869992182249592, "grad_norm": 0.3068595168744032, "learning_rate": 3.5214521732780315e-06, "loss": 0.4417, "step": 17436 }, { "epoch": 2.8701567664687087, "grad_norm": 0.3188531411080381, "learning_rate": 3.5209933633485308e-06, "loss": 0.4268, "step": 17437 }, { "epoch": 2.870321350687825, "grad_norm": 0.36896820242208617, "learning_rate": 3.5205345641015593e-06, "loss": 0.4448, "step": 17438 }, { "epoch": 2.8704859349069416, "grad_norm": 0.39656766242190944, "learning_rate": 3.520075775542123e-06, "loss": 0.4211, "step": 17439 }, { "epoch": 2.870650519126058, "grad_norm": 0.32019722022746766, "learning_rate": 3.519616997675229e-06, "loss": 0.4338, "step": 17440 }, { "epoch": 2.870815103345174, "grad_norm": 0.2839626543679687, "learning_rate": 3.519158230505882e-06, "loss": 0.4261, "step": 17441 }, { "epoch": 2.8709796875642906, "grad_norm": 0.3489103217881381, "learning_rate": 3.5186994740390904e-06, "loss": 0.4542, "step": 17442 }, { "epoch": 2.871144271783407, "grad_norm": 0.38422954466838666, "learning_rate": 3.5182407282798575e-06, "loss": 0.4119, "step": 17443 }, { "epoch": 2.8713088560025235, "grad_norm": 0.46146800085205075, "learning_rate": 3.5177819932331905e-06, "loss": 0.4263, "step": 17444 }, { "epoch": 2.87147344022164, "grad_norm": 0.4116554893544578, "learning_rate": 3.517323268904096e-06, "loss": 0.4317, "step": 17445 }, { "epoch": 2.8716380244407564, "grad_norm": 0.44247422368616773, "learning_rate": 3.5168645552975754e-06, "loss": 0.4339, "step": 17446 }, { "epoch": 2.871802608659873, "grad_norm": 0.25576977603313145, "learning_rate": 3.516405852418638e-06, "loss": 0.4011, "step": 17447 }, { "epoch": 2.8719671928789894, "grad_norm": 0.3409850047465722, "learning_rate": 3.5159471602722856e-06, "loss": 0.4195, "step": 17448 }, { "epoch": 2.872131777098106, "grad_norm": 0.3528487025903805, "learning_rate": 3.5154884788635263e-06, "loss": 0.4265, "step": 17449 }, { "epoch": 2.8722963613172223, "grad_norm": 0.9376489113719304, "learning_rate": 3.515029808197363e-06, "loss": 0.4388, "step": 17450 }, { "epoch": 2.872460945536339, "grad_norm": 0.26099699964732215, "learning_rate": 3.5145711482788023e-06, "loss": 0.4194, "step": 17451 }, { "epoch": 2.8726255297554553, "grad_norm": 0.38026553404402125, "learning_rate": 3.5141124991128477e-06, "loss": 0.4176, "step": 17452 }, { "epoch": 2.8727901139745717, "grad_norm": 0.29526089747718154, "learning_rate": 3.5136538607045034e-06, "loss": 0.4332, "step": 17453 }, { "epoch": 2.872954698193688, "grad_norm": 0.30887571600772057, "learning_rate": 3.513195233058776e-06, "loss": 0.4357, "step": 17454 }, { "epoch": 2.8731192824128047, "grad_norm": 0.344099036251958, "learning_rate": 3.5127366161806655e-06, "loss": 0.4215, "step": 17455 }, { "epoch": 2.873283866631921, "grad_norm": 0.4456054969025151, "learning_rate": 3.512278010075182e-06, "loss": 0.4396, "step": 17456 }, { "epoch": 2.8734484508510376, "grad_norm": 0.4540551366967078, "learning_rate": 3.511819414747325e-06, "loss": 0.4468, "step": 17457 }, { "epoch": 2.873613035070154, "grad_norm": 0.3225405214185915, "learning_rate": 3.511360830202101e-06, "loss": 0.4459, "step": 17458 }, { "epoch": 2.8737776192892706, "grad_norm": 0.261252551838841, "learning_rate": 3.5109022564445134e-06, "loss": 0.428, "step": 17459 }, { "epoch": 2.873942203508387, "grad_norm": 0.34881152820244593, "learning_rate": 3.510443693479564e-06, "loss": 0.4519, "step": 17460 }, { "epoch": 2.8741067877275035, "grad_norm": 0.3918089656304943, "learning_rate": 3.5099851413122596e-06, "loss": 0.4387, "step": 17461 }, { "epoch": 2.87427137194662, "grad_norm": 0.39959836359225365, "learning_rate": 3.509526599947601e-06, "loss": 0.4246, "step": 17462 }, { "epoch": 2.8744359561657364, "grad_norm": 0.33673458398190126, "learning_rate": 3.5090680693905937e-06, "loss": 0.432, "step": 17463 }, { "epoch": 2.874600540384853, "grad_norm": 0.2979815248415507, "learning_rate": 3.50860954964624e-06, "loss": 0.4131, "step": 17464 }, { "epoch": 2.8747651246039694, "grad_norm": 0.39709378080258345, "learning_rate": 3.5081510407195443e-06, "loss": 0.4392, "step": 17465 }, { "epoch": 2.8749297088230854, "grad_norm": 0.313235605852531, "learning_rate": 3.5076925426155077e-06, "loss": 0.4301, "step": 17466 }, { "epoch": 2.875094293042202, "grad_norm": 0.3350202953389066, "learning_rate": 3.507234055339133e-06, "loss": 0.4333, "step": 17467 }, { "epoch": 2.8752588772613183, "grad_norm": 0.3393102073674103, "learning_rate": 3.506775578895426e-06, "loss": 0.4146, "step": 17468 }, { "epoch": 2.875423461480435, "grad_norm": 0.3597545770156154, "learning_rate": 3.506317113289385e-06, "loss": 0.4253, "step": 17469 }, { "epoch": 2.8755880456995513, "grad_norm": 0.42835161910731306, "learning_rate": 3.505858658526017e-06, "loss": 0.4154, "step": 17470 }, { "epoch": 2.8757526299186678, "grad_norm": 0.33497983250833635, "learning_rate": 3.5054002146103216e-06, "loss": 0.4248, "step": 17471 }, { "epoch": 2.8759172141377842, "grad_norm": 0.32949795625342293, "learning_rate": 3.5049417815473027e-06, "loss": 0.4187, "step": 17472 }, { "epoch": 2.8760817983569007, "grad_norm": 0.32898726958386537, "learning_rate": 3.504483359341961e-06, "loss": 0.4363, "step": 17473 }, { "epoch": 2.876246382576017, "grad_norm": 0.4864876133253663, "learning_rate": 3.504024947999301e-06, "loss": 0.444, "step": 17474 }, { "epoch": 2.8764109667951336, "grad_norm": 0.3015309938868187, "learning_rate": 3.5035665475243237e-06, "loss": 0.4062, "step": 17475 }, { "epoch": 2.87657555101425, "grad_norm": 0.278539466113373, "learning_rate": 3.503108157922029e-06, "loss": 0.3998, "step": 17476 }, { "epoch": 2.8767401352333666, "grad_norm": 0.3085679687824653, "learning_rate": 3.502649779197421e-06, "loss": 0.4325, "step": 17477 }, { "epoch": 2.876904719452483, "grad_norm": 0.2981762361349608, "learning_rate": 3.5021914113554993e-06, "loss": 0.4345, "step": 17478 }, { "epoch": 2.8770693036715995, "grad_norm": 0.42018455478878153, "learning_rate": 3.5017330544012684e-06, "loss": 0.4417, "step": 17479 }, { "epoch": 2.877233887890716, "grad_norm": 0.3194290282093159, "learning_rate": 3.501274708339727e-06, "loss": 0.4217, "step": 17480 }, { "epoch": 2.8773984721098325, "grad_norm": 0.2961955978280596, "learning_rate": 3.500816373175878e-06, "loss": 0.4146, "step": 17481 }, { "epoch": 2.877563056328949, "grad_norm": 0.35320320042854264, "learning_rate": 3.5003580489147217e-06, "loss": 0.4167, "step": 17482 }, { "epoch": 2.8777276405480654, "grad_norm": 0.2914516303858204, "learning_rate": 3.499899735561259e-06, "loss": 0.4225, "step": 17483 }, { "epoch": 2.877892224767182, "grad_norm": 0.5742647961884108, "learning_rate": 3.4994414331204927e-06, "loss": 0.4424, "step": 17484 }, { "epoch": 2.8780568089862983, "grad_norm": 0.47125135149148517, "learning_rate": 3.4989831415974196e-06, "loss": 0.4127, "step": 17485 }, { "epoch": 2.878221393205415, "grad_norm": 0.277633921305222, "learning_rate": 3.498524860997046e-06, "loss": 0.4379, "step": 17486 }, { "epoch": 2.8783859774245313, "grad_norm": 0.35444514695476276, "learning_rate": 3.498066591324366e-06, "loss": 0.415, "step": 17487 }, { "epoch": 2.8785505616436478, "grad_norm": 0.3702431165102328, "learning_rate": 3.4976083325843866e-06, "loss": 0.4407, "step": 17488 }, { "epoch": 2.8787151458627642, "grad_norm": 0.36745646953654393, "learning_rate": 3.4971500847821044e-06, "loss": 0.427, "step": 17489 }, { "epoch": 2.8788797300818807, "grad_norm": 0.3012571879088693, "learning_rate": 3.4966918479225185e-06, "loss": 0.4189, "step": 17490 }, { "epoch": 2.879044314300997, "grad_norm": 0.3522602230756812, "learning_rate": 3.4962336220106322e-06, "loss": 0.4279, "step": 17491 }, { "epoch": 2.8792088985201136, "grad_norm": 0.47725249684852394, "learning_rate": 3.495775407051443e-06, "loss": 0.4388, "step": 17492 }, { "epoch": 2.87937348273923, "grad_norm": 0.3846884788711646, "learning_rate": 3.495317203049953e-06, "loss": 0.4184, "step": 17493 }, { "epoch": 2.8795380669583466, "grad_norm": 0.4336049163299836, "learning_rate": 3.494859010011159e-06, "loss": 0.441, "step": 17494 }, { "epoch": 2.879702651177463, "grad_norm": 0.35240779292563884, "learning_rate": 3.494400827940064e-06, "loss": 0.4257, "step": 17495 }, { "epoch": 2.8798672353965795, "grad_norm": 0.3353048719207727, "learning_rate": 3.4939426568416664e-06, "loss": 0.4356, "step": 17496 }, { "epoch": 2.880031819615696, "grad_norm": 0.41796988582902395, "learning_rate": 3.4934844967209633e-06, "loss": 0.42, "step": 17497 }, { "epoch": 2.8801964038348125, "grad_norm": 0.33536615585729473, "learning_rate": 3.493026347582957e-06, "loss": 0.4173, "step": 17498 }, { "epoch": 2.880360988053929, "grad_norm": 0.3332212640094779, "learning_rate": 3.4925682094326437e-06, "loss": 0.416, "step": 17499 }, { "epoch": 2.8805255722730454, "grad_norm": 0.3471955782673057, "learning_rate": 3.492110082275025e-06, "loss": 0.4401, "step": 17500 }, { "epoch": 2.880690156492162, "grad_norm": 0.3282416402822312, "learning_rate": 3.4916519661150986e-06, "loss": 0.4458, "step": 17501 }, { "epoch": 2.8808547407112783, "grad_norm": 0.36896467746199585, "learning_rate": 3.4911938609578637e-06, "loss": 0.443, "step": 17502 }, { "epoch": 2.881019324930395, "grad_norm": 0.3332197037487723, "learning_rate": 3.4907357668083204e-06, "loss": 0.4565, "step": 17503 }, { "epoch": 2.8811839091495113, "grad_norm": 0.34050078360924096, "learning_rate": 3.490277683671462e-06, "loss": 0.4408, "step": 17504 }, { "epoch": 2.8813484933686278, "grad_norm": 0.3992178710229831, "learning_rate": 3.489819611552294e-06, "loss": 0.4526, "step": 17505 }, { "epoch": 2.881513077587744, "grad_norm": 0.2679678739319319, "learning_rate": 3.489361550455808e-06, "loss": 0.4292, "step": 17506 }, { "epoch": 2.8816776618068607, "grad_norm": 0.3198761892317233, "learning_rate": 3.4889035003870087e-06, "loss": 0.4267, "step": 17507 }, { "epoch": 2.8818422460259767, "grad_norm": 0.2830674041493437, "learning_rate": 3.4884454613508885e-06, "loss": 0.4211, "step": 17508 }, { "epoch": 2.882006830245093, "grad_norm": 0.416042072950869, "learning_rate": 3.487987433352449e-06, "loss": 0.4309, "step": 17509 }, { "epoch": 2.8821714144642097, "grad_norm": 0.516522262405375, "learning_rate": 3.4875294163966868e-06, "loss": 0.4154, "step": 17510 }, { "epoch": 2.882335998683326, "grad_norm": 0.49139703605895474, "learning_rate": 3.4870714104885977e-06, "loss": 0.4338, "step": 17511 }, { "epoch": 2.8825005829024426, "grad_norm": 0.2969184023413605, "learning_rate": 3.4866134156331832e-06, "loss": 0.4435, "step": 17512 }, { "epoch": 2.882665167121559, "grad_norm": 0.3249885720627407, "learning_rate": 3.4861554318354375e-06, "loss": 0.4418, "step": 17513 }, { "epoch": 2.8828297513406755, "grad_norm": 0.3798585206088055, "learning_rate": 3.485697459100359e-06, "loss": 0.4363, "step": 17514 }, { "epoch": 2.882994335559792, "grad_norm": 0.2624652605932739, "learning_rate": 3.4852394974329453e-06, "loss": 0.4459, "step": 17515 }, { "epoch": 2.8831589197789085, "grad_norm": 0.3741199036591475, "learning_rate": 3.484781546838195e-06, "loss": 0.4175, "step": 17516 }, { "epoch": 2.883323503998025, "grad_norm": 0.28807914592528444, "learning_rate": 3.4843236073210996e-06, "loss": 0.4435, "step": 17517 }, { "epoch": 2.8834880882171414, "grad_norm": 0.3991709257107399, "learning_rate": 3.483865678886663e-06, "loss": 0.4195, "step": 17518 }, { "epoch": 2.883652672436258, "grad_norm": 0.2976102140387971, "learning_rate": 3.483407761539878e-06, "loss": 0.4226, "step": 17519 }, { "epoch": 2.8838172566553744, "grad_norm": 0.29345802221273715, "learning_rate": 3.4829498552857405e-06, "loss": 0.4297, "step": 17520 }, { "epoch": 2.883981840874491, "grad_norm": 0.3035537774606567, "learning_rate": 3.4824919601292494e-06, "loss": 0.4397, "step": 17521 }, { "epoch": 2.8841464250936073, "grad_norm": 0.29332745685733963, "learning_rate": 3.4820340760753984e-06, "loss": 0.4377, "step": 17522 }, { "epoch": 2.8843110093127238, "grad_norm": 0.34426693441979905, "learning_rate": 3.4815762031291865e-06, "loss": 0.4279, "step": 17523 }, { "epoch": 2.8844755935318402, "grad_norm": 0.3380721839942416, "learning_rate": 3.481118341295608e-06, "loss": 0.4203, "step": 17524 }, { "epoch": 2.8846401777509567, "grad_norm": 0.28653978493406196, "learning_rate": 3.4806604905796602e-06, "loss": 0.4092, "step": 17525 }, { "epoch": 2.884804761970073, "grad_norm": 0.4618970250864827, "learning_rate": 3.480202650986339e-06, "loss": 0.4352, "step": 17526 }, { "epoch": 2.8849693461891897, "grad_norm": 0.7745580166610236, "learning_rate": 3.4797448225206386e-06, "loss": 0.4404, "step": 17527 }, { "epoch": 2.885133930408306, "grad_norm": 0.3572989624425346, "learning_rate": 3.479287005187556e-06, "loss": 0.4239, "step": 17528 }, { "epoch": 2.8852985146274226, "grad_norm": 0.3958605014986573, "learning_rate": 3.4788291989920847e-06, "loss": 0.4419, "step": 17529 }, { "epoch": 2.885463098846539, "grad_norm": 0.29876933676602424, "learning_rate": 3.478371403939223e-06, "loss": 0.4596, "step": 17530 }, { "epoch": 2.8856276830656555, "grad_norm": 0.30567302667861224, "learning_rate": 3.4779136200339635e-06, "loss": 0.4351, "step": 17531 }, { "epoch": 2.885792267284772, "grad_norm": 0.7991158892795251, "learning_rate": 3.477455847281304e-06, "loss": 0.4336, "step": 17532 }, { "epoch": 2.885956851503888, "grad_norm": 0.2860365966641438, "learning_rate": 3.476998085686238e-06, "loss": 0.4166, "step": 17533 }, { "epoch": 2.8861214357230045, "grad_norm": 0.35813329852227593, "learning_rate": 3.47654033525376e-06, "loss": 0.428, "step": 17534 }, { "epoch": 2.886286019942121, "grad_norm": 0.340048960958515, "learning_rate": 3.476082595988867e-06, "loss": 0.4089, "step": 17535 }, { "epoch": 2.8864506041612374, "grad_norm": 0.4793172775125557, "learning_rate": 3.475624867896549e-06, "loss": 0.4241, "step": 17536 }, { "epoch": 2.886615188380354, "grad_norm": 0.3980190982195709, "learning_rate": 3.475167150981807e-06, "loss": 0.4445, "step": 17537 }, { "epoch": 2.8867797725994704, "grad_norm": 0.28018392859825864, "learning_rate": 3.4747094452496293e-06, "loss": 0.4342, "step": 17538 }, { "epoch": 2.886944356818587, "grad_norm": 0.2853823124582563, "learning_rate": 3.474251750705015e-06, "loss": 0.4237, "step": 17539 }, { "epoch": 2.8871089410377033, "grad_norm": 0.36079531254173375, "learning_rate": 3.473794067352956e-06, "loss": 0.4208, "step": 17540 }, { "epoch": 2.88727352525682, "grad_norm": 0.37046200754921405, "learning_rate": 3.4733363951984453e-06, "loss": 0.4247, "step": 17541 }, { "epoch": 2.8874381094759363, "grad_norm": 0.3782141148710571, "learning_rate": 3.472878734246479e-06, "loss": 0.4253, "step": 17542 }, { "epoch": 2.8876026936950527, "grad_norm": 0.2860690892595623, "learning_rate": 3.472421084502049e-06, "loss": 0.4166, "step": 17543 }, { "epoch": 2.887767277914169, "grad_norm": 0.2638562180584719, "learning_rate": 3.471963445970151e-06, "loss": 0.4282, "step": 17544 }, { "epoch": 2.8879318621332857, "grad_norm": 0.3075702618199297, "learning_rate": 3.471505818655777e-06, "loss": 0.4205, "step": 17545 }, { "epoch": 2.888096446352402, "grad_norm": 0.3551562646617458, "learning_rate": 3.471048202563923e-06, "loss": 0.4259, "step": 17546 }, { "epoch": 2.8882610305715186, "grad_norm": 0.30878179949248125, "learning_rate": 3.470590597699579e-06, "loss": 0.4219, "step": 17547 }, { "epoch": 2.888425614790635, "grad_norm": 0.3359741753003401, "learning_rate": 3.4701330040677384e-06, "loss": 0.4353, "step": 17548 }, { "epoch": 2.8885901990097516, "grad_norm": 1.0444794328678997, "learning_rate": 3.4696754216733972e-06, "loss": 0.4264, "step": 17549 }, { "epoch": 2.888754783228868, "grad_norm": 0.32698250234672555, "learning_rate": 3.4692178505215448e-06, "loss": 0.4573, "step": 17550 }, { "epoch": 2.8889193674479845, "grad_norm": 0.27411367522197744, "learning_rate": 3.4687602906171773e-06, "loss": 0.4268, "step": 17551 }, { "epoch": 2.889083951667101, "grad_norm": 0.33172186293468775, "learning_rate": 3.4683027419652847e-06, "loss": 0.4443, "step": 17552 }, { "epoch": 2.8892485358862174, "grad_norm": 0.3046771536424512, "learning_rate": 3.4678452045708622e-06, "loss": 0.4141, "step": 17553 }, { "epoch": 2.889413120105334, "grad_norm": 0.36778006334965835, "learning_rate": 3.4673876784389016e-06, "loss": 0.4505, "step": 17554 }, { "epoch": 2.8895777043244504, "grad_norm": 0.49063857055608223, "learning_rate": 3.4669301635743913e-06, "loss": 0.4283, "step": 17555 }, { "epoch": 2.889742288543567, "grad_norm": 0.394256688057511, "learning_rate": 3.46647265998233e-06, "loss": 0.4507, "step": 17556 }, { "epoch": 2.8899068727626833, "grad_norm": 0.30983342194561003, "learning_rate": 3.4660151676677034e-06, "loss": 0.4348, "step": 17557 }, { "epoch": 2.8900714569818, "grad_norm": 0.30241903659440256, "learning_rate": 3.4655576866355097e-06, "loss": 0.4417, "step": 17558 }, { "epoch": 2.8902360412009163, "grad_norm": 0.3153832356377743, "learning_rate": 3.465100216890736e-06, "loss": 0.4471, "step": 17559 }, { "epoch": 2.8904006254200327, "grad_norm": 0.40039286031331656, "learning_rate": 3.4646427584383765e-06, "loss": 0.4565, "step": 17560 }, { "epoch": 2.890565209639149, "grad_norm": 0.3497720118990314, "learning_rate": 3.4641853112834214e-06, "loss": 0.46, "step": 17561 }, { "epoch": 2.8907297938582657, "grad_norm": 0.3215586088161052, "learning_rate": 3.463727875430863e-06, "loss": 0.4388, "step": 17562 }, { "epoch": 2.890894378077382, "grad_norm": 0.2806799404627749, "learning_rate": 3.463270450885693e-06, "loss": 0.4062, "step": 17563 }, { "epoch": 2.8910589622964986, "grad_norm": 0.30590924071024106, "learning_rate": 3.462813037652901e-06, "loss": 0.4415, "step": 17564 }, { "epoch": 2.891223546515615, "grad_norm": 0.39795707215124965, "learning_rate": 3.462355635737481e-06, "loss": 0.4133, "step": 17565 }, { "epoch": 2.8913881307347316, "grad_norm": 0.28841486979329806, "learning_rate": 3.4618982451444197e-06, "loss": 0.4262, "step": 17566 }, { "epoch": 2.891552714953848, "grad_norm": 0.30530892538511867, "learning_rate": 3.4614408658787127e-06, "loss": 0.4272, "step": 17567 }, { "epoch": 2.8917172991729645, "grad_norm": 0.6587955945287981, "learning_rate": 3.4609834979453457e-06, "loss": 0.4235, "step": 17568 }, { "epoch": 2.891881883392081, "grad_norm": 0.3581837116766279, "learning_rate": 3.4605261413493155e-06, "loss": 0.4399, "step": 17569 }, { "epoch": 2.8920464676111974, "grad_norm": 0.30612813779363107, "learning_rate": 3.4600687960956076e-06, "loss": 0.4445, "step": 17570 }, { "epoch": 2.892211051830314, "grad_norm": 0.31405506424736673, "learning_rate": 3.4596114621892133e-06, "loss": 0.4457, "step": 17571 }, { "epoch": 2.8923756360494304, "grad_norm": 0.2900030281499961, "learning_rate": 3.4591541396351244e-06, "loss": 0.4448, "step": 17572 }, { "epoch": 2.892540220268547, "grad_norm": 0.28487788863551433, "learning_rate": 3.4586968284383298e-06, "loss": 0.4158, "step": 17573 }, { "epoch": 2.8927048044876633, "grad_norm": 0.27083017128369313, "learning_rate": 3.4582395286038197e-06, "loss": 0.4343, "step": 17574 }, { "epoch": 2.8928693887067793, "grad_norm": 0.326629990273933, "learning_rate": 3.4577822401365838e-06, "loss": 0.4274, "step": 17575 }, { "epoch": 2.893033972925896, "grad_norm": 0.3504863651738228, "learning_rate": 3.457324963041613e-06, "loss": 0.4321, "step": 17576 }, { "epoch": 2.8931985571450123, "grad_norm": 0.4574549399371932, "learning_rate": 3.4568676973238973e-06, "loss": 0.4291, "step": 17577 }, { "epoch": 2.8933631413641288, "grad_norm": 0.3123392716219587, "learning_rate": 3.4564104429884223e-06, "loss": 0.415, "step": 17578 }, { "epoch": 2.8935277255832452, "grad_norm": 0.3238830641887869, "learning_rate": 3.4559532000401816e-06, "loss": 0.4326, "step": 17579 }, { "epoch": 2.8936923098023617, "grad_norm": 0.2935385519404049, "learning_rate": 3.455495968484162e-06, "loss": 0.4331, "step": 17580 }, { "epoch": 2.893856894021478, "grad_norm": 0.33603634738461574, "learning_rate": 3.4550387483253537e-06, "loss": 0.4509, "step": 17581 }, { "epoch": 2.8940214782405946, "grad_norm": 0.27276667451662073, "learning_rate": 3.4545815395687453e-06, "loss": 0.4247, "step": 17582 }, { "epoch": 2.894186062459711, "grad_norm": 0.27054568096228415, "learning_rate": 3.454124342219327e-06, "loss": 0.4311, "step": 17583 }, { "epoch": 2.8943506466788276, "grad_norm": 0.3317891914521292, "learning_rate": 3.453667156282087e-06, "loss": 0.4182, "step": 17584 }, { "epoch": 2.894515230897944, "grad_norm": 0.29678551621684296, "learning_rate": 3.4532099817620107e-06, "loss": 0.4264, "step": 17585 }, { "epoch": 2.8946798151170605, "grad_norm": 0.40450783480095226, "learning_rate": 3.452752818664092e-06, "loss": 0.4304, "step": 17586 }, { "epoch": 2.894844399336177, "grad_norm": 0.2833720786401857, "learning_rate": 3.452295666993314e-06, "loss": 0.435, "step": 17587 }, { "epoch": 2.8950089835552935, "grad_norm": 0.3042480121931492, "learning_rate": 3.4518385267546703e-06, "loss": 0.4325, "step": 17588 }, { "epoch": 2.89517356777441, "grad_norm": 0.27890726007985217, "learning_rate": 3.451381397953144e-06, "loss": 0.4472, "step": 17589 }, { "epoch": 2.8953381519935264, "grad_norm": 0.354090550441332, "learning_rate": 3.450924280593727e-06, "loss": 0.4144, "step": 17590 }, { "epoch": 2.895502736212643, "grad_norm": 0.3311840660126875, "learning_rate": 3.4504671746814054e-06, "loss": 0.4328, "step": 17591 }, { "epoch": 2.8956673204317593, "grad_norm": 0.3337723131257527, "learning_rate": 3.450010080221166e-06, "loss": 0.4356, "step": 17592 }, { "epoch": 2.895831904650876, "grad_norm": 0.3563506569008164, "learning_rate": 3.449552997217999e-06, "loss": 0.4425, "step": 17593 }, { "epoch": 2.8959964888699923, "grad_norm": 0.29006337535950466, "learning_rate": 3.4490959256768887e-06, "loss": 0.4301, "step": 17594 }, { "epoch": 2.8961610730891088, "grad_norm": 0.4306552211887624, "learning_rate": 3.448638865602825e-06, "loss": 0.4371, "step": 17595 }, { "epoch": 2.896325657308225, "grad_norm": 0.37960210912353, "learning_rate": 3.4481818170007944e-06, "loss": 0.4201, "step": 17596 }, { "epoch": 2.8964902415273417, "grad_norm": 0.2869460692042597, "learning_rate": 3.4477247798757857e-06, "loss": 0.4372, "step": 17597 }, { "epoch": 2.896654825746458, "grad_norm": 0.332782372252181, "learning_rate": 3.447267754232781e-06, "loss": 0.4449, "step": 17598 }, { "epoch": 2.8968194099655746, "grad_norm": 0.41277560059229834, "learning_rate": 3.446810740076774e-06, "loss": 0.436, "step": 17599 }, { "epoch": 2.8969839941846907, "grad_norm": 0.3226705898444896, "learning_rate": 3.446353737412746e-06, "loss": 0.4327, "step": 17600 }, { "epoch": 2.897148578403807, "grad_norm": 0.3131374708035312, "learning_rate": 3.445896746245685e-06, "loss": 0.4289, "step": 17601 }, { "epoch": 2.8973131626229236, "grad_norm": 0.38371779238723275, "learning_rate": 3.4454397665805785e-06, "loss": 0.4148, "step": 17602 }, { "epoch": 2.89747774684204, "grad_norm": 0.3992085554809812, "learning_rate": 3.444982798422412e-06, "loss": 0.4241, "step": 17603 }, { "epoch": 2.8976423310611565, "grad_norm": 0.27723506544946885, "learning_rate": 3.4445258417761734e-06, "loss": 0.4323, "step": 17604 }, { "epoch": 2.897806915280273, "grad_norm": 0.47666218029781204, "learning_rate": 3.444068896646846e-06, "loss": 0.4318, "step": 17605 }, { "epoch": 2.8979714994993895, "grad_norm": 0.30510120682304553, "learning_rate": 3.4436119630394186e-06, "loss": 0.4209, "step": 17606 }, { "epoch": 2.898136083718506, "grad_norm": 0.4314918849748407, "learning_rate": 3.4431550409588767e-06, "loss": 0.4047, "step": 17607 }, { "epoch": 2.8983006679376224, "grad_norm": 0.35357462801580614, "learning_rate": 3.442698130410203e-06, "loss": 0.4384, "step": 17608 }, { "epoch": 2.898465252156739, "grad_norm": 0.30285283816494646, "learning_rate": 3.4422412313983867e-06, "loss": 0.4402, "step": 17609 }, { "epoch": 2.8986298363758554, "grad_norm": 0.36947669740277606, "learning_rate": 3.441784343928411e-06, "loss": 0.4324, "step": 17610 }, { "epoch": 2.898794420594972, "grad_norm": 0.3775632311292628, "learning_rate": 3.441327468005263e-06, "loss": 0.4165, "step": 17611 }, { "epoch": 2.8989590048140883, "grad_norm": 0.34658287083305633, "learning_rate": 3.4408706036339264e-06, "loss": 0.4354, "step": 17612 }, { "epoch": 2.8991235890332048, "grad_norm": 0.35313998235139205, "learning_rate": 3.440413750819388e-06, "loss": 0.4429, "step": 17613 }, { "epoch": 2.8992881732523212, "grad_norm": 0.2952482822347777, "learning_rate": 3.439956909566632e-06, "loss": 0.4444, "step": 17614 }, { "epoch": 2.8994527574714377, "grad_norm": 0.42786781905398574, "learning_rate": 3.4395000798806418e-06, "loss": 0.4234, "step": 17615 }, { "epoch": 2.899617341690554, "grad_norm": 0.3311169314032787, "learning_rate": 3.439043261766405e-06, "loss": 0.416, "step": 17616 }, { "epoch": 2.8997819259096707, "grad_norm": 0.33214432687721374, "learning_rate": 3.4385864552289023e-06, "loss": 0.4295, "step": 17617 }, { "epoch": 2.899946510128787, "grad_norm": 0.469509524189476, "learning_rate": 3.438129660273124e-06, "loss": 0.4452, "step": 17618 }, { "epoch": 2.9001110943479036, "grad_norm": 0.3206636648928445, "learning_rate": 3.4376728769040476e-06, "loss": 0.4426, "step": 17619 }, { "epoch": 2.90027567856702, "grad_norm": 0.37333838540874276, "learning_rate": 3.437216105126664e-06, "loss": 0.4281, "step": 17620 }, { "epoch": 2.9004402627861365, "grad_norm": 0.340705525024164, "learning_rate": 3.4367593449459526e-06, "loss": 0.4219, "step": 17621 }, { "epoch": 2.900604847005253, "grad_norm": 0.3381100580160605, "learning_rate": 3.4363025963668984e-06, "loss": 0.4294, "step": 17622 }, { "epoch": 2.9007694312243695, "grad_norm": 0.28531483631558896, "learning_rate": 3.4358458593944868e-06, "loss": 0.4275, "step": 17623 }, { "epoch": 2.900934015443486, "grad_norm": 0.27056745382096664, "learning_rate": 3.4353891340336987e-06, "loss": 0.4127, "step": 17624 }, { "epoch": 2.9010985996626024, "grad_norm": 0.29242580050975925, "learning_rate": 3.434932420289521e-06, "loss": 0.4216, "step": 17625 }, { "epoch": 2.901263183881719, "grad_norm": 0.39910999046507917, "learning_rate": 3.434475718166935e-06, "loss": 0.4253, "step": 17626 }, { "epoch": 2.9014277681008354, "grad_norm": 0.32521720295621315, "learning_rate": 3.4340190276709265e-06, "loss": 0.4128, "step": 17627 }, { "epoch": 2.901592352319952, "grad_norm": 0.36843049245490517, "learning_rate": 3.433562348806475e-06, "loss": 0.4299, "step": 17628 }, { "epoch": 2.9017569365390683, "grad_norm": 0.614088766386188, "learning_rate": 3.4331056815785662e-06, "loss": 0.4194, "step": 17629 }, { "epoch": 2.9019215207581848, "grad_norm": 0.34208998169652544, "learning_rate": 3.432649025992183e-06, "loss": 0.4416, "step": 17630 }, { "epoch": 2.9020861049773012, "grad_norm": 0.41863096666227234, "learning_rate": 3.4321923820523056e-06, "loss": 0.4309, "step": 17631 }, { "epoch": 2.9022506891964177, "grad_norm": 0.3387098617077992, "learning_rate": 3.4317357497639205e-06, "loss": 0.4542, "step": 17632 }, { "epoch": 2.902415273415534, "grad_norm": 0.3880538504682831, "learning_rate": 3.431279129132007e-06, "loss": 0.4242, "step": 17633 }, { "epoch": 2.9025798576346507, "grad_norm": 0.29517302631006787, "learning_rate": 3.430822520161551e-06, "loss": 0.4431, "step": 17634 }, { "epoch": 2.902744441853767, "grad_norm": 0.3217759709408525, "learning_rate": 3.4303659228575337e-06, "loss": 0.4526, "step": 17635 }, { "epoch": 2.9029090260728836, "grad_norm": 0.5737184089042462, "learning_rate": 3.4299093372249333e-06, "loss": 0.4197, "step": 17636 }, { "epoch": 2.903073610292, "grad_norm": 0.38688542526269387, "learning_rate": 3.429452763268738e-06, "loss": 0.4194, "step": 17637 }, { "epoch": 2.9032381945111165, "grad_norm": 0.3002994499825206, "learning_rate": 3.4289962009939244e-06, "loss": 0.4142, "step": 17638 }, { "epoch": 2.903402778730233, "grad_norm": 0.35474702447463446, "learning_rate": 3.4285396504054792e-06, "loss": 0.4275, "step": 17639 }, { "epoch": 2.9035673629493495, "grad_norm": 0.2754523365816297, "learning_rate": 3.42808311150838e-06, "loss": 0.4247, "step": 17640 }, { "epoch": 2.9037319471684655, "grad_norm": 0.30124553560901396, "learning_rate": 3.4276265843076113e-06, "loss": 0.4152, "step": 17641 }, { "epoch": 2.903896531387582, "grad_norm": 0.4355278989267508, "learning_rate": 3.4271700688081523e-06, "loss": 0.4205, "step": 17642 }, { "epoch": 2.9040611156066984, "grad_norm": 0.2938263585504233, "learning_rate": 3.4267135650149863e-06, "loss": 0.4264, "step": 17643 }, { "epoch": 2.904225699825815, "grad_norm": 0.30681761907619126, "learning_rate": 3.4262570729330934e-06, "loss": 0.4337, "step": 17644 }, { "epoch": 2.9043902840449314, "grad_norm": 0.387173245592773, "learning_rate": 3.425800592567454e-06, "loss": 0.4488, "step": 17645 }, { "epoch": 2.904554868264048, "grad_norm": 0.3232799699547336, "learning_rate": 3.42534412392305e-06, "loss": 0.4482, "step": 17646 }, { "epoch": 2.9047194524831643, "grad_norm": 0.27505508379676474, "learning_rate": 3.4248876670048623e-06, "loss": 0.4503, "step": 17647 }, { "epoch": 2.904884036702281, "grad_norm": 0.3042903382131154, "learning_rate": 3.424431221817872e-06, "loss": 0.4219, "step": 17648 }, { "epoch": 2.9050486209213973, "grad_norm": 0.40167340114884303, "learning_rate": 3.423974788367057e-06, "loss": 0.4319, "step": 17649 }, { "epoch": 2.9052132051405137, "grad_norm": 0.40027143285984046, "learning_rate": 3.423518366657402e-06, "loss": 0.4273, "step": 17650 }, { "epoch": 2.90537778935963, "grad_norm": 0.38083677010822076, "learning_rate": 3.4230619566938843e-06, "loss": 0.4543, "step": 17651 }, { "epoch": 2.9055423735787467, "grad_norm": 0.3445738566986874, "learning_rate": 3.4226055584814837e-06, "loss": 0.4232, "step": 17652 }, { "epoch": 2.905706957797863, "grad_norm": 0.2830924066386983, "learning_rate": 3.4221491720251826e-06, "loss": 0.4173, "step": 17653 }, { "epoch": 2.9058715420169796, "grad_norm": 0.4947656362937295, "learning_rate": 3.4216927973299577e-06, "loss": 0.4471, "step": 17654 }, { "epoch": 2.906036126236096, "grad_norm": 0.2769119444352562, "learning_rate": 3.4212364344007925e-06, "loss": 0.4312, "step": 17655 }, { "epoch": 2.9062007104552126, "grad_norm": 0.6499934734448956, "learning_rate": 3.420780083242663e-06, "loss": 0.4367, "step": 17656 }, { "epoch": 2.906365294674329, "grad_norm": 0.370733265460432, "learning_rate": 3.4203237438605524e-06, "loss": 0.431, "step": 17657 }, { "epoch": 2.9065298788934455, "grad_norm": 0.3411392926466954, "learning_rate": 3.4198674162594386e-06, "loss": 0.4339, "step": 17658 }, { "epoch": 2.906694463112562, "grad_norm": 0.39875733082792414, "learning_rate": 3.4194111004442983e-06, "loss": 0.4408, "step": 17659 }, { "epoch": 2.9068590473316784, "grad_norm": 0.39200771850558425, "learning_rate": 3.4189547964201145e-06, "loss": 0.428, "step": 17660 }, { "epoch": 2.907023631550795, "grad_norm": 0.2871409612914079, "learning_rate": 3.4184985041918627e-06, "loss": 0.42, "step": 17661 }, { "epoch": 2.9071882157699114, "grad_norm": 0.3061717105592732, "learning_rate": 3.4180422237645253e-06, "loss": 0.4212, "step": 17662 }, { "epoch": 2.907352799989028, "grad_norm": 0.2843559900515243, "learning_rate": 3.417585955143078e-06, "loss": 0.4528, "step": 17663 }, { "epoch": 2.9075173842081443, "grad_norm": 0.3243487704921083, "learning_rate": 3.417129698332502e-06, "loss": 0.4382, "step": 17664 }, { "epoch": 2.907681968427261, "grad_norm": 0.2980024565274041, "learning_rate": 3.4166734533377747e-06, "loss": 0.4441, "step": 17665 }, { "epoch": 2.907846552646377, "grad_norm": 0.34459119960972434, "learning_rate": 3.416217220163873e-06, "loss": 0.4292, "step": 17666 }, { "epoch": 2.9080111368654933, "grad_norm": 1.0791551102000445, "learning_rate": 3.4157609988157784e-06, "loss": 0.4346, "step": 17667 }, { "epoch": 2.9081757210846098, "grad_norm": 0.6016048549628633, "learning_rate": 3.415304789298464e-06, "loss": 0.4227, "step": 17668 }, { "epoch": 2.9083403053037262, "grad_norm": 0.43372572493792233, "learning_rate": 3.414848591616914e-06, "loss": 0.4334, "step": 17669 }, { "epoch": 2.9085048895228427, "grad_norm": 0.3356910552899443, "learning_rate": 3.4143924057761e-06, "loss": 0.4142, "step": 17670 }, { "epoch": 2.908669473741959, "grad_norm": 0.3306805036575712, "learning_rate": 3.413936231781006e-06, "loss": 0.4217, "step": 17671 }, { "epoch": 2.9088340579610756, "grad_norm": 0.30701960353201796, "learning_rate": 3.4134800696366054e-06, "loss": 0.4342, "step": 17672 }, { "epoch": 2.908998642180192, "grad_norm": 0.29094351464733453, "learning_rate": 3.4130239193478754e-06, "loss": 0.4628, "step": 17673 }, { "epoch": 2.9091632263993086, "grad_norm": 0.3301904863774064, "learning_rate": 3.412567780919796e-06, "loss": 0.4084, "step": 17674 }, { "epoch": 2.909327810618425, "grad_norm": 0.3566565100217812, "learning_rate": 3.4121116543573416e-06, "loss": 0.4111, "step": 17675 }, { "epoch": 2.9094923948375415, "grad_norm": 0.266684336790093, "learning_rate": 3.411655539665492e-06, "loss": 0.4072, "step": 17676 }, { "epoch": 2.909656979056658, "grad_norm": 0.29303718820543445, "learning_rate": 3.411199436849221e-06, "loss": 0.4292, "step": 17677 }, { "epoch": 2.9098215632757745, "grad_norm": 0.32859176300301063, "learning_rate": 3.4107433459135096e-06, "loss": 0.422, "step": 17678 }, { "epoch": 2.909986147494891, "grad_norm": 0.302610520053293, "learning_rate": 3.4102872668633315e-06, "loss": 0.4267, "step": 17679 }, { "epoch": 2.9101507317140074, "grad_norm": 0.3279636996325862, "learning_rate": 3.409831199703662e-06, "loss": 0.431, "step": 17680 }, { "epoch": 2.910315315933124, "grad_norm": 0.33009465835008517, "learning_rate": 3.4093751444394813e-06, "loss": 0.4268, "step": 17681 }, { "epoch": 2.9104799001522403, "grad_norm": 0.366280287216141, "learning_rate": 3.4089191010757623e-06, "loss": 0.4109, "step": 17682 }, { "epoch": 2.910644484371357, "grad_norm": 0.39484965699830255, "learning_rate": 3.408463069617484e-06, "loss": 0.4315, "step": 17683 }, { "epoch": 2.9108090685904733, "grad_norm": 0.3019101335583522, "learning_rate": 3.4080070500696195e-06, "loss": 0.4227, "step": 17684 }, { "epoch": 2.9109736528095898, "grad_norm": 0.4397943867074195, "learning_rate": 3.4075510424371476e-06, "loss": 0.4305, "step": 17685 }, { "epoch": 2.911138237028706, "grad_norm": 0.3450226086647439, "learning_rate": 3.407095046725042e-06, "loss": 0.4217, "step": 17686 }, { "epoch": 2.9113028212478227, "grad_norm": 0.5353065765995956, "learning_rate": 3.4066390629382793e-06, "loss": 0.4498, "step": 17687 }, { "epoch": 2.911467405466939, "grad_norm": 0.37676124154671603, "learning_rate": 3.406183091081836e-06, "loss": 0.4175, "step": 17688 }, { "epoch": 2.9116319896860556, "grad_norm": 0.37203745809563854, "learning_rate": 3.405727131160684e-06, "loss": 0.4295, "step": 17689 }, { "epoch": 2.911796573905172, "grad_norm": 0.34226933420973427, "learning_rate": 3.405271183179803e-06, "loss": 0.4425, "step": 17690 }, { "epoch": 2.9119611581242886, "grad_norm": 0.38383361264212845, "learning_rate": 3.404815247144164e-06, "loss": 0.43, "step": 17691 }, { "epoch": 2.912125742343405, "grad_norm": 0.2692537578894957, "learning_rate": 3.4043593230587454e-06, "loss": 0.4179, "step": 17692 }, { "epoch": 2.9122903265625215, "grad_norm": 0.38348850251716265, "learning_rate": 3.4039034109285192e-06, "loss": 0.4272, "step": 17693 }, { "epoch": 2.912454910781638, "grad_norm": 0.2814115397311186, "learning_rate": 3.4034475107584626e-06, "loss": 0.4167, "step": 17694 }, { "epoch": 2.9126194950007545, "grad_norm": 0.3396851526485684, "learning_rate": 3.4029916225535498e-06, "loss": 0.4293, "step": 17695 }, { "epoch": 2.912784079219871, "grad_norm": 0.3057095780168015, "learning_rate": 3.4025357463187524e-06, "loss": 0.4347, "step": 17696 }, { "epoch": 2.9129486634389874, "grad_norm": 0.5856334016361633, "learning_rate": 3.4020798820590497e-06, "loss": 0.4328, "step": 17697 }, { "epoch": 2.913113247658104, "grad_norm": 0.3165345726469618, "learning_rate": 3.4016240297794105e-06, "loss": 0.4376, "step": 17698 }, { "epoch": 2.9132778318772203, "grad_norm": 0.26790571564430643, "learning_rate": 3.401168189484814e-06, "loss": 0.4139, "step": 17699 }, { "epoch": 2.913442416096337, "grad_norm": 0.35081641582732853, "learning_rate": 3.4007123611802284e-06, "loss": 0.4417, "step": 17700 }, { "epoch": 2.9136070003154533, "grad_norm": 0.4507318909432953, "learning_rate": 3.400256544870634e-06, "loss": 0.4251, "step": 17701 }, { "epoch": 2.9137715845345697, "grad_norm": 0.29400161745694303, "learning_rate": 3.3998007405610003e-06, "loss": 0.446, "step": 17702 }, { "epoch": 2.913936168753686, "grad_norm": 0.3430924489995087, "learning_rate": 3.399344948256301e-06, "loss": 0.426, "step": 17703 }, { "epoch": 2.9141007529728027, "grad_norm": 0.3507783651897055, "learning_rate": 3.398889167961511e-06, "loss": 0.425, "step": 17704 }, { "epoch": 2.914265337191919, "grad_norm": 0.43556437810017934, "learning_rate": 3.3984333996816024e-06, "loss": 0.4478, "step": 17705 }, { "epoch": 2.9144299214110356, "grad_norm": 0.3252357809987994, "learning_rate": 3.39797764342155e-06, "loss": 0.4286, "step": 17706 }, { "epoch": 2.914594505630152, "grad_norm": 0.3040779954874744, "learning_rate": 3.397521899186324e-06, "loss": 0.4188, "step": 17707 }, { "epoch": 2.914759089849268, "grad_norm": 0.34531608497152533, "learning_rate": 3.3970661669809005e-06, "loss": 0.4204, "step": 17708 }, { "epoch": 2.9149236740683846, "grad_norm": 0.38340548767782345, "learning_rate": 3.396610446810252e-06, "loss": 0.4526, "step": 17709 }, { "epoch": 2.915088258287501, "grad_norm": 0.31692373966980675, "learning_rate": 3.3961547386793476e-06, "loss": 0.4431, "step": 17710 }, { "epoch": 2.9152528425066175, "grad_norm": 0.2927901325514057, "learning_rate": 3.3956990425931637e-06, "loss": 0.4157, "step": 17711 }, { "epoch": 2.915417426725734, "grad_norm": 0.3381982550939023, "learning_rate": 3.3952433585566697e-06, "loss": 0.4346, "step": 17712 }, { "epoch": 2.9155820109448505, "grad_norm": 0.3908240460861359, "learning_rate": 3.39478768657484e-06, "loss": 0.4323, "step": 17713 }, { "epoch": 2.915746595163967, "grad_norm": 0.35719831437371163, "learning_rate": 3.394332026652645e-06, "loss": 0.4052, "step": 17714 }, { "epoch": 2.9159111793830834, "grad_norm": 0.30052725251285, "learning_rate": 3.393876378795059e-06, "loss": 0.4373, "step": 17715 }, { "epoch": 2.9160757636022, "grad_norm": 0.3120089814016768, "learning_rate": 3.393420743007053e-06, "loss": 0.433, "step": 17716 }, { "epoch": 2.9162403478213164, "grad_norm": 0.48157706619602825, "learning_rate": 3.3929651192935958e-06, "loss": 0.4404, "step": 17717 }, { "epoch": 2.916404932040433, "grad_norm": 0.34109414817190065, "learning_rate": 3.3925095076596646e-06, "loss": 0.4327, "step": 17718 }, { "epoch": 2.9165695162595493, "grad_norm": 0.293311088739958, "learning_rate": 3.392053908110224e-06, "loss": 0.4034, "step": 17719 }, { "epoch": 2.9167341004786658, "grad_norm": 0.3200744243515672, "learning_rate": 3.391598320650252e-06, "loss": 0.4451, "step": 17720 }, { "epoch": 2.9168986846977822, "grad_norm": 0.2798542360586772, "learning_rate": 3.3911427452847153e-06, "loss": 0.4271, "step": 17721 }, { "epoch": 2.9170632689168987, "grad_norm": 0.4470121931155287, "learning_rate": 3.390687182018587e-06, "loss": 0.441, "step": 17722 }, { "epoch": 2.917227853136015, "grad_norm": 0.3077008837417923, "learning_rate": 3.3902316308568364e-06, "loss": 0.4371, "step": 17723 }, { "epoch": 2.9173924373551317, "grad_norm": 0.3355025362900558, "learning_rate": 3.389776091804437e-06, "loss": 0.4223, "step": 17724 }, { "epoch": 2.917557021574248, "grad_norm": 0.30686200898920396, "learning_rate": 3.389320564866357e-06, "loss": 0.4231, "step": 17725 }, { "epoch": 2.9177216057933646, "grad_norm": 0.368801143123311, "learning_rate": 3.388865050047567e-06, "loss": 0.4262, "step": 17726 }, { "epoch": 2.917886190012481, "grad_norm": 0.4914583171959461, "learning_rate": 3.388409547353039e-06, "loss": 0.4434, "step": 17727 }, { "epoch": 2.9180507742315975, "grad_norm": 0.3669387457957941, "learning_rate": 3.3879540567877417e-06, "loss": 0.4469, "step": 17728 }, { "epoch": 2.918215358450714, "grad_norm": 0.2725498437245764, "learning_rate": 3.387498578356648e-06, "loss": 0.4399, "step": 17729 }, { "epoch": 2.9183799426698305, "grad_norm": 0.3033736328383057, "learning_rate": 3.3870431120647223e-06, "loss": 0.4277, "step": 17730 }, { "epoch": 2.918544526888947, "grad_norm": 0.3208317703581786, "learning_rate": 3.386587657916941e-06, "loss": 0.4261, "step": 17731 }, { "epoch": 2.9187091111080634, "grad_norm": 0.3267134398822339, "learning_rate": 3.38613221591827e-06, "loss": 0.4305, "step": 17732 }, { "epoch": 2.9188736953271794, "grad_norm": 0.35091926586516287, "learning_rate": 3.385676786073679e-06, "loss": 0.4268, "step": 17733 }, { "epoch": 2.919038279546296, "grad_norm": 0.40270745330681973, "learning_rate": 3.385221368388139e-06, "loss": 0.4418, "step": 17734 }, { "epoch": 2.9192028637654124, "grad_norm": 0.511617782510701, "learning_rate": 3.3847659628666174e-06, "loss": 0.4323, "step": 17735 }, { "epoch": 2.919367447984529, "grad_norm": 0.3435292141125187, "learning_rate": 3.3843105695140855e-06, "loss": 0.4325, "step": 17736 }, { "epoch": 2.9195320322036453, "grad_norm": 0.726359205620102, "learning_rate": 3.38385518833551e-06, "loss": 0.4008, "step": 17737 }, { "epoch": 2.919696616422762, "grad_norm": 0.49802888669458983, "learning_rate": 3.3833998193358633e-06, "loss": 0.4347, "step": 17738 }, { "epoch": 2.9198612006418783, "grad_norm": 0.30764054482203373, "learning_rate": 3.3829444625201125e-06, "loss": 0.4294, "step": 17739 }, { "epoch": 2.9200257848609947, "grad_norm": 0.3067873725877841, "learning_rate": 3.3824891178932242e-06, "loss": 0.4325, "step": 17740 }, { "epoch": 2.920190369080111, "grad_norm": 0.32780206845639454, "learning_rate": 3.382033785460169e-06, "loss": 0.4433, "step": 17741 }, { "epoch": 2.9203549532992277, "grad_norm": 0.4208820936167372, "learning_rate": 3.3815784652259143e-06, "loss": 0.4156, "step": 17742 }, { "epoch": 2.920519537518344, "grad_norm": 0.34487559592914324, "learning_rate": 3.38112315719543e-06, "loss": 0.4259, "step": 17743 }, { "epoch": 2.9206841217374606, "grad_norm": 0.33124493288463996, "learning_rate": 3.380667861373683e-06, "loss": 0.4227, "step": 17744 }, { "epoch": 2.920848705956577, "grad_norm": 0.5375485315494445, "learning_rate": 3.3802125777656416e-06, "loss": 0.4336, "step": 17745 }, { "epoch": 2.9210132901756936, "grad_norm": 0.3042149214419165, "learning_rate": 3.379757306376274e-06, "loss": 0.4223, "step": 17746 }, { "epoch": 2.92117787439481, "grad_norm": 0.4076199164068452, "learning_rate": 3.3793020472105464e-06, "loss": 0.4299, "step": 17747 }, { "epoch": 2.9213424586139265, "grad_norm": 0.2753391754673677, "learning_rate": 3.3788468002734295e-06, "loss": 0.4278, "step": 17748 }, { "epoch": 2.921507042833043, "grad_norm": 0.42015172301237264, "learning_rate": 3.3783915655698867e-06, "loss": 0.453, "step": 17749 }, { "epoch": 2.9216716270521594, "grad_norm": 0.3835463068552829, "learning_rate": 3.3779363431048895e-06, "loss": 0.4461, "step": 17750 }, { "epoch": 2.921836211271276, "grad_norm": 0.3857933309307179, "learning_rate": 3.3774811328834003e-06, "loss": 0.4282, "step": 17751 }, { "epoch": 2.9220007954903924, "grad_norm": 0.4795193717887788, "learning_rate": 3.3770259349103926e-06, "loss": 0.4299, "step": 17752 }, { "epoch": 2.922165379709509, "grad_norm": 0.3049139503695512, "learning_rate": 3.376570749190828e-06, "loss": 0.4354, "step": 17753 }, { "epoch": 2.9223299639286253, "grad_norm": 0.3519921407722308, "learning_rate": 3.3761155757296744e-06, "loss": 0.4414, "step": 17754 }, { "epoch": 2.922494548147742, "grad_norm": 0.27790101218221924, "learning_rate": 3.3756604145319e-06, "loss": 0.4282, "step": 17755 }, { "epoch": 2.9226591323668583, "grad_norm": 0.44352435070616175, "learning_rate": 3.3752052656024694e-06, "loss": 0.4174, "step": 17756 }, { "epoch": 2.9228237165859747, "grad_norm": 0.7634773223040751, "learning_rate": 3.374750128946351e-06, "loss": 0.4345, "step": 17757 }, { "epoch": 2.922988300805091, "grad_norm": 0.2984704720453994, "learning_rate": 3.3742950045685096e-06, "loss": 0.4211, "step": 17758 }, { "epoch": 2.9231528850242077, "grad_norm": 0.2873456683663334, "learning_rate": 3.3738398924739133e-06, "loss": 0.4269, "step": 17759 }, { "epoch": 2.923317469243324, "grad_norm": 0.2681318304864954, "learning_rate": 3.373384792667526e-06, "loss": 0.4168, "step": 17760 }, { "epoch": 2.9234820534624406, "grad_norm": 0.31182455738571124, "learning_rate": 3.3729297051543125e-06, "loss": 0.4427, "step": 17761 }, { "epoch": 2.923646637681557, "grad_norm": 0.38556683725634705, "learning_rate": 3.3724746299392423e-06, "loss": 0.4332, "step": 17762 }, { "epoch": 2.9238112219006736, "grad_norm": 0.3109254497590433, "learning_rate": 3.3720195670272774e-06, "loss": 0.4176, "step": 17763 }, { "epoch": 2.92397580611979, "grad_norm": 0.41052865934839455, "learning_rate": 3.3715645164233857e-06, "loss": 0.4197, "step": 17764 }, { "epoch": 2.9241403903389065, "grad_norm": 0.47507018610702884, "learning_rate": 3.3711094781325302e-06, "loss": 0.4279, "step": 17765 }, { "epoch": 2.924304974558023, "grad_norm": 0.3087772347515225, "learning_rate": 3.3706544521596794e-06, "loss": 0.4373, "step": 17766 }, { "epoch": 2.9244695587771394, "grad_norm": 0.5240269329553043, "learning_rate": 3.3701994385097955e-06, "loss": 0.4133, "step": 17767 }, { "epoch": 2.924634142996256, "grad_norm": 0.3167803647593282, "learning_rate": 3.3697444371878446e-06, "loss": 0.4282, "step": 17768 }, { "epoch": 2.9247987272153724, "grad_norm": 0.32589636198064426, "learning_rate": 3.369289448198793e-06, "loss": 0.4122, "step": 17769 }, { "epoch": 2.924963311434489, "grad_norm": 0.436712996183922, "learning_rate": 3.368834471547601e-06, "loss": 0.4315, "step": 17770 }, { "epoch": 2.9251278956536053, "grad_norm": 0.36515264087424926, "learning_rate": 3.3683795072392378e-06, "loss": 0.4334, "step": 17771 }, { "epoch": 2.925292479872722, "grad_norm": 0.33728393279336444, "learning_rate": 3.3679245552786643e-06, "loss": 0.441, "step": 17772 }, { "epoch": 2.9254570640918383, "grad_norm": 0.3032828860887325, "learning_rate": 3.367469615670847e-06, "loss": 0.4386, "step": 17773 }, { "epoch": 2.9256216483109547, "grad_norm": 0.37763499166331943, "learning_rate": 3.367014688420749e-06, "loss": 0.4214, "step": 17774 }, { "epoch": 2.9257862325300708, "grad_norm": 0.2951330033560652, "learning_rate": 3.366559773533335e-06, "loss": 0.4445, "step": 17775 }, { "epoch": 2.9259508167491872, "grad_norm": 0.30043150877186625, "learning_rate": 3.366104871013568e-06, "loss": 0.4342, "step": 17776 }, { "epoch": 2.9261154009683037, "grad_norm": 0.40274738954194816, "learning_rate": 3.3656499808664123e-06, "loss": 0.4189, "step": 17777 }, { "epoch": 2.92627998518742, "grad_norm": 0.3009201296585576, "learning_rate": 3.365195103096832e-06, "loss": 0.4267, "step": 17778 }, { "epoch": 2.9264445694065366, "grad_norm": 0.2916867542924242, "learning_rate": 3.3647402377097877e-06, "loss": 0.4137, "step": 17779 }, { "epoch": 2.926609153625653, "grad_norm": 0.259953729653548, "learning_rate": 3.364285384710247e-06, "loss": 0.4436, "step": 17780 }, { "epoch": 2.9267737378447696, "grad_norm": 0.24759841138821312, "learning_rate": 3.3638305441031683e-06, "loss": 0.4248, "step": 17781 }, { "epoch": 2.926938322063886, "grad_norm": 0.4603191646651383, "learning_rate": 3.36337571589352e-06, "loss": 0.4267, "step": 17782 }, { "epoch": 2.9271029062830025, "grad_norm": 0.33841942087743543, "learning_rate": 3.3629209000862623e-06, "loss": 0.427, "step": 17783 }, { "epoch": 2.927267490502119, "grad_norm": 0.2910413296679468, "learning_rate": 3.362466096686356e-06, "loss": 0.4187, "step": 17784 }, { "epoch": 2.9274320747212355, "grad_norm": 0.3569167002034256, "learning_rate": 3.362011305698767e-06, "loss": 0.4152, "step": 17785 }, { "epoch": 2.927596658940352, "grad_norm": 0.30999334016630953, "learning_rate": 3.3615565271284556e-06, "loss": 0.4454, "step": 17786 }, { "epoch": 2.9277612431594684, "grad_norm": 0.30668375560823136, "learning_rate": 3.3611017609803855e-06, "loss": 0.4255, "step": 17787 }, { "epoch": 2.927925827378585, "grad_norm": 0.3213329643906008, "learning_rate": 3.360647007259518e-06, "loss": 0.4311, "step": 17788 }, { "epoch": 2.9280904115977013, "grad_norm": 0.39838423121582006, "learning_rate": 3.3601922659708165e-06, "loss": 0.4196, "step": 17789 }, { "epoch": 2.928254995816818, "grad_norm": 0.4026760553620033, "learning_rate": 3.3597375371192427e-06, "loss": 0.445, "step": 17790 }, { "epoch": 2.9284195800359343, "grad_norm": 0.29864249003243665, "learning_rate": 3.359282820709756e-06, "loss": 0.4314, "step": 17791 }, { "epoch": 2.9285841642550507, "grad_norm": 0.3050868808958821, "learning_rate": 3.3588281167473213e-06, "loss": 0.436, "step": 17792 }, { "epoch": 2.928748748474167, "grad_norm": 0.35953047262041005, "learning_rate": 3.3583734252368973e-06, "loss": 0.4484, "step": 17793 }, { "epoch": 2.9289133326932837, "grad_norm": 0.3837045335016808, "learning_rate": 3.3579187461834484e-06, "loss": 0.4034, "step": 17794 }, { "epoch": 2.9290779169124, "grad_norm": 0.280143497290015, "learning_rate": 3.3574640795919327e-06, "loss": 0.4285, "step": 17795 }, { "epoch": 2.9292425011315166, "grad_norm": 0.3197785437707039, "learning_rate": 3.357009425467314e-06, "loss": 0.433, "step": 17796 }, { "epoch": 2.929407085350633, "grad_norm": 0.32012188869001085, "learning_rate": 3.356554783814553e-06, "loss": 0.4198, "step": 17797 }, { "epoch": 2.9295716695697496, "grad_norm": 0.3085764226696516, "learning_rate": 3.356100154638608e-06, "loss": 0.4484, "step": 17798 }, { "epoch": 2.929736253788866, "grad_norm": 0.3337658242361266, "learning_rate": 3.3556455379444443e-06, "loss": 0.43, "step": 17799 }, { "epoch": 2.929900838007982, "grad_norm": 0.3262390653566618, "learning_rate": 3.3551909337370166e-06, "loss": 0.4364, "step": 17800 }, { "epoch": 2.9300654222270985, "grad_norm": 0.2862178416575866, "learning_rate": 3.354736342021291e-06, "loss": 0.4307, "step": 17801 }, { "epoch": 2.930230006446215, "grad_norm": 0.28962583779258144, "learning_rate": 3.354281762802224e-06, "loss": 0.4298, "step": 17802 }, { "epoch": 2.9303945906653315, "grad_norm": 0.3390162700678234, "learning_rate": 3.3538271960847778e-06, "loss": 0.422, "step": 17803 }, { "epoch": 2.930559174884448, "grad_norm": 0.3559782541224994, "learning_rate": 3.353372641873912e-06, "loss": 0.4394, "step": 17804 }, { "epoch": 2.9307237591035644, "grad_norm": 0.33601042460541813, "learning_rate": 3.3529181001745855e-06, "loss": 0.4316, "step": 17805 }, { "epoch": 2.930888343322681, "grad_norm": 0.29696711292317896, "learning_rate": 3.352463570991759e-06, "loss": 0.428, "step": 17806 }, { "epoch": 2.9310529275417974, "grad_norm": 0.3066111993945832, "learning_rate": 3.352009054330392e-06, "loss": 0.4359, "step": 17807 }, { "epoch": 2.931217511760914, "grad_norm": 0.29821728742624687, "learning_rate": 3.3515545501954443e-06, "loss": 0.4345, "step": 17808 }, { "epoch": 2.9313820959800303, "grad_norm": 0.32587962170500345, "learning_rate": 3.351100058591875e-06, "loss": 0.4319, "step": 17809 }, { "epoch": 2.9315466801991468, "grad_norm": 0.41296420067737905, "learning_rate": 3.350645579524644e-06, "loss": 0.4125, "step": 17810 }, { "epoch": 2.9317112644182632, "grad_norm": 0.40766325984073193, "learning_rate": 3.350191112998708e-06, "loss": 0.4543, "step": 17811 }, { "epoch": 2.9318758486373797, "grad_norm": 0.34426257609135424, "learning_rate": 3.3497366590190294e-06, "loss": 0.4411, "step": 17812 }, { "epoch": 2.932040432856496, "grad_norm": 0.35398131134009875, "learning_rate": 3.349282217590566e-06, "loss": 0.4278, "step": 17813 }, { "epoch": 2.9322050170756127, "grad_norm": 0.260521569928367, "learning_rate": 3.348827788718273e-06, "loss": 0.4142, "step": 17814 }, { "epoch": 2.932369601294729, "grad_norm": 0.31993887898910756, "learning_rate": 3.3483733724071136e-06, "loss": 0.4372, "step": 17815 }, { "epoch": 2.9325341855138456, "grad_norm": 0.30420162588792804, "learning_rate": 3.3479189686620434e-06, "loss": 0.4063, "step": 17816 }, { "epoch": 2.932698769732962, "grad_norm": 0.3112488967412044, "learning_rate": 3.347464577488022e-06, "loss": 0.4084, "step": 17817 }, { "epoch": 2.9328633539520785, "grad_norm": 0.3019402613651006, "learning_rate": 3.347010198890007e-06, "loss": 0.4246, "step": 17818 }, { "epoch": 2.933027938171195, "grad_norm": 0.2904121659476341, "learning_rate": 3.3465558328729574e-06, "loss": 0.4282, "step": 17819 }, { "epoch": 2.9331925223903115, "grad_norm": 0.3680894362455315, "learning_rate": 3.3461014794418306e-06, "loss": 0.4292, "step": 17820 }, { "epoch": 2.933357106609428, "grad_norm": 0.8192649715094241, "learning_rate": 3.3456471386015826e-06, "loss": 0.4157, "step": 17821 }, { "epoch": 2.9335216908285444, "grad_norm": 0.2984160228076957, "learning_rate": 3.345192810357173e-06, "loss": 0.4317, "step": 17822 }, { "epoch": 2.933686275047661, "grad_norm": 0.2975559190054634, "learning_rate": 3.3447384947135584e-06, "loss": 0.4226, "step": 17823 }, { "epoch": 2.9338508592667774, "grad_norm": 0.2975348369606467, "learning_rate": 3.3442841916756962e-06, "loss": 0.4478, "step": 17824 }, { "epoch": 2.934015443485894, "grad_norm": 0.26314404116089213, "learning_rate": 3.3438299012485428e-06, "loss": 0.4198, "step": 17825 }, { "epoch": 2.9341800277050103, "grad_norm": 0.5471598419819609, "learning_rate": 3.3433756234370574e-06, "loss": 0.4293, "step": 17826 }, { "epoch": 2.9343446119241268, "grad_norm": 0.27370109412400817, "learning_rate": 3.342921358246195e-06, "loss": 0.4321, "step": 17827 }, { "epoch": 2.9345091961432432, "grad_norm": 0.3078652883814931, "learning_rate": 3.342467105680913e-06, "loss": 0.4292, "step": 17828 }, { "epoch": 2.9346737803623597, "grad_norm": 0.37786786549615187, "learning_rate": 3.342012865746169e-06, "loss": 0.4358, "step": 17829 }, { "epoch": 2.934838364581476, "grad_norm": 0.2570317047160048, "learning_rate": 3.341558638446916e-06, "loss": 0.4209, "step": 17830 }, { "epoch": 2.9350029488005926, "grad_norm": 0.292775737045322, "learning_rate": 3.3411044237881156e-06, "loss": 0.4218, "step": 17831 }, { "epoch": 2.935167533019709, "grad_norm": 0.3469717961034144, "learning_rate": 3.3406502217747186e-06, "loss": 0.4323, "step": 17832 }, { "epoch": 2.9353321172388256, "grad_norm": 0.2748724519305954, "learning_rate": 3.340196032411686e-06, "loss": 0.4294, "step": 17833 }, { "epoch": 2.935496701457942, "grad_norm": 0.34156871435154457, "learning_rate": 3.339741855703971e-06, "loss": 0.4181, "step": 17834 }, { "epoch": 2.9356612856770585, "grad_norm": 0.35911621554021983, "learning_rate": 3.3392876916565288e-06, "loss": 0.4466, "step": 17835 }, { "epoch": 2.935825869896175, "grad_norm": 0.3112423328817136, "learning_rate": 3.3388335402743165e-06, "loss": 0.4508, "step": 17836 }, { "epoch": 2.9359904541152915, "grad_norm": 0.3101049266574248, "learning_rate": 3.3383794015622886e-06, "loss": 0.4365, "step": 17837 }, { "epoch": 2.936155038334408, "grad_norm": 0.40839422295382893, "learning_rate": 3.337925275525402e-06, "loss": 0.4697, "step": 17838 }, { "epoch": 2.9363196225535244, "grad_norm": 0.3001045748710366, "learning_rate": 3.3374711621686104e-06, "loss": 0.4264, "step": 17839 }, { "epoch": 2.936484206772641, "grad_norm": 0.36535566306293327, "learning_rate": 3.33701706149687e-06, "loss": 0.4172, "step": 17840 }, { "epoch": 2.936648790991757, "grad_norm": 0.3262169272894157, "learning_rate": 3.336562973515136e-06, "loss": 0.4328, "step": 17841 }, { "epoch": 2.9368133752108734, "grad_norm": 0.3334451630590715, "learning_rate": 3.3361088982283614e-06, "loss": 0.4247, "step": 17842 }, { "epoch": 2.93697795942999, "grad_norm": 0.3085853407263294, "learning_rate": 3.3356548356415024e-06, "loss": 0.4309, "step": 17843 }, { "epoch": 2.9371425436491063, "grad_norm": 0.38830741910708716, "learning_rate": 3.335200785759512e-06, "loss": 0.4231, "step": 17844 }, { "epoch": 2.937307127868223, "grad_norm": 0.40923246564507443, "learning_rate": 3.3347467485873472e-06, "loss": 0.426, "step": 17845 }, { "epoch": 2.9374717120873393, "grad_norm": 0.31888854734326744, "learning_rate": 3.3342927241299597e-06, "loss": 0.4291, "step": 17846 }, { "epoch": 2.9376362963064557, "grad_norm": 0.3555392981963307, "learning_rate": 3.333838712392306e-06, "loss": 0.4407, "step": 17847 }, { "epoch": 2.937800880525572, "grad_norm": 0.27002953860896445, "learning_rate": 3.333384713379337e-06, "loss": 0.4179, "step": 17848 }, { "epoch": 2.9379654647446887, "grad_norm": 0.31473988399066993, "learning_rate": 3.3329307270960096e-06, "loss": 0.4227, "step": 17849 }, { "epoch": 2.938130048963805, "grad_norm": 0.29185727361215336, "learning_rate": 3.332476753547278e-06, "loss": 0.4283, "step": 17850 }, { "epoch": 2.9382946331829216, "grad_norm": 0.28049103874445924, "learning_rate": 3.332022792738091e-06, "loss": 0.4286, "step": 17851 }, { "epoch": 2.938459217402038, "grad_norm": 0.30212525955101455, "learning_rate": 3.3315688446734078e-06, "loss": 0.4362, "step": 17852 }, { "epoch": 2.9386238016211546, "grad_norm": 0.2904014180261415, "learning_rate": 3.331114909358177e-06, "loss": 0.4406, "step": 17853 }, { "epoch": 2.938788385840271, "grad_norm": 0.3172390058145223, "learning_rate": 3.330660986797355e-06, "loss": 0.405, "step": 17854 }, { "epoch": 2.9389529700593875, "grad_norm": 0.2798833508296669, "learning_rate": 3.330207076995893e-06, "loss": 0.4175, "step": 17855 }, { "epoch": 2.939117554278504, "grad_norm": 0.3491267780694839, "learning_rate": 3.3297531799587447e-06, "loss": 0.4351, "step": 17856 }, { "epoch": 2.9392821384976204, "grad_norm": 0.2964701733388763, "learning_rate": 3.3292992956908635e-06, "loss": 0.4277, "step": 17857 }, { "epoch": 2.939446722716737, "grad_norm": 0.3005906746028151, "learning_rate": 3.3288454241971997e-06, "loss": 0.4321, "step": 17858 }, { "epoch": 2.9396113069358534, "grad_norm": 0.33130600309368236, "learning_rate": 3.328391565482708e-06, "loss": 0.413, "step": 17859 }, { "epoch": 2.93977589115497, "grad_norm": 0.26528857819174023, "learning_rate": 3.3279377195523392e-06, "loss": 0.4358, "step": 17860 }, { "epoch": 2.9399404753740863, "grad_norm": 0.28999419057896214, "learning_rate": 3.3274838864110487e-06, "loss": 0.4465, "step": 17861 }, { "epoch": 2.940105059593203, "grad_norm": 0.6408457564674434, "learning_rate": 3.3270300660637825e-06, "loss": 0.4379, "step": 17862 }, { "epoch": 2.9402696438123193, "grad_norm": 0.4237045146833924, "learning_rate": 3.3265762585154993e-06, "loss": 0.4219, "step": 17863 }, { "epoch": 2.9404342280314357, "grad_norm": 0.2729461510661597, "learning_rate": 3.3261224637711465e-06, "loss": 0.4524, "step": 17864 }, { "epoch": 2.940598812250552, "grad_norm": 0.3003957467857865, "learning_rate": 3.325668681835676e-06, "loss": 0.424, "step": 17865 }, { "epoch": 2.9407633964696682, "grad_norm": 0.29451358615558454, "learning_rate": 3.3252149127140414e-06, "loss": 0.4476, "step": 17866 }, { "epoch": 2.9409279806887847, "grad_norm": 0.31582228502791126, "learning_rate": 3.3247611564111913e-06, "loss": 0.438, "step": 17867 }, { "epoch": 2.941092564907901, "grad_norm": 0.3113893632208832, "learning_rate": 3.32430741293208e-06, "loss": 0.4042, "step": 17868 }, { "epoch": 2.9412571491270176, "grad_norm": 0.4684080838883566, "learning_rate": 3.3238536822816554e-06, "loss": 0.4273, "step": 17869 }, { "epoch": 2.941421733346134, "grad_norm": 0.3282530409190831, "learning_rate": 3.323399964464871e-06, "loss": 0.4317, "step": 17870 }, { "epoch": 2.9415863175652506, "grad_norm": 0.387300135890649, "learning_rate": 3.3229462594866778e-06, "loss": 0.4472, "step": 17871 }, { "epoch": 2.941750901784367, "grad_norm": 0.3230209122635716, "learning_rate": 3.3224925673520237e-06, "loss": 0.4375, "step": 17872 }, { "epoch": 2.9419154860034835, "grad_norm": 0.2884446232718566, "learning_rate": 3.322038888065861e-06, "loss": 0.4448, "step": 17873 }, { "epoch": 2.9420800702226, "grad_norm": 0.37083274672382366, "learning_rate": 3.3215852216331395e-06, "loss": 0.4327, "step": 17874 }, { "epoch": 2.9422446544417165, "grad_norm": 0.29977657531127405, "learning_rate": 3.3211315680588108e-06, "loss": 0.4318, "step": 17875 }, { "epoch": 2.942409238660833, "grad_norm": 0.47841463020664227, "learning_rate": 3.320677927347822e-06, "loss": 0.4537, "step": 17876 }, { "epoch": 2.9425738228799494, "grad_norm": 0.3629239245166473, "learning_rate": 3.320224299505127e-06, "loss": 0.4355, "step": 17877 }, { "epoch": 2.942738407099066, "grad_norm": 0.4161272794584303, "learning_rate": 3.3197706845356733e-06, "loss": 0.4212, "step": 17878 }, { "epoch": 2.9429029913181823, "grad_norm": 0.291344277246969, "learning_rate": 3.31931708244441e-06, "loss": 0.4333, "step": 17879 }, { "epoch": 2.943067575537299, "grad_norm": 0.40246922396376766, "learning_rate": 3.318863493236289e-06, "loss": 0.4366, "step": 17880 }, { "epoch": 2.9432321597564153, "grad_norm": 0.29864377914785467, "learning_rate": 3.3184099169162553e-06, "loss": 0.4286, "step": 17881 }, { "epoch": 2.9433967439755317, "grad_norm": 0.27655697850060473, "learning_rate": 3.3179563534892642e-06, "loss": 0.4205, "step": 17882 }, { "epoch": 2.943561328194648, "grad_norm": 0.2935291739535782, "learning_rate": 3.3175028029602586e-06, "loss": 0.4354, "step": 17883 }, { "epoch": 2.9437259124137647, "grad_norm": 0.49612396605339865, "learning_rate": 3.3170492653341935e-06, "loss": 0.4144, "step": 17884 }, { "epoch": 2.943890496632881, "grad_norm": 0.41181039529890884, "learning_rate": 3.3165957406160135e-06, "loss": 0.4503, "step": 17885 }, { "epoch": 2.9440550808519976, "grad_norm": 0.34263661056886074, "learning_rate": 3.316142228810668e-06, "loss": 0.4118, "step": 17886 }, { "epoch": 2.944219665071114, "grad_norm": 0.28397946210819824, "learning_rate": 3.3156887299231066e-06, "loss": 0.4209, "step": 17887 }, { "epoch": 2.9443842492902306, "grad_norm": 0.3446468897435312, "learning_rate": 3.315235243958276e-06, "loss": 0.4275, "step": 17888 }, { "epoch": 2.944548833509347, "grad_norm": 0.34994566616221195, "learning_rate": 3.314781770921127e-06, "loss": 0.4281, "step": 17889 }, { "epoch": 2.9447134177284635, "grad_norm": 0.2673280268924159, "learning_rate": 3.3143283108166047e-06, "loss": 0.4245, "step": 17890 }, { "epoch": 2.94487800194758, "grad_norm": 0.3052985036142152, "learning_rate": 3.3138748636496613e-06, "loss": 0.4355, "step": 17891 }, { "epoch": 2.9450425861666965, "grad_norm": 0.24296991418516659, "learning_rate": 3.3134214294252385e-06, "loss": 0.4266, "step": 17892 }, { "epoch": 2.945207170385813, "grad_norm": 0.348211095834571, "learning_rate": 3.3129680081482908e-06, "loss": 0.4502, "step": 17893 }, { "epoch": 2.9453717546049294, "grad_norm": 0.3066715616668804, "learning_rate": 3.3125145998237613e-06, "loss": 0.4228, "step": 17894 }, { "epoch": 2.945536338824046, "grad_norm": 0.7027922140802841, "learning_rate": 3.312061204456598e-06, "loss": 0.4426, "step": 17895 }, { "epoch": 2.9457009230431623, "grad_norm": 0.35514336309042926, "learning_rate": 3.3116078220517486e-06, "loss": 0.4192, "step": 17896 }, { "epoch": 2.945865507262279, "grad_norm": 0.2990787120046496, "learning_rate": 3.311154452614161e-06, "loss": 0.4526, "step": 17897 }, { "epoch": 2.9460300914813953, "grad_norm": 0.3404325301953905, "learning_rate": 3.3107010961487814e-06, "loss": 0.4163, "step": 17898 }, { "epoch": 2.9461946757005117, "grad_norm": 0.3363829382675295, "learning_rate": 3.310247752660556e-06, "loss": 0.4156, "step": 17899 }, { "epoch": 2.946359259919628, "grad_norm": 0.3528096123152522, "learning_rate": 3.309794422154433e-06, "loss": 0.4366, "step": 17900 }, { "epoch": 2.9465238441387447, "grad_norm": 0.45103200585706277, "learning_rate": 3.3093411046353597e-06, "loss": 0.4289, "step": 17901 }, { "epoch": 2.946688428357861, "grad_norm": 0.25641087569932003, "learning_rate": 3.308887800108278e-06, "loss": 0.421, "step": 17902 }, { "epoch": 2.9468530125769776, "grad_norm": 0.36216526002760185, "learning_rate": 3.3084345085781404e-06, "loss": 0.433, "step": 17903 }, { "epoch": 2.947017596796094, "grad_norm": 0.3100472023891358, "learning_rate": 3.3079812300498875e-06, "loss": 0.4348, "step": 17904 }, { "epoch": 2.9471821810152106, "grad_norm": 0.28062098127009283, "learning_rate": 3.3075279645284693e-06, "loss": 0.4502, "step": 17905 }, { "epoch": 2.947346765234327, "grad_norm": 0.3299692058165166, "learning_rate": 3.307074712018829e-06, "loss": 0.4278, "step": 17906 }, { "epoch": 2.9475113494534435, "grad_norm": 0.42246795002332643, "learning_rate": 3.3066214725259136e-06, "loss": 0.4224, "step": 17907 }, { "epoch": 2.9476759336725595, "grad_norm": 0.275676822494741, "learning_rate": 3.3061682460546688e-06, "loss": 0.4379, "step": 17908 }, { "epoch": 2.947840517891676, "grad_norm": 0.29540839746440317, "learning_rate": 3.3057150326100385e-06, "loss": 0.4214, "step": 17909 }, { "epoch": 2.9480051021107925, "grad_norm": 0.31534615907655184, "learning_rate": 3.305261832196971e-06, "loss": 0.4332, "step": 17910 }, { "epoch": 2.948169686329909, "grad_norm": 0.3277670198611158, "learning_rate": 3.3048086448204074e-06, "loss": 0.4238, "step": 17911 }, { "epoch": 2.9483342705490254, "grad_norm": 0.3162074351131492, "learning_rate": 3.304355470485297e-06, "loss": 0.4091, "step": 17912 }, { "epoch": 2.948498854768142, "grad_norm": 0.4291195573102557, "learning_rate": 3.30390230919658e-06, "loss": 0.4411, "step": 17913 }, { "epoch": 2.9486634389872584, "grad_norm": 0.31681130856431333, "learning_rate": 3.303449160959206e-06, "loss": 0.4334, "step": 17914 }, { "epoch": 2.948828023206375, "grad_norm": 0.31191226705513325, "learning_rate": 3.302996025778117e-06, "loss": 0.4234, "step": 17915 }, { "epoch": 2.9489926074254913, "grad_norm": 0.29734890492769767, "learning_rate": 3.3025429036582564e-06, "loss": 0.4267, "step": 17916 }, { "epoch": 2.9491571916446078, "grad_norm": 0.2899155809468505, "learning_rate": 3.3020897946045703e-06, "loss": 0.4282, "step": 17917 }, { "epoch": 2.9493217758637242, "grad_norm": 0.25672904390104206, "learning_rate": 3.301636698622002e-06, "loss": 0.4406, "step": 17918 }, { "epoch": 2.9494863600828407, "grad_norm": 0.2789684768772823, "learning_rate": 3.301183615715496e-06, "loss": 0.442, "step": 17919 }, { "epoch": 2.949650944301957, "grad_norm": 0.2896431189484109, "learning_rate": 3.300730545889996e-06, "loss": 0.4393, "step": 17920 }, { "epoch": 2.9498155285210736, "grad_norm": 0.3326451688102741, "learning_rate": 3.3002774891504457e-06, "loss": 0.4504, "step": 17921 }, { "epoch": 2.94998011274019, "grad_norm": 0.3025192215294995, "learning_rate": 3.2998244455017897e-06, "loss": 0.4544, "step": 17922 }, { "epoch": 2.9501446969593066, "grad_norm": 0.36848992028546734, "learning_rate": 3.299371414948969e-06, "loss": 0.4385, "step": 17923 }, { "epoch": 2.950309281178423, "grad_norm": 0.27464068844369605, "learning_rate": 3.298918397496929e-06, "loss": 0.4181, "step": 17924 }, { "epoch": 2.9504738653975395, "grad_norm": 0.3508843613547432, "learning_rate": 3.2984653931506105e-06, "loss": 0.4378, "step": 17925 }, { "epoch": 2.950638449616656, "grad_norm": 0.3168887900492193, "learning_rate": 3.29801240191496e-06, "loss": 0.4072, "step": 17926 }, { "epoch": 2.9508030338357725, "grad_norm": 0.36933439551737113, "learning_rate": 3.2975594237949166e-06, "loss": 0.4455, "step": 17927 }, { "epoch": 2.950967618054889, "grad_norm": 0.32323328773774185, "learning_rate": 3.2971064587954263e-06, "loss": 0.4321, "step": 17928 }, { "epoch": 2.9511322022740054, "grad_norm": 0.2852129816505733, "learning_rate": 3.2966535069214315e-06, "loss": 0.4272, "step": 17929 }, { "epoch": 2.951296786493122, "grad_norm": 0.3360420850201866, "learning_rate": 3.2962005681778704e-06, "loss": 0.443, "step": 17930 }, { "epoch": 2.9514613707122384, "grad_norm": 0.28600922843412036, "learning_rate": 3.2957476425696905e-06, "loss": 0.4447, "step": 17931 }, { "epoch": 2.951625954931355, "grad_norm": 0.35561012211751764, "learning_rate": 3.295294730101829e-06, "loss": 0.4325, "step": 17932 }, { "epoch": 2.951790539150471, "grad_norm": 0.3208730436976375, "learning_rate": 3.2948418307792336e-06, "loss": 0.4253, "step": 17933 }, { "epoch": 2.9519551233695873, "grad_norm": 0.2722359535079783, "learning_rate": 3.294388944606842e-06, "loss": 0.4368, "step": 17934 }, { "epoch": 2.952119707588704, "grad_norm": 0.2885866212922849, "learning_rate": 3.293936071589597e-06, "loss": 0.4176, "step": 17935 }, { "epoch": 2.9522842918078203, "grad_norm": 0.4040311456081655, "learning_rate": 3.2934832117324396e-06, "loss": 0.4256, "step": 17936 }, { "epoch": 2.9524488760269367, "grad_norm": 0.33857774276731406, "learning_rate": 3.2930303650403133e-06, "loss": 0.4594, "step": 17937 }, { "epoch": 2.952613460246053, "grad_norm": 0.3265745780732682, "learning_rate": 3.292577531518158e-06, "loss": 0.4259, "step": 17938 }, { "epoch": 2.9527780444651697, "grad_norm": 0.29969628802037535, "learning_rate": 3.292124711170913e-06, "loss": 0.4242, "step": 17939 }, { "epoch": 2.952942628684286, "grad_norm": 0.27139539898638215, "learning_rate": 3.2916719040035227e-06, "loss": 0.4326, "step": 17940 }, { "epoch": 2.9531072129034026, "grad_norm": 0.33381717418697626, "learning_rate": 3.2912191100209247e-06, "loss": 0.4323, "step": 17941 }, { "epoch": 2.953271797122519, "grad_norm": 0.2919655671312646, "learning_rate": 3.2907663292280632e-06, "loss": 0.4084, "step": 17942 }, { "epoch": 2.9534363813416356, "grad_norm": 0.3061555882408243, "learning_rate": 3.290313561629875e-06, "loss": 0.4326, "step": 17943 }, { "epoch": 2.953600965560752, "grad_norm": 0.2730678949072666, "learning_rate": 3.2898608072313046e-06, "loss": 0.4271, "step": 17944 }, { "epoch": 2.9537655497798685, "grad_norm": 0.25751069019917944, "learning_rate": 3.28940806603729e-06, "loss": 0.4044, "step": 17945 }, { "epoch": 2.953930133998985, "grad_norm": 0.32760159845054954, "learning_rate": 3.28895533805277e-06, "loss": 0.4326, "step": 17946 }, { "epoch": 2.9540947182181014, "grad_norm": 0.3229338862618099, "learning_rate": 3.2885026232826865e-06, "loss": 0.4296, "step": 17947 }, { "epoch": 2.954259302437218, "grad_norm": 0.283933379779198, "learning_rate": 3.2880499217319778e-06, "loss": 0.4437, "step": 17948 }, { "epoch": 2.9544238866563344, "grad_norm": 0.33073947677974724, "learning_rate": 3.2875972334055866e-06, "loss": 0.4251, "step": 17949 }, { "epoch": 2.954588470875451, "grad_norm": 0.30371658072334495, "learning_rate": 3.2871445583084486e-06, "loss": 0.4361, "step": 17950 }, { "epoch": 2.9547530550945673, "grad_norm": 0.34983625322813805, "learning_rate": 3.2866918964455068e-06, "loss": 0.4238, "step": 17951 }, { "epoch": 2.954917639313684, "grad_norm": 0.2555862807040976, "learning_rate": 3.2862392478216996e-06, "loss": 0.4112, "step": 17952 }, { "epoch": 2.9550822235328003, "grad_norm": 0.511680191622086, "learning_rate": 3.2857866124419632e-06, "loss": 0.4365, "step": 17953 }, { "epoch": 2.9552468077519167, "grad_norm": 0.3025622763140895, "learning_rate": 3.2853339903112397e-06, "loss": 0.4468, "step": 17954 }, { "epoch": 2.955411391971033, "grad_norm": 0.30488710406531333, "learning_rate": 3.284881381434466e-06, "loss": 0.4548, "step": 17955 }, { "epoch": 2.9555759761901497, "grad_norm": 0.29399243678847514, "learning_rate": 3.284428785816583e-06, "loss": 0.4458, "step": 17956 }, { "epoch": 2.955740560409266, "grad_norm": 0.2704503173871928, "learning_rate": 3.283976203462527e-06, "loss": 0.4337, "step": 17957 }, { "epoch": 2.9559051446283826, "grad_norm": 0.36732907875717224, "learning_rate": 3.2835236343772375e-06, "loss": 0.4158, "step": 17958 }, { "epoch": 2.956069728847499, "grad_norm": 0.3164426199884768, "learning_rate": 3.283071078565653e-06, "loss": 0.4329, "step": 17959 }, { "epoch": 2.9562343130666155, "grad_norm": 0.31369984071288404, "learning_rate": 3.2826185360327103e-06, "loss": 0.4209, "step": 17960 }, { "epoch": 2.956398897285732, "grad_norm": 0.3399865849521089, "learning_rate": 3.2821660067833503e-06, "loss": 0.4232, "step": 17961 }, { "epoch": 2.9565634815048485, "grad_norm": 0.3107880242346844, "learning_rate": 3.281713490822506e-06, "loss": 0.4189, "step": 17962 }, { "epoch": 2.956728065723965, "grad_norm": 0.41034059979693965, "learning_rate": 3.2812609881551207e-06, "loss": 0.4374, "step": 17963 }, { "epoch": 2.9568926499430814, "grad_norm": 0.3564405810029044, "learning_rate": 3.2808084987861263e-06, "loss": 0.4211, "step": 17964 }, { "epoch": 2.957057234162198, "grad_norm": 0.36128352218561705, "learning_rate": 3.2803560227204654e-06, "loss": 0.4399, "step": 17965 }, { "epoch": 2.9572218183813144, "grad_norm": 0.48125988482566323, "learning_rate": 3.279903559963072e-06, "loss": 0.4336, "step": 17966 }, { "epoch": 2.957386402600431, "grad_norm": 0.30767701112235857, "learning_rate": 3.2794511105188835e-06, "loss": 0.4245, "step": 17967 }, { "epoch": 2.9575509868195473, "grad_norm": 0.3268399282048589, "learning_rate": 3.2789986743928377e-06, "loss": 0.4268, "step": 17968 }, { "epoch": 2.957715571038664, "grad_norm": 0.2830963299254264, "learning_rate": 3.2785462515898706e-06, "loss": 0.4249, "step": 17969 }, { "epoch": 2.9578801552577803, "grad_norm": 0.2991147151326696, "learning_rate": 3.27809384211492e-06, "loss": 0.4392, "step": 17970 }, { "epoch": 2.9580447394768967, "grad_norm": 0.37204084254622605, "learning_rate": 3.277641445972921e-06, "loss": 0.4376, "step": 17971 }, { "epoch": 2.958209323696013, "grad_norm": 0.2818280247339522, "learning_rate": 3.2771890631688128e-06, "loss": 0.4306, "step": 17972 }, { "epoch": 2.9583739079151297, "grad_norm": 0.2951545466599934, "learning_rate": 3.276736693707526e-06, "loss": 0.4288, "step": 17973 }, { "epoch": 2.958538492134246, "grad_norm": 0.4873851836731497, "learning_rate": 3.276284337594004e-06, "loss": 0.4191, "step": 17974 }, { "epoch": 2.958703076353362, "grad_norm": 0.3930468198978656, "learning_rate": 3.275831994833177e-06, "loss": 0.4402, "step": 17975 }, { "epoch": 2.9588676605724786, "grad_norm": 0.2764863576168517, "learning_rate": 3.275379665429982e-06, "loss": 0.4427, "step": 17976 }, { "epoch": 2.959032244791595, "grad_norm": 0.3303930389333476, "learning_rate": 3.2749273493893567e-06, "loss": 0.4386, "step": 17977 }, { "epoch": 2.9591968290107116, "grad_norm": 0.29647947734247887, "learning_rate": 3.2744750467162338e-06, "loss": 0.4325, "step": 17978 }, { "epoch": 2.959361413229828, "grad_norm": 0.3180004148333902, "learning_rate": 3.2740227574155516e-06, "loss": 0.4111, "step": 17979 }, { "epoch": 2.9595259974489445, "grad_norm": 0.7645019242291001, "learning_rate": 3.273570481492242e-06, "loss": 0.4312, "step": 17980 }, { "epoch": 2.959690581668061, "grad_norm": 0.2773599141407237, "learning_rate": 3.2731182189512437e-06, "loss": 0.4186, "step": 17981 }, { "epoch": 2.9598551658871775, "grad_norm": 0.32322410607835417, "learning_rate": 3.2726659697974906e-06, "loss": 0.4221, "step": 17982 }, { "epoch": 2.960019750106294, "grad_norm": 0.3347967700591786, "learning_rate": 3.2722137340359128e-06, "loss": 0.4509, "step": 17983 }, { "epoch": 2.9601843343254104, "grad_norm": 0.32349798325468926, "learning_rate": 3.2717615116714523e-06, "loss": 0.4223, "step": 17984 }, { "epoch": 2.960348918544527, "grad_norm": 0.2557811785916594, "learning_rate": 3.271309302709038e-06, "loss": 0.4437, "step": 17985 }, { "epoch": 2.9605135027636433, "grad_norm": 0.2802734711444316, "learning_rate": 3.2708571071536076e-06, "loss": 0.424, "step": 17986 }, { "epoch": 2.96067808698276, "grad_norm": 0.3835232587119877, "learning_rate": 3.270404925010092e-06, "loss": 0.4255, "step": 17987 }, { "epoch": 2.9608426712018763, "grad_norm": 0.4379819983304958, "learning_rate": 3.269952756283429e-06, "loss": 0.4133, "step": 17988 }, { "epoch": 2.9610072554209927, "grad_norm": 0.33758890094692523, "learning_rate": 3.2695006009785503e-06, "loss": 0.4357, "step": 17989 }, { "epoch": 2.961171839640109, "grad_norm": 1.5701706025079465, "learning_rate": 3.2690484591003883e-06, "loss": 0.4507, "step": 17990 }, { "epoch": 2.9613364238592257, "grad_norm": 0.34036360947169975, "learning_rate": 3.26859633065388e-06, "loss": 0.4198, "step": 17991 }, { "epoch": 2.961501008078342, "grad_norm": 0.3288506108564918, "learning_rate": 3.2681442156439565e-06, "loss": 0.4209, "step": 17992 }, { "epoch": 2.9616655922974586, "grad_norm": 0.4595635703723497, "learning_rate": 3.2676921140755524e-06, "loss": 0.4386, "step": 17993 }, { "epoch": 2.961830176516575, "grad_norm": 0.4226522308980864, "learning_rate": 3.267240025953598e-06, "loss": 0.418, "step": 17994 }, { "epoch": 2.9619947607356916, "grad_norm": 0.3557572729847082, "learning_rate": 3.2667879512830316e-06, "loss": 0.4484, "step": 17995 }, { "epoch": 2.962159344954808, "grad_norm": 0.28465195785827985, "learning_rate": 3.266335890068781e-06, "loss": 0.4249, "step": 17996 }, { "epoch": 2.9623239291739245, "grad_norm": 0.30735732145131517, "learning_rate": 3.2658838423157806e-06, "loss": 0.42, "step": 17997 }, { "epoch": 2.962488513393041, "grad_norm": 0.3742831146688339, "learning_rate": 3.265431808028964e-06, "loss": 0.4296, "step": 17998 }, { "epoch": 2.9626530976121574, "grad_norm": 0.3994875408359112, "learning_rate": 3.2649797872132614e-06, "loss": 0.4255, "step": 17999 }, { "epoch": 2.9628176818312735, "grad_norm": 0.3290846626459086, "learning_rate": 3.264527779873608e-06, "loss": 0.4473, "step": 18000 }, { "epoch": 2.96298226605039, "grad_norm": 0.3710954399446858, "learning_rate": 3.2640757860149335e-06, "loss": 0.4315, "step": 18001 }, { "epoch": 2.9631468502695064, "grad_norm": 0.2933739663224296, "learning_rate": 3.263623805642171e-06, "loss": 0.4374, "step": 18002 }, { "epoch": 2.963311434488623, "grad_norm": 0.32381176287437485, "learning_rate": 3.263171838760254e-06, "loss": 0.4303, "step": 18003 }, { "epoch": 2.9634760187077394, "grad_norm": 0.33407116028706746, "learning_rate": 3.26271988537411e-06, "loss": 0.4459, "step": 18004 }, { "epoch": 2.963640602926856, "grad_norm": 0.35993035731713136, "learning_rate": 3.2622679454886734e-06, "loss": 0.4172, "step": 18005 }, { "epoch": 2.9638051871459723, "grad_norm": 0.2500691271741941, "learning_rate": 3.2618160191088743e-06, "loss": 0.4284, "step": 18006 }, { "epoch": 2.9639697713650888, "grad_norm": 0.28256234629310606, "learning_rate": 3.2613641062396454e-06, "loss": 0.4517, "step": 18007 }, { "epoch": 2.9641343555842052, "grad_norm": 0.449253121128817, "learning_rate": 3.2609122068859164e-06, "loss": 0.4362, "step": 18008 }, { "epoch": 2.9642989398033217, "grad_norm": 0.2832709344301676, "learning_rate": 3.2604603210526197e-06, "loss": 0.4367, "step": 18009 }, { "epoch": 2.964463524022438, "grad_norm": 0.8754438257605548, "learning_rate": 3.2600084487446854e-06, "loss": 0.4173, "step": 18010 }, { "epoch": 2.9646281082415546, "grad_norm": 0.388661775026451, "learning_rate": 3.259556589967043e-06, "loss": 0.4363, "step": 18011 }, { "epoch": 2.964792692460671, "grad_norm": 0.317062220781525, "learning_rate": 3.259104744724626e-06, "loss": 0.4336, "step": 18012 }, { "epoch": 2.9649572766797876, "grad_norm": 0.3325180440188855, "learning_rate": 3.2586529130223596e-06, "loss": 0.4218, "step": 18013 }, { "epoch": 2.965121860898904, "grad_norm": 0.42725282445783, "learning_rate": 3.25820109486518e-06, "loss": 0.4465, "step": 18014 }, { "epoch": 2.9652864451180205, "grad_norm": 0.2973333427472893, "learning_rate": 3.257749290258012e-06, "loss": 0.4088, "step": 18015 }, { "epoch": 2.965451029337137, "grad_norm": 0.29434542873997904, "learning_rate": 3.2572974992057905e-06, "loss": 0.4318, "step": 18016 }, { "epoch": 2.9656156135562535, "grad_norm": 0.3345839461462118, "learning_rate": 3.2568457217134415e-06, "loss": 0.4231, "step": 18017 }, { "epoch": 2.96578019777537, "grad_norm": 0.851610140823981, "learning_rate": 3.2563939577858965e-06, "loss": 0.4497, "step": 18018 }, { "epoch": 2.9659447819944864, "grad_norm": 0.3371965846051764, "learning_rate": 3.2559422074280836e-06, "loss": 0.4251, "step": 18019 }, { "epoch": 2.966109366213603, "grad_norm": 0.33373839748298434, "learning_rate": 3.2554904706449325e-06, "loss": 0.428, "step": 18020 }, { "epoch": 2.9662739504327194, "grad_norm": 0.3017815513825485, "learning_rate": 3.255038747441374e-06, "loss": 0.4271, "step": 18021 }, { "epoch": 2.966438534651836, "grad_norm": 0.44310968910509685, "learning_rate": 3.254587037822334e-06, "loss": 0.4318, "step": 18022 }, { "epoch": 2.9666031188709523, "grad_norm": 0.3164345259894699, "learning_rate": 3.254135341792746e-06, "loss": 0.4194, "step": 18023 }, { "epoch": 2.9667677030900688, "grad_norm": 0.3420479482428773, "learning_rate": 3.253683659357532e-06, "loss": 0.4386, "step": 18024 }, { "epoch": 2.9669322873091852, "grad_norm": 0.2682684443609395, "learning_rate": 3.253231990521628e-06, "loss": 0.4296, "step": 18025 }, { "epoch": 2.9670968715283017, "grad_norm": 0.2814989143324126, "learning_rate": 3.252780335289958e-06, "loss": 0.4304, "step": 18026 }, { "epoch": 2.967261455747418, "grad_norm": 0.33286025813902886, "learning_rate": 3.25232869366745e-06, "loss": 0.4282, "step": 18027 }, { "epoch": 2.9674260399665346, "grad_norm": 0.2880496410360446, "learning_rate": 3.2518770656590347e-06, "loss": 0.437, "step": 18028 }, { "epoch": 2.967590624185651, "grad_norm": 0.3780400505630464, "learning_rate": 3.2514254512696377e-06, "loss": 0.4435, "step": 18029 }, { "epoch": 2.9677552084047676, "grad_norm": 0.33580026727556184, "learning_rate": 3.2509738505041884e-06, "loss": 0.4404, "step": 18030 }, { "epoch": 2.967919792623884, "grad_norm": 0.3335263603656979, "learning_rate": 3.250522263367613e-06, "loss": 0.4098, "step": 18031 }, { "epoch": 2.9680843768430005, "grad_norm": 0.32127404286546213, "learning_rate": 3.2500706898648417e-06, "loss": 0.4358, "step": 18032 }, { "epoch": 2.968248961062117, "grad_norm": 0.3090836506465173, "learning_rate": 3.2496191300008004e-06, "loss": 0.4251, "step": 18033 }, { "epoch": 2.9684135452812335, "grad_norm": 0.3899512882733559, "learning_rate": 3.249167583780414e-06, "loss": 0.4194, "step": 18034 }, { "epoch": 2.96857812950035, "grad_norm": 0.29893313967120105, "learning_rate": 3.2487160512086143e-06, "loss": 0.4246, "step": 18035 }, { "epoch": 2.9687427137194664, "grad_norm": 0.6207934829357318, "learning_rate": 3.248264532290323e-06, "loss": 0.4358, "step": 18036 }, { "epoch": 2.968907297938583, "grad_norm": 0.53899296820925, "learning_rate": 3.2478130270304716e-06, "loss": 0.4318, "step": 18037 }, { "epoch": 2.9690718821576993, "grad_norm": 0.27980429461566236, "learning_rate": 3.247361535433983e-06, "loss": 0.4299, "step": 18038 }, { "epoch": 2.969236466376816, "grad_norm": 0.3590153093987848, "learning_rate": 3.2469100575057864e-06, "loss": 0.4175, "step": 18039 }, { "epoch": 2.9694010505959323, "grad_norm": 0.3317868256936113, "learning_rate": 3.2464585932508075e-06, "loss": 0.4384, "step": 18040 }, { "epoch": 2.9695656348150488, "grad_norm": 1.0971192220495734, "learning_rate": 3.246007142673971e-06, "loss": 0.4354, "step": 18041 }, { "epoch": 2.969730219034165, "grad_norm": 0.35863929218581314, "learning_rate": 3.2455557057802063e-06, "loss": 0.438, "step": 18042 }, { "epoch": 2.9698948032532813, "grad_norm": 0.3274544277696532, "learning_rate": 3.2451042825744333e-06, "loss": 0.4466, "step": 18043 }, { "epoch": 2.9700593874723977, "grad_norm": 0.3313034653954024, "learning_rate": 3.2446528730615857e-06, "loss": 0.4321, "step": 18044 }, { "epoch": 2.970223971691514, "grad_norm": 0.30389316188656185, "learning_rate": 3.2442014772465814e-06, "loss": 0.4462, "step": 18045 }, { "epoch": 2.9703885559106307, "grad_norm": 0.2934094414626938, "learning_rate": 3.243750095134352e-06, "loss": 0.4343, "step": 18046 }, { "epoch": 2.970553140129747, "grad_norm": 0.34425230910755483, "learning_rate": 3.24329872672982e-06, "loss": 0.4098, "step": 18047 }, { "epoch": 2.9707177243488636, "grad_norm": 0.30450840666561735, "learning_rate": 3.2428473720379085e-06, "loss": 0.4053, "step": 18048 }, { "epoch": 2.97088230856798, "grad_norm": 0.41018405150240905, "learning_rate": 3.242396031063547e-06, "loss": 0.4099, "step": 18049 }, { "epoch": 2.9710468927870965, "grad_norm": 0.29096578844948423, "learning_rate": 3.241944703811657e-06, "loss": 0.4463, "step": 18050 }, { "epoch": 2.971211477006213, "grad_norm": 0.31110995772814504, "learning_rate": 3.2414933902871647e-06, "loss": 0.4294, "step": 18051 }, { "epoch": 2.9713760612253295, "grad_norm": 0.28542042993102296, "learning_rate": 3.2410420904949933e-06, "loss": 0.4252, "step": 18052 }, { "epoch": 2.971540645444446, "grad_norm": 0.33471298146038364, "learning_rate": 3.2405908044400692e-06, "loss": 0.434, "step": 18053 }, { "epoch": 2.9717052296635624, "grad_norm": 0.2776863662421888, "learning_rate": 3.2401395321273175e-06, "loss": 0.4107, "step": 18054 }, { "epoch": 2.971869813882679, "grad_norm": 0.31484487462523253, "learning_rate": 3.2396882735616576e-06, "loss": 0.4328, "step": 18055 }, { "epoch": 2.9720343981017954, "grad_norm": 0.325430290067415, "learning_rate": 3.2392370287480172e-06, "loss": 0.4164, "step": 18056 }, { "epoch": 2.972198982320912, "grad_norm": 0.28709782951035795, "learning_rate": 3.238785797691318e-06, "loss": 0.429, "step": 18057 }, { "epoch": 2.9723635665400283, "grad_norm": 0.3961152654109752, "learning_rate": 3.238334580396486e-06, "loss": 0.4247, "step": 18058 }, { "epoch": 2.972528150759145, "grad_norm": 0.473927739825349, "learning_rate": 3.2378833768684428e-06, "loss": 0.432, "step": 18059 }, { "epoch": 2.9726927349782613, "grad_norm": 0.2904381855630161, "learning_rate": 3.2374321871121134e-06, "loss": 0.4234, "step": 18060 }, { "epoch": 2.9728573191973777, "grad_norm": 0.33940041419371153, "learning_rate": 3.236981011132419e-06, "loss": 0.4241, "step": 18061 }, { "epoch": 2.973021903416494, "grad_norm": 0.39408484606284216, "learning_rate": 3.236529848934285e-06, "loss": 0.4076, "step": 18062 }, { "epoch": 2.9731864876356107, "grad_norm": 0.33323138770900007, "learning_rate": 3.2360787005226334e-06, "loss": 0.4065, "step": 18063 }, { "epoch": 2.973351071854727, "grad_norm": 0.4534485413101231, "learning_rate": 3.2356275659023844e-06, "loss": 0.4268, "step": 18064 }, { "epoch": 2.9735156560738436, "grad_norm": 0.30028391006421495, "learning_rate": 3.2351764450784653e-06, "loss": 0.4139, "step": 18065 }, { "epoch": 2.97368024029296, "grad_norm": 0.3178120219809389, "learning_rate": 3.2347253380557944e-06, "loss": 0.4408, "step": 18066 }, { "epoch": 2.973844824512076, "grad_norm": 0.39134603889554914, "learning_rate": 3.2342742448392964e-06, "loss": 0.4269, "step": 18067 }, { "epoch": 2.9740094087311926, "grad_norm": 0.2806020550149464, "learning_rate": 3.2338231654338913e-06, "loss": 0.4288, "step": 18068 }, { "epoch": 2.974173992950309, "grad_norm": 0.34966794647504806, "learning_rate": 3.2333720998445036e-06, "loss": 0.4295, "step": 18069 }, { "epoch": 2.9743385771694255, "grad_norm": 0.30994245389032565, "learning_rate": 3.2329210480760544e-06, "loss": 0.4247, "step": 18070 }, { "epoch": 2.974503161388542, "grad_norm": 0.3643813783368933, "learning_rate": 3.2324700101334647e-06, "loss": 0.4254, "step": 18071 }, { "epoch": 2.9746677456076585, "grad_norm": 0.4606551678610015, "learning_rate": 3.2320189860216563e-06, "loss": 0.4257, "step": 18072 }, { "epoch": 2.974832329826775, "grad_norm": 0.31573605145434186, "learning_rate": 3.2315679757455496e-06, "loss": 0.4299, "step": 18073 }, { "epoch": 2.9749969140458914, "grad_norm": 0.33672582474808266, "learning_rate": 3.2311169793100697e-06, "loss": 0.4219, "step": 18074 }, { "epoch": 2.975161498265008, "grad_norm": 0.3283598533366402, "learning_rate": 3.230665996720131e-06, "loss": 0.4217, "step": 18075 }, { "epoch": 2.9753260824841243, "grad_norm": 0.27162495539309256, "learning_rate": 3.2302150279806625e-06, "loss": 0.4196, "step": 18076 }, { "epoch": 2.975490666703241, "grad_norm": 0.4643539845857057, "learning_rate": 3.2297640730965787e-06, "loss": 0.4439, "step": 18077 }, { "epoch": 2.9756552509223573, "grad_norm": 0.368683998694101, "learning_rate": 3.229313132072802e-06, "loss": 0.4451, "step": 18078 }, { "epoch": 2.9758198351414737, "grad_norm": 0.3074675786557951, "learning_rate": 3.228862204914254e-06, "loss": 0.4114, "step": 18079 }, { "epoch": 2.97598441936059, "grad_norm": 0.34895046305914723, "learning_rate": 3.228411291625854e-06, "loss": 0.4456, "step": 18080 }, { "epoch": 2.9761490035797067, "grad_norm": 0.37398568795790305, "learning_rate": 3.2279603922125224e-06, "loss": 0.4327, "step": 18081 }, { "epoch": 2.976313587798823, "grad_norm": 0.3974813545686356, "learning_rate": 3.2275095066791794e-06, "loss": 0.4314, "step": 18082 }, { "epoch": 2.9764781720179396, "grad_norm": 0.4649457248662648, "learning_rate": 3.2270586350307456e-06, "loss": 0.4399, "step": 18083 }, { "epoch": 2.976642756237056, "grad_norm": 0.3309637999187168, "learning_rate": 3.2266077772721406e-06, "loss": 0.4189, "step": 18084 }, { "epoch": 2.9768073404561726, "grad_norm": 0.30971886634736673, "learning_rate": 3.2261569334082805e-06, "loss": 0.4194, "step": 18085 }, { "epoch": 2.976971924675289, "grad_norm": 0.281886081834552, "learning_rate": 3.2257061034440897e-06, "loss": 0.4336, "step": 18086 }, { "epoch": 2.9771365088944055, "grad_norm": 0.3450071004495589, "learning_rate": 3.2252552873844843e-06, "loss": 0.4452, "step": 18087 }, { "epoch": 2.977301093113522, "grad_norm": 0.35518661833333515, "learning_rate": 3.224804485234385e-06, "loss": 0.4484, "step": 18088 }, { "epoch": 2.9774656773326384, "grad_norm": 0.28753201503866177, "learning_rate": 3.2243536969987096e-06, "loss": 0.4397, "step": 18089 }, { "epoch": 2.977630261551755, "grad_norm": 0.3600946230149472, "learning_rate": 3.2239029226823786e-06, "loss": 0.435, "step": 18090 }, { "epoch": 2.9777948457708714, "grad_norm": 0.2928849477916003, "learning_rate": 3.223452162290309e-06, "loss": 0.4306, "step": 18091 }, { "epoch": 2.977959429989988, "grad_norm": 0.33409587075416086, "learning_rate": 3.22300141582742e-06, "loss": 0.4141, "step": 18092 }, { "epoch": 2.9781240142091043, "grad_norm": 0.36276975155674085, "learning_rate": 3.2225506832986306e-06, "loss": 0.433, "step": 18093 }, { "epoch": 2.978288598428221, "grad_norm": 0.3506459464381036, "learning_rate": 3.222099964708856e-06, "loss": 0.4429, "step": 18094 }, { "epoch": 2.9784531826473373, "grad_norm": 0.2919003799206049, "learning_rate": 3.22164926006302e-06, "loss": 0.4616, "step": 18095 }, { "epoch": 2.9786177668664537, "grad_norm": 0.2964169570888546, "learning_rate": 3.2211985693660334e-06, "loss": 0.4249, "step": 18096 }, { "epoch": 2.97878235108557, "grad_norm": 0.6718651471413585, "learning_rate": 3.220747892622821e-06, "loss": 0.4115, "step": 18097 }, { "epoch": 2.9789469353046867, "grad_norm": 0.3263407815241886, "learning_rate": 3.2202972298382957e-06, "loss": 0.4261, "step": 18098 }, { "epoch": 2.979111519523803, "grad_norm": 0.2992958976602193, "learning_rate": 3.219846581017375e-06, "loss": 0.4321, "step": 18099 }, { "epoch": 2.9792761037429196, "grad_norm": 0.3037050417576219, "learning_rate": 3.219395946164979e-06, "loss": 0.4311, "step": 18100 }, { "epoch": 2.979440687962036, "grad_norm": 0.33330487575134055, "learning_rate": 3.218945325286023e-06, "loss": 0.4412, "step": 18101 }, { "epoch": 2.9796052721811526, "grad_norm": 0.39981885883986784, "learning_rate": 3.2184947183854244e-06, "loss": 0.4415, "step": 18102 }, { "epoch": 2.979769856400269, "grad_norm": 0.3282863712935466, "learning_rate": 3.218044125468099e-06, "loss": 0.4143, "step": 18103 }, { "epoch": 2.9799344406193855, "grad_norm": 0.37273799725123524, "learning_rate": 3.217593546538967e-06, "loss": 0.4079, "step": 18104 }, { "epoch": 2.980099024838502, "grad_norm": 0.3147167169465058, "learning_rate": 3.2171429816029397e-06, "loss": 0.4171, "step": 18105 }, { "epoch": 2.9802636090576184, "grad_norm": 0.3325195855156046, "learning_rate": 3.2166924306649386e-06, "loss": 0.4365, "step": 18106 }, { "epoch": 2.980428193276735, "grad_norm": 0.32653489583676465, "learning_rate": 3.216241893729876e-06, "loss": 0.4246, "step": 18107 }, { "epoch": 2.980592777495851, "grad_norm": 0.3390616252801089, "learning_rate": 3.2157913708026696e-06, "loss": 0.4163, "step": 18108 }, { "epoch": 2.9807573617149674, "grad_norm": 0.27059271621322145, "learning_rate": 3.215340861888235e-06, "loss": 0.4383, "step": 18109 }, { "epoch": 2.980921945934084, "grad_norm": 0.30289374970872623, "learning_rate": 3.2148903669914884e-06, "loss": 0.4012, "step": 18110 }, { "epoch": 2.9810865301532004, "grad_norm": 0.41009121277019245, "learning_rate": 3.214439886117346e-06, "loss": 0.4446, "step": 18111 }, { "epoch": 2.981251114372317, "grad_norm": 0.3620401172185879, "learning_rate": 3.213989419270721e-06, "loss": 0.4313, "step": 18112 }, { "epoch": 2.9814156985914333, "grad_norm": 0.3128518314458058, "learning_rate": 3.2135389664565316e-06, "loss": 0.4273, "step": 18113 }, { "epoch": 2.9815802828105498, "grad_norm": 0.5072903575923572, "learning_rate": 3.2130885276796925e-06, "loss": 0.4363, "step": 18114 }, { "epoch": 2.9817448670296662, "grad_norm": 0.3272357301115453, "learning_rate": 3.2126381029451146e-06, "loss": 0.4381, "step": 18115 }, { "epoch": 2.9819094512487827, "grad_norm": 0.2878470990612326, "learning_rate": 3.2121876922577193e-06, "loss": 0.4424, "step": 18116 }, { "epoch": 2.982074035467899, "grad_norm": 0.3381122306172689, "learning_rate": 3.2117372956224154e-06, "loss": 0.427, "step": 18117 }, { "epoch": 2.9822386196870156, "grad_norm": 0.36845413810216926, "learning_rate": 3.2112869130441214e-06, "loss": 0.4313, "step": 18118 }, { "epoch": 2.982403203906132, "grad_norm": 0.3289819483762642, "learning_rate": 3.2108365445277496e-06, "loss": 0.4278, "step": 18119 }, { "epoch": 2.9825677881252486, "grad_norm": 0.35928315823892576, "learning_rate": 3.210386190078215e-06, "loss": 0.4373, "step": 18120 }, { "epoch": 2.982732372344365, "grad_norm": 0.6564359798749756, "learning_rate": 3.2099358497004318e-06, "loss": 0.4324, "step": 18121 }, { "epoch": 2.9828969565634815, "grad_norm": 0.31058811923763524, "learning_rate": 3.2094855233993134e-06, "loss": 0.4344, "step": 18122 }, { "epoch": 2.983061540782598, "grad_norm": 0.2963960899838568, "learning_rate": 3.2090352111797756e-06, "loss": 0.4362, "step": 18123 }, { "epoch": 2.9832261250017145, "grad_norm": 0.32776813298408725, "learning_rate": 3.2085849130467273e-06, "loss": 0.4204, "step": 18124 }, { "epoch": 2.983390709220831, "grad_norm": 0.5110102874344136, "learning_rate": 3.2081346290050877e-06, "loss": 0.4203, "step": 18125 }, { "epoch": 2.9835552934399474, "grad_norm": 0.30187833255750773, "learning_rate": 3.2076843590597653e-06, "loss": 0.4406, "step": 18126 }, { "epoch": 2.983719877659064, "grad_norm": 0.2855605888514474, "learning_rate": 3.207234103215677e-06, "loss": 0.4084, "step": 18127 }, { "epoch": 2.9838844618781803, "grad_norm": 0.32957430467355686, "learning_rate": 3.206783861477735e-06, "loss": 0.4372, "step": 18128 }, { "epoch": 2.984049046097297, "grad_norm": 0.38975332516752953, "learning_rate": 3.2063336338508493e-06, "loss": 0.421, "step": 18129 }, { "epoch": 2.9842136303164133, "grad_norm": 0.2980585596483328, "learning_rate": 3.2058834203399356e-06, "loss": 0.4303, "step": 18130 }, { "epoch": 2.9843782145355298, "grad_norm": 0.3398236694925661, "learning_rate": 3.205433220949905e-06, "loss": 0.4294, "step": 18131 }, { "epoch": 2.9845427987546462, "grad_norm": 0.26269951204573494, "learning_rate": 3.2049830356856713e-06, "loss": 0.4531, "step": 18132 }, { "epoch": 2.9847073829737623, "grad_norm": 0.36395240545705293, "learning_rate": 3.204532864552145e-06, "loss": 0.4328, "step": 18133 }, { "epoch": 2.9848719671928787, "grad_norm": 0.4336705652596672, "learning_rate": 3.20408270755424e-06, "loss": 0.4283, "step": 18134 }, { "epoch": 2.985036551411995, "grad_norm": 0.3006524868559794, "learning_rate": 3.203632564696868e-06, "loss": 0.4013, "step": 18135 }, { "epoch": 2.9852011356311117, "grad_norm": 0.322686194402558, "learning_rate": 3.2031824359849384e-06, "loss": 0.3927, "step": 18136 }, { "epoch": 2.985365719850228, "grad_norm": 0.3043040631434431, "learning_rate": 3.202732321423365e-06, "loss": 0.4276, "step": 18137 }, { "epoch": 2.9855303040693446, "grad_norm": 0.3084164121969129, "learning_rate": 3.202282221017058e-06, "loss": 0.4297, "step": 18138 }, { "epoch": 2.985694888288461, "grad_norm": 0.35498579787356155, "learning_rate": 3.2018321347709307e-06, "loss": 0.4414, "step": 18139 }, { "epoch": 2.9858594725075776, "grad_norm": 0.27871479742328575, "learning_rate": 3.201382062689892e-06, "loss": 0.429, "step": 18140 }, { "epoch": 2.986024056726694, "grad_norm": 0.3174931162113501, "learning_rate": 3.2009320047788546e-06, "loss": 0.4191, "step": 18141 }, { "epoch": 2.9861886409458105, "grad_norm": 0.6164069800922533, "learning_rate": 3.2004819610427284e-06, "loss": 0.4144, "step": 18142 }, { "epoch": 2.986353225164927, "grad_norm": 0.5379883698054649, "learning_rate": 3.2000319314864245e-06, "loss": 0.4237, "step": 18143 }, { "epoch": 2.9865178093840434, "grad_norm": 0.28452726517508903, "learning_rate": 3.1995819161148554e-06, "loss": 0.4254, "step": 18144 }, { "epoch": 2.98668239360316, "grad_norm": 0.3020895643523373, "learning_rate": 3.1991319149329263e-06, "loss": 0.4359, "step": 18145 }, { "epoch": 2.9868469778222764, "grad_norm": 0.35551300631374627, "learning_rate": 3.1986819279455528e-06, "loss": 0.4462, "step": 18146 }, { "epoch": 2.987011562041393, "grad_norm": 0.2904016493322226, "learning_rate": 3.198231955157641e-06, "loss": 0.414, "step": 18147 }, { "epoch": 2.9871761462605093, "grad_norm": 0.35154727598182106, "learning_rate": 3.1977819965741037e-06, "loss": 0.4332, "step": 18148 }, { "epoch": 2.987340730479626, "grad_norm": 0.33008611051504305, "learning_rate": 3.1973320521998486e-06, "loss": 0.4413, "step": 18149 }, { "epoch": 2.9875053146987423, "grad_norm": 0.2835558509576164, "learning_rate": 3.1968821220397872e-06, "loss": 0.4272, "step": 18150 }, { "epoch": 2.9876698989178587, "grad_norm": 0.3609501104969594, "learning_rate": 3.1964322060988277e-06, "loss": 0.4495, "step": 18151 }, { "epoch": 2.987834483136975, "grad_norm": 0.2888185844920322, "learning_rate": 3.1959823043818795e-06, "loss": 0.4171, "step": 18152 }, { "epoch": 2.9879990673560917, "grad_norm": 0.2936427475665115, "learning_rate": 3.1955324168938523e-06, "loss": 0.422, "step": 18153 }, { "epoch": 2.988163651575208, "grad_norm": 0.3308612101987003, "learning_rate": 3.195082543639654e-06, "loss": 0.4351, "step": 18154 }, { "epoch": 2.9883282357943246, "grad_norm": 0.30759910008070246, "learning_rate": 3.194632684624196e-06, "loss": 0.4394, "step": 18155 }, { "epoch": 2.988492820013441, "grad_norm": 0.5856876981238213, "learning_rate": 3.1941828398523823e-06, "loss": 0.4548, "step": 18156 }, { "epoch": 2.9886574042325575, "grad_norm": 0.3194599288209232, "learning_rate": 3.1937330093291276e-06, "loss": 0.4259, "step": 18157 }, { "epoch": 2.988821988451674, "grad_norm": 0.36234103840466797, "learning_rate": 3.1932831930593354e-06, "loss": 0.4367, "step": 18158 }, { "epoch": 2.9889865726707905, "grad_norm": 0.30308141678484296, "learning_rate": 3.192833391047915e-06, "loss": 0.4476, "step": 18159 }, { "epoch": 2.989151156889907, "grad_norm": 0.30146952433299834, "learning_rate": 3.192383603299775e-06, "loss": 0.4143, "step": 18160 }, { "epoch": 2.9893157411090234, "grad_norm": 0.5577042790559326, "learning_rate": 3.191933829819823e-06, "loss": 0.451, "step": 18161 }, { "epoch": 2.98948032532814, "grad_norm": 0.35589513547973517, "learning_rate": 3.1914840706129684e-06, "loss": 0.444, "step": 18162 }, { "epoch": 2.9896449095472564, "grad_norm": 0.3002996637792063, "learning_rate": 3.191034325684116e-06, "loss": 0.4246, "step": 18163 }, { "epoch": 2.989809493766373, "grad_norm": 0.328292620084996, "learning_rate": 3.1905845950381755e-06, "loss": 0.4256, "step": 18164 }, { "epoch": 2.9899740779854893, "grad_norm": 0.6209902002277193, "learning_rate": 3.190134878680054e-06, "loss": 0.4308, "step": 18165 }, { "epoch": 2.990138662204606, "grad_norm": 0.3843180872343453, "learning_rate": 3.1896851766146555e-06, "loss": 0.4423, "step": 18166 }, { "epoch": 2.9903032464237222, "grad_norm": 0.4509998126064767, "learning_rate": 3.1892354888468922e-06, "loss": 0.4425, "step": 18167 }, { "epoch": 2.9904678306428387, "grad_norm": 0.29625525143325837, "learning_rate": 3.1887858153816664e-06, "loss": 0.4219, "step": 18168 }, { "epoch": 2.990632414861955, "grad_norm": 0.45827014828035467, "learning_rate": 3.188336156223887e-06, "loss": 0.4435, "step": 18169 }, { "epoch": 2.9907969990810717, "grad_norm": 0.37408544422334067, "learning_rate": 3.1878865113784593e-06, "loss": 0.4229, "step": 18170 }, { "epoch": 2.990961583300188, "grad_norm": 0.36305505750525263, "learning_rate": 3.1874368808502908e-06, "loss": 0.4315, "step": 18171 }, { "epoch": 2.9911261675193046, "grad_norm": 0.326156630131236, "learning_rate": 3.186987264644288e-06, "loss": 0.4212, "step": 18172 }, { "epoch": 2.991290751738421, "grad_norm": 0.2828807883216947, "learning_rate": 3.1865376627653548e-06, "loss": 0.4239, "step": 18173 }, { "epoch": 2.9914553359575375, "grad_norm": 0.3545049958634287, "learning_rate": 3.1860880752184006e-06, "loss": 0.43, "step": 18174 }, { "epoch": 2.9916199201766536, "grad_norm": 0.33878685908841927, "learning_rate": 3.1856385020083256e-06, "loss": 0.4352, "step": 18175 }, { "epoch": 2.99178450439577, "grad_norm": 0.39217285035326416, "learning_rate": 3.185188943140042e-06, "loss": 0.4303, "step": 18176 }, { "epoch": 2.9919490886148865, "grad_norm": 0.34070472410426383, "learning_rate": 3.1847393986184486e-06, "loss": 0.4486, "step": 18177 }, { "epoch": 2.992113672834003, "grad_norm": 0.654226350541991, "learning_rate": 3.184289868448457e-06, "loss": 0.42, "step": 18178 }, { "epoch": 2.9922782570531195, "grad_norm": 0.35024427558591065, "learning_rate": 3.1838403526349687e-06, "loss": 0.4253, "step": 18179 }, { "epoch": 2.992442841272236, "grad_norm": 0.2887993308092518, "learning_rate": 3.183390851182888e-06, "loss": 0.419, "step": 18180 }, { "epoch": 2.9926074254913524, "grad_norm": 0.2852910550694381, "learning_rate": 3.182941364097121e-06, "loss": 0.4248, "step": 18181 }, { "epoch": 2.992772009710469, "grad_norm": 0.31844235323231934, "learning_rate": 3.1824918913825725e-06, "loss": 0.4137, "step": 18182 }, { "epoch": 2.9929365939295853, "grad_norm": 0.3990404528599743, "learning_rate": 3.1820424330441472e-06, "loss": 0.4466, "step": 18183 }, { "epoch": 2.993101178148702, "grad_norm": 0.3081020974323921, "learning_rate": 3.1815929890867476e-06, "loss": 0.4373, "step": 18184 }, { "epoch": 2.9932657623678183, "grad_norm": 0.29220689300256936, "learning_rate": 3.1811435595152807e-06, "loss": 0.4105, "step": 18185 }, { "epoch": 2.9934303465869347, "grad_norm": 0.38694100904722095, "learning_rate": 3.180694144334647e-06, "loss": 0.4339, "step": 18186 }, { "epoch": 2.993594930806051, "grad_norm": 0.38125835788420304, "learning_rate": 3.180244743549755e-06, "loss": 0.4457, "step": 18187 }, { "epoch": 2.9937595150251677, "grad_norm": 1.1024776134142784, "learning_rate": 3.1797953571655043e-06, "loss": 0.4411, "step": 18188 }, { "epoch": 2.993924099244284, "grad_norm": 0.45183397917621165, "learning_rate": 3.1793459851867988e-06, "loss": 0.4411, "step": 18189 }, { "epoch": 2.9940886834634006, "grad_norm": 0.32011488526140774, "learning_rate": 3.1788966276185444e-06, "loss": 0.4292, "step": 18190 }, { "epoch": 2.994253267682517, "grad_norm": 0.5045650725636368, "learning_rate": 3.178447284465641e-06, "loss": 0.4179, "step": 18191 }, { "epoch": 2.9944178519016336, "grad_norm": 0.5554772919589578, "learning_rate": 3.177997955732995e-06, "loss": 0.4136, "step": 18192 }, { "epoch": 2.99458243612075, "grad_norm": 0.475211106424764, "learning_rate": 3.177548641425506e-06, "loss": 0.4173, "step": 18193 }, { "epoch": 2.9947470203398665, "grad_norm": 0.3771283509591505, "learning_rate": 3.1770993415480795e-06, "loss": 0.4452, "step": 18194 }, { "epoch": 2.994911604558983, "grad_norm": 0.42594227266935014, "learning_rate": 3.1766500561056188e-06, "loss": 0.4117, "step": 18195 }, { "epoch": 2.9950761887780994, "grad_norm": 0.3180131807086205, "learning_rate": 3.176200785103021e-06, "loss": 0.4285, "step": 18196 }, { "epoch": 2.995240772997216, "grad_norm": 0.29612484007700896, "learning_rate": 3.175751528545195e-06, "loss": 0.4318, "step": 18197 }, { "epoch": 2.9954053572163324, "grad_norm": 0.28254929449057237, "learning_rate": 3.1753022864370376e-06, "loss": 0.4345, "step": 18198 }, { "epoch": 2.995569941435449, "grad_norm": 0.3183231439188339, "learning_rate": 3.1748530587834543e-06, "loss": 0.4349, "step": 18199 }, { "epoch": 2.995734525654565, "grad_norm": 0.46343260585566537, "learning_rate": 3.1744038455893443e-06, "loss": 0.4357, "step": 18200 }, { "epoch": 2.9958991098736814, "grad_norm": 0.29529337466275296, "learning_rate": 3.173954646859612e-06, "loss": 0.4184, "step": 18201 }, { "epoch": 2.996063694092798, "grad_norm": 0.31809070614092383, "learning_rate": 3.173505462599157e-06, "loss": 0.43, "step": 18202 }, { "epoch": 2.9962282783119143, "grad_norm": 0.30457104675831437, "learning_rate": 3.1730562928128797e-06, "loss": 0.4354, "step": 18203 }, { "epoch": 2.9963928625310308, "grad_norm": 0.3062917076160698, "learning_rate": 3.1726071375056836e-06, "loss": 0.4087, "step": 18204 }, { "epoch": 2.9965574467501472, "grad_norm": 0.3124888663990827, "learning_rate": 3.1721579966824684e-06, "loss": 0.4394, "step": 18205 }, { "epoch": 2.9967220309692637, "grad_norm": 0.33867425297635956, "learning_rate": 3.1717088703481363e-06, "loss": 0.4486, "step": 18206 }, { "epoch": 2.99688661518838, "grad_norm": 0.273558008512084, "learning_rate": 3.171259758507584e-06, "loss": 0.4071, "step": 18207 }, { "epoch": 2.9970511994074966, "grad_norm": 0.5285508319253733, "learning_rate": 3.1708106611657184e-06, "loss": 0.3861, "step": 18208 }, { "epoch": 2.997215783626613, "grad_norm": 0.3128874485399929, "learning_rate": 3.170361578327435e-06, "loss": 0.4164, "step": 18209 }, { "epoch": 2.9973803678457296, "grad_norm": 0.37256940242292064, "learning_rate": 3.1699125099976353e-06, "loss": 0.4131, "step": 18210 }, { "epoch": 2.997544952064846, "grad_norm": 0.35543677230722404, "learning_rate": 3.169463456181219e-06, "loss": 0.4285, "step": 18211 }, { "epoch": 2.9977095362839625, "grad_norm": 0.5258426866304687, "learning_rate": 3.1690144168830863e-06, "loss": 0.4382, "step": 18212 }, { "epoch": 2.997874120503079, "grad_norm": 0.3970173787688329, "learning_rate": 3.168565392108138e-06, "loss": 0.4528, "step": 18213 }, { "epoch": 2.9980387047221955, "grad_norm": 0.38818627062660305, "learning_rate": 3.1681163818612722e-06, "loss": 0.4337, "step": 18214 }, { "epoch": 2.998203288941312, "grad_norm": 0.32909854511156844, "learning_rate": 3.167667386147389e-06, "loss": 0.4244, "step": 18215 }, { "epoch": 2.9983678731604284, "grad_norm": 0.31824541264496803, "learning_rate": 3.1672184049713886e-06, "loss": 0.4243, "step": 18216 }, { "epoch": 2.998532457379545, "grad_norm": 0.2944951297577863, "learning_rate": 3.166769438338168e-06, "loss": 0.45, "step": 18217 }, { "epoch": 2.9986970415986614, "grad_norm": 0.31699040399767714, "learning_rate": 3.1663204862526272e-06, "loss": 0.4246, "step": 18218 }, { "epoch": 2.998861625817778, "grad_norm": 0.28009816206800014, "learning_rate": 3.1658715487196644e-06, "loss": 0.4535, "step": 18219 }, { "epoch": 2.9990262100368943, "grad_norm": 0.29611811965362095, "learning_rate": 3.16542262574418e-06, "loss": 0.409, "step": 18220 }, { "epoch": 2.9991907942560108, "grad_norm": 0.32801166259446557, "learning_rate": 3.16497371733107e-06, "loss": 0.4215, "step": 18221 }, { "epoch": 2.9993553784751272, "grad_norm": 0.29321722798470556, "learning_rate": 3.1645248234852354e-06, "loss": 0.4182, "step": 18222 }, { "epoch": 2.9995199626942437, "grad_norm": 0.40426280821449323, "learning_rate": 3.164075944211572e-06, "loss": 0.4137, "step": 18223 }, { "epoch": 2.99968454691336, "grad_norm": 0.5970825070217444, "learning_rate": 3.1636270795149784e-06, "loss": 0.4313, "step": 18224 }, { "epoch": 2.9998491311324766, "grad_norm": 0.2644700809917794, "learning_rate": 3.1631782294003545e-06, "loss": 0.4145, "step": 18225 }, { "epoch": 3.000013715351593, "grad_norm": 0.2995399869875604, "learning_rate": 3.162729393872594e-06, "loss": 0.4123, "step": 18226 }, { "epoch": 3.0001782995707096, "grad_norm": 0.27034051788182417, "learning_rate": 3.1622805729365987e-06, "loss": 0.4177, "step": 18227 }, { "epoch": 3.000342883789826, "grad_norm": 0.3971305869673882, "learning_rate": 3.1618317665972613e-06, "loss": 0.4242, "step": 18228 }, { "epoch": 3.0005074680089425, "grad_norm": 0.3414507516646599, "learning_rate": 3.1613829748594845e-06, "loss": 0.4184, "step": 18229 }, { "epoch": 3.000672052228059, "grad_norm": 0.3465525713100099, "learning_rate": 3.1609341977281607e-06, "loss": 0.4125, "step": 18230 }, { "epoch": 3.0008366364471755, "grad_norm": 0.33514919669558996, "learning_rate": 3.160485435208189e-06, "loss": 0.4305, "step": 18231 }, { "epoch": 3.001001220666292, "grad_norm": 0.33085851251192505, "learning_rate": 3.1600366873044664e-06, "loss": 0.4442, "step": 18232 }, { "epoch": 3.0011658048854084, "grad_norm": 0.27738310096517826, "learning_rate": 3.159587954021887e-06, "loss": 0.44, "step": 18233 }, { "epoch": 3.001330389104525, "grad_norm": 0.46496425947877273, "learning_rate": 3.1591392353653504e-06, "loss": 0.4317, "step": 18234 }, { "epoch": 3.0014949733236413, "grad_norm": 0.5780192641756802, "learning_rate": 3.15869053133975e-06, "loss": 0.4333, "step": 18235 }, { "epoch": 3.001659557542758, "grad_norm": 0.4204830312536245, "learning_rate": 3.1582418419499853e-06, "loss": 0.4236, "step": 18236 }, { "epoch": 3.0018241417618743, "grad_norm": 0.31813304116451246, "learning_rate": 3.157793167200947e-06, "loss": 0.4434, "step": 18237 }, { "epoch": 3.0019887259809903, "grad_norm": 0.3340994695144974, "learning_rate": 3.157344507097537e-06, "loss": 0.4131, "step": 18238 }, { "epoch": 3.002153310200107, "grad_norm": 0.32359193355644744, "learning_rate": 3.1568958616446466e-06, "loss": 0.4416, "step": 18239 }, { "epoch": 3.0023178944192233, "grad_norm": 0.3914003964350955, "learning_rate": 3.1564472308471723e-06, "loss": 0.4414, "step": 18240 }, { "epoch": 3.0024824786383397, "grad_norm": 1.5983605977406183, "learning_rate": 3.1559986147100103e-06, "loss": 0.4182, "step": 18241 }, { "epoch": 3.002647062857456, "grad_norm": 0.2656196346512441, "learning_rate": 3.155550013238054e-06, "loss": 0.4238, "step": 18242 }, { "epoch": 3.0028116470765727, "grad_norm": 0.3435030234191996, "learning_rate": 3.1551014264362003e-06, "loss": 0.4419, "step": 18243 }, { "epoch": 3.002976231295689, "grad_norm": 0.2774345818391581, "learning_rate": 3.1546528543093423e-06, "loss": 0.4331, "step": 18244 }, { "epoch": 3.0031408155148056, "grad_norm": 0.2795919065919006, "learning_rate": 3.154204296862376e-06, "loss": 0.4321, "step": 18245 }, { "epoch": 3.003305399733922, "grad_norm": 0.3060950103109964, "learning_rate": 3.1537557541001964e-06, "loss": 0.4324, "step": 18246 }, { "epoch": 3.0034699839530385, "grad_norm": 0.38995358074078834, "learning_rate": 3.153307226027694e-06, "loss": 0.4382, "step": 18247 }, { "epoch": 3.003634568172155, "grad_norm": 0.352334026299767, "learning_rate": 3.1528587126497684e-06, "loss": 0.4104, "step": 18248 }, { "epoch": 3.0037991523912715, "grad_norm": 0.714688159733198, "learning_rate": 3.1524102139713094e-06, "loss": 0.4323, "step": 18249 }, { "epoch": 3.003963736610388, "grad_norm": 0.3182506796247081, "learning_rate": 3.1519617299972128e-06, "loss": 0.4201, "step": 18250 }, { "epoch": 3.0041283208295044, "grad_norm": 0.39036458320277834, "learning_rate": 3.151513260732371e-06, "loss": 0.4338, "step": 18251 }, { "epoch": 3.004292905048621, "grad_norm": 0.2987276384376063, "learning_rate": 3.151064806181679e-06, "loss": 0.4213, "step": 18252 }, { "epoch": 3.0044574892677374, "grad_norm": 0.3210273705242925, "learning_rate": 3.15061636635003e-06, "loss": 0.4198, "step": 18253 }, { "epoch": 3.004622073486854, "grad_norm": 0.2906514077196313, "learning_rate": 3.1501679412423154e-06, "loss": 0.4283, "step": 18254 }, { "epoch": 3.0047866577059703, "grad_norm": 0.325529310036599, "learning_rate": 3.1497195308634315e-06, "loss": 0.4142, "step": 18255 }, { "epoch": 3.004951241925087, "grad_norm": 0.2876029969384978, "learning_rate": 3.149271135218266e-06, "loss": 0.4263, "step": 18256 }, { "epoch": 3.0051158261442033, "grad_norm": 0.3009431027312247, "learning_rate": 3.148822754311718e-06, "loss": 0.4323, "step": 18257 }, { "epoch": 3.0052804103633197, "grad_norm": 1.0456543690184665, "learning_rate": 3.148374388148674e-06, "loss": 0.4425, "step": 18258 }, { "epoch": 3.005444994582436, "grad_norm": 0.41242992191256006, "learning_rate": 3.147926036734032e-06, "loss": 0.4133, "step": 18259 }, { "epoch": 3.0056095788015527, "grad_norm": 0.3018174934163969, "learning_rate": 3.147477700072681e-06, "loss": 0.4475, "step": 18260 }, { "epoch": 3.005774163020669, "grad_norm": 0.30378887389327897, "learning_rate": 3.1470293781695114e-06, "loss": 0.4343, "step": 18261 }, { "epoch": 3.0059387472397856, "grad_norm": 0.3030320265528302, "learning_rate": 3.1465810710294193e-06, "loss": 0.4224, "step": 18262 }, { "epoch": 3.006103331458902, "grad_norm": 0.32468997777886277, "learning_rate": 3.146132778657293e-06, "loss": 0.4206, "step": 18263 }, { "epoch": 3.0062679156780185, "grad_norm": 0.3073859418492667, "learning_rate": 3.145684501058027e-06, "loss": 0.4397, "step": 18264 }, { "epoch": 3.006432499897135, "grad_norm": 0.3535746497464689, "learning_rate": 3.1452362382365093e-06, "loss": 0.428, "step": 18265 }, { "epoch": 3.0065970841162515, "grad_norm": 0.3509794888005568, "learning_rate": 3.1447879901976346e-06, "loss": 0.4145, "step": 18266 }, { "epoch": 3.006761668335368, "grad_norm": 0.37055426427310695, "learning_rate": 3.1443397569462916e-06, "loss": 0.4202, "step": 18267 }, { "epoch": 3.0069262525544844, "grad_norm": 0.3983158517239644, "learning_rate": 3.143891538487374e-06, "loss": 0.4378, "step": 18268 }, { "epoch": 3.007090836773601, "grad_norm": 0.8847329921051714, "learning_rate": 3.1434433348257696e-06, "loss": 0.4332, "step": 18269 }, { "epoch": 3.0072554209927174, "grad_norm": 0.40040346566986273, "learning_rate": 3.1429951459663688e-06, "loss": 0.4313, "step": 18270 }, { "epoch": 3.0074200052118334, "grad_norm": 0.30438473774909447, "learning_rate": 3.1425469719140652e-06, "loss": 0.4317, "step": 18271 }, { "epoch": 3.00758458943095, "grad_norm": 0.26880189766547447, "learning_rate": 3.142098812673746e-06, "loss": 0.4123, "step": 18272 }, { "epoch": 3.0077491736500663, "grad_norm": 0.43929111116823344, "learning_rate": 3.1416506682503034e-06, "loss": 0.4256, "step": 18273 }, { "epoch": 3.007913757869183, "grad_norm": 0.31012222245409504, "learning_rate": 3.1412025386486254e-06, "loss": 0.4152, "step": 18274 }, { "epoch": 3.0080783420882993, "grad_norm": 0.3281837024857674, "learning_rate": 3.140754423873605e-06, "loss": 0.4276, "step": 18275 }, { "epoch": 3.0082429263074157, "grad_norm": 0.359541434317751, "learning_rate": 3.14030632393013e-06, "loss": 0.4311, "step": 18276 }, { "epoch": 3.008407510526532, "grad_norm": 0.381024987330811, "learning_rate": 3.1398582388230866e-06, "loss": 0.4349, "step": 18277 }, { "epoch": 3.0085720947456487, "grad_norm": 0.2725960858202017, "learning_rate": 3.1394101685573705e-06, "loss": 0.4377, "step": 18278 }, { "epoch": 3.008736678964765, "grad_norm": 0.29303313243267587, "learning_rate": 3.1389621131378657e-06, "loss": 0.4281, "step": 18279 }, { "epoch": 3.0089012631838816, "grad_norm": 0.35231873351827986, "learning_rate": 3.1385140725694644e-06, "loss": 0.4281, "step": 18280 }, { "epoch": 3.009065847402998, "grad_norm": 0.32507177675599463, "learning_rate": 3.138066046857053e-06, "loss": 0.4141, "step": 18281 }, { "epoch": 3.0092304316221146, "grad_norm": 0.3090038291024942, "learning_rate": 3.137618036005522e-06, "loss": 0.4278, "step": 18282 }, { "epoch": 3.009395015841231, "grad_norm": 0.34411886423257904, "learning_rate": 3.137170040019759e-06, "loss": 0.435, "step": 18283 }, { "epoch": 3.0095596000603475, "grad_norm": 0.3207335840825621, "learning_rate": 3.1367220589046525e-06, "loss": 0.4271, "step": 18284 }, { "epoch": 3.009724184279464, "grad_norm": 0.394758107332526, "learning_rate": 3.1362740926650915e-06, "loss": 0.423, "step": 18285 }, { "epoch": 3.0098887684985804, "grad_norm": 0.3173756255656595, "learning_rate": 3.1358261413059622e-06, "loss": 0.4243, "step": 18286 }, { "epoch": 3.010053352717697, "grad_norm": 0.36759515811796317, "learning_rate": 3.1353782048321555e-06, "loss": 0.4229, "step": 18287 }, { "epoch": 3.0102179369368134, "grad_norm": 0.3570919253203036, "learning_rate": 3.1349302832485547e-06, "loss": 0.4363, "step": 18288 }, { "epoch": 3.01038252115593, "grad_norm": 0.44230721314041077, "learning_rate": 3.1344823765600525e-06, "loss": 0.4322, "step": 18289 }, { "epoch": 3.0105471053750463, "grad_norm": 0.41149678706373655, "learning_rate": 3.1340344847715324e-06, "loss": 0.4149, "step": 18290 }, { "epoch": 3.010711689594163, "grad_norm": 0.3896154491901678, "learning_rate": 3.133586607887882e-06, "loss": 0.4451, "step": 18291 }, { "epoch": 3.0108762738132793, "grad_norm": 0.4611229387144769, "learning_rate": 3.1331387459139904e-06, "loss": 0.4311, "step": 18292 }, { "epoch": 3.0110408580323957, "grad_norm": 0.41381418119138585, "learning_rate": 3.1326908988547425e-06, "loss": 0.4272, "step": 18293 }, { "epoch": 3.011205442251512, "grad_norm": 0.33006048790236775, "learning_rate": 3.1322430667150263e-06, "loss": 0.4307, "step": 18294 }, { "epoch": 3.0113700264706287, "grad_norm": 0.2806686050008286, "learning_rate": 3.1317952494997273e-06, "loss": 0.4334, "step": 18295 }, { "epoch": 3.011534610689745, "grad_norm": 0.303403131668887, "learning_rate": 3.1313474472137334e-06, "loss": 0.4349, "step": 18296 }, { "epoch": 3.0116991949088616, "grad_norm": 0.36015904202072035, "learning_rate": 3.130899659861931e-06, "loss": 0.4269, "step": 18297 }, { "epoch": 3.011863779127978, "grad_norm": 0.39213784370577165, "learning_rate": 3.1304518874492027e-06, "loss": 0.4165, "step": 18298 }, { "epoch": 3.0120283633470946, "grad_norm": 0.4320711827383614, "learning_rate": 3.130004129980438e-06, "loss": 0.4225, "step": 18299 }, { "epoch": 3.012192947566211, "grad_norm": 0.3721589267015674, "learning_rate": 3.1295563874605203e-06, "loss": 0.4128, "step": 18300 }, { "epoch": 3.0123575317853275, "grad_norm": 0.38413959309953516, "learning_rate": 3.1291086598943376e-06, "loss": 0.4374, "step": 18301 }, { "epoch": 3.012522116004444, "grad_norm": 0.41329479123233775, "learning_rate": 3.128660947286773e-06, "loss": 0.4546, "step": 18302 }, { "epoch": 3.0126867002235604, "grad_norm": 0.2678832781833614, "learning_rate": 3.128213249642714e-06, "loss": 0.4175, "step": 18303 }, { "epoch": 3.0128512844426765, "grad_norm": 0.35521270889326156, "learning_rate": 3.1277655669670446e-06, "loss": 0.4205, "step": 18304 }, { "epoch": 3.013015868661793, "grad_norm": 0.3104405779774533, "learning_rate": 3.1273178992646486e-06, "loss": 0.4259, "step": 18305 }, { "epoch": 3.0131804528809094, "grad_norm": 0.2858893085394464, "learning_rate": 3.1268702465404133e-06, "loss": 0.4198, "step": 18306 }, { "epoch": 3.013345037100026, "grad_norm": 0.37408477887754, "learning_rate": 3.1264226087992197e-06, "loss": 0.4288, "step": 18307 }, { "epoch": 3.0135096213191424, "grad_norm": 0.4217822609844656, "learning_rate": 3.1259749860459565e-06, "loss": 0.4422, "step": 18308 }, { "epoch": 3.013674205538259, "grad_norm": 0.26756539192671847, "learning_rate": 3.125527378285504e-06, "loss": 0.41, "step": 18309 }, { "epoch": 3.0138387897573753, "grad_norm": 0.47541946698410265, "learning_rate": 3.1250797855227504e-06, "loss": 0.4163, "step": 18310 }, { "epoch": 3.0140033739764918, "grad_norm": 0.384741342753798, "learning_rate": 3.1246322077625755e-06, "loss": 0.438, "step": 18311 }, { "epoch": 3.0141679581956082, "grad_norm": 0.28876904645439844, "learning_rate": 3.1241846450098664e-06, "loss": 0.4262, "step": 18312 }, { "epoch": 3.0143325424147247, "grad_norm": 0.28806536770785424, "learning_rate": 3.1237370972695048e-06, "loss": 0.4164, "step": 18313 }, { "epoch": 3.014497126633841, "grad_norm": 0.3084635180596369, "learning_rate": 3.1232895645463744e-06, "loss": 0.4196, "step": 18314 }, { "epoch": 3.0146617108529576, "grad_norm": 0.3054116371458221, "learning_rate": 3.122842046845359e-06, "loss": 0.4368, "step": 18315 }, { "epoch": 3.014826295072074, "grad_norm": 0.3836994030013055, "learning_rate": 3.1223945441713413e-06, "loss": 0.4339, "step": 18316 }, { "epoch": 3.0149908792911906, "grad_norm": 0.373344678246291, "learning_rate": 3.1219470565292064e-06, "loss": 0.4271, "step": 18317 }, { "epoch": 3.015155463510307, "grad_norm": 0.3233689476672488, "learning_rate": 3.121499583923832e-06, "loss": 0.4241, "step": 18318 }, { "epoch": 3.0153200477294235, "grad_norm": 0.49564569197291564, "learning_rate": 3.121052126360107e-06, "loss": 0.4406, "step": 18319 }, { "epoch": 3.01548463194854, "grad_norm": 0.46988181103347443, "learning_rate": 3.12060468384291e-06, "loss": 0.4386, "step": 18320 }, { "epoch": 3.0156492161676565, "grad_norm": 0.4603952940576739, "learning_rate": 3.1201572563771234e-06, "loss": 0.4121, "step": 18321 }, { "epoch": 3.015813800386773, "grad_norm": 0.3319710266670054, "learning_rate": 3.1197098439676307e-06, "loss": 0.4435, "step": 18322 }, { "epoch": 3.0159783846058894, "grad_norm": 0.28959240783036866, "learning_rate": 3.1192624466193127e-06, "loss": 0.415, "step": 18323 }, { "epoch": 3.016142968825006, "grad_norm": 0.2921197590228239, "learning_rate": 3.118815064337052e-06, "loss": 0.4336, "step": 18324 }, { "epoch": 3.0163075530441223, "grad_norm": 0.29168013057506503, "learning_rate": 3.1183676971257303e-06, "loss": 0.4458, "step": 18325 }, { "epoch": 3.016472137263239, "grad_norm": 0.3304046783916581, "learning_rate": 3.117920344990229e-06, "loss": 0.4069, "step": 18326 }, { "epoch": 3.0166367214823553, "grad_norm": 0.3093798804530362, "learning_rate": 3.1174730079354304e-06, "loss": 0.4122, "step": 18327 }, { "epoch": 3.0168013057014718, "grad_norm": 0.26771936900248855, "learning_rate": 3.117025685966212e-06, "loss": 0.4209, "step": 18328 }, { "epoch": 3.0169658899205882, "grad_norm": 0.40780476703473845, "learning_rate": 3.11657837908746e-06, "loss": 0.4304, "step": 18329 }, { "epoch": 3.0171304741397047, "grad_norm": 0.32813383839828936, "learning_rate": 3.1161310873040504e-06, "loss": 0.4279, "step": 18330 }, { "epoch": 3.017295058358821, "grad_norm": 0.2798937789325171, "learning_rate": 3.1156838106208675e-06, "loss": 0.423, "step": 18331 }, { "epoch": 3.0174596425779376, "grad_norm": 0.32055734462181357, "learning_rate": 3.11523654904279e-06, "loss": 0.4241, "step": 18332 }, { "epoch": 3.017624226797054, "grad_norm": 0.3172206049676667, "learning_rate": 3.1147893025746985e-06, "loss": 0.4325, "step": 18333 }, { "epoch": 3.0177888110161706, "grad_norm": 0.3434309037151508, "learning_rate": 3.1143420712214735e-06, "loss": 0.4463, "step": 18334 }, { "epoch": 3.017953395235287, "grad_norm": 0.3742062244031275, "learning_rate": 3.113894854987994e-06, "loss": 0.434, "step": 18335 }, { "epoch": 3.0181179794544035, "grad_norm": 0.25831774897452026, "learning_rate": 3.1134476538791415e-06, "loss": 0.4124, "step": 18336 }, { "epoch": 3.0182825636735195, "grad_norm": 0.3204505571282885, "learning_rate": 3.1130004678997944e-06, "loss": 0.4201, "step": 18337 }, { "epoch": 3.018447147892636, "grad_norm": 0.3682921298167499, "learning_rate": 3.112553297054834e-06, "loss": 0.4264, "step": 18338 }, { "epoch": 3.0186117321117525, "grad_norm": 0.23909293619699393, "learning_rate": 3.1121061413491355e-06, "loss": 0.4198, "step": 18339 }, { "epoch": 3.018776316330869, "grad_norm": 0.29361493096236757, "learning_rate": 3.1116590007875837e-06, "loss": 0.4076, "step": 18340 }, { "epoch": 3.0189409005499854, "grad_norm": 0.4612104279180189, "learning_rate": 3.1112118753750543e-06, "loss": 0.4377, "step": 18341 }, { "epoch": 3.019105484769102, "grad_norm": 0.281475975355378, "learning_rate": 3.110764765116425e-06, "loss": 0.438, "step": 18342 }, { "epoch": 3.0192700689882184, "grad_norm": 0.3623311275412427, "learning_rate": 3.1103176700165773e-06, "loss": 0.4306, "step": 18343 }, { "epoch": 3.019434653207335, "grad_norm": 0.37699299963368665, "learning_rate": 3.1098705900803884e-06, "loss": 0.4141, "step": 18344 }, { "epoch": 3.0195992374264513, "grad_norm": 0.33703887500358937, "learning_rate": 3.109423525312737e-06, "loss": 0.4311, "step": 18345 }, { "epoch": 3.019763821645568, "grad_norm": 0.3045504218476557, "learning_rate": 3.1089764757185005e-06, "loss": 0.4219, "step": 18346 }, { "epoch": 3.0199284058646843, "grad_norm": 0.3574769206992018, "learning_rate": 3.108529441302558e-06, "loss": 0.4169, "step": 18347 }, { "epoch": 3.0200929900838007, "grad_norm": 0.31503034784782696, "learning_rate": 3.1080824220697882e-06, "loss": 0.4291, "step": 18348 }, { "epoch": 3.020257574302917, "grad_norm": 0.38223697652308825, "learning_rate": 3.107635418025066e-06, "loss": 0.423, "step": 18349 }, { "epoch": 3.0204221585220337, "grad_norm": 0.3895900387968357, "learning_rate": 3.1071884291732707e-06, "loss": 0.4337, "step": 18350 }, { "epoch": 3.02058674274115, "grad_norm": 0.38589754748451754, "learning_rate": 3.106741455519278e-06, "loss": 0.4202, "step": 18351 }, { "epoch": 3.0207513269602666, "grad_norm": 0.474879256779032, "learning_rate": 3.106294497067968e-06, "loss": 0.4267, "step": 18352 }, { "epoch": 3.020915911179383, "grad_norm": 0.3209696342392641, "learning_rate": 3.105847553824216e-06, "loss": 0.4245, "step": 18353 }, { "epoch": 3.0210804953984995, "grad_norm": 0.34778376983732656, "learning_rate": 3.105400625792899e-06, "loss": 0.4101, "step": 18354 }, { "epoch": 3.021245079617616, "grad_norm": 0.33399183177152747, "learning_rate": 3.1049537129788937e-06, "loss": 0.4097, "step": 18355 }, { "epoch": 3.0214096638367325, "grad_norm": 0.2988899506318333, "learning_rate": 3.104506815387077e-06, "loss": 0.4483, "step": 18356 }, { "epoch": 3.021574248055849, "grad_norm": 0.3500392847200933, "learning_rate": 3.1040599330223264e-06, "loss": 0.447, "step": 18357 }, { "epoch": 3.0217388322749654, "grad_norm": 0.48930436377342484, "learning_rate": 3.1036130658895134e-06, "loss": 0.4277, "step": 18358 }, { "epoch": 3.021903416494082, "grad_norm": 0.3234127630400757, "learning_rate": 3.1031662139935205e-06, "loss": 0.4391, "step": 18359 }, { "epoch": 3.0220680007131984, "grad_norm": 0.27320307065801214, "learning_rate": 3.102719377339217e-06, "loss": 0.4229, "step": 18360 }, { "epoch": 3.022232584932315, "grad_norm": 0.5085343141406332, "learning_rate": 3.1022725559314855e-06, "loss": 0.4194, "step": 18361 }, { "epoch": 3.0223971691514313, "grad_norm": 0.2773280004945635, "learning_rate": 3.101825749775196e-06, "loss": 0.4516, "step": 18362 }, { "epoch": 3.0225617533705478, "grad_norm": 0.382392052409632, "learning_rate": 3.1013789588752266e-06, "loss": 0.444, "step": 18363 }, { "epoch": 3.0227263375896642, "grad_norm": 0.36171418520805143, "learning_rate": 3.1009321832364516e-06, "loss": 0.4243, "step": 18364 }, { "epoch": 3.0228909218087807, "grad_norm": 0.3450737581253108, "learning_rate": 3.1004854228637462e-06, "loss": 0.442, "step": 18365 }, { "epoch": 3.023055506027897, "grad_norm": 0.49415695404695414, "learning_rate": 3.1000386777619857e-06, "loss": 0.4384, "step": 18366 }, { "epoch": 3.0232200902470137, "grad_norm": 0.3017298209376994, "learning_rate": 3.099591947936044e-06, "loss": 0.4414, "step": 18367 }, { "epoch": 3.02338467446613, "grad_norm": 0.4087842470656403, "learning_rate": 3.099145233390797e-06, "loss": 0.4327, "step": 18368 }, { "epoch": 3.0235492586852466, "grad_norm": 0.40924406967671867, "learning_rate": 3.0986985341311157e-06, "loss": 0.4331, "step": 18369 }, { "epoch": 3.023713842904363, "grad_norm": 0.2763617320415296, "learning_rate": 3.098251850161879e-06, "loss": 0.4223, "step": 18370 }, { "epoch": 3.023878427123479, "grad_norm": 0.9895023088998105, "learning_rate": 3.0978051814879594e-06, "loss": 0.4436, "step": 18371 }, { "epoch": 3.0240430113425956, "grad_norm": 0.3230702156672306, "learning_rate": 3.0973585281142276e-06, "loss": 0.4243, "step": 18372 }, { "epoch": 3.024207595561712, "grad_norm": 0.334355242509786, "learning_rate": 3.0969118900455616e-06, "loss": 0.4267, "step": 18373 }, { "epoch": 3.0243721797808285, "grad_norm": 0.30770411266118664, "learning_rate": 3.0964652672868314e-06, "loss": 0.4324, "step": 18374 }, { "epoch": 3.024536763999945, "grad_norm": 0.2885193574945699, "learning_rate": 3.096018659842913e-06, "loss": 0.4276, "step": 18375 }, { "epoch": 3.0247013482190614, "grad_norm": 0.34113089461111257, "learning_rate": 3.0955720677186783e-06, "loss": 0.4392, "step": 18376 }, { "epoch": 3.024865932438178, "grad_norm": 0.452973237345632, "learning_rate": 3.0951254909190017e-06, "loss": 0.4398, "step": 18377 }, { "epoch": 3.0250305166572944, "grad_norm": 0.34959258473425364, "learning_rate": 3.0946789294487554e-06, "loss": 0.4286, "step": 18378 }, { "epoch": 3.025195100876411, "grad_norm": 0.3275405935360879, "learning_rate": 3.0942323833128095e-06, "loss": 0.4168, "step": 18379 }, { "epoch": 3.0253596850955273, "grad_norm": 0.3018346913482798, "learning_rate": 3.093785852516041e-06, "loss": 0.4273, "step": 18380 }, { "epoch": 3.025524269314644, "grad_norm": 0.36688729357237526, "learning_rate": 3.0933393370633182e-06, "loss": 0.42, "step": 18381 }, { "epoch": 3.0256888535337603, "grad_norm": 0.4861306614021659, "learning_rate": 3.0928928369595167e-06, "loss": 0.3911, "step": 18382 }, { "epoch": 3.0258534377528767, "grad_norm": 0.3492407070341485, "learning_rate": 3.0924463522095054e-06, "loss": 0.4361, "step": 18383 }, { "epoch": 3.026018021971993, "grad_norm": 0.3980551315439607, "learning_rate": 3.0919998828181593e-06, "loss": 0.4216, "step": 18384 }, { "epoch": 3.0261826061911097, "grad_norm": 0.38357388279611354, "learning_rate": 3.0915534287903485e-06, "loss": 0.419, "step": 18385 }, { "epoch": 3.026347190410226, "grad_norm": 0.4891027846917855, "learning_rate": 3.091106990130943e-06, "loss": 0.4204, "step": 18386 }, { "epoch": 3.0265117746293426, "grad_norm": 0.3391882237310598, "learning_rate": 3.0906605668448177e-06, "loss": 0.4179, "step": 18387 }, { "epoch": 3.026676358848459, "grad_norm": 0.33533144601984904, "learning_rate": 3.0902141589368396e-06, "loss": 0.4356, "step": 18388 }, { "epoch": 3.0268409430675756, "grad_norm": 0.3100015251700531, "learning_rate": 3.0897677664118844e-06, "loss": 0.4194, "step": 18389 }, { "epoch": 3.027005527286692, "grad_norm": 0.3667714832533851, "learning_rate": 3.089321389274818e-06, "loss": 0.4085, "step": 18390 }, { "epoch": 3.0271701115058085, "grad_norm": 0.32901637661456695, "learning_rate": 3.088875027530516e-06, "loss": 0.4409, "step": 18391 }, { "epoch": 3.027334695724925, "grad_norm": 0.3926995176310053, "learning_rate": 3.088428681183845e-06, "loss": 0.425, "step": 18392 }, { "epoch": 3.0274992799440414, "grad_norm": 0.33404719587309123, "learning_rate": 3.087982350239677e-06, "loss": 0.4376, "step": 18393 }, { "epoch": 3.027663864163158, "grad_norm": 0.27830564539727404, "learning_rate": 3.087536034702883e-06, "loss": 0.4219, "step": 18394 }, { "epoch": 3.0278284483822744, "grad_norm": 0.411080967064806, "learning_rate": 3.0870897345783307e-06, "loss": 0.4101, "step": 18395 }, { "epoch": 3.027993032601391, "grad_norm": 0.337151936162758, "learning_rate": 3.0866434498708924e-06, "loss": 0.4232, "step": 18396 }, { "epoch": 3.0281576168205073, "grad_norm": 0.28520005890645866, "learning_rate": 3.086197180585436e-06, "loss": 0.4461, "step": 18397 }, { "epoch": 3.028322201039624, "grad_norm": 0.33666331248057874, "learning_rate": 3.0857509267268324e-06, "loss": 0.4296, "step": 18398 }, { "epoch": 3.0284867852587403, "grad_norm": 0.30655635466216224, "learning_rate": 3.0853046882999488e-06, "loss": 0.4181, "step": 18399 }, { "epoch": 3.0286513694778567, "grad_norm": 0.37866615924108316, "learning_rate": 3.084858465309659e-06, "loss": 0.4393, "step": 18400 } ], "logging_steps": 1.0, "max_steps": 30375, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 400, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.8309052386849587e+18, "train_batch_size": 8, "trial_name": null, "trial_params": null }