{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9999783366911462, "eval_steps": 500, "global_step": 5770, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00017330647083035463, "grad_norm": 75.5286304145975, "learning_rate": 5.7471264367816094e-08, "loss": 1.8641, "step": 1 }, { "epoch": 0.00034661294166070926, "grad_norm": 61.232175000677145, "learning_rate": 1.1494252873563219e-07, "loss": 1.872, "step": 2 }, { "epoch": 0.0005199194124910639, "grad_norm": 68.69745064711267, "learning_rate": 1.7241379310344828e-07, "loss": 1.8626, "step": 3 }, { "epoch": 0.0006932258833214185, "grad_norm": 62.35069031945426, "learning_rate": 2.2988505747126437e-07, "loss": 1.9068, "step": 4 }, { "epoch": 0.0008665323541517732, "grad_norm": 63.193087713687724, "learning_rate": 2.8735632183908047e-07, "loss": 1.8345, "step": 5 }, { "epoch": 0.0010398388249821277, "grad_norm": 68.94408241569016, "learning_rate": 3.4482758620689656e-07, "loss": 1.933, "step": 6 }, { "epoch": 0.0012131452958124825, "grad_norm": 61.800322256124076, "learning_rate": 4.0229885057471266e-07, "loss": 1.9573, "step": 7 }, { "epoch": 0.001386451766642837, "grad_norm": 61.23307189113565, "learning_rate": 4.5977011494252875e-07, "loss": 1.9045, "step": 8 }, { "epoch": 0.0015597582374731916, "grad_norm": 56.48463248040424, "learning_rate": 5.172413793103449e-07, "loss": 1.7469, "step": 9 }, { "epoch": 0.0017330647083035463, "grad_norm": 52.11684322580366, "learning_rate": 5.747126436781609e-07, "loss": 1.7172, "step": 10 }, { "epoch": 0.0019063711791339009, "grad_norm": 48.944946175796495, "learning_rate": 6.321839080459771e-07, "loss": 1.7062, "step": 11 }, { "epoch": 0.0020796776499642554, "grad_norm": 44.54939659946419, "learning_rate": 6.896551724137931e-07, "loss": 1.6854, "step": 12 }, { "epoch": 0.00225298412079461, "grad_norm": 39.743906730031505, "learning_rate": 7.471264367816093e-07, "loss": 1.5103, "step": 13 }, { "epoch": 0.002426290591624965, "grad_norm": 21.57412260814287, "learning_rate": 8.045977011494253e-07, "loss": 1.3563, "step": 14 }, { "epoch": 0.0025995970624553195, "grad_norm": 23.693634796074164, "learning_rate": 8.620689655172415e-07, "loss": 1.5263, "step": 15 }, { "epoch": 0.002772903533285674, "grad_norm": 18.319469239408868, "learning_rate": 9.195402298850575e-07, "loss": 1.3075, "step": 16 }, { "epoch": 0.0029462100041160286, "grad_norm": 18.589840936579854, "learning_rate": 9.770114942528738e-07, "loss": 1.3375, "step": 17 }, { "epoch": 0.003119516474946383, "grad_norm": 24.53746530900592, "learning_rate": 1.0344827586206898e-06, "loss": 1.3331, "step": 18 }, { "epoch": 0.003292822945776738, "grad_norm": 28.63148524422072, "learning_rate": 1.0919540229885058e-06, "loss": 1.2676, "step": 19 }, { "epoch": 0.0034661294166070927, "grad_norm": 19.251956923570305, "learning_rate": 1.1494252873563219e-06, "loss": 1.1703, "step": 20 }, { "epoch": 0.003639435887437447, "grad_norm": 16.895342289595785, "learning_rate": 1.2068965517241381e-06, "loss": 1.1389, "step": 21 }, { "epoch": 0.0038127423582678018, "grad_norm": 21.40824887202279, "learning_rate": 1.2643678160919542e-06, "loss": 1.2274, "step": 22 }, { "epoch": 0.003986048829098156, "grad_norm": 14.785564215190536, "learning_rate": 1.3218390804597702e-06, "loss": 1.2085, "step": 23 }, { "epoch": 0.004159355299928511, "grad_norm": 15.98913777951213, "learning_rate": 1.3793103448275862e-06, "loss": 1.1372, "step": 24 }, { "epoch": 0.004332661770758865, "grad_norm": 8.67902631494975, "learning_rate": 1.4367816091954023e-06, "loss": 1.0583, "step": 25 }, { "epoch": 0.00450596824158922, "grad_norm": 6.2973429061693995, "learning_rate": 1.4942528735632185e-06, "loss": 0.9782, "step": 26 }, { "epoch": 0.004679274712419575, "grad_norm": 5.081479064108751, "learning_rate": 1.5517241379310346e-06, "loss": 0.912, "step": 27 }, { "epoch": 0.00485258118324993, "grad_norm": 4.683879730032283, "learning_rate": 1.6091954022988506e-06, "loss": 0.87, "step": 28 }, { "epoch": 0.0050258876540802844, "grad_norm": 5.637711821112234, "learning_rate": 1.6666666666666667e-06, "loss": 0.9224, "step": 29 }, { "epoch": 0.005199194124910639, "grad_norm": 5.448120868089278, "learning_rate": 1.724137931034483e-06, "loss": 0.9798, "step": 30 }, { "epoch": 0.0053725005957409935, "grad_norm": 3.7557502671614116, "learning_rate": 1.781609195402299e-06, "loss": 0.831, "step": 31 }, { "epoch": 0.005545807066571348, "grad_norm": 4.2924947532104305, "learning_rate": 1.839080459770115e-06, "loss": 0.9257, "step": 32 }, { "epoch": 0.005719113537401703, "grad_norm": 4.314798299878706, "learning_rate": 1.896551724137931e-06, "loss": 0.8732, "step": 33 }, { "epoch": 0.005892420008232057, "grad_norm": 3.5590767395964553, "learning_rate": 1.9540229885057475e-06, "loss": 0.9617, "step": 34 }, { "epoch": 0.006065726479062412, "grad_norm": 4.191812737238874, "learning_rate": 2.0114942528735633e-06, "loss": 0.9217, "step": 35 }, { "epoch": 0.006239032949892766, "grad_norm": 3.1147621966131176, "learning_rate": 2.0689655172413796e-06, "loss": 0.8325, "step": 36 }, { "epoch": 0.006412339420723122, "grad_norm": 4.680833118741082, "learning_rate": 2.1264367816091954e-06, "loss": 0.9331, "step": 37 }, { "epoch": 0.006585645891553476, "grad_norm": 8.061107564851818, "learning_rate": 2.1839080459770117e-06, "loss": 0.8549, "step": 38 }, { "epoch": 0.006758952362383831, "grad_norm": 3.6317447558325098, "learning_rate": 2.241379310344828e-06, "loss": 0.8633, "step": 39 }, { "epoch": 0.006932258833214185, "grad_norm": 3.486892969354754, "learning_rate": 2.2988505747126437e-06, "loss": 0.8763, "step": 40 }, { "epoch": 0.00710556530404454, "grad_norm": 3.1116205144937585, "learning_rate": 2.35632183908046e-06, "loss": 0.8669, "step": 41 }, { "epoch": 0.007278871774874894, "grad_norm": 2.8705253157984107, "learning_rate": 2.4137931034482762e-06, "loss": 0.9036, "step": 42 }, { "epoch": 0.007452178245705249, "grad_norm": 2.6450325789613465, "learning_rate": 2.471264367816092e-06, "loss": 0.9112, "step": 43 }, { "epoch": 0.0076254847165356035, "grad_norm": 4.0776631858774754, "learning_rate": 2.5287356321839083e-06, "loss": 0.8811, "step": 44 }, { "epoch": 0.007798791187365958, "grad_norm": 2.9713526365315213, "learning_rate": 2.5862068965517246e-06, "loss": 0.9009, "step": 45 }, { "epoch": 0.007972097658196313, "grad_norm": 3.4755607063748175, "learning_rate": 2.6436781609195404e-06, "loss": 0.7883, "step": 46 }, { "epoch": 0.008145404129026668, "grad_norm": 3.8171693389992343, "learning_rate": 2.7011494252873567e-06, "loss": 0.8552, "step": 47 }, { "epoch": 0.008318710599857022, "grad_norm": 5.654567596814533, "learning_rate": 2.7586206896551725e-06, "loss": 0.8301, "step": 48 }, { "epoch": 0.008492017070687377, "grad_norm": 3.7116749992645843, "learning_rate": 2.8160919540229887e-06, "loss": 0.8772, "step": 49 }, { "epoch": 0.00866532354151773, "grad_norm": 2.740896926187624, "learning_rate": 2.8735632183908046e-06, "loss": 0.85, "step": 50 }, { "epoch": 0.008838630012348086, "grad_norm": 2.359125121418521, "learning_rate": 2.931034482758621e-06, "loss": 0.8234, "step": 51 }, { "epoch": 0.00901193648317844, "grad_norm": 3.373615026174753, "learning_rate": 2.988505747126437e-06, "loss": 0.8235, "step": 52 }, { "epoch": 0.009185242954008795, "grad_norm": 2.966808765977845, "learning_rate": 3.0459770114942533e-06, "loss": 0.7796, "step": 53 }, { "epoch": 0.00935854942483915, "grad_norm": 2.6683825314388843, "learning_rate": 3.103448275862069e-06, "loss": 0.7168, "step": 54 }, { "epoch": 0.009531855895669504, "grad_norm": 2.9161898719620423, "learning_rate": 3.1609195402298854e-06, "loss": 0.8328, "step": 55 }, { "epoch": 0.00970516236649986, "grad_norm": 3.030813623853042, "learning_rate": 3.2183908045977012e-06, "loss": 0.9046, "step": 56 }, { "epoch": 0.009878468837330213, "grad_norm": 2.6956831509733283, "learning_rate": 3.2758620689655175e-06, "loss": 0.8667, "step": 57 }, { "epoch": 0.010051775308160569, "grad_norm": 9.20798006154704, "learning_rate": 3.3333333333333333e-06, "loss": 0.875, "step": 58 }, { "epoch": 0.010225081778990923, "grad_norm": 4.931761375260089, "learning_rate": 3.3908045977011496e-06, "loss": 0.8559, "step": 59 }, { "epoch": 0.010398388249821278, "grad_norm": 2.5134242736233015, "learning_rate": 3.448275862068966e-06, "loss": 0.7395, "step": 60 }, { "epoch": 0.010571694720651632, "grad_norm": 7.2064056840695185, "learning_rate": 3.505747126436782e-06, "loss": 0.9473, "step": 61 }, { "epoch": 0.010745001191481987, "grad_norm": 4.691847055032363, "learning_rate": 3.563218390804598e-06, "loss": 0.7865, "step": 62 }, { "epoch": 0.01091830766231234, "grad_norm": 2.4727220051089502, "learning_rate": 3.620689655172414e-06, "loss": 0.8288, "step": 63 }, { "epoch": 0.011091614133142696, "grad_norm": 3.6731318514301314, "learning_rate": 3.67816091954023e-06, "loss": 0.8065, "step": 64 }, { "epoch": 0.011264920603973052, "grad_norm": 3.537398685472358, "learning_rate": 3.7356321839080462e-06, "loss": 0.8387, "step": 65 }, { "epoch": 0.011438227074803405, "grad_norm": 4.141271275457114, "learning_rate": 3.793103448275862e-06, "loss": 0.8248, "step": 66 }, { "epoch": 0.01161153354563376, "grad_norm": 5.695885570604596, "learning_rate": 3.850574712643678e-06, "loss": 0.8095, "step": 67 }, { "epoch": 0.011784840016464114, "grad_norm": 2.8469910391449162, "learning_rate": 3.908045977011495e-06, "loss": 0.7745, "step": 68 }, { "epoch": 0.01195814648729447, "grad_norm": 2.6200497714206823, "learning_rate": 3.96551724137931e-06, "loss": 0.7798, "step": 69 }, { "epoch": 0.012131452958124823, "grad_norm": 2.9615740106547817, "learning_rate": 4.022988505747127e-06, "loss": 0.6311, "step": 70 }, { "epoch": 0.012304759428955179, "grad_norm": 2.8230305691925515, "learning_rate": 4.080459770114943e-06, "loss": 0.7846, "step": 71 }, { "epoch": 0.012478065899785533, "grad_norm": 2.31542906746987, "learning_rate": 4.137931034482759e-06, "loss": 0.6959, "step": 72 }, { "epoch": 0.012651372370615888, "grad_norm": 2.6025292816172283, "learning_rate": 4.1954022988505746e-06, "loss": 0.9238, "step": 73 }, { "epoch": 0.012824678841446243, "grad_norm": 3.6021430815652353, "learning_rate": 4.252873563218391e-06, "loss": 0.7591, "step": 74 }, { "epoch": 0.012997985312276597, "grad_norm": 11.130609245794913, "learning_rate": 4.310344827586207e-06, "loss": 0.7207, "step": 75 }, { "epoch": 0.013171291783106952, "grad_norm": 2.5507932625393956, "learning_rate": 4.367816091954023e-06, "loss": 0.7528, "step": 76 }, { "epoch": 0.013344598253937306, "grad_norm": 2.622358679173099, "learning_rate": 4.42528735632184e-06, "loss": 0.6332, "step": 77 }, { "epoch": 0.013517904724767662, "grad_norm": 2.277753701882802, "learning_rate": 4.482758620689656e-06, "loss": 0.7155, "step": 78 }, { "epoch": 0.013691211195598015, "grad_norm": 3.752900016960814, "learning_rate": 4.540229885057471e-06, "loss": 0.8877, "step": 79 }, { "epoch": 0.01386451766642837, "grad_norm": 2.3984655845143643, "learning_rate": 4.5977011494252875e-06, "loss": 0.7568, "step": 80 }, { "epoch": 0.014037824137258724, "grad_norm": 7.213942964421369, "learning_rate": 4.655172413793104e-06, "loss": 0.8347, "step": 81 }, { "epoch": 0.01421113060808908, "grad_norm": 3.093804682686801, "learning_rate": 4.71264367816092e-06, "loss": 0.7754, "step": 82 }, { "epoch": 0.014384437078919433, "grad_norm": 4.657068460318381, "learning_rate": 4.770114942528735e-06, "loss": 0.8245, "step": 83 }, { "epoch": 0.014557743549749789, "grad_norm": 3.1648741888568286, "learning_rate": 4.8275862068965525e-06, "loss": 0.6862, "step": 84 }, { "epoch": 0.014731050020580144, "grad_norm": 4.552443920573681, "learning_rate": 4.885057471264369e-06, "loss": 0.7528, "step": 85 }, { "epoch": 0.014904356491410498, "grad_norm": 2.3985955870373292, "learning_rate": 4.942528735632184e-06, "loss": 0.6905, "step": 86 }, { "epoch": 0.015077662962240853, "grad_norm": 2.41720143819991, "learning_rate": 5e-06, "loss": 0.8043, "step": 87 }, { "epoch": 0.015250969433071207, "grad_norm": 2.644803955132594, "learning_rate": 5.057471264367817e-06, "loss": 0.6931, "step": 88 }, { "epoch": 0.015424275903901562, "grad_norm": 2.3165195835745016, "learning_rate": 5.114942528735632e-06, "loss": 0.7266, "step": 89 }, { "epoch": 0.015597582374731916, "grad_norm": 2.9871296446617794, "learning_rate": 5.172413793103449e-06, "loss": 0.8395, "step": 90 }, { "epoch": 0.01577088884556227, "grad_norm": 3.02269742645, "learning_rate": 5.2298850574712646e-06, "loss": 0.7115, "step": 91 }, { "epoch": 0.015944195316392625, "grad_norm": 2.6224607110066875, "learning_rate": 5.287356321839081e-06, "loss": 0.8521, "step": 92 }, { "epoch": 0.01611750178722298, "grad_norm": 2.546518665755555, "learning_rate": 5.344827586206896e-06, "loss": 0.6806, "step": 93 }, { "epoch": 0.016290808258053336, "grad_norm": 6.669015628434104, "learning_rate": 5.402298850574713e-06, "loss": 0.7862, "step": 94 }, { "epoch": 0.016464114728883688, "grad_norm": 3.907397256169487, "learning_rate": 5.45977011494253e-06, "loss": 0.801, "step": 95 }, { "epoch": 0.016637421199714043, "grad_norm": 2.486095706361765, "learning_rate": 5.517241379310345e-06, "loss": 0.7257, "step": 96 }, { "epoch": 0.0168107276705444, "grad_norm": 2.8075404319126607, "learning_rate": 5.574712643678162e-06, "loss": 0.6273, "step": 97 }, { "epoch": 0.016984034141374754, "grad_norm": 3.1118549584290442, "learning_rate": 5.6321839080459775e-06, "loss": 0.7046, "step": 98 }, { "epoch": 0.01715734061220511, "grad_norm": 8.35107386755077, "learning_rate": 5.689655172413794e-06, "loss": 0.7718, "step": 99 }, { "epoch": 0.01733064708303546, "grad_norm": 2.646524337038956, "learning_rate": 5.747126436781609e-06, "loss": 0.7306, "step": 100 }, { "epoch": 0.017503953553865817, "grad_norm": 2.9508514998471496, "learning_rate": 5.804597701149426e-06, "loss": 0.7262, "step": 101 }, { "epoch": 0.017677260024696172, "grad_norm": 4.509351811737297, "learning_rate": 5.862068965517242e-06, "loss": 0.737, "step": 102 }, { "epoch": 0.017850566495526528, "grad_norm": 3.0239752887862617, "learning_rate": 5.919540229885058e-06, "loss": 0.7155, "step": 103 }, { "epoch": 0.01802387296635688, "grad_norm": 3.1873415335731505, "learning_rate": 5.977011494252874e-06, "loss": 0.7063, "step": 104 }, { "epoch": 0.018197179437187235, "grad_norm": 5.163885969511845, "learning_rate": 6.03448275862069e-06, "loss": 0.8317, "step": 105 }, { "epoch": 0.01837048590801759, "grad_norm": 6.831347304814634, "learning_rate": 6.091954022988507e-06, "loss": 0.6875, "step": 106 }, { "epoch": 0.018543792378847946, "grad_norm": 2.4346857044516095, "learning_rate": 6.149425287356322e-06, "loss": 0.7576, "step": 107 }, { "epoch": 0.0187170988496783, "grad_norm": 2.798379474538742, "learning_rate": 6.206896551724138e-06, "loss": 0.6728, "step": 108 }, { "epoch": 0.018890405320508653, "grad_norm": 2.9194227780532858, "learning_rate": 6.264367816091954e-06, "loss": 0.6205, "step": 109 }, { "epoch": 0.01906371179133901, "grad_norm": 3.6740448326518655, "learning_rate": 6.321839080459771e-06, "loss": 0.7416, "step": 110 }, { "epoch": 0.019237018262169364, "grad_norm": 3.0537216549030544, "learning_rate": 6.379310344827587e-06, "loss": 0.8413, "step": 111 }, { "epoch": 0.01941032473299972, "grad_norm": 5.110637699987942, "learning_rate": 6.4367816091954025e-06, "loss": 0.7636, "step": 112 }, { "epoch": 0.01958363120383007, "grad_norm": 2.6659978137600553, "learning_rate": 6.49425287356322e-06, "loss": 0.7293, "step": 113 }, { "epoch": 0.019756937674660427, "grad_norm": 2.8372808739991227, "learning_rate": 6.551724137931035e-06, "loss": 0.7929, "step": 114 }, { "epoch": 0.019930244145490782, "grad_norm": 3.0216538724329154, "learning_rate": 6.609195402298851e-06, "loss": 0.7577, "step": 115 }, { "epoch": 0.020103550616321138, "grad_norm": 2.9227759055783746, "learning_rate": 6.666666666666667e-06, "loss": 0.7457, "step": 116 }, { "epoch": 0.020276857087151493, "grad_norm": 3.75615586941332, "learning_rate": 6.724137931034484e-06, "loss": 0.7532, "step": 117 }, { "epoch": 0.020450163557981845, "grad_norm": 2.347677669901704, "learning_rate": 6.781609195402299e-06, "loss": 0.6564, "step": 118 }, { "epoch": 0.0206234700288122, "grad_norm": 2.3742987586039037, "learning_rate": 6.839080459770115e-06, "loss": 0.7085, "step": 119 }, { "epoch": 0.020796776499642556, "grad_norm": 3.278223229845093, "learning_rate": 6.896551724137932e-06, "loss": 0.759, "step": 120 }, { "epoch": 0.02097008297047291, "grad_norm": 3.4466927042636053, "learning_rate": 6.954022988505748e-06, "loss": 0.7547, "step": 121 }, { "epoch": 0.021143389441303263, "grad_norm": 2.4649883478485335, "learning_rate": 7.011494252873564e-06, "loss": 0.6761, "step": 122 }, { "epoch": 0.02131669591213362, "grad_norm": 3.7026968742953827, "learning_rate": 7.0689655172413796e-06, "loss": 0.8226, "step": 123 }, { "epoch": 0.021490002382963974, "grad_norm": 3.5251921122959655, "learning_rate": 7.126436781609196e-06, "loss": 0.7765, "step": 124 }, { "epoch": 0.02166330885379433, "grad_norm": 2.3266405430746135, "learning_rate": 7.183908045977011e-06, "loss": 0.7464, "step": 125 }, { "epoch": 0.02183661532462468, "grad_norm": 2.768809371807671, "learning_rate": 7.241379310344828e-06, "loss": 0.7605, "step": 126 }, { "epoch": 0.022009921795455037, "grad_norm": 3.379158158961723, "learning_rate": 7.298850574712645e-06, "loss": 0.7017, "step": 127 }, { "epoch": 0.022183228266285392, "grad_norm": 2.2360654755174316, "learning_rate": 7.35632183908046e-06, "loss": 0.6194, "step": 128 }, { "epoch": 0.022356534737115748, "grad_norm": 5.449058411698476, "learning_rate": 7.413793103448277e-06, "loss": 0.7587, "step": 129 }, { "epoch": 0.022529841207946103, "grad_norm": 5.929706925932501, "learning_rate": 7.4712643678160925e-06, "loss": 0.6635, "step": 130 }, { "epoch": 0.022703147678776455, "grad_norm": 3.1022913864000063, "learning_rate": 7.528735632183909e-06, "loss": 0.7317, "step": 131 }, { "epoch": 0.02287645414960681, "grad_norm": 4.231749135902444, "learning_rate": 7.586206896551724e-06, "loss": 0.6926, "step": 132 }, { "epoch": 0.023049760620437166, "grad_norm": 2.719051833992706, "learning_rate": 7.64367816091954e-06, "loss": 0.7795, "step": 133 }, { "epoch": 0.02322306709126752, "grad_norm": 3.2436836450662745, "learning_rate": 7.701149425287356e-06, "loss": 0.6411, "step": 134 }, { "epoch": 0.023396373562097873, "grad_norm": 3.4780421796547163, "learning_rate": 7.758620689655173e-06, "loss": 0.6685, "step": 135 }, { "epoch": 0.02356968003292823, "grad_norm": 9.39534287709622, "learning_rate": 7.81609195402299e-06, "loss": 0.6845, "step": 136 }, { "epoch": 0.023742986503758584, "grad_norm": 4.537433670456964, "learning_rate": 7.873563218390805e-06, "loss": 0.7292, "step": 137 }, { "epoch": 0.02391629297458894, "grad_norm": 4.212360529597501, "learning_rate": 7.93103448275862e-06, "loss": 0.682, "step": 138 }, { "epoch": 0.024089599445419295, "grad_norm": 2.7887072021410475, "learning_rate": 7.988505747126438e-06, "loss": 0.7127, "step": 139 }, { "epoch": 0.024262905916249647, "grad_norm": 3.0383323935490125, "learning_rate": 8.045977011494253e-06, "loss": 0.7599, "step": 140 }, { "epoch": 0.024436212387080002, "grad_norm": 3.4014708057297085, "learning_rate": 8.103448275862069e-06, "loss": 0.7139, "step": 141 }, { "epoch": 0.024609518857910358, "grad_norm": 2.706484043572531, "learning_rate": 8.160919540229886e-06, "loss": 0.6972, "step": 142 }, { "epoch": 0.024782825328740713, "grad_norm": 3.946653741765654, "learning_rate": 8.218390804597703e-06, "loss": 0.7142, "step": 143 }, { "epoch": 0.024956131799571065, "grad_norm": 2.68720571165194, "learning_rate": 8.275862068965518e-06, "loss": 0.5579, "step": 144 }, { "epoch": 0.02512943827040142, "grad_norm": 2.3969029920340543, "learning_rate": 8.333333333333334e-06, "loss": 0.7539, "step": 145 }, { "epoch": 0.025302744741231776, "grad_norm": 3.0029904664779306, "learning_rate": 8.390804597701149e-06, "loss": 0.7788, "step": 146 }, { "epoch": 0.02547605121206213, "grad_norm": 3.0302202375419176, "learning_rate": 8.448275862068966e-06, "loss": 0.7358, "step": 147 }, { "epoch": 0.025649357682892487, "grad_norm": 2.9598624304481937, "learning_rate": 8.505747126436782e-06, "loss": 0.8059, "step": 148 }, { "epoch": 0.02582266415372284, "grad_norm": 2.735462324079475, "learning_rate": 8.563218390804599e-06, "loss": 0.7426, "step": 149 }, { "epoch": 0.025995970624553194, "grad_norm": 3.5289849009092253, "learning_rate": 8.620689655172414e-06, "loss": 0.704, "step": 150 }, { "epoch": 0.02616927709538355, "grad_norm": 3.038209139903885, "learning_rate": 8.678160919540231e-06, "loss": 0.6826, "step": 151 }, { "epoch": 0.026342583566213905, "grad_norm": 9.752303584472713, "learning_rate": 8.735632183908047e-06, "loss": 0.7705, "step": 152 }, { "epoch": 0.026515890037044257, "grad_norm": 11.040463259028622, "learning_rate": 8.793103448275862e-06, "loss": 0.6305, "step": 153 }, { "epoch": 0.026689196507874612, "grad_norm": 3.1123252946126576, "learning_rate": 8.85057471264368e-06, "loss": 0.7668, "step": 154 }, { "epoch": 0.026862502978704968, "grad_norm": 2.6315666396030903, "learning_rate": 8.908045977011495e-06, "loss": 0.6916, "step": 155 }, { "epoch": 0.027035809449535323, "grad_norm": 4.122713912400238, "learning_rate": 8.965517241379312e-06, "loss": 0.6215, "step": 156 }, { "epoch": 0.027209115920365675, "grad_norm": 2.853690834473771, "learning_rate": 9.022988505747127e-06, "loss": 0.6318, "step": 157 }, { "epoch": 0.02738242239119603, "grad_norm": 2.7098927220727065, "learning_rate": 9.080459770114942e-06, "loss": 0.7244, "step": 158 }, { "epoch": 0.027555728862026386, "grad_norm": 2.7445685503295234, "learning_rate": 9.13793103448276e-06, "loss": 0.6697, "step": 159 }, { "epoch": 0.02772903533285674, "grad_norm": 2.5708322861511936, "learning_rate": 9.195402298850575e-06, "loss": 0.5561, "step": 160 }, { "epoch": 0.027902341803687097, "grad_norm": 5.745060602298621, "learning_rate": 9.252873563218392e-06, "loss": 0.5987, "step": 161 }, { "epoch": 0.02807564827451745, "grad_norm": 2.4363955583651737, "learning_rate": 9.310344827586207e-06, "loss": 0.682, "step": 162 }, { "epoch": 0.028248954745347804, "grad_norm": 2.939636864701838, "learning_rate": 9.367816091954025e-06, "loss": 0.758, "step": 163 }, { "epoch": 0.02842226121617816, "grad_norm": 2.575199614450396, "learning_rate": 9.42528735632184e-06, "loss": 0.6657, "step": 164 }, { "epoch": 0.028595567687008515, "grad_norm": 4.167559115268317, "learning_rate": 9.482758620689655e-06, "loss": 0.7085, "step": 165 }, { "epoch": 0.028768874157838867, "grad_norm": 2.5364596643412027, "learning_rate": 9.54022988505747e-06, "loss": 0.683, "step": 166 }, { "epoch": 0.028942180628669222, "grad_norm": 2.3008709321139458, "learning_rate": 9.597701149425288e-06, "loss": 0.6899, "step": 167 }, { "epoch": 0.029115487099499578, "grad_norm": 2.9384672918571484, "learning_rate": 9.655172413793105e-06, "loss": 0.7269, "step": 168 }, { "epoch": 0.029288793570329933, "grad_norm": 7.457865911017509, "learning_rate": 9.71264367816092e-06, "loss": 0.6927, "step": 169 }, { "epoch": 0.02946210004116029, "grad_norm": 2.901007194706344, "learning_rate": 9.770114942528738e-06, "loss": 0.675, "step": 170 }, { "epoch": 0.02963540651199064, "grad_norm": 3.003872216812096, "learning_rate": 9.827586206896553e-06, "loss": 0.638, "step": 171 }, { "epoch": 0.029808712982820996, "grad_norm": 3.146739154056887, "learning_rate": 9.885057471264368e-06, "loss": 0.6255, "step": 172 }, { "epoch": 0.02998201945365135, "grad_norm": 2.4956587440190523, "learning_rate": 9.942528735632184e-06, "loss": 0.6397, "step": 173 }, { "epoch": 0.030155325924481707, "grad_norm": 3.7153403480634632, "learning_rate": 1e-05, "loss": 0.7367, "step": 174 }, { "epoch": 0.03032863239531206, "grad_norm": 2.527820936217062, "learning_rate": 9.999999212075998e-06, "loss": 0.7194, "step": 175 }, { "epoch": 0.030501938866142414, "grad_norm": 3.3553870481940047, "learning_rate": 9.999996848304237e-06, "loss": 0.7548, "step": 176 }, { "epoch": 0.03067524533697277, "grad_norm": 2.7320352149672558, "learning_rate": 9.99999290868546e-06, "loss": 0.6917, "step": 177 }, { "epoch": 0.030848551807803125, "grad_norm": 3.0561184148116523, "learning_rate": 9.999987393220914e-06, "loss": 0.7315, "step": 178 }, { "epoch": 0.031021858278633477, "grad_norm": 2.7555525317176772, "learning_rate": 9.999980301912335e-06, "loss": 0.6782, "step": 179 }, { "epoch": 0.031195164749463832, "grad_norm": 2.2979515945795375, "learning_rate": 9.999971634761957e-06, "loss": 0.7041, "step": 180 }, { "epoch": 0.03136847122029419, "grad_norm": 3.2167164312932592, "learning_rate": 9.999961391772512e-06, "loss": 0.699, "step": 181 }, { "epoch": 0.03154177769112454, "grad_norm": 2.5959494438488018, "learning_rate": 9.999949572947229e-06, "loss": 0.6567, "step": 182 }, { "epoch": 0.0317150841619549, "grad_norm": 2.6913344420012213, "learning_rate": 9.999936178289831e-06, "loss": 0.6512, "step": 183 }, { "epoch": 0.03188839063278525, "grad_norm": 3.3373555455252455, "learning_rate": 9.999921207804544e-06, "loss": 0.6892, "step": 184 }, { "epoch": 0.03206169710361561, "grad_norm": 2.8091793619002927, "learning_rate": 9.99990466149608e-06, "loss": 0.7353, "step": 185 }, { "epoch": 0.03223500357444596, "grad_norm": 2.815661215604321, "learning_rate": 9.99988653936966e-06, "loss": 0.7016, "step": 186 }, { "epoch": 0.03240831004527631, "grad_norm": 2.625586749557899, "learning_rate": 9.999866841430992e-06, "loss": 0.6809, "step": 187 }, { "epoch": 0.03258161651610667, "grad_norm": 2.877330327617442, "learning_rate": 9.999845567686284e-06, "loss": 0.6803, "step": 188 }, { "epoch": 0.032754922986937024, "grad_norm": 3.268571159145997, "learning_rate": 9.999822718142243e-06, "loss": 0.6378, "step": 189 }, { "epoch": 0.032928229457767376, "grad_norm": 3.10791473665365, "learning_rate": 9.999798292806068e-06, "loss": 0.6569, "step": 190 }, { "epoch": 0.033101535928597735, "grad_norm": 2.767229147453233, "learning_rate": 9.999772291685459e-06, "loss": 0.7133, "step": 191 }, { "epoch": 0.03327484239942809, "grad_norm": 2.894790973339823, "learning_rate": 9.999744714788611e-06, "loss": 0.6709, "step": 192 }, { "epoch": 0.033448148870258446, "grad_norm": 2.5991177838605144, "learning_rate": 9.999715562124213e-06, "loss": 0.6895, "step": 193 }, { "epoch": 0.0336214553410888, "grad_norm": 2.672488707477917, "learning_rate": 9.999684833701455e-06, "loss": 0.7656, "step": 194 }, { "epoch": 0.03379476181191915, "grad_norm": 2.810254005177292, "learning_rate": 9.999652529530022e-06, "loss": 0.5533, "step": 195 }, { "epoch": 0.03396806828274951, "grad_norm": 2.7382153698145997, "learning_rate": 9.999618649620093e-06, "loss": 0.7161, "step": 196 }, { "epoch": 0.03414137475357986, "grad_norm": 2.999633226199531, "learning_rate": 9.99958319398235e-06, "loss": 0.6999, "step": 197 }, { "epoch": 0.03431468122441022, "grad_norm": 3.3929516796264845, "learning_rate": 9.999546162627962e-06, "loss": 0.641, "step": 198 }, { "epoch": 0.03448798769524057, "grad_norm": 3.5271428293572082, "learning_rate": 9.999507555568606e-06, "loss": 0.7451, "step": 199 }, { "epoch": 0.03466129416607092, "grad_norm": 2.632731862056239, "learning_rate": 9.999467372816445e-06, "loss": 0.681, "step": 200 }, { "epoch": 0.03483460063690128, "grad_norm": 3.9451124879957704, "learning_rate": 9.999425614384147e-06, "loss": 0.6825, "step": 201 }, { "epoch": 0.035007907107731634, "grad_norm": 3.3200235802698774, "learning_rate": 9.99938228028487e-06, "loss": 0.7132, "step": 202 }, { "epoch": 0.03518121357856199, "grad_norm": 4.0743489970721285, "learning_rate": 9.999337370532273e-06, "loss": 0.7327, "step": 203 }, { "epoch": 0.035354520049392345, "grad_norm": 2.5930040578559996, "learning_rate": 9.99929088514051e-06, "loss": 0.6571, "step": 204 }, { "epoch": 0.0355278265202227, "grad_norm": 3.2356683666441732, "learning_rate": 9.999242824124232e-06, "loss": 0.6678, "step": 205 }, { "epoch": 0.035701132991053056, "grad_norm": 5.103287094774822, "learning_rate": 9.999193187498586e-06, "loss": 0.6886, "step": 206 }, { "epoch": 0.03587443946188341, "grad_norm": 2.5121802034688767, "learning_rate": 9.999141975279217e-06, "loss": 0.7389, "step": 207 }, { "epoch": 0.03604774593271376, "grad_norm": 3.6365428331655694, "learning_rate": 9.999089187482265e-06, "loss": 0.7744, "step": 208 }, { "epoch": 0.03622105240354412, "grad_norm": 3.129593963157418, "learning_rate": 9.999034824124365e-06, "loss": 0.6644, "step": 209 }, { "epoch": 0.03639435887437447, "grad_norm": 4.020184099788099, "learning_rate": 9.998978885222654e-06, "loss": 0.7118, "step": 210 }, { "epoch": 0.03656766534520483, "grad_norm": 2.6800167795960603, "learning_rate": 9.99892137079476e-06, "loss": 0.6482, "step": 211 }, { "epoch": 0.03674097181603518, "grad_norm": 5.345290838090335, "learning_rate": 9.998862280858811e-06, "loss": 0.6836, "step": 212 }, { "epoch": 0.03691427828686553, "grad_norm": 3.3943178160751453, "learning_rate": 9.998801615433429e-06, "loss": 0.7262, "step": 213 }, { "epoch": 0.03708758475769589, "grad_norm": 5.09998701195063, "learning_rate": 9.998739374537737e-06, "loss": 0.7487, "step": 214 }, { "epoch": 0.037260891228526244, "grad_norm": 2.8550812064212407, "learning_rate": 9.998675558191346e-06, "loss": 0.73, "step": 215 }, { "epoch": 0.0374341976993566, "grad_norm": 2.7727284689081637, "learning_rate": 9.998610166414375e-06, "loss": 0.7266, "step": 216 }, { "epoch": 0.037607504170186955, "grad_norm": 2.4440520389355855, "learning_rate": 9.998543199227431e-06, "loss": 0.6477, "step": 217 }, { "epoch": 0.03778081064101731, "grad_norm": 2.4074093198142212, "learning_rate": 9.998474656651618e-06, "loss": 0.5963, "step": 218 }, { "epoch": 0.037954117111847666, "grad_norm": 3.0798251484104053, "learning_rate": 9.99840453870854e-06, "loss": 0.7412, "step": 219 }, { "epoch": 0.03812742358267802, "grad_norm": 3.816930576557912, "learning_rate": 9.998332845420298e-06, "loss": 0.6719, "step": 220 }, { "epoch": 0.03830073005350837, "grad_norm": 2.5894059085942263, "learning_rate": 9.998259576809484e-06, "loss": 0.5651, "step": 221 }, { "epoch": 0.03847403652433873, "grad_norm": 2.618259503794612, "learning_rate": 9.998184732899193e-06, "loss": 0.6612, "step": 222 }, { "epoch": 0.03864734299516908, "grad_norm": 2.2059958419181647, "learning_rate": 9.998108313713013e-06, "loss": 0.5668, "step": 223 }, { "epoch": 0.03882064946599944, "grad_norm": 2.3670178116468166, "learning_rate": 9.998030319275028e-06, "loss": 0.7191, "step": 224 }, { "epoch": 0.03899395593682979, "grad_norm": 4.989007602148187, "learning_rate": 9.99795074960982e-06, "loss": 0.6307, "step": 225 }, { "epoch": 0.03916726240766014, "grad_norm": 2.5183743335069226, "learning_rate": 9.997869604742466e-06, "loss": 0.6248, "step": 226 }, { "epoch": 0.0393405688784905, "grad_norm": 2.9022984011767763, "learning_rate": 9.99778688469854e-06, "loss": 0.6578, "step": 227 }, { "epoch": 0.039513875349320854, "grad_norm": 3.1167054586972354, "learning_rate": 9.997702589504118e-06, "loss": 0.6366, "step": 228 }, { "epoch": 0.03968718182015121, "grad_norm": 2.6707326265742384, "learning_rate": 9.99761671918576e-06, "loss": 0.6606, "step": 229 }, { "epoch": 0.039860488290981565, "grad_norm": 2.489622075587435, "learning_rate": 9.997529273770535e-06, "loss": 0.6726, "step": 230 }, { "epoch": 0.04003379476181192, "grad_norm": 3.0944297845278506, "learning_rate": 9.997440253285999e-06, "loss": 0.5656, "step": 231 }, { "epoch": 0.040207101232642276, "grad_norm": 3.0959378170950735, "learning_rate": 9.997349657760212e-06, "loss": 0.7354, "step": 232 }, { "epoch": 0.04038040770347263, "grad_norm": 9.171841644043969, "learning_rate": 9.997257487221726e-06, "loss": 0.6451, "step": 233 }, { "epoch": 0.040553714174302986, "grad_norm": 3.203021927014676, "learning_rate": 9.997163741699591e-06, "loss": 0.7219, "step": 234 }, { "epoch": 0.04072702064513334, "grad_norm": 7.729197417275744, "learning_rate": 9.99706842122335e-06, "loss": 0.6324, "step": 235 }, { "epoch": 0.04090032711596369, "grad_norm": 2.8633248110968554, "learning_rate": 9.996971525823048e-06, "loss": 0.6494, "step": 236 }, { "epoch": 0.04107363358679405, "grad_norm": 3.0464147722009156, "learning_rate": 9.996873055529222e-06, "loss": 0.6742, "step": 237 }, { "epoch": 0.0412469400576244, "grad_norm": 4.238844813077472, "learning_rate": 9.996773010372906e-06, "loss": 0.6792, "step": 238 }, { "epoch": 0.04142024652845475, "grad_norm": 2.7367127342595166, "learning_rate": 9.996671390385635e-06, "loss": 0.7008, "step": 239 }, { "epoch": 0.04159355299928511, "grad_norm": 2.2804252069420086, "learning_rate": 9.996568195599433e-06, "loss": 0.6569, "step": 240 }, { "epoch": 0.041766859470115464, "grad_norm": 2.487403138382507, "learning_rate": 9.996463426046826e-06, "loss": 0.6093, "step": 241 }, { "epoch": 0.04194016594094582, "grad_norm": 2.1586001501736045, "learning_rate": 9.99635708176083e-06, "loss": 0.6701, "step": 242 }, { "epoch": 0.042113472411776175, "grad_norm": 2.9523616094460516, "learning_rate": 9.996249162774967e-06, "loss": 0.6648, "step": 243 }, { "epoch": 0.04228677888260653, "grad_norm": 2.7858977836228815, "learning_rate": 9.996139669123246e-06, "loss": 0.698, "step": 244 }, { "epoch": 0.042460085353436886, "grad_norm": 3.9053491717436186, "learning_rate": 9.996028600840177e-06, "loss": 0.7132, "step": 245 }, { "epoch": 0.04263339182426724, "grad_norm": 2.6194130619414735, "learning_rate": 9.995915957960766e-06, "loss": 0.7028, "step": 246 }, { "epoch": 0.042806698295097596, "grad_norm": 3.3311708704433234, "learning_rate": 9.995801740520517e-06, "loss": 0.6697, "step": 247 }, { "epoch": 0.04298000476592795, "grad_norm": 3.0713413929424545, "learning_rate": 9.995685948555423e-06, "loss": 0.7096, "step": 248 }, { "epoch": 0.0431533112367583, "grad_norm": 2.4324855143034694, "learning_rate": 9.99556858210198e-06, "loss": 0.6473, "step": 249 }, { "epoch": 0.04332661770758866, "grad_norm": 7.676319946285214, "learning_rate": 9.995449641197178e-06, "loss": 0.6443, "step": 250 }, { "epoch": 0.04349992417841901, "grad_norm": 3.355817713658938, "learning_rate": 9.995329125878506e-06, "loss": 0.7175, "step": 251 }, { "epoch": 0.04367323064924936, "grad_norm": 2.8914858338333818, "learning_rate": 9.995207036183944e-06, "loss": 0.6271, "step": 252 }, { "epoch": 0.04384653712007972, "grad_norm": 3.0450662206534655, "learning_rate": 9.995083372151973e-06, "loss": 0.7197, "step": 253 }, { "epoch": 0.044019843590910074, "grad_norm": 2.1376239912468207, "learning_rate": 9.994958133821565e-06, "loss": 0.5728, "step": 254 }, { "epoch": 0.04419315006174043, "grad_norm": 2.7767523992556873, "learning_rate": 9.994831321232195e-06, "loss": 0.5403, "step": 255 }, { "epoch": 0.044366456532570785, "grad_norm": 2.545907025641338, "learning_rate": 9.994702934423828e-06, "loss": 0.6131, "step": 256 }, { "epoch": 0.04453976300340114, "grad_norm": 3.318079706241597, "learning_rate": 9.994572973436928e-06, "loss": 0.622, "step": 257 }, { "epoch": 0.044713069474231495, "grad_norm": 2.0955662311161944, "learning_rate": 9.994441438312456e-06, "loss": 0.6181, "step": 258 }, { "epoch": 0.04488637594506185, "grad_norm": 16.76594069542005, "learning_rate": 9.994308329091867e-06, "loss": 0.6698, "step": 259 }, { "epoch": 0.045059682415892206, "grad_norm": 2.718874549310714, "learning_rate": 9.994173645817114e-06, "loss": 0.5744, "step": 260 }, { "epoch": 0.04523298888672256, "grad_norm": 2.6329666255710746, "learning_rate": 9.994037388530642e-06, "loss": 0.5939, "step": 261 }, { "epoch": 0.04540629535755291, "grad_norm": 8.842964296023176, "learning_rate": 9.9938995572754e-06, "loss": 0.5825, "step": 262 }, { "epoch": 0.04557960182838327, "grad_norm": 3.3824202851686356, "learning_rate": 9.993760152094823e-06, "loss": 0.6078, "step": 263 }, { "epoch": 0.04575290829921362, "grad_norm": 2.501172520778371, "learning_rate": 9.993619173032852e-06, "loss": 0.662, "step": 264 }, { "epoch": 0.04592621477004398, "grad_norm": 2.503302818081437, "learning_rate": 9.993476620133915e-06, "loss": 0.6221, "step": 265 }, { "epoch": 0.04609952124087433, "grad_norm": 4.737928093686886, "learning_rate": 9.993332493442944e-06, "loss": 0.7152, "step": 266 }, { "epoch": 0.046272827711704684, "grad_norm": 2.152429196663101, "learning_rate": 9.99318679300536e-06, "loss": 0.5984, "step": 267 }, { "epoch": 0.04644613418253504, "grad_norm": 2.5131077012638268, "learning_rate": 9.993039518867087e-06, "loss": 0.7126, "step": 268 }, { "epoch": 0.046619440653365395, "grad_norm": 3.0044714626608244, "learning_rate": 9.992890671074539e-06, "loss": 0.6819, "step": 269 }, { "epoch": 0.04679274712419575, "grad_norm": 2.4112610433179418, "learning_rate": 9.992740249674628e-06, "loss": 0.6375, "step": 270 }, { "epoch": 0.046966053595026105, "grad_norm": 2.487604509798023, "learning_rate": 9.992588254714763e-06, "loss": 0.665, "step": 271 }, { "epoch": 0.04713936006585646, "grad_norm": 2.686718274939578, "learning_rate": 9.99243468624285e-06, "loss": 0.6935, "step": 272 }, { "epoch": 0.047312666536686816, "grad_norm": 2.679470032098012, "learning_rate": 9.992279544307287e-06, "loss": 0.6407, "step": 273 }, { "epoch": 0.04748597300751717, "grad_norm": 3.248632130221147, "learning_rate": 9.99212282895697e-06, "loss": 0.6691, "step": 274 }, { "epoch": 0.04765927947834752, "grad_norm": 3.797391460693194, "learning_rate": 9.99196454024129e-06, "loss": 0.6319, "step": 275 }, { "epoch": 0.04783258594917788, "grad_norm": 4.6854346335505594, "learning_rate": 9.991804678210137e-06, "loss": 0.7669, "step": 276 }, { "epoch": 0.04800589242000823, "grad_norm": 2.7562545514843513, "learning_rate": 9.991643242913893e-06, "loss": 0.6307, "step": 277 }, { "epoch": 0.04817919889083859, "grad_norm": 7.484773858075071, "learning_rate": 9.991480234403438e-06, "loss": 0.5572, "step": 278 }, { "epoch": 0.04835250536166894, "grad_norm": 3.9056225177085984, "learning_rate": 9.991315652730148e-06, "loss": 0.6453, "step": 279 }, { "epoch": 0.048525811832499294, "grad_norm": 9.2203564852275, "learning_rate": 9.991149497945896e-06, "loss": 0.591, "step": 280 }, { "epoch": 0.04869911830332965, "grad_norm": 2.8353140356673703, "learning_rate": 9.990981770103044e-06, "loss": 0.6582, "step": 281 }, { "epoch": 0.048872424774160005, "grad_norm": 2.616142004539889, "learning_rate": 9.990812469254458e-06, "loss": 0.6083, "step": 282 }, { "epoch": 0.049045731244990357, "grad_norm": 2.593730387425399, "learning_rate": 9.990641595453497e-06, "loss": 0.5941, "step": 283 }, { "epoch": 0.049219037715820715, "grad_norm": 3.1638648690731026, "learning_rate": 9.990469148754012e-06, "loss": 0.6473, "step": 284 }, { "epoch": 0.04939234418665107, "grad_norm": 3.05771652103375, "learning_rate": 9.990295129210356e-06, "loss": 0.6392, "step": 285 }, { "epoch": 0.049565650657481426, "grad_norm": 2.349658397134062, "learning_rate": 9.990119536877373e-06, "loss": 0.6188, "step": 286 }, { "epoch": 0.04973895712831178, "grad_norm": 3.1199539971365655, "learning_rate": 9.989942371810407e-06, "loss": 0.6361, "step": 287 }, { "epoch": 0.04991226359914213, "grad_norm": 2.93449560752857, "learning_rate": 9.98976363406529e-06, "loss": 0.5767, "step": 288 }, { "epoch": 0.05008557006997249, "grad_norm": 2.3927177966837916, "learning_rate": 9.989583323698359e-06, "loss": 0.6439, "step": 289 }, { "epoch": 0.05025887654080284, "grad_norm": 4.580048729719321, "learning_rate": 9.989401440766443e-06, "loss": 0.7211, "step": 290 }, { "epoch": 0.0504321830116332, "grad_norm": 3.035457914825161, "learning_rate": 9.989217985326862e-06, "loss": 0.5877, "step": 291 }, { "epoch": 0.05060548948246355, "grad_norm": 2.6873847341601453, "learning_rate": 9.989032957437439e-06, "loss": 0.6178, "step": 292 }, { "epoch": 0.050778795953293904, "grad_norm": 2.702682234031925, "learning_rate": 9.988846357156486e-06, "loss": 0.5472, "step": 293 }, { "epoch": 0.05095210242412426, "grad_norm": 2.8212650810585904, "learning_rate": 9.988658184542817e-06, "loss": 0.6537, "step": 294 }, { "epoch": 0.051125408894954615, "grad_norm": 2.5088633351736673, "learning_rate": 9.988468439655736e-06, "loss": 0.6723, "step": 295 }, { "epoch": 0.05129871536578497, "grad_norm": 2.5318548626976645, "learning_rate": 9.988277122555048e-06, "loss": 0.5852, "step": 296 }, { "epoch": 0.051472021836615325, "grad_norm": 2.605240777397842, "learning_rate": 9.988084233301046e-06, "loss": 0.7501, "step": 297 }, { "epoch": 0.05164532830744568, "grad_norm": 2.6994191089354604, "learning_rate": 9.987889771954525e-06, "loss": 0.6083, "step": 298 }, { "epoch": 0.051818634778276036, "grad_norm": 2.668035926142334, "learning_rate": 9.987693738576774e-06, "loss": 0.6524, "step": 299 }, { "epoch": 0.05199194124910639, "grad_norm": 2.9630029011971213, "learning_rate": 9.987496133229575e-06, "loss": 0.6516, "step": 300 }, { "epoch": 0.05216524771993674, "grad_norm": 2.087739238710549, "learning_rate": 9.987296955975207e-06, "loss": 0.6464, "step": 301 }, { "epoch": 0.0523385541907671, "grad_norm": 2.2873851862280006, "learning_rate": 9.987096206876447e-06, "loss": 0.6486, "step": 302 }, { "epoch": 0.05251186066159745, "grad_norm": 2.543960421649395, "learning_rate": 9.986893885996565e-06, "loss": 0.5955, "step": 303 }, { "epoch": 0.05268516713242781, "grad_norm": 3.4345251231138736, "learning_rate": 9.986689993399324e-06, "loss": 0.6139, "step": 304 }, { "epoch": 0.05285847360325816, "grad_norm": 3.0033609175055886, "learning_rate": 9.986484529148986e-06, "loss": 0.6627, "step": 305 }, { "epoch": 0.053031780074088514, "grad_norm": 2.1645762821073777, "learning_rate": 9.986277493310308e-06, "loss": 0.6403, "step": 306 }, { "epoch": 0.05320508654491887, "grad_norm": 2.7878899817578136, "learning_rate": 9.98606888594854e-06, "loss": 0.5463, "step": 307 }, { "epoch": 0.053378393015749225, "grad_norm": 4.762649098655837, "learning_rate": 9.985858707129427e-06, "loss": 0.7293, "step": 308 }, { "epoch": 0.05355169948657958, "grad_norm": 2.844698804946636, "learning_rate": 9.985646956919215e-06, "loss": 0.6439, "step": 309 }, { "epoch": 0.053725005957409935, "grad_norm": 3.4684054237454287, "learning_rate": 9.985433635384638e-06, "loss": 0.6591, "step": 310 }, { "epoch": 0.05389831242824029, "grad_norm": 3.5872092840444743, "learning_rate": 9.98521874259293e-06, "loss": 0.7443, "step": 311 }, { "epoch": 0.054071618899070646, "grad_norm": 2.4844063195921904, "learning_rate": 9.985002278611819e-06, "loss": 0.5748, "step": 312 }, { "epoch": 0.054244925369901, "grad_norm": 2.483382315455209, "learning_rate": 9.984784243509527e-06, "loss": 0.7402, "step": 313 }, { "epoch": 0.05441823184073135, "grad_norm": 2.7125091235229353, "learning_rate": 9.984564637354773e-06, "loss": 0.7086, "step": 314 }, { "epoch": 0.05459153831156171, "grad_norm": 3.097949211474347, "learning_rate": 9.98434346021677e-06, "loss": 0.631, "step": 315 }, { "epoch": 0.05476484478239206, "grad_norm": 3.633033582372556, "learning_rate": 9.984120712165224e-06, "loss": 0.7057, "step": 316 }, { "epoch": 0.05493815125322242, "grad_norm": 2.5415699690755966, "learning_rate": 9.98389639327034e-06, "loss": 0.6249, "step": 317 }, { "epoch": 0.05511145772405277, "grad_norm": 2.8195364110793335, "learning_rate": 9.983670503602817e-06, "loss": 0.6552, "step": 318 }, { "epoch": 0.055284764194883124, "grad_norm": 2.9119396016234447, "learning_rate": 9.983443043233852e-06, "loss": 0.5985, "step": 319 }, { "epoch": 0.05545807066571348, "grad_norm": 2.4676695914938898, "learning_rate": 9.983214012235124e-06, "loss": 0.6679, "step": 320 }, { "epoch": 0.055631377136543834, "grad_norm": 3.0311798596836734, "learning_rate": 9.982983410678826e-06, "loss": 0.7192, "step": 321 }, { "epoch": 0.05580468360737419, "grad_norm": 4.181684804647662, "learning_rate": 9.982751238637633e-06, "loss": 0.7078, "step": 322 }, { "epoch": 0.055977990078204545, "grad_norm": 2.302117792630471, "learning_rate": 9.982517496184719e-06, "loss": 0.6442, "step": 323 }, { "epoch": 0.0561512965490349, "grad_norm": 2.990310984165259, "learning_rate": 9.982282183393751e-06, "loss": 0.6553, "step": 324 }, { "epoch": 0.056324603019865256, "grad_norm": 3.440259865320403, "learning_rate": 9.982045300338894e-06, "loss": 0.6387, "step": 325 }, { "epoch": 0.05649790949069561, "grad_norm": 3.0090065342254526, "learning_rate": 9.981806847094806e-06, "loss": 0.6598, "step": 326 }, { "epoch": 0.05667121596152596, "grad_norm": 2.959714891436782, "learning_rate": 9.98156682373664e-06, "loss": 0.6419, "step": 327 }, { "epoch": 0.05684452243235632, "grad_norm": 2.738333605345278, "learning_rate": 9.981325230340045e-06, "loss": 0.5328, "step": 328 }, { "epoch": 0.05701782890318667, "grad_norm": 3.7851211434511556, "learning_rate": 9.981082066981162e-06, "loss": 0.6163, "step": 329 }, { "epoch": 0.05719113537401703, "grad_norm": 2.472198713050678, "learning_rate": 9.98083733373663e-06, "loss": 0.6719, "step": 330 }, { "epoch": 0.05736444184484738, "grad_norm": 3.3199512282322017, "learning_rate": 9.980591030683581e-06, "loss": 0.6264, "step": 331 }, { "epoch": 0.057537748315677734, "grad_norm": 3.5678547183725153, "learning_rate": 9.980343157899643e-06, "loss": 0.6589, "step": 332 }, { "epoch": 0.05771105478650809, "grad_norm": 2.193411053922797, "learning_rate": 9.980093715462939e-06, "loss": 0.6296, "step": 333 }, { "epoch": 0.057884361257338444, "grad_norm": 3.2906812896899127, "learning_rate": 9.979842703452083e-06, "loss": 0.6182, "step": 334 }, { "epoch": 0.0580576677281688, "grad_norm": 2.796015145749377, "learning_rate": 9.979590121946186e-06, "loss": 0.6294, "step": 335 }, { "epoch": 0.058230974198999155, "grad_norm": 2.682559065251622, "learning_rate": 9.979335971024857e-06, "loss": 0.6736, "step": 336 }, { "epoch": 0.05840428066982951, "grad_norm": 3.9009775987501376, "learning_rate": 9.979080250768195e-06, "loss": 0.6742, "step": 337 }, { "epoch": 0.058577587140659866, "grad_norm": 3.093817223850597, "learning_rate": 9.978822961256796e-06, "loss": 0.6632, "step": 338 }, { "epoch": 0.05875089361149022, "grad_norm": 2.4251091523098984, "learning_rate": 9.978564102571749e-06, "loss": 0.6439, "step": 339 }, { "epoch": 0.05892420008232058, "grad_norm": 2.4302623261797627, "learning_rate": 9.978303674794637e-06, "loss": 0.6926, "step": 340 }, { "epoch": 0.05909750655315093, "grad_norm": 2.78886721100622, "learning_rate": 9.978041678007543e-06, "loss": 0.6446, "step": 341 }, { "epoch": 0.05927081302398128, "grad_norm": 2.7851943202639573, "learning_rate": 9.977778112293038e-06, "loss": 0.6743, "step": 342 }, { "epoch": 0.05944411949481164, "grad_norm": 8.367676152401335, "learning_rate": 9.977512977734187e-06, "loss": 0.6174, "step": 343 }, { "epoch": 0.05961742596564199, "grad_norm": 2.7209798551340194, "learning_rate": 9.977246274414559e-06, "loss": 0.7233, "step": 344 }, { "epoch": 0.059790732436472344, "grad_norm": 2.7999773260424456, "learning_rate": 9.976978002418204e-06, "loss": 0.6288, "step": 345 }, { "epoch": 0.0599640389073027, "grad_norm": 2.5950953541248643, "learning_rate": 9.976708161829677e-06, "loss": 0.5803, "step": 346 }, { "epoch": 0.060137345378133054, "grad_norm": 36.098253728558426, "learning_rate": 9.976436752734025e-06, "loss": 0.6441, "step": 347 }, { "epoch": 0.06031065184896341, "grad_norm": 2.6851453076816645, "learning_rate": 9.976163775216783e-06, "loss": 0.6872, "step": 348 }, { "epoch": 0.060483958319793765, "grad_norm": 3.2663528792804395, "learning_rate": 9.975889229363988e-06, "loss": 0.6834, "step": 349 }, { "epoch": 0.06065726479062412, "grad_norm": 3.0042567500147026, "learning_rate": 9.975613115262169e-06, "loss": 0.6334, "step": 350 }, { "epoch": 0.060830571261454476, "grad_norm": 2.729977313210052, "learning_rate": 9.975335432998348e-06, "loss": 0.6384, "step": 351 }, { "epoch": 0.06100387773228483, "grad_norm": 3.7040237393203426, "learning_rate": 9.975056182660043e-06, "loss": 0.7081, "step": 352 }, { "epoch": 0.06117718420311519, "grad_norm": 2.3489609131813842, "learning_rate": 9.974775364335262e-06, "loss": 0.6238, "step": 353 }, { "epoch": 0.06135049067394554, "grad_norm": 3.5688126054840947, "learning_rate": 9.974492978112516e-06, "loss": 0.6088, "step": 354 }, { "epoch": 0.06152379714477589, "grad_norm": 3.078361674171105, "learning_rate": 9.974209024080801e-06, "loss": 0.6709, "step": 355 }, { "epoch": 0.06169710361560625, "grad_norm": 2.3514175426284005, "learning_rate": 9.97392350232961e-06, "loss": 0.5975, "step": 356 }, { "epoch": 0.0618704100864366, "grad_norm": 4.121355104700284, "learning_rate": 9.97363641294893e-06, "loss": 0.6051, "step": 357 }, { "epoch": 0.062043716557266954, "grad_norm": 2.571165359326931, "learning_rate": 9.973347756029249e-06, "loss": 0.6989, "step": 358 }, { "epoch": 0.06221702302809731, "grad_norm": 3.585254516815228, "learning_rate": 9.973057531661534e-06, "loss": 0.7062, "step": 359 }, { "epoch": 0.062390329498927664, "grad_norm": 5.591457443003859, "learning_rate": 9.97276573993726e-06, "loss": 0.7184, "step": 360 }, { "epoch": 0.06256363596975802, "grad_norm": 3.7036985717289257, "learning_rate": 9.972472380948392e-06, "loss": 0.5953, "step": 361 }, { "epoch": 0.06273694244058838, "grad_norm": 2.9604723536605033, "learning_rate": 9.972177454787386e-06, "loss": 0.629, "step": 362 }, { "epoch": 0.06291024891141873, "grad_norm": 3.079437051504633, "learning_rate": 9.971880961547194e-06, "loss": 0.6715, "step": 363 }, { "epoch": 0.06308355538224908, "grad_norm": 2.6736114209536406, "learning_rate": 9.971582901321261e-06, "loss": 0.7118, "step": 364 }, { "epoch": 0.06325686185307944, "grad_norm": 2.643773681950836, "learning_rate": 9.971283274203527e-06, "loss": 0.6594, "step": 365 }, { "epoch": 0.0634301683239098, "grad_norm": 5.058767632812555, "learning_rate": 9.970982080288428e-06, "loss": 0.648, "step": 366 }, { "epoch": 0.06360347479474014, "grad_norm": 2.3641532962275744, "learning_rate": 9.970679319670883e-06, "loss": 0.6568, "step": 367 }, { "epoch": 0.0637767812655705, "grad_norm": 3.3700106210035368, "learning_rate": 9.970374992446323e-06, "loss": 0.7514, "step": 368 }, { "epoch": 0.06395008773640086, "grad_norm": 3.0993739543466443, "learning_rate": 9.970069098710658e-06, "loss": 0.6955, "step": 369 }, { "epoch": 0.06412339420723122, "grad_norm": 2.4027401540543436, "learning_rate": 9.969761638560296e-06, "loss": 0.6617, "step": 370 }, { "epoch": 0.06429670067806156, "grad_norm": 2.3607242043346948, "learning_rate": 9.969452612092139e-06, "loss": 0.5299, "step": 371 }, { "epoch": 0.06447000714889192, "grad_norm": 3.7784268836166475, "learning_rate": 9.969142019403584e-06, "loss": 0.7263, "step": 372 }, { "epoch": 0.06464331361972228, "grad_norm": 3.2752956838604255, "learning_rate": 9.96882986059252e-06, "loss": 0.6477, "step": 373 }, { "epoch": 0.06481662009055263, "grad_norm": 2.4000338703228676, "learning_rate": 9.96851613575733e-06, "loss": 0.7063, "step": 374 }, { "epoch": 0.06498992656138299, "grad_norm": 3.587292031464229, "learning_rate": 9.968200844996889e-06, "loss": 0.5273, "step": 375 }, { "epoch": 0.06516323303221334, "grad_norm": 2.4468957863271052, "learning_rate": 9.96788398841057e-06, "loss": 0.5746, "step": 376 }, { "epoch": 0.06533653950304369, "grad_norm": 3.4321098932920058, "learning_rate": 9.967565566098235e-06, "loss": 0.6773, "step": 377 }, { "epoch": 0.06550984597387405, "grad_norm": 2.3419314479241247, "learning_rate": 9.967245578160239e-06, "loss": 0.5873, "step": 378 }, { "epoch": 0.0656831524447044, "grad_norm": 7.319413646526129, "learning_rate": 9.966924024697436e-06, "loss": 0.6213, "step": 379 }, { "epoch": 0.06585645891553475, "grad_norm": 2.837837328885087, "learning_rate": 9.966600905811168e-06, "loss": 0.6847, "step": 380 }, { "epoch": 0.06602976538636511, "grad_norm": 2.7194645144329503, "learning_rate": 9.966276221603273e-06, "loss": 0.613, "step": 381 }, { "epoch": 0.06620307185719547, "grad_norm": 3.6095502792545964, "learning_rate": 9.965949972176081e-06, "loss": 0.7062, "step": 382 }, { "epoch": 0.06637637832802583, "grad_norm": 2.2847612680252665, "learning_rate": 9.965622157632417e-06, "loss": 0.6817, "step": 383 }, { "epoch": 0.06654968479885617, "grad_norm": 2.6863203113669147, "learning_rate": 9.965292778075596e-06, "loss": 0.6283, "step": 384 }, { "epoch": 0.06672299126968653, "grad_norm": 4.152475515684451, "learning_rate": 9.96496183360943e-06, "loss": 0.6226, "step": 385 }, { "epoch": 0.06689629774051689, "grad_norm": 2.3729684769916375, "learning_rate": 9.964629324338221e-06, "loss": 0.533, "step": 386 }, { "epoch": 0.06706960421134724, "grad_norm": 3.4708795499826395, "learning_rate": 9.964295250366771e-06, "loss": 0.6456, "step": 387 }, { "epoch": 0.0672429106821776, "grad_norm": 2.7480050345649207, "learning_rate": 9.963959611800364e-06, "loss": 0.5845, "step": 388 }, { "epoch": 0.06741621715300795, "grad_norm": 2.5005148485337956, "learning_rate": 9.963622408744784e-06, "loss": 0.6332, "step": 389 }, { "epoch": 0.0675895236238383, "grad_norm": 3.145224051498438, "learning_rate": 9.96328364130631e-06, "loss": 0.6833, "step": 390 }, { "epoch": 0.06776283009466866, "grad_norm": 2.6473059894665503, "learning_rate": 9.962943309591708e-06, "loss": 0.5283, "step": 391 }, { "epoch": 0.06793613656549902, "grad_norm": 2.5980366982508407, "learning_rate": 9.962601413708243e-06, "loss": 0.6094, "step": 392 }, { "epoch": 0.06810944303632938, "grad_norm": 3.904493522166238, "learning_rate": 9.96225795376367e-06, "loss": 0.6012, "step": 393 }, { "epoch": 0.06828274950715972, "grad_norm": 3.9046019999823622, "learning_rate": 9.961912929866234e-06, "loss": 0.6399, "step": 394 }, { "epoch": 0.06845605597799008, "grad_norm": 2.8492902314950697, "learning_rate": 9.961566342124678e-06, "loss": 0.6163, "step": 395 }, { "epoch": 0.06862936244882044, "grad_norm": 2.944569542688179, "learning_rate": 9.961218190648236e-06, "loss": 0.6745, "step": 396 }, { "epoch": 0.06880266891965078, "grad_norm": 2.610229859345336, "learning_rate": 9.960868475546637e-06, "loss": 0.6253, "step": 397 }, { "epoch": 0.06897597539048114, "grad_norm": 3.2541097552104974, "learning_rate": 9.960517196930097e-06, "loss": 0.6154, "step": 398 }, { "epoch": 0.0691492818613115, "grad_norm": 2.74048230074788, "learning_rate": 9.960164354909329e-06, "loss": 0.576, "step": 399 }, { "epoch": 0.06932258833214185, "grad_norm": 3.4885173056231014, "learning_rate": 9.95980994959554e-06, "loss": 0.7182, "step": 400 }, { "epoch": 0.0694958948029722, "grad_norm": 2.8600441298443444, "learning_rate": 9.959453981100426e-06, "loss": 0.6534, "step": 401 }, { "epoch": 0.06966920127380256, "grad_norm": 4.515816157350406, "learning_rate": 9.959096449536179e-06, "loss": 0.574, "step": 402 }, { "epoch": 0.06984250774463291, "grad_norm": 2.3827735892919737, "learning_rate": 9.95873735501548e-06, "loss": 0.6353, "step": 403 }, { "epoch": 0.07001581421546327, "grad_norm": 2.7568021973791876, "learning_rate": 9.958376697651506e-06, "loss": 0.5858, "step": 404 }, { "epoch": 0.07018912068629363, "grad_norm": 2.7210970632975093, "learning_rate": 9.958014477557925e-06, "loss": 0.7729, "step": 405 }, { "epoch": 0.07036242715712399, "grad_norm": 2.1779315231628362, "learning_rate": 9.957650694848897e-06, "loss": 0.5728, "step": 406 }, { "epoch": 0.07053573362795433, "grad_norm": 2.4676980218715614, "learning_rate": 9.957285349639078e-06, "loss": 0.6289, "step": 407 }, { "epoch": 0.07070904009878469, "grad_norm": 2.3423031391393128, "learning_rate": 9.95691844204361e-06, "loss": 0.607, "step": 408 }, { "epoch": 0.07088234656961505, "grad_norm": 2.31163280221489, "learning_rate": 9.956549972178133e-06, "loss": 0.6735, "step": 409 }, { "epoch": 0.0710556530404454, "grad_norm": 2.4230216610690687, "learning_rate": 9.956179940158779e-06, "loss": 0.5406, "step": 410 }, { "epoch": 0.07122895951127575, "grad_norm": 2.3757915725883434, "learning_rate": 9.955808346102167e-06, "loss": 0.7412, "step": 411 }, { "epoch": 0.07140226598210611, "grad_norm": 2.7295473240205395, "learning_rate": 9.955435190125415e-06, "loss": 0.7296, "step": 412 }, { "epoch": 0.07157557245293646, "grad_norm": 5.519013684029163, "learning_rate": 9.955060472346132e-06, "loss": 0.6787, "step": 413 }, { "epoch": 0.07174887892376682, "grad_norm": 2.3067389011082127, "learning_rate": 9.954684192882414e-06, "loss": 0.5584, "step": 414 }, { "epoch": 0.07192218539459717, "grad_norm": 2.001787822389997, "learning_rate": 9.954306351852853e-06, "loss": 0.5494, "step": 415 }, { "epoch": 0.07209549186542752, "grad_norm": 2.489748625287719, "learning_rate": 9.953926949376536e-06, "loss": 0.6438, "step": 416 }, { "epoch": 0.07226879833625788, "grad_norm": 3.0226185663005256, "learning_rate": 9.953545985573036e-06, "loss": 0.6086, "step": 417 }, { "epoch": 0.07244210480708824, "grad_norm": 2.5478591887436046, "learning_rate": 9.953163460562422e-06, "loss": 0.5813, "step": 418 }, { "epoch": 0.0726154112779186, "grad_norm": 2.3334902998578255, "learning_rate": 9.952779374465256e-06, "loss": 0.6125, "step": 419 }, { "epoch": 0.07278871774874894, "grad_norm": 3.497677505229658, "learning_rate": 9.95239372740259e-06, "loss": 0.5414, "step": 420 }, { "epoch": 0.0729620242195793, "grad_norm": 2.927034422952652, "learning_rate": 9.952006519495968e-06, "loss": 0.7123, "step": 421 }, { "epoch": 0.07313533069040966, "grad_norm": 2.8548573252182807, "learning_rate": 9.951617750867423e-06, "loss": 0.7141, "step": 422 }, { "epoch": 0.07330863716124, "grad_norm": 3.1202871524393543, "learning_rate": 9.951227421639487e-06, "loss": 0.6333, "step": 423 }, { "epoch": 0.07348194363207036, "grad_norm": 4.401721375315686, "learning_rate": 9.950835531935178e-06, "loss": 0.66, "step": 424 }, { "epoch": 0.07365525010290072, "grad_norm": 2.4788707870023505, "learning_rate": 9.950442081878008e-06, "loss": 0.6681, "step": 425 }, { "epoch": 0.07382855657373107, "grad_norm": 4.885769715776673, "learning_rate": 9.950047071591982e-06, "loss": 0.6313, "step": 426 }, { "epoch": 0.07400186304456143, "grad_norm": 2.258723340151339, "learning_rate": 9.949650501201593e-06, "loss": 0.5512, "step": 427 }, { "epoch": 0.07417516951539178, "grad_norm": 2.5012692350078782, "learning_rate": 9.949252370831827e-06, "loss": 0.673, "step": 428 }, { "epoch": 0.07434847598622213, "grad_norm": 5.6542835835400025, "learning_rate": 9.948852680608167e-06, "loss": 0.7532, "step": 429 }, { "epoch": 0.07452178245705249, "grad_norm": 2.6532159839685794, "learning_rate": 9.948451430656581e-06, "loss": 0.5803, "step": 430 }, { "epoch": 0.07469508892788285, "grad_norm": 3.1176237330723806, "learning_rate": 9.94804862110353e-06, "loss": 0.6606, "step": 431 }, { "epoch": 0.0748683953987132, "grad_norm": 2.8283725370984394, "learning_rate": 9.947644252075968e-06, "loss": 0.6024, "step": 432 }, { "epoch": 0.07504170186954355, "grad_norm": 2.3609315988211717, "learning_rate": 9.947238323701338e-06, "loss": 0.6477, "step": 433 }, { "epoch": 0.07521500834037391, "grad_norm": 2.4899153398337073, "learning_rate": 9.946830836107579e-06, "loss": 0.6325, "step": 434 }, { "epoch": 0.07538831481120427, "grad_norm": 2.466198960131293, "learning_rate": 9.946421789423117e-06, "loss": 0.6681, "step": 435 }, { "epoch": 0.07556162128203461, "grad_norm": 3.470523014100514, "learning_rate": 9.946011183776872e-06, "loss": 0.7131, "step": 436 }, { "epoch": 0.07573492775286497, "grad_norm": 2.921489130937825, "learning_rate": 9.945599019298256e-06, "loss": 0.6238, "step": 437 }, { "epoch": 0.07590823422369533, "grad_norm": 2.6966313482103375, "learning_rate": 9.945185296117165e-06, "loss": 0.6827, "step": 438 }, { "epoch": 0.07608154069452568, "grad_norm": 2.965782676108751, "learning_rate": 9.944770014364e-06, "loss": 0.6673, "step": 439 }, { "epoch": 0.07625484716535604, "grad_norm": 3.4022958590432997, "learning_rate": 9.944353174169638e-06, "loss": 0.6069, "step": 440 }, { "epoch": 0.0764281536361864, "grad_norm": 3.6295306742997817, "learning_rate": 9.943934775665457e-06, "loss": 0.6314, "step": 441 }, { "epoch": 0.07660146010701674, "grad_norm": 2.7877174524034443, "learning_rate": 9.943514818983326e-06, "loss": 0.5071, "step": 442 }, { "epoch": 0.0767747665778471, "grad_norm": 2.9705168086377385, "learning_rate": 9.9430933042556e-06, "loss": 0.599, "step": 443 }, { "epoch": 0.07694807304867746, "grad_norm": 2.772696388965827, "learning_rate": 9.942670231615127e-06, "loss": 0.5696, "step": 444 }, { "epoch": 0.07712137951950782, "grad_norm": 7.236605112605998, "learning_rate": 9.942245601195249e-06, "loss": 0.574, "step": 445 }, { "epoch": 0.07729468599033816, "grad_norm": 2.535133335808816, "learning_rate": 9.941819413129794e-06, "loss": 0.5843, "step": 446 }, { "epoch": 0.07746799246116852, "grad_norm": 2.7640388867437573, "learning_rate": 9.941391667553084e-06, "loss": 0.5839, "step": 447 }, { "epoch": 0.07764129893199888, "grad_norm": 2.271666735292444, "learning_rate": 9.940962364599933e-06, "loss": 0.5904, "step": 448 }, { "epoch": 0.07781460540282922, "grad_norm": 3.2978507510501105, "learning_rate": 9.940531504405644e-06, "loss": 0.4577, "step": 449 }, { "epoch": 0.07798791187365958, "grad_norm": 3.17681743438403, "learning_rate": 9.94009908710601e-06, "loss": 0.6781, "step": 450 }, { "epoch": 0.07816121834448994, "grad_norm": 2.3768479788485197, "learning_rate": 9.939665112837316e-06, "loss": 0.6125, "step": 451 }, { "epoch": 0.07833452481532029, "grad_norm": 3.563341544323021, "learning_rate": 9.939229581736339e-06, "loss": 0.6495, "step": 452 }, { "epoch": 0.07850783128615064, "grad_norm": 6.397683876317642, "learning_rate": 9.938792493940343e-06, "loss": 0.6366, "step": 453 }, { "epoch": 0.078681137756981, "grad_norm": 2.927733060283053, "learning_rate": 9.938353849587086e-06, "loss": 0.6656, "step": 454 }, { "epoch": 0.07885444422781136, "grad_norm": 4.916332586003579, "learning_rate": 9.937913648814814e-06, "loss": 0.7354, "step": 455 }, { "epoch": 0.07902775069864171, "grad_norm": 3.0103011364864236, "learning_rate": 9.937471891762267e-06, "loss": 0.6271, "step": 456 }, { "epoch": 0.07920105716947207, "grad_norm": 2.7570550158008484, "learning_rate": 9.937028578568673e-06, "loss": 0.6867, "step": 457 }, { "epoch": 0.07937436364030243, "grad_norm": 2.633443553598262, "learning_rate": 9.93658370937375e-06, "loss": 0.5476, "step": 458 }, { "epoch": 0.07954767011113277, "grad_norm": 2.2922633224798235, "learning_rate": 9.936137284317706e-06, "loss": 0.6215, "step": 459 }, { "epoch": 0.07972097658196313, "grad_norm": 2.388908635521802, "learning_rate": 9.935689303541243e-06, "loss": 0.7298, "step": 460 }, { "epoch": 0.07989428305279349, "grad_norm": 2.4737566064858085, "learning_rate": 9.93523976718555e-06, "loss": 0.6777, "step": 461 }, { "epoch": 0.08006758952362383, "grad_norm": 3.0437626235305926, "learning_rate": 9.934788675392306e-06, "loss": 0.5354, "step": 462 }, { "epoch": 0.08024089599445419, "grad_norm": 3.0450083127733794, "learning_rate": 9.934336028303683e-06, "loss": 0.6022, "step": 463 }, { "epoch": 0.08041420246528455, "grad_norm": 5.195583204706381, "learning_rate": 9.933881826062344e-06, "loss": 0.7052, "step": 464 }, { "epoch": 0.0805875089361149, "grad_norm": 3.057111362879994, "learning_rate": 9.933426068811434e-06, "loss": 0.6502, "step": 465 }, { "epoch": 0.08076081540694525, "grad_norm": 2.261266141936099, "learning_rate": 9.932968756694597e-06, "loss": 0.5009, "step": 466 }, { "epoch": 0.08093412187777561, "grad_norm": 2.529374537052249, "learning_rate": 9.932509889855965e-06, "loss": 0.6846, "step": 467 }, { "epoch": 0.08110742834860597, "grad_norm": 2.5560564338226195, "learning_rate": 9.932049468440157e-06, "loss": 0.6969, "step": 468 }, { "epoch": 0.08128073481943632, "grad_norm": 4.436752417512404, "learning_rate": 9.931587492592283e-06, "loss": 0.7016, "step": 469 }, { "epoch": 0.08145404129026668, "grad_norm": 3.194579114973696, "learning_rate": 9.931123962457947e-06, "loss": 0.7411, "step": 470 }, { "epoch": 0.08162734776109704, "grad_norm": 2.182602256791616, "learning_rate": 9.930658878183238e-06, "loss": 0.5511, "step": 471 }, { "epoch": 0.08180065423192738, "grad_norm": 4.677247510511194, "learning_rate": 9.930192239914733e-06, "loss": 0.6112, "step": 472 }, { "epoch": 0.08197396070275774, "grad_norm": 2.801748960312767, "learning_rate": 9.929724047799507e-06, "loss": 0.5941, "step": 473 }, { "epoch": 0.0821472671735881, "grad_norm": 2.7654713170257343, "learning_rate": 9.929254301985119e-06, "loss": 0.548, "step": 474 }, { "epoch": 0.08232057364441844, "grad_norm": 2.5608778940931334, "learning_rate": 9.928783002619618e-06, "loss": 0.6812, "step": 475 }, { "epoch": 0.0824938801152488, "grad_norm": 2.45528370237716, "learning_rate": 9.928310149851541e-06, "loss": 0.5975, "step": 476 }, { "epoch": 0.08266718658607916, "grad_norm": 2.605945199072821, "learning_rate": 9.927835743829922e-06, "loss": 0.6456, "step": 477 }, { "epoch": 0.0828404930569095, "grad_norm": 3.260768300292636, "learning_rate": 9.927359784704274e-06, "loss": 0.6402, "step": 478 }, { "epoch": 0.08301379952773986, "grad_norm": 4.659185535940799, "learning_rate": 9.926882272624609e-06, "loss": 0.5911, "step": 479 }, { "epoch": 0.08318710599857022, "grad_norm": 5.765869133107149, "learning_rate": 9.92640320774142e-06, "loss": 0.6022, "step": 480 }, { "epoch": 0.08336041246940058, "grad_norm": 2.6770887330032274, "learning_rate": 9.925922590205698e-06, "loss": 0.6977, "step": 481 }, { "epoch": 0.08353371894023093, "grad_norm": 2.810334855631104, "learning_rate": 9.925440420168917e-06, "loss": 0.6796, "step": 482 }, { "epoch": 0.08370702541106129, "grad_norm": 2.4732503836254334, "learning_rate": 9.924956697783042e-06, "loss": 0.6338, "step": 483 }, { "epoch": 0.08388033188189165, "grad_norm": 4.581064253035307, "learning_rate": 9.924471423200527e-06, "loss": 0.6193, "step": 484 }, { "epoch": 0.08405363835272199, "grad_norm": 3.03102944214612, "learning_rate": 9.923984596574319e-06, "loss": 0.6507, "step": 485 }, { "epoch": 0.08422694482355235, "grad_norm": 3.2020961287878102, "learning_rate": 9.923496218057846e-06, "loss": 0.6289, "step": 486 }, { "epoch": 0.08440025129438271, "grad_norm": 2.3297216248587667, "learning_rate": 9.923006287805036e-06, "loss": 0.6713, "step": 487 }, { "epoch": 0.08457355776521305, "grad_norm": 2.9230682115686726, "learning_rate": 9.922514805970293e-06, "loss": 0.558, "step": 488 }, { "epoch": 0.08474686423604341, "grad_norm": 2.756114947040671, "learning_rate": 9.922021772708525e-06, "loss": 0.4415, "step": 489 }, { "epoch": 0.08492017070687377, "grad_norm": 2.855740031418209, "learning_rate": 9.921527188175115e-06, "loss": 0.6545, "step": 490 }, { "epoch": 0.08509347717770412, "grad_norm": 2.256321594860437, "learning_rate": 9.921031052525945e-06, "loss": 0.5775, "step": 491 }, { "epoch": 0.08526678364853447, "grad_norm": 2.5487368366406438, "learning_rate": 9.920533365917379e-06, "loss": 0.6498, "step": 492 }, { "epoch": 0.08544009011936483, "grad_norm": 2.7412826134281585, "learning_rate": 9.920034128506273e-06, "loss": 0.6681, "step": 493 }, { "epoch": 0.08561339659019519, "grad_norm": 2.537240066154986, "learning_rate": 9.919533340449974e-06, "loss": 0.5618, "step": 494 }, { "epoch": 0.08578670306102554, "grad_norm": 2.822764131099054, "learning_rate": 9.919031001906313e-06, "loss": 0.5622, "step": 495 }, { "epoch": 0.0859600095318559, "grad_norm": 2.9338528065963936, "learning_rate": 9.918527113033612e-06, "loss": 0.6801, "step": 496 }, { "epoch": 0.08613331600268626, "grad_norm": 3.394733814380271, "learning_rate": 9.91802167399068e-06, "loss": 0.6834, "step": 497 }, { "epoch": 0.0863066224735166, "grad_norm": 9.661480286805398, "learning_rate": 9.917514684936819e-06, "loss": 0.5428, "step": 498 }, { "epoch": 0.08647992894434696, "grad_norm": 5.803859347815905, "learning_rate": 9.917006146031817e-06, "loss": 0.6624, "step": 499 }, { "epoch": 0.08665323541517732, "grad_norm": 2.975721818211477, "learning_rate": 9.916496057435947e-06, "loss": 0.6607, "step": 500 }, { "epoch": 0.08682654188600766, "grad_norm": 2.407768889534235, "learning_rate": 9.915984419309973e-06, "loss": 0.6081, "step": 501 }, { "epoch": 0.08699984835683802, "grad_norm": 2.3347351428688725, "learning_rate": 9.915471231815153e-06, "loss": 0.5481, "step": 502 }, { "epoch": 0.08717315482766838, "grad_norm": 3.269130744985925, "learning_rate": 9.914956495113223e-06, "loss": 0.5983, "step": 503 }, { "epoch": 0.08734646129849873, "grad_norm": 2.7236891310107394, "learning_rate": 9.914440209366415e-06, "loss": 0.5963, "step": 504 }, { "epoch": 0.08751976776932908, "grad_norm": 4.250307616231967, "learning_rate": 9.913922374737442e-06, "loss": 0.5758, "step": 505 }, { "epoch": 0.08769307424015944, "grad_norm": 3.150948402774562, "learning_rate": 9.913402991389516e-06, "loss": 0.5831, "step": 506 }, { "epoch": 0.0878663807109898, "grad_norm": 2.8853127448665554, "learning_rate": 9.912882059486328e-06, "loss": 0.6092, "step": 507 }, { "epoch": 0.08803968718182015, "grad_norm": 2.7501316931580635, "learning_rate": 9.912359579192059e-06, "loss": 0.6345, "step": 508 }, { "epoch": 0.0882129936526505, "grad_norm": 2.6857431013431263, "learning_rate": 9.91183555067138e-06, "loss": 0.6043, "step": 509 }, { "epoch": 0.08838630012348087, "grad_norm": 3.3035533219223607, "learning_rate": 9.911309974089449e-06, "loss": 0.6071, "step": 510 }, { "epoch": 0.08855960659431121, "grad_norm": 2.4723550910077274, "learning_rate": 9.910782849611913e-06, "loss": 0.6074, "step": 511 }, { "epoch": 0.08873291306514157, "grad_norm": 3.0568441402765405, "learning_rate": 9.910254177404901e-06, "loss": 0.6162, "step": 512 }, { "epoch": 0.08890621953597193, "grad_norm": 3.100936209742338, "learning_rate": 9.90972395763504e-06, "loss": 0.6648, "step": 513 }, { "epoch": 0.08907952600680227, "grad_norm": 2.9907981609990544, "learning_rate": 9.909192190469437e-06, "loss": 0.6461, "step": 514 }, { "epoch": 0.08925283247763263, "grad_norm": 5.524241946784421, "learning_rate": 9.908658876075687e-06, "loss": 0.529, "step": 515 }, { "epoch": 0.08942613894846299, "grad_norm": 2.5741246659858676, "learning_rate": 9.908124014621876e-06, "loss": 0.5318, "step": 516 }, { "epoch": 0.08959944541929334, "grad_norm": 2.7113555409294294, "learning_rate": 9.907587606276576e-06, "loss": 0.6863, "step": 517 }, { "epoch": 0.0897727518901237, "grad_norm": 2.436137987411326, "learning_rate": 9.90704965120885e-06, "loss": 0.6519, "step": 518 }, { "epoch": 0.08994605836095405, "grad_norm": 3.134080913332646, "learning_rate": 9.906510149588237e-06, "loss": 0.618, "step": 519 }, { "epoch": 0.09011936483178441, "grad_norm": 3.3098168541561113, "learning_rate": 9.90596910158478e-06, "loss": 0.5838, "step": 520 }, { "epoch": 0.09029267130261476, "grad_norm": 2.477279818154649, "learning_rate": 9.905426507368994e-06, "loss": 0.6089, "step": 521 }, { "epoch": 0.09046597777344512, "grad_norm": 2.9299086066561646, "learning_rate": 9.904882367111894e-06, "loss": 0.6338, "step": 522 }, { "epoch": 0.09063928424427548, "grad_norm": 2.506696507226648, "learning_rate": 9.904336680984973e-06, "loss": 0.5489, "step": 523 }, { "epoch": 0.09081259071510582, "grad_norm": 2.8066306044238454, "learning_rate": 9.903789449160215e-06, "loss": 0.6435, "step": 524 }, { "epoch": 0.09098589718593618, "grad_norm": 3.4915601164679386, "learning_rate": 9.903240671810092e-06, "loss": 0.5995, "step": 525 }, { "epoch": 0.09115920365676654, "grad_norm": 2.7335561302513716, "learning_rate": 9.90269034910756e-06, "loss": 0.6294, "step": 526 }, { "epoch": 0.09133251012759688, "grad_norm": 2.548669708826574, "learning_rate": 9.902138481226067e-06, "loss": 0.5886, "step": 527 }, { "epoch": 0.09150581659842724, "grad_norm": 3.077244120032935, "learning_rate": 9.901585068339542e-06, "loss": 0.5828, "step": 528 }, { "epoch": 0.0916791230692576, "grad_norm": 3.069301203065552, "learning_rate": 9.901030110622407e-06, "loss": 0.5825, "step": 529 }, { "epoch": 0.09185242954008796, "grad_norm": 2.741549172856756, "learning_rate": 9.900473608249566e-06, "loss": 0.5686, "step": 530 }, { "epoch": 0.0920257360109183, "grad_norm": 2.2576156766755187, "learning_rate": 9.89991556139641e-06, "loss": 0.6224, "step": 531 }, { "epoch": 0.09219904248174866, "grad_norm": 3.4103985109747503, "learning_rate": 9.89935597023882e-06, "loss": 0.5855, "step": 532 }, { "epoch": 0.09237234895257902, "grad_norm": 3.8111988437262707, "learning_rate": 9.898794834953162e-06, "loss": 0.6093, "step": 533 }, { "epoch": 0.09254565542340937, "grad_norm": 2.506464390735691, "learning_rate": 9.89823215571629e-06, "loss": 0.6089, "step": 534 }, { "epoch": 0.09271896189423973, "grad_norm": 3.130602532265675, "learning_rate": 9.89766793270554e-06, "loss": 0.5978, "step": 535 }, { "epoch": 0.09289226836507009, "grad_norm": 2.8198164401071253, "learning_rate": 9.897102166098744e-06, "loss": 0.5711, "step": 536 }, { "epoch": 0.09306557483590043, "grad_norm": 2.5141975862660413, "learning_rate": 9.89653485607421e-06, "loss": 0.6774, "step": 537 }, { "epoch": 0.09323888130673079, "grad_norm": 2.931354293461897, "learning_rate": 9.895966002810735e-06, "loss": 0.576, "step": 538 }, { "epoch": 0.09341218777756115, "grad_norm": 3.0401477867980526, "learning_rate": 9.89539560648761e-06, "loss": 0.5617, "step": 539 }, { "epoch": 0.0935854942483915, "grad_norm": 4.485882868115668, "learning_rate": 9.8948236672846e-06, "loss": 0.5797, "step": 540 }, { "epoch": 0.09375880071922185, "grad_norm": 3.0457268942471867, "learning_rate": 9.894250185381969e-06, "loss": 0.6506, "step": 541 }, { "epoch": 0.09393210719005221, "grad_norm": 2.9053598248019337, "learning_rate": 9.893675160960456e-06, "loss": 0.5543, "step": 542 }, { "epoch": 0.09410541366088257, "grad_norm": 3.2186627654136433, "learning_rate": 9.893098594201297e-06, "loss": 0.5583, "step": 543 }, { "epoch": 0.09427872013171291, "grad_norm": 3.0635174464316886, "learning_rate": 9.8925204852862e-06, "loss": 0.6398, "step": 544 }, { "epoch": 0.09445202660254327, "grad_norm": 2.720311244966177, "learning_rate": 9.891940834397376e-06, "loss": 0.623, "step": 545 }, { "epoch": 0.09462533307337363, "grad_norm": 3.727847096823671, "learning_rate": 9.891359641717508e-06, "loss": 0.5605, "step": 546 }, { "epoch": 0.09479863954420398, "grad_norm": 2.731674377461844, "learning_rate": 9.890776907429774e-06, "loss": 0.5868, "step": 547 }, { "epoch": 0.09497194601503434, "grad_norm": 8.075444365695688, "learning_rate": 9.89019263171783e-06, "loss": 0.7094, "step": 548 }, { "epoch": 0.0951452524858647, "grad_norm": 2.7496158145947667, "learning_rate": 9.889606814765824e-06, "loss": 0.5475, "step": 549 }, { "epoch": 0.09531855895669504, "grad_norm": 2.8630346479147435, "learning_rate": 9.889019456758387e-06, "loss": 0.6244, "step": 550 }, { "epoch": 0.0954918654275254, "grad_norm": 3.0080157723619525, "learning_rate": 9.888430557880638e-06, "loss": 0.6164, "step": 551 }, { "epoch": 0.09566517189835576, "grad_norm": 3.145509007060434, "learning_rate": 9.88784011831818e-06, "loss": 0.6215, "step": 552 }, { "epoch": 0.0958384783691861, "grad_norm": 2.696807732194898, "learning_rate": 9.887248138257099e-06, "loss": 0.6866, "step": 553 }, { "epoch": 0.09601178484001646, "grad_norm": 4.269549284842672, "learning_rate": 9.886654617883971e-06, "loss": 0.6454, "step": 554 }, { "epoch": 0.09618509131084682, "grad_norm": 2.7782606263585152, "learning_rate": 9.886059557385855e-06, "loss": 0.5504, "step": 555 }, { "epoch": 0.09635839778167718, "grad_norm": 2.9618659288945133, "learning_rate": 9.885462956950295e-06, "loss": 0.5902, "step": 556 }, { "epoch": 0.09653170425250752, "grad_norm": 4.901435412216263, "learning_rate": 9.884864816765325e-06, "loss": 0.6634, "step": 557 }, { "epoch": 0.09670501072333788, "grad_norm": 2.8979644496188466, "learning_rate": 9.884265137019457e-06, "loss": 0.631, "step": 558 }, { "epoch": 0.09687831719416824, "grad_norm": 7.309355461706261, "learning_rate": 9.883663917901694e-06, "loss": 0.5914, "step": 559 }, { "epoch": 0.09705162366499859, "grad_norm": 2.3905543716357482, "learning_rate": 9.883061159601521e-06, "loss": 0.5981, "step": 560 }, { "epoch": 0.09722493013582895, "grad_norm": 2.821652489036145, "learning_rate": 9.882456862308909e-06, "loss": 0.6866, "step": 561 }, { "epoch": 0.0973982366066593, "grad_norm": 6.672522334073997, "learning_rate": 9.881851026214315e-06, "loss": 0.5606, "step": 562 }, { "epoch": 0.09757154307748965, "grad_norm": 2.7261900154843466, "learning_rate": 9.881243651508677e-06, "loss": 0.628, "step": 563 }, { "epoch": 0.09774484954832001, "grad_norm": 2.7951833453328327, "learning_rate": 9.880634738383426e-06, "loss": 0.6537, "step": 564 }, { "epoch": 0.09791815601915037, "grad_norm": 2.4567028408442164, "learning_rate": 9.880024287030468e-06, "loss": 0.7373, "step": 565 }, { "epoch": 0.09809146248998071, "grad_norm": 8.24368502671514, "learning_rate": 9.879412297642202e-06, "loss": 0.6325, "step": 566 }, { "epoch": 0.09826476896081107, "grad_norm": 9.388180164309873, "learning_rate": 9.878798770411508e-06, "loss": 0.6869, "step": 567 }, { "epoch": 0.09843807543164143, "grad_norm": 2.3756858401526584, "learning_rate": 9.87818370553175e-06, "loss": 0.5599, "step": 568 }, { "epoch": 0.09861138190247179, "grad_norm": 5.558538894177099, "learning_rate": 9.877567103196778e-06, "loss": 0.5538, "step": 569 }, { "epoch": 0.09878468837330213, "grad_norm": 3.9816069834285033, "learning_rate": 9.876948963600927e-06, "loss": 0.6037, "step": 570 }, { "epoch": 0.0989579948441325, "grad_norm": 3.199160393985556, "learning_rate": 9.876329286939018e-06, "loss": 0.6158, "step": 571 }, { "epoch": 0.09913130131496285, "grad_norm": 4.362055912671442, "learning_rate": 9.875708073406348e-06, "loss": 0.5735, "step": 572 }, { "epoch": 0.0993046077857932, "grad_norm": 2.7897126549442777, "learning_rate": 9.87508532319871e-06, "loss": 0.5506, "step": 573 }, { "epoch": 0.09947791425662356, "grad_norm": 2.5939689935702144, "learning_rate": 9.874461036512375e-06, "loss": 0.6042, "step": 574 }, { "epoch": 0.09965122072745392, "grad_norm": 3.0874010644562433, "learning_rate": 9.873835213544097e-06, "loss": 0.5473, "step": 575 }, { "epoch": 0.09982452719828426, "grad_norm": 3.423234405557175, "learning_rate": 9.873207854491118e-06, "loss": 0.67, "step": 576 }, { "epoch": 0.09999783366911462, "grad_norm": 3.4398971371431006, "learning_rate": 9.872578959551163e-06, "loss": 0.5644, "step": 577 }, { "epoch": 0.10017114013994498, "grad_norm": 7.669248560720481, "learning_rate": 9.87194852892244e-06, "loss": 0.5935, "step": 578 }, { "epoch": 0.10034444661077532, "grad_norm": 2.664078666886839, "learning_rate": 9.87131656280364e-06, "loss": 0.6504, "step": 579 }, { "epoch": 0.10051775308160568, "grad_norm": 5.701347305176252, "learning_rate": 9.870683061393945e-06, "loss": 0.6204, "step": 580 }, { "epoch": 0.10069105955243604, "grad_norm": 2.5691329590515353, "learning_rate": 9.870048024893009e-06, "loss": 0.5283, "step": 581 }, { "epoch": 0.1008643660232664, "grad_norm": 5.575062920574988, "learning_rate": 9.869411453500978e-06, "loss": 0.5741, "step": 582 }, { "epoch": 0.10103767249409674, "grad_norm": 2.3117328616683626, "learning_rate": 9.86877334741848e-06, "loss": 0.6013, "step": 583 }, { "epoch": 0.1012109789649271, "grad_norm": 2.7808545126339608, "learning_rate": 9.86813370684663e-06, "loss": 0.5836, "step": 584 }, { "epoch": 0.10138428543575746, "grad_norm": 4.53159608812374, "learning_rate": 9.867492531987018e-06, "loss": 0.6212, "step": 585 }, { "epoch": 0.10155759190658781, "grad_norm": 3.144223303360678, "learning_rate": 9.866849823041724e-06, "loss": 0.6066, "step": 586 }, { "epoch": 0.10173089837741817, "grad_norm": 2.765732809605035, "learning_rate": 9.866205580213314e-06, "loss": 0.604, "step": 587 }, { "epoch": 0.10190420484824853, "grad_norm": 12.421498974090376, "learning_rate": 9.865559803704832e-06, "loss": 0.6177, "step": 588 }, { "epoch": 0.10207751131907887, "grad_norm": 3.087864816052431, "learning_rate": 9.864912493719805e-06, "loss": 0.6093, "step": 589 }, { "epoch": 0.10225081778990923, "grad_norm": 3.100596074883347, "learning_rate": 9.864263650462247e-06, "loss": 0.6073, "step": 590 }, { "epoch": 0.10242412426073959, "grad_norm": 2.6092584791375204, "learning_rate": 9.863613274136654e-06, "loss": 0.5995, "step": 591 }, { "epoch": 0.10259743073156995, "grad_norm": 2.576318975851907, "learning_rate": 9.862961364948005e-06, "loss": 0.7003, "step": 592 }, { "epoch": 0.10277073720240029, "grad_norm": 2.4897765600803132, "learning_rate": 9.862307923101759e-06, "loss": 0.5834, "step": 593 }, { "epoch": 0.10294404367323065, "grad_norm": 3.490796037730046, "learning_rate": 9.861652948803866e-06, "loss": 0.5506, "step": 594 }, { "epoch": 0.10311735014406101, "grad_norm": 2.408733038431817, "learning_rate": 9.86099644226075e-06, "loss": 0.5906, "step": 595 }, { "epoch": 0.10329065661489135, "grad_norm": 3.0774844929165637, "learning_rate": 9.860338403679323e-06, "loss": 0.5215, "step": 596 }, { "epoch": 0.10346396308572171, "grad_norm": 2.8768449049004436, "learning_rate": 9.859678833266981e-06, "loss": 0.6663, "step": 597 }, { "epoch": 0.10363726955655207, "grad_norm": 4.285844912832777, "learning_rate": 9.859017731231597e-06, "loss": 0.6073, "step": 598 }, { "epoch": 0.10381057602738242, "grad_norm": 3.3143441633604867, "learning_rate": 9.858355097781531e-06, "loss": 0.6034, "step": 599 }, { "epoch": 0.10398388249821278, "grad_norm": 6.217346389114062, "learning_rate": 9.857690933125628e-06, "loss": 0.6399, "step": 600 }, { "epoch": 0.10415718896904314, "grad_norm": 2.4758374358199626, "learning_rate": 9.857025237473209e-06, "loss": 0.637, "step": 601 }, { "epoch": 0.10433049543987348, "grad_norm": 2.473925299557571, "learning_rate": 9.856358011034083e-06, "loss": 0.5674, "step": 602 }, { "epoch": 0.10450380191070384, "grad_norm": 3.4853240401272574, "learning_rate": 9.85568925401854e-06, "loss": 0.5835, "step": 603 }, { "epoch": 0.1046771083815342, "grad_norm": 2.968066085148552, "learning_rate": 9.855018966637346e-06, "loss": 0.6142, "step": 604 }, { "epoch": 0.10485041485236456, "grad_norm": 3.699544008010711, "learning_rate": 9.854347149101765e-06, "loss": 0.6224, "step": 605 }, { "epoch": 0.1050237213231949, "grad_norm": 2.479851264490001, "learning_rate": 9.853673801623526e-06, "loss": 0.5299, "step": 606 }, { "epoch": 0.10519702779402526, "grad_norm": 2.6804838368205943, "learning_rate": 9.85299892441485e-06, "loss": 0.6601, "step": 607 }, { "epoch": 0.10537033426485562, "grad_norm": 3.0191951466613207, "learning_rate": 9.852322517688437e-06, "loss": 0.5162, "step": 608 }, { "epoch": 0.10554364073568596, "grad_norm": 2.4158970572142535, "learning_rate": 9.851644581657473e-06, "loss": 0.5442, "step": 609 }, { "epoch": 0.10571694720651632, "grad_norm": 3.2944166191815105, "learning_rate": 9.850965116535618e-06, "loss": 0.6367, "step": 610 }, { "epoch": 0.10589025367734668, "grad_norm": 3.0794044384137154, "learning_rate": 9.850284122537023e-06, "loss": 0.5425, "step": 611 }, { "epoch": 0.10606356014817703, "grad_norm": 3.824400484189805, "learning_rate": 9.849601599876315e-06, "loss": 0.678, "step": 612 }, { "epoch": 0.10623686661900739, "grad_norm": 2.618340670945653, "learning_rate": 9.848917548768604e-06, "loss": 0.5648, "step": 613 }, { "epoch": 0.10641017308983775, "grad_norm": 3.3279653200364785, "learning_rate": 9.84823196942948e-06, "loss": 0.5194, "step": 614 }, { "epoch": 0.10658347956066809, "grad_norm": 2.8174415415168346, "learning_rate": 9.847544862075023e-06, "loss": 0.6746, "step": 615 }, { "epoch": 0.10675678603149845, "grad_norm": 4.167258777195333, "learning_rate": 9.846856226921782e-06, "loss": 0.6381, "step": 616 }, { "epoch": 0.10693009250232881, "grad_norm": 2.407175003794626, "learning_rate": 9.846166064186796e-06, "loss": 0.558, "step": 617 }, { "epoch": 0.10710339897315917, "grad_norm": 3.32617458363353, "learning_rate": 9.845474374087584e-06, "loss": 0.7288, "step": 618 }, { "epoch": 0.10727670544398951, "grad_norm": 3.1412633935776806, "learning_rate": 9.844781156842147e-06, "loss": 0.6117, "step": 619 }, { "epoch": 0.10745001191481987, "grad_norm": 2.5308862205927016, "learning_rate": 9.844086412668963e-06, "loss": 0.6463, "step": 620 }, { "epoch": 0.10762331838565023, "grad_norm": 2.7098920280461174, "learning_rate": 9.843390141786995e-06, "loss": 0.6663, "step": 621 }, { "epoch": 0.10779662485648057, "grad_norm": 2.5339659307488445, "learning_rate": 9.842692344415688e-06, "loss": 0.6142, "step": 622 }, { "epoch": 0.10796993132731093, "grad_norm": 3.024016275077416, "learning_rate": 9.841993020774964e-06, "loss": 0.6653, "step": 623 }, { "epoch": 0.10814323779814129, "grad_norm": 6.8808524959916095, "learning_rate": 9.841292171085232e-06, "loss": 0.6293, "step": 624 }, { "epoch": 0.10831654426897164, "grad_norm": 2.766373110098021, "learning_rate": 9.840589795567376e-06, "loss": 0.6269, "step": 625 }, { "epoch": 0.108489850739802, "grad_norm": 3.291564557224536, "learning_rate": 9.839885894442763e-06, "loss": 0.6195, "step": 626 }, { "epoch": 0.10866315721063236, "grad_norm": 3.1935800781544903, "learning_rate": 9.839180467933244e-06, "loss": 0.6509, "step": 627 }, { "epoch": 0.1088364636814627, "grad_norm": 2.4401625282385386, "learning_rate": 9.838473516261144e-06, "loss": 0.5617, "step": 628 }, { "epoch": 0.10900977015229306, "grad_norm": 2.8072821783776014, "learning_rate": 9.837765039649277e-06, "loss": 0.601, "step": 629 }, { "epoch": 0.10918307662312342, "grad_norm": 2.2126145372449644, "learning_rate": 9.83705503832093e-06, "loss": 0.6041, "step": 630 }, { "epoch": 0.10935638309395378, "grad_norm": 3.211112832722477, "learning_rate": 9.836343512499875e-06, "loss": 0.6364, "step": 631 }, { "epoch": 0.10952968956478412, "grad_norm": 2.982169596719212, "learning_rate": 9.835630462410363e-06, "loss": 0.6511, "step": 632 }, { "epoch": 0.10970299603561448, "grad_norm": 2.468480397463193, "learning_rate": 9.834915888277129e-06, "loss": 0.5644, "step": 633 }, { "epoch": 0.10987630250644484, "grad_norm": 2.3237674070949037, "learning_rate": 9.834199790325378e-06, "loss": 0.617, "step": 634 }, { "epoch": 0.11004960897727518, "grad_norm": 4.8242983383676705, "learning_rate": 9.833482168780807e-06, "loss": 0.5949, "step": 635 }, { "epoch": 0.11022291544810554, "grad_norm": 2.661014696397145, "learning_rate": 9.832763023869592e-06, "loss": 0.5525, "step": 636 }, { "epoch": 0.1103962219189359, "grad_norm": 2.62855753746184, "learning_rate": 9.832042355818378e-06, "loss": 0.6418, "step": 637 }, { "epoch": 0.11056952838976625, "grad_norm": 2.6146636028891597, "learning_rate": 9.8313201648543e-06, "loss": 0.5916, "step": 638 }, { "epoch": 0.1107428348605966, "grad_norm": 3.289882136184736, "learning_rate": 9.830596451204974e-06, "loss": 0.6503, "step": 639 }, { "epoch": 0.11091614133142697, "grad_norm": 2.8337186708811655, "learning_rate": 9.82987121509849e-06, "loss": 0.7008, "step": 640 }, { "epoch": 0.11108944780225731, "grad_norm": 4.041492717286803, "learning_rate": 9.829144456763422e-06, "loss": 0.5432, "step": 641 }, { "epoch": 0.11126275427308767, "grad_norm": 2.33303871452255, "learning_rate": 9.828416176428819e-06, "loss": 0.5721, "step": 642 }, { "epoch": 0.11143606074391803, "grad_norm": 2.639007625043011, "learning_rate": 9.827686374324214e-06, "loss": 0.6051, "step": 643 }, { "epoch": 0.11160936721474839, "grad_norm": 2.3121242382894556, "learning_rate": 9.82695505067962e-06, "loss": 0.544, "step": 644 }, { "epoch": 0.11178267368557873, "grad_norm": 2.198920034055818, "learning_rate": 9.826222205725529e-06, "loss": 0.5584, "step": 645 }, { "epoch": 0.11195598015640909, "grad_norm": 3.717753992058603, "learning_rate": 9.825487839692907e-06, "loss": 0.5875, "step": 646 }, { "epoch": 0.11212928662723945, "grad_norm": 2.323392917075492, "learning_rate": 9.824751952813207e-06, "loss": 0.5867, "step": 647 }, { "epoch": 0.1123025930980698, "grad_norm": 2.347153705321993, "learning_rate": 9.824014545318359e-06, "loss": 0.6913, "step": 648 }, { "epoch": 0.11247589956890015, "grad_norm": 2.895384749910761, "learning_rate": 9.823275617440768e-06, "loss": 0.6369, "step": 649 }, { "epoch": 0.11264920603973051, "grad_norm": 2.4273803163773433, "learning_rate": 9.822535169413325e-06, "loss": 0.5378, "step": 650 }, { "epoch": 0.11282251251056086, "grad_norm": 2.8061049805522895, "learning_rate": 9.821793201469394e-06, "loss": 0.5264, "step": 651 }, { "epoch": 0.11299581898139122, "grad_norm": 4.4754757354326715, "learning_rate": 9.821049713842824e-06, "loss": 0.5973, "step": 652 }, { "epoch": 0.11316912545222158, "grad_norm": 2.4403564896651866, "learning_rate": 9.820304706767938e-06, "loss": 0.5656, "step": 653 }, { "epoch": 0.11334243192305192, "grad_norm": 2.621903171877813, "learning_rate": 9.819558180479538e-06, "loss": 0.6757, "step": 654 }, { "epoch": 0.11351573839388228, "grad_norm": 2.623230880623398, "learning_rate": 9.818810135212907e-06, "loss": 0.6105, "step": 655 }, { "epoch": 0.11368904486471264, "grad_norm": 2.6985162091878236, "learning_rate": 9.818060571203807e-06, "loss": 0.5357, "step": 656 }, { "epoch": 0.113862351335543, "grad_norm": 2.845092514874578, "learning_rate": 9.817309488688477e-06, "loss": 0.5005, "step": 657 }, { "epoch": 0.11403565780637334, "grad_norm": 2.351492966461336, "learning_rate": 9.816556887903639e-06, "loss": 0.567, "step": 658 }, { "epoch": 0.1142089642772037, "grad_norm": 2.5658265474774957, "learning_rate": 9.815802769086484e-06, "loss": 0.6567, "step": 659 }, { "epoch": 0.11438227074803406, "grad_norm": 2.5847831824330965, "learning_rate": 9.815047132474693e-06, "loss": 0.5971, "step": 660 }, { "epoch": 0.1145555772188644, "grad_norm": 3.1461676449466727, "learning_rate": 9.814289978306415e-06, "loss": 0.5388, "step": 661 }, { "epoch": 0.11472888368969476, "grad_norm": 2.4714859246636203, "learning_rate": 9.813531306820284e-06, "loss": 0.6397, "step": 662 }, { "epoch": 0.11490219016052512, "grad_norm": 4.892747376885241, "learning_rate": 9.812771118255411e-06, "loss": 0.6805, "step": 663 }, { "epoch": 0.11507549663135547, "grad_norm": 3.3158737270040715, "learning_rate": 9.812009412851382e-06, "loss": 0.6297, "step": 664 }, { "epoch": 0.11524880310218583, "grad_norm": 12.190811670448117, "learning_rate": 9.811246190848267e-06, "loss": 0.6409, "step": 665 }, { "epoch": 0.11542210957301619, "grad_norm": 2.543665251868723, "learning_rate": 9.810481452486606e-06, "loss": 0.6039, "step": 666 }, { "epoch": 0.11559541604384654, "grad_norm": 3.352639498520995, "learning_rate": 9.809715198007424e-06, "loss": 0.6903, "step": 667 }, { "epoch": 0.11576872251467689, "grad_norm": 6.646490051161176, "learning_rate": 9.808947427652223e-06, "loss": 0.6055, "step": 668 }, { "epoch": 0.11594202898550725, "grad_norm": 3.561619569007553, "learning_rate": 9.808178141662976e-06, "loss": 0.7013, "step": 669 }, { "epoch": 0.1161153354563376, "grad_norm": 2.2109991397839384, "learning_rate": 9.80740734028214e-06, "loss": 0.6276, "step": 670 }, { "epoch": 0.11628864192716795, "grad_norm": 3.4089169501187238, "learning_rate": 9.806635023752651e-06, "loss": 0.6542, "step": 671 }, { "epoch": 0.11646194839799831, "grad_norm": 2.4255563379234344, "learning_rate": 9.805861192317918e-06, "loss": 0.6749, "step": 672 }, { "epoch": 0.11663525486882867, "grad_norm": 4.2186276721476625, "learning_rate": 9.805085846221828e-06, "loss": 0.5767, "step": 673 }, { "epoch": 0.11680856133965901, "grad_norm": 9.216856435190557, "learning_rate": 9.804308985708748e-06, "loss": 0.6049, "step": 674 }, { "epoch": 0.11698186781048937, "grad_norm": 3.917555118215342, "learning_rate": 9.80353061102352e-06, "loss": 0.5992, "step": 675 }, { "epoch": 0.11715517428131973, "grad_norm": 2.3029879129568878, "learning_rate": 9.802750722411465e-06, "loss": 0.632, "step": 676 }, { "epoch": 0.11732848075215008, "grad_norm": 2.4776761325815793, "learning_rate": 9.801969320118381e-06, "loss": 0.5493, "step": 677 }, { "epoch": 0.11750178722298044, "grad_norm": 3.306234810534243, "learning_rate": 9.80118640439054e-06, "loss": 0.6233, "step": 678 }, { "epoch": 0.1176750936938108, "grad_norm": 2.780427762930543, "learning_rate": 9.800401975474692e-06, "loss": 0.6335, "step": 679 }, { "epoch": 0.11784840016464115, "grad_norm": 3.01895017414118, "learning_rate": 9.799616033618069e-06, "loss": 0.6814, "step": 680 }, { "epoch": 0.1180217066354715, "grad_norm": 2.715531642255959, "learning_rate": 9.798828579068374e-06, "loss": 0.6636, "step": 681 }, { "epoch": 0.11819501310630186, "grad_norm": 2.345964622003219, "learning_rate": 9.798039612073789e-06, "loss": 0.5273, "step": 682 }, { "epoch": 0.11836831957713222, "grad_norm": 2.630226971358879, "learning_rate": 9.797249132882972e-06, "loss": 0.5492, "step": 683 }, { "epoch": 0.11854162604796256, "grad_norm": 2.408446663780958, "learning_rate": 9.796457141745057e-06, "loss": 0.593, "step": 684 }, { "epoch": 0.11871493251879292, "grad_norm": 3.5263773549162067, "learning_rate": 9.79566363890966e-06, "loss": 0.5773, "step": 685 }, { "epoch": 0.11888823898962328, "grad_norm": 7.902837946593165, "learning_rate": 9.794868624626863e-06, "loss": 0.5816, "step": 686 }, { "epoch": 0.11906154546045362, "grad_norm": 2.3660822660365257, "learning_rate": 9.794072099147236e-06, "loss": 0.5667, "step": 687 }, { "epoch": 0.11923485193128398, "grad_norm": 2.6104750171624316, "learning_rate": 9.793274062721814e-06, "loss": 0.6447, "step": 688 }, { "epoch": 0.11940815840211434, "grad_norm": 2.5542238274708517, "learning_rate": 9.792474515602118e-06, "loss": 0.6852, "step": 689 }, { "epoch": 0.11958146487294469, "grad_norm": 3.5084212394488885, "learning_rate": 9.79167345804014e-06, "loss": 0.6834, "step": 690 }, { "epoch": 0.11975477134377505, "grad_norm": 2.5912842845869495, "learning_rate": 9.79087089028835e-06, "loss": 0.5292, "step": 691 }, { "epoch": 0.1199280778146054, "grad_norm": 2.71579759394766, "learning_rate": 9.790066812599687e-06, "loss": 0.537, "step": 692 }, { "epoch": 0.12010138428543576, "grad_norm": 2.243925214677202, "learning_rate": 9.78926122522758e-06, "loss": 0.5985, "step": 693 }, { "epoch": 0.12027469075626611, "grad_norm": 2.3407215008821805, "learning_rate": 9.788454128425921e-06, "loss": 0.6158, "step": 694 }, { "epoch": 0.12044799722709647, "grad_norm": 4.045123448900264, "learning_rate": 9.787645522449084e-06, "loss": 0.619, "step": 695 }, { "epoch": 0.12062130369792683, "grad_norm": 4.029905013214202, "learning_rate": 9.786835407551915e-06, "loss": 0.6157, "step": 696 }, { "epoch": 0.12079461016875717, "grad_norm": 2.6171102237194397, "learning_rate": 9.78602378398974e-06, "loss": 0.5498, "step": 697 }, { "epoch": 0.12096791663958753, "grad_norm": 2.607733804868798, "learning_rate": 9.785210652018358e-06, "loss": 0.5821, "step": 698 }, { "epoch": 0.12114122311041789, "grad_norm": 7.500055700940274, "learning_rate": 9.784396011894041e-06, "loss": 0.5972, "step": 699 }, { "epoch": 0.12131452958124823, "grad_norm": 3.6940685777887814, "learning_rate": 9.783579863873543e-06, "loss": 0.6246, "step": 700 }, { "epoch": 0.1214878360520786, "grad_norm": 2.710750931389678, "learning_rate": 9.782762208214083e-06, "loss": 0.6284, "step": 701 }, { "epoch": 0.12166114252290895, "grad_norm": 3.2125384775551016, "learning_rate": 9.781943045173368e-06, "loss": 0.5612, "step": 702 }, { "epoch": 0.1218344489937393, "grad_norm": 2.8491416562052896, "learning_rate": 9.781122375009567e-06, "loss": 0.6824, "step": 703 }, { "epoch": 0.12200775546456966, "grad_norm": 3.0609478392195304, "learning_rate": 9.780300197981334e-06, "loss": 0.6034, "step": 704 }, { "epoch": 0.12218106193540001, "grad_norm": 2.892644042031247, "learning_rate": 9.779476514347794e-06, "loss": 0.6425, "step": 705 }, { "epoch": 0.12235436840623037, "grad_norm": 4.105434190253986, "learning_rate": 9.778651324368546e-06, "loss": 0.6264, "step": 706 }, { "epoch": 0.12252767487706072, "grad_norm": 4.621774896626414, "learning_rate": 9.777824628303665e-06, "loss": 0.6511, "step": 707 }, { "epoch": 0.12270098134789108, "grad_norm": 3.020857886818992, "learning_rate": 9.776996426413703e-06, "loss": 0.6224, "step": 708 }, { "epoch": 0.12287428781872144, "grad_norm": 2.418426342221716, "learning_rate": 9.776166718959678e-06, "loss": 0.5765, "step": 709 }, { "epoch": 0.12304759428955178, "grad_norm": 3.132329368552942, "learning_rate": 9.775335506203097e-06, "loss": 0.5444, "step": 710 }, { "epoch": 0.12322090076038214, "grad_norm": 2.4969311227880797, "learning_rate": 9.774502788405924e-06, "loss": 0.6137, "step": 711 }, { "epoch": 0.1233942072312125, "grad_norm": 3.2074289343206592, "learning_rate": 9.773668565830613e-06, "loss": 0.5733, "step": 712 }, { "epoch": 0.12356751370204284, "grad_norm": 4.117018596830417, "learning_rate": 9.772832838740083e-06, "loss": 0.5132, "step": 713 }, { "epoch": 0.1237408201728732, "grad_norm": 11.233717406170943, "learning_rate": 9.77199560739773e-06, "loss": 0.5371, "step": 714 }, { "epoch": 0.12391412664370356, "grad_norm": 2.95637476846154, "learning_rate": 9.771156872067423e-06, "loss": 0.6362, "step": 715 }, { "epoch": 0.12408743311453391, "grad_norm": 4.263578065894264, "learning_rate": 9.770316633013508e-06, "loss": 0.5609, "step": 716 }, { "epoch": 0.12426073958536427, "grad_norm": 5.199888478020275, "learning_rate": 9.7694748905008e-06, "loss": 0.775, "step": 717 }, { "epoch": 0.12443404605619462, "grad_norm": 2.121049671734262, "learning_rate": 9.768631644794595e-06, "loss": 0.643, "step": 718 }, { "epoch": 0.12460735252702498, "grad_norm": 2.245965572176155, "learning_rate": 9.767786896160653e-06, "loss": 0.5724, "step": 719 }, { "epoch": 0.12478065899785533, "grad_norm": 2.1378259294503685, "learning_rate": 9.766940644865216e-06, "loss": 0.5744, "step": 720 }, { "epoch": 0.12495396546868569, "grad_norm": 2.1324218228212874, "learning_rate": 9.766092891174998e-06, "loss": 0.5564, "step": 721 }, { "epoch": 0.12512727193951603, "grad_norm": 2.4363120697121734, "learning_rate": 9.765243635357185e-06, "loss": 0.586, "step": 722 }, { "epoch": 0.1253005784103464, "grad_norm": 3.3994772421820243, "learning_rate": 9.764392877679432e-06, "loss": 0.6142, "step": 723 }, { "epoch": 0.12547388488117675, "grad_norm": 2.3386171045388355, "learning_rate": 9.763540618409876e-06, "loss": 0.6094, "step": 724 }, { "epoch": 0.1256471913520071, "grad_norm": 2.086992015003118, "learning_rate": 9.762686857817122e-06, "loss": 0.5554, "step": 725 }, { "epoch": 0.12582049782283747, "grad_norm": 2.215763930794365, "learning_rate": 9.761831596170252e-06, "loss": 0.583, "step": 726 }, { "epoch": 0.12599380429366783, "grad_norm": 1.9476810165320988, "learning_rate": 9.760974833738815e-06, "loss": 0.4851, "step": 727 }, { "epoch": 0.12616711076449816, "grad_norm": 2.6030377703009777, "learning_rate": 9.760116570792839e-06, "loss": 0.5898, "step": 728 }, { "epoch": 0.12634041723532852, "grad_norm": 2.3323302666560624, "learning_rate": 9.75925680760282e-06, "loss": 0.6665, "step": 729 }, { "epoch": 0.12651372370615888, "grad_norm": 3.5216835967380904, "learning_rate": 9.75839554443973e-06, "loss": 0.6893, "step": 730 }, { "epoch": 0.12668703017698923, "grad_norm": 2.297514787701213, "learning_rate": 9.757532781575014e-06, "loss": 0.5255, "step": 731 }, { "epoch": 0.1268603366478196, "grad_norm": 2.9963105549596882, "learning_rate": 9.756668519280587e-06, "loss": 0.5891, "step": 732 }, { "epoch": 0.12703364311864995, "grad_norm": 2.4019498791151577, "learning_rate": 9.75580275782884e-06, "loss": 0.5198, "step": 733 }, { "epoch": 0.12720694958948028, "grad_norm": 2.6374247654689706, "learning_rate": 9.754935497492634e-06, "loss": 0.5478, "step": 734 }, { "epoch": 0.12738025606031064, "grad_norm": 2.295196628887477, "learning_rate": 9.754066738545302e-06, "loss": 0.6285, "step": 735 }, { "epoch": 0.127553562531141, "grad_norm": 2.5461208680323106, "learning_rate": 9.753196481260654e-06, "loss": 0.5064, "step": 736 }, { "epoch": 0.12772686900197136, "grad_norm": 2.6322364963471276, "learning_rate": 9.752324725912962e-06, "loss": 0.5607, "step": 737 }, { "epoch": 0.12790017547280172, "grad_norm": 12.024302413157333, "learning_rate": 9.751451472776983e-06, "loss": 0.6057, "step": 738 }, { "epoch": 0.12807348194363208, "grad_norm": 2.1994587481947687, "learning_rate": 9.750576722127938e-06, "loss": 0.6014, "step": 739 }, { "epoch": 0.12824678841446244, "grad_norm": 3.8447607828369437, "learning_rate": 9.749700474241522e-06, "loss": 0.5334, "step": 740 }, { "epoch": 0.12842009488529277, "grad_norm": 2.8850936409524564, "learning_rate": 9.7488227293939e-06, "loss": 0.6503, "step": 741 }, { "epoch": 0.12859340135612313, "grad_norm": 3.1590645679545033, "learning_rate": 9.74794348786171e-06, "loss": 0.578, "step": 742 }, { "epoch": 0.12876670782695349, "grad_norm": 2.2510304078585883, "learning_rate": 9.747062749922068e-06, "loss": 0.6143, "step": 743 }, { "epoch": 0.12894001429778384, "grad_norm": 2.6469927184024753, "learning_rate": 9.746180515852549e-06, "loss": 0.5645, "step": 744 }, { "epoch": 0.1291133207686142, "grad_norm": 2.6172545649058327, "learning_rate": 9.745296785931208e-06, "loss": 0.6435, "step": 745 }, { "epoch": 0.12928662723944456, "grad_norm": 2.6416693929365396, "learning_rate": 9.744411560436572e-06, "loss": 0.6196, "step": 746 }, { "epoch": 0.1294599337102749, "grad_norm": 2.479875294543255, "learning_rate": 9.743524839647638e-06, "loss": 0.5444, "step": 747 }, { "epoch": 0.12963324018110525, "grad_norm": 3.364314862025597, "learning_rate": 9.74263662384387e-06, "loss": 0.6191, "step": 748 }, { "epoch": 0.1298065466519356, "grad_norm": 4.509092702158952, "learning_rate": 9.741746913305208e-06, "loss": 0.5301, "step": 749 }, { "epoch": 0.12997985312276597, "grad_norm": 2.768238687525804, "learning_rate": 9.74085570831206e-06, "loss": 0.6256, "step": 750 }, { "epoch": 0.13015315959359633, "grad_norm": 11.856064194074065, "learning_rate": 9.739963009145312e-06, "loss": 0.6072, "step": 751 }, { "epoch": 0.1303264660644267, "grad_norm": 2.587359983842557, "learning_rate": 9.73906881608631e-06, "loss": 0.613, "step": 752 }, { "epoch": 0.13049977253525705, "grad_norm": 2.3433519717662623, "learning_rate": 9.738173129416879e-06, "loss": 0.6325, "step": 753 }, { "epoch": 0.13067307900608738, "grad_norm": 2.5718251707145865, "learning_rate": 9.73727594941931e-06, "loss": 0.5064, "step": 754 }, { "epoch": 0.13084638547691774, "grad_norm": 4.174028828011085, "learning_rate": 9.736377276376372e-06, "loss": 0.6146, "step": 755 }, { "epoch": 0.1310196919477481, "grad_norm": 3.080969483241802, "learning_rate": 9.735477110571294e-06, "loss": 0.6499, "step": 756 }, { "epoch": 0.13119299841857845, "grad_norm": 3.3476773161760964, "learning_rate": 9.734575452287784e-06, "loss": 0.6184, "step": 757 }, { "epoch": 0.1313663048894088, "grad_norm": 3.980071405527514, "learning_rate": 9.733672301810016e-06, "loss": 0.5594, "step": 758 }, { "epoch": 0.13153961136023917, "grad_norm": 2.368277913639649, "learning_rate": 9.732767659422635e-06, "loss": 0.5716, "step": 759 }, { "epoch": 0.1317129178310695, "grad_norm": 3.1098642643514656, "learning_rate": 9.731861525410758e-06, "loss": 0.6694, "step": 760 }, { "epoch": 0.13188622430189986, "grad_norm": 3.2709638012559834, "learning_rate": 9.730953900059971e-06, "loss": 0.5829, "step": 761 }, { "epoch": 0.13205953077273022, "grad_norm": 3.122080256445684, "learning_rate": 9.73004478365633e-06, "loss": 0.6662, "step": 762 }, { "epoch": 0.13223283724356058, "grad_norm": 2.4220178958956873, "learning_rate": 9.72913417648636e-06, "loss": 0.6266, "step": 763 }, { "epoch": 0.13240614371439094, "grad_norm": 2.4421646371200216, "learning_rate": 9.728222078837056e-06, "loss": 0.6589, "step": 764 }, { "epoch": 0.1325794501852213, "grad_norm": 2.442097023901634, "learning_rate": 9.727308490995886e-06, "loss": 0.618, "step": 765 }, { "epoch": 0.13275275665605166, "grad_norm": 3.127184857781871, "learning_rate": 9.726393413250781e-06, "loss": 0.5952, "step": 766 }, { "epoch": 0.132926063126882, "grad_norm": 2.7487123629438583, "learning_rate": 9.725476845890152e-06, "loss": 0.6008, "step": 767 }, { "epoch": 0.13309936959771235, "grad_norm": 2.2699537697148426, "learning_rate": 9.724558789202867e-06, "loss": 0.5991, "step": 768 }, { "epoch": 0.1332726760685427, "grad_norm": 3.1459941178045003, "learning_rate": 9.723639243478272e-06, "loss": 0.6631, "step": 769 }, { "epoch": 0.13344598253937306, "grad_norm": 2.5346499007998147, "learning_rate": 9.722718209006181e-06, "loss": 0.5776, "step": 770 }, { "epoch": 0.13361928901020342, "grad_norm": 2.323026590087224, "learning_rate": 9.721795686076874e-06, "loss": 0.6197, "step": 771 }, { "epoch": 0.13379259548103378, "grad_norm": 2.404877593383889, "learning_rate": 9.720871674981102e-06, "loss": 0.5787, "step": 772 }, { "epoch": 0.13396590195186414, "grad_norm": 2.3795204891016803, "learning_rate": 9.71994617601009e-06, "loss": 0.5568, "step": 773 }, { "epoch": 0.13413920842269447, "grad_norm": 2.4698409353383273, "learning_rate": 9.71901918945552e-06, "loss": 0.6288, "step": 774 }, { "epoch": 0.13431251489352483, "grad_norm": 2.391482327972818, "learning_rate": 9.718090715609555e-06, "loss": 0.7029, "step": 775 }, { "epoch": 0.1344858213643552, "grad_norm": 2.2722490545611187, "learning_rate": 9.717160754764821e-06, "loss": 0.6525, "step": 776 }, { "epoch": 0.13465912783518555, "grad_norm": 2.2934601517070785, "learning_rate": 9.716229307214411e-06, "loss": 0.6378, "step": 777 }, { "epoch": 0.1348324343060159, "grad_norm": 2.599552108848155, "learning_rate": 9.715296373251891e-06, "loss": 0.5818, "step": 778 }, { "epoch": 0.13500574077684627, "grad_norm": 3.3907266252867054, "learning_rate": 9.714361953171293e-06, "loss": 0.5186, "step": 779 }, { "epoch": 0.1351790472476766, "grad_norm": 2.510445827911172, "learning_rate": 9.713426047267117e-06, "loss": 0.6383, "step": 780 }, { "epoch": 0.13535235371850696, "grad_norm": 3.2291982586306167, "learning_rate": 9.712488655834334e-06, "loss": 0.527, "step": 781 }, { "epoch": 0.13552566018933732, "grad_norm": 2.5195839366694766, "learning_rate": 9.711549779168379e-06, "loss": 0.688, "step": 782 }, { "epoch": 0.13569896666016767, "grad_norm": 1.9924860223641363, "learning_rate": 9.71060941756516e-06, "loss": 0.4935, "step": 783 }, { "epoch": 0.13587227313099803, "grad_norm": 2.8789631372941527, "learning_rate": 9.709667571321049e-06, "loss": 0.5495, "step": 784 }, { "epoch": 0.1360455796018284, "grad_norm": 2.809333460584284, "learning_rate": 9.708724240732886e-06, "loss": 0.6374, "step": 785 }, { "epoch": 0.13621888607265875, "grad_norm": 3.4132640261953773, "learning_rate": 9.70777942609798e-06, "loss": 0.5824, "step": 786 }, { "epoch": 0.13639219254348908, "grad_norm": 2.770894054291713, "learning_rate": 9.706833127714112e-06, "loss": 0.647, "step": 787 }, { "epoch": 0.13656549901431944, "grad_norm": 2.017020177499869, "learning_rate": 9.705885345879523e-06, "loss": 0.5105, "step": 788 }, { "epoch": 0.1367388054851498, "grad_norm": 25.201476073671905, "learning_rate": 9.704936080892923e-06, "loss": 0.6457, "step": 789 }, { "epoch": 0.13691211195598016, "grad_norm": 2.7746375122098335, "learning_rate": 9.703985333053498e-06, "loss": 0.6857, "step": 790 }, { "epoch": 0.13708541842681052, "grad_norm": 2.9088558369714494, "learning_rate": 9.70303310266089e-06, "loss": 0.6648, "step": 791 }, { "epoch": 0.13725872489764088, "grad_norm": 2.2529552313617165, "learning_rate": 9.702079390015213e-06, "loss": 0.5757, "step": 792 }, { "epoch": 0.1374320313684712, "grad_norm": 2.426079809546905, "learning_rate": 9.701124195417048e-06, "loss": 0.5719, "step": 793 }, { "epoch": 0.13760533783930157, "grad_norm": 2.432335865571952, "learning_rate": 9.700167519167448e-06, "loss": 0.6108, "step": 794 }, { "epoch": 0.13777864431013193, "grad_norm": 3.3598812305658114, "learning_rate": 9.699209361567923e-06, "loss": 0.5313, "step": 795 }, { "epoch": 0.13795195078096228, "grad_norm": 2.2923902576038553, "learning_rate": 9.698249722920456e-06, "loss": 0.6145, "step": 796 }, { "epoch": 0.13812525725179264, "grad_norm": 30.15882381096397, "learning_rate": 9.697288603527499e-06, "loss": 0.6133, "step": 797 }, { "epoch": 0.138298563722623, "grad_norm": 2.2116703588135023, "learning_rate": 9.696326003691964e-06, "loss": 0.6157, "step": 798 }, { "epoch": 0.13847187019345336, "grad_norm": 2.323713626920763, "learning_rate": 9.695361923717236e-06, "loss": 0.6161, "step": 799 }, { "epoch": 0.1386451766642837, "grad_norm": 2.3423271317033123, "learning_rate": 9.69439636390716e-06, "loss": 0.527, "step": 800 }, { "epoch": 0.13881848313511405, "grad_norm": 4.27847587779515, "learning_rate": 9.693429324566056e-06, "loss": 0.5309, "step": 801 }, { "epoch": 0.1389917896059444, "grad_norm": 3.0713347716708777, "learning_rate": 9.692460805998703e-06, "loss": 0.584, "step": 802 }, { "epoch": 0.13916509607677477, "grad_norm": 3.08610536133174, "learning_rate": 9.69149080851035e-06, "loss": 0.652, "step": 803 }, { "epoch": 0.13933840254760513, "grad_norm": 3.4743937960062956, "learning_rate": 9.690519332406706e-06, "loss": 0.629, "step": 804 }, { "epoch": 0.1395117090184355, "grad_norm": 2.6363928138348016, "learning_rate": 9.689546377993955e-06, "loss": 0.559, "step": 805 }, { "epoch": 0.13968501548926582, "grad_norm": 2.5037553206846623, "learning_rate": 9.688571945578742e-06, "loss": 0.5998, "step": 806 }, { "epoch": 0.13985832196009618, "grad_norm": 2.4872015293511303, "learning_rate": 9.68759603546818e-06, "loss": 0.6468, "step": 807 }, { "epoch": 0.14003162843092654, "grad_norm": 2.3857707005445965, "learning_rate": 9.686618647969841e-06, "loss": 0.6255, "step": 808 }, { "epoch": 0.1402049349017569, "grad_norm": 2.9743335003614635, "learning_rate": 9.685639783391773e-06, "loss": 0.6276, "step": 809 }, { "epoch": 0.14037824137258725, "grad_norm": 3.5306835390352322, "learning_rate": 9.684659442042482e-06, "loss": 0.726, "step": 810 }, { "epoch": 0.1405515478434176, "grad_norm": 3.0073037641797726, "learning_rate": 9.683677624230943e-06, "loss": 0.633, "step": 811 }, { "epoch": 0.14072485431424797, "grad_norm": 2.3488107262064477, "learning_rate": 9.682694330266593e-06, "loss": 0.5621, "step": 812 }, { "epoch": 0.1408981607850783, "grad_norm": 2.7694173730459433, "learning_rate": 9.68170956045934e-06, "loss": 0.5674, "step": 813 }, { "epoch": 0.14107146725590866, "grad_norm": 2.1953373995857155, "learning_rate": 9.68072331511955e-06, "loss": 0.5918, "step": 814 }, { "epoch": 0.14124477372673902, "grad_norm": 2.1820239332515725, "learning_rate": 9.67973559455806e-06, "loss": 0.6287, "step": 815 }, { "epoch": 0.14141808019756938, "grad_norm": 2.3460160632133857, "learning_rate": 9.678746399086167e-06, "loss": 0.5702, "step": 816 }, { "epoch": 0.14159138666839974, "grad_norm": 4.738223963209168, "learning_rate": 9.677755729015636e-06, "loss": 0.5831, "step": 817 }, { "epoch": 0.1417646931392301, "grad_norm": 4.203542259190618, "learning_rate": 9.676763584658699e-06, "loss": 0.6361, "step": 818 }, { "epoch": 0.14193799961006043, "grad_norm": 2.4437233862313095, "learning_rate": 9.675769966328048e-06, "loss": 0.5003, "step": 819 }, { "epoch": 0.1421113060808908, "grad_norm": 3.499562034130815, "learning_rate": 9.674774874336839e-06, "loss": 0.5738, "step": 820 }, { "epoch": 0.14228461255172115, "grad_norm": 2.335519338543752, "learning_rate": 9.673778308998697e-06, "loss": 0.5551, "step": 821 }, { "epoch": 0.1424579190225515, "grad_norm": 2.8819148527033094, "learning_rate": 9.672780270627708e-06, "loss": 0.5781, "step": 822 }, { "epoch": 0.14263122549338186, "grad_norm": 2.344313814363177, "learning_rate": 9.671780759538426e-06, "loss": 0.5746, "step": 823 }, { "epoch": 0.14280453196421222, "grad_norm": 3.0198560576747897, "learning_rate": 9.670779776045864e-06, "loss": 0.667, "step": 824 }, { "epoch": 0.14297783843504258, "grad_norm": 3.19265619485411, "learning_rate": 9.6697773204655e-06, "loss": 0.6621, "step": 825 }, { "epoch": 0.1431511449058729, "grad_norm": 2.7090394385361973, "learning_rate": 9.668773393113282e-06, "loss": 0.5474, "step": 826 }, { "epoch": 0.14332445137670327, "grad_norm": 2.6439816463052854, "learning_rate": 9.667767994305614e-06, "loss": 0.6459, "step": 827 }, { "epoch": 0.14349775784753363, "grad_norm": 2.486190923860754, "learning_rate": 9.66676112435937e-06, "loss": 0.5516, "step": 828 }, { "epoch": 0.143671064318364, "grad_norm": 2.2849481050939207, "learning_rate": 9.665752783591882e-06, "loss": 0.592, "step": 829 }, { "epoch": 0.14384437078919435, "grad_norm": 2.426539034404487, "learning_rate": 9.664742972320949e-06, "loss": 0.5845, "step": 830 }, { "epoch": 0.1440176772600247, "grad_norm": 2.5973696553587677, "learning_rate": 9.663731690864832e-06, "loss": 0.6761, "step": 831 }, { "epoch": 0.14419098373085504, "grad_norm": 5.335407331167947, "learning_rate": 9.66271893954226e-06, "loss": 0.5485, "step": 832 }, { "epoch": 0.1443642902016854, "grad_norm": 2.352559139678941, "learning_rate": 9.661704718672416e-06, "loss": 0.5581, "step": 833 }, { "epoch": 0.14453759667251576, "grad_norm": 2.5366153096803017, "learning_rate": 9.660689028574955e-06, "loss": 0.5547, "step": 834 }, { "epoch": 0.14471090314334611, "grad_norm": 2.1859914757649803, "learning_rate": 9.659671869569993e-06, "loss": 0.7065, "step": 835 }, { "epoch": 0.14488420961417647, "grad_norm": 4.1250330131040265, "learning_rate": 9.658653241978103e-06, "loss": 0.5918, "step": 836 }, { "epoch": 0.14505751608500683, "grad_norm": 2.353672604483909, "learning_rate": 9.65763314612033e-06, "loss": 0.5882, "step": 837 }, { "epoch": 0.1452308225558372, "grad_norm": 2.7729582266393042, "learning_rate": 9.656611582318173e-06, "loss": 0.568, "step": 838 }, { "epoch": 0.14540412902666752, "grad_norm": 3.4123745936108416, "learning_rate": 9.655588550893602e-06, "loss": 0.6055, "step": 839 }, { "epoch": 0.14557743549749788, "grad_norm": 2.4692712843290248, "learning_rate": 9.654564052169044e-06, "loss": 0.5975, "step": 840 }, { "epoch": 0.14575074196832824, "grad_norm": 7.226985482748584, "learning_rate": 9.653538086467389e-06, "loss": 0.6065, "step": 841 }, { "epoch": 0.1459240484391586, "grad_norm": 2.363583120746715, "learning_rate": 9.65251065411199e-06, "loss": 0.5967, "step": 842 }, { "epoch": 0.14609735490998896, "grad_norm": 3.2519749898543924, "learning_rate": 9.651481755426663e-06, "loss": 0.6259, "step": 843 }, { "epoch": 0.14627066138081932, "grad_norm": 2.8152100355560132, "learning_rate": 9.650451390735685e-06, "loss": 0.6118, "step": 844 }, { "epoch": 0.14644396785164965, "grad_norm": 2.170911854509619, "learning_rate": 9.649419560363797e-06, "loss": 0.5745, "step": 845 }, { "epoch": 0.14661727432248, "grad_norm": 2.2935655683004517, "learning_rate": 9.6483862646362e-06, "loss": 0.5615, "step": 846 }, { "epoch": 0.14679058079331037, "grad_norm": 2.7086928324894313, "learning_rate": 9.647351503878556e-06, "loss": 0.6012, "step": 847 }, { "epoch": 0.14696388726414072, "grad_norm": 2.1712646092945227, "learning_rate": 9.646315278416993e-06, "loss": 0.6127, "step": 848 }, { "epoch": 0.14713719373497108, "grad_norm": 3.6693753800820352, "learning_rate": 9.645277588578095e-06, "loss": 0.5623, "step": 849 }, { "epoch": 0.14731050020580144, "grad_norm": 2.6059583729102593, "learning_rate": 9.644238434688913e-06, "loss": 0.5571, "step": 850 }, { "epoch": 0.1474838066766318, "grad_norm": 2.168208065195929, "learning_rate": 9.643197817076954e-06, "loss": 0.5939, "step": 851 }, { "epoch": 0.14765711314746213, "grad_norm": 3.2688742644817474, "learning_rate": 9.64215573607019e-06, "loss": 0.6456, "step": 852 }, { "epoch": 0.1478304196182925, "grad_norm": 2.533482625037172, "learning_rate": 9.641112191997052e-06, "loss": 0.6489, "step": 853 }, { "epoch": 0.14800372608912285, "grad_norm": 2.091702236081764, "learning_rate": 9.640067185186436e-06, "loss": 0.6156, "step": 854 }, { "epoch": 0.1481770325599532, "grad_norm": 2.427311221347176, "learning_rate": 9.639020715967697e-06, "loss": 0.6051, "step": 855 }, { "epoch": 0.14835033903078357, "grad_norm": 2.8715719737090892, "learning_rate": 9.637972784670648e-06, "loss": 0.5907, "step": 856 }, { "epoch": 0.14852364550161393, "grad_norm": 6.310266433274274, "learning_rate": 9.636923391625563e-06, "loss": 0.5671, "step": 857 }, { "epoch": 0.14869695197244426, "grad_norm": 3.288236232147462, "learning_rate": 9.635872537163183e-06, "loss": 0.5877, "step": 858 }, { "epoch": 0.14887025844327462, "grad_norm": 3.059296203077548, "learning_rate": 9.634820221614703e-06, "loss": 0.5869, "step": 859 }, { "epoch": 0.14904356491410498, "grad_norm": 2.3063048422450874, "learning_rate": 9.633766445311783e-06, "loss": 0.61, "step": 860 }, { "epoch": 0.14921687138493533, "grad_norm": 2.296899309485392, "learning_rate": 9.632711208586537e-06, "loss": 0.5905, "step": 861 }, { "epoch": 0.1493901778557657, "grad_norm": 2.2100426017925554, "learning_rate": 9.631654511771549e-06, "loss": 0.5839, "step": 862 }, { "epoch": 0.14956348432659605, "grad_norm": 2.2995940132285018, "learning_rate": 9.630596355199854e-06, "loss": 0.5372, "step": 863 }, { "epoch": 0.1497367907974264, "grad_norm": 2.7073018481874205, "learning_rate": 9.629536739204955e-06, "loss": 0.585, "step": 864 }, { "epoch": 0.14991009726825674, "grad_norm": 2.7414220610170412, "learning_rate": 9.628475664120803e-06, "loss": 0.5363, "step": 865 }, { "epoch": 0.1500834037390871, "grad_norm": 3.907824048842364, "learning_rate": 9.627413130281823e-06, "loss": 0.5754, "step": 866 }, { "epoch": 0.15025671020991746, "grad_norm": 2.714642411340786, "learning_rate": 9.626349138022893e-06, "loss": 0.5473, "step": 867 }, { "epoch": 0.15043001668074782, "grad_norm": 3.2432635025626473, "learning_rate": 9.625283687679347e-06, "loss": 0.648, "step": 868 }, { "epoch": 0.15060332315157818, "grad_norm": 5.28374474340835, "learning_rate": 9.624216779586987e-06, "loss": 0.6428, "step": 869 }, { "epoch": 0.15077662962240854, "grad_norm": 2.130048390533609, "learning_rate": 9.623148414082067e-06, "loss": 0.5521, "step": 870 }, { "epoch": 0.15094993609323887, "grad_norm": 3.2576418545018466, "learning_rate": 9.622078591501305e-06, "loss": 0.6171, "step": 871 }, { "epoch": 0.15112324256406923, "grad_norm": 2.845718508142136, "learning_rate": 9.621007312181878e-06, "loss": 0.5415, "step": 872 }, { "epoch": 0.15129654903489959, "grad_norm": 5.554969511513824, "learning_rate": 9.619934576461415e-06, "loss": 0.5826, "step": 873 }, { "epoch": 0.15146985550572994, "grad_norm": 2.467864065850732, "learning_rate": 9.618860384678017e-06, "loss": 0.593, "step": 874 }, { "epoch": 0.1516431619765603, "grad_norm": 2.6155606837618426, "learning_rate": 9.61778473717023e-06, "loss": 0.6541, "step": 875 }, { "epoch": 0.15181646844739066, "grad_norm": 2.0606281556682755, "learning_rate": 9.61670763427707e-06, "loss": 0.5974, "step": 876 }, { "epoch": 0.15198977491822102, "grad_norm": 2.270307894688239, "learning_rate": 9.615629076338005e-06, "loss": 0.6429, "step": 877 }, { "epoch": 0.15216308138905135, "grad_norm": 2.508322072486019, "learning_rate": 9.614549063692966e-06, "loss": 0.564, "step": 878 }, { "epoch": 0.1523363878598817, "grad_norm": 5.153951346129401, "learning_rate": 9.613467596682336e-06, "loss": 0.5542, "step": 879 }, { "epoch": 0.15250969433071207, "grad_norm": 2.583319093218079, "learning_rate": 9.612384675646965e-06, "loss": 0.6211, "step": 880 }, { "epoch": 0.15268300080154243, "grad_norm": 2.759772002449627, "learning_rate": 9.611300300928152e-06, "loss": 0.6276, "step": 881 }, { "epoch": 0.1528563072723728, "grad_norm": 3.1574270008424077, "learning_rate": 9.610214472867662e-06, "loss": 0.6481, "step": 882 }, { "epoch": 0.15302961374320315, "grad_norm": 3.0730001322724414, "learning_rate": 9.609127191807717e-06, "loss": 0.6466, "step": 883 }, { "epoch": 0.15320292021403348, "grad_norm": 2.127311968154773, "learning_rate": 9.60803845809099e-06, "loss": 0.6545, "step": 884 }, { "epoch": 0.15337622668486384, "grad_norm": 2.315854081344996, "learning_rate": 9.60694827206062e-06, "loss": 0.5556, "step": 885 }, { "epoch": 0.1535495331556942, "grad_norm": 5.049939518729781, "learning_rate": 9.605856634060203e-06, "loss": 0.5633, "step": 886 }, { "epoch": 0.15372283962652455, "grad_norm": 2.5896411697614505, "learning_rate": 9.604763544433783e-06, "loss": 0.609, "step": 887 }, { "epoch": 0.1538961460973549, "grad_norm": 2.68902245204678, "learning_rate": 9.603669003525872e-06, "loss": 0.6179, "step": 888 }, { "epoch": 0.15406945256818527, "grad_norm": 2.251482513539711, "learning_rate": 9.60257301168144e-06, "loss": 0.5707, "step": 889 }, { "epoch": 0.15424275903901563, "grad_norm": 3.1189051683727795, "learning_rate": 9.601475569245905e-06, "loss": 0.5759, "step": 890 }, { "epoch": 0.15441606550984596, "grad_norm": 2.107853191781067, "learning_rate": 9.600376676565149e-06, "loss": 0.6428, "step": 891 }, { "epoch": 0.15458937198067632, "grad_norm": 3.513459965352997, "learning_rate": 9.59927633398551e-06, "loss": 0.5588, "step": 892 }, { "epoch": 0.15476267845150668, "grad_norm": 2.785603331606666, "learning_rate": 9.598174541853784e-06, "loss": 0.6558, "step": 893 }, { "epoch": 0.15493598492233704, "grad_norm": 2.4776574440540022, "learning_rate": 9.59707130051722e-06, "loss": 0.562, "step": 894 }, { "epoch": 0.1551092913931674, "grad_norm": 2.490062555936623, "learning_rate": 9.595966610323527e-06, "loss": 0.6412, "step": 895 }, { "epoch": 0.15528259786399776, "grad_norm": 2.5407180601018435, "learning_rate": 9.594860471620868e-06, "loss": 0.5792, "step": 896 }, { "epoch": 0.1554559043348281, "grad_norm": 2.77629588636968, "learning_rate": 9.593752884757867e-06, "loss": 0.5876, "step": 897 }, { "epoch": 0.15562921080565845, "grad_norm": 3.0322738953015502, "learning_rate": 9.592643850083602e-06, "loss": 0.5657, "step": 898 }, { "epoch": 0.1558025172764888, "grad_norm": 3.1960371814607056, "learning_rate": 9.591533367947605e-06, "loss": 0.555, "step": 899 }, { "epoch": 0.15597582374731916, "grad_norm": 2.4284811361502037, "learning_rate": 9.59042143869987e-06, "loss": 0.6196, "step": 900 }, { "epoch": 0.15614913021814952, "grad_norm": 2.3619172823420027, "learning_rate": 9.589308062690835e-06, "loss": 0.594, "step": 901 }, { "epoch": 0.15632243668897988, "grad_norm": 3.7593712420597156, "learning_rate": 9.58819324027141e-06, "loss": 0.6257, "step": 902 }, { "epoch": 0.15649574315981024, "grad_norm": 2.592565907848182, "learning_rate": 9.58707697179295e-06, "loss": 0.6619, "step": 903 }, { "epoch": 0.15666904963064057, "grad_norm": 2.2128089537963724, "learning_rate": 9.585959257607272e-06, "loss": 0.5159, "step": 904 }, { "epoch": 0.15684235610147093, "grad_norm": 3.5744315765822736, "learning_rate": 9.584840098066642e-06, "loss": 0.6577, "step": 905 }, { "epoch": 0.1570156625723013, "grad_norm": 3.125261170759526, "learning_rate": 9.583719493523784e-06, "loss": 0.615, "step": 906 }, { "epoch": 0.15718896904313165, "grad_norm": 3.0219387056702485, "learning_rate": 9.582597444331882e-06, "loss": 0.6292, "step": 907 }, { "epoch": 0.157362275513962, "grad_norm": 2.5093613331251086, "learning_rate": 9.581473950844571e-06, "loss": 0.5663, "step": 908 }, { "epoch": 0.15753558198479237, "grad_norm": 2.791113838984866, "learning_rate": 9.58034901341594e-06, "loss": 0.5362, "step": 909 }, { "epoch": 0.15770888845562273, "grad_norm": 2.8919851636224796, "learning_rate": 9.57922263240054e-06, "loss": 0.6606, "step": 910 }, { "epoch": 0.15788219492645306, "grad_norm": 2.5592603707093113, "learning_rate": 9.578094808153365e-06, "loss": 0.5297, "step": 911 }, { "epoch": 0.15805550139728342, "grad_norm": 2.1651747462725552, "learning_rate": 9.576965541029877e-06, "loss": 0.6339, "step": 912 }, { "epoch": 0.15822880786811377, "grad_norm": 2.826532129843841, "learning_rate": 9.575834831385982e-06, "loss": 0.6204, "step": 913 }, { "epoch": 0.15840211433894413, "grad_norm": 2.58877904716585, "learning_rate": 9.57470267957805e-06, "loss": 0.5975, "step": 914 }, { "epoch": 0.1585754208097745, "grad_norm": 2.646923760185827, "learning_rate": 9.573569085962895e-06, "loss": 0.6747, "step": 915 }, { "epoch": 0.15874872728060485, "grad_norm": 1.8368317169288442, "learning_rate": 9.572434050897798e-06, "loss": 0.5232, "step": 916 }, { "epoch": 0.15892203375143518, "grad_norm": 2.069072261103749, "learning_rate": 9.57129757474048e-06, "loss": 0.5609, "step": 917 }, { "epoch": 0.15909534022226554, "grad_norm": 2.4056589129387906, "learning_rate": 9.570159657849131e-06, "loss": 0.6286, "step": 918 }, { "epoch": 0.1592686466930959, "grad_norm": 2.3585630928729318, "learning_rate": 9.569020300582385e-06, "loss": 0.5927, "step": 919 }, { "epoch": 0.15944195316392626, "grad_norm": 2.4468528271351904, "learning_rate": 9.567879503299331e-06, "loss": 0.578, "step": 920 }, { "epoch": 0.15961525963475662, "grad_norm": 3.0006506704779357, "learning_rate": 9.566737266359516e-06, "loss": 0.6535, "step": 921 }, { "epoch": 0.15978856610558698, "grad_norm": 2.1860304400043025, "learning_rate": 9.565593590122938e-06, "loss": 0.6098, "step": 922 }, { "epoch": 0.15996187257641734, "grad_norm": 2.28519331114911, "learning_rate": 9.564448474950048e-06, "loss": 0.5808, "step": 923 }, { "epoch": 0.16013517904724767, "grad_norm": 2.5730660637217255, "learning_rate": 9.56330192120175e-06, "loss": 0.6807, "step": 924 }, { "epoch": 0.16030848551807803, "grad_norm": 2.4220274094698806, "learning_rate": 9.562153929239407e-06, "loss": 0.6529, "step": 925 }, { "epoch": 0.16048179198890838, "grad_norm": 2.4783578758164477, "learning_rate": 9.561004499424828e-06, "loss": 0.5162, "step": 926 }, { "epoch": 0.16065509845973874, "grad_norm": 2.354401046075591, "learning_rate": 9.55985363212028e-06, "loss": 0.5801, "step": 927 }, { "epoch": 0.1608284049305691, "grad_norm": 2.57307239967712, "learning_rate": 9.55870132768848e-06, "loss": 0.5918, "step": 928 }, { "epoch": 0.16100171140139946, "grad_norm": 2.3985634102428155, "learning_rate": 9.5575475864926e-06, "loss": 0.5706, "step": 929 }, { "epoch": 0.1611750178722298, "grad_norm": 2.479483501150008, "learning_rate": 9.556392408896265e-06, "loss": 0.6704, "step": 930 }, { "epoch": 0.16134832434306015, "grad_norm": 3.01054192393905, "learning_rate": 9.555235795263551e-06, "loss": 0.5424, "step": 931 }, { "epoch": 0.1615216308138905, "grad_norm": 2.0032123530782453, "learning_rate": 9.554077745958987e-06, "loss": 0.5571, "step": 932 }, { "epoch": 0.16169493728472087, "grad_norm": 2.9846925902042845, "learning_rate": 9.552918261347554e-06, "loss": 0.6461, "step": 933 }, { "epoch": 0.16186824375555123, "grad_norm": 3.264547786473545, "learning_rate": 9.551757341794689e-06, "loss": 0.6134, "step": 934 }, { "epoch": 0.1620415502263816, "grad_norm": 2.714914986966135, "learning_rate": 9.55059498766628e-06, "loss": 0.5719, "step": 935 }, { "epoch": 0.16221485669721195, "grad_norm": 3.540898784508069, "learning_rate": 9.549431199328659e-06, "loss": 0.6097, "step": 936 }, { "epoch": 0.16238816316804228, "grad_norm": 2.2966633975200805, "learning_rate": 9.548265977148623e-06, "loss": 0.5906, "step": 937 }, { "epoch": 0.16256146963887264, "grad_norm": 2.309073995306185, "learning_rate": 9.547099321493411e-06, "loss": 0.5921, "step": 938 }, { "epoch": 0.162734776109703, "grad_norm": 2.1117942328637214, "learning_rate": 9.545931232730718e-06, "loss": 0.5947, "step": 939 }, { "epoch": 0.16290808258053335, "grad_norm": 2.2386235133549177, "learning_rate": 9.544761711228692e-06, "loss": 0.6791, "step": 940 }, { "epoch": 0.1630813890513637, "grad_norm": 2.411762653903589, "learning_rate": 9.54359075735593e-06, "loss": 0.5643, "step": 941 }, { "epoch": 0.16325469552219407, "grad_norm": 2.5760422805198626, "learning_rate": 9.54241837148148e-06, "loss": 0.558, "step": 942 }, { "epoch": 0.1634280019930244, "grad_norm": 2.3003949250833853, "learning_rate": 9.541244553974843e-06, "loss": 0.6532, "step": 943 }, { "epoch": 0.16360130846385476, "grad_norm": 4.124770132703995, "learning_rate": 9.54006930520597e-06, "loss": 0.5564, "step": 944 }, { "epoch": 0.16377461493468512, "grad_norm": 3.3259715236970218, "learning_rate": 9.538892625545264e-06, "loss": 0.658, "step": 945 }, { "epoch": 0.16394792140551548, "grad_norm": 3.6504212697427967, "learning_rate": 9.537714515363578e-06, "loss": 0.536, "step": 946 }, { "epoch": 0.16412122787634584, "grad_norm": 5.227070793514566, "learning_rate": 9.536534975032218e-06, "loss": 0.6093, "step": 947 }, { "epoch": 0.1642945343471762, "grad_norm": 3.0056775183234445, "learning_rate": 9.535354004922941e-06, "loss": 0.6801, "step": 948 }, { "epoch": 0.16446784081800656, "grad_norm": 2.5356429010623427, "learning_rate": 9.534171605407948e-06, "loss": 0.6558, "step": 949 }, { "epoch": 0.1646411472888369, "grad_norm": 2.4630700060092736, "learning_rate": 9.532987776859897e-06, "loss": 0.5175, "step": 950 }, { "epoch": 0.16481445375966725, "grad_norm": 2.2362064892274867, "learning_rate": 9.531802519651897e-06, "loss": 0.5272, "step": 951 }, { "epoch": 0.1649877602304976, "grad_norm": 32.14999062062093, "learning_rate": 9.530615834157504e-06, "loss": 0.5411, "step": 952 }, { "epoch": 0.16516106670132796, "grad_norm": 3.015697270875637, "learning_rate": 9.529427720750724e-06, "loss": 0.631, "step": 953 }, { "epoch": 0.16533437317215832, "grad_norm": 2.3592109753332524, "learning_rate": 9.528238179806015e-06, "loss": 0.5338, "step": 954 }, { "epoch": 0.16550767964298868, "grad_norm": 2.6984926141461667, "learning_rate": 9.527047211698284e-06, "loss": 0.6298, "step": 955 }, { "epoch": 0.165680986113819, "grad_norm": 3.276996623451241, "learning_rate": 9.525854816802889e-06, "loss": 0.5412, "step": 956 }, { "epoch": 0.16585429258464937, "grad_norm": 2.5646742804199953, "learning_rate": 9.524660995495635e-06, "loss": 0.5821, "step": 957 }, { "epoch": 0.16602759905547973, "grad_norm": 4.925701995318957, "learning_rate": 9.52346574815278e-06, "loss": 0.663, "step": 958 }, { "epoch": 0.1662009055263101, "grad_norm": 3.5539311947656915, "learning_rate": 9.522269075151027e-06, "loss": 0.5773, "step": 959 }, { "epoch": 0.16637421199714045, "grad_norm": 4.202248621174743, "learning_rate": 9.521070976867535e-06, "loss": 0.6214, "step": 960 }, { "epoch": 0.1665475184679708, "grad_norm": 2.2212418546325807, "learning_rate": 9.519871453679904e-06, "loss": 0.5351, "step": 961 }, { "epoch": 0.16672082493880117, "grad_norm": 3.0971242588645436, "learning_rate": 9.518670505966189e-06, "loss": 0.6746, "step": 962 }, { "epoch": 0.1668941314096315, "grad_norm": 2.3300789465646314, "learning_rate": 9.517468134104891e-06, "loss": 0.6149, "step": 963 }, { "epoch": 0.16706743788046186, "grad_norm": 4.857359276037987, "learning_rate": 9.516264338474963e-06, "loss": 0.5085, "step": 964 }, { "epoch": 0.16724074435129221, "grad_norm": 3.516244718011669, "learning_rate": 9.515059119455806e-06, "loss": 0.5505, "step": 965 }, { "epoch": 0.16741405082212257, "grad_norm": 2.0358307796209, "learning_rate": 9.513852477427264e-06, "loss": 0.4595, "step": 966 }, { "epoch": 0.16758735729295293, "grad_norm": 2.2911026529691325, "learning_rate": 9.512644412769638e-06, "loss": 0.5627, "step": 967 }, { "epoch": 0.1677606637637833, "grad_norm": 3.35109192559926, "learning_rate": 9.51143492586367e-06, "loss": 0.6099, "step": 968 }, { "epoch": 0.16793397023461362, "grad_norm": 2.6337093478116236, "learning_rate": 9.510224017090555e-06, "loss": 0.5999, "step": 969 }, { "epoch": 0.16810727670544398, "grad_norm": 4.421527125704644, "learning_rate": 9.509011686831936e-06, "loss": 0.6351, "step": 970 }, { "epoch": 0.16828058317627434, "grad_norm": 2.404232133390096, "learning_rate": 9.5077979354699e-06, "loss": 0.6365, "step": 971 }, { "epoch": 0.1684538896471047, "grad_norm": 2.502993508560797, "learning_rate": 9.506582763386987e-06, "loss": 0.575, "step": 972 }, { "epoch": 0.16862719611793506, "grad_norm": 2.4184551891217714, "learning_rate": 9.505366170966181e-06, "loss": 0.6193, "step": 973 }, { "epoch": 0.16880050258876542, "grad_norm": 4.3697703958005825, "learning_rate": 9.504148158590915e-06, "loss": 0.5527, "step": 974 }, { "epoch": 0.16897380905959578, "grad_norm": 2.3124854028206636, "learning_rate": 9.50292872664507e-06, "loss": 0.5588, "step": 975 }, { "epoch": 0.1691471155304261, "grad_norm": 2.6407209530080356, "learning_rate": 9.501707875512972e-06, "loss": 0.5215, "step": 976 }, { "epoch": 0.16932042200125647, "grad_norm": 3.8236606294166586, "learning_rate": 9.500485605579398e-06, "loss": 0.6889, "step": 977 }, { "epoch": 0.16949372847208682, "grad_norm": 2.970297921525272, "learning_rate": 9.49926191722957e-06, "loss": 0.6913, "step": 978 }, { "epoch": 0.16966703494291718, "grad_norm": 5.227296290145245, "learning_rate": 9.498036810849158e-06, "loss": 0.6327, "step": 979 }, { "epoch": 0.16984034141374754, "grad_norm": 2.462342393170747, "learning_rate": 9.496810286824278e-06, "loss": 0.5432, "step": 980 }, { "epoch": 0.1700136478845779, "grad_norm": 3.06526498572205, "learning_rate": 9.495582345541492e-06, "loss": 0.5558, "step": 981 }, { "epoch": 0.17018695435540823, "grad_norm": 2.4362462182944724, "learning_rate": 9.494352987387811e-06, "loss": 0.5931, "step": 982 }, { "epoch": 0.1703602608262386, "grad_norm": 2.46435072643231, "learning_rate": 9.49312221275069e-06, "loss": 0.5086, "step": 983 }, { "epoch": 0.17053356729706895, "grad_norm": 2.153529312933454, "learning_rate": 9.491890022018033e-06, "loss": 0.5678, "step": 984 }, { "epoch": 0.1707068737678993, "grad_norm": 5.994871801025323, "learning_rate": 9.490656415578188e-06, "loss": 0.5525, "step": 985 }, { "epoch": 0.17088018023872967, "grad_norm": 2.344753467265669, "learning_rate": 9.489421393819953e-06, "loss": 0.6171, "step": 986 }, { "epoch": 0.17105348670956003, "grad_norm": 2.519080375227663, "learning_rate": 9.488184957132564e-06, "loss": 0.5305, "step": 987 }, { "epoch": 0.17122679318039039, "grad_norm": 2.3228729838745283, "learning_rate": 9.486947105905713e-06, "loss": 0.635, "step": 988 }, { "epoch": 0.17140009965122072, "grad_norm": 2.283897446555888, "learning_rate": 9.485707840529532e-06, "loss": 0.5459, "step": 989 }, { "epoch": 0.17157340612205108, "grad_norm": 3.941647296107583, "learning_rate": 9.4844671613946e-06, "loss": 0.5497, "step": 990 }, { "epoch": 0.17174671259288143, "grad_norm": 2.0946017337402525, "learning_rate": 9.483225068891938e-06, "loss": 0.6201, "step": 991 }, { "epoch": 0.1719200190637118, "grad_norm": 3.6562050179912573, "learning_rate": 9.48198156341302e-06, "loss": 0.5755, "step": 992 }, { "epoch": 0.17209332553454215, "grad_norm": 2.4198770970150707, "learning_rate": 9.48073664534976e-06, "loss": 0.6227, "step": 993 }, { "epoch": 0.1722666320053725, "grad_norm": 2.2980154648144473, "learning_rate": 9.479490315094515e-06, "loss": 0.6094, "step": 994 }, { "epoch": 0.17243993847620284, "grad_norm": 2.1281170945062433, "learning_rate": 9.478242573040098e-06, "loss": 0.5849, "step": 995 }, { "epoch": 0.1726132449470332, "grad_norm": 2.3869429469444845, "learning_rate": 9.476993419579751e-06, "loss": 0.622, "step": 996 }, { "epoch": 0.17278655141786356, "grad_norm": 3.2154428669311788, "learning_rate": 9.475742855107173e-06, "loss": 0.6197, "step": 997 }, { "epoch": 0.17295985788869392, "grad_norm": 7.1700903543499335, "learning_rate": 9.474490880016507e-06, "loss": 0.6279, "step": 998 }, { "epoch": 0.17313316435952428, "grad_norm": 2.570056825646241, "learning_rate": 9.473237494702331e-06, "loss": 0.6369, "step": 999 }, { "epoch": 0.17330647083035464, "grad_norm": 2.2862751504808894, "learning_rate": 9.471982699559679e-06, "loss": 0.5705, "step": 1000 }, { "epoch": 0.173479777301185, "grad_norm": 3.0131249916828304, "learning_rate": 9.47072649498402e-06, "loss": 0.55, "step": 1001 }, { "epoch": 0.17365308377201533, "grad_norm": 2.2261453911061646, "learning_rate": 9.469468881371277e-06, "loss": 0.6524, "step": 1002 }, { "epoch": 0.17382639024284569, "grad_norm": 2.5735594709797116, "learning_rate": 9.468209859117807e-06, "loss": 0.6949, "step": 1003 }, { "epoch": 0.17399969671367604, "grad_norm": 2.6313162182734255, "learning_rate": 9.466949428620418e-06, "loss": 0.6299, "step": 1004 }, { "epoch": 0.1741730031845064, "grad_norm": 2.5504915673158615, "learning_rate": 9.465687590276357e-06, "loss": 0.6098, "step": 1005 }, { "epoch": 0.17434630965533676, "grad_norm": 2.319868765259887, "learning_rate": 9.46442434448332e-06, "loss": 0.5639, "step": 1006 }, { "epoch": 0.17451961612616712, "grad_norm": 4.45749627524982, "learning_rate": 9.463159691639441e-06, "loss": 0.5641, "step": 1007 }, { "epoch": 0.17469292259699745, "grad_norm": 2.599654440175935, "learning_rate": 9.461893632143302e-06, "loss": 0.5762, "step": 1008 }, { "epoch": 0.1748662290678278, "grad_norm": 4.80567293728476, "learning_rate": 9.460626166393925e-06, "loss": 0.5582, "step": 1009 }, { "epoch": 0.17503953553865817, "grad_norm": 2.1248766141110447, "learning_rate": 9.459357294790778e-06, "loss": 0.5831, "step": 1010 }, { "epoch": 0.17521284200948853, "grad_norm": 2.2802612601827006, "learning_rate": 9.458087017733771e-06, "loss": 0.5729, "step": 1011 }, { "epoch": 0.1753861484803189, "grad_norm": 2.6523019049695806, "learning_rate": 9.456815335623256e-06, "loss": 0.6006, "step": 1012 }, { "epoch": 0.17555945495114925, "grad_norm": 3.3410872532608087, "learning_rate": 9.455542248860028e-06, "loss": 0.5617, "step": 1013 }, { "epoch": 0.1757327614219796, "grad_norm": 2.2833068779545207, "learning_rate": 9.454267757845326e-06, "loss": 0.6478, "step": 1014 }, { "epoch": 0.17590606789280994, "grad_norm": 5.371333469646626, "learning_rate": 9.452991862980832e-06, "loss": 0.6139, "step": 1015 }, { "epoch": 0.1760793743636403, "grad_norm": 2.526110415354224, "learning_rate": 9.451714564668664e-06, "loss": 0.6026, "step": 1016 }, { "epoch": 0.17625268083447065, "grad_norm": 2.5996251847779512, "learning_rate": 9.450435863311395e-06, "loss": 0.5305, "step": 1017 }, { "epoch": 0.176425987305301, "grad_norm": 2.3404239081882965, "learning_rate": 9.449155759312028e-06, "loss": 0.5753, "step": 1018 }, { "epoch": 0.17659929377613137, "grad_norm": 2.273106744170403, "learning_rate": 9.447874253074014e-06, "loss": 0.5998, "step": 1019 }, { "epoch": 0.17677260024696173, "grad_norm": 2.56529588924703, "learning_rate": 9.446591345001247e-06, "loss": 0.5327, "step": 1020 }, { "epoch": 0.17694590671779206, "grad_norm": 3.041419175201505, "learning_rate": 9.445307035498055e-06, "loss": 0.6134, "step": 1021 }, { "epoch": 0.17711921318862242, "grad_norm": 3.266745783819023, "learning_rate": 9.444021324969219e-06, "loss": 0.7197, "step": 1022 }, { "epoch": 0.17729251965945278, "grad_norm": 2.477562684247372, "learning_rate": 9.442734213819952e-06, "loss": 0.6681, "step": 1023 }, { "epoch": 0.17746582613028314, "grad_norm": 2.682584394178791, "learning_rate": 9.441445702455916e-06, "loss": 0.5302, "step": 1024 }, { "epoch": 0.1776391326011135, "grad_norm": 5.489790159839377, "learning_rate": 9.44015579128321e-06, "loss": 0.5673, "step": 1025 }, { "epoch": 0.17781243907194386, "grad_norm": 2.6453048392932312, "learning_rate": 9.43886448070837e-06, "loss": 0.5549, "step": 1026 }, { "epoch": 0.17798574554277422, "grad_norm": 3.6790354171361663, "learning_rate": 9.437571771138383e-06, "loss": 0.5883, "step": 1027 }, { "epoch": 0.17815905201360455, "grad_norm": 6.240610317950071, "learning_rate": 9.43627766298067e-06, "loss": 0.6118, "step": 1028 }, { "epoch": 0.1783323584844349, "grad_norm": 2.244398829252468, "learning_rate": 9.434982156643095e-06, "loss": 0.5502, "step": 1029 }, { "epoch": 0.17850566495526526, "grad_norm": 2.170472571351895, "learning_rate": 9.433685252533962e-06, "loss": 0.5909, "step": 1030 }, { "epoch": 0.17867897142609562, "grad_norm": 2.6991536417791093, "learning_rate": 9.432386951062017e-06, "loss": 0.6229, "step": 1031 }, { "epoch": 0.17885227789692598, "grad_norm": 2.7177289348293883, "learning_rate": 9.431087252636441e-06, "loss": 0.5822, "step": 1032 }, { "epoch": 0.17902558436775634, "grad_norm": 2.512824375031713, "learning_rate": 9.429786157666863e-06, "loss": 0.5568, "step": 1033 }, { "epoch": 0.17919889083858667, "grad_norm": 5.333438837113608, "learning_rate": 9.428483666563347e-06, "loss": 0.485, "step": 1034 }, { "epoch": 0.17937219730941703, "grad_norm": 2.549195226020858, "learning_rate": 9.427179779736401e-06, "loss": 0.6113, "step": 1035 }, { "epoch": 0.1795455037802474, "grad_norm": 2.6544121277009176, "learning_rate": 9.425874497596967e-06, "loss": 0.5301, "step": 1036 }, { "epoch": 0.17971881025107775, "grad_norm": 2.292516011806466, "learning_rate": 9.424567820556435e-06, "loss": 0.5858, "step": 1037 }, { "epoch": 0.1798921167219081, "grad_norm": 2.0490193210279624, "learning_rate": 9.423259749026624e-06, "loss": 0.518, "step": 1038 }, { "epoch": 0.18006542319273847, "grad_norm": 2.3526266297023057, "learning_rate": 9.421950283419803e-06, "loss": 0.6162, "step": 1039 }, { "epoch": 0.18023872966356883, "grad_norm": 4.714172326382967, "learning_rate": 9.420639424148675e-06, "loss": 0.5727, "step": 1040 }, { "epoch": 0.18041203613439916, "grad_norm": 2.2915532717690987, "learning_rate": 9.419327171626378e-06, "loss": 0.5513, "step": 1041 }, { "epoch": 0.18058534260522952, "grad_norm": 2.2517741276715673, "learning_rate": 9.418013526266502e-06, "loss": 0.5966, "step": 1042 }, { "epoch": 0.18075864907605987, "grad_norm": 2.4798556987815887, "learning_rate": 9.416698488483064e-06, "loss": 0.5873, "step": 1043 }, { "epoch": 0.18093195554689023, "grad_norm": 2.8581437071068296, "learning_rate": 9.415382058690524e-06, "loss": 0.524, "step": 1044 }, { "epoch": 0.1811052620177206, "grad_norm": 3.7913719932478602, "learning_rate": 9.41406423730378e-06, "loss": 0.4868, "step": 1045 }, { "epoch": 0.18127856848855095, "grad_norm": 2.6344168257546228, "learning_rate": 9.41274502473817e-06, "loss": 0.627, "step": 1046 }, { "epoch": 0.1814518749593813, "grad_norm": 2.3943094075159204, "learning_rate": 9.41142442140947e-06, "loss": 0.526, "step": 1047 }, { "epoch": 0.18162518143021164, "grad_norm": 2.3542882611415847, "learning_rate": 9.410102427733895e-06, "loss": 0.5582, "step": 1048 }, { "epoch": 0.181798487901042, "grad_norm": 2.02853463931048, "learning_rate": 9.408779044128095e-06, "loss": 0.5466, "step": 1049 }, { "epoch": 0.18197179437187236, "grad_norm": 2.6199058075204467, "learning_rate": 9.407454271009162e-06, "loss": 0.5778, "step": 1050 }, { "epoch": 0.18214510084270272, "grad_norm": 2.386307760320249, "learning_rate": 9.406128108794624e-06, "loss": 0.6246, "step": 1051 }, { "epoch": 0.18231840731353308, "grad_norm": 3.851390994999524, "learning_rate": 9.404800557902445e-06, "loss": 0.6152, "step": 1052 }, { "epoch": 0.18249171378436344, "grad_norm": 2.866659281878702, "learning_rate": 9.403471618751032e-06, "loss": 0.4607, "step": 1053 }, { "epoch": 0.18266502025519377, "grad_norm": 4.091397792263725, "learning_rate": 9.402141291759223e-06, "loss": 0.5475, "step": 1054 }, { "epoch": 0.18283832672602413, "grad_norm": 3.241664540755167, "learning_rate": 9.400809577346301e-06, "loss": 0.558, "step": 1055 }, { "epoch": 0.18301163319685448, "grad_norm": 2.2108350992534707, "learning_rate": 9.399476475931977e-06, "loss": 0.4576, "step": 1056 }, { "epoch": 0.18318493966768484, "grad_norm": 1.9807469336817325, "learning_rate": 9.398141987936405e-06, "loss": 0.5535, "step": 1057 }, { "epoch": 0.1833582461385152, "grad_norm": 4.019718148423748, "learning_rate": 9.396806113780177e-06, "loss": 0.6079, "step": 1058 }, { "epoch": 0.18353155260934556, "grad_norm": 1.9263868764794192, "learning_rate": 9.39546885388432e-06, "loss": 0.5243, "step": 1059 }, { "epoch": 0.18370485908017592, "grad_norm": 2.775706990029135, "learning_rate": 9.394130208670296e-06, "loss": 0.6291, "step": 1060 }, { "epoch": 0.18387816555100625, "grad_norm": 2.836149420050326, "learning_rate": 9.392790178560005e-06, "loss": 0.5935, "step": 1061 }, { "epoch": 0.1840514720218366, "grad_norm": 2.1901463947672206, "learning_rate": 9.391448763975786e-06, "loss": 0.5902, "step": 1062 }, { "epoch": 0.18422477849266697, "grad_norm": 3.1217888576922426, "learning_rate": 9.390105965340411e-06, "loss": 0.6807, "step": 1063 }, { "epoch": 0.18439808496349733, "grad_norm": 2.313655503927857, "learning_rate": 9.388761783077088e-06, "loss": 0.4971, "step": 1064 }, { "epoch": 0.1845713914343277, "grad_norm": 2.285670424954061, "learning_rate": 9.387416217609464e-06, "loss": 0.5458, "step": 1065 }, { "epoch": 0.18474469790515805, "grad_norm": 2.3235362272682205, "learning_rate": 9.38606926936162e-06, "loss": 0.6247, "step": 1066 }, { "epoch": 0.18491800437598838, "grad_norm": 2.832524132336392, "learning_rate": 9.384720938758074e-06, "loss": 0.6488, "step": 1067 }, { "epoch": 0.18509131084681874, "grad_norm": 2.383480851620369, "learning_rate": 9.383371226223777e-06, "loss": 0.6012, "step": 1068 }, { "epoch": 0.1852646173176491, "grad_norm": 2.335288022099342, "learning_rate": 9.382020132184117e-06, "loss": 0.5494, "step": 1069 }, { "epoch": 0.18543792378847945, "grad_norm": 2.469049155834797, "learning_rate": 9.38066765706492e-06, "loss": 0.5998, "step": 1070 }, { "epoch": 0.1856112302593098, "grad_norm": 2.1950258027521254, "learning_rate": 9.379313801292447e-06, "loss": 0.5301, "step": 1071 }, { "epoch": 0.18578453673014017, "grad_norm": 2.5493136862720593, "learning_rate": 9.377958565293386e-06, "loss": 0.5307, "step": 1072 }, { "epoch": 0.18595784320097053, "grad_norm": 1.9318211552669764, "learning_rate": 9.37660194949487e-06, "loss": 0.5567, "step": 1073 }, { "epoch": 0.18613114967180086, "grad_norm": 2.459314064789678, "learning_rate": 9.375243954324462e-06, "loss": 0.4935, "step": 1074 }, { "epoch": 0.18630445614263122, "grad_norm": 2.636927265226177, "learning_rate": 9.37388458021016e-06, "loss": 0.6406, "step": 1075 }, { "epoch": 0.18647776261346158, "grad_norm": 4.007377506263647, "learning_rate": 9.372523827580402e-06, "loss": 0.657, "step": 1076 }, { "epoch": 0.18665106908429194, "grad_norm": 2.7825147344794625, "learning_rate": 9.37116169686405e-06, "loss": 0.6374, "step": 1077 }, { "epoch": 0.1868243755551223, "grad_norm": 3.14888991113423, "learning_rate": 9.369798188490409e-06, "loss": 0.6396, "step": 1078 }, { "epoch": 0.18699768202595266, "grad_norm": 2.197545032548617, "learning_rate": 9.368433302889216e-06, "loss": 0.5305, "step": 1079 }, { "epoch": 0.187170988496783, "grad_norm": 2.4339165397391698, "learning_rate": 9.36706704049064e-06, "loss": 0.5823, "step": 1080 }, { "epoch": 0.18734429496761335, "grad_norm": 2.649274397629768, "learning_rate": 9.365699401725285e-06, "loss": 0.6158, "step": 1081 }, { "epoch": 0.1875176014384437, "grad_norm": 2.4987850708807473, "learning_rate": 9.364330387024192e-06, "loss": 0.4959, "step": 1082 }, { "epoch": 0.18769090790927406, "grad_norm": 2.5196234403132998, "learning_rate": 9.36295999681883e-06, "loss": 0.5909, "step": 1083 }, { "epoch": 0.18786421438010442, "grad_norm": 2.1359864533286492, "learning_rate": 9.361588231541105e-06, "loss": 0.6246, "step": 1084 }, { "epoch": 0.18803752085093478, "grad_norm": 3.2184466531948104, "learning_rate": 9.360215091623357e-06, "loss": 0.5419, "step": 1085 }, { "epoch": 0.18821082732176514, "grad_norm": 2.323903382504865, "learning_rate": 9.358840577498356e-06, "loss": 0.6255, "step": 1086 }, { "epoch": 0.18838413379259547, "grad_norm": 2.080472541774184, "learning_rate": 9.357464689599307e-06, "loss": 0.6253, "step": 1087 }, { "epoch": 0.18855744026342583, "grad_norm": 2.519111910350151, "learning_rate": 9.35608742835985e-06, "loss": 0.5598, "step": 1088 }, { "epoch": 0.1887307467342562, "grad_norm": 2.1143201245078234, "learning_rate": 9.354708794214055e-06, "loss": 0.5331, "step": 1089 }, { "epoch": 0.18890405320508655, "grad_norm": 2.3010748587559786, "learning_rate": 9.353328787596426e-06, "loss": 0.5094, "step": 1090 }, { "epoch": 0.1890773596759169, "grad_norm": 2.6714169604244753, "learning_rate": 9.351947408941897e-06, "loss": 0.589, "step": 1091 }, { "epoch": 0.18925066614674727, "grad_norm": 3.0704096136513077, "learning_rate": 9.350564658685842e-06, "loss": 0.5963, "step": 1092 }, { "epoch": 0.1894239726175776, "grad_norm": 2.5648928911607642, "learning_rate": 9.349180537264053e-06, "loss": 0.6047, "step": 1093 }, { "epoch": 0.18959727908840796, "grad_norm": 2.545391446599953, "learning_rate": 9.34779504511277e-06, "loss": 0.7026, "step": 1094 }, { "epoch": 0.18977058555923831, "grad_norm": 2.4988047967096176, "learning_rate": 9.346408182668658e-06, "loss": 0.6831, "step": 1095 }, { "epoch": 0.18994389203006867, "grad_norm": 2.8107191542328946, "learning_rate": 9.34501995036881e-06, "loss": 0.6037, "step": 1096 }, { "epoch": 0.19011719850089903, "grad_norm": 2.380816678273612, "learning_rate": 9.343630348650757e-06, "loss": 0.5038, "step": 1097 }, { "epoch": 0.1902905049717294, "grad_norm": 2.408183660532735, "learning_rate": 9.342239377952457e-06, "loss": 0.5614, "step": 1098 }, { "epoch": 0.19046381144255975, "grad_norm": 2.361188610620566, "learning_rate": 9.340847038712307e-06, "loss": 0.5641, "step": 1099 }, { "epoch": 0.19063711791339008, "grad_norm": 2.754853060885699, "learning_rate": 9.339453331369124e-06, "loss": 0.5629, "step": 1100 }, { "epoch": 0.19081042438422044, "grad_norm": 2.403171968558466, "learning_rate": 9.338058256362163e-06, "loss": 0.5951, "step": 1101 }, { "epoch": 0.1909837308550508, "grad_norm": 2.439608278695443, "learning_rate": 9.336661814131114e-06, "loss": 0.5671, "step": 1102 }, { "epoch": 0.19115703732588116, "grad_norm": 3.0210754335550654, "learning_rate": 9.33526400511609e-06, "loss": 0.5987, "step": 1103 }, { "epoch": 0.19133034379671152, "grad_norm": 1.9484163518498805, "learning_rate": 9.333864829757634e-06, "loss": 0.4814, "step": 1104 }, { "epoch": 0.19150365026754188, "grad_norm": 8.270956929769337, "learning_rate": 9.332464288496732e-06, "loss": 0.6088, "step": 1105 }, { "epoch": 0.1916769567383722, "grad_norm": 2.7941437722936637, "learning_rate": 9.331062381774785e-06, "loss": 0.5537, "step": 1106 }, { "epoch": 0.19185026320920257, "grad_norm": 2.6237326231181295, "learning_rate": 9.329659110033635e-06, "loss": 0.6139, "step": 1107 }, { "epoch": 0.19202356968003292, "grad_norm": 2.2370525069709855, "learning_rate": 9.328254473715548e-06, "loss": 0.5956, "step": 1108 }, { "epoch": 0.19219687615086328, "grad_norm": 2.2180888160499475, "learning_rate": 9.326848473263227e-06, "loss": 0.5352, "step": 1109 }, { "epoch": 0.19237018262169364, "grad_norm": 2.3887737879344106, "learning_rate": 9.325441109119795e-06, "loss": 0.5654, "step": 1110 }, { "epoch": 0.192543489092524, "grad_norm": 3.6139263276153812, "learning_rate": 9.324032381728813e-06, "loss": 0.5065, "step": 1111 }, { "epoch": 0.19271679556335436, "grad_norm": 2.2753121116646953, "learning_rate": 9.322622291534271e-06, "loss": 0.619, "step": 1112 }, { "epoch": 0.1928901020341847, "grad_norm": 2.550792125327354, "learning_rate": 9.321210838980583e-06, "loss": 0.55, "step": 1113 }, { "epoch": 0.19306340850501505, "grad_norm": 2.805746792289464, "learning_rate": 9.3197980245126e-06, "loss": 0.6317, "step": 1114 }, { "epoch": 0.1932367149758454, "grad_norm": 2.2949013684342634, "learning_rate": 9.318383848575595e-06, "loss": 0.5469, "step": 1115 }, { "epoch": 0.19341002144667577, "grad_norm": 2.4948200682548176, "learning_rate": 9.316968311615273e-06, "loss": 0.6912, "step": 1116 }, { "epoch": 0.19358332791750613, "grad_norm": 3.909052100946121, "learning_rate": 9.315551414077769e-06, "loss": 0.5739, "step": 1117 }, { "epoch": 0.19375663438833648, "grad_norm": 2.391988890427719, "learning_rate": 9.314133156409648e-06, "loss": 0.5823, "step": 1118 }, { "epoch": 0.19392994085916682, "grad_norm": 2.6015330181355822, "learning_rate": 9.312713539057899e-06, "loss": 0.6529, "step": 1119 }, { "epoch": 0.19410324732999718, "grad_norm": 2.9142590511088393, "learning_rate": 9.311292562469943e-06, "loss": 0.6663, "step": 1120 }, { "epoch": 0.19427655380082753, "grad_norm": 2.5320266248001406, "learning_rate": 9.30987022709363e-06, "loss": 0.603, "step": 1121 }, { "epoch": 0.1944498602716579, "grad_norm": 2.5994953545774377, "learning_rate": 9.308446533377236e-06, "loss": 0.6005, "step": 1122 }, { "epoch": 0.19462316674248825, "grad_norm": 2.4411076782454404, "learning_rate": 9.307021481769464e-06, "loss": 0.6032, "step": 1123 }, { "epoch": 0.1947964732133186, "grad_norm": 2.761251977736935, "learning_rate": 9.30559507271945e-06, "loss": 0.5936, "step": 1124 }, { "epoch": 0.19496977968414897, "grad_norm": 2.2677964854716484, "learning_rate": 9.304167306676752e-06, "loss": 0.59, "step": 1125 }, { "epoch": 0.1951430861549793, "grad_norm": 3.967949270100643, "learning_rate": 9.302738184091363e-06, "loss": 0.4856, "step": 1126 }, { "epoch": 0.19531639262580966, "grad_norm": 2.427730867955641, "learning_rate": 9.301307705413693e-06, "loss": 0.6523, "step": 1127 }, { "epoch": 0.19548969909664002, "grad_norm": 2.59352543844579, "learning_rate": 9.29987587109459e-06, "loss": 0.5906, "step": 1128 }, { "epoch": 0.19566300556747038, "grad_norm": 2.295581864671576, "learning_rate": 9.298442681585323e-06, "loss": 0.5424, "step": 1129 }, { "epoch": 0.19583631203830074, "grad_norm": 2.24393150451388, "learning_rate": 9.29700813733759e-06, "loss": 0.5307, "step": 1130 }, { "epoch": 0.1960096185091311, "grad_norm": 2.2263841247220117, "learning_rate": 9.295572238803512e-06, "loss": 0.5667, "step": 1131 }, { "epoch": 0.19618292497996143, "grad_norm": 2.334505126471756, "learning_rate": 9.294134986435647e-06, "loss": 0.4254, "step": 1132 }, { "epoch": 0.19635623145079178, "grad_norm": 2.234957579264526, "learning_rate": 9.292696380686972e-06, "loss": 0.5477, "step": 1133 }, { "epoch": 0.19652953792162214, "grad_norm": 2.384103904969379, "learning_rate": 9.291256422010888e-06, "loss": 0.594, "step": 1134 }, { "epoch": 0.1967028443924525, "grad_norm": 4.219465788581651, "learning_rate": 9.289815110861227e-06, "loss": 0.6359, "step": 1135 }, { "epoch": 0.19687615086328286, "grad_norm": 2.395615163416144, "learning_rate": 9.288372447692252e-06, "loss": 0.5489, "step": 1136 }, { "epoch": 0.19704945733411322, "grad_norm": 2.2130971810610056, "learning_rate": 9.286928432958637e-06, "loss": 0.6023, "step": 1137 }, { "epoch": 0.19722276380494358, "grad_norm": 2.618062811193382, "learning_rate": 9.2854830671155e-06, "loss": 0.6303, "step": 1138 }, { "epoch": 0.1973960702757739, "grad_norm": 2.671362264680211, "learning_rate": 9.284036350618373e-06, "loss": 0.6754, "step": 1139 }, { "epoch": 0.19756937674660427, "grad_norm": 2.2664494395188886, "learning_rate": 9.282588283923217e-06, "loss": 0.5574, "step": 1140 }, { "epoch": 0.19774268321743463, "grad_norm": 2.169715302983051, "learning_rate": 9.28113886748642e-06, "loss": 0.5828, "step": 1141 }, { "epoch": 0.197915989688265, "grad_norm": 2.407385513721472, "learning_rate": 9.279688101764792e-06, "loss": 0.5232, "step": 1142 }, { "epoch": 0.19808929615909535, "grad_norm": 2.2102482371872822, "learning_rate": 9.278235987215571e-06, "loss": 0.5455, "step": 1143 }, { "epoch": 0.1982626026299257, "grad_norm": 3.908138218244294, "learning_rate": 9.276782524296421e-06, "loss": 0.4976, "step": 1144 }, { "epoch": 0.19843590910075604, "grad_norm": 2.5416318885753153, "learning_rate": 9.275327713465428e-06, "loss": 0.5714, "step": 1145 }, { "epoch": 0.1986092155715864, "grad_norm": 5.250603923933624, "learning_rate": 9.273871555181101e-06, "loss": 0.6577, "step": 1146 }, { "epoch": 0.19878252204241675, "grad_norm": 4.882074952807621, "learning_rate": 9.272414049902383e-06, "loss": 0.524, "step": 1147 }, { "epoch": 0.1989558285132471, "grad_norm": 2.6387765710652413, "learning_rate": 9.270955198088631e-06, "loss": 0.6615, "step": 1148 }, { "epoch": 0.19912913498407747, "grad_norm": 2.3209477727840317, "learning_rate": 9.269495000199632e-06, "loss": 0.564, "step": 1149 }, { "epoch": 0.19930244145490783, "grad_norm": 2.1525225497038987, "learning_rate": 9.268033456695595e-06, "loss": 0.6269, "step": 1150 }, { "epoch": 0.1994757479257382, "grad_norm": 3.016175634425345, "learning_rate": 9.266570568037157e-06, "loss": 0.5227, "step": 1151 }, { "epoch": 0.19964905439656852, "grad_norm": 12.454828753633644, "learning_rate": 9.265106334685372e-06, "loss": 0.5782, "step": 1152 }, { "epoch": 0.19982236086739888, "grad_norm": 2.6232482473874157, "learning_rate": 9.263640757101726e-06, "loss": 0.5649, "step": 1153 }, { "epoch": 0.19999566733822924, "grad_norm": 2.3098836681611004, "learning_rate": 9.26217383574812e-06, "loss": 0.639, "step": 1154 }, { "epoch": 0.2001689738090596, "grad_norm": 2.466961741917702, "learning_rate": 9.260705571086887e-06, "loss": 0.5719, "step": 1155 }, { "epoch": 0.20034228027988996, "grad_norm": 2.2629117239094536, "learning_rate": 9.25923596358078e-06, "loss": 0.6174, "step": 1156 }, { "epoch": 0.20051558675072031, "grad_norm": 2.7110343714921386, "learning_rate": 9.25776501369297e-06, "loss": 0.5069, "step": 1157 }, { "epoch": 0.20068889322155065, "grad_norm": 2.110170687237678, "learning_rate": 9.256292721887056e-06, "loss": 0.544, "step": 1158 }, { "epoch": 0.200862199692381, "grad_norm": 2.1795847830868973, "learning_rate": 9.254819088627065e-06, "loss": 0.5393, "step": 1159 }, { "epoch": 0.20103550616321136, "grad_norm": 2.4008511284694247, "learning_rate": 9.253344114377438e-06, "loss": 0.5815, "step": 1160 }, { "epoch": 0.20120881263404172, "grad_norm": 2.4955859163700334, "learning_rate": 9.251867799603041e-06, "loss": 0.5683, "step": 1161 }, { "epoch": 0.20138211910487208, "grad_norm": 3.901562803002662, "learning_rate": 9.250390144769165e-06, "loss": 0.6242, "step": 1162 }, { "epoch": 0.20155542557570244, "grad_norm": 3.3350740640780097, "learning_rate": 9.248911150341522e-06, "loss": 0.5883, "step": 1163 }, { "epoch": 0.2017287320465328, "grad_norm": 2.598168634168679, "learning_rate": 9.247430816786245e-06, "loss": 0.6395, "step": 1164 }, { "epoch": 0.20190203851736313, "grad_norm": 3.1620214519903955, "learning_rate": 9.24594914456989e-06, "loss": 0.5564, "step": 1165 }, { "epoch": 0.2020753449881935, "grad_norm": 2.7945810244474534, "learning_rate": 9.244466134159439e-06, "loss": 0.5807, "step": 1166 }, { "epoch": 0.20224865145902385, "grad_norm": 4.410765436126248, "learning_rate": 9.242981786022286e-06, "loss": 0.5764, "step": 1167 }, { "epoch": 0.2024219579298542, "grad_norm": 2.1912068930418105, "learning_rate": 9.241496100626257e-06, "loss": 0.5478, "step": 1168 }, { "epoch": 0.20259526440068457, "grad_norm": 2.50145228355684, "learning_rate": 9.240009078439592e-06, "loss": 0.6911, "step": 1169 }, { "epoch": 0.20276857087151492, "grad_norm": 2.45107669556343, "learning_rate": 9.238520719930957e-06, "loss": 0.62, "step": 1170 }, { "epoch": 0.20294187734234526, "grad_norm": 2.1322872132526873, "learning_rate": 9.237031025569434e-06, "loss": 0.5326, "step": 1171 }, { "epoch": 0.20311518381317561, "grad_norm": 3.6113496859250462, "learning_rate": 9.235539995824533e-06, "loss": 0.6221, "step": 1172 }, { "epoch": 0.20328849028400597, "grad_norm": 2.1552875702187753, "learning_rate": 9.234047631166182e-06, "loss": 0.534, "step": 1173 }, { "epoch": 0.20346179675483633, "grad_norm": 2.459342672489567, "learning_rate": 9.232553932064727e-06, "loss": 0.5899, "step": 1174 }, { "epoch": 0.2036351032256667, "grad_norm": 2.454265308374866, "learning_rate": 9.231058898990935e-06, "loss": 0.6215, "step": 1175 }, { "epoch": 0.20380840969649705, "grad_norm": 2.726492963159825, "learning_rate": 9.229562532415996e-06, "loss": 0.5335, "step": 1176 }, { "epoch": 0.2039817161673274, "grad_norm": 2.831908141106537, "learning_rate": 9.228064832811524e-06, "loss": 0.5867, "step": 1177 }, { "epoch": 0.20415502263815774, "grad_norm": 4.918673510991917, "learning_rate": 9.22656580064954e-06, "loss": 0.6381, "step": 1178 }, { "epoch": 0.2043283291089881, "grad_norm": 3.565033936411207, "learning_rate": 9.225065436402498e-06, "loss": 0.6075, "step": 1179 }, { "epoch": 0.20450163557981846, "grad_norm": 2.8085892366958687, "learning_rate": 9.223563740543269e-06, "loss": 0.6607, "step": 1180 }, { "epoch": 0.20467494205064882, "grad_norm": 2.3747585569679783, "learning_rate": 9.222060713545137e-06, "loss": 0.5955, "step": 1181 }, { "epoch": 0.20484824852147918, "grad_norm": 2.279354645107684, "learning_rate": 9.220556355881817e-06, "loss": 0.6445, "step": 1182 }, { "epoch": 0.20502155499230953, "grad_norm": 2.1248197009758796, "learning_rate": 9.219050668027431e-06, "loss": 0.5202, "step": 1183 }, { "epoch": 0.2051948614631399, "grad_norm": 2.7670051045515125, "learning_rate": 9.217543650456528e-06, "loss": 0.5728, "step": 1184 }, { "epoch": 0.20536816793397022, "grad_norm": 2.7467307664691196, "learning_rate": 9.216035303644073e-06, "loss": 0.5587, "step": 1185 }, { "epoch": 0.20554147440480058, "grad_norm": 2.821541497797925, "learning_rate": 9.214525628065453e-06, "loss": 0.6021, "step": 1186 }, { "epoch": 0.20571478087563094, "grad_norm": 2.835063025660319, "learning_rate": 9.213014624196473e-06, "loss": 0.5466, "step": 1187 }, { "epoch": 0.2058880873464613, "grad_norm": 2.46511402283256, "learning_rate": 9.21150229251335e-06, "loss": 0.5079, "step": 1188 }, { "epoch": 0.20606139381729166, "grad_norm": 2.0818772210699414, "learning_rate": 9.209988633492733e-06, "loss": 0.5529, "step": 1189 }, { "epoch": 0.20623470028812202, "grad_norm": 2.766975787003726, "learning_rate": 9.208473647611674e-06, "loss": 0.6608, "step": 1190 }, { "epoch": 0.20640800675895235, "grad_norm": 2.5266825613969157, "learning_rate": 9.206957335347653e-06, "loss": 0.579, "step": 1191 }, { "epoch": 0.2065813132297827, "grad_norm": 2.901297688720138, "learning_rate": 9.205439697178567e-06, "loss": 0.4755, "step": 1192 }, { "epoch": 0.20675461970061307, "grad_norm": 5.347543030686582, "learning_rate": 9.203920733582726e-06, "loss": 0.5603, "step": 1193 }, { "epoch": 0.20692792617144343, "grad_norm": 2.391938859106359, "learning_rate": 9.202400445038866e-06, "loss": 0.561, "step": 1194 }, { "epoch": 0.20710123264227379, "grad_norm": 3.002349744579286, "learning_rate": 9.200878832026132e-06, "loss": 0.5779, "step": 1195 }, { "epoch": 0.20727453911310414, "grad_norm": 3.7196519012686564, "learning_rate": 9.199355895024091e-06, "loss": 0.5459, "step": 1196 }, { "epoch": 0.2074478455839345, "grad_norm": 2.3377513057474184, "learning_rate": 9.197831634512728e-06, "loss": 0.5784, "step": 1197 }, { "epoch": 0.20762115205476483, "grad_norm": 3.492182120790033, "learning_rate": 9.19630605097244e-06, "loss": 0.577, "step": 1198 }, { "epoch": 0.2077944585255952, "grad_norm": 2.3875386596339996, "learning_rate": 9.19477914488405e-06, "loss": 0.5449, "step": 1199 }, { "epoch": 0.20796776499642555, "grad_norm": 2.067538455102945, "learning_rate": 9.193250916728789e-06, "loss": 0.5149, "step": 1200 }, { "epoch": 0.2081410714672559, "grad_norm": 7.5169666872060095, "learning_rate": 9.191721366988306e-06, "loss": 0.5301, "step": 1201 }, { "epoch": 0.20831437793808627, "grad_norm": 3.152371730179997, "learning_rate": 9.190190496144673e-06, "loss": 0.5297, "step": 1202 }, { "epoch": 0.20848768440891663, "grad_norm": 3.3945485019339166, "learning_rate": 9.188658304680371e-06, "loss": 0.5124, "step": 1203 }, { "epoch": 0.20866099087974696, "grad_norm": 2.890499650750058, "learning_rate": 9.187124793078302e-06, "loss": 0.5524, "step": 1204 }, { "epoch": 0.20883429735057732, "grad_norm": 2.2715787482116165, "learning_rate": 9.18558996182178e-06, "loss": 0.4733, "step": 1205 }, { "epoch": 0.20900760382140768, "grad_norm": 2.0964546897288874, "learning_rate": 9.18405381139454e-06, "loss": 0.6397, "step": 1206 }, { "epoch": 0.20918091029223804, "grad_norm": 2.4222879917864644, "learning_rate": 9.182516342280728e-06, "loss": 0.6196, "step": 1207 }, { "epoch": 0.2093542167630684, "grad_norm": 2.0080406916779765, "learning_rate": 9.180977554964906e-06, "loss": 0.6162, "step": 1208 }, { "epoch": 0.20952752323389875, "grad_norm": 2.389516324214661, "learning_rate": 9.179437449932057e-06, "loss": 0.5201, "step": 1209 }, { "epoch": 0.2097008297047291, "grad_norm": 2.4474307612618857, "learning_rate": 9.177896027667572e-06, "loss": 0.6135, "step": 1210 }, { "epoch": 0.20987413617555944, "grad_norm": 2.7253020673704844, "learning_rate": 9.176353288657262e-06, "loss": 0.6022, "step": 1211 }, { "epoch": 0.2100474426463898, "grad_norm": 2.391318716019637, "learning_rate": 9.17480923338735e-06, "loss": 0.5468, "step": 1212 }, { "epoch": 0.21022074911722016, "grad_norm": 2.471423833138452, "learning_rate": 9.173263862344478e-06, "loss": 0.5916, "step": 1213 }, { "epoch": 0.21039405558805052, "grad_norm": 2.603520293621258, "learning_rate": 9.171717176015698e-06, "loss": 0.6287, "step": 1214 }, { "epoch": 0.21056736205888088, "grad_norm": 2.929942060832295, "learning_rate": 9.170169174888478e-06, "loss": 0.5653, "step": 1215 }, { "epoch": 0.21074066852971124, "grad_norm": 2.449623515888312, "learning_rate": 9.168619859450702e-06, "loss": 0.628, "step": 1216 }, { "epoch": 0.21091397500054157, "grad_norm": 2.722228985415911, "learning_rate": 9.167069230190669e-06, "loss": 0.6284, "step": 1217 }, { "epoch": 0.21108728147137193, "grad_norm": 2.916569412130412, "learning_rate": 9.165517287597085e-06, "loss": 0.5673, "step": 1218 }, { "epoch": 0.2112605879422023, "grad_norm": 2.742221797803541, "learning_rate": 9.163964032159079e-06, "loss": 0.5747, "step": 1219 }, { "epoch": 0.21143389441303265, "grad_norm": 2.0875459059075943, "learning_rate": 9.16240946436619e-06, "loss": 0.5526, "step": 1220 }, { "epoch": 0.211607200883863, "grad_norm": 2.1548785697662387, "learning_rate": 9.160853584708369e-06, "loss": 0.5148, "step": 1221 }, { "epoch": 0.21178050735469336, "grad_norm": 2.6060133732750455, "learning_rate": 9.159296393675982e-06, "loss": 0.5313, "step": 1222 }, { "epoch": 0.21195381382552372, "grad_norm": 4.145713933239617, "learning_rate": 9.157737891759809e-06, "loss": 0.6032, "step": 1223 }, { "epoch": 0.21212712029635405, "grad_norm": 2.1056327646526363, "learning_rate": 9.156178079451044e-06, "loss": 0.4944, "step": 1224 }, { "epoch": 0.2123004267671844, "grad_norm": 2.282745808476869, "learning_rate": 9.154616957241287e-06, "loss": 0.59, "step": 1225 }, { "epoch": 0.21247373323801477, "grad_norm": 2.16809129697432, "learning_rate": 9.153054525622563e-06, "loss": 0.5972, "step": 1226 }, { "epoch": 0.21264703970884513, "grad_norm": 2.619910762397157, "learning_rate": 9.1514907850873e-06, "loss": 0.6341, "step": 1227 }, { "epoch": 0.2128203461796755, "grad_norm": 2.0585276771228656, "learning_rate": 9.14992573612834e-06, "loss": 0.5682, "step": 1228 }, { "epoch": 0.21299365265050585, "grad_norm": 2.283634515751437, "learning_rate": 9.148359379238941e-06, "loss": 0.522, "step": 1229 }, { "epoch": 0.21316695912133618, "grad_norm": 2.353740981211803, "learning_rate": 9.14679171491277e-06, "loss": 0.6094, "step": 1230 }, { "epoch": 0.21334026559216654, "grad_norm": 3.4639386085205848, "learning_rate": 9.14522274364391e-06, "loss": 0.5763, "step": 1231 }, { "epoch": 0.2135135720629969, "grad_norm": 2.1868098727574186, "learning_rate": 9.143652465926846e-06, "loss": 0.5788, "step": 1232 }, { "epoch": 0.21368687853382726, "grad_norm": 2.495139063271024, "learning_rate": 9.14208088225649e-06, "loss": 0.6134, "step": 1233 }, { "epoch": 0.21386018500465762, "grad_norm": 3.2482045289474066, "learning_rate": 9.140507993128152e-06, "loss": 0.5871, "step": 1234 }, { "epoch": 0.21403349147548797, "grad_norm": 2.481957585053895, "learning_rate": 9.13893379903756e-06, "loss": 0.4957, "step": 1235 }, { "epoch": 0.21420679794631833, "grad_norm": 2.2788005963567413, "learning_rate": 9.137358300480854e-06, "loss": 0.5904, "step": 1236 }, { "epoch": 0.21438010441714866, "grad_norm": 2.2874734681723203, "learning_rate": 9.13578149795458e-06, "loss": 0.5496, "step": 1237 }, { "epoch": 0.21455341088797902, "grad_norm": 6.399082032013257, "learning_rate": 9.134203391955702e-06, "loss": 0.5819, "step": 1238 }, { "epoch": 0.21472671735880938, "grad_norm": 2.937351991540915, "learning_rate": 9.13262398298159e-06, "loss": 0.5245, "step": 1239 }, { "epoch": 0.21490002382963974, "grad_norm": 2.561562742463716, "learning_rate": 9.131043271530024e-06, "loss": 0.5828, "step": 1240 }, { "epoch": 0.2150733303004701, "grad_norm": 1.9845628446519228, "learning_rate": 9.129461258099195e-06, "loss": 0.6519, "step": 1241 }, { "epoch": 0.21524663677130046, "grad_norm": 2.791762837228922, "learning_rate": 9.12787794318771e-06, "loss": 0.6645, "step": 1242 }, { "epoch": 0.2154199432421308, "grad_norm": 2.2033902087713213, "learning_rate": 9.12629332729458e-06, "loss": 0.5717, "step": 1243 }, { "epoch": 0.21559324971296115, "grad_norm": 2.3847381701937223, "learning_rate": 9.124707410919225e-06, "loss": 0.5441, "step": 1244 }, { "epoch": 0.2157665561837915, "grad_norm": 4.239354647356285, "learning_rate": 9.123120194561481e-06, "loss": 0.5963, "step": 1245 }, { "epoch": 0.21593986265462187, "grad_norm": 2.411866209482763, "learning_rate": 9.121531678721589e-06, "loss": 0.586, "step": 1246 }, { "epoch": 0.21611316912545223, "grad_norm": 2.55639935248663, "learning_rate": 9.1199418639002e-06, "loss": 0.6726, "step": 1247 }, { "epoch": 0.21628647559628258, "grad_norm": 2.2864902896897474, "learning_rate": 9.118350750598377e-06, "loss": 0.5145, "step": 1248 }, { "epoch": 0.21645978206711294, "grad_norm": 2.261431889141433, "learning_rate": 9.11675833931759e-06, "loss": 0.6738, "step": 1249 }, { "epoch": 0.21663308853794327, "grad_norm": 3.3397163893934594, "learning_rate": 9.115164630559719e-06, "loss": 0.6479, "step": 1250 }, { "epoch": 0.21680639500877363, "grad_norm": 2.140558599917646, "learning_rate": 9.11356962482705e-06, "loss": 0.5785, "step": 1251 }, { "epoch": 0.216979701479604, "grad_norm": 2.518168865636175, "learning_rate": 9.111973322622284e-06, "loss": 0.5462, "step": 1252 }, { "epoch": 0.21715300795043435, "grad_norm": 2.443604152935593, "learning_rate": 9.110375724448526e-06, "loss": 0.5722, "step": 1253 }, { "epoch": 0.2173263144212647, "grad_norm": 2.5947654717302737, "learning_rate": 9.108776830809289e-06, "loss": 0.6275, "step": 1254 }, { "epoch": 0.21749962089209507, "grad_norm": 2.3673850273036936, "learning_rate": 9.107176642208496e-06, "loss": 0.5706, "step": 1255 }, { "epoch": 0.2176729273629254, "grad_norm": 2.120331073601205, "learning_rate": 9.105575159150477e-06, "loss": 0.6126, "step": 1256 }, { "epoch": 0.21784623383375576, "grad_norm": 3.1857847261940297, "learning_rate": 9.103972382139974e-06, "loss": 0.6299, "step": 1257 }, { "epoch": 0.21801954030458612, "grad_norm": 2.540862346512072, "learning_rate": 9.102368311682131e-06, "loss": 0.6024, "step": 1258 }, { "epoch": 0.21819284677541648, "grad_norm": 2.714033204857926, "learning_rate": 9.1007629482825e-06, "loss": 0.5869, "step": 1259 }, { "epoch": 0.21836615324624684, "grad_norm": 2.1450054764963, "learning_rate": 9.099156292447048e-06, "loss": 0.5878, "step": 1260 }, { "epoch": 0.2185394597170772, "grad_norm": 2.2808558045271976, "learning_rate": 9.097548344682143e-06, "loss": 0.5533, "step": 1261 }, { "epoch": 0.21871276618790755, "grad_norm": 2.6884523997806986, "learning_rate": 9.095939105494557e-06, "loss": 0.5474, "step": 1262 }, { "epoch": 0.21888607265873788, "grad_norm": 2.3092091508198442, "learning_rate": 9.094328575391479e-06, "loss": 0.5429, "step": 1263 }, { "epoch": 0.21905937912956824, "grad_norm": 2.1938769758816288, "learning_rate": 9.092716754880493e-06, "loss": 0.5669, "step": 1264 }, { "epoch": 0.2192326856003986, "grad_norm": 2.0086460597346725, "learning_rate": 9.0911036444696e-06, "loss": 0.6132, "step": 1265 }, { "epoch": 0.21940599207122896, "grad_norm": 2.430545954756806, "learning_rate": 9.089489244667205e-06, "loss": 0.5709, "step": 1266 }, { "epoch": 0.21957929854205932, "grad_norm": 2.3067888173787674, "learning_rate": 9.087873555982112e-06, "loss": 0.6079, "step": 1267 }, { "epoch": 0.21975260501288968, "grad_norm": 2.3576071760985653, "learning_rate": 9.086256578923542e-06, "loss": 0.622, "step": 1268 }, { "epoch": 0.21992591148372, "grad_norm": 2.5201288453264907, "learning_rate": 9.084638314001114e-06, "loss": 0.6525, "step": 1269 }, { "epoch": 0.22009921795455037, "grad_norm": 2.174700592386522, "learning_rate": 9.083018761724858e-06, "loss": 0.5983, "step": 1270 }, { "epoch": 0.22027252442538073, "grad_norm": 2.7249642557911917, "learning_rate": 9.081397922605206e-06, "loss": 0.5718, "step": 1271 }, { "epoch": 0.2204458308962111, "grad_norm": 2.2187554446567272, "learning_rate": 9.079775797152999e-06, "loss": 0.5915, "step": 1272 }, { "epoch": 0.22061913736704145, "grad_norm": 2.8467845041873807, "learning_rate": 9.07815238587948e-06, "loss": 0.5727, "step": 1273 }, { "epoch": 0.2207924438378718, "grad_norm": 2.728673339135622, "learning_rate": 9.076527689296299e-06, "loss": 0.6169, "step": 1274 }, { "epoch": 0.22096575030870216, "grad_norm": 3.0146401215361576, "learning_rate": 9.074901707915512e-06, "loss": 0.5899, "step": 1275 }, { "epoch": 0.2211390567795325, "grad_norm": 2.561082235386362, "learning_rate": 9.073274442249577e-06, "loss": 0.4592, "step": 1276 }, { "epoch": 0.22131236325036285, "grad_norm": 2.3269668911188153, "learning_rate": 9.07164589281136e-06, "loss": 0.6291, "step": 1277 }, { "epoch": 0.2214856697211932, "grad_norm": 2.4853054967131114, "learning_rate": 9.070016060114132e-06, "loss": 0.6689, "step": 1278 }, { "epoch": 0.22165897619202357, "grad_norm": 2.22483578463711, "learning_rate": 9.068384944671564e-06, "loss": 0.4875, "step": 1279 }, { "epoch": 0.22183228266285393, "grad_norm": 2.1067502284752933, "learning_rate": 9.066752546997734e-06, "loss": 0.4922, "step": 1280 }, { "epoch": 0.2220055891336843, "grad_norm": 2.5415827209923645, "learning_rate": 9.065118867607127e-06, "loss": 0.5682, "step": 1281 }, { "epoch": 0.22217889560451462, "grad_norm": 2.2536231503451822, "learning_rate": 9.063483907014626e-06, "loss": 0.596, "step": 1282 }, { "epoch": 0.22235220207534498, "grad_norm": 2.1194160546520355, "learning_rate": 9.061847665735523e-06, "loss": 0.6163, "step": 1283 }, { "epoch": 0.22252550854617534, "grad_norm": 2.5834147630223048, "learning_rate": 9.060210144285509e-06, "loss": 0.5851, "step": 1284 }, { "epoch": 0.2226988150170057, "grad_norm": 2.133000492485801, "learning_rate": 9.058571343180685e-06, "loss": 0.5956, "step": 1285 }, { "epoch": 0.22287212148783606, "grad_norm": 2.9009735638033276, "learning_rate": 9.056931262937547e-06, "loss": 0.6004, "step": 1286 }, { "epoch": 0.22304542795866641, "grad_norm": 2.5019184531061547, "learning_rate": 9.055289904073e-06, "loss": 0.5743, "step": 1287 }, { "epoch": 0.22321873442949677, "grad_norm": 3.3276304883672636, "learning_rate": 9.053647267104352e-06, "loss": 0.5691, "step": 1288 }, { "epoch": 0.2233920409003271, "grad_norm": 2.373304319080075, "learning_rate": 9.05200335254931e-06, "loss": 0.6423, "step": 1289 }, { "epoch": 0.22356534737115746, "grad_norm": 3.7426713215450587, "learning_rate": 9.050358160925988e-06, "loss": 0.5404, "step": 1290 }, { "epoch": 0.22373865384198782, "grad_norm": 5.6110371256709675, "learning_rate": 9.048711692752898e-06, "loss": 0.5924, "step": 1291 }, { "epoch": 0.22391196031281818, "grad_norm": 2.8639010649137226, "learning_rate": 9.047063948548959e-06, "loss": 0.5729, "step": 1292 }, { "epoch": 0.22408526678364854, "grad_norm": 11.502644104744695, "learning_rate": 9.045414928833487e-06, "loss": 0.5909, "step": 1293 }, { "epoch": 0.2242585732544789, "grad_norm": 13.936169810918008, "learning_rate": 9.043764634126206e-06, "loss": 0.4783, "step": 1294 }, { "epoch": 0.22443187972530923, "grad_norm": 3.8238472368130094, "learning_rate": 9.042113064947237e-06, "loss": 0.5705, "step": 1295 }, { "epoch": 0.2246051861961396, "grad_norm": 2.2279274319414357, "learning_rate": 9.040460221817107e-06, "loss": 0.6032, "step": 1296 }, { "epoch": 0.22477849266696995, "grad_norm": 2.374419885157649, "learning_rate": 9.038806105256736e-06, "loss": 0.5498, "step": 1297 }, { "epoch": 0.2249517991378003, "grad_norm": 2.7242148666923014, "learning_rate": 9.037150715787457e-06, "loss": 0.6146, "step": 1298 }, { "epoch": 0.22512510560863067, "grad_norm": 2.679707140639907, "learning_rate": 9.035494053930998e-06, "loss": 0.7173, "step": 1299 }, { "epoch": 0.22529841207946102, "grad_norm": 2.8470659275680767, "learning_rate": 9.033836120209484e-06, "loss": 0.6249, "step": 1300 }, { "epoch": 0.22547171855029138, "grad_norm": 2.669067922986569, "learning_rate": 9.032176915145449e-06, "loss": 0.5966, "step": 1301 }, { "epoch": 0.22564502502112171, "grad_norm": 2.435249334849337, "learning_rate": 9.030516439261825e-06, "loss": 0.5323, "step": 1302 }, { "epoch": 0.22581833149195207, "grad_norm": 2.285981589835866, "learning_rate": 9.028854693081941e-06, "loss": 0.5967, "step": 1303 }, { "epoch": 0.22599163796278243, "grad_norm": 2.447497603000011, "learning_rate": 9.027191677129528e-06, "loss": 0.5293, "step": 1304 }, { "epoch": 0.2261649444336128, "grad_norm": 2.7613115625253815, "learning_rate": 9.025527391928722e-06, "loss": 0.5974, "step": 1305 }, { "epoch": 0.22633825090444315, "grad_norm": 2.997678247049225, "learning_rate": 9.02386183800405e-06, "loss": 0.5554, "step": 1306 }, { "epoch": 0.2265115573752735, "grad_norm": 2.4174769757279684, "learning_rate": 9.02219501588045e-06, "loss": 0.5782, "step": 1307 }, { "epoch": 0.22668486384610384, "grad_norm": 2.1147346210469196, "learning_rate": 9.020526926083248e-06, "loss": 0.5398, "step": 1308 }, { "epoch": 0.2268581703169342, "grad_norm": 2.267737380567767, "learning_rate": 9.018857569138178e-06, "loss": 0.5367, "step": 1309 }, { "epoch": 0.22703147678776456, "grad_norm": 3.072471539270217, "learning_rate": 9.01718694557137e-06, "loss": 0.6177, "step": 1310 }, { "epoch": 0.22720478325859492, "grad_norm": 2.7952313107189664, "learning_rate": 9.015515055909356e-06, "loss": 0.6369, "step": 1311 }, { "epoch": 0.22737808972942528, "grad_norm": 2.3142480036889705, "learning_rate": 9.013841900679061e-06, "loss": 0.5398, "step": 1312 }, { "epoch": 0.22755139620025563, "grad_norm": 2.3085825448935826, "learning_rate": 9.012167480407816e-06, "loss": 0.6102, "step": 1313 }, { "epoch": 0.227724702671086, "grad_norm": 2.5117941236938055, "learning_rate": 9.010491795623345e-06, "loss": 0.5822, "step": 1314 }, { "epoch": 0.22789800914191632, "grad_norm": 2.2975125767867026, "learning_rate": 9.008814846853772e-06, "loss": 0.6119, "step": 1315 }, { "epoch": 0.22807131561274668, "grad_norm": 3.7614512193922054, "learning_rate": 9.007136634627622e-06, "loss": 0.5658, "step": 1316 }, { "epoch": 0.22824462208357704, "grad_norm": 2.6924393251987433, "learning_rate": 9.005457159473818e-06, "loss": 0.663, "step": 1317 }, { "epoch": 0.2284179285544074, "grad_norm": 2.350381164266999, "learning_rate": 9.003776421921678e-06, "loss": 0.591, "step": 1318 }, { "epoch": 0.22859123502523776, "grad_norm": 2.0627242360094042, "learning_rate": 9.002094422500918e-06, "loss": 0.5125, "step": 1319 }, { "epoch": 0.22876454149606812, "grad_norm": 2.2752208479603215, "learning_rate": 9.000411161741656e-06, "loss": 0.5934, "step": 1320 }, { "epoch": 0.22893784796689848, "grad_norm": 2.217487649886462, "learning_rate": 8.998726640174402e-06, "loss": 0.6294, "step": 1321 }, { "epoch": 0.2291111544377288, "grad_norm": 3.347514262889605, "learning_rate": 8.997040858330068e-06, "loss": 0.5257, "step": 1322 }, { "epoch": 0.22928446090855917, "grad_norm": 2.896338661095291, "learning_rate": 8.99535381673996e-06, "loss": 0.5397, "step": 1323 }, { "epoch": 0.22945776737938953, "grad_norm": 2.4642195113697563, "learning_rate": 8.99366551593578e-06, "loss": 0.6087, "step": 1324 }, { "epoch": 0.22963107385021989, "grad_norm": 3.970465269041143, "learning_rate": 8.991975956449636e-06, "loss": 0.486, "step": 1325 }, { "epoch": 0.22980438032105024, "grad_norm": 2.543704021573576, "learning_rate": 8.99028513881402e-06, "loss": 0.5577, "step": 1326 }, { "epoch": 0.2299776867918806, "grad_norm": 2.462825247219911, "learning_rate": 8.988593063561827e-06, "loss": 0.6001, "step": 1327 }, { "epoch": 0.23015099326271093, "grad_norm": 3.7735415107417714, "learning_rate": 8.986899731226346e-06, "loss": 0.5718, "step": 1328 }, { "epoch": 0.2303242997335413, "grad_norm": 2.6225075851512845, "learning_rate": 8.98520514234127e-06, "loss": 0.6167, "step": 1329 }, { "epoch": 0.23049760620437165, "grad_norm": 2.414962440181118, "learning_rate": 8.983509297440674e-06, "loss": 0.5333, "step": 1330 }, { "epoch": 0.230670912675202, "grad_norm": 2.981535556716405, "learning_rate": 8.981812197059043e-06, "loss": 0.5551, "step": 1331 }, { "epoch": 0.23084421914603237, "grad_norm": 2.8640173725113742, "learning_rate": 8.98011384173125e-06, "loss": 0.6799, "step": 1332 }, { "epoch": 0.23101752561686273, "grad_norm": 2.6243936228439226, "learning_rate": 8.978414231992565e-06, "loss": 0.5912, "step": 1333 }, { "epoch": 0.2311908320876931, "grad_norm": 2.0717990989001835, "learning_rate": 8.976713368378651e-06, "loss": 0.6187, "step": 1334 }, { "epoch": 0.23136413855852342, "grad_norm": 3.1291135674462, "learning_rate": 8.97501125142557e-06, "loss": 0.5804, "step": 1335 }, { "epoch": 0.23153744502935378, "grad_norm": 2.050618739626035, "learning_rate": 8.97330788166978e-06, "loss": 0.5863, "step": 1336 }, { "epoch": 0.23171075150018414, "grad_norm": 1.878598433871113, "learning_rate": 8.971603259648127e-06, "loss": 0.3697, "step": 1337 }, { "epoch": 0.2318840579710145, "grad_norm": 2.4249796288220815, "learning_rate": 8.969897385897857e-06, "loss": 0.557, "step": 1338 }, { "epoch": 0.23205736444184485, "grad_norm": 2.1136263542859375, "learning_rate": 8.968190260956613e-06, "loss": 0.5395, "step": 1339 }, { "epoch": 0.2322306709126752, "grad_norm": 2.2719617042481297, "learning_rate": 8.966481885362426e-06, "loss": 0.5963, "step": 1340 }, { "epoch": 0.23240397738350554, "grad_norm": 2.406293615379191, "learning_rate": 8.964772259653723e-06, "loss": 0.632, "step": 1341 }, { "epoch": 0.2325772838543359, "grad_norm": 2.428890666820043, "learning_rate": 8.963061384369329e-06, "loss": 0.5006, "step": 1342 }, { "epoch": 0.23275059032516626, "grad_norm": 2.3588037807946654, "learning_rate": 8.961349260048457e-06, "loss": 0.5328, "step": 1343 }, { "epoch": 0.23292389679599662, "grad_norm": 2.576275223041513, "learning_rate": 8.959635887230719e-06, "loss": 0.5827, "step": 1344 }, { "epoch": 0.23309720326682698, "grad_norm": 2.3691523777538728, "learning_rate": 8.957921266456118e-06, "loss": 0.6088, "step": 1345 }, { "epoch": 0.23327050973765734, "grad_norm": 2.2739913087380788, "learning_rate": 8.95620539826505e-06, "loss": 0.5721, "step": 1346 }, { "epoch": 0.2334438162084877, "grad_norm": 2.5267704657232186, "learning_rate": 8.954488283198301e-06, "loss": 0.5034, "step": 1347 }, { "epoch": 0.23361712267931803, "grad_norm": 2.1732121507983, "learning_rate": 8.95276992179706e-06, "loss": 0.5729, "step": 1348 }, { "epoch": 0.2337904291501484, "grad_norm": 2.0169526763331707, "learning_rate": 8.951050314602897e-06, "loss": 0.5293, "step": 1349 }, { "epoch": 0.23396373562097875, "grad_norm": 2.021794427118876, "learning_rate": 8.949329462157778e-06, "loss": 0.5841, "step": 1350 }, { "epoch": 0.2341370420918091, "grad_norm": 1.993293080883347, "learning_rate": 8.947607365004073e-06, "loss": 0.496, "step": 1351 }, { "epoch": 0.23431034856263946, "grad_norm": 2.894842236898156, "learning_rate": 8.945884023684523e-06, "loss": 0.5188, "step": 1352 }, { "epoch": 0.23448365503346982, "grad_norm": 2.3812186836478526, "learning_rate": 8.944159438742282e-06, "loss": 0.5804, "step": 1353 }, { "epoch": 0.23465696150430015, "grad_norm": 2.0392109100330296, "learning_rate": 8.942433610720882e-06, "loss": 0.6185, "step": 1354 }, { "epoch": 0.2348302679751305, "grad_norm": 4.963601294116199, "learning_rate": 8.940706540164253e-06, "loss": 0.4729, "step": 1355 }, { "epoch": 0.23500357444596087, "grad_norm": 2.269252261311487, "learning_rate": 8.938978227616715e-06, "loss": 0.6106, "step": 1356 }, { "epoch": 0.23517688091679123, "grad_norm": 4.618635090695748, "learning_rate": 8.93724867362298e-06, "loss": 0.5954, "step": 1357 }, { "epoch": 0.2353501873876216, "grad_norm": 2.236321163609922, "learning_rate": 8.935517878728149e-06, "loss": 0.6034, "step": 1358 }, { "epoch": 0.23552349385845195, "grad_norm": 2.125362179036975, "learning_rate": 8.933785843477718e-06, "loss": 0.5246, "step": 1359 }, { "epoch": 0.2356968003292823, "grad_norm": 2.4984996318311565, "learning_rate": 8.93205256841757e-06, "loss": 0.5827, "step": 1360 }, { "epoch": 0.23587010680011264, "grad_norm": 1.9565784205282608, "learning_rate": 8.930318054093983e-06, "loss": 0.5509, "step": 1361 }, { "epoch": 0.236043413270943, "grad_norm": 2.7023623894689544, "learning_rate": 8.928582301053621e-06, "loss": 0.4924, "step": 1362 }, { "epoch": 0.23621671974177336, "grad_norm": 2.9809598201257876, "learning_rate": 8.926845309843541e-06, "loss": 0.566, "step": 1363 }, { "epoch": 0.23639002621260372, "grad_norm": 2.366147294924167, "learning_rate": 8.925107081011191e-06, "loss": 0.5815, "step": 1364 }, { "epoch": 0.23656333268343407, "grad_norm": 2.6421341094985427, "learning_rate": 8.923367615104408e-06, "loss": 0.6256, "step": 1365 }, { "epoch": 0.23673663915426443, "grad_norm": 2.767576012336791, "learning_rate": 8.921626912671416e-06, "loss": 0.5455, "step": 1366 }, { "epoch": 0.23690994562509476, "grad_norm": 2.156500862777214, "learning_rate": 8.919884974260835e-06, "loss": 0.5037, "step": 1367 }, { "epoch": 0.23708325209592512, "grad_norm": 2.241854194354291, "learning_rate": 8.918141800421668e-06, "loss": 0.6618, "step": 1368 }, { "epoch": 0.23725655856675548, "grad_norm": 10.542179723476423, "learning_rate": 8.916397391703314e-06, "loss": 0.568, "step": 1369 }, { "epoch": 0.23742986503758584, "grad_norm": 2.108679797888856, "learning_rate": 8.914651748655555e-06, "loss": 0.5281, "step": 1370 }, { "epoch": 0.2376031715084162, "grad_norm": 3.35034957316866, "learning_rate": 8.912904871828563e-06, "loss": 0.5047, "step": 1371 }, { "epoch": 0.23777647797924656, "grad_norm": 1.9840072380538394, "learning_rate": 8.911156761772903e-06, "loss": 0.5989, "step": 1372 }, { "epoch": 0.23794978445007692, "grad_norm": 3.4867375071629123, "learning_rate": 8.909407419039526e-06, "loss": 0.6302, "step": 1373 }, { "epoch": 0.23812309092090725, "grad_norm": 2.6120963863291067, "learning_rate": 8.907656844179773e-06, "loss": 0.6344, "step": 1374 }, { "epoch": 0.2382963973917376, "grad_norm": 5.254272497659165, "learning_rate": 8.90590503774537e-06, "loss": 0.5178, "step": 1375 }, { "epoch": 0.23846970386256797, "grad_norm": 2.128717598652568, "learning_rate": 8.904152000288432e-06, "loss": 0.5265, "step": 1376 }, { "epoch": 0.23864301033339833, "grad_norm": 2.358738755617451, "learning_rate": 8.902397732361466e-06, "loss": 0.6077, "step": 1377 }, { "epoch": 0.23881631680422868, "grad_norm": 2.3364096898721076, "learning_rate": 8.90064223451736e-06, "loss": 0.5634, "step": 1378 }, { "epoch": 0.23898962327505904, "grad_norm": 2.1398568491613683, "learning_rate": 8.898885507309399e-06, "loss": 0.5785, "step": 1379 }, { "epoch": 0.23916292974588937, "grad_norm": 3.916942132563252, "learning_rate": 8.897127551291245e-06, "loss": 0.6024, "step": 1380 }, { "epoch": 0.23933623621671973, "grad_norm": 2.84088726486165, "learning_rate": 8.895368367016957e-06, "loss": 0.6102, "step": 1381 }, { "epoch": 0.2395095426875501, "grad_norm": 3.3148185678231012, "learning_rate": 8.893607955040971e-06, "loss": 0.5544, "step": 1382 }, { "epoch": 0.23968284915838045, "grad_norm": 4.220523631486196, "learning_rate": 8.891846315918118e-06, "loss": 0.5798, "step": 1383 }, { "epoch": 0.2398561556292108, "grad_norm": 2.631913633680136, "learning_rate": 8.890083450203615e-06, "loss": 0.5097, "step": 1384 }, { "epoch": 0.24002946210004117, "grad_norm": 7.322074758587608, "learning_rate": 8.888319358453062e-06, "loss": 0.5842, "step": 1385 }, { "epoch": 0.24020276857087153, "grad_norm": 2.195981840504901, "learning_rate": 8.886554041222445e-06, "loss": 0.6345, "step": 1386 }, { "epoch": 0.24037607504170186, "grad_norm": 2.2757432897455545, "learning_rate": 8.884787499068144e-06, "loss": 0.4345, "step": 1387 }, { "epoch": 0.24054938151253222, "grad_norm": 3.382055651009553, "learning_rate": 8.883019732546912e-06, "loss": 0.6224, "step": 1388 }, { "epoch": 0.24072268798336258, "grad_norm": 2.033357440289399, "learning_rate": 8.8812507422159e-06, "loss": 0.557, "step": 1389 }, { "epoch": 0.24089599445419294, "grad_norm": 2.35955041754592, "learning_rate": 8.87948052863264e-06, "loss": 0.5583, "step": 1390 }, { "epoch": 0.2410693009250233, "grad_norm": 2.415819553201892, "learning_rate": 8.877709092355048e-06, "loss": 0.5084, "step": 1391 }, { "epoch": 0.24124260739585365, "grad_norm": 3.010126319990259, "learning_rate": 8.875936433941427e-06, "loss": 0.6397, "step": 1392 }, { "epoch": 0.24141591386668398, "grad_norm": 11.830631151267404, "learning_rate": 8.874162553950466e-06, "loss": 0.4849, "step": 1393 }, { "epoch": 0.24158922033751434, "grad_norm": 2.527968447369435, "learning_rate": 8.872387452941237e-06, "loss": 0.6179, "step": 1394 }, { "epoch": 0.2417625268083447, "grad_norm": 2.2295044569919966, "learning_rate": 8.870611131473198e-06, "loss": 0.592, "step": 1395 }, { "epoch": 0.24193583327917506, "grad_norm": 2.694509616069952, "learning_rate": 8.868833590106191e-06, "loss": 0.6368, "step": 1396 }, { "epoch": 0.24210913975000542, "grad_norm": 4.1848094331333, "learning_rate": 8.867054829400445e-06, "loss": 0.604, "step": 1397 }, { "epoch": 0.24228244622083578, "grad_norm": 13.305353917300623, "learning_rate": 8.865274849916571e-06, "loss": 0.6856, "step": 1398 }, { "epoch": 0.24245575269166614, "grad_norm": 2.0154974406144324, "learning_rate": 8.863493652215562e-06, "loss": 0.5509, "step": 1399 }, { "epoch": 0.24262905916249647, "grad_norm": 2.9468901332099047, "learning_rate": 8.861711236858802e-06, "loss": 0.5845, "step": 1400 }, { "epoch": 0.24280236563332683, "grad_norm": 2.3171061688568724, "learning_rate": 8.859927604408047e-06, "loss": 0.5443, "step": 1401 }, { "epoch": 0.2429756721041572, "grad_norm": 2.4498933118266204, "learning_rate": 8.85814275542545e-06, "loss": 0.6184, "step": 1402 }, { "epoch": 0.24314897857498755, "grad_norm": 2.2947074122462006, "learning_rate": 8.856356690473537e-06, "loss": 0.5195, "step": 1403 }, { "epoch": 0.2433222850458179, "grad_norm": 2.744604306257959, "learning_rate": 8.854569410115225e-06, "loss": 0.5926, "step": 1404 }, { "epoch": 0.24349559151664826, "grad_norm": 2.2895124331221064, "learning_rate": 8.85278091491381e-06, "loss": 0.5982, "step": 1405 }, { "epoch": 0.2436688979874786, "grad_norm": 2.598117489512132, "learning_rate": 8.850991205432968e-06, "loss": 0.563, "step": 1406 }, { "epoch": 0.24384220445830895, "grad_norm": 2.195754349241869, "learning_rate": 8.849200282236763e-06, "loss": 0.558, "step": 1407 }, { "epoch": 0.2440155109291393, "grad_norm": 2.5698537014606724, "learning_rate": 8.847408145889643e-06, "loss": 0.5523, "step": 1408 }, { "epoch": 0.24418881739996967, "grad_norm": 2.219123684012314, "learning_rate": 8.845614796956428e-06, "loss": 0.5016, "step": 1409 }, { "epoch": 0.24436212387080003, "grad_norm": 24.49178955135536, "learning_rate": 8.843820236002332e-06, "loss": 0.5789, "step": 1410 }, { "epoch": 0.2445354303416304, "grad_norm": 2.310189487672172, "learning_rate": 8.842024463592943e-06, "loss": 0.5583, "step": 1411 }, { "epoch": 0.24470873681246075, "grad_norm": 2.6532405355442577, "learning_rate": 8.840227480294237e-06, "loss": 0.6454, "step": 1412 }, { "epoch": 0.24488204328329108, "grad_norm": 5.1695458890542, "learning_rate": 8.838429286672568e-06, "loss": 0.549, "step": 1413 }, { "epoch": 0.24505534975412144, "grad_norm": 6.144731409921029, "learning_rate": 8.83662988329467e-06, "loss": 0.455, "step": 1414 }, { "epoch": 0.2452286562249518, "grad_norm": 2.6730451073774497, "learning_rate": 8.83482927072766e-06, "loss": 0.6068, "step": 1415 }, { "epoch": 0.24540196269578216, "grad_norm": 6.727649888830263, "learning_rate": 8.833027449539039e-06, "loss": 0.6478, "step": 1416 }, { "epoch": 0.24557526916661251, "grad_norm": 4.901426779313973, "learning_rate": 8.831224420296685e-06, "loss": 0.4965, "step": 1417 }, { "epoch": 0.24574857563744287, "grad_norm": 2.073403918861883, "learning_rate": 8.829420183568857e-06, "loss": 0.5245, "step": 1418 }, { "epoch": 0.2459218821082732, "grad_norm": 2.182952376382807, "learning_rate": 8.827614739924198e-06, "loss": 0.4939, "step": 1419 }, { "epoch": 0.24609518857910356, "grad_norm": 2.2872254907633667, "learning_rate": 8.825808089931727e-06, "loss": 0.5821, "step": 1420 }, { "epoch": 0.24626849504993392, "grad_norm": 2.6165866635981674, "learning_rate": 8.824000234160844e-06, "loss": 0.4946, "step": 1421 }, { "epoch": 0.24644180152076428, "grad_norm": 2.2917480544742848, "learning_rate": 8.822191173181333e-06, "loss": 0.5152, "step": 1422 }, { "epoch": 0.24661510799159464, "grad_norm": 2.330675776567109, "learning_rate": 8.820380907563355e-06, "loss": 0.5761, "step": 1423 }, { "epoch": 0.246788414462425, "grad_norm": 2.9210056673347915, "learning_rate": 8.818569437877447e-06, "loss": 0.5845, "step": 1424 }, { "epoch": 0.24696172093325536, "grad_norm": 2.400993521822199, "learning_rate": 8.816756764694533e-06, "loss": 0.5169, "step": 1425 }, { "epoch": 0.2471350274040857, "grad_norm": 2.257878128361454, "learning_rate": 8.814942888585911e-06, "loss": 0.5919, "step": 1426 }, { "epoch": 0.24730833387491605, "grad_norm": 2.3967525038639153, "learning_rate": 8.813127810123259e-06, "loss": 0.6784, "step": 1427 }, { "epoch": 0.2474816403457464, "grad_norm": 2.5830023831238145, "learning_rate": 8.811311529878636e-06, "loss": 0.5915, "step": 1428 }, { "epoch": 0.24765494681657677, "grad_norm": 2.6449511943300896, "learning_rate": 8.809494048424478e-06, "loss": 0.586, "step": 1429 }, { "epoch": 0.24782825328740712, "grad_norm": 2.5481180325593424, "learning_rate": 8.807675366333598e-06, "loss": 0.5722, "step": 1430 }, { "epoch": 0.24800155975823748, "grad_norm": 3.6885152504391225, "learning_rate": 8.805855484179193e-06, "loss": 0.5128, "step": 1431 }, { "epoch": 0.24817486622906781, "grad_norm": 3.898450625424928, "learning_rate": 8.804034402534831e-06, "loss": 0.5826, "step": 1432 }, { "epoch": 0.24834817269989817, "grad_norm": 5.176348524973473, "learning_rate": 8.802212121974465e-06, "loss": 0.6756, "step": 1433 }, { "epoch": 0.24852147917072853, "grad_norm": 2.802084131619625, "learning_rate": 8.800388643072418e-06, "loss": 0.5405, "step": 1434 }, { "epoch": 0.2486947856415589, "grad_norm": 2.6262313152876486, "learning_rate": 8.798563966403398e-06, "loss": 0.5837, "step": 1435 }, { "epoch": 0.24886809211238925, "grad_norm": 3.233008875509992, "learning_rate": 8.796738092542487e-06, "loss": 0.6233, "step": 1436 }, { "epoch": 0.2490413985832196, "grad_norm": 2.0496803651987916, "learning_rate": 8.794911022065147e-06, "loss": 0.5133, "step": 1437 }, { "epoch": 0.24921470505404997, "grad_norm": 2.520315640230237, "learning_rate": 8.79308275554721e-06, "loss": 0.6047, "step": 1438 }, { "epoch": 0.2493880115248803, "grad_norm": 2.714518107509997, "learning_rate": 8.791253293564897e-06, "loss": 0.5947, "step": 1439 }, { "epoch": 0.24956131799571066, "grad_norm": 2.2191857472950742, "learning_rate": 8.789422636694791e-06, "loss": 0.3772, "step": 1440 }, { "epoch": 0.24973462446654102, "grad_norm": 2.494812175700221, "learning_rate": 8.787590785513867e-06, "loss": 0.6227, "step": 1441 }, { "epoch": 0.24990793093737138, "grad_norm": 2.121164837687816, "learning_rate": 8.785757740599462e-06, "loss": 0.5571, "step": 1442 }, { "epoch": 0.2500812374082017, "grad_norm": 2.2965246506347663, "learning_rate": 8.7839235025293e-06, "loss": 0.5649, "step": 1443 }, { "epoch": 0.25025454387903207, "grad_norm": 2.7118981017824337, "learning_rate": 8.782088071881478e-06, "loss": 0.5633, "step": 1444 }, { "epoch": 0.2504278503498624, "grad_norm": 2.391897965462609, "learning_rate": 8.780251449234465e-06, "loss": 0.6256, "step": 1445 }, { "epoch": 0.2506011568206928, "grad_norm": 2.324787369548283, "learning_rate": 8.77841363516711e-06, "loss": 0.5116, "step": 1446 }, { "epoch": 0.25077446329152314, "grad_norm": 3.126818864949743, "learning_rate": 8.776574630258636e-06, "loss": 0.613, "step": 1447 }, { "epoch": 0.2509477697623535, "grad_norm": 2.356055327057248, "learning_rate": 8.774734435088642e-06, "loss": 0.5559, "step": 1448 }, { "epoch": 0.25112107623318386, "grad_norm": 2.835830263065776, "learning_rate": 8.772893050237098e-06, "loss": 0.6186, "step": 1449 }, { "epoch": 0.2512943827040142, "grad_norm": 3.535513625714495, "learning_rate": 8.77105047628436e-06, "loss": 0.5439, "step": 1450 }, { "epoch": 0.2514676891748446, "grad_norm": 3.154455484934252, "learning_rate": 8.769206713811145e-06, "loss": 0.6211, "step": 1451 }, { "epoch": 0.25164099564567494, "grad_norm": 2.5908460241786138, "learning_rate": 8.767361763398552e-06, "loss": 0.6413, "step": 1452 }, { "epoch": 0.2518143021165053, "grad_norm": 3.1370034254488868, "learning_rate": 8.765515625628054e-06, "loss": 0.5978, "step": 1453 }, { "epoch": 0.25198760858733565, "grad_norm": 2.8173843418225086, "learning_rate": 8.763668301081498e-06, "loss": 0.5363, "step": 1454 }, { "epoch": 0.25216091505816596, "grad_norm": 2.2055593594320326, "learning_rate": 8.761819790341103e-06, "loss": 0.664, "step": 1455 }, { "epoch": 0.2523342215289963, "grad_norm": 2.601859291673845, "learning_rate": 8.759970093989466e-06, "loss": 0.5797, "step": 1456 }, { "epoch": 0.2525075279998267, "grad_norm": 2.5925829225202173, "learning_rate": 8.758119212609554e-06, "loss": 0.61, "step": 1457 }, { "epoch": 0.25268083447065703, "grad_norm": 3.1359187996740854, "learning_rate": 8.756267146784707e-06, "loss": 0.5184, "step": 1458 }, { "epoch": 0.2528541409414874, "grad_norm": 2.5408009690852325, "learning_rate": 8.754413897098642e-06, "loss": 0.5465, "step": 1459 }, { "epoch": 0.25302744741231775, "grad_norm": 2.7092288512684024, "learning_rate": 8.752559464135443e-06, "loss": 0.6127, "step": 1460 }, { "epoch": 0.2532007538831481, "grad_norm": 2.3889065055626246, "learning_rate": 8.750703848479577e-06, "loss": 0.6341, "step": 1461 }, { "epoch": 0.25337406035397847, "grad_norm": 2.537227320235558, "learning_rate": 8.748847050715873e-06, "loss": 0.4821, "step": 1462 }, { "epoch": 0.25354736682480883, "grad_norm": 2.954181744234337, "learning_rate": 8.746989071429539e-06, "loss": 0.6349, "step": 1463 }, { "epoch": 0.2537206732956392, "grad_norm": 5.528206360331638, "learning_rate": 8.745129911206154e-06, "loss": 0.5029, "step": 1464 }, { "epoch": 0.25389397976646955, "grad_norm": 2.778220864164117, "learning_rate": 8.743269570631665e-06, "loss": 0.6168, "step": 1465 }, { "epoch": 0.2540672862372999, "grad_norm": 2.349293990329407, "learning_rate": 8.741408050292401e-06, "loss": 0.5493, "step": 1466 }, { "epoch": 0.25424059270813026, "grad_norm": 2.3284112063762805, "learning_rate": 8.739545350775052e-06, "loss": 0.5962, "step": 1467 }, { "epoch": 0.25441389917896057, "grad_norm": 2.3782997488969313, "learning_rate": 8.737681472666686e-06, "loss": 0.6152, "step": 1468 }, { "epoch": 0.2545872056497909, "grad_norm": 2.4261260416688284, "learning_rate": 8.73581641655474e-06, "loss": 0.5268, "step": 1469 }, { "epoch": 0.2547605121206213, "grad_norm": 2.0457653464749375, "learning_rate": 8.733950183027023e-06, "loss": 0.4843, "step": 1470 }, { "epoch": 0.25493381859145164, "grad_norm": 2.3582614819114203, "learning_rate": 8.732082772671718e-06, "loss": 0.5605, "step": 1471 }, { "epoch": 0.255107125062282, "grad_norm": 2.5634506373233106, "learning_rate": 8.73021418607737e-06, "loss": 0.5829, "step": 1472 }, { "epoch": 0.25528043153311236, "grad_norm": 2.1864037066989335, "learning_rate": 8.728344423832906e-06, "loss": 0.5639, "step": 1473 }, { "epoch": 0.2554537380039427, "grad_norm": 2.525534551461119, "learning_rate": 8.726473486527615e-06, "loss": 0.5919, "step": 1474 }, { "epoch": 0.2556270444747731, "grad_norm": 2.5372284617511043, "learning_rate": 8.724601374751163e-06, "loss": 0.5504, "step": 1475 }, { "epoch": 0.25580035094560344, "grad_norm": 2.6747233737459397, "learning_rate": 8.72272808909358e-06, "loss": 0.5485, "step": 1476 }, { "epoch": 0.2559736574164338, "grad_norm": 4.245968953935075, "learning_rate": 8.72085363014527e-06, "loss": 0.489, "step": 1477 }, { "epoch": 0.25614696388726416, "grad_norm": 2.261744736366839, "learning_rate": 8.718977998497003e-06, "loss": 0.5572, "step": 1478 }, { "epoch": 0.2563202703580945, "grad_norm": 4.357604535096281, "learning_rate": 8.717101194739923e-06, "loss": 0.552, "step": 1479 }, { "epoch": 0.2564935768289249, "grad_norm": 2.391172339589554, "learning_rate": 8.715223219465542e-06, "loss": 0.5472, "step": 1480 }, { "epoch": 0.2566668832997552, "grad_norm": 2.1269480976938375, "learning_rate": 8.713344073265742e-06, "loss": 0.566, "step": 1481 }, { "epoch": 0.25684018977058554, "grad_norm": 2.511075653028515, "learning_rate": 8.711463756732769e-06, "loss": 0.5804, "step": 1482 }, { "epoch": 0.2570134962414159, "grad_norm": 2.621494820473691, "learning_rate": 8.709582270459244e-06, "loss": 0.5412, "step": 1483 }, { "epoch": 0.25718680271224625, "grad_norm": 2.504297953139061, "learning_rate": 8.707699615038152e-06, "loss": 0.6053, "step": 1484 }, { "epoch": 0.2573601091830766, "grad_norm": 2.937516700472613, "learning_rate": 8.705815791062853e-06, "loss": 0.6636, "step": 1485 }, { "epoch": 0.25753341565390697, "grad_norm": 2.7847594680757117, "learning_rate": 8.703930799127068e-06, "loss": 0.4681, "step": 1486 }, { "epoch": 0.25770672212473733, "grad_norm": 2.2047519024546456, "learning_rate": 8.70204463982489e-06, "loss": 0.5294, "step": 1487 }, { "epoch": 0.2578800285955677, "grad_norm": 2.6923911888257415, "learning_rate": 8.700157313750776e-06, "loss": 0.623, "step": 1488 }, { "epoch": 0.25805333506639805, "grad_norm": 2.5943998177622896, "learning_rate": 8.69826882149956e-06, "loss": 0.5853, "step": 1489 }, { "epoch": 0.2582266415372284, "grad_norm": 2.6335009370075038, "learning_rate": 8.696379163666431e-06, "loss": 0.483, "step": 1490 }, { "epoch": 0.25839994800805877, "grad_norm": 3.012729724195139, "learning_rate": 8.694488340846958e-06, "loss": 0.5765, "step": 1491 }, { "epoch": 0.2585732544788891, "grad_norm": 2.4249334174406276, "learning_rate": 8.692596353637066e-06, "loss": 0.561, "step": 1492 }, { "epoch": 0.2587465609497195, "grad_norm": 2.9487760854711436, "learning_rate": 8.690703202633052e-06, "loss": 0.6146, "step": 1493 }, { "epoch": 0.2589198674205498, "grad_norm": 3.0309900099021787, "learning_rate": 8.688808888431583e-06, "loss": 0.5957, "step": 1494 }, { "epoch": 0.25909317389138015, "grad_norm": 2.7729453218553624, "learning_rate": 8.686913411629688e-06, "loss": 0.5152, "step": 1495 }, { "epoch": 0.2592664803622105, "grad_norm": 2.3714203158382956, "learning_rate": 8.68501677282476e-06, "loss": 0.5425, "step": 1496 }, { "epoch": 0.25943978683304086, "grad_norm": 2.527454080367191, "learning_rate": 8.683118972614566e-06, "loss": 0.6157, "step": 1497 }, { "epoch": 0.2596130933038712, "grad_norm": 2.442837246388965, "learning_rate": 8.681220011597235e-06, "loss": 0.5451, "step": 1498 }, { "epoch": 0.2597863997747016, "grad_norm": 2.252914038681525, "learning_rate": 8.679319890371261e-06, "loss": 0.6123, "step": 1499 }, { "epoch": 0.25995970624553194, "grad_norm": 3.5959862391661828, "learning_rate": 8.677418609535502e-06, "loss": 0.5117, "step": 1500 }, { "epoch": 0.2601330127163623, "grad_norm": 6.984538771818101, "learning_rate": 8.675516169689186e-06, "loss": 0.5603, "step": 1501 }, { "epoch": 0.26030631918719266, "grad_norm": 4.9007035814042, "learning_rate": 8.673612571431907e-06, "loss": 0.5933, "step": 1502 }, { "epoch": 0.260479625658023, "grad_norm": 4.072488577229141, "learning_rate": 8.671707815363615e-06, "loss": 0.555, "step": 1503 }, { "epoch": 0.2606529321288534, "grad_norm": 2.283450471798077, "learning_rate": 8.669801902084636e-06, "loss": 0.641, "step": 1504 }, { "epoch": 0.26082623859968374, "grad_norm": 2.4620516191538733, "learning_rate": 8.667894832195657e-06, "loss": 0.5405, "step": 1505 }, { "epoch": 0.2609995450705141, "grad_norm": 3.442159523915693, "learning_rate": 8.665986606297723e-06, "loss": 0.6022, "step": 1506 }, { "epoch": 0.2611728515413444, "grad_norm": 2.32858834401063, "learning_rate": 8.664077224992252e-06, "loss": 0.5875, "step": 1507 }, { "epoch": 0.26134615801217476, "grad_norm": 2.5700190068745647, "learning_rate": 8.662166688881024e-06, "loss": 0.5984, "step": 1508 }, { "epoch": 0.2615194644830051, "grad_norm": 2.6286866711274453, "learning_rate": 8.66025499856618e-06, "loss": 0.6288, "step": 1509 }, { "epoch": 0.2616927709538355, "grad_norm": 2.453720840759672, "learning_rate": 8.658342154650228e-06, "loss": 0.5852, "step": 1510 }, { "epoch": 0.26186607742466583, "grad_norm": 2.3239824647891774, "learning_rate": 8.656428157736036e-06, "loss": 0.5991, "step": 1511 }, { "epoch": 0.2620393838954962, "grad_norm": 2.2399986533685787, "learning_rate": 8.654513008426842e-06, "loss": 0.5989, "step": 1512 }, { "epoch": 0.26221269036632655, "grad_norm": 1.919115506588483, "learning_rate": 8.652596707326238e-06, "loss": 0.5307, "step": 1513 }, { "epoch": 0.2623859968371569, "grad_norm": 3.0519434730112733, "learning_rate": 8.650679255038186e-06, "loss": 0.4556, "step": 1514 }, { "epoch": 0.26255930330798727, "grad_norm": 2.759873819772116, "learning_rate": 8.64876065216701e-06, "loss": 0.6613, "step": 1515 }, { "epoch": 0.2627326097788176, "grad_norm": 2.246475166710744, "learning_rate": 8.646840899317392e-06, "loss": 0.4989, "step": 1516 }, { "epoch": 0.262905916249648, "grad_norm": 3.5482991727956157, "learning_rate": 8.644919997094383e-06, "loss": 0.6177, "step": 1517 }, { "epoch": 0.26307922272047835, "grad_norm": 2.25412615120725, "learning_rate": 8.642997946103393e-06, "loss": 0.4936, "step": 1518 }, { "epoch": 0.2632525291913087, "grad_norm": 2.7608389373420734, "learning_rate": 8.64107474695019e-06, "loss": 0.5961, "step": 1519 }, { "epoch": 0.263425835662139, "grad_norm": 5.593731154097758, "learning_rate": 8.639150400240911e-06, "loss": 0.5936, "step": 1520 }, { "epoch": 0.26359914213296937, "grad_norm": 2.1556846424532137, "learning_rate": 8.637224906582054e-06, "loss": 0.5141, "step": 1521 }, { "epoch": 0.2637724486037997, "grad_norm": 3.345986027488902, "learning_rate": 8.635298266580472e-06, "loss": 0.5558, "step": 1522 }, { "epoch": 0.2639457550746301, "grad_norm": 2.3594604541924094, "learning_rate": 8.633370480843385e-06, "loss": 0.604, "step": 1523 }, { "epoch": 0.26411906154546044, "grad_norm": 3.054225004150265, "learning_rate": 8.63144154997837e-06, "loss": 0.5419, "step": 1524 }, { "epoch": 0.2642923680162908, "grad_norm": 2.258201764029298, "learning_rate": 8.629511474593371e-06, "loss": 0.6425, "step": 1525 }, { "epoch": 0.26446567448712116, "grad_norm": 2.042115045474674, "learning_rate": 8.627580255296686e-06, "loss": 0.5666, "step": 1526 }, { "epoch": 0.2646389809579515, "grad_norm": 2.125399035545205, "learning_rate": 8.62564789269698e-06, "loss": 0.4442, "step": 1527 }, { "epoch": 0.2648122874287819, "grad_norm": 2.539191822752686, "learning_rate": 8.623714387403273e-06, "loss": 0.5451, "step": 1528 }, { "epoch": 0.26498559389961224, "grad_norm": 2.8395332931774417, "learning_rate": 8.621779740024948e-06, "loss": 0.5632, "step": 1529 }, { "epoch": 0.2651589003704426, "grad_norm": 2.444393671144877, "learning_rate": 8.619843951171744e-06, "loss": 0.5375, "step": 1530 }, { "epoch": 0.26533220684127296, "grad_norm": 2.4076453627029237, "learning_rate": 8.617907021453766e-06, "loss": 0.6295, "step": 1531 }, { "epoch": 0.2655055133121033, "grad_norm": 2.172283883745164, "learning_rate": 8.615968951481475e-06, "loss": 0.6253, "step": 1532 }, { "epoch": 0.2656788197829337, "grad_norm": 2.7770103823121977, "learning_rate": 8.61402974186569e-06, "loss": 0.5225, "step": 1533 }, { "epoch": 0.265852126253764, "grad_norm": 2.5220164302436925, "learning_rate": 8.612089393217594e-06, "loss": 0.7181, "step": 1534 }, { "epoch": 0.26602543272459434, "grad_norm": 2.116526590800532, "learning_rate": 8.610147906148723e-06, "loss": 0.5858, "step": 1535 }, { "epoch": 0.2661987391954247, "grad_norm": 2.3859415177340613, "learning_rate": 8.608205281270975e-06, "loss": 0.6161, "step": 1536 }, { "epoch": 0.26637204566625505, "grad_norm": 2.234727869460793, "learning_rate": 8.606261519196608e-06, "loss": 0.5965, "step": 1537 }, { "epoch": 0.2665453521370854, "grad_norm": 2.496208721563278, "learning_rate": 8.604316620538235e-06, "loss": 0.553, "step": 1538 }, { "epoch": 0.26671865860791577, "grad_norm": 2.1376695548879883, "learning_rate": 8.602370585908831e-06, "loss": 0.5662, "step": 1539 }, { "epoch": 0.26689196507874613, "grad_norm": 2.4649003002173995, "learning_rate": 8.600423415921727e-06, "loss": 0.5846, "step": 1540 }, { "epoch": 0.2670652715495765, "grad_norm": 2.206224790410582, "learning_rate": 8.598475111190607e-06, "loss": 0.5959, "step": 1541 }, { "epoch": 0.26723857802040685, "grad_norm": 2.622358706979046, "learning_rate": 8.596525672329522e-06, "loss": 0.5951, "step": 1542 }, { "epoch": 0.2674118844912372, "grad_norm": 2.236227543375757, "learning_rate": 8.594575099952875e-06, "loss": 0.5307, "step": 1543 }, { "epoch": 0.26758519096206757, "grad_norm": 2.4258787552009915, "learning_rate": 8.592623394675426e-06, "loss": 0.5258, "step": 1544 }, { "epoch": 0.2677584974328979, "grad_norm": 4.8955872366951745, "learning_rate": 8.590670557112296e-06, "loss": 0.5892, "step": 1545 }, { "epoch": 0.2679318039037283, "grad_norm": 2.3326992779546774, "learning_rate": 8.588716587878957e-06, "loss": 0.5734, "step": 1546 }, { "epoch": 0.2681051103745586, "grad_norm": 2.148826684222011, "learning_rate": 8.58676148759124e-06, "loss": 0.5842, "step": 1547 }, { "epoch": 0.26827841684538895, "grad_norm": 2.3038591294849966, "learning_rate": 8.584805256865336e-06, "loss": 0.5829, "step": 1548 }, { "epoch": 0.2684517233162193, "grad_norm": 2.378256280998938, "learning_rate": 8.58284789631779e-06, "loss": 0.5204, "step": 1549 }, { "epoch": 0.26862502978704966, "grad_norm": 2.455588392258292, "learning_rate": 8.5808894065655e-06, "loss": 0.5977, "step": 1550 }, { "epoch": 0.26879833625788, "grad_norm": 2.3414901113843274, "learning_rate": 8.578929788225723e-06, "loss": 0.6657, "step": 1551 }, { "epoch": 0.2689716427287104, "grad_norm": 2.37531279687123, "learning_rate": 8.576969041916072e-06, "loss": 0.538, "step": 1552 }, { "epoch": 0.26914494919954074, "grad_norm": 2.336461608234039, "learning_rate": 8.575007168254514e-06, "loss": 0.6376, "step": 1553 }, { "epoch": 0.2693182556703711, "grad_norm": 2.351863362338874, "learning_rate": 8.573044167859373e-06, "loss": 0.5428, "step": 1554 }, { "epoch": 0.26949156214120146, "grad_norm": 2.134377491298314, "learning_rate": 8.571080041349323e-06, "loss": 0.6159, "step": 1555 }, { "epoch": 0.2696648686120318, "grad_norm": 3.04380806120749, "learning_rate": 8.569114789343402e-06, "loss": 0.5596, "step": 1556 }, { "epoch": 0.2698381750828622, "grad_norm": 2.501222061809501, "learning_rate": 8.567148412460997e-06, "loss": 0.6185, "step": 1557 }, { "epoch": 0.27001148155369253, "grad_norm": 2.0288444354543893, "learning_rate": 8.565180911321846e-06, "loss": 0.5158, "step": 1558 }, { "epoch": 0.2701847880245229, "grad_norm": 1.953940006157327, "learning_rate": 8.563212286546053e-06, "loss": 0.5287, "step": 1559 }, { "epoch": 0.2703580944953532, "grad_norm": 2.829428373108799, "learning_rate": 8.56124253875406e-06, "loss": 0.6381, "step": 1560 }, { "epoch": 0.27053140096618356, "grad_norm": 2.614530801103539, "learning_rate": 8.559271668566678e-06, "loss": 0.6042, "step": 1561 }, { "epoch": 0.2707047074370139, "grad_norm": 2.5887604871799845, "learning_rate": 8.557299676605064e-06, "loss": 0.5228, "step": 1562 }, { "epoch": 0.2708780139078443, "grad_norm": 2.9401148763691647, "learning_rate": 8.555326563490727e-06, "loss": 0.5393, "step": 1563 }, { "epoch": 0.27105132037867463, "grad_norm": 1.9746058499339563, "learning_rate": 8.553352329845535e-06, "loss": 0.5099, "step": 1564 }, { "epoch": 0.271224626849505, "grad_norm": 2.6856669864624334, "learning_rate": 8.551376976291707e-06, "loss": 0.5668, "step": 1565 }, { "epoch": 0.27139793332033535, "grad_norm": 2.3442930358644323, "learning_rate": 8.549400503451813e-06, "loss": 0.6741, "step": 1566 }, { "epoch": 0.2715712397911657, "grad_norm": 2.6717802950609593, "learning_rate": 8.547422911948777e-06, "loss": 0.4069, "step": 1567 }, { "epoch": 0.27174454626199607, "grad_norm": 2.7893318218135335, "learning_rate": 8.545444202405875e-06, "loss": 0.4819, "step": 1568 }, { "epoch": 0.2719178527328264, "grad_norm": 2.6972046157508367, "learning_rate": 8.543464375446738e-06, "loss": 0.5269, "step": 1569 }, { "epoch": 0.2720911592036568, "grad_norm": 3.1681827276695014, "learning_rate": 8.541483431695347e-06, "loss": 0.6175, "step": 1570 }, { "epoch": 0.27226446567448714, "grad_norm": 2.765051334878434, "learning_rate": 8.539501371776035e-06, "loss": 0.5351, "step": 1571 }, { "epoch": 0.2724377721453175, "grad_norm": 2.6270828609778194, "learning_rate": 8.537518196313487e-06, "loss": 0.638, "step": 1572 }, { "epoch": 0.2726110786161478, "grad_norm": 3.008153974883318, "learning_rate": 8.535533905932739e-06, "loss": 0.6558, "step": 1573 }, { "epoch": 0.27278438508697817, "grad_norm": 2.4888684403944166, "learning_rate": 8.533548501259179e-06, "loss": 0.597, "step": 1574 }, { "epoch": 0.2729576915578085, "grad_norm": 2.6035495962285604, "learning_rate": 8.531561982918546e-06, "loss": 0.5873, "step": 1575 }, { "epoch": 0.2731309980286389, "grad_norm": 2.4361541656409096, "learning_rate": 8.529574351536933e-06, "loss": 0.658, "step": 1576 }, { "epoch": 0.27330430449946924, "grad_norm": 2.0806795814347945, "learning_rate": 8.527585607740776e-06, "loss": 0.6221, "step": 1577 }, { "epoch": 0.2734776109702996, "grad_norm": 2.1953869333005738, "learning_rate": 8.525595752156871e-06, "loss": 0.615, "step": 1578 }, { "epoch": 0.27365091744112996, "grad_norm": 2.2878001117948226, "learning_rate": 8.523604785412358e-06, "loss": 0.5449, "step": 1579 }, { "epoch": 0.2738242239119603, "grad_norm": 2.9891695528923883, "learning_rate": 8.52161270813473e-06, "loss": 0.5966, "step": 1580 }, { "epoch": 0.2739975303827907, "grad_norm": 2.1739545138637615, "learning_rate": 8.51961952095183e-06, "loss": 0.5802, "step": 1581 }, { "epoch": 0.27417083685362104, "grad_norm": 3.3918068626434437, "learning_rate": 8.517625224491847e-06, "loss": 0.6093, "step": 1582 }, { "epoch": 0.2743441433244514, "grad_norm": 2.1227543236804642, "learning_rate": 8.515629819383325e-06, "loss": 0.5894, "step": 1583 }, { "epoch": 0.27451744979528175, "grad_norm": 2.5516474863668916, "learning_rate": 8.513633306255153e-06, "loss": 0.5501, "step": 1584 }, { "epoch": 0.2746907562661121, "grad_norm": 2.6279416627520726, "learning_rate": 8.511635685736575e-06, "loss": 0.564, "step": 1585 }, { "epoch": 0.2748640627369424, "grad_norm": 2.15812171719121, "learning_rate": 8.509636958457176e-06, "loss": 0.5243, "step": 1586 }, { "epoch": 0.2750373692077728, "grad_norm": 2.312928777537781, "learning_rate": 8.507637125046899e-06, "loss": 0.6101, "step": 1587 }, { "epoch": 0.27521067567860313, "grad_norm": 3.6601331564008306, "learning_rate": 8.505636186136024e-06, "loss": 0.6452, "step": 1588 }, { "epoch": 0.2753839821494335, "grad_norm": 2.7311747903514316, "learning_rate": 8.503634142355192e-06, "loss": 0.5344, "step": 1589 }, { "epoch": 0.27555728862026385, "grad_norm": 2.3871185530913217, "learning_rate": 8.501630994335384e-06, "loss": 0.596, "step": 1590 }, { "epoch": 0.2757305950910942, "grad_norm": 2.6147306949181153, "learning_rate": 8.499626742707931e-06, "loss": 0.6793, "step": 1591 }, { "epoch": 0.27590390156192457, "grad_norm": 2.004029490686666, "learning_rate": 8.497621388104514e-06, "loss": 0.5318, "step": 1592 }, { "epoch": 0.27607720803275493, "grad_norm": 2.076681135462163, "learning_rate": 8.495614931157156e-06, "loss": 0.5972, "step": 1593 }, { "epoch": 0.2762505145035853, "grad_norm": 2.3458474339018296, "learning_rate": 8.493607372498236e-06, "loss": 0.617, "step": 1594 }, { "epoch": 0.27642382097441565, "grad_norm": 2.918912793830786, "learning_rate": 8.491598712760474e-06, "loss": 0.5422, "step": 1595 }, { "epoch": 0.276597127445246, "grad_norm": 2.7771260698598508, "learning_rate": 8.489588952576935e-06, "loss": 0.6003, "step": 1596 }, { "epoch": 0.27677043391607636, "grad_norm": 4.6580610946837195, "learning_rate": 8.48757809258104e-06, "loss": 0.598, "step": 1597 }, { "epoch": 0.2769437403869067, "grad_norm": 2.1132158770906466, "learning_rate": 8.485566133406547e-06, "loss": 0.5168, "step": 1598 }, { "epoch": 0.277117046857737, "grad_norm": 2.789401848995922, "learning_rate": 8.483553075687563e-06, "loss": 0.6077, "step": 1599 }, { "epoch": 0.2772903533285674, "grad_norm": 2.928745166727736, "learning_rate": 8.481538920058547e-06, "loss": 0.6156, "step": 1600 }, { "epoch": 0.27746365979939774, "grad_norm": 2.0973862705713398, "learning_rate": 8.479523667154297e-06, "loss": 0.5293, "step": 1601 }, { "epoch": 0.2776369662702281, "grad_norm": 2.486144512578125, "learning_rate": 8.477507317609961e-06, "loss": 0.5327, "step": 1602 }, { "epoch": 0.27781027274105846, "grad_norm": 2.567385881586035, "learning_rate": 8.47548987206103e-06, "loss": 0.6436, "step": 1603 }, { "epoch": 0.2779835792118888, "grad_norm": 2.953585013251135, "learning_rate": 8.473471331143341e-06, "loss": 0.5296, "step": 1604 }, { "epoch": 0.2781568856827192, "grad_norm": 4.613168019450678, "learning_rate": 8.471451695493077e-06, "loss": 0.5451, "step": 1605 }, { "epoch": 0.27833019215354954, "grad_norm": 2.2601342524023735, "learning_rate": 8.469430965746767e-06, "loss": 0.4871, "step": 1606 }, { "epoch": 0.2785034986243799, "grad_norm": 4.350102738253138, "learning_rate": 8.467409142541284e-06, "loss": 0.5262, "step": 1607 }, { "epoch": 0.27867680509521026, "grad_norm": 2.5510118261959223, "learning_rate": 8.46538622651384e-06, "loss": 0.6126, "step": 1608 }, { "epoch": 0.2788501115660406, "grad_norm": 2.6765456628680377, "learning_rate": 8.463362218302005e-06, "loss": 0.5864, "step": 1609 }, { "epoch": 0.279023418036871, "grad_norm": 2.60138934515152, "learning_rate": 8.461337118543678e-06, "loss": 0.5075, "step": 1610 }, { "epoch": 0.27919672450770133, "grad_norm": 2.246734916445768, "learning_rate": 8.459310927877112e-06, "loss": 0.6349, "step": 1611 }, { "epoch": 0.27937003097853164, "grad_norm": 2.5470616733416165, "learning_rate": 8.4572836469409e-06, "loss": 0.5043, "step": 1612 }, { "epoch": 0.279543337449362, "grad_norm": 2.906121307077948, "learning_rate": 8.45525527637398e-06, "loss": 0.6049, "step": 1613 }, { "epoch": 0.27971664392019235, "grad_norm": 2.9228144268192904, "learning_rate": 8.453225816815629e-06, "loss": 0.4805, "step": 1614 }, { "epoch": 0.2798899503910227, "grad_norm": 2.545646467968784, "learning_rate": 8.451195268905477e-06, "loss": 0.5704, "step": 1615 }, { "epoch": 0.28006325686185307, "grad_norm": 2.17391909871991, "learning_rate": 8.449163633283488e-06, "loss": 0.533, "step": 1616 }, { "epoch": 0.28023656333268343, "grad_norm": 2.799618058112844, "learning_rate": 8.44713091058997e-06, "loss": 0.571, "step": 1617 }, { "epoch": 0.2804098698035138, "grad_norm": 2.3524514896336077, "learning_rate": 8.445097101465579e-06, "loss": 0.5511, "step": 1618 }, { "epoch": 0.28058317627434415, "grad_norm": 2.7495716816411484, "learning_rate": 8.443062206551307e-06, "loss": 0.6853, "step": 1619 }, { "epoch": 0.2807564827451745, "grad_norm": 7.55112502372396, "learning_rate": 8.441026226488493e-06, "loss": 0.6156, "step": 1620 }, { "epoch": 0.28092978921600487, "grad_norm": 2.9041774257627693, "learning_rate": 8.438989161918813e-06, "loss": 0.6444, "step": 1621 }, { "epoch": 0.2811030956868352, "grad_norm": 8.962572207588138, "learning_rate": 8.436951013484294e-06, "loss": 0.5151, "step": 1622 }, { "epoch": 0.2812764021576656, "grad_norm": 3.012357048827944, "learning_rate": 8.43491178182729e-06, "loss": 0.6334, "step": 1623 }, { "epoch": 0.28144970862849594, "grad_norm": 2.939935276161964, "learning_rate": 8.43287146759051e-06, "loss": 0.6177, "step": 1624 }, { "epoch": 0.28162301509932625, "grad_norm": 2.4453593948714167, "learning_rate": 8.430830071417e-06, "loss": 0.6286, "step": 1625 }, { "epoch": 0.2817963215701566, "grad_norm": 3.186499573579854, "learning_rate": 8.428787593950144e-06, "loss": 0.5749, "step": 1626 }, { "epoch": 0.28196962804098696, "grad_norm": 2.9824581588084706, "learning_rate": 8.426744035833667e-06, "loss": 0.5486, "step": 1627 }, { "epoch": 0.2821429345118173, "grad_norm": 2.375893997901972, "learning_rate": 8.424699397711642e-06, "loss": 0.6495, "step": 1628 }, { "epoch": 0.2823162409826477, "grad_norm": 4.551207772502021, "learning_rate": 8.42265368022847e-06, "loss": 0.6882, "step": 1629 }, { "epoch": 0.28248954745347804, "grad_norm": 1.9835213936627374, "learning_rate": 8.420606884028904e-06, "loss": 0.5404, "step": 1630 }, { "epoch": 0.2826628539243084, "grad_norm": 2.264779946057534, "learning_rate": 8.418559009758028e-06, "loss": 0.6551, "step": 1631 }, { "epoch": 0.28283616039513876, "grad_norm": 2.411706663784, "learning_rate": 8.416510058061273e-06, "loss": 0.521, "step": 1632 }, { "epoch": 0.2830094668659691, "grad_norm": 2.116640492040768, "learning_rate": 8.414460029584406e-06, "loss": 0.6153, "step": 1633 }, { "epoch": 0.2831827733367995, "grad_norm": 2.6064326758023997, "learning_rate": 8.412408924973532e-06, "loss": 0.5892, "step": 1634 }, { "epoch": 0.28335607980762983, "grad_norm": 2.5246915502333223, "learning_rate": 8.410356744875097e-06, "loss": 0.5423, "step": 1635 }, { "epoch": 0.2835293862784602, "grad_norm": 2.448939965839782, "learning_rate": 8.408303489935887e-06, "loss": 0.4862, "step": 1636 }, { "epoch": 0.28370269274929055, "grad_norm": 2.681023244859922, "learning_rate": 8.406249160803023e-06, "loss": 0.6316, "step": 1637 }, { "epoch": 0.28387599922012086, "grad_norm": 1.9460411671515758, "learning_rate": 8.404193758123974e-06, "loss": 0.5375, "step": 1638 }, { "epoch": 0.2840493056909512, "grad_norm": 2.687614521569758, "learning_rate": 8.40213728254653e-06, "loss": 0.6436, "step": 1639 }, { "epoch": 0.2842226121617816, "grad_norm": 2.2426307540059325, "learning_rate": 8.400079734718839e-06, "loss": 0.5631, "step": 1640 }, { "epoch": 0.28439591863261193, "grad_norm": 3.2679778518451457, "learning_rate": 8.39802111528937e-06, "loss": 0.5644, "step": 1641 }, { "epoch": 0.2845692251034423, "grad_norm": 2.1577890517250142, "learning_rate": 8.395961424906941e-06, "loss": 0.5699, "step": 1642 }, { "epoch": 0.28474253157427265, "grad_norm": 2.28614611696095, "learning_rate": 8.393900664220707e-06, "loss": 0.5581, "step": 1643 }, { "epoch": 0.284915838045103, "grad_norm": 2.102973145710807, "learning_rate": 8.391838833880152e-06, "loss": 0.5661, "step": 1644 }, { "epoch": 0.28508914451593337, "grad_norm": 27.879223660688083, "learning_rate": 8.389775934535105e-06, "loss": 0.5026, "step": 1645 }, { "epoch": 0.2852624509867637, "grad_norm": 2.9711356633858355, "learning_rate": 8.387711966835727e-06, "loss": 0.6421, "step": 1646 }, { "epoch": 0.2854357574575941, "grad_norm": 2.1686244683647042, "learning_rate": 8.385646931432519e-06, "loss": 0.5479, "step": 1647 }, { "epoch": 0.28560906392842444, "grad_norm": 3.3097101482924223, "learning_rate": 8.383580828976318e-06, "loss": 0.5468, "step": 1648 }, { "epoch": 0.2857823703992548, "grad_norm": 2.21716648550633, "learning_rate": 8.381513660118295e-06, "loss": 0.5537, "step": 1649 }, { "epoch": 0.28595567687008516, "grad_norm": 2.487697388974229, "learning_rate": 8.37944542550996e-06, "loss": 0.6032, "step": 1650 }, { "epoch": 0.28612898334091547, "grad_norm": 2.9046294464440785, "learning_rate": 8.377376125803158e-06, "loss": 0.5786, "step": 1651 }, { "epoch": 0.2863022898117458, "grad_norm": 2.380491037304764, "learning_rate": 8.375305761650069e-06, "loss": 0.5878, "step": 1652 }, { "epoch": 0.2864755962825762, "grad_norm": 2.047479687846398, "learning_rate": 8.373234333703211e-06, "loss": 0.496, "step": 1653 }, { "epoch": 0.28664890275340654, "grad_norm": 2.9855218841411664, "learning_rate": 8.37116184261543e-06, "loss": 0.5174, "step": 1654 }, { "epoch": 0.2868222092242369, "grad_norm": 1.8610411156212638, "learning_rate": 8.369088289039915e-06, "loss": 0.477, "step": 1655 }, { "epoch": 0.28699551569506726, "grad_norm": 3.300217238165477, "learning_rate": 8.367013673630189e-06, "loss": 0.5994, "step": 1656 }, { "epoch": 0.2871688221658976, "grad_norm": 2.094366177281604, "learning_rate": 8.364937997040103e-06, "loss": 0.565, "step": 1657 }, { "epoch": 0.287342128636728, "grad_norm": 2.169060140920137, "learning_rate": 8.362861259923853e-06, "loss": 0.5412, "step": 1658 }, { "epoch": 0.28751543510755834, "grad_norm": 3.3785933714821557, "learning_rate": 8.36078346293596e-06, "loss": 0.564, "step": 1659 }, { "epoch": 0.2876887415783887, "grad_norm": 2.8705463893297876, "learning_rate": 8.358704606731283e-06, "loss": 0.6276, "step": 1660 }, { "epoch": 0.28786204804921905, "grad_norm": 3.6400629014997397, "learning_rate": 8.356624691965012e-06, "loss": 0.5046, "step": 1661 }, { "epoch": 0.2880353545200494, "grad_norm": 2.768958001937491, "learning_rate": 8.354543719292676e-06, "loss": 0.5214, "step": 1662 }, { "epoch": 0.2882086609908798, "grad_norm": 2.3411024816625825, "learning_rate": 8.352461689370134e-06, "loss": 0.6188, "step": 1663 }, { "epoch": 0.2883819674617101, "grad_norm": 2.501516879935599, "learning_rate": 8.350378602853578e-06, "loss": 0.6223, "step": 1664 }, { "epoch": 0.28855527393254043, "grad_norm": 2.2703110764135865, "learning_rate": 8.348294460399534e-06, "loss": 0.5886, "step": 1665 }, { "epoch": 0.2887285804033708, "grad_norm": 2.411685149893689, "learning_rate": 8.346209262664859e-06, "loss": 0.5542, "step": 1666 }, { "epoch": 0.28890188687420115, "grad_norm": 2.3400786255536605, "learning_rate": 8.344123010306744e-06, "loss": 0.5788, "step": 1667 }, { "epoch": 0.2890751933450315, "grad_norm": 3.207704248531586, "learning_rate": 8.342035703982714e-06, "loss": 0.6163, "step": 1668 }, { "epoch": 0.28924849981586187, "grad_norm": 2.297741753452783, "learning_rate": 8.339947344350624e-06, "loss": 0.6285, "step": 1669 }, { "epoch": 0.28942180628669223, "grad_norm": 3.2670090170802037, "learning_rate": 8.337857932068661e-06, "loss": 0.6023, "step": 1670 }, { "epoch": 0.2895951127575226, "grad_norm": 2.252945190303866, "learning_rate": 8.335767467795343e-06, "loss": 0.5052, "step": 1671 }, { "epoch": 0.28976841922835295, "grad_norm": 2.254700288952567, "learning_rate": 8.333675952189522e-06, "loss": 0.5903, "step": 1672 }, { "epoch": 0.2899417256991833, "grad_norm": 2.6314955202058097, "learning_rate": 8.331583385910383e-06, "loss": 0.596, "step": 1673 }, { "epoch": 0.29011503217001366, "grad_norm": 6.315912334775274, "learning_rate": 8.329489769617433e-06, "loss": 0.6201, "step": 1674 }, { "epoch": 0.290288338640844, "grad_norm": 2.116380926345746, "learning_rate": 8.327395103970523e-06, "loss": 0.5757, "step": 1675 }, { "epoch": 0.2904616451116744, "grad_norm": 2.08208160275017, "learning_rate": 8.325299389629821e-06, "loss": 0.5402, "step": 1676 }, { "epoch": 0.2906349515825047, "grad_norm": 4.002470939233773, "learning_rate": 8.32320262725584e-06, "loss": 0.6089, "step": 1677 }, { "epoch": 0.29080825805333504, "grad_norm": 2.255197352556506, "learning_rate": 8.321104817509409e-06, "loss": 0.5838, "step": 1678 }, { "epoch": 0.2909815645241654, "grad_norm": 2.479034528193481, "learning_rate": 8.319005961051698e-06, "loss": 0.5821, "step": 1679 }, { "epoch": 0.29115487099499576, "grad_norm": 2.4809020753421374, "learning_rate": 8.316906058544201e-06, "loss": 0.4779, "step": 1680 }, { "epoch": 0.2913281774658261, "grad_norm": 2.284806522732072, "learning_rate": 8.314805110648745e-06, "loss": 0.5893, "step": 1681 }, { "epoch": 0.2915014839366565, "grad_norm": 2.252730647322767, "learning_rate": 8.312703118027483e-06, "loss": 0.5108, "step": 1682 }, { "epoch": 0.29167479040748684, "grad_norm": 2.8051486458221313, "learning_rate": 8.3106000813429e-06, "loss": 0.6528, "step": 1683 }, { "epoch": 0.2918480968783172, "grad_norm": 2.511266280823336, "learning_rate": 8.30849600125781e-06, "loss": 0.5479, "step": 1684 }, { "epoch": 0.29202140334914756, "grad_norm": 2.862345865552162, "learning_rate": 8.306390878435354e-06, "loss": 0.5031, "step": 1685 }, { "epoch": 0.2921947098199779, "grad_norm": 2.5733184504278377, "learning_rate": 8.304284713539002e-06, "loss": 0.6021, "step": 1686 }, { "epoch": 0.2923680162908083, "grad_norm": 3.5667871204268784, "learning_rate": 8.302177507232553e-06, "loss": 0.499, "step": 1687 }, { "epoch": 0.29254132276163863, "grad_norm": 1.934834442712194, "learning_rate": 8.300069260180138e-06, "loss": 0.5025, "step": 1688 }, { "epoch": 0.292714629232469, "grad_norm": 4.019357493769106, "learning_rate": 8.29795997304621e-06, "loss": 0.6642, "step": 1689 }, { "epoch": 0.2928879357032993, "grad_norm": 2.044143212872519, "learning_rate": 8.29584964649555e-06, "loss": 0.5055, "step": 1690 }, { "epoch": 0.29306124217412965, "grad_norm": 2.4304210793812664, "learning_rate": 8.293738281193274e-06, "loss": 0.6242, "step": 1691 }, { "epoch": 0.29323454864496, "grad_norm": 3.339073289690245, "learning_rate": 8.291625877804816e-06, "loss": 0.6432, "step": 1692 }, { "epoch": 0.2934078551157904, "grad_norm": 2.2928650984295547, "learning_rate": 8.289512436995941e-06, "loss": 0.5317, "step": 1693 }, { "epoch": 0.29358116158662073, "grad_norm": 2.903043374231574, "learning_rate": 8.287397959432744e-06, "loss": 0.6205, "step": 1694 }, { "epoch": 0.2937544680574511, "grad_norm": 2.1094880796472055, "learning_rate": 8.285282445781642e-06, "loss": 0.5107, "step": 1695 }, { "epoch": 0.29392777452828145, "grad_norm": 2.412283839080639, "learning_rate": 8.283165896709381e-06, "loss": 0.6413, "step": 1696 }, { "epoch": 0.2941010809991118, "grad_norm": 2.0144580235220815, "learning_rate": 8.281048312883036e-06, "loss": 0.5264, "step": 1697 }, { "epoch": 0.29427438746994217, "grad_norm": 2.465047014211599, "learning_rate": 8.27892969497e-06, "loss": 0.5009, "step": 1698 }, { "epoch": 0.2944476939407725, "grad_norm": 2.1242814924706157, "learning_rate": 8.276810043637998e-06, "loss": 0.6123, "step": 1699 }, { "epoch": 0.2946210004116029, "grad_norm": 2.5170593170266025, "learning_rate": 8.274689359555083e-06, "loss": 0.6098, "step": 1700 }, { "epoch": 0.29479430688243324, "grad_norm": 6.2198976160349515, "learning_rate": 8.272567643389627e-06, "loss": 0.5676, "step": 1701 }, { "epoch": 0.2949676133532636, "grad_norm": 2.4262473550544894, "learning_rate": 8.270444895810332e-06, "loss": 0.5619, "step": 1702 }, { "epoch": 0.2951409198240939, "grad_norm": 2.762879816449249, "learning_rate": 8.268321117486224e-06, "loss": 0.6039, "step": 1703 }, { "epoch": 0.29531422629492426, "grad_norm": 2.411327275888432, "learning_rate": 8.266196309086652e-06, "loss": 0.5453, "step": 1704 }, { "epoch": 0.2954875327657546, "grad_norm": 2.157851357463099, "learning_rate": 8.26407047128129e-06, "loss": 0.5325, "step": 1705 }, { "epoch": 0.295660839236585, "grad_norm": 2.332695761712086, "learning_rate": 8.26194360474014e-06, "loss": 0.6418, "step": 1706 }, { "epoch": 0.29583414570741534, "grad_norm": 2.414445740275999, "learning_rate": 8.259815710133525e-06, "loss": 0.5581, "step": 1707 }, { "epoch": 0.2960074521782457, "grad_norm": 2.9246310311742816, "learning_rate": 8.257686788132093e-06, "loss": 0.5498, "step": 1708 }, { "epoch": 0.29618075864907606, "grad_norm": 2.1439227828597924, "learning_rate": 8.255556839406814e-06, "loss": 0.6469, "step": 1709 }, { "epoch": 0.2963540651199064, "grad_norm": 2.805687891222462, "learning_rate": 8.253425864628984e-06, "loss": 0.6193, "step": 1710 }, { "epoch": 0.2965273715907368, "grad_norm": 3.517330663272854, "learning_rate": 8.251293864470221e-06, "loss": 0.63, "step": 1711 }, { "epoch": 0.29670067806156714, "grad_norm": 2.1081931182741234, "learning_rate": 8.249160839602465e-06, "loss": 0.5345, "step": 1712 }, { "epoch": 0.2968739845323975, "grad_norm": 2.290058098520787, "learning_rate": 8.247026790697986e-06, "loss": 0.5643, "step": 1713 }, { "epoch": 0.29704729100322785, "grad_norm": 2.313370285560867, "learning_rate": 8.244891718429365e-06, "loss": 0.6393, "step": 1714 }, { "epoch": 0.2972205974740582, "grad_norm": 2.1035432165335908, "learning_rate": 8.242755623469516e-06, "loss": 0.5311, "step": 1715 }, { "epoch": 0.2973939039448885, "grad_norm": 2.4641875936043642, "learning_rate": 8.24061850649167e-06, "loss": 0.5655, "step": 1716 }, { "epoch": 0.2975672104157189, "grad_norm": 2.1588283377508084, "learning_rate": 8.238480368169382e-06, "loss": 0.6064, "step": 1717 }, { "epoch": 0.29774051688654923, "grad_norm": 2.217167765217007, "learning_rate": 8.236341209176526e-06, "loss": 0.5806, "step": 1718 }, { "epoch": 0.2979138233573796, "grad_norm": 2.0424633115322375, "learning_rate": 8.234201030187301e-06, "loss": 0.558, "step": 1719 }, { "epoch": 0.29808712982820995, "grad_norm": 3.6696112673957413, "learning_rate": 8.23205983187623e-06, "loss": 0.4923, "step": 1720 }, { "epoch": 0.2982604362990403, "grad_norm": 2.2440422473824637, "learning_rate": 8.229917614918148e-06, "loss": 0.5787, "step": 1721 }, { "epoch": 0.29843374276987067, "grad_norm": 2.4564456762557687, "learning_rate": 8.227774379988218e-06, "loss": 0.6083, "step": 1722 }, { "epoch": 0.29860704924070103, "grad_norm": 2.765028688786552, "learning_rate": 8.225630127761925e-06, "loss": 0.6087, "step": 1723 }, { "epoch": 0.2987803557115314, "grad_norm": 2.169701288484541, "learning_rate": 8.223484858915071e-06, "loss": 0.596, "step": 1724 }, { "epoch": 0.29895366218236175, "grad_norm": 2.019813674054908, "learning_rate": 8.221338574123778e-06, "loss": 0.4893, "step": 1725 }, { "epoch": 0.2991269686531921, "grad_norm": 2.2864067270113018, "learning_rate": 8.219191274064493e-06, "loss": 0.5127, "step": 1726 }, { "epoch": 0.29930027512402246, "grad_norm": 2.092784202807352, "learning_rate": 8.217042959413976e-06, "loss": 0.6043, "step": 1727 }, { "epoch": 0.2994735815948528, "grad_norm": 2.2253216094092174, "learning_rate": 8.214893630849311e-06, "loss": 0.6026, "step": 1728 }, { "epoch": 0.2996468880656831, "grad_norm": 2.373692125388148, "learning_rate": 8.212743289047904e-06, "loss": 0.5802, "step": 1729 }, { "epoch": 0.2998201945365135, "grad_norm": 2.0250434283861045, "learning_rate": 8.210591934687475e-06, "loss": 0.4414, "step": 1730 }, { "epoch": 0.29999350100734384, "grad_norm": 2.6304301694250753, "learning_rate": 8.208439568446065e-06, "loss": 0.4929, "step": 1731 }, { "epoch": 0.3001668074781742, "grad_norm": 4.712432326360712, "learning_rate": 8.206286191002036e-06, "loss": 0.5634, "step": 1732 }, { "epoch": 0.30034011394900456, "grad_norm": 2.1370622427020334, "learning_rate": 8.204131803034066e-06, "loss": 0.5869, "step": 1733 }, { "epoch": 0.3005134204198349, "grad_norm": 2.062037755921432, "learning_rate": 8.201976405221155e-06, "loss": 0.6016, "step": 1734 }, { "epoch": 0.3006867268906653, "grad_norm": 4.167315628941457, "learning_rate": 8.199819998242615e-06, "loss": 0.5784, "step": 1735 }, { "epoch": 0.30086003336149564, "grad_norm": 3.052687587503479, "learning_rate": 8.197662582778084e-06, "loss": 0.6617, "step": 1736 }, { "epoch": 0.301033339832326, "grad_norm": 2.2183738056221727, "learning_rate": 8.195504159507507e-06, "loss": 0.6272, "step": 1737 }, { "epoch": 0.30120664630315636, "grad_norm": 2.42277750522473, "learning_rate": 8.193344729111162e-06, "loss": 0.5634, "step": 1738 }, { "epoch": 0.3013799527739867, "grad_norm": 2.5285357364374854, "learning_rate": 8.191184292269632e-06, "loss": 0.5908, "step": 1739 }, { "epoch": 0.3015532592448171, "grad_norm": 2.3193402068945885, "learning_rate": 8.189022849663819e-06, "loss": 0.554, "step": 1740 }, { "epoch": 0.30172656571564743, "grad_norm": 4.068063201917682, "learning_rate": 8.186860401974947e-06, "loss": 0.6116, "step": 1741 }, { "epoch": 0.30189987218647774, "grad_norm": 2.029623706787889, "learning_rate": 8.184696949884552e-06, "loss": 0.5764, "step": 1742 }, { "epoch": 0.3020731786573081, "grad_norm": 6.1658812750688465, "learning_rate": 8.182532494074488e-06, "loss": 0.5268, "step": 1743 }, { "epoch": 0.30224648512813845, "grad_norm": 2.2649037967387873, "learning_rate": 8.18036703522693e-06, "loss": 0.5606, "step": 1744 }, { "epoch": 0.3024197915989688, "grad_norm": 2.60037733031908, "learning_rate": 8.17820057402436e-06, "loss": 0.6073, "step": 1745 }, { "epoch": 0.30259309806979917, "grad_norm": 2.119545137761765, "learning_rate": 8.176033111149582e-06, "loss": 0.4997, "step": 1746 }, { "epoch": 0.30276640454062953, "grad_norm": 2.244419236046697, "learning_rate": 8.173864647285714e-06, "loss": 0.528, "step": 1747 }, { "epoch": 0.3029397110114599, "grad_norm": 2.387378272724694, "learning_rate": 8.17169518311619e-06, "loss": 0.5164, "step": 1748 }, { "epoch": 0.30311301748229025, "grad_norm": 2.265855964059737, "learning_rate": 8.169524719324759e-06, "loss": 0.6105, "step": 1749 }, { "epoch": 0.3032863239531206, "grad_norm": 2.3339750557167, "learning_rate": 8.167353256595488e-06, "loss": 0.4741, "step": 1750 }, { "epoch": 0.30345963042395097, "grad_norm": 2.7116011062195735, "learning_rate": 8.165180795612753e-06, "loss": 0.5161, "step": 1751 }, { "epoch": 0.3036329368947813, "grad_norm": 2.4045392428357553, "learning_rate": 8.163007337061248e-06, "loss": 0.6373, "step": 1752 }, { "epoch": 0.3038062433656117, "grad_norm": 2.1555701410755437, "learning_rate": 8.160832881625984e-06, "loss": 0.4407, "step": 1753 }, { "epoch": 0.30397954983644204, "grad_norm": 1.926240682306643, "learning_rate": 8.158657429992279e-06, "loss": 0.6249, "step": 1754 }, { "epoch": 0.30415285630727235, "grad_norm": 3.189182643437652, "learning_rate": 8.15648098284577e-06, "loss": 0.6093, "step": 1755 }, { "epoch": 0.3043261627781027, "grad_norm": 2.4303673183835013, "learning_rate": 8.15430354087241e-06, "loss": 0.5317, "step": 1756 }, { "epoch": 0.30449946924893306, "grad_norm": 2.905480028484914, "learning_rate": 8.15212510475846e-06, "loss": 0.6166, "step": 1757 }, { "epoch": 0.3046727757197634, "grad_norm": 2.3802963872024145, "learning_rate": 8.149945675190497e-06, "loss": 0.5852, "step": 1758 }, { "epoch": 0.3048460821905938, "grad_norm": 2.369396315945079, "learning_rate": 8.14776525285541e-06, "loss": 0.5675, "step": 1759 }, { "epoch": 0.30501938866142414, "grad_norm": 2.409222173436776, "learning_rate": 8.145583838440407e-06, "loss": 0.5462, "step": 1760 }, { "epoch": 0.3051926951322545, "grad_norm": 2.535519249529756, "learning_rate": 8.143401432632998e-06, "loss": 0.575, "step": 1761 }, { "epoch": 0.30536600160308486, "grad_norm": 2.690012738435325, "learning_rate": 8.141218036121012e-06, "loss": 0.5091, "step": 1762 }, { "epoch": 0.3055393080739152, "grad_norm": 1.9869675147308454, "learning_rate": 8.13903364959259e-06, "loss": 0.5803, "step": 1763 }, { "epoch": 0.3057126145447456, "grad_norm": 4.900315414458131, "learning_rate": 8.136848273736184e-06, "loss": 0.5068, "step": 1764 }, { "epoch": 0.30588592101557593, "grad_norm": 3.245951188682303, "learning_rate": 8.134661909240558e-06, "loss": 0.567, "step": 1765 }, { "epoch": 0.3060592274864063, "grad_norm": 2.3224702944008175, "learning_rate": 8.13247455679479e-06, "loss": 0.5674, "step": 1766 }, { "epoch": 0.30623253395723665, "grad_norm": 2.703501873234739, "learning_rate": 8.130286217088264e-06, "loss": 0.5638, "step": 1767 }, { "epoch": 0.30640584042806696, "grad_norm": 2.3954603485968393, "learning_rate": 8.128096890810678e-06, "loss": 0.5974, "step": 1768 }, { "epoch": 0.3065791468988973, "grad_norm": 3.258470034734917, "learning_rate": 8.125906578652041e-06, "loss": 0.6579, "step": 1769 }, { "epoch": 0.3067524533697277, "grad_norm": 2.9711159464219006, "learning_rate": 8.123715281302675e-06, "loss": 0.645, "step": 1770 }, { "epoch": 0.30692575984055803, "grad_norm": 2.420124787873266, "learning_rate": 8.121522999453208e-06, "loss": 0.5564, "step": 1771 }, { "epoch": 0.3070990663113884, "grad_norm": 5.090687268462443, "learning_rate": 8.119329733794583e-06, "loss": 0.5151, "step": 1772 }, { "epoch": 0.30727237278221875, "grad_norm": 2.355551025945157, "learning_rate": 8.11713548501805e-06, "loss": 0.5959, "step": 1773 }, { "epoch": 0.3074456792530491, "grad_norm": 2.4618004428053153, "learning_rate": 8.11494025381517e-06, "loss": 0.6214, "step": 1774 }, { "epoch": 0.30761898572387947, "grad_norm": 3.25023576503592, "learning_rate": 8.11274404087781e-06, "loss": 0.6218, "step": 1775 }, { "epoch": 0.3077922921947098, "grad_norm": 2.528479459349837, "learning_rate": 8.110546846898151e-06, "loss": 0.5026, "step": 1776 }, { "epoch": 0.3079655986655402, "grad_norm": 2.9179888780628653, "learning_rate": 8.108348672568683e-06, "loss": 0.6256, "step": 1777 }, { "epoch": 0.30813890513637054, "grad_norm": 2.8063159736993484, "learning_rate": 8.106149518582204e-06, "loss": 0.6261, "step": 1778 }, { "epoch": 0.3083122116072009, "grad_norm": 2.7643902228850057, "learning_rate": 8.103949385631818e-06, "loss": 0.5532, "step": 1779 }, { "epoch": 0.30848551807803126, "grad_norm": 2.4576027312440707, "learning_rate": 8.101748274410943e-06, "loss": 0.6618, "step": 1780 }, { "epoch": 0.30865882454886157, "grad_norm": 2.220004250124976, "learning_rate": 8.099546185613301e-06, "loss": 0.6153, "step": 1781 }, { "epoch": 0.3088321310196919, "grad_norm": 2.391137718866789, "learning_rate": 8.097343119932924e-06, "loss": 0.6184, "step": 1782 }, { "epoch": 0.3090054374905223, "grad_norm": 2.5005826281910157, "learning_rate": 8.095139078064149e-06, "loss": 0.5204, "step": 1783 }, { "epoch": 0.30917874396135264, "grad_norm": 1.8866958818150594, "learning_rate": 8.092934060701627e-06, "loss": 0.5642, "step": 1784 }, { "epoch": 0.309352050432183, "grad_norm": 2.059106612019964, "learning_rate": 8.090728068540307e-06, "loss": 0.4844, "step": 1785 }, { "epoch": 0.30952535690301336, "grad_norm": 4.006922004986522, "learning_rate": 8.088521102275455e-06, "loss": 0.5616, "step": 1786 }, { "epoch": 0.3096986633738437, "grad_norm": 2.2024442817978853, "learning_rate": 8.086313162602641e-06, "loss": 0.6445, "step": 1787 }, { "epoch": 0.3098719698446741, "grad_norm": 2.0240751657543994, "learning_rate": 8.084104250217736e-06, "loss": 0.4677, "step": 1788 }, { "epoch": 0.31004527631550444, "grad_norm": 3.660093094030083, "learning_rate": 8.081894365816923e-06, "loss": 0.5635, "step": 1789 }, { "epoch": 0.3102185827863348, "grad_norm": 3.599196857760486, "learning_rate": 8.079683510096693e-06, "loss": 0.6156, "step": 1790 }, { "epoch": 0.31039188925716515, "grad_norm": 2.7765115475210824, "learning_rate": 8.077471683753839e-06, "loss": 0.5754, "step": 1791 }, { "epoch": 0.3105651957279955, "grad_norm": 2.9029144070685287, "learning_rate": 8.075258887485463e-06, "loss": 0.5626, "step": 1792 }, { "epoch": 0.31073850219882587, "grad_norm": 2.21238434661309, "learning_rate": 8.073045121988965e-06, "loss": 0.5929, "step": 1793 }, { "epoch": 0.3109118086696562, "grad_norm": 1.9574827900535892, "learning_rate": 8.070830387962064e-06, "loss": 0.5763, "step": 1794 }, { "epoch": 0.31108511514048653, "grad_norm": 3.4881273197321248, "learning_rate": 8.068614686102773e-06, "loss": 0.5998, "step": 1795 }, { "epoch": 0.3112584216113169, "grad_norm": 2.043348557826173, "learning_rate": 8.066398017109416e-06, "loss": 0.5235, "step": 1796 }, { "epoch": 0.31143172808214725, "grad_norm": 2.442554315849746, "learning_rate": 8.064180381680617e-06, "loss": 0.6264, "step": 1797 }, { "epoch": 0.3116050345529776, "grad_norm": 2.2600868424881755, "learning_rate": 8.061961780515311e-06, "loss": 0.6538, "step": 1798 }, { "epoch": 0.31177834102380797, "grad_norm": 2.0459990381664346, "learning_rate": 8.059742214312729e-06, "loss": 0.5374, "step": 1799 }, { "epoch": 0.31195164749463833, "grad_norm": 1.9885769457544897, "learning_rate": 8.057521683772413e-06, "loss": 0.4948, "step": 1800 }, { "epoch": 0.3121249539654687, "grad_norm": 2.223538586015921, "learning_rate": 8.05530018959421e-06, "loss": 0.5834, "step": 1801 }, { "epoch": 0.31229826043629905, "grad_norm": 5.279799076572619, "learning_rate": 8.053077732478261e-06, "loss": 0.624, "step": 1802 }, { "epoch": 0.3124715669071294, "grad_norm": 2.239132661309759, "learning_rate": 8.050854313125021e-06, "loss": 0.5075, "step": 1803 }, { "epoch": 0.31264487337795976, "grad_norm": 2.7018513656383867, "learning_rate": 8.048629932235245e-06, "loss": 0.594, "step": 1804 }, { "epoch": 0.3128181798487901, "grad_norm": 2.248234226653029, "learning_rate": 8.046404590509987e-06, "loss": 0.5066, "step": 1805 }, { "epoch": 0.3129914863196205, "grad_norm": 2.5051887881888124, "learning_rate": 8.044178288650609e-06, "loss": 0.5883, "step": 1806 }, { "epoch": 0.31316479279045084, "grad_norm": 2.375586066948158, "learning_rate": 8.041951027358774e-06, "loss": 0.5656, "step": 1807 }, { "epoch": 0.31333809926128114, "grad_norm": 3.2820709695638213, "learning_rate": 8.039722807336447e-06, "loss": 0.5819, "step": 1808 }, { "epoch": 0.3135114057321115, "grad_norm": 2.1546259235145513, "learning_rate": 8.037493629285892e-06, "loss": 0.5458, "step": 1809 }, { "epoch": 0.31368471220294186, "grad_norm": 4.481582727689905, "learning_rate": 8.035263493909683e-06, "loss": 0.5824, "step": 1810 }, { "epoch": 0.3138580186737722, "grad_norm": 2.2774183602068985, "learning_rate": 8.03303240191069e-06, "loss": 0.5354, "step": 1811 }, { "epoch": 0.3140313251446026, "grad_norm": 2.03814161642138, "learning_rate": 8.030800353992083e-06, "loss": 0.6325, "step": 1812 }, { "epoch": 0.31420463161543294, "grad_norm": 2.8845782554091257, "learning_rate": 8.028567350857336e-06, "loss": 0.6708, "step": 1813 }, { "epoch": 0.3143779380862633, "grad_norm": 3.278963739674634, "learning_rate": 8.026333393210226e-06, "loss": 0.6756, "step": 1814 }, { "epoch": 0.31455124455709366, "grad_norm": 2.740092818852044, "learning_rate": 8.024098481754825e-06, "loss": 0.5253, "step": 1815 }, { "epoch": 0.314724551027924, "grad_norm": 2.6470902896442903, "learning_rate": 8.021862617195513e-06, "loss": 0.5989, "step": 1816 }, { "epoch": 0.3148978574987544, "grad_norm": 2.926047986700922, "learning_rate": 8.019625800236962e-06, "loss": 0.527, "step": 1817 }, { "epoch": 0.31507116396958473, "grad_norm": 11.47635398898297, "learning_rate": 8.017388031584155e-06, "loss": 0.5885, "step": 1818 }, { "epoch": 0.3152444704404151, "grad_norm": 2.638634887083582, "learning_rate": 8.015149311942363e-06, "loss": 0.6164, "step": 1819 }, { "epoch": 0.31541777691124545, "grad_norm": 2.2385273797625715, "learning_rate": 8.012909642017164e-06, "loss": 0.5843, "step": 1820 }, { "epoch": 0.31559108338207575, "grad_norm": 2.495681737963428, "learning_rate": 8.010669022514435e-06, "loss": 0.5873, "step": 1821 }, { "epoch": 0.3157643898529061, "grad_norm": 2.5657173858185165, "learning_rate": 8.00842745414035e-06, "loss": 0.5744, "step": 1822 }, { "epoch": 0.31593769632373647, "grad_norm": 4.400925453432447, "learning_rate": 8.006184937601387e-06, "loss": 0.5703, "step": 1823 }, { "epoch": 0.31611100279456683, "grad_norm": 3.0374330144823007, "learning_rate": 8.003941473604312e-06, "loss": 0.5937, "step": 1824 }, { "epoch": 0.3162843092653972, "grad_norm": 2.2728646872220284, "learning_rate": 8.001697062856203e-06, "loss": 0.5479, "step": 1825 }, { "epoch": 0.31645761573622755, "grad_norm": 2.043550552995797, "learning_rate": 7.999451706064425e-06, "loss": 0.5455, "step": 1826 }, { "epoch": 0.3166309222070579, "grad_norm": 2.3971407690827378, "learning_rate": 7.997205403936647e-06, "loss": 0.62, "step": 1827 }, { "epoch": 0.31680422867788827, "grad_norm": 2.3959782327427486, "learning_rate": 7.994958157180842e-06, "loss": 0.6362, "step": 1828 }, { "epoch": 0.3169775351487186, "grad_norm": 2.3136983352246676, "learning_rate": 7.992709966505266e-06, "loss": 0.5554, "step": 1829 }, { "epoch": 0.317150841619549, "grad_norm": 3.710648591845428, "learning_rate": 7.99046083261848e-06, "loss": 0.5844, "step": 1830 }, { "epoch": 0.31732414809037934, "grad_norm": 2.587890359952439, "learning_rate": 7.988210756229348e-06, "loss": 0.5984, "step": 1831 }, { "epoch": 0.3174974545612097, "grad_norm": 2.8731219002115487, "learning_rate": 7.985959738047026e-06, "loss": 0.5427, "step": 1832 }, { "epoch": 0.31767076103204006, "grad_norm": 2.1623045914137484, "learning_rate": 7.983707778780962e-06, "loss": 0.5364, "step": 1833 }, { "epoch": 0.31784406750287036, "grad_norm": 3.2207761470388103, "learning_rate": 7.981454879140906e-06, "loss": 0.5691, "step": 1834 }, { "epoch": 0.3180173739737007, "grad_norm": 2.1149861214574073, "learning_rate": 7.979201039836904e-06, "loss": 0.5597, "step": 1835 }, { "epoch": 0.3181906804445311, "grad_norm": 2.6779522415256602, "learning_rate": 7.9769462615793e-06, "loss": 0.5191, "step": 1836 }, { "epoch": 0.31836398691536144, "grad_norm": 7.271031849913658, "learning_rate": 7.974690545078727e-06, "loss": 0.524, "step": 1837 }, { "epoch": 0.3185372933861918, "grad_norm": 2.594805544641353, "learning_rate": 7.972433891046123e-06, "loss": 0.573, "step": 1838 }, { "epoch": 0.31871059985702216, "grad_norm": 2.087780232547036, "learning_rate": 7.970176300192715e-06, "loss": 0.5201, "step": 1839 }, { "epoch": 0.3188839063278525, "grad_norm": 2.136511465765221, "learning_rate": 7.967917773230025e-06, "loss": 0.5247, "step": 1840 }, { "epoch": 0.3190572127986829, "grad_norm": 1.974536847688607, "learning_rate": 7.965658310869874e-06, "loss": 0.5195, "step": 1841 }, { "epoch": 0.31923051926951324, "grad_norm": 2.84690444719475, "learning_rate": 7.963397913824376e-06, "loss": 0.6756, "step": 1842 }, { "epoch": 0.3194038257403436, "grad_norm": 2.3503201156532603, "learning_rate": 7.96113658280594e-06, "loss": 0.576, "step": 1843 }, { "epoch": 0.31957713221117395, "grad_norm": 2.0134347138643043, "learning_rate": 7.958874318527265e-06, "loss": 0.5821, "step": 1844 }, { "epoch": 0.3197504386820043, "grad_norm": 2.0327802231809637, "learning_rate": 7.956611121701353e-06, "loss": 0.571, "step": 1845 }, { "epoch": 0.31992374515283467, "grad_norm": 2.2260724322427543, "learning_rate": 7.95434699304149e-06, "loss": 0.5573, "step": 1846 }, { "epoch": 0.320097051623665, "grad_norm": 3.189015125699899, "learning_rate": 7.952081933261267e-06, "loss": 0.6154, "step": 1847 }, { "epoch": 0.32027035809449533, "grad_norm": 2.6476146786996586, "learning_rate": 7.949815943074555e-06, "loss": 0.4754, "step": 1848 }, { "epoch": 0.3204436645653257, "grad_norm": 2.1034784915168077, "learning_rate": 7.947549023195528e-06, "loss": 0.6591, "step": 1849 }, { "epoch": 0.32061697103615605, "grad_norm": 2.1609568240136006, "learning_rate": 7.945281174338652e-06, "loss": 0.5813, "step": 1850 }, { "epoch": 0.3207902775069864, "grad_norm": 2.720817121999663, "learning_rate": 7.943012397218684e-06, "loss": 0.5698, "step": 1851 }, { "epoch": 0.32096358397781677, "grad_norm": 2.3115975185701907, "learning_rate": 7.94074269255067e-06, "loss": 0.5694, "step": 1852 }, { "epoch": 0.3211368904486471, "grad_norm": 2.1972629766821106, "learning_rate": 7.938472061049953e-06, "loss": 0.5825, "step": 1853 }, { "epoch": 0.3213101969194775, "grad_norm": 2.6398802762244546, "learning_rate": 7.936200503432169e-06, "loss": 0.6053, "step": 1854 }, { "epoch": 0.32148350339030785, "grad_norm": 2.339864298848347, "learning_rate": 7.933928020413244e-06, "loss": 0.4931, "step": 1855 }, { "epoch": 0.3216568098611382, "grad_norm": 2.4007912380958247, "learning_rate": 7.931654612709393e-06, "loss": 0.5756, "step": 1856 }, { "epoch": 0.32183011633196856, "grad_norm": 1.916486454569136, "learning_rate": 7.929380281037127e-06, "loss": 0.6924, "step": 1857 }, { "epoch": 0.3220034228027989, "grad_norm": 2.624329977168353, "learning_rate": 7.927105026113246e-06, "loss": 0.5917, "step": 1858 }, { "epoch": 0.3221767292736293, "grad_norm": 2.310842016298969, "learning_rate": 7.924828848654842e-06, "loss": 0.5704, "step": 1859 }, { "epoch": 0.3223500357444596, "grad_norm": 2.912989915320941, "learning_rate": 7.922551749379295e-06, "loss": 0.6043, "step": 1860 }, { "epoch": 0.32252334221528994, "grad_norm": 2.6388681875823665, "learning_rate": 7.920273729004277e-06, "loss": 0.6397, "step": 1861 }, { "epoch": 0.3226966486861203, "grad_norm": 1.9271909206705131, "learning_rate": 7.917994788247754e-06, "loss": 0.5382, "step": 1862 }, { "epoch": 0.32286995515695066, "grad_norm": 2.2281059739800684, "learning_rate": 7.915714927827975e-06, "loss": 0.5915, "step": 1863 }, { "epoch": 0.323043261627781, "grad_norm": 2.3458434940576356, "learning_rate": 7.913434148463486e-06, "loss": 0.5433, "step": 1864 }, { "epoch": 0.3232165680986114, "grad_norm": 1.8560378338382357, "learning_rate": 7.911152450873119e-06, "loss": 0.5001, "step": 1865 }, { "epoch": 0.32338987456944174, "grad_norm": 2.6589987702590046, "learning_rate": 7.908869835775991e-06, "loss": 0.5767, "step": 1866 }, { "epoch": 0.3235631810402721, "grad_norm": 2.376694558414563, "learning_rate": 7.90658630389152e-06, "loss": 0.5743, "step": 1867 }, { "epoch": 0.32373648751110246, "grad_norm": 2.1218270623458624, "learning_rate": 7.904301855939401e-06, "loss": 0.5364, "step": 1868 }, { "epoch": 0.3239097939819328, "grad_norm": 2.1972552233604086, "learning_rate": 7.902016492639626e-06, "loss": 0.5021, "step": 1869 }, { "epoch": 0.3240831004527632, "grad_norm": 2.2854308214716563, "learning_rate": 7.899730214712466e-06, "loss": 0.6196, "step": 1870 }, { "epoch": 0.32425640692359353, "grad_norm": 2.0056430707411113, "learning_rate": 7.897443022878493e-06, "loss": 0.5421, "step": 1871 }, { "epoch": 0.3244297133944239, "grad_norm": 2.380418517165474, "learning_rate": 7.895154917858559e-06, "loss": 0.6009, "step": 1872 }, { "epoch": 0.3246030198652542, "grad_norm": 2.1754132390217658, "learning_rate": 7.892865900373801e-06, "loss": 0.5238, "step": 1873 }, { "epoch": 0.32477632633608455, "grad_norm": 2.5660942483796907, "learning_rate": 7.890575971145651e-06, "loss": 0.5444, "step": 1874 }, { "epoch": 0.3249496328069149, "grad_norm": 2.738462082479635, "learning_rate": 7.888285130895826e-06, "loss": 0.6214, "step": 1875 }, { "epoch": 0.32512293927774527, "grad_norm": 2.7070534938386253, "learning_rate": 7.885993380346328e-06, "loss": 0.5263, "step": 1876 }, { "epoch": 0.32529624574857563, "grad_norm": 4.951837144559988, "learning_rate": 7.883700720219446e-06, "loss": 0.5571, "step": 1877 }, { "epoch": 0.325469552219406, "grad_norm": 2.7305918962484284, "learning_rate": 7.881407151237756e-06, "loss": 0.56, "step": 1878 }, { "epoch": 0.32564285869023635, "grad_norm": 2.3261633934312416, "learning_rate": 7.879112674124126e-06, "loss": 0.561, "step": 1879 }, { "epoch": 0.3258161651610667, "grad_norm": 2.722759411054415, "learning_rate": 7.8768172896017e-06, "loss": 0.5561, "step": 1880 }, { "epoch": 0.32598947163189707, "grad_norm": 2.0156093187312476, "learning_rate": 7.874520998393917e-06, "loss": 0.6087, "step": 1881 }, { "epoch": 0.3261627781027274, "grad_norm": 3.230694385755626, "learning_rate": 7.872223801224496e-06, "loss": 0.6128, "step": 1882 }, { "epoch": 0.3263360845735578, "grad_norm": 7.173452050400842, "learning_rate": 7.869925698817444e-06, "loss": 0.5126, "step": 1883 }, { "epoch": 0.32650939104438814, "grad_norm": 2.441441696195155, "learning_rate": 7.867626691897056e-06, "loss": 0.5951, "step": 1884 }, { "epoch": 0.3266826975152185, "grad_norm": 2.165654622792697, "learning_rate": 7.865326781187904e-06, "loss": 0.6025, "step": 1885 }, { "epoch": 0.3268560039860488, "grad_norm": 2.424971321265165, "learning_rate": 7.863025967414856e-06, "loss": 0.5245, "step": 1886 }, { "epoch": 0.32702931045687916, "grad_norm": 2.25616903101874, "learning_rate": 7.860724251303054e-06, "loss": 0.5976, "step": 1887 }, { "epoch": 0.3272026169277095, "grad_norm": 5.349028573242906, "learning_rate": 7.858421633577929e-06, "loss": 0.5408, "step": 1888 }, { "epoch": 0.3273759233985399, "grad_norm": 2.099416823531955, "learning_rate": 7.856118114965199e-06, "loss": 0.507, "step": 1889 }, { "epoch": 0.32754922986937024, "grad_norm": 2.321074408179394, "learning_rate": 7.853813696190862e-06, "loss": 0.5985, "step": 1890 }, { "epoch": 0.3277225363402006, "grad_norm": 2.155105739194009, "learning_rate": 7.851508377981198e-06, "loss": 0.5595, "step": 1891 }, { "epoch": 0.32789584281103096, "grad_norm": 2.0872503513060305, "learning_rate": 7.849202161062776e-06, "loss": 0.5235, "step": 1892 }, { "epoch": 0.3280691492818613, "grad_norm": 2.212049784099958, "learning_rate": 7.846895046162444e-06, "loss": 0.6266, "step": 1893 }, { "epoch": 0.3282424557526917, "grad_norm": 2.4999842022313032, "learning_rate": 7.844587034007337e-06, "loss": 0.5416, "step": 1894 }, { "epoch": 0.32841576222352203, "grad_norm": 2.490306721743408, "learning_rate": 7.84227812532487e-06, "loss": 0.617, "step": 1895 }, { "epoch": 0.3285890686943524, "grad_norm": 2.7377603367212653, "learning_rate": 7.839968320842737e-06, "loss": 0.598, "step": 1896 }, { "epoch": 0.32876237516518275, "grad_norm": 1.9938943008345138, "learning_rate": 7.83765762128892e-06, "loss": 0.5818, "step": 1897 }, { "epoch": 0.3289356816360131, "grad_norm": 2.591009175713765, "learning_rate": 7.835346027391682e-06, "loss": 0.6314, "step": 1898 }, { "epoch": 0.3291089881068434, "grad_norm": 2.0351172968422424, "learning_rate": 7.833033539879567e-06, "loss": 0.5348, "step": 1899 }, { "epoch": 0.3292822945776738, "grad_norm": 3.3694101662922584, "learning_rate": 7.830720159481404e-06, "loss": 0.5356, "step": 1900 }, { "epoch": 0.32945560104850413, "grad_norm": 3.968189440216438, "learning_rate": 7.828405886926293e-06, "loss": 0.5816, "step": 1901 }, { "epoch": 0.3296289075193345, "grad_norm": 2.3249538238110157, "learning_rate": 7.826090722943627e-06, "loss": 0.5871, "step": 1902 }, { "epoch": 0.32980221399016485, "grad_norm": 2.914438706057213, "learning_rate": 7.823774668263076e-06, "loss": 0.5046, "step": 1903 }, { "epoch": 0.3299755204609952, "grad_norm": 3.772676659846529, "learning_rate": 7.821457723614587e-06, "loss": 0.6369, "step": 1904 }, { "epoch": 0.33014882693182557, "grad_norm": 2.3046659750205722, "learning_rate": 7.819139889728393e-06, "loss": 0.6213, "step": 1905 }, { "epoch": 0.3303221334026559, "grad_norm": 2.3382904927506587, "learning_rate": 7.816821167335005e-06, "loss": 0.5056, "step": 1906 }, { "epoch": 0.3304954398734863, "grad_norm": 2.5105774500107056, "learning_rate": 7.81450155716521e-06, "loss": 0.5518, "step": 1907 }, { "epoch": 0.33066874634431664, "grad_norm": 2.2470783280081528, "learning_rate": 7.812181059950084e-06, "loss": 0.6257, "step": 1908 }, { "epoch": 0.330842052815147, "grad_norm": 2.1751015513591256, "learning_rate": 7.809859676420971e-06, "loss": 0.5754, "step": 1909 }, { "epoch": 0.33101535928597736, "grad_norm": 1.9479761040192851, "learning_rate": 7.807537407309508e-06, "loss": 0.6132, "step": 1910 }, { "epoch": 0.3311886657568077, "grad_norm": 4.761334437036798, "learning_rate": 7.805214253347598e-06, "loss": 0.5109, "step": 1911 }, { "epoch": 0.331361972227638, "grad_norm": 2.4150155994674156, "learning_rate": 7.802890215267429e-06, "loss": 0.5856, "step": 1912 }, { "epoch": 0.3315352786984684, "grad_norm": 2.1802689044763217, "learning_rate": 7.800565293801469e-06, "loss": 0.582, "step": 1913 }, { "epoch": 0.33170858516929874, "grad_norm": 2.4951469677006135, "learning_rate": 7.798239489682463e-06, "loss": 0.5809, "step": 1914 }, { "epoch": 0.3318818916401291, "grad_norm": 2.260487987762319, "learning_rate": 7.795912803643431e-06, "loss": 0.5444, "step": 1915 }, { "epoch": 0.33205519811095946, "grad_norm": 1.841606146834812, "learning_rate": 7.793585236417678e-06, "loss": 0.4998, "step": 1916 }, { "epoch": 0.3322285045817898, "grad_norm": 2.0588105039368427, "learning_rate": 7.791256788738779e-06, "loss": 0.6113, "step": 1917 }, { "epoch": 0.3324018110526202, "grad_norm": 2.9806668521613586, "learning_rate": 7.788927461340589e-06, "loss": 0.5155, "step": 1918 }, { "epoch": 0.33257511752345054, "grad_norm": 3.506216225679643, "learning_rate": 7.786597254957243e-06, "loss": 0.5974, "step": 1919 }, { "epoch": 0.3327484239942809, "grad_norm": 2.911141340665828, "learning_rate": 7.784266170323152e-06, "loss": 0.5371, "step": 1920 }, { "epoch": 0.33292173046511125, "grad_norm": 3.274387047410983, "learning_rate": 7.781934208173003e-06, "loss": 0.6193, "step": 1921 }, { "epoch": 0.3330950369359416, "grad_norm": 2.0695153322920827, "learning_rate": 7.779601369241757e-06, "loss": 0.5259, "step": 1922 }, { "epoch": 0.33326834340677197, "grad_norm": 3.4707243489275026, "learning_rate": 7.777267654264656e-06, "loss": 0.5455, "step": 1923 }, { "epoch": 0.33344164987760233, "grad_norm": 2.0418710362596486, "learning_rate": 7.774933063977218e-06, "loss": 0.5711, "step": 1924 }, { "epoch": 0.33361495634843263, "grad_norm": 2.5707889493845184, "learning_rate": 7.772597599115229e-06, "loss": 0.51, "step": 1925 }, { "epoch": 0.333788262819263, "grad_norm": 1.8519531801846192, "learning_rate": 7.770261260414763e-06, "loss": 0.5532, "step": 1926 }, { "epoch": 0.33396156929009335, "grad_norm": 2.2860515710519715, "learning_rate": 7.767924048612157e-06, "loss": 0.5515, "step": 1927 }, { "epoch": 0.3341348757609237, "grad_norm": 2.52745479900804, "learning_rate": 7.765585964444034e-06, "loss": 0.6354, "step": 1928 }, { "epoch": 0.33430818223175407, "grad_norm": 1.8967293540445125, "learning_rate": 7.763247008647286e-06, "loss": 0.5301, "step": 1929 }, { "epoch": 0.33448148870258443, "grad_norm": 2.041491167690728, "learning_rate": 7.760907181959079e-06, "loss": 0.6126, "step": 1930 }, { "epoch": 0.3346547951734148, "grad_norm": 2.0447550171837676, "learning_rate": 7.758566485116856e-06, "loss": 0.5228, "step": 1931 }, { "epoch": 0.33482810164424515, "grad_norm": 3.334003057543183, "learning_rate": 7.756224918858334e-06, "loss": 0.5851, "step": 1932 }, { "epoch": 0.3350014081150755, "grad_norm": 1.9032326442405638, "learning_rate": 7.753882483921503e-06, "loss": 0.547, "step": 1933 }, { "epoch": 0.33517471458590586, "grad_norm": 1.841604092277526, "learning_rate": 7.751539181044628e-06, "loss": 0.5143, "step": 1934 }, { "epoch": 0.3353480210567362, "grad_norm": 1.849567055075128, "learning_rate": 7.749195010966244e-06, "loss": 0.4843, "step": 1935 }, { "epoch": 0.3355213275275666, "grad_norm": 1.9277035554279944, "learning_rate": 7.746849974425167e-06, "loss": 0.5869, "step": 1936 }, { "epoch": 0.33569463399839694, "grad_norm": 1.9167589734753867, "learning_rate": 7.744504072160478e-06, "loss": 0.5307, "step": 1937 }, { "epoch": 0.33586794046922724, "grad_norm": 2.092667487262055, "learning_rate": 7.742157304911533e-06, "loss": 0.4905, "step": 1938 }, { "epoch": 0.3360412469400576, "grad_norm": 1.9444786263849143, "learning_rate": 7.739809673417966e-06, "loss": 0.5669, "step": 1939 }, { "epoch": 0.33621455341088796, "grad_norm": 3.050373363012957, "learning_rate": 7.737461178419676e-06, "loss": 0.5674, "step": 1940 }, { "epoch": 0.3363878598817183, "grad_norm": 2.8353665383726607, "learning_rate": 7.735111820656838e-06, "loss": 0.6288, "step": 1941 }, { "epoch": 0.3365611663525487, "grad_norm": 2.285457629285864, "learning_rate": 7.732761600869896e-06, "loss": 0.4993, "step": 1942 }, { "epoch": 0.33673447282337904, "grad_norm": 2.314380678471829, "learning_rate": 7.730410519799571e-06, "loss": 0.578, "step": 1943 }, { "epoch": 0.3369077792942094, "grad_norm": 2.4086561683183567, "learning_rate": 7.728058578186853e-06, "loss": 0.5516, "step": 1944 }, { "epoch": 0.33708108576503976, "grad_norm": 1.957313092789824, "learning_rate": 7.725705776772997e-06, "loss": 0.4934, "step": 1945 }, { "epoch": 0.3372543922358701, "grad_norm": 1.8166544203941708, "learning_rate": 7.723352116299541e-06, "loss": 0.4984, "step": 1946 }, { "epoch": 0.3374276987067005, "grad_norm": 2.1743909919216224, "learning_rate": 7.720997597508282e-06, "loss": 0.5002, "step": 1947 }, { "epoch": 0.33760100517753083, "grad_norm": 3.138735233263315, "learning_rate": 7.718642221141298e-06, "loss": 0.51, "step": 1948 }, { "epoch": 0.3377743116483612, "grad_norm": 3.869004223224614, "learning_rate": 7.716285987940925e-06, "loss": 0.6283, "step": 1949 }, { "epoch": 0.33794761811919155, "grad_norm": 2.2599292419018595, "learning_rate": 7.713928898649782e-06, "loss": 0.5149, "step": 1950 }, { "epoch": 0.33812092459002185, "grad_norm": 2.2665030428287, "learning_rate": 7.711570954010749e-06, "loss": 0.5245, "step": 1951 }, { "epoch": 0.3382942310608522, "grad_norm": 2.2514281844017945, "learning_rate": 7.709212154766984e-06, "loss": 0.4369, "step": 1952 }, { "epoch": 0.33846753753168257, "grad_norm": 1.971506379616825, "learning_rate": 7.7068525016619e-06, "loss": 0.5387, "step": 1953 }, { "epoch": 0.33864084400251293, "grad_norm": 1.9437122438783272, "learning_rate": 7.704491995439192e-06, "loss": 0.5577, "step": 1954 }, { "epoch": 0.3388141504733433, "grad_norm": 2.2408197845448385, "learning_rate": 7.702130636842822e-06, "loss": 0.5107, "step": 1955 }, { "epoch": 0.33898745694417365, "grad_norm": 6.009529731764241, "learning_rate": 7.699768426617015e-06, "loss": 0.4974, "step": 1956 }, { "epoch": 0.339160763415004, "grad_norm": 2.576521319779872, "learning_rate": 7.697405365506269e-06, "loss": 0.6586, "step": 1957 }, { "epoch": 0.33933406988583437, "grad_norm": 1.8862849938586295, "learning_rate": 7.695041454255352e-06, "loss": 0.4815, "step": 1958 }, { "epoch": 0.3395073763566647, "grad_norm": 2.1531144210961606, "learning_rate": 7.692676693609291e-06, "loss": 0.6399, "step": 1959 }, { "epoch": 0.3396806828274951, "grad_norm": 2.053157438190017, "learning_rate": 7.690311084313394e-06, "loss": 0.5442, "step": 1960 }, { "epoch": 0.33985398929832544, "grad_norm": 2.9394998470037823, "learning_rate": 7.687944627113221e-06, "loss": 0.6389, "step": 1961 }, { "epoch": 0.3400272957691558, "grad_norm": 2.2293191592525865, "learning_rate": 7.685577322754615e-06, "loss": 0.567, "step": 1962 }, { "epoch": 0.34020060223998616, "grad_norm": 2.1053493198793123, "learning_rate": 7.683209171983673e-06, "loss": 0.5365, "step": 1963 }, { "epoch": 0.34037390871081646, "grad_norm": 2.3016561936850555, "learning_rate": 7.680840175546764e-06, "loss": 0.5595, "step": 1964 }, { "epoch": 0.3405472151816468, "grad_norm": 2.1959747019183053, "learning_rate": 7.67847033419053e-06, "loss": 0.61, "step": 1965 }, { "epoch": 0.3407205216524772, "grad_norm": 2.5058518761882653, "learning_rate": 7.676099648661866e-06, "loss": 0.5925, "step": 1966 }, { "epoch": 0.34089382812330754, "grad_norm": 2.0525562782721543, "learning_rate": 7.673728119707943e-06, "loss": 0.4656, "step": 1967 }, { "epoch": 0.3410671345941379, "grad_norm": 2.4161923675345296, "learning_rate": 7.671355748076195e-06, "loss": 0.6542, "step": 1968 }, { "epoch": 0.34124044106496826, "grad_norm": 5.233528032093774, "learning_rate": 7.66898253451432e-06, "loss": 0.5069, "step": 1969 }, { "epoch": 0.3414137475357986, "grad_norm": 2.5139428956483068, "learning_rate": 7.666608479770285e-06, "loss": 0.5978, "step": 1970 }, { "epoch": 0.341587054006629, "grad_norm": 2.266470950809641, "learning_rate": 7.664233584592318e-06, "loss": 0.5694, "step": 1971 }, { "epoch": 0.34176036047745934, "grad_norm": 2.7008082445881643, "learning_rate": 7.661857849728914e-06, "loss": 0.5485, "step": 1972 }, { "epoch": 0.3419336669482897, "grad_norm": 3.0622646184565117, "learning_rate": 7.659481275928833e-06, "loss": 0.542, "step": 1973 }, { "epoch": 0.34210697341912005, "grad_norm": 2.7747945485946417, "learning_rate": 7.657103863941099e-06, "loss": 0.5904, "step": 1974 }, { "epoch": 0.3422802798899504, "grad_norm": 2.428406740655482, "learning_rate": 7.654725614514998e-06, "loss": 0.5346, "step": 1975 }, { "epoch": 0.34245358636078077, "grad_norm": 3.0919121886003196, "learning_rate": 7.652346528400085e-06, "loss": 0.5081, "step": 1976 }, { "epoch": 0.3426268928316111, "grad_norm": 4.707410119686955, "learning_rate": 7.649966606346174e-06, "loss": 0.568, "step": 1977 }, { "epoch": 0.34280019930244143, "grad_norm": 6.283983119273544, "learning_rate": 7.647585849103343e-06, "loss": 0.5384, "step": 1978 }, { "epoch": 0.3429735057732718, "grad_norm": 4.532361847247409, "learning_rate": 7.645204257421936e-06, "loss": 0.5899, "step": 1979 }, { "epoch": 0.34314681224410215, "grad_norm": 2.229675234776155, "learning_rate": 7.642821832052558e-06, "loss": 0.6095, "step": 1980 }, { "epoch": 0.3433201187149325, "grad_norm": 2.51479941022859, "learning_rate": 7.640438573746076e-06, "loss": 0.5947, "step": 1981 }, { "epoch": 0.34349342518576287, "grad_norm": 1.7818350859257175, "learning_rate": 7.638054483253624e-06, "loss": 0.5433, "step": 1982 }, { "epoch": 0.3436667316565932, "grad_norm": 2.181757085764521, "learning_rate": 7.63566956132659e-06, "loss": 0.4668, "step": 1983 }, { "epoch": 0.3438400381274236, "grad_norm": 2.3082922280331712, "learning_rate": 7.633283808716631e-06, "loss": 0.5809, "step": 1984 }, { "epoch": 0.34401334459825395, "grad_norm": 2.447494957008826, "learning_rate": 7.630897226175664e-06, "loss": 0.5449, "step": 1985 }, { "epoch": 0.3441866510690843, "grad_norm": 2.098230619174065, "learning_rate": 7.628509814455869e-06, "loss": 0.5449, "step": 1986 }, { "epoch": 0.34435995753991466, "grad_norm": 2.4832801521709102, "learning_rate": 7.6261215743096814e-06, "loss": 0.6014, "step": 1987 }, { "epoch": 0.344533264010745, "grad_norm": 2.5486149410280166, "learning_rate": 7.623732506489806e-06, "loss": 0.493, "step": 1988 }, { "epoch": 0.3447065704815754, "grad_norm": 1.9718998985813723, "learning_rate": 7.621342611749201e-06, "loss": 0.5441, "step": 1989 }, { "epoch": 0.3448798769524057, "grad_norm": 2.9677262649992686, "learning_rate": 7.618951890841091e-06, "loss": 0.5614, "step": 1990 }, { "epoch": 0.34505318342323604, "grad_norm": 2.334959686595447, "learning_rate": 7.616560344518959e-06, "loss": 0.5292, "step": 1991 }, { "epoch": 0.3452264898940664, "grad_norm": 2.201497428087475, "learning_rate": 7.614167973536543e-06, "loss": 0.615, "step": 1992 }, { "epoch": 0.34539979636489676, "grad_norm": 2.041431248835658, "learning_rate": 7.611774778647851e-06, "loss": 0.4965, "step": 1993 }, { "epoch": 0.3455731028357271, "grad_norm": 2.349955978275893, "learning_rate": 7.609380760607143e-06, "loss": 0.6135, "step": 1994 }, { "epoch": 0.3457464093065575, "grad_norm": 1.9479631384920366, "learning_rate": 7.606985920168943e-06, "loss": 0.458, "step": 1995 }, { "epoch": 0.34591971577738784, "grad_norm": 2.084056044762915, "learning_rate": 7.604590258088027e-06, "loss": 0.495, "step": 1996 }, { "epoch": 0.3460930222482182, "grad_norm": 2.3224569923588443, "learning_rate": 7.60219377511944e-06, "loss": 0.5842, "step": 1997 }, { "epoch": 0.34626632871904856, "grad_norm": 2.2382606717845976, "learning_rate": 7.599796472018477e-06, "loss": 0.5552, "step": 1998 }, { "epoch": 0.3464396351898789, "grad_norm": 2.0226782961802536, "learning_rate": 7.597398349540697e-06, "loss": 0.532, "step": 1999 }, { "epoch": 0.3466129416607093, "grad_norm": 2.418158127223981, "learning_rate": 7.5949994084419146e-06, "loss": 0.5573, "step": 2000 }, { "epoch": 0.34678624813153963, "grad_norm": 2.2833895851107906, "learning_rate": 7.5925996494782025e-06, "loss": 0.628, "step": 2001 }, { "epoch": 0.34695955460237, "grad_norm": 1.891594655446806, "learning_rate": 7.590199073405893e-06, "loss": 0.477, "step": 2002 }, { "epoch": 0.3471328610732003, "grad_norm": 2.222890446968231, "learning_rate": 7.5877976809815745e-06, "loss": 0.5276, "step": 2003 }, { "epoch": 0.34730616754403065, "grad_norm": 2.043084178614808, "learning_rate": 7.585395472962091e-06, "loss": 0.5603, "step": 2004 }, { "epoch": 0.347479474014861, "grad_norm": 2.1833688134014233, "learning_rate": 7.58299245010455e-06, "loss": 0.582, "step": 2005 }, { "epoch": 0.34765278048569137, "grad_norm": 2.4325032994258255, "learning_rate": 7.580588613166306e-06, "loss": 0.5551, "step": 2006 }, { "epoch": 0.34782608695652173, "grad_norm": 2.673681986658247, "learning_rate": 7.5781839629049765e-06, "loss": 0.6555, "step": 2007 }, { "epoch": 0.3479993934273521, "grad_norm": 2.0765602905023415, "learning_rate": 7.575778500078439e-06, "loss": 0.5967, "step": 2008 }, { "epoch": 0.34817269989818245, "grad_norm": 2.041624975096276, "learning_rate": 7.573372225444817e-06, "loss": 0.4694, "step": 2009 }, { "epoch": 0.3483460063690128, "grad_norm": 2.57619724246489, "learning_rate": 7.570965139762496e-06, "loss": 0.6405, "step": 2010 }, { "epoch": 0.34851931283984317, "grad_norm": 2.8163433007781635, "learning_rate": 7.568557243790118e-06, "loss": 0.5392, "step": 2011 }, { "epoch": 0.3486926193106735, "grad_norm": 2.6509727180213374, "learning_rate": 7.566148538286575e-06, "loss": 0.5667, "step": 2012 }, { "epoch": 0.3488659257815039, "grad_norm": 1.8995104261371736, "learning_rate": 7.5637390240110235e-06, "loss": 0.5091, "step": 2013 }, { "epoch": 0.34903923225233424, "grad_norm": 3.1945682119734413, "learning_rate": 7.561328701722863e-06, "loss": 0.5282, "step": 2014 }, { "epoch": 0.3492125387231646, "grad_norm": 1.969219815037996, "learning_rate": 7.558917572181758e-06, "loss": 0.5224, "step": 2015 }, { "epoch": 0.3493858451939949, "grad_norm": 1.9480468735669436, "learning_rate": 7.556505636147623e-06, "loss": 0.5601, "step": 2016 }, { "epoch": 0.34955915166482526, "grad_norm": 1.9353430154131943, "learning_rate": 7.5540928943806245e-06, "loss": 0.542, "step": 2017 }, { "epoch": 0.3497324581356556, "grad_norm": 2.161996279151328, "learning_rate": 7.551679347641188e-06, "loss": 0.6081, "step": 2018 }, { "epoch": 0.349905764606486, "grad_norm": 2.3482475170044967, "learning_rate": 7.549264996689987e-06, "loss": 0.6266, "step": 2019 }, { "epoch": 0.35007907107731634, "grad_norm": 1.9982678758955061, "learning_rate": 7.546849842287955e-06, "loss": 0.444, "step": 2020 }, { "epoch": 0.3502523775481467, "grad_norm": 1.9543684587966716, "learning_rate": 7.544433885196273e-06, "loss": 0.4321, "step": 2021 }, { "epoch": 0.35042568401897706, "grad_norm": 2.4356429268242508, "learning_rate": 7.542017126176377e-06, "loss": 0.5734, "step": 2022 }, { "epoch": 0.3505989904898074, "grad_norm": 1.9434294235239946, "learning_rate": 7.539599565989958e-06, "loss": 0.5173, "step": 2023 }, { "epoch": 0.3507722969606378, "grad_norm": 2.205677648111289, "learning_rate": 7.537181205398957e-06, "loss": 0.6124, "step": 2024 }, { "epoch": 0.35094560343146813, "grad_norm": 1.899335115589956, "learning_rate": 7.534762045165564e-06, "loss": 0.6529, "step": 2025 }, { "epoch": 0.3511189099022985, "grad_norm": 2.9630575089080997, "learning_rate": 7.532342086052231e-06, "loss": 0.5306, "step": 2026 }, { "epoch": 0.35129221637312885, "grad_norm": 2.61716942905798, "learning_rate": 7.52992132882165e-06, "loss": 0.508, "step": 2027 }, { "epoch": 0.3514655228439592, "grad_norm": 2.0833511753512677, "learning_rate": 7.527499774236774e-06, "loss": 0.5737, "step": 2028 }, { "epoch": 0.3516388293147895, "grad_norm": 2.8076482876962046, "learning_rate": 7.525077423060803e-06, "loss": 0.5808, "step": 2029 }, { "epoch": 0.3518121357856199, "grad_norm": 4.407457327542477, "learning_rate": 7.522654276057184e-06, "loss": 0.5832, "step": 2030 }, { "epoch": 0.35198544225645023, "grad_norm": 1.945477959180068, "learning_rate": 7.520230333989624e-06, "loss": 0.5309, "step": 2031 }, { "epoch": 0.3521587487272806, "grad_norm": 1.9561857236340916, "learning_rate": 7.517805597622075e-06, "loss": 0.5225, "step": 2032 }, { "epoch": 0.35233205519811095, "grad_norm": 2.194293308276615, "learning_rate": 7.515380067718739e-06, "loss": 0.4991, "step": 2033 }, { "epoch": 0.3525053616689413, "grad_norm": 3.70686876052404, "learning_rate": 7.51295374504407e-06, "loss": 0.6185, "step": 2034 }, { "epoch": 0.35267866813977167, "grad_norm": 2.3173373943691336, "learning_rate": 7.510526630362771e-06, "loss": 0.6261, "step": 2035 }, { "epoch": 0.352851974610602, "grad_norm": 2.1820426414190464, "learning_rate": 7.508098724439797e-06, "loss": 0.5112, "step": 2036 }, { "epoch": 0.3530252810814324, "grad_norm": 2.0925222954890095, "learning_rate": 7.5056700280403434e-06, "loss": 0.5953, "step": 2037 }, { "epoch": 0.35319858755226274, "grad_norm": 2.2332341683781136, "learning_rate": 7.503240541929869e-06, "loss": 0.5298, "step": 2038 }, { "epoch": 0.3533718940230931, "grad_norm": 3.5024746794422814, "learning_rate": 7.50081026687407e-06, "loss": 0.5289, "step": 2039 }, { "epoch": 0.35354520049392346, "grad_norm": 3.5085577439657056, "learning_rate": 7.498379203638897e-06, "loss": 0.5112, "step": 2040 }, { "epoch": 0.3537185069647538, "grad_norm": 2.1006509563227973, "learning_rate": 7.495947352990547e-06, "loss": 0.5849, "step": 2041 }, { "epoch": 0.3538918134355841, "grad_norm": 3.436215527386986, "learning_rate": 7.493514715695464e-06, "loss": 0.4844, "step": 2042 }, { "epoch": 0.3540651199064145, "grad_norm": 2.5973050274753757, "learning_rate": 7.491081292520341e-06, "loss": 0.5799, "step": 2043 }, { "epoch": 0.35423842637724484, "grad_norm": 2.320701667165124, "learning_rate": 7.488647084232123e-06, "loss": 0.565, "step": 2044 }, { "epoch": 0.3544117328480752, "grad_norm": 2.0440648751750112, "learning_rate": 7.486212091597993e-06, "loss": 0.5418, "step": 2045 }, { "epoch": 0.35458503931890556, "grad_norm": 3.104964456908982, "learning_rate": 7.483776315385391e-06, "loss": 0.6007, "step": 2046 }, { "epoch": 0.3547583457897359, "grad_norm": 2.5962926643729274, "learning_rate": 7.481339756361997e-06, "loss": 0.6513, "step": 2047 }, { "epoch": 0.3549316522605663, "grad_norm": 1.949543568042643, "learning_rate": 7.478902415295741e-06, "loss": 0.5266, "step": 2048 }, { "epoch": 0.35510495873139664, "grad_norm": 2.369700838709682, "learning_rate": 7.4764642929548e-06, "loss": 0.5302, "step": 2049 }, { "epoch": 0.355278265202227, "grad_norm": 2.5022337993024477, "learning_rate": 7.474025390107595e-06, "loss": 0.6103, "step": 2050 }, { "epoch": 0.35545157167305735, "grad_norm": 2.5495039399756023, "learning_rate": 7.471585707522795e-06, "loss": 0.6133, "step": 2051 }, { "epoch": 0.3556248781438877, "grad_norm": 2.0354300809917, "learning_rate": 7.4691452459693105e-06, "loss": 0.5613, "step": 2052 }, { "epoch": 0.35579818461471807, "grad_norm": 2.278248603013341, "learning_rate": 7.466704006216305e-06, "loss": 0.4842, "step": 2053 }, { "epoch": 0.35597149108554843, "grad_norm": 2.6051904465432023, "learning_rate": 7.464261989033182e-06, "loss": 0.5485, "step": 2054 }, { "epoch": 0.35614479755637873, "grad_norm": 2.1977348647611454, "learning_rate": 7.461819195189588e-06, "loss": 0.5897, "step": 2055 }, { "epoch": 0.3563181040272091, "grad_norm": 2.7836301140487887, "learning_rate": 7.459375625455421e-06, "loss": 0.5627, "step": 2056 }, { "epoch": 0.35649141049803945, "grad_norm": 1.9865345761974913, "learning_rate": 7.456931280600819e-06, "loss": 0.5181, "step": 2057 }, { "epoch": 0.3566647169688698, "grad_norm": 2.7171512877083197, "learning_rate": 7.454486161396164e-06, "loss": 0.5462, "step": 2058 }, { "epoch": 0.35683802343970017, "grad_norm": 3.3393542465195982, "learning_rate": 7.452040268612084e-06, "loss": 0.5592, "step": 2059 }, { "epoch": 0.35701132991053053, "grad_norm": 1.9319836499138217, "learning_rate": 7.449593603019451e-06, "loss": 0.6025, "step": 2060 }, { "epoch": 0.3571846363813609, "grad_norm": 2.131931213111492, "learning_rate": 7.447146165389378e-06, "loss": 0.5961, "step": 2061 }, { "epoch": 0.35735794285219125, "grad_norm": 2.2837035255460534, "learning_rate": 7.444697956493225e-06, "loss": 0.5696, "step": 2062 }, { "epoch": 0.3575312493230216, "grad_norm": 2.963700189895998, "learning_rate": 7.44224897710259e-06, "loss": 0.5747, "step": 2063 }, { "epoch": 0.35770455579385196, "grad_norm": 2.4378996538014506, "learning_rate": 7.439799227989319e-06, "loss": 0.5858, "step": 2064 }, { "epoch": 0.3578778622646823, "grad_norm": 2.1728253867033747, "learning_rate": 7.437348709925498e-06, "loss": 0.6228, "step": 2065 }, { "epoch": 0.3580511687355127, "grad_norm": 3.3127738505192204, "learning_rate": 7.434897423683456e-06, "loss": 0.5452, "step": 2066 }, { "epoch": 0.35822447520634304, "grad_norm": 2.4533054329600543, "learning_rate": 7.4324453700357645e-06, "loss": 0.4765, "step": 2067 }, { "epoch": 0.35839778167717334, "grad_norm": 1.9343754774678836, "learning_rate": 7.429992549755234e-06, "loss": 0.5226, "step": 2068 }, { "epoch": 0.3585710881480037, "grad_norm": 2.4826953260160454, "learning_rate": 7.427538963614922e-06, "loss": 0.5284, "step": 2069 }, { "epoch": 0.35874439461883406, "grad_norm": 2.440290427047382, "learning_rate": 7.425084612388122e-06, "loss": 0.5207, "step": 2070 }, { "epoch": 0.3589177010896644, "grad_norm": 1.8774848767869443, "learning_rate": 7.422629496848372e-06, "loss": 0.5587, "step": 2071 }, { "epoch": 0.3590910075604948, "grad_norm": 2.6392668054193336, "learning_rate": 7.420173617769448e-06, "loss": 0.5709, "step": 2072 }, { "epoch": 0.35926431403132514, "grad_norm": 2.351551098881707, "learning_rate": 7.417716975925371e-06, "loss": 0.449, "step": 2073 }, { "epoch": 0.3594376205021555, "grad_norm": 2.651086229642405, "learning_rate": 7.415259572090397e-06, "loss": 0.6634, "step": 2074 }, { "epoch": 0.35961092697298586, "grad_norm": 2.534407838876079, "learning_rate": 7.41280140703903e-06, "loss": 0.5216, "step": 2075 }, { "epoch": 0.3597842334438162, "grad_norm": 2.8375393991134725, "learning_rate": 7.410342481546002e-06, "loss": 0.6461, "step": 2076 }, { "epoch": 0.3599575399146466, "grad_norm": 1.908327420314976, "learning_rate": 7.407882796386297e-06, "loss": 0.4795, "step": 2077 }, { "epoch": 0.36013084638547693, "grad_norm": 2.1148083534031565, "learning_rate": 7.4054223523351295e-06, "loss": 0.5844, "step": 2078 }, { "epoch": 0.3603041528563073, "grad_norm": 2.1366054576367333, "learning_rate": 7.40296115016796e-06, "loss": 0.6171, "step": 2079 }, { "epoch": 0.36047745932713765, "grad_norm": 1.9817550140031115, "learning_rate": 7.400499190660482e-06, "loss": 0.5291, "step": 2080 }, { "epoch": 0.360650765797968, "grad_norm": 2.10935133524751, "learning_rate": 7.39803647458863e-06, "loss": 0.5419, "step": 2081 }, { "epoch": 0.3608240722687983, "grad_norm": 1.8794545796260829, "learning_rate": 7.39557300272858e-06, "loss": 0.5744, "step": 2082 }, { "epoch": 0.36099737873962867, "grad_norm": 2.094852182809376, "learning_rate": 7.393108775856741e-06, "loss": 0.5251, "step": 2083 }, { "epoch": 0.36117068521045903, "grad_norm": 2.8262565432902944, "learning_rate": 7.390643794749763e-06, "loss": 0.5446, "step": 2084 }, { "epoch": 0.3613439916812894, "grad_norm": 2.149382633523573, "learning_rate": 7.388178060184536e-06, "loss": 0.5654, "step": 2085 }, { "epoch": 0.36151729815211975, "grad_norm": 2.440456826595886, "learning_rate": 7.385711572938178e-06, "loss": 0.5607, "step": 2086 }, { "epoch": 0.3616906046229501, "grad_norm": 2.414542233910546, "learning_rate": 7.383244333788055e-06, "loss": 0.5066, "step": 2087 }, { "epoch": 0.36186391109378047, "grad_norm": 2.1069752329319713, "learning_rate": 7.380776343511769e-06, "loss": 0.4854, "step": 2088 }, { "epoch": 0.3620372175646108, "grad_norm": 2.4465821779857797, "learning_rate": 7.378307602887149e-06, "loss": 0.5559, "step": 2089 }, { "epoch": 0.3622105240354412, "grad_norm": 2.0932487926959746, "learning_rate": 7.375838112692271e-06, "loss": 0.5705, "step": 2090 }, { "epoch": 0.36238383050627154, "grad_norm": 2.4473645459692737, "learning_rate": 7.373367873705442e-06, "loss": 0.5073, "step": 2091 }, { "epoch": 0.3625571369771019, "grad_norm": 2.855829697716637, "learning_rate": 7.3708968867052065e-06, "loss": 0.5774, "step": 2092 }, { "epoch": 0.36273044344793226, "grad_norm": 2.1661767645992915, "learning_rate": 7.368425152470344e-06, "loss": 0.5345, "step": 2093 }, { "epoch": 0.3629037499187626, "grad_norm": 2.232552335285799, "learning_rate": 7.36595267177987e-06, "loss": 0.5069, "step": 2094 }, { "epoch": 0.3630770563895929, "grad_norm": 2.055520298397109, "learning_rate": 7.363479445413038e-06, "loss": 0.5876, "step": 2095 }, { "epoch": 0.3632503628604233, "grad_norm": 1.9643043446155406, "learning_rate": 7.36100547414933e-06, "loss": 0.5577, "step": 2096 }, { "epoch": 0.36342366933125364, "grad_norm": 2.711029572748052, "learning_rate": 7.358530758768467e-06, "loss": 0.6157, "step": 2097 }, { "epoch": 0.363596975802084, "grad_norm": 2.3169879686145416, "learning_rate": 7.356055300050407e-06, "loss": 0.614, "step": 2098 }, { "epoch": 0.36377028227291436, "grad_norm": 2.236014030651414, "learning_rate": 7.353579098775336e-06, "loss": 0.3956, "step": 2099 }, { "epoch": 0.3639435887437447, "grad_norm": 2.5982949417546974, "learning_rate": 7.35110215572368e-06, "loss": 0.6397, "step": 2100 }, { "epoch": 0.3641168952145751, "grad_norm": 2.077027068604294, "learning_rate": 7.348624471676094e-06, "loss": 0.4835, "step": 2101 }, { "epoch": 0.36429020168540543, "grad_norm": 2.257447995466957, "learning_rate": 7.3461460474134695e-06, "loss": 0.567, "step": 2102 }, { "epoch": 0.3644635081562358, "grad_norm": 2.272059135613765, "learning_rate": 7.343666883716931e-06, "loss": 0.5109, "step": 2103 }, { "epoch": 0.36463681462706615, "grad_norm": 2.1429980155759356, "learning_rate": 7.341186981367835e-06, "loss": 0.5901, "step": 2104 }, { "epoch": 0.3648101210978965, "grad_norm": 2.10978359386776, "learning_rate": 7.338706341147772e-06, "loss": 0.5534, "step": 2105 }, { "epoch": 0.36498342756872687, "grad_norm": 2.252751279565658, "learning_rate": 7.336224963838563e-06, "loss": 0.5511, "step": 2106 }, { "epoch": 0.36515673403955723, "grad_norm": 2.1537550258425866, "learning_rate": 7.333742850222264e-06, "loss": 0.5327, "step": 2107 }, { "epoch": 0.36533004051038753, "grad_norm": 1.9355296355524063, "learning_rate": 7.3312600010811615e-06, "loss": 0.5406, "step": 2108 }, { "epoch": 0.3655033469812179, "grad_norm": 2.0933202433954197, "learning_rate": 7.328776417197774e-06, "loss": 0.4142, "step": 2109 }, { "epoch": 0.36567665345204825, "grad_norm": 2.517100221786113, "learning_rate": 7.32629209935485e-06, "loss": 0.5656, "step": 2110 }, { "epoch": 0.3658499599228786, "grad_norm": 1.8403749717729359, "learning_rate": 7.323807048335375e-06, "loss": 0.5067, "step": 2111 }, { "epoch": 0.36602326639370897, "grad_norm": 2.0807944698287746, "learning_rate": 7.321321264922558e-06, "loss": 0.4335, "step": 2112 }, { "epoch": 0.3661965728645393, "grad_norm": 1.9534677232720337, "learning_rate": 7.318834749899843e-06, "loss": 0.5882, "step": 2113 }, { "epoch": 0.3663698793353697, "grad_norm": 2.083368883182167, "learning_rate": 7.316347504050903e-06, "loss": 0.5746, "step": 2114 }, { "epoch": 0.36654318580620004, "grad_norm": 1.853927489388918, "learning_rate": 7.3138595281596455e-06, "loss": 0.5518, "step": 2115 }, { "epoch": 0.3667164922770304, "grad_norm": 3.668588592289911, "learning_rate": 7.311370823010204e-06, "loss": 0.59, "step": 2116 }, { "epoch": 0.36688979874786076, "grad_norm": 6.834049677353939, "learning_rate": 7.3088813893869405e-06, "loss": 0.5571, "step": 2117 }, { "epoch": 0.3670631052186911, "grad_norm": 2.068051519994278, "learning_rate": 7.30639122807445e-06, "loss": 0.44, "step": 2118 }, { "epoch": 0.3672364116895215, "grad_norm": 2.3138201250613495, "learning_rate": 7.303900339857555e-06, "loss": 0.54, "step": 2119 }, { "epoch": 0.36740971816035184, "grad_norm": 2.731302183347752, "learning_rate": 7.301408725521308e-06, "loss": 0.5045, "step": 2120 }, { "epoch": 0.36758302463118214, "grad_norm": 4.667636641674144, "learning_rate": 7.298916385850993e-06, "loss": 0.4748, "step": 2121 }, { "epoch": 0.3677563311020125, "grad_norm": 2.0487066520837773, "learning_rate": 7.296423321632116e-06, "loss": 0.6294, "step": 2122 }, { "epoch": 0.36792963757284286, "grad_norm": 1.9114346769875747, "learning_rate": 7.293929533650415e-06, "loss": 0.5561, "step": 2123 }, { "epoch": 0.3681029440436732, "grad_norm": 2.2109605746156187, "learning_rate": 7.291435022691859e-06, "loss": 0.5584, "step": 2124 }, { "epoch": 0.3682762505145036, "grad_norm": 2.7112945570471596, "learning_rate": 7.28893978954264e-06, "loss": 0.611, "step": 2125 }, { "epoch": 0.36844955698533394, "grad_norm": 2.0663304956250608, "learning_rate": 7.286443834989178e-06, "loss": 0.4993, "step": 2126 }, { "epoch": 0.3686228634561643, "grad_norm": 2.1983558970610604, "learning_rate": 7.2839471598181274e-06, "loss": 0.5068, "step": 2127 }, { "epoch": 0.36879616992699465, "grad_norm": 3.2751793617186253, "learning_rate": 7.281449764816359e-06, "loss": 0.6554, "step": 2128 }, { "epoch": 0.368969476397825, "grad_norm": 3.3708694802670087, "learning_rate": 7.278951650770979e-06, "loss": 0.5006, "step": 2129 }, { "epoch": 0.3691427828686554, "grad_norm": 2.048279520745875, "learning_rate": 7.276452818469315e-06, "loss": 0.5827, "step": 2130 }, { "epoch": 0.36931608933948573, "grad_norm": 2.63007846542726, "learning_rate": 7.273953268698924e-06, "loss": 0.6046, "step": 2131 }, { "epoch": 0.3694893958103161, "grad_norm": 2.3147705661224482, "learning_rate": 7.271453002247588e-06, "loss": 0.5013, "step": 2132 }, { "epoch": 0.36966270228114645, "grad_norm": 2.231360803416665, "learning_rate": 7.268952019903316e-06, "loss": 0.5331, "step": 2133 }, { "epoch": 0.36983600875197675, "grad_norm": 2.5303520390571372, "learning_rate": 7.266450322454338e-06, "loss": 0.6511, "step": 2134 }, { "epoch": 0.3700093152228071, "grad_norm": 2.4642578797083123, "learning_rate": 7.263947910689117e-06, "loss": 0.6202, "step": 2135 }, { "epoch": 0.37018262169363747, "grad_norm": 4.139160972623083, "learning_rate": 7.261444785396334e-06, "loss": 0.5075, "step": 2136 }, { "epoch": 0.37035592816446783, "grad_norm": 2.201563518044584, "learning_rate": 7.258940947364903e-06, "loss": 0.5627, "step": 2137 }, { "epoch": 0.3705292346352982, "grad_norm": 2.4259321579621016, "learning_rate": 7.256436397383951e-06, "loss": 0.5533, "step": 2138 }, { "epoch": 0.37070254110612855, "grad_norm": 2.492977991032316, "learning_rate": 7.253931136242841e-06, "loss": 0.5427, "step": 2139 }, { "epoch": 0.3708758475769589, "grad_norm": 2.1004873728616853, "learning_rate": 7.251425164731153e-06, "loss": 0.5038, "step": 2140 }, { "epoch": 0.37104915404778926, "grad_norm": 2.301955044984064, "learning_rate": 7.248918483638692e-06, "loss": 0.5664, "step": 2141 }, { "epoch": 0.3712224605186196, "grad_norm": 2.196029455959038, "learning_rate": 7.246411093755491e-06, "loss": 0.5873, "step": 2142 }, { "epoch": 0.37139576698945, "grad_norm": 1.910979995947923, "learning_rate": 7.243902995871801e-06, "loss": 0.5327, "step": 2143 }, { "epoch": 0.37156907346028034, "grad_norm": 2.9599112035972643, "learning_rate": 7.241394190778097e-06, "loss": 0.5362, "step": 2144 }, { "epoch": 0.3717423799311107, "grad_norm": 3.0635120219073992, "learning_rate": 7.238884679265081e-06, "loss": 0.504, "step": 2145 }, { "epoch": 0.37191568640194106, "grad_norm": 2.2098895599659216, "learning_rate": 7.236374462123672e-06, "loss": 0.5916, "step": 2146 }, { "epoch": 0.37208899287277136, "grad_norm": 2.203013285831752, "learning_rate": 7.233863540145016e-06, "loss": 0.5769, "step": 2147 }, { "epoch": 0.3722622993436017, "grad_norm": 2.1322658651612176, "learning_rate": 7.231351914120479e-06, "loss": 0.5764, "step": 2148 }, { "epoch": 0.3724356058144321, "grad_norm": 2.8479287189623252, "learning_rate": 7.228839584841649e-06, "loss": 0.517, "step": 2149 }, { "epoch": 0.37260891228526244, "grad_norm": 2.28500573128692, "learning_rate": 7.226326553100334e-06, "loss": 0.4995, "step": 2150 }, { "epoch": 0.3727822187560928, "grad_norm": 2.237094765507329, "learning_rate": 7.223812819688569e-06, "loss": 0.6197, "step": 2151 }, { "epoch": 0.37295552522692316, "grad_norm": 2.1698928872304935, "learning_rate": 7.221298385398604e-06, "loss": 0.5353, "step": 2152 }, { "epoch": 0.3731288316977535, "grad_norm": 2.458156820418099, "learning_rate": 7.218783251022912e-06, "loss": 0.6696, "step": 2153 }, { "epoch": 0.3733021381685839, "grad_norm": 2.049439439032252, "learning_rate": 7.216267417354186e-06, "loss": 0.5733, "step": 2154 }, { "epoch": 0.37347544463941423, "grad_norm": 2.609751603834005, "learning_rate": 7.2137508851853426e-06, "loss": 0.509, "step": 2155 }, { "epoch": 0.3736487511102446, "grad_norm": 3.2088026615385097, "learning_rate": 7.2112336553095155e-06, "loss": 0.5232, "step": 2156 }, { "epoch": 0.37382205758107495, "grad_norm": 2.156375039064109, "learning_rate": 7.208715728520059e-06, "loss": 0.5757, "step": 2157 }, { "epoch": 0.3739953640519053, "grad_norm": 2.257229526345053, "learning_rate": 7.206197105610547e-06, "loss": 0.6637, "step": 2158 }, { "epoch": 0.37416867052273567, "grad_norm": 2.2899408751579107, "learning_rate": 7.2036777873747724e-06, "loss": 0.532, "step": 2159 }, { "epoch": 0.374341976993566, "grad_norm": 2.2038735811784207, "learning_rate": 7.2011577746067484e-06, "loss": 0.5083, "step": 2160 }, { "epoch": 0.37451528346439633, "grad_norm": 1.9784787567568205, "learning_rate": 7.198637068100705e-06, "loss": 0.5617, "step": 2161 }, { "epoch": 0.3746885899352267, "grad_norm": 2.0388429315734418, "learning_rate": 7.196115668651095e-06, "loss": 0.6827, "step": 2162 }, { "epoch": 0.37486189640605705, "grad_norm": 3.4251907465331013, "learning_rate": 7.193593577052585e-06, "loss": 0.5677, "step": 2163 }, { "epoch": 0.3750352028768874, "grad_norm": 1.9762594132748819, "learning_rate": 7.191070794100061e-06, "loss": 0.5266, "step": 2164 }, { "epoch": 0.37520850934771777, "grad_norm": 2.246549800262023, "learning_rate": 7.188547320588629e-06, "loss": 0.5443, "step": 2165 }, { "epoch": 0.3753818158185481, "grad_norm": 2.1802956415337036, "learning_rate": 7.186023157313611e-06, "loss": 0.4745, "step": 2166 }, { "epoch": 0.3755551222893785, "grad_norm": 1.9041509511970556, "learning_rate": 7.183498305070544e-06, "loss": 0.4687, "step": 2167 }, { "epoch": 0.37572842876020884, "grad_norm": 2.192520159795114, "learning_rate": 7.180972764655189e-06, "loss": 0.5603, "step": 2168 }, { "epoch": 0.3759017352310392, "grad_norm": 2.3486715456006784, "learning_rate": 7.178446536863516e-06, "loss": 0.5575, "step": 2169 }, { "epoch": 0.37607504170186956, "grad_norm": 2.0014002207376524, "learning_rate": 7.1759196224917175e-06, "loss": 0.4548, "step": 2170 }, { "epoch": 0.3762483481726999, "grad_norm": 2.9168409437359317, "learning_rate": 7.173392022336197e-06, "loss": 0.5616, "step": 2171 }, { "epoch": 0.3764216546435303, "grad_norm": 2.4054386164424906, "learning_rate": 7.170863737193582e-06, "loss": 0.6132, "step": 2172 }, { "epoch": 0.3765949611143606, "grad_norm": 2.389567233053669, "learning_rate": 7.1683347678607075e-06, "loss": 0.5424, "step": 2173 }, { "epoch": 0.37676826758519094, "grad_norm": 2.200617154585987, "learning_rate": 7.1658051151346275e-06, "loss": 0.5356, "step": 2174 }, { "epoch": 0.3769415740560213, "grad_norm": 2.4479710576556184, "learning_rate": 7.1632747798126135e-06, "loss": 0.642, "step": 2175 }, { "epoch": 0.37711488052685166, "grad_norm": 2.017192709707373, "learning_rate": 7.1607437626921505e-06, "loss": 0.6099, "step": 2176 }, { "epoch": 0.377288186997682, "grad_norm": 2.055292933441871, "learning_rate": 7.158212064570937e-06, "loss": 0.5042, "step": 2177 }, { "epoch": 0.3774614934685124, "grad_norm": 3.0274877841468557, "learning_rate": 7.1556796862468865e-06, "loss": 0.5055, "step": 2178 }, { "epoch": 0.37763479993934274, "grad_norm": 1.9839333736928693, "learning_rate": 7.15314662851813e-06, "loss": 0.5855, "step": 2179 }, { "epoch": 0.3778081064101731, "grad_norm": 2.6520670408614375, "learning_rate": 7.150612892183009e-06, "loss": 0.5979, "step": 2180 }, { "epoch": 0.37798141288100345, "grad_norm": 2.2880968417312206, "learning_rate": 7.148078478040079e-06, "loss": 0.6146, "step": 2181 }, { "epoch": 0.3781547193518338, "grad_norm": 2.0417164062222066, "learning_rate": 7.145543386888111e-06, "loss": 0.5389, "step": 2182 }, { "epoch": 0.37832802582266417, "grad_norm": 2.5220692403732783, "learning_rate": 7.143007619526091e-06, "loss": 0.5657, "step": 2183 }, { "epoch": 0.37850133229349453, "grad_norm": 2.3238037556216002, "learning_rate": 7.140471176753213e-06, "loss": 0.5376, "step": 2184 }, { "epoch": 0.3786746387643249, "grad_norm": 2.233631075681495, "learning_rate": 7.137934059368887e-06, "loss": 0.6124, "step": 2185 }, { "epoch": 0.3788479452351552, "grad_norm": 2.439942817432782, "learning_rate": 7.135396268172737e-06, "loss": 0.5992, "step": 2186 }, { "epoch": 0.37902125170598555, "grad_norm": 2.070581913317371, "learning_rate": 7.132857803964597e-06, "loss": 0.5631, "step": 2187 }, { "epoch": 0.3791945581768159, "grad_norm": 2.3878672792494284, "learning_rate": 7.130318667544511e-06, "loss": 0.516, "step": 2188 }, { "epoch": 0.37936786464764627, "grad_norm": 3.2901071351554347, "learning_rate": 7.127778859712742e-06, "loss": 0.5838, "step": 2189 }, { "epoch": 0.37954117111847663, "grad_norm": 1.82800321577572, "learning_rate": 7.1252383812697565e-06, "loss": 0.5715, "step": 2190 }, { "epoch": 0.379714477589307, "grad_norm": 2.0694863671278463, "learning_rate": 7.122697233016239e-06, "loss": 0.5816, "step": 2191 }, { "epoch": 0.37988778406013735, "grad_norm": 2.1529769996172776, "learning_rate": 7.120155415753081e-06, "loss": 0.5678, "step": 2192 }, { "epoch": 0.3800610905309677, "grad_norm": 1.9698194023260474, "learning_rate": 7.1176129302813856e-06, "loss": 0.6143, "step": 2193 }, { "epoch": 0.38023439700179806, "grad_norm": 2.2735018351330614, "learning_rate": 7.115069777402467e-06, "loss": 0.503, "step": 2194 }, { "epoch": 0.3804077034726284, "grad_norm": 2.3707199227482483, "learning_rate": 7.112525957917848e-06, "loss": 0.549, "step": 2195 }, { "epoch": 0.3805810099434588, "grad_norm": 2.494648100411072, "learning_rate": 7.109981472629268e-06, "loss": 0.5799, "step": 2196 }, { "epoch": 0.38075431641428914, "grad_norm": 5.3852074900154285, "learning_rate": 7.107436322338667e-06, "loss": 0.4404, "step": 2197 }, { "epoch": 0.3809276228851195, "grad_norm": 2.115213476505182, "learning_rate": 7.104890507848201e-06, "loss": 0.5743, "step": 2198 }, { "epoch": 0.3811009293559498, "grad_norm": 2.8144445285512636, "learning_rate": 7.102344029960233e-06, "loss": 0.5537, "step": 2199 }, { "epoch": 0.38127423582678016, "grad_norm": 2.5973743243123737, "learning_rate": 7.099796889477335e-06, "loss": 0.492, "step": 2200 }, { "epoch": 0.3814475422976105, "grad_norm": 2.1648844917768817, "learning_rate": 7.097249087202288e-06, "loss": 0.5276, "step": 2201 }, { "epoch": 0.3816208487684409, "grad_norm": 2.14916691882836, "learning_rate": 7.0947006239380825e-06, "loss": 0.5693, "step": 2202 }, { "epoch": 0.38179415523927124, "grad_norm": 2.241548733629466, "learning_rate": 7.092151500487916e-06, "loss": 0.509, "step": 2203 }, { "epoch": 0.3819674617101016, "grad_norm": 2.5338447415707397, "learning_rate": 7.089601717655197e-06, "loss": 0.535, "step": 2204 }, { "epoch": 0.38214076818093196, "grad_norm": 2.592103190867506, "learning_rate": 7.087051276243537e-06, "loss": 0.5991, "step": 2205 }, { "epoch": 0.3823140746517623, "grad_norm": 1.92737584328343, "learning_rate": 7.084500177056758e-06, "loss": 0.571, "step": 2206 }, { "epoch": 0.3824873811225927, "grad_norm": 2.3273503656900676, "learning_rate": 7.081948420898889e-06, "loss": 0.5463, "step": 2207 }, { "epoch": 0.38266068759342303, "grad_norm": 2.9588498562321206, "learning_rate": 7.079396008574167e-06, "loss": 0.4919, "step": 2208 }, { "epoch": 0.3828339940642534, "grad_norm": 3.8535354259354224, "learning_rate": 7.0768429408870325e-06, "loss": 0.5773, "step": 2209 }, { "epoch": 0.38300730053508375, "grad_norm": 2.1423742830838566, "learning_rate": 7.074289218642137e-06, "loss": 0.5712, "step": 2210 }, { "epoch": 0.3831806070059141, "grad_norm": 2.3681983524652157, "learning_rate": 7.071734842644336e-06, "loss": 0.6067, "step": 2211 }, { "epoch": 0.3833539134767444, "grad_norm": 2.62079466157652, "learning_rate": 7.06917981369869e-06, "loss": 0.5493, "step": 2212 }, { "epoch": 0.38352721994757477, "grad_norm": 2.0819486275658967, "learning_rate": 7.066624132610467e-06, "loss": 0.4967, "step": 2213 }, { "epoch": 0.38370052641840513, "grad_norm": 3.875064295011206, "learning_rate": 7.064067800185142e-06, "loss": 0.6604, "step": 2214 }, { "epoch": 0.3838738328892355, "grad_norm": 3.78402966890712, "learning_rate": 7.061510817228389e-06, "loss": 0.5385, "step": 2215 }, { "epoch": 0.38404713936006585, "grad_norm": 2.4214369694696884, "learning_rate": 7.058953184546093e-06, "loss": 0.5788, "step": 2216 }, { "epoch": 0.3842204458308962, "grad_norm": 2.0648628104179654, "learning_rate": 7.056394902944345e-06, "loss": 0.612, "step": 2217 }, { "epoch": 0.38439375230172657, "grad_norm": 2.1967865034651006, "learning_rate": 7.053835973229433e-06, "loss": 0.4935, "step": 2218 }, { "epoch": 0.3845670587725569, "grad_norm": 2.426660953133181, "learning_rate": 7.05127639620786e-06, "loss": 0.5864, "step": 2219 }, { "epoch": 0.3847403652433873, "grad_norm": 1.8389691237036032, "learning_rate": 7.048716172686321e-06, "loss": 0.5493, "step": 2220 }, { "epoch": 0.38491367171421764, "grad_norm": 2.733017007856722, "learning_rate": 7.046155303471721e-06, "loss": 0.4516, "step": 2221 }, { "epoch": 0.385086978185048, "grad_norm": 2.405135730764305, "learning_rate": 7.043593789371171e-06, "loss": 0.6109, "step": 2222 }, { "epoch": 0.38526028465587836, "grad_norm": 2.2924278665078153, "learning_rate": 7.04103163119198e-06, "loss": 0.5594, "step": 2223 }, { "epoch": 0.3854335911267087, "grad_norm": 2.114307330417793, "learning_rate": 7.038468829741666e-06, "loss": 0.5556, "step": 2224 }, { "epoch": 0.385606897597539, "grad_norm": 2.2814051056631968, "learning_rate": 7.035905385827941e-06, "loss": 0.58, "step": 2225 }, { "epoch": 0.3857802040683694, "grad_norm": 2.05904379336315, "learning_rate": 7.033341300258729e-06, "loss": 0.5673, "step": 2226 }, { "epoch": 0.38595351053919974, "grad_norm": 2.3081550088082685, "learning_rate": 7.03077657384215e-06, "loss": 0.6322, "step": 2227 }, { "epoch": 0.3861268170100301, "grad_norm": 2.3366830684309057, "learning_rate": 7.028211207386527e-06, "loss": 0.5015, "step": 2228 }, { "epoch": 0.38630012348086046, "grad_norm": 2.5376897983671283, "learning_rate": 7.025645201700386e-06, "loss": 0.6142, "step": 2229 }, { "epoch": 0.3864734299516908, "grad_norm": 2.317309664005385, "learning_rate": 7.023078557592455e-06, "loss": 0.625, "step": 2230 }, { "epoch": 0.3866467364225212, "grad_norm": 2.0877392548395535, "learning_rate": 7.0205112758716624e-06, "loss": 0.6105, "step": 2231 }, { "epoch": 0.38682004289335153, "grad_norm": 2.8970405334403706, "learning_rate": 7.017943357347136e-06, "loss": 0.5898, "step": 2232 }, { "epoch": 0.3869933493641819, "grad_norm": 2.0487489903021037, "learning_rate": 7.015374802828206e-06, "loss": 0.5342, "step": 2233 }, { "epoch": 0.38716665583501225, "grad_norm": 2.7012108807064097, "learning_rate": 7.012805613124404e-06, "loss": 0.5899, "step": 2234 }, { "epoch": 0.3873399623058426, "grad_norm": 2.0860885646726053, "learning_rate": 7.010235789045456e-06, "loss": 0.518, "step": 2235 }, { "epoch": 0.38751326877667297, "grad_norm": 3.02434801065093, "learning_rate": 7.007665331401299e-06, "loss": 0.5477, "step": 2236 }, { "epoch": 0.38768657524750333, "grad_norm": 2.442275518024635, "learning_rate": 7.00509424100206e-06, "loss": 0.6432, "step": 2237 }, { "epoch": 0.38785988171833363, "grad_norm": 2.2830090136636314, "learning_rate": 7.002522518658066e-06, "loss": 0.5592, "step": 2238 }, { "epoch": 0.388033188189164, "grad_norm": 1.8818724916826302, "learning_rate": 6.99995016517985e-06, "loss": 0.5079, "step": 2239 }, { "epoch": 0.38820649465999435, "grad_norm": 2.1334484794856774, "learning_rate": 6.997377181378138e-06, "loss": 0.5642, "step": 2240 }, { "epoch": 0.3883798011308247, "grad_norm": 2.129589565859886, "learning_rate": 6.994803568063856e-06, "loss": 0.6163, "step": 2241 }, { "epoch": 0.38855310760165507, "grad_norm": 2.377872285666787, "learning_rate": 6.992229326048128e-06, "loss": 0.6295, "step": 2242 }, { "epoch": 0.3887264140724854, "grad_norm": 2.29016648507204, "learning_rate": 6.989654456142276e-06, "loss": 0.6496, "step": 2243 }, { "epoch": 0.3888997205433158, "grad_norm": 1.8668937617776953, "learning_rate": 6.987078959157825e-06, "loss": 0.5498, "step": 2244 }, { "epoch": 0.38907302701414614, "grad_norm": 2.648139420157059, "learning_rate": 6.9845028359064904e-06, "loss": 0.5932, "step": 2245 }, { "epoch": 0.3892463334849765, "grad_norm": 2.594769723409487, "learning_rate": 6.981926087200188e-06, "loss": 0.6091, "step": 2246 }, { "epoch": 0.38941963995580686, "grad_norm": 2.6678784844464536, "learning_rate": 6.97934871385103e-06, "loss": 0.5179, "step": 2247 }, { "epoch": 0.3895929464266372, "grad_norm": 3.1449415343575065, "learning_rate": 6.976770716671327e-06, "loss": 0.5696, "step": 2248 }, { "epoch": 0.3897662528974676, "grad_norm": 2.303487540758642, "learning_rate": 6.9741920964735865e-06, "loss": 0.5727, "step": 2249 }, { "epoch": 0.38993955936829794, "grad_norm": 2.270003259956452, "learning_rate": 6.9716128540705084e-06, "loss": 0.6601, "step": 2250 }, { "epoch": 0.39011286583912824, "grad_norm": 2.329718960150853, "learning_rate": 6.9690329902749945e-06, "loss": 0.5063, "step": 2251 }, { "epoch": 0.3902861723099586, "grad_norm": 2.3689576864226156, "learning_rate": 6.966452505900138e-06, "loss": 0.6535, "step": 2252 }, { "epoch": 0.39045947878078896, "grad_norm": 2.7389232717705876, "learning_rate": 6.963871401759228e-06, "loss": 0.4497, "step": 2253 }, { "epoch": 0.3906327852516193, "grad_norm": 2.321727667860501, "learning_rate": 6.9612896786657524e-06, "loss": 0.5746, "step": 2254 }, { "epoch": 0.3908060917224497, "grad_norm": 3.9401936873802543, "learning_rate": 6.95870733743339e-06, "loss": 0.5427, "step": 2255 }, { "epoch": 0.39097939819328004, "grad_norm": 3.0646997584931923, "learning_rate": 6.956124378876018e-06, "loss": 0.5729, "step": 2256 }, { "epoch": 0.3911527046641104, "grad_norm": 2.2434114904179423, "learning_rate": 6.953540803807705e-06, "loss": 0.6081, "step": 2257 }, { "epoch": 0.39132601113494075, "grad_norm": 2.763778733040821, "learning_rate": 6.950956613042714e-06, "loss": 0.5901, "step": 2258 }, { "epoch": 0.3914993176057711, "grad_norm": 2.053965707731222, "learning_rate": 6.948371807395508e-06, "loss": 0.5699, "step": 2259 }, { "epoch": 0.39167262407660147, "grad_norm": 2.3501706022398827, "learning_rate": 6.945786387680735e-06, "loss": 0.5169, "step": 2260 }, { "epoch": 0.39184593054743183, "grad_norm": 2.1628748392860357, "learning_rate": 6.943200354713242e-06, "loss": 0.5582, "step": 2261 }, { "epoch": 0.3920192370182622, "grad_norm": 2.494063626336832, "learning_rate": 6.940613709308067e-06, "loss": 0.6045, "step": 2262 }, { "epoch": 0.39219254348909255, "grad_norm": 2.313103378780684, "learning_rate": 6.9380264522804434e-06, "loss": 0.6097, "step": 2263 }, { "epoch": 0.39236584995992285, "grad_norm": 2.2324265988716423, "learning_rate": 6.935438584445795e-06, "loss": 0.5547, "step": 2264 }, { "epoch": 0.3925391564307532, "grad_norm": 2.0354391696841465, "learning_rate": 6.93285010661974e-06, "loss": 0.5327, "step": 2265 }, { "epoch": 0.39271246290158357, "grad_norm": 2.2825160125090997, "learning_rate": 6.930261019618089e-06, "loss": 0.5425, "step": 2266 }, { "epoch": 0.39288576937241393, "grad_norm": 2.2062894334820364, "learning_rate": 6.92767132425684e-06, "loss": 0.5472, "step": 2267 }, { "epoch": 0.3930590758432443, "grad_norm": 2.3079018195289196, "learning_rate": 6.925081021352189e-06, "loss": 0.6234, "step": 2268 }, { "epoch": 0.39323238231407465, "grad_norm": 2.6085504439694507, "learning_rate": 6.92249011172052e-06, "loss": 0.6723, "step": 2269 }, { "epoch": 0.393405688784905, "grad_norm": 2.7327463925320443, "learning_rate": 6.91989859617841e-06, "loss": 0.5177, "step": 2270 }, { "epoch": 0.39357899525573536, "grad_norm": 2.4664197236000738, "learning_rate": 6.917306475542623e-06, "loss": 0.5786, "step": 2271 }, { "epoch": 0.3937523017265657, "grad_norm": 2.6719569154034306, "learning_rate": 6.91471375063012e-06, "loss": 0.566, "step": 2272 }, { "epoch": 0.3939256081973961, "grad_norm": 2.5401141177016267, "learning_rate": 6.912120422258046e-06, "loss": 0.5228, "step": 2273 }, { "epoch": 0.39409891466822644, "grad_norm": 2.600852656068878, "learning_rate": 6.909526491243742e-06, "loss": 0.5527, "step": 2274 }, { "epoch": 0.3942722211390568, "grad_norm": 2.3721716282403658, "learning_rate": 6.906931958404734e-06, "loss": 0.6036, "step": 2275 }, { "epoch": 0.39444552760988716, "grad_norm": 2.184750400580922, "learning_rate": 6.904336824558742e-06, "loss": 0.5813, "step": 2276 }, { "epoch": 0.39461883408071746, "grad_norm": 2.8142623395377613, "learning_rate": 6.901741090523672e-06, "loss": 0.5148, "step": 2277 }, { "epoch": 0.3947921405515478, "grad_norm": 2.3562688930599536, "learning_rate": 6.899144757117621e-06, "loss": 0.5113, "step": 2278 }, { "epoch": 0.3949654470223782, "grad_norm": 2.2147615530033997, "learning_rate": 6.896547825158874e-06, "loss": 0.5664, "step": 2279 }, { "epoch": 0.39513875349320854, "grad_norm": 3.2412822401972314, "learning_rate": 6.893950295465905e-06, "loss": 0.6015, "step": 2280 }, { "epoch": 0.3953120599640389, "grad_norm": 2.224073574055553, "learning_rate": 6.891352168857376e-06, "loss": 0.5375, "step": 2281 }, { "epoch": 0.39548536643486926, "grad_norm": 5.42882186068053, "learning_rate": 6.888753446152139e-06, "loss": 0.5425, "step": 2282 }, { "epoch": 0.3956586729056996, "grad_norm": 2.219164874997658, "learning_rate": 6.886154128169229e-06, "loss": 0.5226, "step": 2283 }, { "epoch": 0.39583197937653, "grad_norm": 2.536874626163276, "learning_rate": 6.883554215727876e-06, "loss": 0.5717, "step": 2284 }, { "epoch": 0.39600528584736033, "grad_norm": 2.38210645030233, "learning_rate": 6.880953709647491e-06, "loss": 0.5561, "step": 2285 }, { "epoch": 0.3961785923181907, "grad_norm": 2.682457486668995, "learning_rate": 6.8783526107476784e-06, "loss": 0.532, "step": 2286 }, { "epoch": 0.39635189878902105, "grad_norm": 2.5687653520745175, "learning_rate": 6.875750919848219e-06, "loss": 0.5326, "step": 2287 }, { "epoch": 0.3965252052598514, "grad_norm": 2.14886736928438, "learning_rate": 6.873148637769091e-06, "loss": 0.607, "step": 2288 }, { "epoch": 0.39669851173068177, "grad_norm": 2.2504110183975055, "learning_rate": 6.870545765330454e-06, "loss": 0.5562, "step": 2289 }, { "epoch": 0.39687181820151207, "grad_norm": 2.32103961853707, "learning_rate": 6.867942303352653e-06, "loss": 0.5987, "step": 2290 }, { "epoch": 0.39704512467234243, "grad_norm": 2.6244017821651724, "learning_rate": 6.865338252656221e-06, "loss": 0.5232, "step": 2291 }, { "epoch": 0.3972184311431728, "grad_norm": 2.6162969212549627, "learning_rate": 6.862733614061876e-06, "loss": 0.5048, "step": 2292 }, { "epoch": 0.39739173761400315, "grad_norm": 2.4377853759124752, "learning_rate": 6.860128388390521e-06, "loss": 0.4869, "step": 2293 }, { "epoch": 0.3975650440848335, "grad_norm": 4.3080608709711115, "learning_rate": 6.857522576463243e-06, "loss": 0.6595, "step": 2294 }, { "epoch": 0.39773835055566387, "grad_norm": 2.196964231872733, "learning_rate": 6.854916179101315e-06, "loss": 0.517, "step": 2295 }, { "epoch": 0.3979116570264942, "grad_norm": 2.6953087013688326, "learning_rate": 6.852309197126194e-06, "loss": 0.5203, "step": 2296 }, { "epoch": 0.3980849634973246, "grad_norm": 2.4529861532771537, "learning_rate": 6.849701631359522e-06, "loss": 0.5554, "step": 2297 }, { "epoch": 0.39825826996815494, "grad_norm": 6.029478086238093, "learning_rate": 6.847093482623125e-06, "loss": 0.5226, "step": 2298 }, { "epoch": 0.3984315764389853, "grad_norm": 2.2250160954927587, "learning_rate": 6.844484751739011e-06, "loss": 0.5492, "step": 2299 }, { "epoch": 0.39860488290981566, "grad_norm": 2.1996704503892768, "learning_rate": 6.841875439529373e-06, "loss": 0.5647, "step": 2300 }, { "epoch": 0.398778189380646, "grad_norm": 2.3353050663457986, "learning_rate": 6.839265546816589e-06, "loss": 0.681, "step": 2301 }, { "epoch": 0.3989514958514764, "grad_norm": 3.2486051231614574, "learning_rate": 6.8366550744232145e-06, "loss": 0.5694, "step": 2302 }, { "epoch": 0.3991248023223067, "grad_norm": 1.9820129627586915, "learning_rate": 6.83404402317199e-06, "loss": 0.5671, "step": 2303 }, { "epoch": 0.39929810879313704, "grad_norm": 2.3426169510220087, "learning_rate": 6.831432393885844e-06, "loss": 0.5818, "step": 2304 }, { "epoch": 0.3994714152639674, "grad_norm": 2.273119688008019, "learning_rate": 6.828820187387881e-06, "loss": 0.5184, "step": 2305 }, { "epoch": 0.39964472173479776, "grad_norm": 2.4150039112615374, "learning_rate": 6.8262074045013894e-06, "loss": 0.6352, "step": 2306 }, { "epoch": 0.3998180282056281, "grad_norm": 2.2164219037743487, "learning_rate": 6.823594046049838e-06, "loss": 0.5841, "step": 2307 }, { "epoch": 0.3999913346764585, "grad_norm": 2.1996664283426273, "learning_rate": 6.820980112856877e-06, "loss": 0.4552, "step": 2308 }, { "epoch": 0.40016464114728884, "grad_norm": 2.248886448218533, "learning_rate": 6.81836560574634e-06, "loss": 0.5704, "step": 2309 }, { "epoch": 0.4003379476181192, "grad_norm": 2.2911253751042078, "learning_rate": 6.815750525542239e-06, "loss": 0.4866, "step": 2310 }, { "epoch": 0.40051125408894955, "grad_norm": 2.0078337993924475, "learning_rate": 6.813134873068769e-06, "loss": 0.5279, "step": 2311 }, { "epoch": 0.4006845605597799, "grad_norm": 2.2234806128399236, "learning_rate": 6.8105186491503065e-06, "loss": 0.5403, "step": 2312 }, { "epoch": 0.40085786703061027, "grad_norm": 2.2921707769810107, "learning_rate": 6.807901854611401e-06, "loss": 0.5208, "step": 2313 }, { "epoch": 0.40103117350144063, "grad_norm": 2.169658821111769, "learning_rate": 6.805284490276789e-06, "loss": 0.6158, "step": 2314 }, { "epoch": 0.401204479972271, "grad_norm": 3.4069754150990295, "learning_rate": 6.802666556971384e-06, "loss": 0.4715, "step": 2315 }, { "epoch": 0.4013777864431013, "grad_norm": 2.093885140145333, "learning_rate": 6.800048055520279e-06, "loss": 0.6077, "step": 2316 }, { "epoch": 0.40155109291393165, "grad_norm": 2.2714285880544676, "learning_rate": 6.797428986748745e-06, "loss": 0.453, "step": 2317 }, { "epoch": 0.401724399384762, "grad_norm": 2.1284001280884564, "learning_rate": 6.794809351482236e-06, "loss": 0.5772, "step": 2318 }, { "epoch": 0.40189770585559237, "grad_norm": 3.593192555832809, "learning_rate": 6.7921891505463775e-06, "loss": 0.5058, "step": 2319 }, { "epoch": 0.4020710123264227, "grad_norm": 2.6336347834371208, "learning_rate": 6.7895683847669806e-06, "loss": 0.5714, "step": 2320 }, { "epoch": 0.4022443187972531, "grad_norm": 3.386156933276334, "learning_rate": 6.786947054970028e-06, "loss": 0.5724, "step": 2321 }, { "epoch": 0.40241762526808345, "grad_norm": 2.0185630177304787, "learning_rate": 6.7843251619816855e-06, "loss": 0.5882, "step": 2322 }, { "epoch": 0.4025909317389138, "grad_norm": 3.696417398034031, "learning_rate": 6.781702706628293e-06, "loss": 0.5959, "step": 2323 }, { "epoch": 0.40276423820974416, "grad_norm": 2.1477775996255555, "learning_rate": 6.779079689736367e-06, "loss": 0.5211, "step": 2324 }, { "epoch": 0.4029375446805745, "grad_norm": 2.3253174327800155, "learning_rate": 6.776456112132604e-06, "loss": 0.55, "step": 2325 }, { "epoch": 0.4031108511514049, "grad_norm": 3.1309690325757185, "learning_rate": 6.7738319746438784e-06, "loss": 0.4969, "step": 2326 }, { "epoch": 0.40328415762223524, "grad_norm": 3.800168783850893, "learning_rate": 6.771207278097237e-06, "loss": 0.4806, "step": 2327 }, { "epoch": 0.4034574640930656, "grad_norm": 2.342501235794948, "learning_rate": 6.768582023319903e-06, "loss": 0.5614, "step": 2328 }, { "epoch": 0.4036307705638959, "grad_norm": 2.19225915336244, "learning_rate": 6.765956211139278e-06, "loss": 0.5907, "step": 2329 }, { "epoch": 0.40380407703472626, "grad_norm": 2.5367918412060693, "learning_rate": 6.763329842382938e-06, "loss": 0.5083, "step": 2330 }, { "epoch": 0.4039773835055566, "grad_norm": 2.3065082098795973, "learning_rate": 6.760702917878635e-06, "loss": 0.6355, "step": 2331 }, { "epoch": 0.404150689976387, "grad_norm": 3.214341456173494, "learning_rate": 6.758075438454295e-06, "loss": 0.5407, "step": 2332 }, { "epoch": 0.40432399644721734, "grad_norm": 2.2776371036325878, "learning_rate": 6.755447404938019e-06, "loss": 0.5272, "step": 2333 }, { "epoch": 0.4044973029180477, "grad_norm": 2.362381349419453, "learning_rate": 6.752818818158085e-06, "loss": 0.5075, "step": 2334 }, { "epoch": 0.40467060938887806, "grad_norm": 2.3139582147018514, "learning_rate": 6.7501896789429436e-06, "loss": 0.6156, "step": 2335 }, { "epoch": 0.4048439158597084, "grad_norm": 4.699903954306939, "learning_rate": 6.747559988121217e-06, "loss": 0.6466, "step": 2336 }, { "epoch": 0.4050172223305388, "grad_norm": 4.416761764824951, "learning_rate": 6.744929746521708e-06, "loss": 0.5032, "step": 2337 }, { "epoch": 0.40519052880136913, "grad_norm": 11.782092017681581, "learning_rate": 6.742298954973385e-06, "loss": 0.5869, "step": 2338 }, { "epoch": 0.4053638352721995, "grad_norm": 2.1399057139532456, "learning_rate": 6.739667614305395e-06, "loss": 0.4954, "step": 2339 }, { "epoch": 0.40553714174302985, "grad_norm": 2.1116176958652186, "learning_rate": 6.7370357253470564e-06, "loss": 0.5595, "step": 2340 }, { "epoch": 0.4057104482138602, "grad_norm": 2.51334585345836, "learning_rate": 6.734403288927862e-06, "loss": 0.5804, "step": 2341 }, { "epoch": 0.4058837546846905, "grad_norm": 4.28733104861096, "learning_rate": 6.731770305877474e-06, "loss": 0.658, "step": 2342 }, { "epoch": 0.40605706115552087, "grad_norm": 2.423215789865723, "learning_rate": 6.72913677702573e-06, "loss": 0.4619, "step": 2343 }, { "epoch": 0.40623036762635123, "grad_norm": 2.278464530381581, "learning_rate": 6.726502703202635e-06, "loss": 0.5972, "step": 2344 }, { "epoch": 0.4064036740971816, "grad_norm": 1.9673886937290272, "learning_rate": 6.723868085238372e-06, "loss": 0.5814, "step": 2345 }, { "epoch": 0.40657698056801195, "grad_norm": 1.9801079549977298, "learning_rate": 6.721232923963293e-06, "loss": 0.5525, "step": 2346 }, { "epoch": 0.4067502870388423, "grad_norm": 2.271101904292872, "learning_rate": 6.718597220207919e-06, "loss": 0.5585, "step": 2347 }, { "epoch": 0.40692359350967267, "grad_norm": 2.09604130944512, "learning_rate": 6.7159609748029456e-06, "loss": 0.5393, "step": 2348 }, { "epoch": 0.407096899980503, "grad_norm": 2.322222255037874, "learning_rate": 6.713324188579234e-06, "loss": 0.5479, "step": 2349 }, { "epoch": 0.4072702064513334, "grad_norm": 1.9868973630336049, "learning_rate": 6.7106868623678215e-06, "loss": 0.5298, "step": 2350 }, { "epoch": 0.40744351292216374, "grad_norm": 2.1861608146597336, "learning_rate": 6.708048996999912e-06, "loss": 0.5737, "step": 2351 }, { "epoch": 0.4076168193929941, "grad_norm": 2.4115855539638904, "learning_rate": 6.705410593306882e-06, "loss": 0.6519, "step": 2352 }, { "epoch": 0.40779012586382446, "grad_norm": 2.8101448364608235, "learning_rate": 6.702771652120276e-06, "loss": 0.5342, "step": 2353 }, { "epoch": 0.4079634323346548, "grad_norm": 2.0781601596905395, "learning_rate": 6.7001321742718066e-06, "loss": 0.5657, "step": 2354 }, { "epoch": 0.4081367388054852, "grad_norm": 2.3650530498129454, "learning_rate": 6.697492160593356e-06, "loss": 0.593, "step": 2355 }, { "epoch": 0.4083100452763155, "grad_norm": 2.2516081865423425, "learning_rate": 6.694851611916979e-06, "loss": 0.5818, "step": 2356 }, { "epoch": 0.40848335174714584, "grad_norm": 2.173997832339464, "learning_rate": 6.692210529074896e-06, "loss": 0.6283, "step": 2357 }, { "epoch": 0.4086566582179762, "grad_norm": 2.3285168187352046, "learning_rate": 6.689568912899494e-06, "loss": 0.5375, "step": 2358 }, { "epoch": 0.40882996468880656, "grad_norm": 2.443202070829019, "learning_rate": 6.6869267642233325e-06, "loss": 0.6243, "step": 2359 }, { "epoch": 0.4090032711596369, "grad_norm": 2.0229694629627644, "learning_rate": 6.684284083879134e-06, "loss": 0.5756, "step": 2360 }, { "epoch": 0.4091765776304673, "grad_norm": 2.100473784788749, "learning_rate": 6.681640872699794e-06, "loss": 0.4913, "step": 2361 }, { "epoch": 0.40934988410129763, "grad_norm": 2.166169557715975, "learning_rate": 6.6789971315183695e-06, "loss": 0.5654, "step": 2362 }, { "epoch": 0.409523190572128, "grad_norm": 2.340403852224985, "learning_rate": 6.67635286116809e-06, "loss": 0.6079, "step": 2363 }, { "epoch": 0.40969649704295835, "grad_norm": 2.9563924804966883, "learning_rate": 6.6737080624823446e-06, "loss": 0.5636, "step": 2364 }, { "epoch": 0.4098698035137887, "grad_norm": 2.35875050029198, "learning_rate": 6.671062736294698e-06, "loss": 0.5938, "step": 2365 }, { "epoch": 0.41004310998461907, "grad_norm": 2.5357633861946027, "learning_rate": 6.668416883438875e-06, "loss": 0.5741, "step": 2366 }, { "epoch": 0.41021641645544943, "grad_norm": 2.26027898894952, "learning_rate": 6.665770504748767e-06, "loss": 0.5198, "step": 2367 }, { "epoch": 0.4103897229262798, "grad_norm": 2.6386136519141545, "learning_rate": 6.663123601058435e-06, "loss": 0.5038, "step": 2368 }, { "epoch": 0.4105630293971101, "grad_norm": 2.074774169482841, "learning_rate": 6.6604761732021e-06, "loss": 0.5182, "step": 2369 }, { "epoch": 0.41073633586794045, "grad_norm": 2.1038596595392853, "learning_rate": 6.657828222014152e-06, "loss": 0.5387, "step": 2370 }, { "epoch": 0.4109096423387708, "grad_norm": 2.8896977522715406, "learning_rate": 6.6551797483291435e-06, "loss": 0.5352, "step": 2371 }, { "epoch": 0.41108294880960117, "grad_norm": 2.663457537380288, "learning_rate": 6.652530752981792e-06, "loss": 0.6937, "step": 2372 }, { "epoch": 0.4112562552804315, "grad_norm": 2.171951703229031, "learning_rate": 6.649881236806985e-06, "loss": 0.4992, "step": 2373 }, { "epoch": 0.4114295617512619, "grad_norm": 2.1976532844104537, "learning_rate": 6.647231200639766e-06, "loss": 0.5495, "step": 2374 }, { "epoch": 0.41160286822209224, "grad_norm": 2.5835483113972555, "learning_rate": 6.6445806453153435e-06, "loss": 0.6396, "step": 2375 }, { "epoch": 0.4117761746929226, "grad_norm": 2.1636799948970724, "learning_rate": 6.641929571669096e-06, "loss": 0.5235, "step": 2376 }, { "epoch": 0.41194948116375296, "grad_norm": 2.348645202378712, "learning_rate": 6.639277980536562e-06, "loss": 0.5957, "step": 2377 }, { "epoch": 0.4121227876345833, "grad_norm": 5.4530433200250625, "learning_rate": 6.636625872753438e-06, "loss": 0.5682, "step": 2378 }, { "epoch": 0.4122960941054137, "grad_norm": 4.144983914453389, "learning_rate": 6.633973249155591e-06, "loss": 0.5308, "step": 2379 }, { "epoch": 0.41246940057624404, "grad_norm": 2.866466614786294, "learning_rate": 6.631320110579046e-06, "loss": 0.6261, "step": 2380 }, { "epoch": 0.4126427070470744, "grad_norm": 2.0063423582112523, "learning_rate": 6.628666457859992e-06, "loss": 0.5418, "step": 2381 }, { "epoch": 0.4128160135179047, "grad_norm": 2.22586957477737, "learning_rate": 6.626012291834781e-06, "loss": 0.4875, "step": 2382 }, { "epoch": 0.41298931998873506, "grad_norm": 2.4848935765238367, "learning_rate": 6.623357613339924e-06, "loss": 0.6179, "step": 2383 }, { "epoch": 0.4131626264595654, "grad_norm": 2.6195922675424335, "learning_rate": 6.620702423212095e-06, "loss": 0.6171, "step": 2384 }, { "epoch": 0.4133359329303958, "grad_norm": 2.5614422172619835, "learning_rate": 6.618046722288129e-06, "loss": 0.6485, "step": 2385 }, { "epoch": 0.41350923940122614, "grad_norm": 2.29973477889157, "learning_rate": 6.615390511405022e-06, "loss": 0.5091, "step": 2386 }, { "epoch": 0.4136825458720565, "grad_norm": 2.222340138727935, "learning_rate": 6.612733791399933e-06, "loss": 0.4811, "step": 2387 }, { "epoch": 0.41385585234288685, "grad_norm": 2.2057158320165184, "learning_rate": 6.610076563110177e-06, "loss": 0.6029, "step": 2388 }, { "epoch": 0.4140291588137172, "grad_norm": 2.2133918567607953, "learning_rate": 6.607418827373234e-06, "loss": 0.5698, "step": 2389 }, { "epoch": 0.41420246528454757, "grad_norm": 15.172741133736205, "learning_rate": 6.6047605850267395e-06, "loss": 0.5692, "step": 2390 }, { "epoch": 0.41437577175537793, "grad_norm": 2.217416099670039, "learning_rate": 6.60210183690849e-06, "loss": 0.5198, "step": 2391 }, { "epoch": 0.4145490782262083, "grad_norm": 2.420728455398477, "learning_rate": 6.599442583856443e-06, "loss": 0.5798, "step": 2392 }, { "epoch": 0.41472238469703865, "grad_norm": 1.8760125531438154, "learning_rate": 6.596782826708715e-06, "loss": 0.4577, "step": 2393 }, { "epoch": 0.414895691167869, "grad_norm": 2.1354874850649157, "learning_rate": 6.5941225663035826e-06, "loss": 0.5995, "step": 2394 }, { "epoch": 0.4150689976386993, "grad_norm": 1.908249979107665, "learning_rate": 6.591461803479474e-06, "loss": 0.5055, "step": 2395 }, { "epoch": 0.41524230410952967, "grad_norm": 2.5065334012586002, "learning_rate": 6.588800539074984e-06, "loss": 0.5836, "step": 2396 }, { "epoch": 0.41541561058036003, "grad_norm": 2.109445247122486, "learning_rate": 6.586138773928861e-06, "loss": 0.5877, "step": 2397 }, { "epoch": 0.4155889170511904, "grad_norm": 2.446299481348139, "learning_rate": 6.5834765088800135e-06, "loss": 0.6008, "step": 2398 }, { "epoch": 0.41576222352202075, "grad_norm": 4.610127467059342, "learning_rate": 6.5808137447675055e-06, "loss": 0.5247, "step": 2399 }, { "epoch": 0.4159355299928511, "grad_norm": 2.5072293685670575, "learning_rate": 6.57815048243056e-06, "loss": 0.6131, "step": 2400 }, { "epoch": 0.41610883646368146, "grad_norm": 4.105316787742749, "learning_rate": 6.5754867227085565e-06, "loss": 0.4951, "step": 2401 }, { "epoch": 0.4162821429345118, "grad_norm": 3.1522853141663862, "learning_rate": 6.57282246644103e-06, "loss": 0.5699, "step": 2402 }, { "epoch": 0.4164554494053422, "grad_norm": 3.1433454753778745, "learning_rate": 6.570157714467674e-06, "loss": 0.6816, "step": 2403 }, { "epoch": 0.41662875587617254, "grad_norm": 2.348760752176544, "learning_rate": 6.567492467628338e-06, "loss": 0.6161, "step": 2404 }, { "epoch": 0.4168020623470029, "grad_norm": 1.963977028281665, "learning_rate": 6.564826726763025e-06, "loss": 0.5112, "step": 2405 }, { "epoch": 0.41697536881783326, "grad_norm": 2.485745180548366, "learning_rate": 6.562160492711896e-06, "loss": 0.549, "step": 2406 }, { "epoch": 0.4171486752886636, "grad_norm": 2.071309637561535, "learning_rate": 6.559493766315268e-06, "loss": 0.5464, "step": 2407 }, { "epoch": 0.4173219817594939, "grad_norm": 2.3486054642295975, "learning_rate": 6.556826548413612e-06, "loss": 0.4748, "step": 2408 }, { "epoch": 0.4174952882303243, "grad_norm": 2.1528716849340714, "learning_rate": 6.554158839847553e-06, "loss": 0.5323, "step": 2409 }, { "epoch": 0.41766859470115464, "grad_norm": 3.3086191020741924, "learning_rate": 6.551490641457873e-06, "loss": 0.5342, "step": 2410 }, { "epoch": 0.417841901171985, "grad_norm": 2.1929518870936415, "learning_rate": 6.548821954085505e-06, "loss": 0.5251, "step": 2411 }, { "epoch": 0.41801520764281536, "grad_norm": 2.302705684601394, "learning_rate": 6.546152778571538e-06, "loss": 0.5709, "step": 2412 }, { "epoch": 0.4181885141136457, "grad_norm": 1.933008808282786, "learning_rate": 6.543483115757217e-06, "loss": 0.5248, "step": 2413 }, { "epoch": 0.4183618205844761, "grad_norm": 2.2219023303831706, "learning_rate": 6.5408129664839395e-06, "loss": 0.5675, "step": 2414 }, { "epoch": 0.41853512705530643, "grad_norm": 2.411519616303847, "learning_rate": 6.538142331593252e-06, "loss": 0.5343, "step": 2415 }, { "epoch": 0.4187084335261368, "grad_norm": 2.23895587813021, "learning_rate": 6.535471211926859e-06, "loss": 0.6018, "step": 2416 }, { "epoch": 0.41888173999696715, "grad_norm": 2.0104633873921847, "learning_rate": 6.5327996083266165e-06, "loss": 0.5156, "step": 2417 }, { "epoch": 0.4190550464677975, "grad_norm": 2.929499573445805, "learning_rate": 6.5301275216345324e-06, "loss": 0.559, "step": 2418 }, { "epoch": 0.41922835293862787, "grad_norm": 2.600327469601713, "learning_rate": 6.527454952692769e-06, "loss": 0.5111, "step": 2419 }, { "epoch": 0.4194016594094582, "grad_norm": 2.5936763568516783, "learning_rate": 6.524781902343636e-06, "loss": 0.4831, "step": 2420 }, { "epoch": 0.41957496588028853, "grad_norm": 2.7198313682227995, "learning_rate": 6.522108371429599e-06, "loss": 0.623, "step": 2421 }, { "epoch": 0.4197482723511189, "grad_norm": 3.034998027359492, "learning_rate": 6.519434360793272e-06, "loss": 0.586, "step": 2422 }, { "epoch": 0.41992157882194925, "grad_norm": 2.750049845996029, "learning_rate": 6.5167598712774234e-06, "loss": 0.5085, "step": 2423 }, { "epoch": 0.4200948852927796, "grad_norm": 2.3835483454638178, "learning_rate": 6.514084903724972e-06, "loss": 0.5919, "step": 2424 }, { "epoch": 0.42026819176360997, "grad_norm": 2.3192374468112362, "learning_rate": 6.511409458978986e-06, "loss": 0.6334, "step": 2425 }, { "epoch": 0.4204414982344403, "grad_norm": 2.6137041445244096, "learning_rate": 6.508733537882681e-06, "loss": 0.5899, "step": 2426 }, { "epoch": 0.4206148047052707, "grad_norm": 2.7554938713156685, "learning_rate": 6.506057141279431e-06, "loss": 0.5297, "step": 2427 }, { "epoch": 0.42078811117610104, "grad_norm": 2.264075739973193, "learning_rate": 6.5033802700127505e-06, "loss": 0.5887, "step": 2428 }, { "epoch": 0.4209614176469314, "grad_norm": 3.601049127289796, "learning_rate": 6.50070292492631e-06, "loss": 0.5407, "step": 2429 }, { "epoch": 0.42113472411776176, "grad_norm": 2.3980177447840547, "learning_rate": 6.4980251068639275e-06, "loss": 0.5961, "step": 2430 }, { "epoch": 0.4213080305885921, "grad_norm": 2.346516062785802, "learning_rate": 6.495346816669568e-06, "loss": 0.5764, "step": 2431 }, { "epoch": 0.4214813370594225, "grad_norm": 2.5467639518906524, "learning_rate": 6.49266805518735e-06, "loss": 0.543, "step": 2432 }, { "epoch": 0.42165464353025284, "grad_norm": 3.691630806855672, "learning_rate": 6.489988823261535e-06, "loss": 0.5561, "step": 2433 }, { "epoch": 0.42182795000108314, "grad_norm": 2.071619713459728, "learning_rate": 6.487309121736536e-06, "loss": 0.5413, "step": 2434 }, { "epoch": 0.4220012564719135, "grad_norm": 2.106427149650842, "learning_rate": 6.484628951456916e-06, "loss": 0.5261, "step": 2435 }, { "epoch": 0.42217456294274386, "grad_norm": 3.825696228871538, "learning_rate": 6.481948313267381e-06, "loss": 0.5823, "step": 2436 }, { "epoch": 0.4223478694135742, "grad_norm": 2.298502451416262, "learning_rate": 6.479267208012786e-06, "loss": 0.5451, "step": 2437 }, { "epoch": 0.4225211758844046, "grad_norm": 2.6424116034027283, "learning_rate": 6.476585636538135e-06, "loss": 0.5426, "step": 2438 }, { "epoch": 0.42269448235523494, "grad_norm": 2.1587842880922525, "learning_rate": 6.473903599688578e-06, "loss": 0.619, "step": 2439 }, { "epoch": 0.4228677888260653, "grad_norm": 2.0696853088870726, "learning_rate": 6.47122109830941e-06, "loss": 0.553, "step": 2440 }, { "epoch": 0.42304109529689565, "grad_norm": 2.2734437213509207, "learning_rate": 6.468538133246076e-06, "loss": 0.5976, "step": 2441 }, { "epoch": 0.423214401767726, "grad_norm": 2.0189895881013773, "learning_rate": 6.465854705344164e-06, "loss": 0.6173, "step": 2442 }, { "epoch": 0.42338770823855637, "grad_norm": 1.850899899368967, "learning_rate": 6.4631708154494085e-06, "loss": 0.5123, "step": 2443 }, { "epoch": 0.42356101470938673, "grad_norm": 2.5228644949145727, "learning_rate": 6.460486464407692e-06, "loss": 0.5418, "step": 2444 }, { "epoch": 0.4237343211802171, "grad_norm": 2.4470866021385693, "learning_rate": 6.457801653065037e-06, "loss": 0.5613, "step": 2445 }, { "epoch": 0.42390762765104745, "grad_norm": 2.2921746863209806, "learning_rate": 6.455116382267616e-06, "loss": 0.59, "step": 2446 }, { "epoch": 0.42408093412187775, "grad_norm": 2.418231208133312, "learning_rate": 6.452430652861745e-06, "loss": 0.623, "step": 2447 }, { "epoch": 0.4242542405927081, "grad_norm": 2.450109727683911, "learning_rate": 6.449744465693886e-06, "loss": 0.5475, "step": 2448 }, { "epoch": 0.42442754706353847, "grad_norm": 2.1314293470076544, "learning_rate": 6.4470578216106395e-06, "loss": 0.5769, "step": 2449 }, { "epoch": 0.4246008535343688, "grad_norm": 2.2877259540932338, "learning_rate": 6.4443707214587595e-06, "loss": 0.5994, "step": 2450 }, { "epoch": 0.4247741600051992, "grad_norm": 2.584052214029828, "learning_rate": 6.441683166085132e-06, "loss": 0.6122, "step": 2451 }, { "epoch": 0.42494746647602955, "grad_norm": 1.7203590671922628, "learning_rate": 6.438995156336797e-06, "loss": 0.4876, "step": 2452 }, { "epoch": 0.4251207729468599, "grad_norm": 2.4661757657659615, "learning_rate": 6.436306693060931e-06, "loss": 0.5552, "step": 2453 }, { "epoch": 0.42529407941769026, "grad_norm": 2.8486986336280222, "learning_rate": 6.433617777104858e-06, "loss": 0.547, "step": 2454 }, { "epoch": 0.4254673858885206, "grad_norm": 2.189980575407191, "learning_rate": 6.430928409316042e-06, "loss": 0.5817, "step": 2455 }, { "epoch": 0.425640692359351, "grad_norm": 2.264220547024679, "learning_rate": 6.428238590542091e-06, "loss": 0.5626, "step": 2456 }, { "epoch": 0.42581399883018134, "grad_norm": 2.896076080579273, "learning_rate": 6.42554832163075e-06, "loss": 0.5831, "step": 2457 }, { "epoch": 0.4259873053010117, "grad_norm": 2.095799896283396, "learning_rate": 6.422857603429915e-06, "loss": 0.5883, "step": 2458 }, { "epoch": 0.42616061177184206, "grad_norm": 1.9939027807780836, "learning_rate": 6.420166436787616e-06, "loss": 0.5116, "step": 2459 }, { "epoch": 0.42633391824267236, "grad_norm": 2.729578503466967, "learning_rate": 6.417474822552025e-06, "loss": 0.6588, "step": 2460 }, { "epoch": 0.4265072247135027, "grad_norm": 2.52745601353046, "learning_rate": 6.414782761571463e-06, "loss": 0.5354, "step": 2461 }, { "epoch": 0.4266805311843331, "grad_norm": 2.2161501763817624, "learning_rate": 6.412090254694381e-06, "loss": 0.515, "step": 2462 }, { "epoch": 0.42685383765516344, "grad_norm": 2.709826778253737, "learning_rate": 6.409397302769376e-06, "loss": 0.5777, "step": 2463 }, { "epoch": 0.4270271441259938, "grad_norm": 2.6970750996804242, "learning_rate": 6.4067039066451846e-06, "loss": 0.5486, "step": 2464 }, { "epoch": 0.42720045059682416, "grad_norm": 2.052894050677419, "learning_rate": 6.4040100671706824e-06, "loss": 0.5518, "step": 2465 }, { "epoch": 0.4273737570676545, "grad_norm": 2.0746193074228048, "learning_rate": 6.4013157851948896e-06, "loss": 0.5564, "step": 2466 }, { "epoch": 0.4275470635384849, "grad_norm": 1.98519630465621, "learning_rate": 6.398621061566958e-06, "loss": 0.5518, "step": 2467 }, { "epoch": 0.42772037000931523, "grad_norm": 2.2266460399520316, "learning_rate": 6.3959258971361845e-06, "loss": 0.5661, "step": 2468 }, { "epoch": 0.4278936764801456, "grad_norm": 2.722177785095735, "learning_rate": 6.393230292752003e-06, "loss": 0.5037, "step": 2469 }, { "epoch": 0.42806698295097595, "grad_norm": 2.267597982594798, "learning_rate": 6.390534249263985e-06, "loss": 0.557, "step": 2470 }, { "epoch": 0.4282402894218063, "grad_norm": 1.9414420419425782, "learning_rate": 6.387837767521843e-06, "loss": 0.4817, "step": 2471 }, { "epoch": 0.42841359589263667, "grad_norm": 3.2930801436842208, "learning_rate": 6.385140848375424e-06, "loss": 0.5745, "step": 2472 }, { "epoch": 0.42858690236346697, "grad_norm": 2.9433634949162855, "learning_rate": 6.382443492674716e-06, "loss": 0.4733, "step": 2473 }, { "epoch": 0.42876020883429733, "grad_norm": 2.174975478289032, "learning_rate": 6.379745701269844e-06, "loss": 0.5584, "step": 2474 }, { "epoch": 0.4289335153051277, "grad_norm": 2.4149916620301433, "learning_rate": 6.377047475011069e-06, "loss": 0.5733, "step": 2475 }, { "epoch": 0.42910682177595805, "grad_norm": 2.375220929201262, "learning_rate": 6.374348814748792e-06, "loss": 0.5834, "step": 2476 }, { "epoch": 0.4292801282467884, "grad_norm": 3.075677926055036, "learning_rate": 6.371649721333545e-06, "loss": 0.535, "step": 2477 }, { "epoch": 0.42945343471761876, "grad_norm": 1.8696887865495766, "learning_rate": 6.368950195616002e-06, "loss": 0.5234, "step": 2478 }, { "epoch": 0.4296267411884491, "grad_norm": 2.652653666732058, "learning_rate": 6.366250238446974e-06, "loss": 0.5657, "step": 2479 }, { "epoch": 0.4298000476592795, "grad_norm": 2.8412332042728106, "learning_rate": 6.3635498506774005e-06, "loss": 0.6065, "step": 2480 }, { "epoch": 0.42997335413010984, "grad_norm": 2.65506838676339, "learning_rate": 6.360849033158365e-06, "loss": 0.5106, "step": 2481 }, { "epoch": 0.4301466606009402, "grad_norm": 2.0492025611611475, "learning_rate": 6.358147786741082e-06, "loss": 0.5826, "step": 2482 }, { "epoch": 0.43031996707177056, "grad_norm": 2.1950161019822843, "learning_rate": 6.355446112276904e-06, "loss": 0.4256, "step": 2483 }, { "epoch": 0.4304932735426009, "grad_norm": 2.576739666564135, "learning_rate": 6.352744010617315e-06, "loss": 0.5943, "step": 2484 }, { "epoch": 0.4306665800134313, "grad_norm": 1.9604974658100414, "learning_rate": 6.350041482613933e-06, "loss": 0.5973, "step": 2485 }, { "epoch": 0.4308398864842616, "grad_norm": 1.9865653259142304, "learning_rate": 6.347338529118517e-06, "loss": 0.5245, "step": 2486 }, { "epoch": 0.43101319295509194, "grad_norm": 2.1600638590904575, "learning_rate": 6.344635150982954e-06, "loss": 0.5339, "step": 2487 }, { "epoch": 0.4311864994259223, "grad_norm": 2.6742567396651244, "learning_rate": 6.341931349059266e-06, "loss": 0.5233, "step": 2488 }, { "epoch": 0.43135980589675266, "grad_norm": 2.2262621210199485, "learning_rate": 6.339227124199611e-06, "loss": 0.6202, "step": 2489 }, { "epoch": 0.431533112367583, "grad_norm": 2.2266106229632268, "learning_rate": 6.336522477256275e-06, "loss": 0.5511, "step": 2490 }, { "epoch": 0.4317064188384134, "grad_norm": 2.1302595719703725, "learning_rate": 6.333817409081684e-06, "loss": 0.5455, "step": 2491 }, { "epoch": 0.43187972530924373, "grad_norm": 2.4433729951238767, "learning_rate": 6.331111920528393e-06, "loss": 0.4535, "step": 2492 }, { "epoch": 0.4320530317800741, "grad_norm": 1.917109617783004, "learning_rate": 6.328406012449087e-06, "loss": 0.6231, "step": 2493 }, { "epoch": 0.43222633825090445, "grad_norm": 2.145004408981071, "learning_rate": 6.325699685696588e-06, "loss": 0.4937, "step": 2494 }, { "epoch": 0.4323996447217348, "grad_norm": 2.567403787082556, "learning_rate": 6.322992941123848e-06, "loss": 0.5627, "step": 2495 }, { "epoch": 0.43257295119256517, "grad_norm": 2.6491523345520847, "learning_rate": 6.32028577958395e-06, "loss": 0.6961, "step": 2496 }, { "epoch": 0.43274625766339553, "grad_norm": 3.1854035615704994, "learning_rate": 6.31757820193011e-06, "loss": 0.5465, "step": 2497 }, { "epoch": 0.4329195641342259, "grad_norm": 1.8430768570105895, "learning_rate": 6.314870209015674e-06, "loss": 0.5004, "step": 2498 }, { "epoch": 0.4330928706050562, "grad_norm": 2.3602831859873232, "learning_rate": 6.312161801694116e-06, "loss": 0.58, "step": 2499 }, { "epoch": 0.43326617707588655, "grad_norm": 2.5666109539508217, "learning_rate": 6.3094529808190466e-06, "loss": 0.6234, "step": 2500 }, { "epoch": 0.4334394835467169, "grad_norm": 3.770781450500543, "learning_rate": 6.306743747244203e-06, "loss": 0.5357, "step": 2501 }, { "epoch": 0.43361279001754727, "grad_norm": 2.8101810687646953, "learning_rate": 6.304034101823455e-06, "loss": 0.5681, "step": 2502 }, { "epoch": 0.4337860964883776, "grad_norm": 2.0764904848067514, "learning_rate": 6.301324045410799e-06, "loss": 0.5585, "step": 2503 }, { "epoch": 0.433959402959208, "grad_norm": 1.8970535584846862, "learning_rate": 6.298613578860362e-06, "loss": 0.5136, "step": 2504 }, { "epoch": 0.43413270943003834, "grad_norm": 2.6840159027964203, "learning_rate": 6.295902703026401e-06, "loss": 0.5071, "step": 2505 }, { "epoch": 0.4343060159008687, "grad_norm": 2.1977024290805036, "learning_rate": 6.293191418763301e-06, "loss": 0.5966, "step": 2506 }, { "epoch": 0.43447932237169906, "grad_norm": 2.2215179604413273, "learning_rate": 6.290479726925578e-06, "loss": 0.5826, "step": 2507 }, { "epoch": 0.4346526288425294, "grad_norm": 3.962854939538261, "learning_rate": 6.287767628367873e-06, "loss": 0.5427, "step": 2508 }, { "epoch": 0.4348259353133598, "grad_norm": 2.245915381259294, "learning_rate": 6.285055123944959e-06, "loss": 0.6324, "step": 2509 }, { "epoch": 0.43499924178419014, "grad_norm": 2.9937936789907873, "learning_rate": 6.282342214511734e-06, "loss": 0.5529, "step": 2510 }, { "epoch": 0.4351725482550205, "grad_norm": 3.458381095341499, "learning_rate": 6.279628900923225e-06, "loss": 0.5267, "step": 2511 }, { "epoch": 0.4353458547258508, "grad_norm": 2.1428415622207018, "learning_rate": 6.276915184034584e-06, "loss": 0.502, "step": 2512 }, { "epoch": 0.43551916119668116, "grad_norm": 2.299823393019494, "learning_rate": 6.2742010647010956e-06, "loss": 0.4498, "step": 2513 }, { "epoch": 0.4356924676675115, "grad_norm": 2.005598513920552, "learning_rate": 6.271486543778162e-06, "loss": 0.5066, "step": 2514 }, { "epoch": 0.4358657741383419, "grad_norm": 4.677891409961805, "learning_rate": 6.268771622121324e-06, "loss": 0.5009, "step": 2515 }, { "epoch": 0.43603908060917224, "grad_norm": 2.284563079295667, "learning_rate": 6.2660563005862395e-06, "loss": 0.5651, "step": 2516 }, { "epoch": 0.4362123870800026, "grad_norm": 1.9128959263148289, "learning_rate": 6.263340580028696e-06, "loss": 0.5724, "step": 2517 }, { "epoch": 0.43638569355083295, "grad_norm": 2.3788181380592253, "learning_rate": 6.260624461304605e-06, "loss": 0.5462, "step": 2518 }, { "epoch": 0.4365590000216633, "grad_norm": 2.277938098999084, "learning_rate": 6.2579079452700055e-06, "loss": 0.6038, "step": 2519 }, { "epoch": 0.43673230649249367, "grad_norm": 2.0951016743460937, "learning_rate": 6.255191032781059e-06, "loss": 0.535, "step": 2520 }, { "epoch": 0.43690561296332403, "grad_norm": 2.192370970971198, "learning_rate": 6.252473724694054e-06, "loss": 0.5931, "step": 2521 }, { "epoch": 0.4370789194341544, "grad_norm": 2.0590664078436896, "learning_rate": 6.249756021865409e-06, "loss": 0.517, "step": 2522 }, { "epoch": 0.43725222590498475, "grad_norm": 2.240581531493836, "learning_rate": 6.2470379251516535e-06, "loss": 0.5172, "step": 2523 }, { "epoch": 0.4374255323758151, "grad_norm": 2.2704931226737397, "learning_rate": 6.244319435409453e-06, "loss": 0.596, "step": 2524 }, { "epoch": 0.4375988388466454, "grad_norm": 2.3397217191601536, "learning_rate": 6.2416005534955925e-06, "loss": 0.5494, "step": 2525 }, { "epoch": 0.43777214531747577, "grad_norm": 2.4804769433721097, "learning_rate": 6.238881280266979e-06, "loss": 0.5928, "step": 2526 }, { "epoch": 0.43794545178830613, "grad_norm": 5.161743418822254, "learning_rate": 6.236161616580648e-06, "loss": 0.5861, "step": 2527 }, { "epoch": 0.4381187582591365, "grad_norm": 1.9860693826053353, "learning_rate": 6.233441563293754e-06, "loss": 0.5425, "step": 2528 }, { "epoch": 0.43829206472996685, "grad_norm": 2.2179796507550775, "learning_rate": 6.230721121263572e-06, "loss": 0.6361, "step": 2529 }, { "epoch": 0.4384653712007972, "grad_norm": 2.0146443508671044, "learning_rate": 6.228000291347506e-06, "loss": 0.514, "step": 2530 }, { "epoch": 0.43863867767162756, "grad_norm": 2.000586418637788, "learning_rate": 6.225279074403078e-06, "loss": 0.4706, "step": 2531 }, { "epoch": 0.4388119841424579, "grad_norm": 2.211464619830766, "learning_rate": 6.222557471287932e-06, "loss": 0.5413, "step": 2532 }, { "epoch": 0.4389852906132883, "grad_norm": 2.7489671329657623, "learning_rate": 6.219835482859837e-06, "loss": 0.5569, "step": 2533 }, { "epoch": 0.43915859708411864, "grad_norm": 2.3492186935230563, "learning_rate": 6.217113109976676e-06, "loss": 0.5094, "step": 2534 }, { "epoch": 0.439331903554949, "grad_norm": 2.134438039675302, "learning_rate": 6.214390353496463e-06, "loss": 0.4957, "step": 2535 }, { "epoch": 0.43950521002577936, "grad_norm": 2.0146527028815724, "learning_rate": 6.211667214277326e-06, "loss": 0.4226, "step": 2536 }, { "epoch": 0.4396785164966097, "grad_norm": 3.2477657570963245, "learning_rate": 6.208943693177517e-06, "loss": 0.5068, "step": 2537 }, { "epoch": 0.43985182296744, "grad_norm": 2.3086547017284027, "learning_rate": 6.206219791055406e-06, "loss": 0.5338, "step": 2538 }, { "epoch": 0.4400251294382704, "grad_norm": 2.063504353309011, "learning_rate": 6.203495508769485e-06, "loss": 0.5375, "step": 2539 }, { "epoch": 0.44019843590910074, "grad_norm": 2.347992559149006, "learning_rate": 6.200770847178364e-06, "loss": 0.5073, "step": 2540 }, { "epoch": 0.4403717423799311, "grad_norm": 3.0028748515178583, "learning_rate": 6.198045807140772e-06, "loss": 0.5399, "step": 2541 }, { "epoch": 0.44054504885076146, "grad_norm": 2.121611925408661, "learning_rate": 6.195320389515561e-06, "loss": 0.5107, "step": 2542 }, { "epoch": 0.4407183553215918, "grad_norm": 3.337039128666271, "learning_rate": 6.192594595161702e-06, "loss": 0.561, "step": 2543 }, { "epoch": 0.4408916617924222, "grad_norm": 2.905738456675053, "learning_rate": 6.1898684249382775e-06, "loss": 0.5726, "step": 2544 }, { "epoch": 0.44106496826325253, "grad_norm": 2.251667964222415, "learning_rate": 6.187141879704497e-06, "loss": 0.4995, "step": 2545 }, { "epoch": 0.4412382747340829, "grad_norm": 2.5399906715593685, "learning_rate": 6.1844149603196835e-06, "loss": 0.5922, "step": 2546 }, { "epoch": 0.44141158120491325, "grad_norm": 2.511508528221634, "learning_rate": 6.181687667643278e-06, "loss": 0.5923, "step": 2547 }, { "epoch": 0.4415848876757436, "grad_norm": 2.063452253582501, "learning_rate": 6.178960002534843e-06, "loss": 0.5212, "step": 2548 }, { "epoch": 0.44175819414657397, "grad_norm": 2.584257476375867, "learning_rate": 6.176231965854052e-06, "loss": 0.591, "step": 2549 }, { "epoch": 0.4419315006174043, "grad_norm": 2.101539429597865, "learning_rate": 6.173503558460703e-06, "loss": 0.4879, "step": 2550 }, { "epoch": 0.44210480708823463, "grad_norm": 1.7569161700204763, "learning_rate": 6.170774781214705e-06, "loss": 0.4754, "step": 2551 }, { "epoch": 0.442278113559065, "grad_norm": 2.157381537615116, "learning_rate": 6.168045634976086e-06, "loss": 0.5312, "step": 2552 }, { "epoch": 0.44245142002989535, "grad_norm": 1.9901852531811932, "learning_rate": 6.165316120604991e-06, "loss": 0.6032, "step": 2553 }, { "epoch": 0.4426247265007257, "grad_norm": 2.4463206937122024, "learning_rate": 6.162586238961677e-06, "loss": 0.5792, "step": 2554 }, { "epoch": 0.44279803297155607, "grad_norm": 3.543899396311623, "learning_rate": 6.159855990906522e-06, "loss": 0.6125, "step": 2555 }, { "epoch": 0.4429713394423864, "grad_norm": 2.1850566738900525, "learning_rate": 6.1571253773000165e-06, "loss": 0.5578, "step": 2556 }, { "epoch": 0.4431446459132168, "grad_norm": 2.629118657800607, "learning_rate": 6.154394399002768e-06, "loss": 0.5314, "step": 2557 }, { "epoch": 0.44331795238404714, "grad_norm": 2.337791247541152, "learning_rate": 6.151663056875496e-06, "loss": 0.5125, "step": 2558 }, { "epoch": 0.4434912588548775, "grad_norm": 2.2358499802565612, "learning_rate": 6.1489313517790395e-06, "loss": 0.5341, "step": 2559 }, { "epoch": 0.44366456532570786, "grad_norm": 2.2300357484394664, "learning_rate": 6.146199284574346e-06, "loss": 0.4761, "step": 2560 }, { "epoch": 0.4438378717965382, "grad_norm": 2.4142827452944093, "learning_rate": 6.14346685612248e-06, "loss": 0.5698, "step": 2561 }, { "epoch": 0.4440111782673686, "grad_norm": 2.0393424483134686, "learning_rate": 6.1407340672846195e-06, "loss": 0.5601, "step": 2562 }, { "epoch": 0.44418448473819894, "grad_norm": 2.2871750859167963, "learning_rate": 6.13800091892206e-06, "loss": 0.5886, "step": 2563 }, { "epoch": 0.44435779120902924, "grad_norm": 1.9901607771644574, "learning_rate": 6.135267411896205e-06, "loss": 0.4968, "step": 2564 }, { "epoch": 0.4445310976798596, "grad_norm": 8.80254814262117, "learning_rate": 6.132533547068572e-06, "loss": 0.6622, "step": 2565 }, { "epoch": 0.44470440415068996, "grad_norm": 2.2274611854005295, "learning_rate": 6.129799325300792e-06, "loss": 0.4452, "step": 2566 }, { "epoch": 0.4448777106215203, "grad_norm": 2.7656152475906173, "learning_rate": 6.127064747454609e-06, "loss": 0.5984, "step": 2567 }, { "epoch": 0.4450510170923507, "grad_norm": 2.3113239077528793, "learning_rate": 6.124329814391878e-06, "loss": 0.5044, "step": 2568 }, { "epoch": 0.44522432356318103, "grad_norm": 2.2462473578756197, "learning_rate": 6.1215945269745705e-06, "loss": 0.5161, "step": 2569 }, { "epoch": 0.4453976300340114, "grad_norm": 1.902084273003885, "learning_rate": 6.11885888606476e-06, "loss": 0.4477, "step": 2570 }, { "epoch": 0.44557093650484175, "grad_norm": 2.3073483458900266, "learning_rate": 6.116122892524642e-06, "loss": 0.5756, "step": 2571 }, { "epoch": 0.4457442429756721, "grad_norm": 2.082927457856222, "learning_rate": 6.113386547216516e-06, "loss": 0.6063, "step": 2572 }, { "epoch": 0.44591754944650247, "grad_norm": 3.0351216885589922, "learning_rate": 6.110649851002797e-06, "loss": 0.5311, "step": 2573 }, { "epoch": 0.44609085591733283, "grad_norm": 2.0785452142229017, "learning_rate": 6.107912804746008e-06, "loss": 0.5142, "step": 2574 }, { "epoch": 0.4462641623881632, "grad_norm": 2.669136159685249, "learning_rate": 6.10517540930878e-06, "loss": 0.5825, "step": 2575 }, { "epoch": 0.44643746885899355, "grad_norm": 1.9498401208699405, "learning_rate": 6.1024376655538595e-06, "loss": 0.5198, "step": 2576 }, { "epoch": 0.44661077532982385, "grad_norm": 1.9786147399144034, "learning_rate": 6.0996995743441e-06, "loss": 0.4497, "step": 2577 }, { "epoch": 0.4467840818006542, "grad_norm": 2.844479471368676, "learning_rate": 6.096961136542463e-06, "loss": 0.5699, "step": 2578 }, { "epoch": 0.44695738827148457, "grad_norm": 2.302278003596674, "learning_rate": 6.094222353012024e-06, "loss": 0.5271, "step": 2579 }, { "epoch": 0.4471306947423149, "grad_norm": 1.9285529970149053, "learning_rate": 6.091483224615961e-06, "loss": 0.4635, "step": 2580 }, { "epoch": 0.4473040012131453, "grad_norm": 2.1370176766853177, "learning_rate": 6.088743752217565e-06, "loss": 0.5598, "step": 2581 }, { "epoch": 0.44747730768397564, "grad_norm": 2.447040190531355, "learning_rate": 6.086003936680234e-06, "loss": 0.6177, "step": 2582 }, { "epoch": 0.447650614154806, "grad_norm": 1.7249466958821653, "learning_rate": 6.083263778867476e-06, "loss": 0.5121, "step": 2583 }, { "epoch": 0.44782392062563636, "grad_norm": 2.0005081803532527, "learning_rate": 6.0805232796429055e-06, "loss": 0.5666, "step": 2584 }, { "epoch": 0.4479972270964667, "grad_norm": 1.9529975733571838, "learning_rate": 6.077782439870243e-06, "loss": 0.5089, "step": 2585 }, { "epoch": 0.4481705335672971, "grad_norm": 1.9764971555385618, "learning_rate": 6.0750412604133195e-06, "loss": 0.5929, "step": 2586 }, { "epoch": 0.44834384003812744, "grad_norm": 2.279508514990261, "learning_rate": 6.072299742136069e-06, "loss": 0.5081, "step": 2587 }, { "epoch": 0.4485171465089578, "grad_norm": 2.3590787916335474, "learning_rate": 6.069557885902537e-06, "loss": 0.6164, "step": 2588 }, { "epoch": 0.44869045297978816, "grad_norm": 2.4774363888571327, "learning_rate": 6.066815692576872e-06, "loss": 0.5737, "step": 2589 }, { "epoch": 0.44886375945061846, "grad_norm": 3.8503358288519256, "learning_rate": 6.064073163023331e-06, "loss": 0.5315, "step": 2590 }, { "epoch": 0.4490370659214488, "grad_norm": 1.9830154036769359, "learning_rate": 6.061330298106276e-06, "loss": 0.5525, "step": 2591 }, { "epoch": 0.4492103723922792, "grad_norm": 2.2955541862824567, "learning_rate": 6.058587098690173e-06, "loss": 0.5328, "step": 2592 }, { "epoch": 0.44938367886310954, "grad_norm": 2.5032069553040484, "learning_rate": 6.055843565639596e-06, "loss": 0.4969, "step": 2593 }, { "epoch": 0.4495569853339399, "grad_norm": 1.830115138937689, "learning_rate": 6.053099699819226e-06, "loss": 0.4835, "step": 2594 }, { "epoch": 0.44973029180477025, "grad_norm": 2.850254495014673, "learning_rate": 6.05035550209384e-06, "loss": 0.5709, "step": 2595 }, { "epoch": 0.4499035982756006, "grad_norm": 3.1674389597601404, "learning_rate": 6.0476109733283305e-06, "loss": 0.5707, "step": 2596 }, { "epoch": 0.450076904746431, "grad_norm": 2.0895048778565197, "learning_rate": 6.044866114387689e-06, "loss": 0.553, "step": 2597 }, { "epoch": 0.45025021121726133, "grad_norm": 5.36186825000409, "learning_rate": 6.042120926137009e-06, "loss": 0.6195, "step": 2598 }, { "epoch": 0.4504235176880917, "grad_norm": 3.181423390637483, "learning_rate": 6.039375409441493e-06, "loss": 0.4133, "step": 2599 }, { "epoch": 0.45059682415892205, "grad_norm": 2.7928062758338177, "learning_rate": 6.036629565166445e-06, "loss": 0.4739, "step": 2600 }, { "epoch": 0.4507701306297524, "grad_norm": 1.8606088975070354, "learning_rate": 6.03388339417727e-06, "loss": 0.4751, "step": 2601 }, { "epoch": 0.45094343710058277, "grad_norm": 2.304686936584295, "learning_rate": 6.031136897339476e-06, "loss": 0.613, "step": 2602 }, { "epoch": 0.45111674357141307, "grad_norm": 2.2153660243708497, "learning_rate": 6.0283900755186775e-06, "loss": 0.5461, "step": 2603 }, { "epoch": 0.45129005004224343, "grad_norm": 2.141913861307232, "learning_rate": 6.02564292958059e-06, "loss": 0.5338, "step": 2604 }, { "epoch": 0.4514633565130738, "grad_norm": 2.1821270406261997, "learning_rate": 6.02289546039103e-06, "loss": 0.5502, "step": 2605 }, { "epoch": 0.45163666298390415, "grad_norm": 2.443126275852068, "learning_rate": 6.0201476688159145e-06, "loss": 0.665, "step": 2606 }, { "epoch": 0.4518099694547345, "grad_norm": 2.059983910370147, "learning_rate": 6.017399555721264e-06, "loss": 0.6201, "step": 2607 }, { "epoch": 0.45198327592556486, "grad_norm": 2.198543255152736, "learning_rate": 6.014651121973203e-06, "loss": 0.5747, "step": 2608 }, { "epoch": 0.4521565823963952, "grad_norm": 2.4031482115125002, "learning_rate": 6.011902368437951e-06, "loss": 0.6228, "step": 2609 }, { "epoch": 0.4523298888672256, "grad_norm": 2.1632546565097166, "learning_rate": 6.009153295981834e-06, "loss": 0.5553, "step": 2610 }, { "epoch": 0.45250319533805594, "grad_norm": 2.061847458771953, "learning_rate": 6.006403905471273e-06, "loss": 0.5859, "step": 2611 }, { "epoch": 0.4526765018088863, "grad_norm": 2.3704256914990096, "learning_rate": 6.003654197772796e-06, "loss": 0.5837, "step": 2612 }, { "epoch": 0.45284980827971666, "grad_norm": 2.777150802517483, "learning_rate": 6.000904173753025e-06, "loss": 0.5506, "step": 2613 }, { "epoch": 0.453023114750547, "grad_norm": 2.396678162696742, "learning_rate": 5.998153834278685e-06, "loss": 0.5202, "step": 2614 }, { "epoch": 0.4531964212213774, "grad_norm": 1.9698698096149312, "learning_rate": 5.995403180216599e-06, "loss": 0.5086, "step": 2615 }, { "epoch": 0.4533697276922077, "grad_norm": 2.936305547766611, "learning_rate": 5.9926522124336894e-06, "loss": 0.5298, "step": 2616 }, { "epoch": 0.45354303416303804, "grad_norm": 2.4571162103810824, "learning_rate": 5.989900931796978e-06, "loss": 0.6179, "step": 2617 }, { "epoch": 0.4537163406338684, "grad_norm": 2.223518514110724, "learning_rate": 5.987149339173584e-06, "loss": 0.5345, "step": 2618 }, { "epoch": 0.45388964710469876, "grad_norm": 2.068036140210999, "learning_rate": 5.984397435430728e-06, "loss": 0.5973, "step": 2619 }, { "epoch": 0.4540629535755291, "grad_norm": 2.456000993028978, "learning_rate": 5.981645221435723e-06, "loss": 0.6259, "step": 2620 }, { "epoch": 0.4542362600463595, "grad_norm": 2.3058302416832612, "learning_rate": 5.978892698055987e-06, "loss": 0.5245, "step": 2621 }, { "epoch": 0.45440956651718983, "grad_norm": 2.1475044610820464, "learning_rate": 5.976139866159028e-06, "loss": 0.5731, "step": 2622 }, { "epoch": 0.4545828729880202, "grad_norm": 2.2806179740022623, "learning_rate": 5.9733867266124546e-06, "loss": 0.5408, "step": 2623 }, { "epoch": 0.45475617945885055, "grad_norm": 5.550267694037063, "learning_rate": 5.970633280283978e-06, "loss": 0.5782, "step": 2624 }, { "epoch": 0.4549294859296809, "grad_norm": 2.256103624922415, "learning_rate": 5.967879528041396e-06, "loss": 0.5627, "step": 2625 }, { "epoch": 0.45510279240051127, "grad_norm": 2.6210691753432567, "learning_rate": 5.965125470752609e-06, "loss": 0.553, "step": 2626 }, { "epoch": 0.45527609887134163, "grad_norm": 2.195683952133816, "learning_rate": 5.962371109285613e-06, "loss": 0.6231, "step": 2627 }, { "epoch": 0.455449405342172, "grad_norm": 2.284143546513877, "learning_rate": 5.959616444508497e-06, "loss": 0.655, "step": 2628 }, { "epoch": 0.45562271181300235, "grad_norm": 2.342153513456333, "learning_rate": 5.956861477289449e-06, "loss": 0.5518, "step": 2629 }, { "epoch": 0.45579601828383265, "grad_norm": 2.2882371711781206, "learning_rate": 5.954106208496751e-06, "loss": 0.4536, "step": 2630 }, { "epoch": 0.455969324754663, "grad_norm": 2.116988309249401, "learning_rate": 5.95135063899878e-06, "loss": 0.5243, "step": 2631 }, { "epoch": 0.45614263122549337, "grad_norm": 2.093196952905061, "learning_rate": 5.948594769664007e-06, "loss": 0.4378, "step": 2632 }, { "epoch": 0.4563159376963237, "grad_norm": 2.4989329646697303, "learning_rate": 5.945838601360998e-06, "loss": 0.6038, "step": 2633 }, { "epoch": 0.4564892441671541, "grad_norm": 2.4687061881933863, "learning_rate": 5.943082134958414e-06, "loss": 0.5485, "step": 2634 }, { "epoch": 0.45666255063798444, "grad_norm": 2.5963460514202055, "learning_rate": 5.940325371325011e-06, "loss": 0.5558, "step": 2635 }, { "epoch": 0.4568358571088148, "grad_norm": 3.7063728015957573, "learning_rate": 5.937568311329634e-06, "loss": 0.5457, "step": 2636 }, { "epoch": 0.45700916357964516, "grad_norm": 2.989772695582964, "learning_rate": 5.934810955841227e-06, "loss": 0.5261, "step": 2637 }, { "epoch": 0.4571824700504755, "grad_norm": 1.8473072208242736, "learning_rate": 5.932053305728824e-06, "loss": 0.4992, "step": 2638 }, { "epoch": 0.4573557765213059, "grad_norm": 2.192050399909305, "learning_rate": 5.9292953618615525e-06, "loss": 0.5507, "step": 2639 }, { "epoch": 0.45752908299213624, "grad_norm": 4.54207256579355, "learning_rate": 5.926537125108631e-06, "loss": 0.5022, "step": 2640 }, { "epoch": 0.4577023894629666, "grad_norm": 2.049302578325537, "learning_rate": 5.923778596339375e-06, "loss": 0.5026, "step": 2641 }, { "epoch": 0.45787569593379696, "grad_norm": 2.214820644533969, "learning_rate": 5.921019776423186e-06, "loss": 0.613, "step": 2642 }, { "epoch": 0.45804900240462726, "grad_norm": 1.9421478311100107, "learning_rate": 5.91826066622956e-06, "loss": 0.5238, "step": 2643 }, { "epoch": 0.4582223088754576, "grad_norm": 2.383921518469395, "learning_rate": 5.915501266628087e-06, "loss": 0.6001, "step": 2644 }, { "epoch": 0.458395615346288, "grad_norm": 2.208779694945726, "learning_rate": 5.912741578488446e-06, "loss": 0.5762, "step": 2645 }, { "epoch": 0.45856892181711834, "grad_norm": 7.72895564477851, "learning_rate": 5.909981602680405e-06, "loss": 0.5153, "step": 2646 }, { "epoch": 0.4587422282879487, "grad_norm": 2.140747919993916, "learning_rate": 5.907221340073825e-06, "loss": 0.6052, "step": 2647 }, { "epoch": 0.45891553475877905, "grad_norm": 2.095186226978643, "learning_rate": 5.904460791538655e-06, "loss": 0.562, "step": 2648 }, { "epoch": 0.4590888412296094, "grad_norm": 2.121297548257762, "learning_rate": 5.901699957944939e-06, "loss": 0.568, "step": 2649 }, { "epoch": 0.45926214770043977, "grad_norm": 2.238465540788595, "learning_rate": 5.898938840162806e-06, "loss": 0.6545, "step": 2650 }, { "epoch": 0.45943545417127013, "grad_norm": 1.8757141292003008, "learning_rate": 5.89617743906248e-06, "loss": 0.4902, "step": 2651 }, { "epoch": 0.4596087606421005, "grad_norm": 2.1241725417458186, "learning_rate": 5.893415755514267e-06, "loss": 0.5081, "step": 2652 }, { "epoch": 0.45978206711293085, "grad_norm": 2.439763702096574, "learning_rate": 5.890653790388565e-06, "loss": 0.4264, "step": 2653 }, { "epoch": 0.4599553735837612, "grad_norm": 2.468134840132216, "learning_rate": 5.8878915445558635e-06, "loss": 0.6948, "step": 2654 }, { "epoch": 0.46012868005459157, "grad_norm": 3.1112796435112355, "learning_rate": 5.885129018886738e-06, "loss": 0.6089, "step": 2655 }, { "epoch": 0.46030198652542187, "grad_norm": 3.04171532020422, "learning_rate": 5.882366214251854e-06, "loss": 0.5724, "step": 2656 }, { "epoch": 0.46047529299625223, "grad_norm": 2.3208906027715277, "learning_rate": 5.87960313152196e-06, "loss": 0.5794, "step": 2657 }, { "epoch": 0.4606485994670826, "grad_norm": 2.1976559213762465, "learning_rate": 5.876839771567898e-06, "loss": 0.6438, "step": 2658 }, { "epoch": 0.46082190593791295, "grad_norm": 2.150796234926839, "learning_rate": 5.874076135260595e-06, "loss": 0.5489, "step": 2659 }, { "epoch": 0.4609952124087433, "grad_norm": 2.010044444315267, "learning_rate": 5.8713122234710655e-06, "loss": 0.5842, "step": 2660 }, { "epoch": 0.46116851887957366, "grad_norm": 2.726828818424616, "learning_rate": 5.868548037070411e-06, "loss": 0.4911, "step": 2661 }, { "epoch": 0.461341825350404, "grad_norm": 4.152991835800921, "learning_rate": 5.8657835769298155e-06, "loss": 0.6382, "step": 2662 }, { "epoch": 0.4615151318212344, "grad_norm": 2.395478745083401, "learning_rate": 5.863018843920555e-06, "loss": 0.5732, "step": 2663 }, { "epoch": 0.46168843829206474, "grad_norm": 2.57233712451072, "learning_rate": 5.860253838913991e-06, "loss": 0.5404, "step": 2664 }, { "epoch": 0.4618617447628951, "grad_norm": 2.123740752292144, "learning_rate": 5.857488562781567e-06, "loss": 0.5532, "step": 2665 }, { "epoch": 0.46203505123372546, "grad_norm": 2.0173533460212134, "learning_rate": 5.854723016394815e-06, "loss": 0.5539, "step": 2666 }, { "epoch": 0.4622083577045558, "grad_norm": 2.0409938152669955, "learning_rate": 5.8519572006253515e-06, "loss": 0.5552, "step": 2667 }, { "epoch": 0.4623816641753862, "grad_norm": 2.1555004392949195, "learning_rate": 5.849191116344875e-06, "loss": 0.5573, "step": 2668 }, { "epoch": 0.4625549706462165, "grad_norm": 2.8133314738552855, "learning_rate": 5.846424764425173e-06, "loss": 0.5075, "step": 2669 }, { "epoch": 0.46272827711704684, "grad_norm": 2.2340521701530256, "learning_rate": 5.843658145738117e-06, "loss": 0.5272, "step": 2670 }, { "epoch": 0.4629015835878772, "grad_norm": 3.2431574058848107, "learning_rate": 5.8408912611556565e-06, "loss": 0.623, "step": 2671 }, { "epoch": 0.46307489005870756, "grad_norm": 2.1573870734304164, "learning_rate": 5.838124111549837e-06, "loss": 0.5638, "step": 2672 }, { "epoch": 0.4632481965295379, "grad_norm": 3.1710570545890793, "learning_rate": 5.835356697792772e-06, "loss": 0.6115, "step": 2673 }, { "epoch": 0.4634215030003683, "grad_norm": 3.8142964509406077, "learning_rate": 5.83258902075667e-06, "loss": 0.487, "step": 2674 }, { "epoch": 0.46359480947119863, "grad_norm": 2.1636706890278825, "learning_rate": 5.829821081313819e-06, "loss": 0.5691, "step": 2675 }, { "epoch": 0.463768115942029, "grad_norm": 2.3565989156299025, "learning_rate": 5.827052880336589e-06, "loss": 0.5903, "step": 2676 }, { "epoch": 0.46394142241285935, "grad_norm": 3.315814146603975, "learning_rate": 5.824284418697432e-06, "loss": 0.5264, "step": 2677 }, { "epoch": 0.4641147288836897, "grad_norm": 3.413714544590513, "learning_rate": 5.821515697268883e-06, "loss": 0.6101, "step": 2678 }, { "epoch": 0.46428803535452007, "grad_norm": 4.661198936840773, "learning_rate": 5.81874671692356e-06, "loss": 0.5561, "step": 2679 }, { "epoch": 0.4644613418253504, "grad_norm": 1.9299567999748035, "learning_rate": 5.815977478534159e-06, "loss": 0.4883, "step": 2680 }, { "epoch": 0.4646346482961808, "grad_norm": 2.390991554083888, "learning_rate": 5.813207982973464e-06, "loss": 0.5433, "step": 2681 }, { "epoch": 0.4648079547670111, "grad_norm": 1.9602739209256852, "learning_rate": 5.810438231114332e-06, "loss": 0.4685, "step": 2682 }, { "epoch": 0.46498126123784145, "grad_norm": 2.3835474318987595, "learning_rate": 5.807668223829705e-06, "loss": 0.4865, "step": 2683 }, { "epoch": 0.4651545677086718, "grad_norm": 2.189943053004913, "learning_rate": 5.804897961992606e-06, "loss": 0.5994, "step": 2684 }, { "epoch": 0.46532787417950217, "grad_norm": 2.26700852106235, "learning_rate": 5.802127446476138e-06, "loss": 0.5308, "step": 2685 }, { "epoch": 0.4655011806503325, "grad_norm": 2.0058211013372484, "learning_rate": 5.799356678153481e-06, "loss": 0.5635, "step": 2686 }, { "epoch": 0.4656744871211629, "grad_norm": 2.0845673642175573, "learning_rate": 5.796585657897902e-06, "loss": 0.5135, "step": 2687 }, { "epoch": 0.46584779359199324, "grad_norm": 2.2236407589637133, "learning_rate": 5.793814386582736e-06, "loss": 0.5477, "step": 2688 }, { "epoch": 0.4660211000628236, "grad_norm": 1.9677804571059285, "learning_rate": 5.791042865081407e-06, "loss": 0.4302, "step": 2689 }, { "epoch": 0.46619440653365396, "grad_norm": 2.3144172900334077, "learning_rate": 5.788271094267412e-06, "loss": 0.5649, "step": 2690 }, { "epoch": 0.4663677130044843, "grad_norm": 3.3350879605433357, "learning_rate": 5.785499075014331e-06, "loss": 0.495, "step": 2691 }, { "epoch": 0.4665410194753147, "grad_norm": 2.7750242676921544, "learning_rate": 5.782726808195822e-06, "loss": 0.57, "step": 2692 }, { "epoch": 0.46671432594614504, "grad_norm": 2.2813044295303544, "learning_rate": 5.779954294685615e-06, "loss": 0.4906, "step": 2693 }, { "epoch": 0.4668876324169754, "grad_norm": 2.263100267932043, "learning_rate": 5.777181535357524e-06, "loss": 0.5119, "step": 2694 }, { "epoch": 0.4670609388878057, "grad_norm": 2.231073861008914, "learning_rate": 5.774408531085439e-06, "loss": 0.5113, "step": 2695 }, { "epoch": 0.46723424535863606, "grad_norm": 2.650932936579111, "learning_rate": 5.7716352827433254e-06, "loss": 0.6198, "step": 2696 }, { "epoch": 0.4674075518294664, "grad_norm": 2.2142923049994847, "learning_rate": 5.768861791205228e-06, "loss": 0.5595, "step": 2697 }, { "epoch": 0.4675808583002968, "grad_norm": 3.636692769909292, "learning_rate": 5.766088057345266e-06, "loss": 0.5167, "step": 2698 }, { "epoch": 0.46775416477112713, "grad_norm": 2.0810203181617033, "learning_rate": 5.763314082037637e-06, "loss": 0.4807, "step": 2699 }, { "epoch": 0.4679274712419575, "grad_norm": 2.5623866550931207, "learning_rate": 5.760539866156611e-06, "loss": 0.5679, "step": 2700 }, { "epoch": 0.46810077771278785, "grad_norm": 2.050182489447958, "learning_rate": 5.7577654105765404e-06, "loss": 0.5155, "step": 2701 }, { "epoch": 0.4682740841836182, "grad_norm": 2.2992579245933293, "learning_rate": 5.754990716171849e-06, "loss": 0.5851, "step": 2702 }, { "epoch": 0.46844739065444857, "grad_norm": 2.186842800891958, "learning_rate": 5.752215783817033e-06, "loss": 0.6079, "step": 2703 }, { "epoch": 0.46862069712527893, "grad_norm": 2.0545281815789584, "learning_rate": 5.749440614386665e-06, "loss": 0.6051, "step": 2704 }, { "epoch": 0.4687940035961093, "grad_norm": 2.679055870867841, "learning_rate": 5.746665208755401e-06, "loss": 0.4672, "step": 2705 }, { "epoch": 0.46896731006693965, "grad_norm": 15.180469415029958, "learning_rate": 5.74388956779796e-06, "loss": 0.6124, "step": 2706 }, { "epoch": 0.46914061653777, "grad_norm": 2.4438847837226025, "learning_rate": 5.7411136923891406e-06, "loss": 0.5462, "step": 2707 }, { "epoch": 0.4693139230086003, "grad_norm": 1.9721587010823722, "learning_rate": 5.738337583403816e-06, "loss": 0.5061, "step": 2708 }, { "epoch": 0.46948722947943067, "grad_norm": 2.290016846223055, "learning_rate": 5.735561241716928e-06, "loss": 0.5167, "step": 2709 }, { "epoch": 0.469660535950261, "grad_norm": 3.9964483588229838, "learning_rate": 5.732784668203496e-06, "loss": 0.5386, "step": 2710 }, { "epoch": 0.4698338424210914, "grad_norm": 2.2182048021846166, "learning_rate": 5.730007863738612e-06, "loss": 0.5739, "step": 2711 }, { "epoch": 0.47000714889192174, "grad_norm": 1.8913466805553487, "learning_rate": 5.7272308291974424e-06, "loss": 0.5339, "step": 2712 }, { "epoch": 0.4701804553627521, "grad_norm": 2.281320303655393, "learning_rate": 5.724453565455222e-06, "loss": 0.6356, "step": 2713 }, { "epoch": 0.47035376183358246, "grad_norm": 2.12380974158661, "learning_rate": 5.721676073387259e-06, "loss": 0.64, "step": 2714 }, { "epoch": 0.4705270683044128, "grad_norm": 2.302147618811415, "learning_rate": 5.718898353868937e-06, "loss": 0.5237, "step": 2715 }, { "epoch": 0.4707003747752432, "grad_norm": 2.392787503944839, "learning_rate": 5.716120407775707e-06, "loss": 0.5129, "step": 2716 }, { "epoch": 0.47087368124607354, "grad_norm": 2.1565854714167543, "learning_rate": 5.713342235983093e-06, "loss": 0.4654, "step": 2717 }, { "epoch": 0.4710469877169039, "grad_norm": 2.5667029845302882, "learning_rate": 5.7105638393666905e-06, "loss": 0.5868, "step": 2718 }, { "epoch": 0.47122029418773426, "grad_norm": 2.936045802870527, "learning_rate": 5.7077852188021675e-06, "loss": 0.4781, "step": 2719 }, { "epoch": 0.4713936006585646, "grad_norm": 3.0129292445256626, "learning_rate": 5.705006375165258e-06, "loss": 0.5085, "step": 2720 }, { "epoch": 0.4715669071293949, "grad_norm": 2.2221666918776615, "learning_rate": 5.702227309331772e-06, "loss": 0.6204, "step": 2721 }, { "epoch": 0.4717402136002253, "grad_norm": 2.0361413622008175, "learning_rate": 5.699448022177583e-06, "loss": 0.534, "step": 2722 }, { "epoch": 0.47191352007105564, "grad_norm": 2.064265291153642, "learning_rate": 5.696668514578641e-06, "loss": 0.6061, "step": 2723 }, { "epoch": 0.472086826541886, "grad_norm": 2.2547985795635985, "learning_rate": 5.693888787410959e-06, "loss": 0.5501, "step": 2724 }, { "epoch": 0.47226013301271635, "grad_norm": 2.418925256969346, "learning_rate": 5.691108841550626e-06, "loss": 0.5627, "step": 2725 }, { "epoch": 0.4724334394835467, "grad_norm": 2.645141915794167, "learning_rate": 5.688328677873796e-06, "loss": 0.5008, "step": 2726 }, { "epoch": 0.47260674595437707, "grad_norm": 1.954316906908474, "learning_rate": 5.685548297256689e-06, "loss": 0.5006, "step": 2727 }, { "epoch": 0.47278005242520743, "grad_norm": 2.631449783768406, "learning_rate": 5.682767700575601e-06, "loss": 0.6177, "step": 2728 }, { "epoch": 0.4729533588960378, "grad_norm": 3.4597094417561256, "learning_rate": 5.679986888706887e-06, "loss": 0.4938, "step": 2729 }, { "epoch": 0.47312666536686815, "grad_norm": 2.1772557514639392, "learning_rate": 5.677205862526976e-06, "loss": 0.5973, "step": 2730 }, { "epoch": 0.4732999718376985, "grad_norm": 2.157575743269135, "learning_rate": 5.674424622912365e-06, "loss": 0.597, "step": 2731 }, { "epoch": 0.47347327830852887, "grad_norm": 8.444277365201916, "learning_rate": 5.671643170739613e-06, "loss": 0.6187, "step": 2732 }, { "epoch": 0.4736465847793592, "grad_norm": 2.125575072375505, "learning_rate": 5.668861506885352e-06, "loss": 0.6211, "step": 2733 }, { "epoch": 0.47381989125018953, "grad_norm": 2.362976522221885, "learning_rate": 5.666079632226276e-06, "loss": 0.6076, "step": 2734 }, { "epoch": 0.4739931977210199, "grad_norm": 2.6711710735599428, "learning_rate": 5.6632975476391475e-06, "loss": 0.5808, "step": 2735 }, { "epoch": 0.47416650419185025, "grad_norm": 2.071407210497723, "learning_rate": 5.6605152540007944e-06, "loss": 0.4818, "step": 2736 }, { "epoch": 0.4743398106626806, "grad_norm": 1.968665636398148, "learning_rate": 5.657732752188113e-06, "loss": 0.5762, "step": 2737 }, { "epoch": 0.47451311713351096, "grad_norm": 2.348501885274673, "learning_rate": 5.654950043078063e-06, "loss": 0.5488, "step": 2738 }, { "epoch": 0.4746864236043413, "grad_norm": 2.130582517502736, "learning_rate": 5.652167127547668e-06, "loss": 0.5422, "step": 2739 }, { "epoch": 0.4748597300751717, "grad_norm": 2.5282802817388084, "learning_rate": 5.64938400647402e-06, "loss": 0.6189, "step": 2740 }, { "epoch": 0.47503303654600204, "grad_norm": 2.4440605996870244, "learning_rate": 5.646600680734274e-06, "loss": 0.6134, "step": 2741 }, { "epoch": 0.4752063430168324, "grad_norm": 2.595114506838272, "learning_rate": 5.643817151205649e-06, "loss": 0.534, "step": 2742 }, { "epoch": 0.47537964948766276, "grad_norm": 3.1126451825916166, "learning_rate": 5.641033418765428e-06, "loss": 0.5827, "step": 2743 }, { "epoch": 0.4755529559584931, "grad_norm": 2.0211753130585413, "learning_rate": 5.638249484290962e-06, "loss": 0.5541, "step": 2744 }, { "epoch": 0.4757262624293235, "grad_norm": 2.4660192860966497, "learning_rate": 5.635465348659658e-06, "loss": 0.587, "step": 2745 }, { "epoch": 0.47589956890015384, "grad_norm": 2.294131605967747, "learning_rate": 5.632681012748995e-06, "loss": 0.602, "step": 2746 }, { "epoch": 0.47607287537098414, "grad_norm": 2.882183655082656, "learning_rate": 5.62989647743651e-06, "loss": 0.5076, "step": 2747 }, { "epoch": 0.4762461818418145, "grad_norm": 2.4002962658844704, "learning_rate": 5.627111743599802e-06, "loss": 0.5659, "step": 2748 }, { "epoch": 0.47641948831264486, "grad_norm": 1.9828575163150077, "learning_rate": 5.6243268121165364e-06, "loss": 0.4928, "step": 2749 }, { "epoch": 0.4765927947834752, "grad_norm": 2.4285473499795707, "learning_rate": 5.621541683864439e-06, "loss": 0.5355, "step": 2750 }, { "epoch": 0.4767661012543056, "grad_norm": 2.9268153905399874, "learning_rate": 5.618756359721296e-06, "loss": 0.5614, "step": 2751 }, { "epoch": 0.47693940772513593, "grad_norm": 2.0407240708266126, "learning_rate": 5.615970840564956e-06, "loss": 0.4986, "step": 2752 }, { "epoch": 0.4771127141959663, "grad_norm": 2.111103947421525, "learning_rate": 5.613185127273334e-06, "loss": 0.5736, "step": 2753 }, { "epoch": 0.47728602066679665, "grad_norm": 2.3397832330782524, "learning_rate": 5.610399220724401e-06, "loss": 0.6933, "step": 2754 }, { "epoch": 0.477459327137627, "grad_norm": 2.5304898785274794, "learning_rate": 5.607613121796187e-06, "loss": 0.6067, "step": 2755 }, { "epoch": 0.47763263360845737, "grad_norm": 8.1368667358606, "learning_rate": 5.604826831366789e-06, "loss": 0.4971, "step": 2756 }, { "epoch": 0.4778059400792877, "grad_norm": 2.5037978208865592, "learning_rate": 5.6020403503143584e-06, "loss": 0.548, "step": 2757 }, { "epoch": 0.4779792465501181, "grad_norm": 2.167557535549317, "learning_rate": 5.59925367951711e-06, "loss": 0.566, "step": 2758 }, { "epoch": 0.47815255302094845, "grad_norm": 2.872518235885213, "learning_rate": 5.59646681985332e-06, "loss": 0.6283, "step": 2759 }, { "epoch": 0.47832585949177875, "grad_norm": 2.3620102784961756, "learning_rate": 5.5936797722013205e-06, "loss": 0.545, "step": 2760 }, { "epoch": 0.4784991659626091, "grad_norm": 3.4388587078454775, "learning_rate": 5.590892537439503e-06, "loss": 0.5853, "step": 2761 }, { "epoch": 0.47867247243343947, "grad_norm": 1.9690701020402697, "learning_rate": 5.58810511644632e-06, "loss": 0.5403, "step": 2762 }, { "epoch": 0.4788457789042698, "grad_norm": 2.2610902397143353, "learning_rate": 5.585317510100281e-06, "loss": 0.4904, "step": 2763 }, { "epoch": 0.4790190853751002, "grad_norm": 2.3396367194476246, "learning_rate": 5.582529719279957e-06, "loss": 0.4501, "step": 2764 }, { "epoch": 0.47919239184593054, "grad_norm": 2.2813492987385398, "learning_rate": 5.5797417448639735e-06, "loss": 0.5669, "step": 2765 }, { "epoch": 0.4793656983167609, "grad_norm": 2.0626616001299793, "learning_rate": 5.576953587731015e-06, "loss": 0.606, "step": 2766 }, { "epoch": 0.47953900478759126, "grad_norm": 2.1946748218362395, "learning_rate": 5.574165248759824e-06, "loss": 0.5281, "step": 2767 }, { "epoch": 0.4797123112584216, "grad_norm": 2.043108795584727, "learning_rate": 5.571376728829201e-06, "loss": 0.5194, "step": 2768 }, { "epoch": 0.479885617729252, "grad_norm": 2.2273132025680855, "learning_rate": 5.568588028818004e-06, "loss": 0.5741, "step": 2769 }, { "epoch": 0.48005892420008234, "grad_norm": 1.9838249906844365, "learning_rate": 5.565799149605142e-06, "loss": 0.4295, "step": 2770 }, { "epoch": 0.4802322306709127, "grad_norm": 2.1565609080871924, "learning_rate": 5.5630100920695885e-06, "loss": 0.4619, "step": 2771 }, { "epoch": 0.48040553714174306, "grad_norm": 2.8574433401463235, "learning_rate": 5.560220857090367e-06, "loss": 0.5969, "step": 2772 }, { "epoch": 0.48057884361257336, "grad_norm": 2.265313753225791, "learning_rate": 5.557431445546564e-06, "loss": 0.5513, "step": 2773 }, { "epoch": 0.4807521500834037, "grad_norm": 2.0095453765082634, "learning_rate": 5.554641858317313e-06, "loss": 0.5541, "step": 2774 }, { "epoch": 0.4809254565542341, "grad_norm": 3.710361137191921, "learning_rate": 5.5518520962818104e-06, "loss": 0.5674, "step": 2775 }, { "epoch": 0.48109876302506444, "grad_norm": 2.2572065249466897, "learning_rate": 5.549062160319301e-06, "loss": 0.4749, "step": 2776 }, { "epoch": 0.4812720694958948, "grad_norm": 2.4039383031684607, "learning_rate": 5.546272051309089e-06, "loss": 0.5842, "step": 2777 }, { "epoch": 0.48144537596672515, "grad_norm": 2.5094453573677717, "learning_rate": 5.543481770130533e-06, "loss": 0.5725, "step": 2778 }, { "epoch": 0.4816186824375555, "grad_norm": 2.5502119685034716, "learning_rate": 5.5406913176630454e-06, "loss": 0.5688, "step": 2779 }, { "epoch": 0.48179198890838587, "grad_norm": 2.264638327675247, "learning_rate": 5.537900694786089e-06, "loss": 0.5524, "step": 2780 }, { "epoch": 0.48196529537921623, "grad_norm": 2.370899715720369, "learning_rate": 5.535109902379186e-06, "loss": 0.5823, "step": 2781 }, { "epoch": 0.4821386018500466, "grad_norm": 3.113293511769012, "learning_rate": 5.5323189413219085e-06, "loss": 0.5029, "step": 2782 }, { "epoch": 0.48231190832087695, "grad_norm": 1.914044750556613, "learning_rate": 5.529527812493882e-06, "loss": 0.5626, "step": 2783 }, { "epoch": 0.4824852147917073, "grad_norm": 2.577854196857432, "learning_rate": 5.526736516774785e-06, "loss": 0.5513, "step": 2784 }, { "epoch": 0.48265852126253767, "grad_norm": 2.30212278122778, "learning_rate": 5.523945055044352e-06, "loss": 0.4693, "step": 2785 }, { "epoch": 0.48283182773336797, "grad_norm": 2.5470134845335157, "learning_rate": 5.521153428182364e-06, "loss": 0.4909, "step": 2786 }, { "epoch": 0.4830051342041983, "grad_norm": 2.60146538653261, "learning_rate": 5.518361637068658e-06, "loss": 0.486, "step": 2787 }, { "epoch": 0.4831784406750287, "grad_norm": 2.0166947226170455, "learning_rate": 5.5155696825831205e-06, "loss": 0.4888, "step": 2788 }, { "epoch": 0.48335174714585905, "grad_norm": 2.1833828129764656, "learning_rate": 5.512777565605694e-06, "loss": 0.6164, "step": 2789 }, { "epoch": 0.4835250536166894, "grad_norm": 2.0996204831604226, "learning_rate": 5.509985287016365e-06, "loss": 0.5825, "step": 2790 }, { "epoch": 0.48369836008751976, "grad_norm": 2.2529902898682215, "learning_rate": 5.5071928476951764e-06, "loss": 0.5838, "step": 2791 }, { "epoch": 0.4838716665583501, "grad_norm": 2.6634817400633266, "learning_rate": 5.504400248522221e-06, "loss": 0.5678, "step": 2792 }, { "epoch": 0.4840449730291805, "grad_norm": 2.1259572137274563, "learning_rate": 5.5016074903776385e-06, "loss": 0.4992, "step": 2793 }, { "epoch": 0.48421827950001084, "grad_norm": 1.9771419345269365, "learning_rate": 5.498814574141624e-06, "loss": 0.4708, "step": 2794 }, { "epoch": 0.4843915859708412, "grad_norm": 2.2744543282076233, "learning_rate": 5.4960215006944205e-06, "loss": 0.5455, "step": 2795 }, { "epoch": 0.48456489244167156, "grad_norm": 2.8823384123482483, "learning_rate": 5.493228270916317e-06, "loss": 0.5989, "step": 2796 }, { "epoch": 0.4847381989125019, "grad_norm": 2.4115335515182204, "learning_rate": 5.490434885687656e-06, "loss": 0.5347, "step": 2797 }, { "epoch": 0.4849115053833323, "grad_norm": 2.0745682179698712, "learning_rate": 5.487641345888829e-06, "loss": 0.5269, "step": 2798 }, { "epoch": 0.4850848118541626, "grad_norm": 2.547921443927554, "learning_rate": 5.484847652400272e-06, "loss": 0.4956, "step": 2799 }, { "epoch": 0.48525811832499294, "grad_norm": 3.1160998614487143, "learning_rate": 5.482053806102474e-06, "loss": 0.5397, "step": 2800 }, { "epoch": 0.4854314247958233, "grad_norm": 2.3006959902150053, "learning_rate": 5.479259807875971e-06, "loss": 0.5544, "step": 2801 }, { "epoch": 0.48560473126665366, "grad_norm": 2.0961150797751853, "learning_rate": 5.476465658601344e-06, "loss": 0.5409, "step": 2802 }, { "epoch": 0.485778037737484, "grad_norm": 2.350789199199849, "learning_rate": 5.473671359159226e-06, "loss": 0.533, "step": 2803 }, { "epoch": 0.4859513442083144, "grad_norm": 2.720389511273713, "learning_rate": 5.470876910430294e-06, "loss": 0.6295, "step": 2804 }, { "epoch": 0.48612465067914473, "grad_norm": 2.2829975584877835, "learning_rate": 5.468082313295276e-06, "loss": 0.5415, "step": 2805 }, { "epoch": 0.4862979571499751, "grad_norm": 2.5080289333114347, "learning_rate": 5.465287568634939e-06, "loss": 0.5279, "step": 2806 }, { "epoch": 0.48647126362080545, "grad_norm": 2.1733757428120426, "learning_rate": 5.462492677330106e-06, "loss": 0.5399, "step": 2807 }, { "epoch": 0.4866445700916358, "grad_norm": 2.3571311163206725, "learning_rate": 5.4596976402616395e-06, "loss": 0.4893, "step": 2808 }, { "epoch": 0.48681787656246617, "grad_norm": 2.076418746133136, "learning_rate": 5.4569024583104524e-06, "loss": 0.5236, "step": 2809 }, { "epoch": 0.4869911830332965, "grad_norm": 2.5838990651974756, "learning_rate": 5.4541071323575005e-06, "loss": 0.5508, "step": 2810 }, { "epoch": 0.4871644895041269, "grad_norm": 1.9558295746170773, "learning_rate": 5.451311663283783e-06, "loss": 0.4713, "step": 2811 }, { "epoch": 0.4873377959749572, "grad_norm": 2.153615586668558, "learning_rate": 5.4485160519703486e-06, "loss": 0.6099, "step": 2812 }, { "epoch": 0.48751110244578755, "grad_norm": 2.744369087682608, "learning_rate": 5.445720299298287e-06, "loss": 0.607, "step": 2813 }, { "epoch": 0.4876844089166179, "grad_norm": 2.633048430639482, "learning_rate": 5.442924406148739e-06, "loss": 0.6057, "step": 2814 }, { "epoch": 0.48785771538744827, "grad_norm": 2.262234302693194, "learning_rate": 5.440128373402881e-06, "loss": 0.6795, "step": 2815 }, { "epoch": 0.4880310218582786, "grad_norm": 2.4478375346497554, "learning_rate": 5.437332201941942e-06, "loss": 0.6017, "step": 2816 }, { "epoch": 0.488204328329109, "grad_norm": 2.5859496327918285, "learning_rate": 5.434535892647184e-06, "loss": 0.6055, "step": 2817 }, { "epoch": 0.48837763479993934, "grad_norm": 4.0845303860780655, "learning_rate": 5.4317394463999225e-06, "loss": 0.5203, "step": 2818 }, { "epoch": 0.4885509412707697, "grad_norm": 2.4567232393588867, "learning_rate": 5.428942864081514e-06, "loss": 0.5559, "step": 2819 }, { "epoch": 0.48872424774160006, "grad_norm": 2.3464999447584263, "learning_rate": 5.42614614657335e-06, "loss": 0.6452, "step": 2820 }, { "epoch": 0.4888975542124304, "grad_norm": 2.8927906512109516, "learning_rate": 5.423349294756879e-06, "loss": 0.5374, "step": 2821 }, { "epoch": 0.4890708606832608, "grad_norm": 2.6862240726786606, "learning_rate": 5.420552309513578e-06, "loss": 0.4781, "step": 2822 }, { "epoch": 0.48924416715409114, "grad_norm": 4.23768850378546, "learning_rate": 5.417755191724973e-06, "loss": 0.58, "step": 2823 }, { "epoch": 0.4894174736249215, "grad_norm": 2.262669837034096, "learning_rate": 5.41495794227263e-06, "loss": 0.598, "step": 2824 }, { "epoch": 0.4895907800957518, "grad_norm": 7.84201282166544, "learning_rate": 5.412160562038159e-06, "loss": 0.5024, "step": 2825 }, { "epoch": 0.48976408656658216, "grad_norm": 2.0029353088522073, "learning_rate": 5.409363051903208e-06, "loss": 0.5726, "step": 2826 }, { "epoch": 0.4899373930374125, "grad_norm": 2.2642729526843715, "learning_rate": 5.406565412749467e-06, "loss": 0.6, "step": 2827 }, { "epoch": 0.4901106995082429, "grad_norm": 2.1740653277993762, "learning_rate": 5.403767645458666e-06, "loss": 0.6085, "step": 2828 }, { "epoch": 0.49028400597907323, "grad_norm": 2.149764165529443, "learning_rate": 5.400969750912578e-06, "loss": 0.4742, "step": 2829 }, { "epoch": 0.4904573124499036, "grad_norm": 4.101738738665733, "learning_rate": 5.398171729993014e-06, "loss": 0.6161, "step": 2830 }, { "epoch": 0.49063061892073395, "grad_norm": 2.6751589358722487, "learning_rate": 5.395373583581824e-06, "loss": 0.5829, "step": 2831 }, { "epoch": 0.4908039253915643, "grad_norm": 1.9198815279047676, "learning_rate": 5.3925753125609e-06, "loss": 0.5414, "step": 2832 }, { "epoch": 0.49097723186239467, "grad_norm": 2.0242694286181746, "learning_rate": 5.3897769178121696e-06, "loss": 0.583, "step": 2833 }, { "epoch": 0.49115053833322503, "grad_norm": 2.3957124338650577, "learning_rate": 5.386978400217604e-06, "loss": 0.5869, "step": 2834 }, { "epoch": 0.4913238448040554, "grad_norm": 2.6294853126259428, "learning_rate": 5.3841797606592105e-06, "loss": 0.6257, "step": 2835 }, { "epoch": 0.49149715127488575, "grad_norm": 2.204762048662896, "learning_rate": 5.381381000019037e-06, "loss": 0.5734, "step": 2836 }, { "epoch": 0.4916704577457161, "grad_norm": 1.9993051733644056, "learning_rate": 5.378582119179163e-06, "loss": 0.4885, "step": 2837 }, { "epoch": 0.4918437642165464, "grad_norm": 2.0859563390914593, "learning_rate": 5.375783119021715e-06, "loss": 0.5011, "step": 2838 }, { "epoch": 0.49201707068737677, "grad_norm": 2.8686184012036926, "learning_rate": 5.37298400042885e-06, "loss": 0.586, "step": 2839 }, { "epoch": 0.4921903771582071, "grad_norm": 2.73686597698177, "learning_rate": 5.370184764282767e-06, "loss": 0.4795, "step": 2840 }, { "epoch": 0.4923636836290375, "grad_norm": 2.0201704972382815, "learning_rate": 5.367385411465699e-06, "loss": 0.5582, "step": 2841 }, { "epoch": 0.49253699009986784, "grad_norm": 2.417948061107601, "learning_rate": 5.3645859428599175e-06, "loss": 0.5578, "step": 2842 }, { "epoch": 0.4927102965706982, "grad_norm": 2.389039511752745, "learning_rate": 5.36178635934773e-06, "loss": 0.5132, "step": 2843 }, { "epoch": 0.49288360304152856, "grad_norm": 2.5577544509509997, "learning_rate": 5.358986661811479e-06, "loss": 0.6487, "step": 2844 }, { "epoch": 0.4930569095123589, "grad_norm": 2.1969458978757994, "learning_rate": 5.356186851133546e-06, "loss": 0.5677, "step": 2845 }, { "epoch": 0.4932302159831893, "grad_norm": 1.8350119408049599, "learning_rate": 5.353386928196344e-06, "loss": 0.5557, "step": 2846 }, { "epoch": 0.49340352245401964, "grad_norm": 2.3940312122647662, "learning_rate": 5.350586893882326e-06, "loss": 0.5585, "step": 2847 }, { "epoch": 0.49357682892485, "grad_norm": 2.1542423789111824, "learning_rate": 5.3477867490739764e-06, "loss": 0.5704, "step": 2848 }, { "epoch": 0.49375013539568036, "grad_norm": 1.983787870921892, "learning_rate": 5.344986494653815e-06, "loss": 0.4552, "step": 2849 }, { "epoch": 0.4939234418665107, "grad_norm": 2.186427656014862, "learning_rate": 5.342186131504398e-06, "loss": 0.596, "step": 2850 }, { "epoch": 0.494096748337341, "grad_norm": 1.8538363043782213, "learning_rate": 5.339385660508314e-06, "loss": 0.5621, "step": 2851 }, { "epoch": 0.4942700548081714, "grad_norm": 2.742142152578166, "learning_rate": 5.336585082548188e-06, "loss": 0.4944, "step": 2852 }, { "epoch": 0.49444336127900174, "grad_norm": 2.1016767951295763, "learning_rate": 5.333784398506674e-06, "loss": 0.5794, "step": 2853 }, { "epoch": 0.4946166677498321, "grad_norm": 2.4038974718586963, "learning_rate": 5.330983609266464e-06, "loss": 0.6751, "step": 2854 }, { "epoch": 0.49478997422066245, "grad_norm": 2.02890087485733, "learning_rate": 5.328182715710283e-06, "loss": 0.5724, "step": 2855 }, { "epoch": 0.4949632806914928, "grad_norm": 2.234709984809627, "learning_rate": 5.325381718720886e-06, "loss": 0.5734, "step": 2856 }, { "epoch": 0.49513658716232317, "grad_norm": 2.034005117688465, "learning_rate": 5.322580619181063e-06, "loss": 0.5726, "step": 2857 }, { "epoch": 0.49530989363315353, "grad_norm": 1.8863081531386257, "learning_rate": 5.319779417973635e-06, "loss": 0.5222, "step": 2858 }, { "epoch": 0.4954832001039839, "grad_norm": 2.115316916924344, "learning_rate": 5.316978115981454e-06, "loss": 0.5014, "step": 2859 }, { "epoch": 0.49565650657481425, "grad_norm": 3.1189934154678602, "learning_rate": 5.314176714087408e-06, "loss": 0.603, "step": 2860 }, { "epoch": 0.4958298130456446, "grad_norm": 2.5846199961117375, "learning_rate": 5.31137521317441e-06, "loss": 0.4858, "step": 2861 }, { "epoch": 0.49600311951647497, "grad_norm": 2.4004006909523485, "learning_rate": 5.308573614125413e-06, "loss": 0.5556, "step": 2862 }, { "epoch": 0.4961764259873053, "grad_norm": 2.2352715189605346, "learning_rate": 5.305771917823391e-06, "loss": 0.5819, "step": 2863 }, { "epoch": 0.49634973245813563, "grad_norm": 2.3543231646253293, "learning_rate": 5.302970125151356e-06, "loss": 0.5337, "step": 2864 }, { "epoch": 0.496523038928966, "grad_norm": 1.9407474873821198, "learning_rate": 5.300168236992348e-06, "loss": 0.5083, "step": 2865 }, { "epoch": 0.49669634539979635, "grad_norm": 2.7418240620798513, "learning_rate": 5.297366254229436e-06, "loss": 0.6687, "step": 2866 }, { "epoch": 0.4968696518706267, "grad_norm": 2.198876435097561, "learning_rate": 5.29456417774572e-06, "loss": 0.6343, "step": 2867 }, { "epoch": 0.49704295834145706, "grad_norm": 2.7984341569815627, "learning_rate": 5.291762008424331e-06, "loss": 0.5977, "step": 2868 }, { "epoch": 0.4972162648122874, "grad_norm": 2.4747924004560233, "learning_rate": 5.288959747148424e-06, "loss": 0.6046, "step": 2869 }, { "epoch": 0.4973895712831178, "grad_norm": 2.4856987041601637, "learning_rate": 5.28615739480119e-06, "loss": 0.5269, "step": 2870 }, { "epoch": 0.49756287775394814, "grad_norm": 2.986128609423687, "learning_rate": 5.283354952265843e-06, "loss": 0.5911, "step": 2871 }, { "epoch": 0.4977361842247785, "grad_norm": 2.1238335911466275, "learning_rate": 5.280552420425631e-06, "loss": 0.5168, "step": 2872 }, { "epoch": 0.49790949069560886, "grad_norm": 2.373577700862963, "learning_rate": 5.277749800163823e-06, "loss": 0.5724, "step": 2873 }, { "epoch": 0.4980827971664392, "grad_norm": 2.7325388399327006, "learning_rate": 5.2749470923637204e-06, "loss": 0.5442, "step": 2874 }, { "epoch": 0.4982561036372696, "grad_norm": 1.8153302286230275, "learning_rate": 5.272144297908652e-06, "loss": 0.4678, "step": 2875 }, { "epoch": 0.49842941010809994, "grad_norm": 5.271028980723504, "learning_rate": 5.2693414176819745e-06, "loss": 0.5307, "step": 2876 }, { "epoch": 0.49860271657893024, "grad_norm": 2.23800642272158, "learning_rate": 5.266538452567071e-06, "loss": 0.605, "step": 2877 }, { "epoch": 0.4987760230497606, "grad_norm": 3.343132756478156, "learning_rate": 5.2637354034473485e-06, "loss": 0.6111, "step": 2878 }, { "epoch": 0.49894932952059096, "grad_norm": 2.873489432240617, "learning_rate": 5.260932271206245e-06, "loss": 0.5371, "step": 2879 }, { "epoch": 0.4991226359914213, "grad_norm": 3.3950849601483966, "learning_rate": 5.25812905672722e-06, "loss": 0.5732, "step": 2880 }, { "epoch": 0.4992959424622517, "grad_norm": 2.311640837024038, "learning_rate": 5.2553257608937635e-06, "loss": 0.5835, "step": 2881 }, { "epoch": 0.49946924893308203, "grad_norm": 2.3754958985629013, "learning_rate": 5.25252238458939e-06, "loss": 0.5248, "step": 2882 }, { "epoch": 0.4996425554039124, "grad_norm": 1.961286980562669, "learning_rate": 5.249718928697637e-06, "loss": 0.522, "step": 2883 }, { "epoch": 0.49981586187474275, "grad_norm": 2.1927037254417865, "learning_rate": 5.246915394102068e-06, "loss": 0.4859, "step": 2884 }, { "epoch": 0.4999891683455731, "grad_norm": 2.659844633619851, "learning_rate": 5.244111781686272e-06, "loss": 0.5327, "step": 2885 }, { "epoch": 0.5001624748164034, "grad_norm": 2.936735346889096, "learning_rate": 5.241308092333864e-06, "loss": 0.5204, "step": 2886 }, { "epoch": 0.5003357812872338, "grad_norm": 2.8410118135322877, "learning_rate": 5.238504326928481e-06, "loss": 0.5871, "step": 2887 }, { "epoch": 0.5005090877580641, "grad_norm": 3.2077566083694102, "learning_rate": 5.2357004863537845e-06, "loss": 0.5662, "step": 2888 }, { "epoch": 0.5006823942288945, "grad_norm": 3.6451908041275973, "learning_rate": 5.232896571493458e-06, "loss": 0.5866, "step": 2889 }, { "epoch": 0.5008557006997248, "grad_norm": 2.958927741357795, "learning_rate": 5.230092583231213e-06, "loss": 0.5993, "step": 2890 }, { "epoch": 0.5010290071705552, "grad_norm": 2.2706083917621074, "learning_rate": 5.227288522450781e-06, "loss": 0.5047, "step": 2891 }, { "epoch": 0.5012023136413856, "grad_norm": 2.3245693654646726, "learning_rate": 5.224484390035915e-06, "loss": 0.574, "step": 2892 }, { "epoch": 0.5013756201122159, "grad_norm": 2.4872910766916205, "learning_rate": 5.221680186870395e-06, "loss": 0.5046, "step": 2893 }, { "epoch": 0.5015489265830463, "grad_norm": 1.997898125325748, "learning_rate": 5.218875913838017e-06, "loss": 0.5067, "step": 2894 }, { "epoch": 0.5017222330538766, "grad_norm": 2.5480729559945074, "learning_rate": 5.216071571822605e-06, "loss": 0.5865, "step": 2895 }, { "epoch": 0.501895539524707, "grad_norm": 2.353868972531968, "learning_rate": 5.213267161708002e-06, "loss": 0.566, "step": 2896 }, { "epoch": 0.5020688459955374, "grad_norm": 2.499988058289754, "learning_rate": 5.210462684378072e-06, "loss": 0.5661, "step": 2897 }, { "epoch": 0.5022421524663677, "grad_norm": 2.128150733017195, "learning_rate": 5.207658140716704e-06, "loss": 0.5445, "step": 2898 }, { "epoch": 0.5024154589371981, "grad_norm": 2.1591396010087576, "learning_rate": 5.204853531607801e-06, "loss": 0.5476, "step": 2899 }, { "epoch": 0.5025887654080284, "grad_norm": 2.502313140736197, "learning_rate": 5.202048857935291e-06, "loss": 0.5079, "step": 2900 }, { "epoch": 0.5027620718788588, "grad_norm": 2.0305236008158576, "learning_rate": 5.199244120583124e-06, "loss": 0.4374, "step": 2901 }, { "epoch": 0.5029353783496892, "grad_norm": 2.139161312462589, "learning_rate": 5.196439320435266e-06, "loss": 0.5412, "step": 2902 }, { "epoch": 0.5031086848205195, "grad_norm": 2.1285546773327644, "learning_rate": 5.193634458375708e-06, "loss": 0.5254, "step": 2903 }, { "epoch": 0.5032819912913499, "grad_norm": 2.1431706552799787, "learning_rate": 5.190829535288453e-06, "loss": 0.5958, "step": 2904 }, { "epoch": 0.5034552977621802, "grad_norm": 2.3773228156123967, "learning_rate": 5.18802455205753e-06, "loss": 0.4329, "step": 2905 }, { "epoch": 0.5036286042330106, "grad_norm": 2.588215439072142, "learning_rate": 5.185219509566985e-06, "loss": 0.6208, "step": 2906 }, { "epoch": 0.503801910703841, "grad_norm": 2.162209837046091, "learning_rate": 5.18241440870088e-06, "loss": 0.5458, "step": 2907 }, { "epoch": 0.5039752171746713, "grad_norm": 2.4955063226256096, "learning_rate": 5.179609250343298e-06, "loss": 0.5182, "step": 2908 }, { "epoch": 0.5041485236455017, "grad_norm": 2.4683595796671, "learning_rate": 5.17680403537834e-06, "loss": 0.5735, "step": 2909 }, { "epoch": 0.5043218301163319, "grad_norm": 2.5199986515281125, "learning_rate": 5.173998764690126e-06, "loss": 0.5353, "step": 2910 }, { "epoch": 0.5044951365871623, "grad_norm": 2.43428241205514, "learning_rate": 5.17119343916279e-06, "loss": 0.5211, "step": 2911 }, { "epoch": 0.5046684430579926, "grad_norm": 3.883964463101453, "learning_rate": 5.1683880596804854e-06, "loss": 0.6081, "step": 2912 }, { "epoch": 0.504841749528823, "grad_norm": 2.85759397322518, "learning_rate": 5.165582627127384e-06, "loss": 0.6236, "step": 2913 }, { "epoch": 0.5050150559996534, "grad_norm": 2.383736725008319, "learning_rate": 5.162777142387672e-06, "loss": 0.5296, "step": 2914 }, { "epoch": 0.5051883624704837, "grad_norm": 2.40322788182198, "learning_rate": 5.159971606345553e-06, "loss": 0.5745, "step": 2915 }, { "epoch": 0.5053616689413141, "grad_norm": 3.2808001246481266, "learning_rate": 5.157166019885246e-06, "loss": 0.4859, "step": 2916 }, { "epoch": 0.5055349754121444, "grad_norm": 2.279948781186341, "learning_rate": 5.154360383890987e-06, "loss": 0.5905, "step": 2917 }, { "epoch": 0.5057082818829748, "grad_norm": 6.435014531418411, "learning_rate": 5.151554699247029e-06, "loss": 0.65, "step": 2918 }, { "epoch": 0.5058815883538051, "grad_norm": 2.17161691980534, "learning_rate": 5.1487489668376354e-06, "loss": 0.5356, "step": 2919 }, { "epoch": 0.5060548948246355, "grad_norm": 3.1808989266238465, "learning_rate": 5.14594318754709e-06, "loss": 0.5608, "step": 2920 }, { "epoch": 0.5062282012954659, "grad_norm": 2.5348255390004177, "learning_rate": 5.143137362259688e-06, "loss": 0.5151, "step": 2921 }, { "epoch": 0.5064015077662962, "grad_norm": 2.2944617287334723, "learning_rate": 5.140331491859739e-06, "loss": 0.5233, "step": 2922 }, { "epoch": 0.5065748142371266, "grad_norm": 2.3936216558822454, "learning_rate": 5.1375255772315715e-06, "loss": 0.5763, "step": 2923 }, { "epoch": 0.5067481207079569, "grad_norm": 2.220447459621884, "learning_rate": 5.1347196192595235e-06, "loss": 0.5465, "step": 2924 }, { "epoch": 0.5069214271787873, "grad_norm": 2.625663245062147, "learning_rate": 5.131913618827946e-06, "loss": 0.5249, "step": 2925 }, { "epoch": 0.5070947336496177, "grad_norm": 2.507165091924994, "learning_rate": 5.129107576821205e-06, "loss": 0.5203, "step": 2926 }, { "epoch": 0.507268040120448, "grad_norm": 2.7601203319862893, "learning_rate": 5.126301494123681e-06, "loss": 0.5974, "step": 2927 }, { "epoch": 0.5074413465912784, "grad_norm": 3.2055312324069383, "learning_rate": 5.123495371619765e-06, "loss": 0.5624, "step": 2928 }, { "epoch": 0.5076146530621087, "grad_norm": 2.744247098929209, "learning_rate": 5.120689210193863e-06, "loss": 0.4413, "step": 2929 }, { "epoch": 0.5077879595329391, "grad_norm": 2.811587467500976, "learning_rate": 5.11788301073039e-06, "loss": 0.5996, "step": 2930 }, { "epoch": 0.5079612660037695, "grad_norm": 3.329184033377485, "learning_rate": 5.115076774113775e-06, "loss": 0.592, "step": 2931 }, { "epoch": 0.5081345724745998, "grad_norm": 2.4917879052217358, "learning_rate": 5.11227050122846e-06, "loss": 0.6273, "step": 2932 }, { "epoch": 0.5083078789454302, "grad_norm": 2.398814464411819, "learning_rate": 5.109464192958895e-06, "loss": 0.4997, "step": 2933 }, { "epoch": 0.5084811854162605, "grad_norm": 2.3936814417160583, "learning_rate": 5.106657850189546e-06, "loss": 0.5504, "step": 2934 }, { "epoch": 0.5086544918870909, "grad_norm": 2.2080230639084633, "learning_rate": 5.103851473804883e-06, "loss": 0.4893, "step": 2935 }, { "epoch": 0.5088277983579211, "grad_norm": 2.559095600085357, "learning_rate": 5.101045064689392e-06, "loss": 0.5565, "step": 2936 }, { "epoch": 0.5090011048287515, "grad_norm": 2.8219523448218946, "learning_rate": 5.09823862372757e-06, "loss": 0.5072, "step": 2937 }, { "epoch": 0.5091744112995819, "grad_norm": 2.4426413948792325, "learning_rate": 5.095432151803919e-06, "loss": 0.5339, "step": 2938 }, { "epoch": 0.5093477177704122, "grad_norm": 2.913812133378647, "learning_rate": 5.0926256498029546e-06, "loss": 0.5029, "step": 2939 }, { "epoch": 0.5095210242412426, "grad_norm": 2.346867979187323, "learning_rate": 5.089819118609201e-06, "loss": 0.6368, "step": 2940 }, { "epoch": 0.5096943307120729, "grad_norm": 2.052483086515703, "learning_rate": 5.087012559107193e-06, "loss": 0.4547, "step": 2941 }, { "epoch": 0.5098676371829033, "grad_norm": 2.552526934851998, "learning_rate": 5.084205972181469e-06, "loss": 0.5893, "step": 2942 }, { "epoch": 0.5100409436537336, "grad_norm": 2.1464808652261183, "learning_rate": 5.081399358716584e-06, "loss": 0.4834, "step": 2943 }, { "epoch": 0.510214250124564, "grad_norm": 2.1748145318100343, "learning_rate": 5.078592719597097e-06, "loss": 0.5637, "step": 2944 }, { "epoch": 0.5103875565953944, "grad_norm": 5.080352434604383, "learning_rate": 5.075786055707571e-06, "loss": 0.6058, "step": 2945 }, { "epoch": 0.5105608630662247, "grad_norm": 2.2103894562050734, "learning_rate": 5.072979367932586e-06, "loss": 0.5223, "step": 2946 }, { "epoch": 0.5107341695370551, "grad_norm": 2.427109922751769, "learning_rate": 5.070172657156721e-06, "loss": 0.5979, "step": 2947 }, { "epoch": 0.5109074760078854, "grad_norm": 2.0655794179897553, "learning_rate": 5.067365924264569e-06, "loss": 0.612, "step": 2948 }, { "epoch": 0.5110807824787158, "grad_norm": 2.787372491128542, "learning_rate": 5.064559170140725e-06, "loss": 0.4986, "step": 2949 }, { "epoch": 0.5112540889495462, "grad_norm": 2.95914391768531, "learning_rate": 5.061752395669794e-06, "loss": 0.5326, "step": 2950 }, { "epoch": 0.5114273954203765, "grad_norm": 2.873988122414107, "learning_rate": 5.058945601736382e-06, "loss": 0.5157, "step": 2951 }, { "epoch": 0.5116007018912069, "grad_norm": 2.1103558088949077, "learning_rate": 5.0561387892251115e-06, "loss": 0.5563, "step": 2952 }, { "epoch": 0.5117740083620372, "grad_norm": 1.9737122775647906, "learning_rate": 5.0533319590206e-06, "loss": 0.5848, "step": 2953 }, { "epoch": 0.5119473148328676, "grad_norm": 2.0337441473101805, "learning_rate": 5.050525112007477e-06, "loss": 0.4945, "step": 2954 }, { "epoch": 0.512120621303698, "grad_norm": 2.696062690374521, "learning_rate": 5.047718249070374e-06, "loss": 0.5452, "step": 2955 }, { "epoch": 0.5122939277745283, "grad_norm": 2.152024655862668, "learning_rate": 5.044911371093929e-06, "loss": 0.5365, "step": 2956 }, { "epoch": 0.5124672342453587, "grad_norm": 2.257896315445233, "learning_rate": 5.042104478962785e-06, "loss": 0.566, "step": 2957 }, { "epoch": 0.512640540716189, "grad_norm": 3.724478728761326, "learning_rate": 5.039297573561589e-06, "loss": 0.5597, "step": 2958 }, { "epoch": 0.5128138471870194, "grad_norm": 2.1593613949912793, "learning_rate": 5.036490655774994e-06, "loss": 0.5718, "step": 2959 }, { "epoch": 0.5129871536578497, "grad_norm": 4.437439484733501, "learning_rate": 5.033683726487654e-06, "loss": 0.5064, "step": 2960 }, { "epoch": 0.5131604601286801, "grad_norm": 2.297914921768626, "learning_rate": 5.0308767865842256e-06, "loss": 0.6141, "step": 2961 }, { "epoch": 0.5133337665995104, "grad_norm": 2.0773062877107793, "learning_rate": 5.028069836949372e-06, "loss": 0.5849, "step": 2962 }, { "epoch": 0.5135070730703407, "grad_norm": 2.2876889412081662, "learning_rate": 5.025262878467759e-06, "loss": 0.5561, "step": 2963 }, { "epoch": 0.5136803795411711, "grad_norm": 2.3061626441211938, "learning_rate": 5.022455912024056e-06, "loss": 0.6279, "step": 2964 }, { "epoch": 0.5138536860120014, "grad_norm": 2.4008764407552237, "learning_rate": 5.019648938502933e-06, "loss": 0.583, "step": 2965 }, { "epoch": 0.5140269924828318, "grad_norm": 2.03241825960385, "learning_rate": 5.01684195878906e-06, "loss": 0.5391, "step": 2966 }, { "epoch": 0.5142002989536621, "grad_norm": 2.4141527113375587, "learning_rate": 5.014034973767116e-06, "loss": 0.5966, "step": 2967 }, { "epoch": 0.5143736054244925, "grad_norm": 2.7127635999610185, "learning_rate": 5.011227984321773e-06, "loss": 0.5998, "step": 2968 }, { "epoch": 0.5145469118953229, "grad_norm": 2.6937087824416612, "learning_rate": 5.00842099133771e-06, "loss": 0.5823, "step": 2969 }, { "epoch": 0.5147202183661532, "grad_norm": 5.632866251107395, "learning_rate": 5.005613995699608e-06, "loss": 0.593, "step": 2970 }, { "epoch": 0.5148935248369836, "grad_norm": 2.4922412492410526, "learning_rate": 5.002806998292144e-06, "loss": 0.6054, "step": 2971 }, { "epoch": 0.5150668313078139, "grad_norm": 2.850991330508087, "learning_rate": 5e-06, "loss": 0.5726, "step": 2972 }, { "epoch": 0.5152401377786443, "grad_norm": 3.1416005882776, "learning_rate": 4.997193001707857e-06, "loss": 0.5442, "step": 2973 }, { "epoch": 0.5154134442494747, "grad_norm": 2.7629693670912645, "learning_rate": 4.994386004300393e-06, "loss": 0.5547, "step": 2974 }, { "epoch": 0.515586750720305, "grad_norm": 2.9616590637736495, "learning_rate": 4.9915790086622915e-06, "loss": 0.4894, "step": 2975 }, { "epoch": 0.5157600571911354, "grad_norm": 2.501274070261283, "learning_rate": 4.988772015678229e-06, "loss": 0.5436, "step": 2976 }, { "epoch": 0.5159333636619657, "grad_norm": 2.2087007926927003, "learning_rate": 4.985965026232885e-06, "loss": 0.5938, "step": 2977 }, { "epoch": 0.5161066701327961, "grad_norm": 2.41975857960943, "learning_rate": 4.983158041210941e-06, "loss": 0.5894, "step": 2978 }, { "epoch": 0.5162799766036265, "grad_norm": 2.57465438949117, "learning_rate": 4.9803510614970695e-06, "loss": 0.6853, "step": 2979 }, { "epoch": 0.5164532830744568, "grad_norm": 2.4884231382804654, "learning_rate": 4.977544087975945e-06, "loss": 0.5807, "step": 2980 }, { "epoch": 0.5166265895452872, "grad_norm": 2.2909142174996098, "learning_rate": 4.974737121532242e-06, "loss": 0.5282, "step": 2981 }, { "epoch": 0.5167998960161175, "grad_norm": 2.3677824198502146, "learning_rate": 4.97193016305063e-06, "loss": 0.4716, "step": 2982 }, { "epoch": 0.5169732024869479, "grad_norm": 2.5676508753747616, "learning_rate": 4.969123213415776e-06, "loss": 0.6064, "step": 2983 }, { "epoch": 0.5171465089577783, "grad_norm": 1.9111407540693004, "learning_rate": 4.9663162735123485e-06, "loss": 0.5833, "step": 2984 }, { "epoch": 0.5173198154286086, "grad_norm": 3.0497885256525668, "learning_rate": 4.963509344225007e-06, "loss": 0.518, "step": 2985 }, { "epoch": 0.517493121899439, "grad_norm": 3.663433610834585, "learning_rate": 4.960702426438411e-06, "loss": 0.5338, "step": 2986 }, { "epoch": 0.5176664283702693, "grad_norm": 2.3579600160679877, "learning_rate": 4.957895521037215e-06, "loss": 0.5908, "step": 2987 }, { "epoch": 0.5178397348410996, "grad_norm": 2.1951532281690342, "learning_rate": 4.955088628906073e-06, "loss": 0.6177, "step": 2988 }, { "epoch": 0.5180130413119299, "grad_norm": 2.3790769915901335, "learning_rate": 4.952281750929628e-06, "loss": 0.5581, "step": 2989 }, { "epoch": 0.5181863477827603, "grad_norm": 2.4462535616819214, "learning_rate": 4.949474887992525e-06, "loss": 0.539, "step": 2990 }, { "epoch": 0.5183596542535907, "grad_norm": 2.4151179365938384, "learning_rate": 4.946668040979402e-06, "loss": 0.5152, "step": 2991 }, { "epoch": 0.518532960724421, "grad_norm": 2.4124421015881796, "learning_rate": 4.94386121077489e-06, "loss": 0.5643, "step": 2992 }, { "epoch": 0.5187062671952514, "grad_norm": 2.1815459319293375, "learning_rate": 4.941054398263619e-06, "loss": 0.4659, "step": 2993 }, { "epoch": 0.5188795736660817, "grad_norm": 1.9133678352819756, "learning_rate": 4.938247604330209e-06, "loss": 0.4415, "step": 2994 }, { "epoch": 0.5190528801369121, "grad_norm": 2.3429191122021917, "learning_rate": 4.935440829859277e-06, "loss": 0.6035, "step": 2995 }, { "epoch": 0.5192261866077424, "grad_norm": 2.1564088668113186, "learning_rate": 4.932634075735433e-06, "loss": 0.5404, "step": 2996 }, { "epoch": 0.5193994930785728, "grad_norm": 4.174982159790306, "learning_rate": 4.92982734284328e-06, "loss": 0.5284, "step": 2997 }, { "epoch": 0.5195727995494032, "grad_norm": 2.7600712333428485, "learning_rate": 4.927020632067418e-06, "loss": 0.5879, "step": 2998 }, { "epoch": 0.5197461060202335, "grad_norm": 2.1048702462342423, "learning_rate": 4.924213944292431e-06, "loss": 0.5258, "step": 2999 }, { "epoch": 0.5199194124910639, "grad_norm": 2.5334923452841163, "learning_rate": 4.921407280402907e-06, "loss": 0.4666, "step": 3000 }, { "epoch": 0.5200927189618942, "grad_norm": 2.4935777771229364, "learning_rate": 4.918600641283417e-06, "loss": 0.5584, "step": 3001 }, { "epoch": 0.5202660254327246, "grad_norm": 2.8946066560423174, "learning_rate": 4.915794027818532e-06, "loss": 0.5094, "step": 3002 }, { "epoch": 0.520439331903555, "grad_norm": 2.7524831102120637, "learning_rate": 4.912987440892809e-06, "loss": 0.5346, "step": 3003 }, { "epoch": 0.5206126383743853, "grad_norm": 3.2097154944909163, "learning_rate": 4.910180881390799e-06, "loss": 0.5211, "step": 3004 }, { "epoch": 0.5207859448452157, "grad_norm": 5.186348928930499, "learning_rate": 4.907374350197046e-06, "loss": 0.534, "step": 3005 }, { "epoch": 0.520959251316046, "grad_norm": 2.4345791230517118, "learning_rate": 4.904567848196082e-06, "loss": 0.5166, "step": 3006 }, { "epoch": 0.5211325577868764, "grad_norm": 2.5006896089194157, "learning_rate": 4.901761376272431e-06, "loss": 0.5736, "step": 3007 }, { "epoch": 0.5213058642577068, "grad_norm": 2.7125238144240016, "learning_rate": 4.898954935310608e-06, "loss": 0.6831, "step": 3008 }, { "epoch": 0.5214791707285371, "grad_norm": 2.199421853982158, "learning_rate": 4.896148526195119e-06, "loss": 0.5388, "step": 3009 }, { "epoch": 0.5216524771993675, "grad_norm": 2.8375082580439885, "learning_rate": 4.893342149810457e-06, "loss": 0.5169, "step": 3010 }, { "epoch": 0.5218257836701978, "grad_norm": 2.569625868628424, "learning_rate": 4.890535807041106e-06, "loss": 0.599, "step": 3011 }, { "epoch": 0.5219990901410282, "grad_norm": 2.286602826785117, "learning_rate": 4.887729498771541e-06, "loss": 0.4948, "step": 3012 }, { "epoch": 0.5221723966118585, "grad_norm": 2.1529950611396758, "learning_rate": 4.884923225886226e-06, "loss": 0.5372, "step": 3013 }, { "epoch": 0.5223457030826888, "grad_norm": 2.3400240455820662, "learning_rate": 4.882116989269611e-06, "loss": 0.5598, "step": 3014 }, { "epoch": 0.5225190095535192, "grad_norm": 2.7150363875069794, "learning_rate": 4.879310789806138e-06, "loss": 0.6505, "step": 3015 }, { "epoch": 0.5226923160243495, "grad_norm": 2.591517044498104, "learning_rate": 4.876504628380236e-06, "loss": 0.5708, "step": 3016 }, { "epoch": 0.5228656224951799, "grad_norm": 2.507598981723586, "learning_rate": 4.87369850587632e-06, "loss": 0.6295, "step": 3017 }, { "epoch": 0.5230389289660102, "grad_norm": 2.8785557334082084, "learning_rate": 4.8708924231787955e-06, "loss": 0.5661, "step": 3018 }, { "epoch": 0.5232122354368406, "grad_norm": 3.075423485896441, "learning_rate": 4.868086381172057e-06, "loss": 0.633, "step": 3019 }, { "epoch": 0.523385541907671, "grad_norm": 2.1338740429963474, "learning_rate": 4.865280380740479e-06, "loss": 0.4861, "step": 3020 }, { "epoch": 0.5235588483785013, "grad_norm": 2.685157981885868, "learning_rate": 4.86247442276843e-06, "loss": 0.5146, "step": 3021 }, { "epoch": 0.5237321548493317, "grad_norm": 2.935418365948593, "learning_rate": 4.859668508140263e-06, "loss": 0.5706, "step": 3022 }, { "epoch": 0.523905461320162, "grad_norm": 1.9799016394231252, "learning_rate": 4.8568626377403135e-06, "loss": 0.5187, "step": 3023 }, { "epoch": 0.5240787677909924, "grad_norm": 2.5784660923360967, "learning_rate": 4.854056812452911e-06, "loss": 0.6066, "step": 3024 }, { "epoch": 0.5242520742618227, "grad_norm": 2.9385241487601905, "learning_rate": 4.851251033162365e-06, "loss": 0.6308, "step": 3025 }, { "epoch": 0.5244253807326531, "grad_norm": 2.200662604792226, "learning_rate": 4.848445300752972e-06, "loss": 0.5299, "step": 3026 }, { "epoch": 0.5245986872034835, "grad_norm": 1.9915856544008241, "learning_rate": 4.845639616109013e-06, "loss": 0.5095, "step": 3027 }, { "epoch": 0.5247719936743138, "grad_norm": 3.3570348120340157, "learning_rate": 4.842833980114755e-06, "loss": 0.6126, "step": 3028 }, { "epoch": 0.5249453001451442, "grad_norm": 2.23319261760259, "learning_rate": 4.840028393654449e-06, "loss": 0.5706, "step": 3029 }, { "epoch": 0.5251186066159745, "grad_norm": 3.274812840933754, "learning_rate": 4.83722285761233e-06, "loss": 0.5019, "step": 3030 }, { "epoch": 0.5252919130868049, "grad_norm": 2.617566863187071, "learning_rate": 4.834417372872617e-06, "loss": 0.5881, "step": 3031 }, { "epoch": 0.5254652195576353, "grad_norm": 2.269030537130596, "learning_rate": 4.831611940319516e-06, "loss": 0.4635, "step": 3032 }, { "epoch": 0.5256385260284656, "grad_norm": 2.101164406850553, "learning_rate": 4.828806560837212e-06, "loss": 0.5266, "step": 3033 }, { "epoch": 0.525811832499296, "grad_norm": 2.456938243816452, "learning_rate": 4.8260012353098755e-06, "loss": 0.5428, "step": 3034 }, { "epoch": 0.5259851389701263, "grad_norm": 2.3299323693614884, "learning_rate": 4.823195964621661e-06, "loss": 0.5211, "step": 3035 }, { "epoch": 0.5261584454409567, "grad_norm": 2.294784104187988, "learning_rate": 4.820390749656704e-06, "loss": 0.5951, "step": 3036 }, { "epoch": 0.526331751911787, "grad_norm": 2.1774122426667346, "learning_rate": 4.817585591299122e-06, "loss": 0.5194, "step": 3037 }, { "epoch": 0.5265050583826174, "grad_norm": 3.5315923797000632, "learning_rate": 4.814780490433016e-06, "loss": 0.556, "step": 3038 }, { "epoch": 0.5266783648534478, "grad_norm": 2.7300816036326596, "learning_rate": 4.811975447942472e-06, "loss": 0.5421, "step": 3039 }, { "epoch": 0.526851671324278, "grad_norm": 3.7023267539133493, "learning_rate": 4.809170464711549e-06, "loss": 0.4997, "step": 3040 }, { "epoch": 0.5270249777951084, "grad_norm": 2.1057160974779228, "learning_rate": 4.806365541624294e-06, "loss": 0.5621, "step": 3041 }, { "epoch": 0.5271982842659387, "grad_norm": 2.890835867236171, "learning_rate": 4.803560679564735e-06, "loss": 0.5327, "step": 3042 }, { "epoch": 0.5273715907367691, "grad_norm": 5.4725282744403865, "learning_rate": 4.800755879416878e-06, "loss": 0.6258, "step": 3043 }, { "epoch": 0.5275448972075995, "grad_norm": 2.1265432033274796, "learning_rate": 4.79795114206471e-06, "loss": 0.5525, "step": 3044 }, { "epoch": 0.5277182036784298, "grad_norm": 2.31201836007357, "learning_rate": 4.795146468392201e-06, "loss": 0.5331, "step": 3045 }, { "epoch": 0.5278915101492602, "grad_norm": 2.119254168137441, "learning_rate": 4.792341859283298e-06, "loss": 0.5183, "step": 3046 }, { "epoch": 0.5280648166200905, "grad_norm": 2.438657289523596, "learning_rate": 4.7895373156219284e-06, "loss": 0.6196, "step": 3047 }, { "epoch": 0.5282381230909209, "grad_norm": 2.235579294522716, "learning_rate": 4.786732838291999e-06, "loss": 0.6025, "step": 3048 }, { "epoch": 0.5284114295617512, "grad_norm": 2.202876729401492, "learning_rate": 4.783928428177396e-06, "loss": 0.64, "step": 3049 }, { "epoch": 0.5285847360325816, "grad_norm": 2.615195826845655, "learning_rate": 4.781124086161984e-06, "loss": 0.5235, "step": 3050 }, { "epoch": 0.528758042503412, "grad_norm": 2.193424443149171, "learning_rate": 4.778319813129607e-06, "loss": 0.5138, "step": 3051 }, { "epoch": 0.5289313489742423, "grad_norm": 4.429185868593176, "learning_rate": 4.775515609964086e-06, "loss": 0.5505, "step": 3052 }, { "epoch": 0.5291046554450727, "grad_norm": 2.335112696261885, "learning_rate": 4.77271147754922e-06, "loss": 0.4896, "step": 3053 }, { "epoch": 0.529277961915903, "grad_norm": 2.548898377383326, "learning_rate": 4.769907416768788e-06, "loss": 0.5266, "step": 3054 }, { "epoch": 0.5294512683867334, "grad_norm": 2.4239338249962996, "learning_rate": 4.7671034285065424e-06, "loss": 0.4857, "step": 3055 }, { "epoch": 0.5296245748575638, "grad_norm": 2.683481549342724, "learning_rate": 4.764299513646217e-06, "loss": 0.4875, "step": 3056 }, { "epoch": 0.5297978813283941, "grad_norm": 2.616286765724111, "learning_rate": 4.761495673071521e-06, "loss": 0.6129, "step": 3057 }, { "epoch": 0.5299711877992245, "grad_norm": 3.243791223007114, "learning_rate": 4.758691907666136e-06, "loss": 0.5865, "step": 3058 }, { "epoch": 0.5301444942700548, "grad_norm": 2.138777381673359, "learning_rate": 4.755888218313728e-06, "loss": 0.5733, "step": 3059 }, { "epoch": 0.5303178007408852, "grad_norm": 2.411142619226182, "learning_rate": 4.753084605897935e-06, "loss": 0.6391, "step": 3060 }, { "epoch": 0.5304911072117156, "grad_norm": 2.3361025728073628, "learning_rate": 4.750281071302366e-06, "loss": 0.5263, "step": 3061 }, { "epoch": 0.5306644136825459, "grad_norm": 2.0872748389770575, "learning_rate": 4.7474776154106124e-06, "loss": 0.5064, "step": 3062 }, { "epoch": 0.5308377201533763, "grad_norm": 2.842303992004237, "learning_rate": 4.744674239106238e-06, "loss": 0.5301, "step": 3063 }, { "epoch": 0.5310110266242066, "grad_norm": 2.2114384561077283, "learning_rate": 4.741870943272781e-06, "loss": 0.5592, "step": 3064 }, { "epoch": 0.531184333095037, "grad_norm": 2.3234857146962993, "learning_rate": 4.739067728793756e-06, "loss": 0.4931, "step": 3065 }, { "epoch": 0.5313576395658673, "grad_norm": 2.142902034592049, "learning_rate": 4.736264596552652e-06, "loss": 0.4811, "step": 3066 }, { "epoch": 0.5315309460366976, "grad_norm": 2.456631332449083, "learning_rate": 4.73346154743293e-06, "loss": 0.4216, "step": 3067 }, { "epoch": 0.531704252507528, "grad_norm": 2.473358811774788, "learning_rate": 4.7306585823180255e-06, "loss": 0.4839, "step": 3068 }, { "epoch": 0.5318775589783583, "grad_norm": 2.5286250312710283, "learning_rate": 4.727855702091348e-06, "loss": 0.6366, "step": 3069 }, { "epoch": 0.5320508654491887, "grad_norm": 2.165668378387325, "learning_rate": 4.725052907636282e-06, "loss": 0.5043, "step": 3070 }, { "epoch": 0.532224171920019, "grad_norm": 2.387969470230106, "learning_rate": 4.72225019983618e-06, "loss": 0.5556, "step": 3071 }, { "epoch": 0.5323974783908494, "grad_norm": 2.2363107661125974, "learning_rate": 4.719447579574371e-06, "loss": 0.5221, "step": 3072 }, { "epoch": 0.5325707848616797, "grad_norm": 2.369485752159303, "learning_rate": 4.716645047734158e-06, "loss": 0.5235, "step": 3073 }, { "epoch": 0.5327440913325101, "grad_norm": 2.834503974266891, "learning_rate": 4.713842605198811e-06, "loss": 0.5808, "step": 3074 }, { "epoch": 0.5329173978033405, "grad_norm": 2.5172666526450316, "learning_rate": 4.711040252851577e-06, "loss": 0.5394, "step": 3075 }, { "epoch": 0.5330907042741708, "grad_norm": 2.1328584118219283, "learning_rate": 4.708237991575671e-06, "loss": 0.5019, "step": 3076 }, { "epoch": 0.5332640107450012, "grad_norm": 2.196112458802118, "learning_rate": 4.705435822254281e-06, "loss": 0.5977, "step": 3077 }, { "epoch": 0.5334373172158315, "grad_norm": 2.8979427984040274, "learning_rate": 4.702633745770565e-06, "loss": 0.4974, "step": 3078 }, { "epoch": 0.5336106236866619, "grad_norm": 2.4184419880331047, "learning_rate": 4.6998317630076525e-06, "loss": 0.4855, "step": 3079 }, { "epoch": 0.5337839301574923, "grad_norm": 2.013187890985961, "learning_rate": 4.697029874848645e-06, "loss": 0.5162, "step": 3080 }, { "epoch": 0.5339572366283226, "grad_norm": 2.155244086806675, "learning_rate": 4.694228082176611e-06, "loss": 0.5451, "step": 3081 }, { "epoch": 0.534130543099153, "grad_norm": 3.0957535840343193, "learning_rate": 4.69142638587459e-06, "loss": 0.5863, "step": 3082 }, { "epoch": 0.5343038495699833, "grad_norm": 2.2578358674323558, "learning_rate": 4.688624786825592e-06, "loss": 0.4913, "step": 3083 }, { "epoch": 0.5344771560408137, "grad_norm": 1.9697371728847628, "learning_rate": 4.685823285912594e-06, "loss": 0.4826, "step": 3084 }, { "epoch": 0.534650462511644, "grad_norm": 2.83627136678575, "learning_rate": 4.683021884018547e-06, "loss": 0.6119, "step": 3085 }, { "epoch": 0.5348237689824744, "grad_norm": 1.9978730984714983, "learning_rate": 4.6802205820263665e-06, "loss": 0.5658, "step": 3086 }, { "epoch": 0.5349970754533048, "grad_norm": 2.345406223142882, "learning_rate": 4.6774193808189384e-06, "loss": 0.5278, "step": 3087 }, { "epoch": 0.5351703819241351, "grad_norm": 2.4143650598267246, "learning_rate": 4.674618281279114e-06, "loss": 0.4775, "step": 3088 }, { "epoch": 0.5353436883949655, "grad_norm": 2.6497430552159114, "learning_rate": 4.671817284289718e-06, "loss": 0.4437, "step": 3089 }, { "epoch": 0.5355169948657958, "grad_norm": 2.521391894174251, "learning_rate": 4.6690163907335365e-06, "loss": 0.5676, "step": 3090 }, { "epoch": 0.5356903013366262, "grad_norm": 3.7715852845566915, "learning_rate": 4.666215601493328e-06, "loss": 0.5566, "step": 3091 }, { "epoch": 0.5358636078074566, "grad_norm": 2.9613934235056543, "learning_rate": 4.663414917451815e-06, "loss": 0.5222, "step": 3092 }, { "epoch": 0.5360369142782868, "grad_norm": 2.493805989257223, "learning_rate": 4.660614339491688e-06, "loss": 0.5426, "step": 3093 }, { "epoch": 0.5362102207491172, "grad_norm": 2.556439214839481, "learning_rate": 4.657813868495604e-06, "loss": 0.5763, "step": 3094 }, { "epoch": 0.5363835272199475, "grad_norm": 2.249730429111256, "learning_rate": 4.655013505346186e-06, "loss": 0.5805, "step": 3095 }, { "epoch": 0.5365568336907779, "grad_norm": 7.733655891003369, "learning_rate": 4.652213250926025e-06, "loss": 0.6195, "step": 3096 }, { "epoch": 0.5367301401616082, "grad_norm": 2.348951553437892, "learning_rate": 4.649413106117675e-06, "loss": 0.5717, "step": 3097 }, { "epoch": 0.5369034466324386, "grad_norm": 2.8136427301781053, "learning_rate": 4.646613071803657e-06, "loss": 0.6472, "step": 3098 }, { "epoch": 0.537076753103269, "grad_norm": 2.0824087095767783, "learning_rate": 4.643813148866454e-06, "loss": 0.5559, "step": 3099 }, { "epoch": 0.5372500595740993, "grad_norm": 2.2299410925404857, "learning_rate": 4.641013338188521e-06, "loss": 0.5467, "step": 3100 }, { "epoch": 0.5374233660449297, "grad_norm": 2.5511502612754065, "learning_rate": 4.638213640652272e-06, "loss": 0.4357, "step": 3101 }, { "epoch": 0.53759667251576, "grad_norm": 1.939333741330295, "learning_rate": 4.635414057140085e-06, "loss": 0.5409, "step": 3102 }, { "epoch": 0.5377699789865904, "grad_norm": 3.8928506195959134, "learning_rate": 4.632614588534304e-06, "loss": 0.4904, "step": 3103 }, { "epoch": 0.5379432854574208, "grad_norm": 2.4398007099305503, "learning_rate": 4.629815235717234e-06, "loss": 0.5483, "step": 3104 }, { "epoch": 0.5381165919282511, "grad_norm": 2.5522756522489414, "learning_rate": 4.627015999571151e-06, "loss": 0.5374, "step": 3105 }, { "epoch": 0.5382898983990815, "grad_norm": 2.47384599819431, "learning_rate": 4.624216880978287e-06, "loss": 0.5189, "step": 3106 }, { "epoch": 0.5384632048699118, "grad_norm": 2.910978732565812, "learning_rate": 4.6214178808208384e-06, "loss": 0.6082, "step": 3107 }, { "epoch": 0.5386365113407422, "grad_norm": 3.240578624596028, "learning_rate": 4.618618999980965e-06, "loss": 0.577, "step": 3108 }, { "epoch": 0.5388098178115726, "grad_norm": 2.418895795584643, "learning_rate": 4.6158202393407895e-06, "loss": 0.5764, "step": 3109 }, { "epoch": 0.5389831242824029, "grad_norm": 2.535337192630047, "learning_rate": 4.6130215997823965e-06, "loss": 0.5877, "step": 3110 }, { "epoch": 0.5391564307532333, "grad_norm": 3.9413112613520043, "learning_rate": 4.610223082187832e-06, "loss": 0.6482, "step": 3111 }, { "epoch": 0.5393297372240636, "grad_norm": 3.1247809834301705, "learning_rate": 4.607424687439102e-06, "loss": 0.6141, "step": 3112 }, { "epoch": 0.539503043694894, "grad_norm": 2.7881741725356166, "learning_rate": 4.604626416418178e-06, "loss": 0.5207, "step": 3113 }, { "epoch": 0.5396763501657244, "grad_norm": 3.0147301401989504, "learning_rate": 4.601828270006988e-06, "loss": 0.6166, "step": 3114 }, { "epoch": 0.5398496566365547, "grad_norm": 2.408757194733652, "learning_rate": 4.5990302490874226e-06, "loss": 0.547, "step": 3115 }, { "epoch": 0.5400229631073851, "grad_norm": 2.573469817344662, "learning_rate": 4.596232354541335e-06, "loss": 0.5071, "step": 3116 }, { "epoch": 0.5401962695782154, "grad_norm": 2.8623302515483213, "learning_rate": 4.593434587250535e-06, "loss": 0.4236, "step": 3117 }, { "epoch": 0.5403695760490458, "grad_norm": 4.037962196518564, "learning_rate": 4.590636948096793e-06, "loss": 0.6596, "step": 3118 }, { "epoch": 0.540542882519876, "grad_norm": 2.217530654881312, "learning_rate": 4.587839437961841e-06, "loss": 0.4528, "step": 3119 }, { "epoch": 0.5407161889907064, "grad_norm": 1.9293516491063039, "learning_rate": 4.58504205772737e-06, "loss": 0.5111, "step": 3120 }, { "epoch": 0.5408894954615368, "grad_norm": 2.2111440973250818, "learning_rate": 4.582244808275027e-06, "loss": 0.487, "step": 3121 }, { "epoch": 0.5410628019323671, "grad_norm": 2.6576019243249958, "learning_rate": 4.5794476904864245e-06, "loss": 0.5089, "step": 3122 }, { "epoch": 0.5412361084031975, "grad_norm": 3.359954663082789, "learning_rate": 4.576650705243124e-06, "loss": 0.5577, "step": 3123 }, { "epoch": 0.5414094148740278, "grad_norm": 2.496655147010866, "learning_rate": 4.573853853426651e-06, "loss": 0.5642, "step": 3124 }, { "epoch": 0.5415827213448582, "grad_norm": 2.09242691585201, "learning_rate": 4.571057135918489e-06, "loss": 0.5232, "step": 3125 }, { "epoch": 0.5417560278156885, "grad_norm": 2.1992482461674654, "learning_rate": 4.568260553600078e-06, "loss": 0.501, "step": 3126 }, { "epoch": 0.5419293342865189, "grad_norm": 3.1579744574113042, "learning_rate": 4.565464107352817e-06, "loss": 0.6147, "step": 3127 }, { "epoch": 0.5421026407573493, "grad_norm": 2.1541764824014935, "learning_rate": 4.5626677980580605e-06, "loss": 0.5888, "step": 3128 }, { "epoch": 0.5422759472281796, "grad_norm": 3.2265885860672325, "learning_rate": 4.559871626597119e-06, "loss": 0.5907, "step": 3129 }, { "epoch": 0.54244925369901, "grad_norm": 2.15589375007928, "learning_rate": 4.557075593851262e-06, "loss": 0.5002, "step": 3130 }, { "epoch": 0.5426225601698403, "grad_norm": 1.9805935337728637, "learning_rate": 4.554279700701713e-06, "loss": 0.48, "step": 3131 }, { "epoch": 0.5427958666406707, "grad_norm": 4.8933579188613425, "learning_rate": 4.551483948029655e-06, "loss": 0.553, "step": 3132 }, { "epoch": 0.5429691731115011, "grad_norm": 2.3890444436137135, "learning_rate": 4.548688336716221e-06, "loss": 0.4827, "step": 3133 }, { "epoch": 0.5431424795823314, "grad_norm": 3.2296542644095165, "learning_rate": 4.545892867642503e-06, "loss": 0.6383, "step": 3134 }, { "epoch": 0.5433157860531618, "grad_norm": 2.6827422852941383, "learning_rate": 4.543097541689549e-06, "loss": 0.521, "step": 3135 }, { "epoch": 0.5434890925239921, "grad_norm": 2.29302563911402, "learning_rate": 4.540302359738361e-06, "loss": 0.5265, "step": 3136 }, { "epoch": 0.5436623989948225, "grad_norm": 2.75668577094101, "learning_rate": 4.537507322669895e-06, "loss": 0.5137, "step": 3137 }, { "epoch": 0.5438357054656529, "grad_norm": 3.0555403114064967, "learning_rate": 4.534712431365062e-06, "loss": 0.5526, "step": 3138 }, { "epoch": 0.5440090119364832, "grad_norm": 2.364505693512784, "learning_rate": 4.531917686704727e-06, "loss": 0.573, "step": 3139 }, { "epoch": 0.5441823184073136, "grad_norm": 2.396670274062137, "learning_rate": 4.529123089569706e-06, "loss": 0.6101, "step": 3140 }, { "epoch": 0.5443556248781439, "grad_norm": 2.73243781628096, "learning_rate": 4.526328640840774e-06, "loss": 0.624, "step": 3141 }, { "epoch": 0.5445289313489743, "grad_norm": 2.9469405220472735, "learning_rate": 4.5235343413986585e-06, "loss": 0.5092, "step": 3142 }, { "epoch": 0.5447022378198046, "grad_norm": 2.298527654096085, "learning_rate": 4.520740192124033e-06, "loss": 0.5741, "step": 3143 }, { "epoch": 0.544875544290635, "grad_norm": 2.923608292412859, "learning_rate": 4.5179461938975285e-06, "loss": 0.5629, "step": 3144 }, { "epoch": 0.5450488507614653, "grad_norm": 2.171813036524973, "learning_rate": 4.515152347599729e-06, "loss": 0.554, "step": 3145 }, { "epoch": 0.5452221572322956, "grad_norm": 2.565739081361965, "learning_rate": 4.512358654111173e-06, "loss": 0.5968, "step": 3146 }, { "epoch": 0.545395463703126, "grad_norm": 2.711774794038822, "learning_rate": 4.5095651143123445e-06, "loss": 0.5496, "step": 3147 }, { "epoch": 0.5455687701739563, "grad_norm": 2.7761708576259103, "learning_rate": 4.5067717290836835e-06, "loss": 0.6013, "step": 3148 }, { "epoch": 0.5457420766447867, "grad_norm": 2.2648111928681365, "learning_rate": 4.503978499305581e-06, "loss": 0.5188, "step": 3149 }, { "epoch": 0.545915383115617, "grad_norm": 3.222618509118147, "learning_rate": 4.501185425858376e-06, "loss": 0.4417, "step": 3150 }, { "epoch": 0.5460886895864474, "grad_norm": 2.3466603013780984, "learning_rate": 4.4983925096223615e-06, "loss": 0.6307, "step": 3151 }, { "epoch": 0.5462619960572778, "grad_norm": 3.1276701785287884, "learning_rate": 4.49559975147778e-06, "loss": 0.5725, "step": 3152 }, { "epoch": 0.5464353025281081, "grad_norm": 2.404457590969109, "learning_rate": 4.492807152304825e-06, "loss": 0.5479, "step": 3153 }, { "epoch": 0.5466086089989385, "grad_norm": 2.10867581568304, "learning_rate": 4.4900147129836376e-06, "loss": 0.6238, "step": 3154 }, { "epoch": 0.5467819154697688, "grad_norm": 3.5794004100313774, "learning_rate": 4.487222434394308e-06, "loss": 0.6184, "step": 3155 }, { "epoch": 0.5469552219405992, "grad_norm": 2.0380125515271232, "learning_rate": 4.48443031741688e-06, "loss": 0.4655, "step": 3156 }, { "epoch": 0.5471285284114296, "grad_norm": 2.4485489287265176, "learning_rate": 4.481638362931344e-06, "loss": 0.5695, "step": 3157 }, { "epoch": 0.5473018348822599, "grad_norm": 2.0478119112975843, "learning_rate": 4.478846571817637e-06, "loss": 0.555, "step": 3158 }, { "epoch": 0.5474751413530903, "grad_norm": 2.3301544489020607, "learning_rate": 4.47605494495565e-06, "loss": 0.5754, "step": 3159 }, { "epoch": 0.5476484478239206, "grad_norm": 2.0671733075202834, "learning_rate": 4.473263483225214e-06, "loss": 0.4916, "step": 3160 }, { "epoch": 0.547821754294751, "grad_norm": 2.232373291848928, "learning_rate": 4.470472187506119e-06, "loss": 0.5516, "step": 3161 }, { "epoch": 0.5479950607655814, "grad_norm": 2.6999427717429825, "learning_rate": 4.467681058678092e-06, "loss": 0.5657, "step": 3162 }, { "epoch": 0.5481683672364117, "grad_norm": 2.2756216678288257, "learning_rate": 4.464890097620817e-06, "loss": 0.5882, "step": 3163 }, { "epoch": 0.5483416737072421, "grad_norm": 2.5646938330081612, "learning_rate": 4.4620993052139134e-06, "loss": 0.5934, "step": 3164 }, { "epoch": 0.5485149801780724, "grad_norm": 2.393222698205191, "learning_rate": 4.459308682336956e-06, "loss": 0.4866, "step": 3165 }, { "epoch": 0.5486882866489028, "grad_norm": 2.3306783481474285, "learning_rate": 4.456518229869468e-06, "loss": 0.5484, "step": 3166 }, { "epoch": 0.5488615931197331, "grad_norm": 2.137626752559883, "learning_rate": 4.453727948690912e-06, "loss": 0.5643, "step": 3167 }, { "epoch": 0.5490348995905635, "grad_norm": 2.3264791855207645, "learning_rate": 4.450937839680701e-06, "loss": 0.5106, "step": 3168 }, { "epoch": 0.5492082060613939, "grad_norm": 3.1295416360998356, "learning_rate": 4.448147903718191e-06, "loss": 0.5491, "step": 3169 }, { "epoch": 0.5493815125322242, "grad_norm": 2.3459139927922634, "learning_rate": 4.445358141682688e-06, "loss": 0.5257, "step": 3170 }, { "epoch": 0.5495548190030545, "grad_norm": 2.8338994024683135, "learning_rate": 4.442568554453437e-06, "loss": 0.5397, "step": 3171 }, { "epoch": 0.5497281254738848, "grad_norm": 2.1657259422403077, "learning_rate": 4.439779142909632e-06, "loss": 0.529, "step": 3172 }, { "epoch": 0.5499014319447152, "grad_norm": 2.9335576342430185, "learning_rate": 4.436989907930414e-06, "loss": 0.5448, "step": 3173 }, { "epoch": 0.5500747384155455, "grad_norm": 2.1301844155134493, "learning_rate": 4.434200850394861e-06, "loss": 0.5964, "step": 3174 }, { "epoch": 0.5502480448863759, "grad_norm": 2.6128537620100647, "learning_rate": 4.431411971181999e-06, "loss": 0.4799, "step": 3175 }, { "epoch": 0.5504213513572063, "grad_norm": 2.1594316529389452, "learning_rate": 4.4286232711708e-06, "loss": 0.5407, "step": 3176 }, { "epoch": 0.5505946578280366, "grad_norm": 2.838656228313174, "learning_rate": 4.4258347512401775e-06, "loss": 0.5321, "step": 3177 }, { "epoch": 0.550767964298867, "grad_norm": 2.292949492929744, "learning_rate": 4.423046412268986e-06, "loss": 0.5602, "step": 3178 }, { "epoch": 0.5509412707696973, "grad_norm": 2.8643439814068956, "learning_rate": 4.420258255136028e-06, "loss": 0.6119, "step": 3179 }, { "epoch": 0.5511145772405277, "grad_norm": 2.3021022453512314, "learning_rate": 4.4174702807200435e-06, "loss": 0.5082, "step": 3180 }, { "epoch": 0.5512878837113581, "grad_norm": 2.050174199221346, "learning_rate": 4.414682489899719e-06, "loss": 0.495, "step": 3181 }, { "epoch": 0.5514611901821884, "grad_norm": 2.4446114410894646, "learning_rate": 4.41189488355368e-06, "loss": 0.5741, "step": 3182 }, { "epoch": 0.5516344966530188, "grad_norm": 2.7071323457505243, "learning_rate": 4.4091074625605e-06, "loss": 0.57, "step": 3183 }, { "epoch": 0.5518078031238491, "grad_norm": 2.47765758145065, "learning_rate": 4.406320227798683e-06, "loss": 0.5438, "step": 3184 }, { "epoch": 0.5519811095946795, "grad_norm": 2.372726861565659, "learning_rate": 4.403533180146682e-06, "loss": 0.528, "step": 3185 }, { "epoch": 0.5521544160655099, "grad_norm": 3.0871420262609837, "learning_rate": 4.4007463204828905e-06, "loss": 0.5844, "step": 3186 }, { "epoch": 0.5523277225363402, "grad_norm": 2.901454676029522, "learning_rate": 4.397959649685643e-06, "loss": 0.4966, "step": 3187 }, { "epoch": 0.5525010290071706, "grad_norm": 3.074625224660533, "learning_rate": 4.395173168633213e-06, "loss": 0.5544, "step": 3188 }, { "epoch": 0.5526743354780009, "grad_norm": 6.899849993471778, "learning_rate": 4.3923868782038145e-06, "loss": 0.4948, "step": 3189 }, { "epoch": 0.5528476419488313, "grad_norm": 2.708807654662812, "learning_rate": 4.389600779275601e-06, "loss": 0.5393, "step": 3190 }, { "epoch": 0.5530209484196617, "grad_norm": 4.479642697274257, "learning_rate": 4.386814872726666e-06, "loss": 0.4709, "step": 3191 }, { "epoch": 0.553194254890492, "grad_norm": 2.7439868728719823, "learning_rate": 4.384029159435044e-06, "loss": 0.5215, "step": 3192 }, { "epoch": 0.5533675613613224, "grad_norm": 3.1081684867899764, "learning_rate": 4.381243640278706e-06, "loss": 0.5125, "step": 3193 }, { "epoch": 0.5535408678321527, "grad_norm": 2.7533847201833943, "learning_rate": 4.378458316135564e-06, "loss": 0.4675, "step": 3194 }, { "epoch": 0.5537141743029831, "grad_norm": 2.412879055548356, "learning_rate": 4.375673187883466e-06, "loss": 0.4936, "step": 3195 }, { "epoch": 0.5538874807738134, "grad_norm": 2.546486859582278, "learning_rate": 4.3728882564002e-06, "loss": 0.5694, "step": 3196 }, { "epoch": 0.5540607872446437, "grad_norm": 2.688366738105846, "learning_rate": 4.370103522563493e-06, "loss": 0.5544, "step": 3197 }, { "epoch": 0.554234093715474, "grad_norm": 2.753549672333871, "learning_rate": 4.367318987251007e-06, "loss": 0.5181, "step": 3198 }, { "epoch": 0.5544074001863044, "grad_norm": 2.869205850022098, "learning_rate": 4.364534651340343e-06, "loss": 0.636, "step": 3199 }, { "epoch": 0.5545807066571348, "grad_norm": 2.804327778464598, "learning_rate": 4.36175051570904e-06, "loss": 0.5974, "step": 3200 }, { "epoch": 0.5547540131279651, "grad_norm": 2.981721622183818, "learning_rate": 4.358966581234572e-06, "loss": 0.543, "step": 3201 }, { "epoch": 0.5549273195987955, "grad_norm": 2.395224326435736, "learning_rate": 4.356182848794352e-06, "loss": 0.5598, "step": 3202 }, { "epoch": 0.5551006260696258, "grad_norm": 3.0138814164057424, "learning_rate": 4.353399319265727e-06, "loss": 0.5639, "step": 3203 }, { "epoch": 0.5552739325404562, "grad_norm": 2.4731743428626096, "learning_rate": 4.350615993525982e-06, "loss": 0.5523, "step": 3204 }, { "epoch": 0.5554472390112866, "grad_norm": 2.1921523023015617, "learning_rate": 4.347832872452334e-06, "loss": 0.5677, "step": 3205 }, { "epoch": 0.5556205454821169, "grad_norm": 2.8418567257442735, "learning_rate": 4.345049956921938e-06, "loss": 0.5085, "step": 3206 }, { "epoch": 0.5557938519529473, "grad_norm": 2.609826588218405, "learning_rate": 4.342267247811888e-06, "loss": 0.5475, "step": 3207 }, { "epoch": 0.5559671584237776, "grad_norm": 3.4255815408653016, "learning_rate": 4.339484745999206e-06, "loss": 0.6192, "step": 3208 }, { "epoch": 0.556140464894608, "grad_norm": 2.0430338487265756, "learning_rate": 4.336702452360855e-06, "loss": 0.5185, "step": 3209 }, { "epoch": 0.5563137713654384, "grad_norm": 2.9016841436658773, "learning_rate": 4.3339203677737265e-06, "loss": 0.6673, "step": 3210 }, { "epoch": 0.5564870778362687, "grad_norm": 2.532377345629201, "learning_rate": 4.33113849311465e-06, "loss": 0.5216, "step": 3211 }, { "epoch": 0.5566603843070991, "grad_norm": 2.7069113810244283, "learning_rate": 4.328356829260388e-06, "loss": 0.5421, "step": 3212 }, { "epoch": 0.5568336907779294, "grad_norm": 2.520435061358181, "learning_rate": 4.325575377087637e-06, "loss": 0.5465, "step": 3213 }, { "epoch": 0.5570069972487598, "grad_norm": 2.179261215338538, "learning_rate": 4.322794137473026e-06, "loss": 0.534, "step": 3214 }, { "epoch": 0.5571803037195902, "grad_norm": 2.412261502603604, "learning_rate": 4.320013111293116e-06, "loss": 0.6103, "step": 3215 }, { "epoch": 0.5573536101904205, "grad_norm": 2.6809286035148867, "learning_rate": 4.317232299424402e-06, "loss": 0.5623, "step": 3216 }, { "epoch": 0.5575269166612509, "grad_norm": 3.1880018465797098, "learning_rate": 4.314451702743312e-06, "loss": 0.5615, "step": 3217 }, { "epoch": 0.5577002231320812, "grad_norm": 2.9195599157707313, "learning_rate": 4.311671322126207e-06, "loss": 0.5214, "step": 3218 }, { "epoch": 0.5578735296029116, "grad_norm": 2.3306782224351044, "learning_rate": 4.308891158449375e-06, "loss": 0.5599, "step": 3219 }, { "epoch": 0.558046836073742, "grad_norm": 3.308684697652812, "learning_rate": 4.306111212589042e-06, "loss": 0.5228, "step": 3220 }, { "epoch": 0.5582201425445723, "grad_norm": 3.7614827963733637, "learning_rate": 4.3033314854213605e-06, "loss": 0.5371, "step": 3221 }, { "epoch": 0.5583934490154027, "grad_norm": 2.067889339652018, "learning_rate": 4.300551977822418e-06, "loss": 0.5873, "step": 3222 }, { "epoch": 0.5585667554862329, "grad_norm": 2.367688750629819, "learning_rate": 4.297772690668229e-06, "loss": 0.5838, "step": 3223 }, { "epoch": 0.5587400619570633, "grad_norm": 3.5466135717776344, "learning_rate": 4.294993624834743e-06, "loss": 0.5571, "step": 3224 }, { "epoch": 0.5589133684278936, "grad_norm": 2.0926403573272427, "learning_rate": 4.292214781197835e-06, "loss": 0.5849, "step": 3225 }, { "epoch": 0.559086674898724, "grad_norm": 2.796388760497937, "learning_rate": 4.28943616063331e-06, "loss": 0.5729, "step": 3226 }, { "epoch": 0.5592599813695543, "grad_norm": 2.493101877114575, "learning_rate": 4.286657764016909e-06, "loss": 0.5275, "step": 3227 }, { "epoch": 0.5594332878403847, "grad_norm": 2.4561722505258414, "learning_rate": 4.283879592224295e-06, "loss": 0.5373, "step": 3228 }, { "epoch": 0.5596065943112151, "grad_norm": 2.7324814818975276, "learning_rate": 4.281101646131065e-06, "loss": 0.6409, "step": 3229 }, { "epoch": 0.5597799007820454, "grad_norm": 2.729069307932654, "learning_rate": 4.278323926612742e-06, "loss": 0.5096, "step": 3230 }, { "epoch": 0.5599532072528758, "grad_norm": 2.1991809415787356, "learning_rate": 4.275546434544779e-06, "loss": 0.5636, "step": 3231 }, { "epoch": 0.5601265137237061, "grad_norm": 2.642423699889021, "learning_rate": 4.272769170802558e-06, "loss": 0.6159, "step": 3232 }, { "epoch": 0.5602998201945365, "grad_norm": 2.4573115327619703, "learning_rate": 4.2699921362613885e-06, "loss": 0.5983, "step": 3233 }, { "epoch": 0.5604731266653669, "grad_norm": 2.6619029605661417, "learning_rate": 4.267215331796505e-06, "loss": 0.5429, "step": 3234 }, { "epoch": 0.5606464331361972, "grad_norm": 2.3817320659808145, "learning_rate": 4.2644387582830746e-06, "loss": 0.555, "step": 3235 }, { "epoch": 0.5608197396070276, "grad_norm": 2.6327460824829356, "learning_rate": 4.261662416596187e-06, "loss": 0.4477, "step": 3236 }, { "epoch": 0.5609930460778579, "grad_norm": 2.567252191184974, "learning_rate": 4.25888630761086e-06, "loss": 0.5451, "step": 3237 }, { "epoch": 0.5611663525486883, "grad_norm": 2.6950111811997983, "learning_rate": 4.256110432202041e-06, "loss": 0.4882, "step": 3238 }, { "epoch": 0.5613396590195187, "grad_norm": 2.545249166201208, "learning_rate": 4.2533347912445995e-06, "loss": 0.6527, "step": 3239 }, { "epoch": 0.561512965490349, "grad_norm": 2.6218202007914067, "learning_rate": 4.250559385613336e-06, "loss": 0.496, "step": 3240 }, { "epoch": 0.5616862719611794, "grad_norm": 15.016559649929425, "learning_rate": 4.24778421618297e-06, "loss": 0.6485, "step": 3241 }, { "epoch": 0.5618595784320097, "grad_norm": 2.120675972770701, "learning_rate": 4.245009283828153e-06, "loss": 0.5668, "step": 3242 }, { "epoch": 0.5620328849028401, "grad_norm": 2.659377957968732, "learning_rate": 4.2422345894234595e-06, "loss": 0.5527, "step": 3243 }, { "epoch": 0.5622061913736704, "grad_norm": 2.423949499089097, "learning_rate": 4.239460133843389e-06, "loss": 0.5797, "step": 3244 }, { "epoch": 0.5623794978445008, "grad_norm": 2.2164033619155097, "learning_rate": 4.236685917962367e-06, "loss": 0.535, "step": 3245 }, { "epoch": 0.5625528043153312, "grad_norm": 2.46931726498172, "learning_rate": 4.233911942654736e-06, "loss": 0.5313, "step": 3246 }, { "epoch": 0.5627261107861615, "grad_norm": 2.129704997436139, "learning_rate": 4.231138208794774e-06, "loss": 0.5973, "step": 3247 }, { "epoch": 0.5628994172569919, "grad_norm": 2.327921373949163, "learning_rate": 4.228364717256675e-06, "loss": 0.6037, "step": 3248 }, { "epoch": 0.5630727237278221, "grad_norm": 2.2342964172161643, "learning_rate": 4.2255914689145624e-06, "loss": 0.5086, "step": 3249 }, { "epoch": 0.5632460301986525, "grad_norm": 3.5615870736878246, "learning_rate": 4.222818464642477e-06, "loss": 0.4965, "step": 3250 }, { "epoch": 0.5634193366694829, "grad_norm": 3.333482962954376, "learning_rate": 4.220045705314386e-06, "loss": 0.5538, "step": 3251 }, { "epoch": 0.5635926431403132, "grad_norm": 3.851315977162038, "learning_rate": 4.21727319180418e-06, "loss": 0.5447, "step": 3252 }, { "epoch": 0.5637659496111436, "grad_norm": 2.378740434179564, "learning_rate": 4.214500924985669e-06, "loss": 0.4551, "step": 3253 }, { "epoch": 0.5639392560819739, "grad_norm": 3.226391453995008, "learning_rate": 4.211728905732589e-06, "loss": 0.524, "step": 3254 }, { "epoch": 0.5641125625528043, "grad_norm": 2.27049965015601, "learning_rate": 4.208957134918596e-06, "loss": 0.5539, "step": 3255 }, { "epoch": 0.5642858690236346, "grad_norm": 2.0849570512495124, "learning_rate": 4.206185613417267e-06, "loss": 0.4645, "step": 3256 }, { "epoch": 0.564459175494465, "grad_norm": 2.513721434656427, "learning_rate": 4.203414342102101e-06, "loss": 0.5766, "step": 3257 }, { "epoch": 0.5646324819652954, "grad_norm": 2.1469527782252236, "learning_rate": 4.20064332184652e-06, "loss": 0.5959, "step": 3258 }, { "epoch": 0.5648057884361257, "grad_norm": 3.0387488657732713, "learning_rate": 4.197872553523864e-06, "loss": 0.5985, "step": 3259 }, { "epoch": 0.5649790949069561, "grad_norm": 2.417475842427568, "learning_rate": 4.195102038007395e-06, "loss": 0.5305, "step": 3260 }, { "epoch": 0.5651524013777864, "grad_norm": 2.5186840544273945, "learning_rate": 4.192331776170296e-06, "loss": 0.5327, "step": 3261 }, { "epoch": 0.5653257078486168, "grad_norm": 2.741774160941045, "learning_rate": 4.189561768885669e-06, "loss": 0.6071, "step": 3262 }, { "epoch": 0.5654990143194472, "grad_norm": 2.202281149544819, "learning_rate": 4.186792017026537e-06, "loss": 0.5535, "step": 3263 }, { "epoch": 0.5656723207902775, "grad_norm": 2.747407054517121, "learning_rate": 4.184022521465841e-06, "loss": 0.5547, "step": 3264 }, { "epoch": 0.5658456272611079, "grad_norm": 2.6729872380095734, "learning_rate": 4.181253283076441e-06, "loss": 0.5633, "step": 3265 }, { "epoch": 0.5660189337319382, "grad_norm": 2.5996923024081973, "learning_rate": 4.17848430273112e-06, "loss": 0.5499, "step": 3266 }, { "epoch": 0.5661922402027686, "grad_norm": 2.1231559486658527, "learning_rate": 4.17571558130257e-06, "loss": 0.4949, "step": 3267 }, { "epoch": 0.566365546673599, "grad_norm": 2.03889674556885, "learning_rate": 4.172947119663412e-06, "loss": 0.4725, "step": 3268 }, { "epoch": 0.5665388531444293, "grad_norm": 2.7715250660822113, "learning_rate": 4.170178918686182e-06, "loss": 0.5024, "step": 3269 }, { "epoch": 0.5667121596152597, "grad_norm": 2.3218784469060347, "learning_rate": 4.167410979243331e-06, "loss": 0.5822, "step": 3270 }, { "epoch": 0.56688546608609, "grad_norm": 2.2907861376217316, "learning_rate": 4.16464330220723e-06, "loss": 0.4081, "step": 3271 }, { "epoch": 0.5670587725569204, "grad_norm": 2.139100528804463, "learning_rate": 4.1618758884501655e-06, "loss": 0.4843, "step": 3272 }, { "epoch": 0.5672320790277507, "grad_norm": 2.2439802010813756, "learning_rate": 4.159108738844343e-06, "loss": 0.5369, "step": 3273 }, { "epoch": 0.5674053854985811, "grad_norm": 2.654005684173086, "learning_rate": 4.156341854261885e-06, "loss": 0.5489, "step": 3274 }, { "epoch": 0.5675786919694114, "grad_norm": 6.099714874690308, "learning_rate": 4.1535752355748275e-06, "loss": 0.5787, "step": 3275 }, { "epoch": 0.5677519984402417, "grad_norm": 2.643681219547117, "learning_rate": 4.150808883655128e-06, "loss": 0.5675, "step": 3276 }, { "epoch": 0.5679253049110721, "grad_norm": 3.443555931267729, "learning_rate": 4.148042799374652e-06, "loss": 0.5477, "step": 3277 }, { "epoch": 0.5680986113819024, "grad_norm": 2.434407695269327, "learning_rate": 4.145276983605187e-06, "loss": 0.5311, "step": 3278 }, { "epoch": 0.5682719178527328, "grad_norm": 4.230879151415503, "learning_rate": 4.142511437218435e-06, "loss": 0.4823, "step": 3279 }, { "epoch": 0.5684452243235631, "grad_norm": 2.6114010773718563, "learning_rate": 4.139746161086011e-06, "loss": 0.5714, "step": 3280 }, { "epoch": 0.5686185307943935, "grad_norm": 2.413837469471593, "learning_rate": 4.136981156079447e-06, "loss": 0.5339, "step": 3281 }, { "epoch": 0.5687918372652239, "grad_norm": 2.16702460099038, "learning_rate": 4.134216423070186e-06, "loss": 0.5479, "step": 3282 }, { "epoch": 0.5689651437360542, "grad_norm": 2.343117747958525, "learning_rate": 4.131451962929591e-06, "loss": 0.522, "step": 3283 }, { "epoch": 0.5691384502068846, "grad_norm": 2.7957864474745953, "learning_rate": 4.128687776528935e-06, "loss": 0.589, "step": 3284 }, { "epoch": 0.5693117566777149, "grad_norm": 2.6050276182671204, "learning_rate": 4.1259238647394055e-06, "loss": 0.5576, "step": 3285 }, { "epoch": 0.5694850631485453, "grad_norm": 2.3778045620127375, "learning_rate": 4.1231602284321044e-06, "loss": 0.553, "step": 3286 }, { "epoch": 0.5696583696193757, "grad_norm": 2.4772339287271783, "learning_rate": 4.120396868478041e-06, "loss": 0.5789, "step": 3287 }, { "epoch": 0.569831676090206, "grad_norm": 2.746348872152362, "learning_rate": 4.117633785748148e-06, "loss": 0.5846, "step": 3288 }, { "epoch": 0.5700049825610364, "grad_norm": 2.230513962684292, "learning_rate": 4.1148709811132626e-06, "loss": 0.501, "step": 3289 }, { "epoch": 0.5701782890318667, "grad_norm": 2.1887562708201362, "learning_rate": 4.112108455444137e-06, "loss": 0.5832, "step": 3290 }, { "epoch": 0.5703515955026971, "grad_norm": 2.926108946388714, "learning_rate": 4.109346209611436e-06, "loss": 0.5654, "step": 3291 }, { "epoch": 0.5705249019735275, "grad_norm": 2.325162655161168, "learning_rate": 4.106584244485734e-06, "loss": 0.4824, "step": 3292 }, { "epoch": 0.5706982084443578, "grad_norm": 2.2272179652791864, "learning_rate": 4.103822560937521e-06, "loss": 0.586, "step": 3293 }, { "epoch": 0.5708715149151882, "grad_norm": 3.523836001556688, "learning_rate": 4.101061159837193e-06, "loss": 0.4959, "step": 3294 }, { "epoch": 0.5710448213860185, "grad_norm": 2.285336764694058, "learning_rate": 4.0983000420550615e-06, "loss": 0.528, "step": 3295 }, { "epoch": 0.5712181278568489, "grad_norm": 2.629283485937955, "learning_rate": 4.095539208461346e-06, "loss": 0.5359, "step": 3296 }, { "epoch": 0.5713914343276792, "grad_norm": 2.59893541257234, "learning_rate": 4.092778659926178e-06, "loss": 0.5473, "step": 3297 }, { "epoch": 0.5715647407985096, "grad_norm": 2.1617617596541576, "learning_rate": 4.090018397319598e-06, "loss": 0.4665, "step": 3298 }, { "epoch": 0.57173804726934, "grad_norm": 2.234713999499453, "learning_rate": 4.087258421511556e-06, "loss": 0.5608, "step": 3299 }, { "epoch": 0.5719113537401703, "grad_norm": 2.443691436679229, "learning_rate": 4.084498733371914e-06, "loss": 0.6382, "step": 3300 }, { "epoch": 0.5720846602110006, "grad_norm": 2.888383905596858, "learning_rate": 4.081739333770441e-06, "loss": 0.5655, "step": 3301 }, { "epoch": 0.5722579666818309, "grad_norm": 2.4292156803399134, "learning_rate": 4.078980223576815e-06, "loss": 0.5782, "step": 3302 }, { "epoch": 0.5724312731526613, "grad_norm": 3.1795191075408806, "learning_rate": 4.076221403660626e-06, "loss": 0.5534, "step": 3303 }, { "epoch": 0.5726045796234916, "grad_norm": 2.063378632924648, "learning_rate": 4.073462874891369e-06, "loss": 0.5444, "step": 3304 }, { "epoch": 0.572777886094322, "grad_norm": 2.843199209607624, "learning_rate": 4.070704638138448e-06, "loss": 0.5224, "step": 3305 }, { "epoch": 0.5729511925651524, "grad_norm": 2.1444445731490034, "learning_rate": 4.0679466942711766e-06, "loss": 0.5325, "step": 3306 }, { "epoch": 0.5731244990359827, "grad_norm": 4.980009069677583, "learning_rate": 4.065189044158774e-06, "loss": 0.6481, "step": 3307 }, { "epoch": 0.5732978055068131, "grad_norm": 2.53522046736866, "learning_rate": 4.0624316886703665e-06, "loss": 0.6387, "step": 3308 }, { "epoch": 0.5734711119776434, "grad_norm": 6.059961923793085, "learning_rate": 4.0596746286749905e-06, "loss": 0.5189, "step": 3309 }, { "epoch": 0.5736444184484738, "grad_norm": 2.1938103125227344, "learning_rate": 4.056917865041587e-06, "loss": 0.5907, "step": 3310 }, { "epoch": 0.5738177249193042, "grad_norm": 2.2184185778349583, "learning_rate": 4.054161398639003e-06, "loss": 0.5183, "step": 3311 }, { "epoch": 0.5739910313901345, "grad_norm": 2.4960377804108314, "learning_rate": 4.0514052303359955e-06, "loss": 0.5881, "step": 3312 }, { "epoch": 0.5741643378609649, "grad_norm": 2.230971088952085, "learning_rate": 4.048649361001222e-06, "loss": 0.509, "step": 3313 }, { "epoch": 0.5743376443317952, "grad_norm": 3.760224736856237, "learning_rate": 4.04589379150325e-06, "loss": 0.5022, "step": 3314 }, { "epoch": 0.5745109508026256, "grad_norm": 2.2502375648495696, "learning_rate": 4.043138522710552e-06, "loss": 0.4715, "step": 3315 }, { "epoch": 0.574684257273456, "grad_norm": 2.4641996832746735, "learning_rate": 4.0403835554915045e-06, "loss": 0.4638, "step": 3316 }, { "epoch": 0.5748575637442863, "grad_norm": 7.015746791264957, "learning_rate": 4.03762889071439e-06, "loss": 0.5267, "step": 3317 }, { "epoch": 0.5750308702151167, "grad_norm": 2.732478585795857, "learning_rate": 4.034874529247393e-06, "loss": 0.5354, "step": 3318 }, { "epoch": 0.575204176685947, "grad_norm": 2.1230504785192754, "learning_rate": 4.032120471958607e-06, "loss": 0.4671, "step": 3319 }, { "epoch": 0.5753774831567774, "grad_norm": 2.6497907447199083, "learning_rate": 4.029366719716025e-06, "loss": 0.5098, "step": 3320 }, { "epoch": 0.5755507896276078, "grad_norm": 2.6181795921372344, "learning_rate": 4.026613273387546e-06, "loss": 0.5745, "step": 3321 }, { "epoch": 0.5757240960984381, "grad_norm": 3.619291790956396, "learning_rate": 4.023860133840974e-06, "loss": 0.6664, "step": 3322 }, { "epoch": 0.5758974025692685, "grad_norm": 2.5993033653074105, "learning_rate": 4.021107301944015e-06, "loss": 0.5859, "step": 3323 }, { "epoch": 0.5760707090400988, "grad_norm": 3.3237925460253535, "learning_rate": 4.018354778564278e-06, "loss": 0.4868, "step": 3324 }, { "epoch": 0.5762440155109292, "grad_norm": 1.9434809157548762, "learning_rate": 4.015602564569273e-06, "loss": 0.4855, "step": 3325 }, { "epoch": 0.5764173219817595, "grad_norm": 2.516797097141749, "learning_rate": 4.012850660826416e-06, "loss": 0.5695, "step": 3326 }, { "epoch": 0.5765906284525899, "grad_norm": 2.5346671155426224, "learning_rate": 4.010099068203024e-06, "loss": 0.5638, "step": 3327 }, { "epoch": 0.5767639349234202, "grad_norm": 2.656450709058591, "learning_rate": 4.007347787566311e-06, "loss": 0.5271, "step": 3328 }, { "epoch": 0.5769372413942505, "grad_norm": 2.5796907611868654, "learning_rate": 4.004596819783402e-06, "loss": 0.5897, "step": 3329 }, { "epoch": 0.5771105478650809, "grad_norm": 2.293149420663644, "learning_rate": 4.0018461657213155e-06, "loss": 0.6129, "step": 3330 }, { "epoch": 0.5772838543359112, "grad_norm": 2.136009716627357, "learning_rate": 3.999095826246976e-06, "loss": 0.5839, "step": 3331 }, { "epoch": 0.5774571608067416, "grad_norm": 2.3181257729544558, "learning_rate": 3.996345802227205e-06, "loss": 0.5403, "step": 3332 }, { "epoch": 0.577630467277572, "grad_norm": 4.20284815851564, "learning_rate": 3.993596094528728e-06, "loss": 0.5112, "step": 3333 }, { "epoch": 0.5778037737484023, "grad_norm": 2.746730391847924, "learning_rate": 3.990846704018169e-06, "loss": 0.4922, "step": 3334 }, { "epoch": 0.5779770802192327, "grad_norm": 2.442823576070426, "learning_rate": 3.988097631562051e-06, "loss": 0.4485, "step": 3335 }, { "epoch": 0.578150386690063, "grad_norm": 2.1587055933059496, "learning_rate": 3.985348878026799e-06, "loss": 0.5292, "step": 3336 }, { "epoch": 0.5783236931608934, "grad_norm": 4.876332753953352, "learning_rate": 3.982600444278736e-06, "loss": 0.5866, "step": 3337 }, { "epoch": 0.5784969996317237, "grad_norm": 1.8728876247870543, "learning_rate": 3.979852331184089e-06, "loss": 0.4504, "step": 3338 }, { "epoch": 0.5786703061025541, "grad_norm": 1.9567616934044485, "learning_rate": 3.977104539608973e-06, "loss": 0.5216, "step": 3339 }, { "epoch": 0.5788436125733845, "grad_norm": 4.577943750098703, "learning_rate": 3.974357070419412e-06, "loss": 0.6161, "step": 3340 }, { "epoch": 0.5790169190442148, "grad_norm": 2.00203494453719, "learning_rate": 3.971609924481323e-06, "loss": 0.5038, "step": 3341 }, { "epoch": 0.5791902255150452, "grad_norm": 2.3756199450035624, "learning_rate": 3.968863102660526e-06, "loss": 0.5622, "step": 3342 }, { "epoch": 0.5793635319858755, "grad_norm": 2.3696225818309236, "learning_rate": 3.966116605822732e-06, "loss": 0.5371, "step": 3343 }, { "epoch": 0.5795368384567059, "grad_norm": 2.775601609705774, "learning_rate": 3.963370434833556e-06, "loss": 0.5404, "step": 3344 }, { "epoch": 0.5797101449275363, "grad_norm": 2.666405916101211, "learning_rate": 3.960624590558507e-06, "loss": 0.5649, "step": 3345 }, { "epoch": 0.5798834513983666, "grad_norm": 3.496958371862612, "learning_rate": 3.957879073862991e-06, "loss": 0.5264, "step": 3346 }, { "epoch": 0.580056757869197, "grad_norm": 2.345710485016373, "learning_rate": 3.955133885612312e-06, "loss": 0.5788, "step": 3347 }, { "epoch": 0.5802300643400273, "grad_norm": 2.86029951837399, "learning_rate": 3.952389026671671e-06, "loss": 0.5026, "step": 3348 }, { "epoch": 0.5804033708108577, "grad_norm": 2.7423365526777226, "learning_rate": 3.949644497906161e-06, "loss": 0.5751, "step": 3349 }, { "epoch": 0.580576677281688, "grad_norm": 2.6023214653043882, "learning_rate": 3.946900300180777e-06, "loss": 0.5204, "step": 3350 }, { "epoch": 0.5807499837525184, "grad_norm": 3.4358336160779053, "learning_rate": 3.944156434360405e-06, "loss": 0.5192, "step": 3351 }, { "epoch": 0.5809232902233488, "grad_norm": 3.4033388949015086, "learning_rate": 3.941412901309829e-06, "loss": 0.5704, "step": 3352 }, { "epoch": 0.5810965966941791, "grad_norm": 3.3269091814120944, "learning_rate": 3.938669701893726e-06, "loss": 0.6501, "step": 3353 }, { "epoch": 0.5812699031650094, "grad_norm": 3.728181874411945, "learning_rate": 3.93592683697667e-06, "loss": 0.5766, "step": 3354 }, { "epoch": 0.5814432096358397, "grad_norm": 4.109771654781591, "learning_rate": 3.933184307423129e-06, "loss": 0.552, "step": 3355 }, { "epoch": 0.5816165161066701, "grad_norm": 1.9528344969973248, "learning_rate": 3.9304421140974645e-06, "loss": 0.4771, "step": 3356 }, { "epoch": 0.5817898225775004, "grad_norm": 2.686089410234881, "learning_rate": 3.927700257863931e-06, "loss": 0.4714, "step": 3357 }, { "epoch": 0.5819631290483308, "grad_norm": 2.910876591721822, "learning_rate": 3.924958739586684e-06, "loss": 0.5259, "step": 3358 }, { "epoch": 0.5821364355191612, "grad_norm": 2.4976034183118734, "learning_rate": 3.92221756012976e-06, "loss": 0.6103, "step": 3359 }, { "epoch": 0.5823097419899915, "grad_norm": 2.575053940056639, "learning_rate": 3.919476720357097e-06, "loss": 0.5733, "step": 3360 }, { "epoch": 0.5824830484608219, "grad_norm": 3.3000675869440332, "learning_rate": 3.916736221132525e-06, "loss": 0.466, "step": 3361 }, { "epoch": 0.5826563549316522, "grad_norm": 2.3888421182276836, "learning_rate": 3.913996063319768e-06, "loss": 0.5523, "step": 3362 }, { "epoch": 0.5828296614024826, "grad_norm": 4.78539068719013, "learning_rate": 3.911256247782437e-06, "loss": 0.5085, "step": 3363 }, { "epoch": 0.583002967873313, "grad_norm": 2.126443642168326, "learning_rate": 3.908516775384041e-06, "loss": 0.5026, "step": 3364 }, { "epoch": 0.5831762743441433, "grad_norm": 2.5866619864468374, "learning_rate": 3.905777646987978e-06, "loss": 0.6074, "step": 3365 }, { "epoch": 0.5833495808149737, "grad_norm": 2.5787198276923116, "learning_rate": 3.903038863457537e-06, "loss": 0.5653, "step": 3366 }, { "epoch": 0.583522887285804, "grad_norm": 2.326456836819896, "learning_rate": 3.900300425655901e-06, "loss": 0.5447, "step": 3367 }, { "epoch": 0.5836961937566344, "grad_norm": 2.4386344314051196, "learning_rate": 3.897562334446141e-06, "loss": 0.5846, "step": 3368 }, { "epoch": 0.5838695002274648, "grad_norm": 2.482259477860858, "learning_rate": 3.894824590691222e-06, "loss": 0.5344, "step": 3369 }, { "epoch": 0.5840428066982951, "grad_norm": 2.3680184987420603, "learning_rate": 3.8920871952539945e-06, "loss": 0.5477, "step": 3370 }, { "epoch": 0.5842161131691255, "grad_norm": 2.3871846268329833, "learning_rate": 3.889350148997204e-06, "loss": 0.5431, "step": 3371 }, { "epoch": 0.5843894196399558, "grad_norm": 2.2883671876943064, "learning_rate": 3.886613452783485e-06, "loss": 0.5026, "step": 3372 }, { "epoch": 0.5845627261107862, "grad_norm": 2.772920656810955, "learning_rate": 3.8838771074753586e-06, "loss": 0.4931, "step": 3373 }, { "epoch": 0.5847360325816165, "grad_norm": 2.3792362217695984, "learning_rate": 3.881141113935241e-06, "loss": 0.5615, "step": 3374 }, { "epoch": 0.5849093390524469, "grad_norm": 2.2837578510638767, "learning_rate": 3.878405473025432e-06, "loss": 0.4391, "step": 3375 }, { "epoch": 0.5850826455232773, "grad_norm": 2.052816597113083, "learning_rate": 3.875670185608123e-06, "loss": 0.549, "step": 3376 }, { "epoch": 0.5852559519941076, "grad_norm": 3.8338034718516485, "learning_rate": 3.872935252545392e-06, "loss": 0.5201, "step": 3377 }, { "epoch": 0.585429258464938, "grad_norm": 2.875655101687704, "learning_rate": 3.870200674699208e-06, "loss": 0.5802, "step": 3378 }, { "epoch": 0.5856025649357683, "grad_norm": 4.099595430337407, "learning_rate": 3.867466452931431e-06, "loss": 0.4888, "step": 3379 }, { "epoch": 0.5857758714065986, "grad_norm": 2.097966128887208, "learning_rate": 3.864732588103798e-06, "loss": 0.4315, "step": 3380 }, { "epoch": 0.585949177877429, "grad_norm": 4.964133723047509, "learning_rate": 3.861999081077941e-06, "loss": 0.54, "step": 3381 }, { "epoch": 0.5861224843482593, "grad_norm": 2.3834759882403196, "learning_rate": 3.859265932715381e-06, "loss": 0.5871, "step": 3382 }, { "epoch": 0.5862957908190897, "grad_norm": 6.75809056126358, "learning_rate": 3.856533143877522e-06, "loss": 0.5078, "step": 3383 }, { "epoch": 0.58646909728992, "grad_norm": 2.761329560003563, "learning_rate": 3.8538007154256565e-06, "loss": 0.646, "step": 3384 }, { "epoch": 0.5866424037607504, "grad_norm": 2.4642540377962674, "learning_rate": 3.851068648220962e-06, "loss": 0.5453, "step": 3385 }, { "epoch": 0.5868157102315807, "grad_norm": 2.3208988879259684, "learning_rate": 3.8483369431245045e-06, "loss": 0.5336, "step": 3386 }, { "epoch": 0.5869890167024111, "grad_norm": 2.402762589557925, "learning_rate": 3.845605600997233e-06, "loss": 0.5034, "step": 3387 }, { "epoch": 0.5871623231732415, "grad_norm": 2.175981473410599, "learning_rate": 3.842874622699984e-06, "loss": 0.6381, "step": 3388 }, { "epoch": 0.5873356296440718, "grad_norm": 2.572605351245252, "learning_rate": 3.84014400909348e-06, "loss": 0.4889, "step": 3389 }, { "epoch": 0.5875089361149022, "grad_norm": 2.1103746824186307, "learning_rate": 3.837413761038325e-06, "loss": 0.5399, "step": 3390 }, { "epoch": 0.5876822425857325, "grad_norm": 2.728410066515366, "learning_rate": 3.834683879395012e-06, "loss": 0.5277, "step": 3391 }, { "epoch": 0.5878555490565629, "grad_norm": 3.3881556853663253, "learning_rate": 3.831954365023915e-06, "loss": 0.5943, "step": 3392 }, { "epoch": 0.5880288555273933, "grad_norm": 2.275013810588057, "learning_rate": 3.8292252187852964e-06, "loss": 0.5454, "step": 3393 }, { "epoch": 0.5882021619982236, "grad_norm": 2.0627664827248426, "learning_rate": 3.826496441539298e-06, "loss": 0.5342, "step": 3394 }, { "epoch": 0.588375468469054, "grad_norm": 2.308540089135694, "learning_rate": 3.8237680341459484e-06, "loss": 0.5679, "step": 3395 }, { "epoch": 0.5885487749398843, "grad_norm": 2.4059744574405015, "learning_rate": 3.821039997465159e-06, "loss": 0.5452, "step": 3396 }, { "epoch": 0.5887220814107147, "grad_norm": 2.3889254468555365, "learning_rate": 3.818312332356723e-06, "loss": 0.4955, "step": 3397 }, { "epoch": 0.588895387881545, "grad_norm": 2.7557725595583675, "learning_rate": 3.815585039680317e-06, "loss": 0.5024, "step": 3398 }, { "epoch": 0.5890686943523754, "grad_norm": 2.501101014361226, "learning_rate": 3.8128581202955057e-06, "loss": 0.5451, "step": 3399 }, { "epoch": 0.5892420008232058, "grad_norm": 3.253881078672498, "learning_rate": 3.8101315750617246e-06, "loss": 0.5285, "step": 3400 }, { "epoch": 0.5894153072940361, "grad_norm": 3.792800553562896, "learning_rate": 3.8074054048383008e-06, "loss": 0.5411, "step": 3401 }, { "epoch": 0.5895886137648665, "grad_norm": 3.6338883038494285, "learning_rate": 3.80467961048444e-06, "loss": 0.4455, "step": 3402 }, { "epoch": 0.5897619202356968, "grad_norm": 2.2917861999407347, "learning_rate": 3.8019541928592286e-06, "loss": 0.5336, "step": 3403 }, { "epoch": 0.5899352267065272, "grad_norm": 5.4857303980367815, "learning_rate": 3.799229152821638e-06, "loss": 0.5907, "step": 3404 }, { "epoch": 0.5901085331773576, "grad_norm": 4.126575878919238, "learning_rate": 3.7965044912305165e-06, "loss": 0.5245, "step": 3405 }, { "epoch": 0.5902818396481878, "grad_norm": 2.1592392751011187, "learning_rate": 3.793780208944595e-06, "loss": 0.5628, "step": 3406 }, { "epoch": 0.5904551461190182, "grad_norm": 2.994372837706102, "learning_rate": 3.7910563068224835e-06, "loss": 0.5558, "step": 3407 }, { "epoch": 0.5906284525898485, "grad_norm": 3.591797825031066, "learning_rate": 3.7883327857226737e-06, "loss": 0.5956, "step": 3408 }, { "epoch": 0.5908017590606789, "grad_norm": 2.8775154665714626, "learning_rate": 3.7856096465035376e-06, "loss": 0.5806, "step": 3409 }, { "epoch": 0.5909750655315092, "grad_norm": 2.251244381457223, "learning_rate": 3.782886890023325e-06, "loss": 0.4971, "step": 3410 }, { "epoch": 0.5911483720023396, "grad_norm": 3.225091454765862, "learning_rate": 3.780164517140166e-06, "loss": 0.5451, "step": 3411 }, { "epoch": 0.59132167847317, "grad_norm": 2.4625154174364243, "learning_rate": 3.77744252871207e-06, "loss": 0.5323, "step": 3412 }, { "epoch": 0.5914949849440003, "grad_norm": 4.389490416055362, "learning_rate": 3.7747209255969236e-06, "loss": 0.5571, "step": 3413 }, { "epoch": 0.5916682914148307, "grad_norm": 2.4138489912228875, "learning_rate": 3.771999708652495e-06, "loss": 0.4748, "step": 3414 }, { "epoch": 0.591841597885661, "grad_norm": 2.198791057458972, "learning_rate": 3.7692788787364294e-06, "loss": 0.5326, "step": 3415 }, { "epoch": 0.5920149043564914, "grad_norm": 2.7530559033583963, "learning_rate": 3.766558436706248e-06, "loss": 0.4383, "step": 3416 }, { "epoch": 0.5921882108273218, "grad_norm": 3.4670522653446527, "learning_rate": 3.7638383834193527e-06, "loss": 0.539, "step": 3417 }, { "epoch": 0.5923615172981521, "grad_norm": 2.531781537443317, "learning_rate": 3.7611187197330203e-06, "loss": 0.5668, "step": 3418 }, { "epoch": 0.5925348237689825, "grad_norm": 2.607764084273668, "learning_rate": 3.7583994465044083e-06, "loss": 0.5221, "step": 3419 }, { "epoch": 0.5927081302398128, "grad_norm": 2.071192908383183, "learning_rate": 3.7556805645905494e-06, "loss": 0.4891, "step": 3420 }, { "epoch": 0.5928814367106432, "grad_norm": 2.1961481282072612, "learning_rate": 3.7529620748483486e-06, "loss": 0.5613, "step": 3421 }, { "epoch": 0.5930547431814736, "grad_norm": 2.1826706821636916, "learning_rate": 3.7502439781345946e-06, "loss": 0.4715, "step": 3422 }, { "epoch": 0.5932280496523039, "grad_norm": 2.355506933310607, "learning_rate": 3.7475262753059464e-06, "loss": 0.5427, "step": 3423 }, { "epoch": 0.5934013561231343, "grad_norm": 2.0001831417292464, "learning_rate": 3.7448089672189425e-06, "loss": 0.498, "step": 3424 }, { "epoch": 0.5935746625939646, "grad_norm": 2.6737215498292426, "learning_rate": 3.7420920547299966e-06, "loss": 0.5634, "step": 3425 }, { "epoch": 0.593747969064795, "grad_norm": 2.4912133011728206, "learning_rate": 3.739375538695397e-06, "loss": 0.5989, "step": 3426 }, { "epoch": 0.5939212755356253, "grad_norm": 2.122774148601711, "learning_rate": 3.7366594199713054e-06, "loss": 0.5386, "step": 3427 }, { "epoch": 0.5940945820064557, "grad_norm": 2.2955279671995807, "learning_rate": 3.7339436994137614e-06, "loss": 0.6513, "step": 3428 }, { "epoch": 0.5942678884772861, "grad_norm": 1.973273517733668, "learning_rate": 3.7312283778786763e-06, "loss": 0.5227, "step": 3429 }, { "epoch": 0.5944411949481164, "grad_norm": 2.34714361303177, "learning_rate": 3.728513456221839e-06, "loss": 0.5777, "step": 3430 }, { "epoch": 0.5946145014189468, "grad_norm": 2.3622109470675645, "learning_rate": 3.7257989352989078e-06, "loss": 0.577, "step": 3431 }, { "epoch": 0.594787807889777, "grad_norm": 2.1963274097115195, "learning_rate": 3.7230848159654177e-06, "loss": 0.5427, "step": 3432 }, { "epoch": 0.5949611143606074, "grad_norm": 2.448071433262157, "learning_rate": 3.720371099076777e-06, "loss": 0.5771, "step": 3433 }, { "epoch": 0.5951344208314377, "grad_norm": 2.616615977130409, "learning_rate": 3.717657785488267e-06, "loss": 0.4865, "step": 3434 }, { "epoch": 0.5953077273022681, "grad_norm": 3.253630523271998, "learning_rate": 3.7149448760550423e-06, "loss": 0.5858, "step": 3435 }, { "epoch": 0.5954810337730985, "grad_norm": 2.1083840992588563, "learning_rate": 3.712232371632127e-06, "loss": 0.4582, "step": 3436 }, { "epoch": 0.5956543402439288, "grad_norm": 2.9046388819539506, "learning_rate": 3.7095202730744233e-06, "loss": 0.6202, "step": 3437 }, { "epoch": 0.5958276467147592, "grad_norm": 2.5455272066793255, "learning_rate": 3.7068085812367003e-06, "loss": 0.5691, "step": 3438 }, { "epoch": 0.5960009531855895, "grad_norm": 2.2952665732026274, "learning_rate": 3.7040972969736e-06, "loss": 0.589, "step": 3439 }, { "epoch": 0.5961742596564199, "grad_norm": 2.543823901014359, "learning_rate": 3.701386421139639e-06, "loss": 0.5232, "step": 3440 }, { "epoch": 0.5963475661272503, "grad_norm": 2.0308288294893884, "learning_rate": 3.698675954589204e-06, "loss": 0.4676, "step": 3441 }, { "epoch": 0.5965208725980806, "grad_norm": 2.8725054154420624, "learning_rate": 3.6959658981765467e-06, "loss": 0.5268, "step": 3442 }, { "epoch": 0.596694179068911, "grad_norm": 2.655800093776496, "learning_rate": 3.6932562527557986e-06, "loss": 0.5659, "step": 3443 }, { "epoch": 0.5968674855397413, "grad_norm": 3.9280464106194533, "learning_rate": 3.6905470191809547e-06, "loss": 0.5077, "step": 3444 }, { "epoch": 0.5970407920105717, "grad_norm": 2.2549025351437546, "learning_rate": 3.687838198305886e-06, "loss": 0.5207, "step": 3445 }, { "epoch": 0.5972140984814021, "grad_norm": 2.8503710689047086, "learning_rate": 3.6851297909843285e-06, "loss": 0.566, "step": 3446 }, { "epoch": 0.5973874049522324, "grad_norm": 2.0790111117296286, "learning_rate": 3.682421798069892e-06, "loss": 0.5935, "step": 3447 }, { "epoch": 0.5975607114230628, "grad_norm": 2.9030534775899013, "learning_rate": 3.6797142204160507e-06, "loss": 0.5346, "step": 3448 }, { "epoch": 0.5977340178938931, "grad_norm": 4.153782332273463, "learning_rate": 3.6770070588761523e-06, "loss": 0.4548, "step": 3449 }, { "epoch": 0.5979073243647235, "grad_norm": 2.2975160068361795, "learning_rate": 3.674300314303412e-06, "loss": 0.5548, "step": 3450 }, { "epoch": 0.5980806308355539, "grad_norm": 2.3446474441919056, "learning_rate": 3.6715939875509145e-06, "loss": 0.6323, "step": 3451 }, { "epoch": 0.5982539373063842, "grad_norm": 2.3545137038615938, "learning_rate": 3.668888079471609e-06, "loss": 0.5592, "step": 3452 }, { "epoch": 0.5984272437772146, "grad_norm": 2.479138085896012, "learning_rate": 3.666182590918317e-06, "loss": 0.5297, "step": 3453 }, { "epoch": 0.5986005502480449, "grad_norm": 2.7661358128826237, "learning_rate": 3.663477522743726e-06, "loss": 0.5737, "step": 3454 }, { "epoch": 0.5987738567188753, "grad_norm": 2.882221258806414, "learning_rate": 3.6607728758003912e-06, "loss": 0.6255, "step": 3455 }, { "epoch": 0.5989471631897056, "grad_norm": 2.4886207506856617, "learning_rate": 3.658068650940735e-06, "loss": 0.5087, "step": 3456 }, { "epoch": 0.599120469660536, "grad_norm": 2.4645690188830502, "learning_rate": 3.6553648490170474e-06, "loss": 0.5369, "step": 3457 }, { "epoch": 0.5992937761313663, "grad_norm": 2.0082297188046505, "learning_rate": 3.652661470881484e-06, "loss": 0.5317, "step": 3458 }, { "epoch": 0.5994670826021966, "grad_norm": 3.4411896020024058, "learning_rate": 3.649958517386067e-06, "loss": 0.5341, "step": 3459 }, { "epoch": 0.599640389073027, "grad_norm": 2.2603342816897416, "learning_rate": 3.6472559893826866e-06, "loss": 0.4884, "step": 3460 }, { "epoch": 0.5998136955438573, "grad_norm": 2.3457203761534227, "learning_rate": 3.6445538877230992e-06, "loss": 0.5081, "step": 3461 }, { "epoch": 0.5999870020146877, "grad_norm": 2.534740191032947, "learning_rate": 3.6418522132589198e-06, "loss": 0.5396, "step": 3462 }, { "epoch": 0.600160308485518, "grad_norm": 2.1278187684706444, "learning_rate": 3.639150966841637e-06, "loss": 0.6141, "step": 3463 }, { "epoch": 0.6003336149563484, "grad_norm": 2.5081208378888715, "learning_rate": 3.6364501493226007e-06, "loss": 0.5887, "step": 3464 }, { "epoch": 0.6005069214271788, "grad_norm": 2.8626746994489243, "learning_rate": 3.6337497615530283e-06, "loss": 0.5867, "step": 3465 }, { "epoch": 0.6006802278980091, "grad_norm": 3.2725621807638143, "learning_rate": 3.631049804383998e-06, "loss": 0.4536, "step": 3466 }, { "epoch": 0.6008535343688395, "grad_norm": 2.1406967036373907, "learning_rate": 3.628350278666456e-06, "loss": 0.6034, "step": 3467 }, { "epoch": 0.6010268408396698, "grad_norm": 2.1216589961162753, "learning_rate": 3.6256511852512098e-06, "loss": 0.5067, "step": 3468 }, { "epoch": 0.6012001473105002, "grad_norm": 2.1874499678070976, "learning_rate": 3.6229525249889313e-06, "loss": 0.4596, "step": 3469 }, { "epoch": 0.6013734537813306, "grad_norm": 3.580462004871738, "learning_rate": 3.620254298730157e-06, "loss": 0.5132, "step": 3470 }, { "epoch": 0.6015467602521609, "grad_norm": 2.3557507277757983, "learning_rate": 3.6175565073252863e-06, "loss": 0.5662, "step": 3471 }, { "epoch": 0.6017200667229913, "grad_norm": 2.328364251478081, "learning_rate": 3.614859151624578e-06, "loss": 0.501, "step": 3472 }, { "epoch": 0.6018933731938216, "grad_norm": 2.0943936795991553, "learning_rate": 3.612162232478159e-06, "loss": 0.4704, "step": 3473 }, { "epoch": 0.602066679664652, "grad_norm": 2.450624239495394, "learning_rate": 3.6094657507360165e-06, "loss": 0.4397, "step": 3474 }, { "epoch": 0.6022399861354824, "grad_norm": 2.399035839371011, "learning_rate": 3.6067697072479992e-06, "loss": 0.5162, "step": 3475 }, { "epoch": 0.6024132926063127, "grad_norm": 2.7747046260222543, "learning_rate": 3.6040741028638167e-06, "loss": 0.5389, "step": 3476 }, { "epoch": 0.6025865990771431, "grad_norm": 3.0442353674193723, "learning_rate": 3.6013789384330435e-06, "loss": 0.5105, "step": 3477 }, { "epoch": 0.6027599055479734, "grad_norm": 2.754406120404231, "learning_rate": 3.598684214805112e-06, "loss": 0.53, "step": 3478 }, { "epoch": 0.6029332120188038, "grad_norm": 2.4445198953172604, "learning_rate": 3.5959899328293167e-06, "loss": 0.4734, "step": 3479 }, { "epoch": 0.6031065184896341, "grad_norm": 2.282981992173334, "learning_rate": 3.5932960933548167e-06, "loss": 0.6529, "step": 3480 }, { "epoch": 0.6032798249604645, "grad_norm": 2.268855950098029, "learning_rate": 3.590602697230625e-06, "loss": 0.6301, "step": 3481 }, { "epoch": 0.6034531314312949, "grad_norm": 2.6617791013740044, "learning_rate": 3.5879097453056223e-06, "loss": 0.5278, "step": 3482 }, { "epoch": 0.6036264379021252, "grad_norm": 2.392907740520216, "learning_rate": 3.5852172384285394e-06, "loss": 0.647, "step": 3483 }, { "epoch": 0.6037997443729555, "grad_norm": 3.1772780758346535, "learning_rate": 3.582525177447976e-06, "loss": 0.6442, "step": 3484 }, { "epoch": 0.6039730508437858, "grad_norm": 2.3796557547280015, "learning_rate": 3.5798335632123858e-06, "loss": 0.5614, "step": 3485 }, { "epoch": 0.6041463573146162, "grad_norm": 2.1908292566362406, "learning_rate": 3.5771423965700863e-06, "loss": 0.5167, "step": 3486 }, { "epoch": 0.6043196637854465, "grad_norm": 2.606340859476138, "learning_rate": 3.5744516783692505e-06, "loss": 0.5761, "step": 3487 }, { "epoch": 0.6044929702562769, "grad_norm": 2.127638067813883, "learning_rate": 3.571761409457911e-06, "loss": 0.5187, "step": 3488 }, { "epoch": 0.6046662767271073, "grad_norm": 2.378973910078221, "learning_rate": 3.569071590683959e-06, "loss": 0.6129, "step": 3489 }, { "epoch": 0.6048395831979376, "grad_norm": 2.5425079436774434, "learning_rate": 3.5663822228951427e-06, "loss": 0.5244, "step": 3490 }, { "epoch": 0.605012889668768, "grad_norm": 6.2460801381522675, "learning_rate": 3.56369330693907e-06, "loss": 0.5665, "step": 3491 }, { "epoch": 0.6051861961395983, "grad_norm": 2.6682214212497635, "learning_rate": 3.5610048436632062e-06, "loss": 0.5406, "step": 3492 }, { "epoch": 0.6053595026104287, "grad_norm": 2.0639036103165074, "learning_rate": 3.5583168339148704e-06, "loss": 0.543, "step": 3493 }, { "epoch": 0.6055328090812591, "grad_norm": 2.896970016287949, "learning_rate": 3.555629278541244e-06, "loss": 0.433, "step": 3494 }, { "epoch": 0.6057061155520894, "grad_norm": 2.177396682720361, "learning_rate": 3.5529421783893614e-06, "loss": 0.5433, "step": 3495 }, { "epoch": 0.6058794220229198, "grad_norm": 2.4018516777081134, "learning_rate": 3.5502555343061164e-06, "loss": 0.4889, "step": 3496 }, { "epoch": 0.6060527284937501, "grad_norm": 2.113622065135333, "learning_rate": 3.5475693471382557e-06, "loss": 0.5925, "step": 3497 }, { "epoch": 0.6062260349645805, "grad_norm": 2.1875039964129144, "learning_rate": 3.5448836177323852e-06, "loss": 0.5243, "step": 3498 }, { "epoch": 0.6063993414354109, "grad_norm": 2.241149094698149, "learning_rate": 3.542198346934964e-06, "loss": 0.5664, "step": 3499 }, { "epoch": 0.6065726479062412, "grad_norm": 2.2862513670304887, "learning_rate": 3.539513535592309e-06, "loss": 0.5859, "step": 3500 }, { "epoch": 0.6067459543770716, "grad_norm": 2.342020089915397, "learning_rate": 3.536829184550592e-06, "loss": 0.5829, "step": 3501 }, { "epoch": 0.6069192608479019, "grad_norm": 2.6629317975092532, "learning_rate": 3.5341452946558385e-06, "loss": 0.5692, "step": 3502 }, { "epoch": 0.6070925673187323, "grad_norm": 2.334870421052814, "learning_rate": 3.531461866753926e-06, "loss": 0.5895, "step": 3503 }, { "epoch": 0.6072658737895626, "grad_norm": 3.4547152546543227, "learning_rate": 3.5287789016905923e-06, "loss": 0.6539, "step": 3504 }, { "epoch": 0.607439180260393, "grad_norm": 2.3644695359163728, "learning_rate": 3.526096400311424e-06, "loss": 0.5868, "step": 3505 }, { "epoch": 0.6076124867312234, "grad_norm": 2.315277644830132, "learning_rate": 3.523414363461867e-06, "loss": 0.5429, "step": 3506 }, { "epoch": 0.6077857932020537, "grad_norm": 2.685447754997813, "learning_rate": 3.5207327919872157e-06, "loss": 0.5425, "step": 3507 }, { "epoch": 0.6079590996728841, "grad_norm": 2.495891001990355, "learning_rate": 3.518051686732621e-06, "loss": 0.5996, "step": 3508 }, { "epoch": 0.6081324061437144, "grad_norm": 2.0512227980783653, "learning_rate": 3.5153710485430848e-06, "loss": 0.5309, "step": 3509 }, { "epoch": 0.6083057126145447, "grad_norm": 2.4123155843067883, "learning_rate": 3.512690878263464e-06, "loss": 0.4877, "step": 3510 }, { "epoch": 0.608479019085375, "grad_norm": 3.9130432974295037, "learning_rate": 3.510011176738466e-06, "loss": 0.5471, "step": 3511 }, { "epoch": 0.6086523255562054, "grad_norm": 2.3008643603795162, "learning_rate": 3.5073319448126516e-06, "loss": 0.5568, "step": 3512 }, { "epoch": 0.6088256320270358, "grad_norm": 2.284336469549611, "learning_rate": 3.5046531833304337e-06, "loss": 0.5255, "step": 3513 }, { "epoch": 0.6089989384978661, "grad_norm": 2.85701757928324, "learning_rate": 3.5019748931360754e-06, "loss": 0.5666, "step": 3514 }, { "epoch": 0.6091722449686965, "grad_norm": 2.024273924514952, "learning_rate": 3.499297075073692e-06, "loss": 0.4898, "step": 3515 }, { "epoch": 0.6093455514395268, "grad_norm": 2.289849525879673, "learning_rate": 3.4966197299872516e-06, "loss": 0.5686, "step": 3516 }, { "epoch": 0.6095188579103572, "grad_norm": 24.610460967105155, "learning_rate": 3.4939428587205713e-06, "loss": 0.6081, "step": 3517 }, { "epoch": 0.6096921643811876, "grad_norm": 3.3292150259729913, "learning_rate": 3.4912664621173196e-06, "loss": 0.5688, "step": 3518 }, { "epoch": 0.6098654708520179, "grad_norm": 2.2091160028309234, "learning_rate": 3.4885905410210164e-06, "loss": 0.5436, "step": 3519 }, { "epoch": 0.6100387773228483, "grad_norm": 1.9517112635762297, "learning_rate": 3.485915096275028e-06, "loss": 0.5023, "step": 3520 }, { "epoch": 0.6102120837936786, "grad_norm": 2.3814996139440914, "learning_rate": 3.4832401287225757e-06, "loss": 0.5588, "step": 3521 }, { "epoch": 0.610385390264509, "grad_norm": 2.114259339537154, "learning_rate": 3.4805656392067287e-06, "loss": 0.4753, "step": 3522 }, { "epoch": 0.6105586967353394, "grad_norm": 4.8973402168460565, "learning_rate": 3.4778916285704046e-06, "loss": 0.5211, "step": 3523 }, { "epoch": 0.6107320032061697, "grad_norm": 2.0445143376667567, "learning_rate": 3.475218097656367e-06, "loss": 0.5425, "step": 3524 }, { "epoch": 0.6109053096770001, "grad_norm": 2.3470201836430666, "learning_rate": 3.472545047307233e-06, "loss": 0.6464, "step": 3525 }, { "epoch": 0.6110786161478304, "grad_norm": 2.7572247385406436, "learning_rate": 3.469872478365468e-06, "loss": 0.5972, "step": 3526 }, { "epoch": 0.6112519226186608, "grad_norm": 2.3130615249026163, "learning_rate": 3.4672003916733843e-06, "loss": 0.561, "step": 3527 }, { "epoch": 0.6114252290894912, "grad_norm": 2.479718140709254, "learning_rate": 3.464528788073142e-06, "loss": 0.5348, "step": 3528 }, { "epoch": 0.6115985355603215, "grad_norm": 2.812870935308726, "learning_rate": 3.4618576684067495e-06, "loss": 0.5537, "step": 3529 }, { "epoch": 0.6117718420311519, "grad_norm": 3.0761860951366007, "learning_rate": 3.459187033516062e-06, "loss": 0.5773, "step": 3530 }, { "epoch": 0.6119451485019822, "grad_norm": 3.061929615995114, "learning_rate": 3.456516884242783e-06, "loss": 0.5836, "step": 3531 }, { "epoch": 0.6121184549728126, "grad_norm": 2.3336171896277684, "learning_rate": 3.4538472214284623e-06, "loss": 0.5634, "step": 3532 }, { "epoch": 0.612291761443643, "grad_norm": 2.7577899845527303, "learning_rate": 3.451178045914498e-06, "loss": 0.5597, "step": 3533 }, { "epoch": 0.6124650679144733, "grad_norm": 2.94875409008312, "learning_rate": 3.44850935854213e-06, "loss": 0.6027, "step": 3534 }, { "epoch": 0.6126383743853037, "grad_norm": 2.395334539959378, "learning_rate": 3.445841160152449e-06, "loss": 0.4612, "step": 3535 }, { "epoch": 0.6128116808561339, "grad_norm": 2.1452109945911197, "learning_rate": 3.4431734515863898e-06, "loss": 0.5502, "step": 3536 }, { "epoch": 0.6129849873269643, "grad_norm": 2.0623674465329245, "learning_rate": 3.440506233684733e-06, "loss": 0.4958, "step": 3537 }, { "epoch": 0.6131582937977946, "grad_norm": 2.2652904768872775, "learning_rate": 3.437839507288105e-06, "loss": 0.5707, "step": 3538 }, { "epoch": 0.613331600268625, "grad_norm": 3.7931891292919153, "learning_rate": 3.435173273236977e-06, "loss": 0.5748, "step": 3539 }, { "epoch": 0.6135049067394553, "grad_norm": 2.6714811375835157, "learning_rate": 3.4325075323716628e-06, "loss": 0.5574, "step": 3540 }, { "epoch": 0.6136782132102857, "grad_norm": 2.307646534581501, "learning_rate": 3.429842285532326e-06, "loss": 0.5675, "step": 3541 }, { "epoch": 0.6138515196811161, "grad_norm": 2.415025033917598, "learning_rate": 3.4271775335589703e-06, "loss": 0.5582, "step": 3542 }, { "epoch": 0.6140248261519464, "grad_norm": 2.272470419274394, "learning_rate": 3.4245132772914464e-06, "loss": 0.5011, "step": 3543 }, { "epoch": 0.6141981326227768, "grad_norm": 3.2523167947953437, "learning_rate": 3.421849517569442e-06, "loss": 0.6435, "step": 3544 }, { "epoch": 0.6143714390936071, "grad_norm": 2.986352463245915, "learning_rate": 3.419186255232496e-06, "loss": 0.5529, "step": 3545 }, { "epoch": 0.6145447455644375, "grad_norm": 2.733439627116326, "learning_rate": 3.4165234911199886e-06, "loss": 0.5735, "step": 3546 }, { "epoch": 0.6147180520352679, "grad_norm": 2.3144795254161474, "learning_rate": 3.41386122607114e-06, "loss": 0.5877, "step": 3547 }, { "epoch": 0.6148913585060982, "grad_norm": 2.5578291981370693, "learning_rate": 3.4111994609250178e-06, "loss": 0.4871, "step": 3548 }, { "epoch": 0.6150646649769286, "grad_norm": 2.5433030642447783, "learning_rate": 3.4085381965205277e-06, "loss": 0.4828, "step": 3549 }, { "epoch": 0.6152379714477589, "grad_norm": 2.9324102584229346, "learning_rate": 3.4058774336964195e-06, "loss": 0.5483, "step": 3550 }, { "epoch": 0.6154112779185893, "grad_norm": 3.442852807570622, "learning_rate": 3.4032171732912843e-06, "loss": 0.4532, "step": 3551 }, { "epoch": 0.6155845843894197, "grad_norm": 2.485098629744996, "learning_rate": 3.4005574161435575e-06, "loss": 0.4951, "step": 3552 }, { "epoch": 0.61575789086025, "grad_norm": 2.7428502919335926, "learning_rate": 3.397898163091511e-06, "loss": 0.4837, "step": 3553 }, { "epoch": 0.6159311973310804, "grad_norm": 2.805952972315569, "learning_rate": 3.395239414973263e-06, "loss": 0.4491, "step": 3554 }, { "epoch": 0.6161045038019107, "grad_norm": 3.35742443373445, "learning_rate": 3.392581172626768e-06, "loss": 0.591, "step": 3555 }, { "epoch": 0.6162778102727411, "grad_norm": 2.6208564962781637, "learning_rate": 3.3899234368898236e-06, "loss": 0.4967, "step": 3556 }, { "epoch": 0.6164511167435714, "grad_norm": 2.166317972925826, "learning_rate": 3.3872662086000686e-06, "loss": 0.5575, "step": 3557 }, { "epoch": 0.6166244232144018, "grad_norm": 2.6985671012850836, "learning_rate": 3.3846094885949787e-06, "loss": 0.5758, "step": 3558 }, { "epoch": 0.6167977296852322, "grad_norm": 2.8200617069832408, "learning_rate": 3.3819532777118725e-06, "loss": 0.5694, "step": 3559 }, { "epoch": 0.6169710361560625, "grad_norm": 2.4721356885336667, "learning_rate": 3.3792975767879055e-06, "loss": 0.5632, "step": 3560 }, { "epoch": 0.6171443426268929, "grad_norm": 2.6741824312857485, "learning_rate": 3.376642386660077e-06, "loss": 0.5374, "step": 3561 }, { "epoch": 0.6173176490977231, "grad_norm": 2.6545322558999014, "learning_rate": 3.3739877081652194e-06, "loss": 0.5386, "step": 3562 }, { "epoch": 0.6174909555685535, "grad_norm": 2.225691835197153, "learning_rate": 3.3713335421400075e-06, "loss": 0.5104, "step": 3563 }, { "epoch": 0.6176642620393838, "grad_norm": 2.2282902976438987, "learning_rate": 3.368679889420956e-06, "loss": 0.5246, "step": 3564 }, { "epoch": 0.6178375685102142, "grad_norm": 2.818432363384709, "learning_rate": 3.3660267508444117e-06, "loss": 0.5094, "step": 3565 }, { "epoch": 0.6180108749810446, "grad_norm": 2.786701383534304, "learning_rate": 3.363374127246564e-06, "loss": 0.4685, "step": 3566 }, { "epoch": 0.6181841814518749, "grad_norm": 2.50027817786614, "learning_rate": 3.3607220194634403e-06, "loss": 0.5318, "step": 3567 }, { "epoch": 0.6183574879227053, "grad_norm": 2.2150087495077218, "learning_rate": 3.3580704283309044e-06, "loss": 0.5195, "step": 3568 }, { "epoch": 0.6185307943935356, "grad_norm": 4.836357088941236, "learning_rate": 3.3554193546846574e-06, "loss": 0.5421, "step": 3569 }, { "epoch": 0.618704100864366, "grad_norm": 7.698931936278669, "learning_rate": 3.3527687993602367e-06, "loss": 0.4627, "step": 3570 }, { "epoch": 0.6188774073351964, "grad_norm": 2.3050431726665583, "learning_rate": 3.3501187631930167e-06, "loss": 0.6092, "step": 3571 }, { "epoch": 0.6190507138060267, "grad_norm": 2.7189343559842496, "learning_rate": 3.347469247018208e-06, "loss": 0.5164, "step": 3572 }, { "epoch": 0.6192240202768571, "grad_norm": 2.425695633607952, "learning_rate": 3.344820251670858e-06, "loss": 0.4845, "step": 3573 }, { "epoch": 0.6193973267476874, "grad_norm": 2.6896205977675347, "learning_rate": 3.342171777985851e-06, "loss": 0.4971, "step": 3574 }, { "epoch": 0.6195706332185178, "grad_norm": 2.5672255202799317, "learning_rate": 3.339523826797902e-06, "loss": 0.5778, "step": 3575 }, { "epoch": 0.6197439396893482, "grad_norm": 2.2104263093442764, "learning_rate": 3.3368763989415664e-06, "loss": 0.4652, "step": 3576 }, { "epoch": 0.6199172461601785, "grad_norm": 6.398085372538334, "learning_rate": 3.334229495251234e-06, "loss": 0.5987, "step": 3577 }, { "epoch": 0.6200905526310089, "grad_norm": 2.3926947720900267, "learning_rate": 3.331583116561127e-06, "loss": 0.5549, "step": 3578 }, { "epoch": 0.6202638591018392, "grad_norm": 2.2782549026619674, "learning_rate": 3.3289372637053038e-06, "loss": 0.5197, "step": 3579 }, { "epoch": 0.6204371655726696, "grad_norm": 2.1431229404426295, "learning_rate": 3.326291937517657e-06, "loss": 0.525, "step": 3580 }, { "epoch": 0.6206104720435, "grad_norm": 2.810619982261229, "learning_rate": 3.323647138831912e-06, "loss": 0.4368, "step": 3581 }, { "epoch": 0.6207837785143303, "grad_norm": 2.662519980525013, "learning_rate": 3.321002868481631e-06, "loss": 0.5411, "step": 3582 }, { "epoch": 0.6209570849851607, "grad_norm": 2.8119756516772276, "learning_rate": 3.318359127300207e-06, "loss": 0.5495, "step": 3583 }, { "epoch": 0.621130391455991, "grad_norm": 2.5531118559686177, "learning_rate": 3.3157159161208653e-06, "loss": 0.4814, "step": 3584 }, { "epoch": 0.6213036979268214, "grad_norm": 2.240561528674696, "learning_rate": 3.31307323577667e-06, "loss": 0.5196, "step": 3585 }, { "epoch": 0.6214770043976517, "grad_norm": 2.443350285336354, "learning_rate": 3.3104310871005063e-06, "loss": 0.5384, "step": 3586 }, { "epoch": 0.6216503108684821, "grad_norm": 2.5824261915318383, "learning_rate": 3.3077894709251057e-06, "loss": 0.6133, "step": 3587 }, { "epoch": 0.6218236173393124, "grad_norm": 2.8172628838460114, "learning_rate": 3.3051483880830216e-06, "loss": 0.6045, "step": 3588 }, { "epoch": 0.6219969238101427, "grad_norm": 3.839810499752259, "learning_rate": 3.3025078394066447e-06, "loss": 0.5376, "step": 3589 }, { "epoch": 0.6221702302809731, "grad_norm": 2.407523860173899, "learning_rate": 3.299867825728196e-06, "loss": 0.4752, "step": 3590 }, { "epoch": 0.6223435367518034, "grad_norm": 2.3782160535793087, "learning_rate": 3.2972283478797263e-06, "loss": 0.5741, "step": 3591 }, { "epoch": 0.6225168432226338, "grad_norm": 2.3128251281498695, "learning_rate": 3.2945894066931187e-06, "loss": 0.5459, "step": 3592 }, { "epoch": 0.6226901496934641, "grad_norm": 2.5263303721468002, "learning_rate": 3.2919510030000888e-06, "loss": 0.5834, "step": 3593 }, { "epoch": 0.6228634561642945, "grad_norm": 2.3496662119984855, "learning_rate": 3.2893131376321798e-06, "loss": 0.474, "step": 3594 }, { "epoch": 0.6230367626351249, "grad_norm": 3.3329910265344576, "learning_rate": 3.2866758114207685e-06, "loss": 0.4392, "step": 3595 }, { "epoch": 0.6232100691059552, "grad_norm": 2.1947639212754653, "learning_rate": 3.2840390251970574e-06, "loss": 0.4827, "step": 3596 }, { "epoch": 0.6233833755767856, "grad_norm": 2.6882993791538046, "learning_rate": 3.281402779792082e-06, "loss": 0.5744, "step": 3597 }, { "epoch": 0.6235566820476159, "grad_norm": 2.775691130210447, "learning_rate": 3.2787670760367086e-06, "loss": 0.6236, "step": 3598 }, { "epoch": 0.6237299885184463, "grad_norm": 2.9093547453752686, "learning_rate": 3.276131914761629e-06, "loss": 0.6216, "step": 3599 }, { "epoch": 0.6239032949892767, "grad_norm": 2.698727654177825, "learning_rate": 3.2734972967973668e-06, "loss": 0.5312, "step": 3600 }, { "epoch": 0.624076601460107, "grad_norm": 2.1736210643358373, "learning_rate": 3.2708632229742722e-06, "loss": 0.4407, "step": 3601 }, { "epoch": 0.6242499079309374, "grad_norm": 2.3629343082814027, "learning_rate": 3.2682296941225265e-06, "loss": 0.5332, "step": 3602 }, { "epoch": 0.6244232144017677, "grad_norm": 2.4331496830184856, "learning_rate": 3.2655967110721386e-06, "loss": 0.5166, "step": 3603 }, { "epoch": 0.6245965208725981, "grad_norm": 2.1289909186642104, "learning_rate": 3.262964274652943e-06, "loss": 0.5573, "step": 3604 }, { "epoch": 0.6247698273434285, "grad_norm": 2.5840397490837015, "learning_rate": 3.2603323856946072e-06, "loss": 0.5418, "step": 3605 }, { "epoch": 0.6249431338142588, "grad_norm": 2.2414250044772497, "learning_rate": 3.2577010450266168e-06, "loss": 0.4944, "step": 3606 }, { "epoch": 0.6251164402850892, "grad_norm": 2.5666472057983034, "learning_rate": 3.255070253478294e-06, "loss": 0.4702, "step": 3607 }, { "epoch": 0.6252897467559195, "grad_norm": 2.429381915423123, "learning_rate": 3.2524400118787835e-06, "loss": 0.5179, "step": 3608 }, { "epoch": 0.6254630532267499, "grad_norm": 4.350971921589052, "learning_rate": 3.2498103210570585e-06, "loss": 0.545, "step": 3609 }, { "epoch": 0.6256363596975802, "grad_norm": 2.3686480116019997, "learning_rate": 3.2471811818419163e-06, "loss": 0.5571, "step": 3610 }, { "epoch": 0.6258096661684106, "grad_norm": 5.311662313762101, "learning_rate": 3.244552595061982e-06, "loss": 0.4076, "step": 3611 }, { "epoch": 0.625982972639241, "grad_norm": 2.0820403986166283, "learning_rate": 3.2419245615457068e-06, "loss": 0.5099, "step": 3612 }, { "epoch": 0.6261562791100713, "grad_norm": 2.0846495151349496, "learning_rate": 3.2392970821213665e-06, "loss": 0.4743, "step": 3613 }, { "epoch": 0.6263295855809017, "grad_norm": 2.3640803292828823, "learning_rate": 3.2366701576170634e-06, "loss": 0.4705, "step": 3614 }, { "epoch": 0.6265028920517319, "grad_norm": 2.7817570970526204, "learning_rate": 3.234043788860724e-06, "loss": 0.5456, "step": 3615 }, { "epoch": 0.6266761985225623, "grad_norm": 3.1130881742077166, "learning_rate": 3.231417976680099e-06, "loss": 0.5303, "step": 3616 }, { "epoch": 0.6268495049933926, "grad_norm": 2.662229970009979, "learning_rate": 3.228792721902765e-06, "loss": 0.544, "step": 3617 }, { "epoch": 0.627022811464223, "grad_norm": 2.6936417190539204, "learning_rate": 3.226168025356123e-06, "loss": 0.6035, "step": 3618 }, { "epoch": 0.6271961179350534, "grad_norm": 2.6661654287503014, "learning_rate": 3.2235438878673962e-06, "loss": 0.5695, "step": 3619 }, { "epoch": 0.6273694244058837, "grad_norm": 2.3840866820312074, "learning_rate": 3.2209203102636353e-06, "loss": 0.4784, "step": 3620 }, { "epoch": 0.6275427308767141, "grad_norm": 2.376401445073594, "learning_rate": 3.2182972933717094e-06, "loss": 0.4205, "step": 3621 }, { "epoch": 0.6277160373475444, "grad_norm": 2.314286667520812, "learning_rate": 3.2156748380183166e-06, "loss": 0.5707, "step": 3622 }, { "epoch": 0.6278893438183748, "grad_norm": 2.974398460366214, "learning_rate": 3.213052945029973e-06, "loss": 0.4278, "step": 3623 }, { "epoch": 0.6280626502892052, "grad_norm": 2.198253627047963, "learning_rate": 3.2104316152330207e-06, "loss": 0.4961, "step": 3624 }, { "epoch": 0.6282359567600355, "grad_norm": 2.96277262095082, "learning_rate": 3.2078108494536233e-06, "loss": 0.5447, "step": 3625 }, { "epoch": 0.6284092632308659, "grad_norm": 2.9719134430614704, "learning_rate": 3.205190648517767e-06, "loss": 0.5599, "step": 3626 }, { "epoch": 0.6285825697016962, "grad_norm": 2.2255752188404654, "learning_rate": 3.2025710132512556e-06, "loss": 0.5894, "step": 3627 }, { "epoch": 0.6287558761725266, "grad_norm": 2.442616272301562, "learning_rate": 3.1999519444797226e-06, "loss": 0.5425, "step": 3628 }, { "epoch": 0.628929182643357, "grad_norm": 3.561202806534788, "learning_rate": 3.197333443028617e-06, "loss": 0.5358, "step": 3629 }, { "epoch": 0.6291024891141873, "grad_norm": 2.790895360814904, "learning_rate": 3.194715509723213e-06, "loss": 0.4876, "step": 3630 }, { "epoch": 0.6292757955850177, "grad_norm": 2.39099238306318, "learning_rate": 3.1920981453886008e-06, "loss": 0.4744, "step": 3631 }, { "epoch": 0.629449102055848, "grad_norm": 2.364189331075108, "learning_rate": 3.1894813508496956e-06, "loss": 0.516, "step": 3632 }, { "epoch": 0.6296224085266784, "grad_norm": 2.44667880182903, "learning_rate": 3.1868651269312313e-06, "loss": 0.5658, "step": 3633 }, { "epoch": 0.6297957149975087, "grad_norm": 7.6939834982843776, "learning_rate": 3.1842494744577625e-06, "loss": 0.4743, "step": 3634 }, { "epoch": 0.6299690214683391, "grad_norm": 2.4129589636344133, "learning_rate": 3.181634394253662e-06, "loss": 0.4875, "step": 3635 }, { "epoch": 0.6301423279391695, "grad_norm": 2.725903074678438, "learning_rate": 3.1790198871431268e-06, "loss": 0.5116, "step": 3636 }, { "epoch": 0.6303156344099998, "grad_norm": 3.505306930003161, "learning_rate": 3.1764059539501657e-06, "loss": 0.5679, "step": 3637 }, { "epoch": 0.6304889408808302, "grad_norm": 2.181642628038275, "learning_rate": 3.173792595498613e-06, "loss": 0.5218, "step": 3638 }, { "epoch": 0.6306622473516605, "grad_norm": 2.5222061045683537, "learning_rate": 3.17117981261212e-06, "loss": 0.5123, "step": 3639 }, { "epoch": 0.6308355538224909, "grad_norm": 2.5049805887380745, "learning_rate": 3.168567606114157e-06, "loss": 0.5892, "step": 3640 }, { "epoch": 0.6310088602933211, "grad_norm": 2.2189357274520356, "learning_rate": 3.1659559768280114e-06, "loss": 0.6212, "step": 3641 }, { "epoch": 0.6311821667641515, "grad_norm": 6.595125253719986, "learning_rate": 3.163344925576787e-06, "loss": 0.5043, "step": 3642 }, { "epoch": 0.6313554732349819, "grad_norm": 4.668697672405692, "learning_rate": 3.1607344531834127e-06, "loss": 0.6075, "step": 3643 }, { "epoch": 0.6315287797058122, "grad_norm": 2.6118584370186455, "learning_rate": 3.158124560470627e-06, "loss": 0.5219, "step": 3644 }, { "epoch": 0.6317020861766426, "grad_norm": 3.0868928504704094, "learning_rate": 3.155515248260989e-06, "loss": 0.5522, "step": 3645 }, { "epoch": 0.6318753926474729, "grad_norm": 2.1177807786326155, "learning_rate": 3.152906517376877e-06, "loss": 0.5015, "step": 3646 }, { "epoch": 0.6320486991183033, "grad_norm": 2.4342327216992685, "learning_rate": 3.1502983686404786e-06, "loss": 0.4994, "step": 3647 }, { "epoch": 0.6322220055891337, "grad_norm": 3.0190752942084966, "learning_rate": 3.1476908028738066e-06, "loss": 0.5883, "step": 3648 }, { "epoch": 0.632395312059964, "grad_norm": 2.5197880490655344, "learning_rate": 3.1450838208986866e-06, "loss": 0.6402, "step": 3649 }, { "epoch": 0.6325686185307944, "grad_norm": 2.225005545449675, "learning_rate": 3.1424774235367584e-06, "loss": 0.6044, "step": 3650 }, { "epoch": 0.6327419250016247, "grad_norm": 2.6025467771261406, "learning_rate": 3.1398716116094807e-06, "loss": 0.5393, "step": 3651 }, { "epoch": 0.6329152314724551, "grad_norm": 3.1499612864340047, "learning_rate": 3.1372663859381245e-06, "loss": 0.542, "step": 3652 }, { "epoch": 0.6330885379432855, "grad_norm": 2.7870395491310345, "learning_rate": 3.1346617473437803e-06, "loss": 0.56, "step": 3653 }, { "epoch": 0.6332618444141158, "grad_norm": 2.395161952551615, "learning_rate": 3.1320576966473482e-06, "loss": 0.5434, "step": 3654 }, { "epoch": 0.6334351508849462, "grad_norm": 2.159704187183427, "learning_rate": 3.129454234669548e-06, "loss": 0.4754, "step": 3655 }, { "epoch": 0.6336084573557765, "grad_norm": 3.024400156751771, "learning_rate": 3.1268513622309093e-06, "loss": 0.523, "step": 3656 }, { "epoch": 0.6337817638266069, "grad_norm": 2.624118822569319, "learning_rate": 3.1242490801517835e-06, "loss": 0.4749, "step": 3657 }, { "epoch": 0.6339550702974373, "grad_norm": 2.8481944598342923, "learning_rate": 3.1216473892523245e-06, "loss": 0.49, "step": 3658 }, { "epoch": 0.6341283767682676, "grad_norm": 2.8273259204367087, "learning_rate": 3.11904629035251e-06, "loss": 0.6037, "step": 3659 }, { "epoch": 0.634301683239098, "grad_norm": 2.122666308714755, "learning_rate": 3.116445784272125e-06, "loss": 0.5675, "step": 3660 }, { "epoch": 0.6344749897099283, "grad_norm": 3.239898398259621, "learning_rate": 3.1138458718307722e-06, "loss": 0.5105, "step": 3661 }, { "epoch": 0.6346482961807587, "grad_norm": 2.6344319123443785, "learning_rate": 3.111246553847863e-06, "loss": 0.4199, "step": 3662 }, { "epoch": 0.634821602651589, "grad_norm": 2.6549597815472543, "learning_rate": 3.108647831142625e-06, "loss": 0.6168, "step": 3663 }, { "epoch": 0.6349949091224194, "grad_norm": 3.11255428787529, "learning_rate": 3.1060497045340965e-06, "loss": 0.5252, "step": 3664 }, { "epoch": 0.6351682155932498, "grad_norm": 2.6328791932462763, "learning_rate": 3.1034521748411274e-06, "loss": 0.4858, "step": 3665 }, { "epoch": 0.6353415220640801, "grad_norm": 2.673996202356442, "learning_rate": 3.1008552428823795e-06, "loss": 0.5638, "step": 3666 }, { "epoch": 0.6355148285349104, "grad_norm": 2.868369607993662, "learning_rate": 3.0982589094763294e-06, "loss": 0.5514, "step": 3667 }, { "epoch": 0.6356881350057407, "grad_norm": 2.2631409183584585, "learning_rate": 3.095663175441259e-06, "loss": 0.5324, "step": 3668 }, { "epoch": 0.6358614414765711, "grad_norm": 4.141684967644628, "learning_rate": 3.093068041595267e-06, "loss": 0.58, "step": 3669 }, { "epoch": 0.6360347479474014, "grad_norm": 2.489455529856376, "learning_rate": 3.0904735087562597e-06, "loss": 0.4887, "step": 3670 }, { "epoch": 0.6362080544182318, "grad_norm": 2.666490119708157, "learning_rate": 3.0878795777419556e-06, "loss": 0.4893, "step": 3671 }, { "epoch": 0.6363813608890622, "grad_norm": 3.486631863525951, "learning_rate": 3.0852862493698823e-06, "loss": 0.5425, "step": 3672 }, { "epoch": 0.6365546673598925, "grad_norm": 2.7534967777064154, "learning_rate": 3.0826935244573787e-06, "loss": 0.5686, "step": 3673 }, { "epoch": 0.6367279738307229, "grad_norm": 2.512239637961333, "learning_rate": 3.080101403821592e-06, "loss": 0.6582, "step": 3674 }, { "epoch": 0.6369012803015532, "grad_norm": 2.3282275191282613, "learning_rate": 3.0775098882794807e-06, "loss": 0.5459, "step": 3675 }, { "epoch": 0.6370745867723836, "grad_norm": 4.117618149650666, "learning_rate": 3.0749189786478117e-06, "loss": 0.4972, "step": 3676 }, { "epoch": 0.637247893243214, "grad_norm": 2.5995796613623154, "learning_rate": 3.072328675743162e-06, "loss": 0.5166, "step": 3677 }, { "epoch": 0.6374211997140443, "grad_norm": 3.353721638092215, "learning_rate": 3.069738980381913e-06, "loss": 0.5649, "step": 3678 }, { "epoch": 0.6375945061848747, "grad_norm": 3.1203238148196197, "learning_rate": 3.0671498933802613e-06, "loss": 0.6053, "step": 3679 }, { "epoch": 0.637767812655705, "grad_norm": 4.506700177840289, "learning_rate": 3.064561415554206e-06, "loss": 0.5204, "step": 3680 }, { "epoch": 0.6379411191265354, "grad_norm": 3.1548611654935135, "learning_rate": 3.061973547719559e-06, "loss": 0.4958, "step": 3681 }, { "epoch": 0.6381144255973658, "grad_norm": 2.1905980673676337, "learning_rate": 3.0593862906919343e-06, "loss": 0.5004, "step": 3682 }, { "epoch": 0.6382877320681961, "grad_norm": 3.50959153257392, "learning_rate": 3.05679964528676e-06, "loss": 0.5578, "step": 3683 }, { "epoch": 0.6384610385390265, "grad_norm": 2.701465138709031, "learning_rate": 3.0542136123192668e-06, "loss": 0.6359, "step": 3684 }, { "epoch": 0.6386343450098568, "grad_norm": 3.692427385941367, "learning_rate": 3.0516281926044934e-06, "loss": 0.5323, "step": 3685 }, { "epoch": 0.6388076514806872, "grad_norm": 3.1078768591521664, "learning_rate": 3.0490433869572857e-06, "loss": 0.4922, "step": 3686 }, { "epoch": 0.6389809579515175, "grad_norm": 2.484442370849069, "learning_rate": 3.0464591961922983e-06, "loss": 0.5213, "step": 3687 }, { "epoch": 0.6391542644223479, "grad_norm": 2.26772418152522, "learning_rate": 3.0438756211239838e-06, "loss": 0.5641, "step": 3688 }, { "epoch": 0.6393275708931783, "grad_norm": 3.300992188841428, "learning_rate": 3.041292662566611e-06, "loss": 0.5638, "step": 3689 }, { "epoch": 0.6395008773640086, "grad_norm": 1.9903593844983634, "learning_rate": 3.038710321334249e-06, "loss": 0.4739, "step": 3690 }, { "epoch": 0.639674183834839, "grad_norm": 3.052130919563523, "learning_rate": 3.0361285982407727e-06, "loss": 0.4881, "step": 3691 }, { "epoch": 0.6398474903056693, "grad_norm": 2.34959870021267, "learning_rate": 3.033547494099864e-06, "loss": 0.521, "step": 3692 }, { "epoch": 0.6400207967764996, "grad_norm": 2.355828571641391, "learning_rate": 3.0309670097250068e-06, "loss": 0.5154, "step": 3693 }, { "epoch": 0.64019410324733, "grad_norm": 2.3883077158085295, "learning_rate": 3.0283871459294924e-06, "loss": 0.6289, "step": 3694 }, { "epoch": 0.6403674097181603, "grad_norm": 2.903958450910178, "learning_rate": 3.0258079035264156e-06, "loss": 0.5985, "step": 3695 }, { "epoch": 0.6405407161889907, "grad_norm": 3.9382761476702, "learning_rate": 3.0232292833286735e-06, "loss": 0.5196, "step": 3696 }, { "epoch": 0.640714022659821, "grad_norm": 2.4198825093690126, "learning_rate": 3.02065128614897e-06, "loss": 0.609, "step": 3697 }, { "epoch": 0.6408873291306514, "grad_norm": 2.2287566316185337, "learning_rate": 3.018073912799815e-06, "loss": 0.5915, "step": 3698 }, { "epoch": 0.6410606356014817, "grad_norm": 2.3678978547073672, "learning_rate": 3.0154971640935116e-06, "loss": 0.4809, "step": 3699 }, { "epoch": 0.6412339420723121, "grad_norm": 3.51787070787502, "learning_rate": 3.0129210408421766e-06, "loss": 0.6142, "step": 3700 }, { "epoch": 0.6414072485431425, "grad_norm": 2.899056811910555, "learning_rate": 3.0103455438577246e-06, "loss": 0.5067, "step": 3701 }, { "epoch": 0.6415805550139728, "grad_norm": 2.180286791902499, "learning_rate": 3.0077706739518732e-06, "loss": 0.5002, "step": 3702 }, { "epoch": 0.6417538614848032, "grad_norm": 2.33469744676002, "learning_rate": 3.0051964319361454e-06, "loss": 0.5462, "step": 3703 }, { "epoch": 0.6419271679556335, "grad_norm": 2.5607091243245343, "learning_rate": 3.002622818621863e-06, "loss": 0.6106, "step": 3704 }, { "epoch": 0.6421004744264639, "grad_norm": 2.5076274195078314, "learning_rate": 3.00004983482015e-06, "loss": 0.5615, "step": 3705 }, { "epoch": 0.6422737808972943, "grad_norm": 1.9714608071170445, "learning_rate": 2.9974774813419337e-06, "loss": 0.5422, "step": 3706 }, { "epoch": 0.6424470873681246, "grad_norm": 2.4098380459930073, "learning_rate": 2.9949057589979414e-06, "loss": 0.5083, "step": 3707 }, { "epoch": 0.642620393838955, "grad_norm": 2.374589707441779, "learning_rate": 2.992334668598702e-06, "loss": 0.5162, "step": 3708 }, { "epoch": 0.6427937003097853, "grad_norm": 2.1951211279826044, "learning_rate": 2.9897642109545444e-06, "loss": 0.5431, "step": 3709 }, { "epoch": 0.6429670067806157, "grad_norm": 2.431442974907154, "learning_rate": 2.987194386875599e-06, "loss": 0.6232, "step": 3710 }, { "epoch": 0.643140313251446, "grad_norm": 2.465530011105217, "learning_rate": 2.9846251971717957e-06, "loss": 0.4694, "step": 3711 }, { "epoch": 0.6433136197222764, "grad_norm": 2.1861088107969415, "learning_rate": 2.9820566426528653e-06, "loss": 0.6126, "step": 3712 }, { "epoch": 0.6434869261931068, "grad_norm": 2.1744797224887726, "learning_rate": 2.979488724128339e-06, "loss": 0.5069, "step": 3713 }, { "epoch": 0.6436602326639371, "grad_norm": 2.884136979523345, "learning_rate": 2.9769214424075456e-06, "loss": 0.4761, "step": 3714 }, { "epoch": 0.6438335391347675, "grad_norm": 2.4652458991316624, "learning_rate": 2.9743547982996146e-06, "loss": 0.4808, "step": 3715 }, { "epoch": 0.6440068456055978, "grad_norm": 2.802846035124563, "learning_rate": 2.9717887926134747e-06, "loss": 0.5426, "step": 3716 }, { "epoch": 0.6441801520764282, "grad_norm": 2.9183563359980047, "learning_rate": 2.9692234261578505e-06, "loss": 0.5364, "step": 3717 }, { "epoch": 0.6443534585472586, "grad_norm": 3.862414447191152, "learning_rate": 2.9666586997412734e-06, "loss": 0.4428, "step": 3718 }, { "epoch": 0.6445267650180888, "grad_norm": 3.529877902480246, "learning_rate": 2.9640946141720606e-06, "loss": 0.432, "step": 3719 }, { "epoch": 0.6447000714889192, "grad_norm": 2.633024154201847, "learning_rate": 2.961531170258337e-06, "loss": 0.5121, "step": 3720 }, { "epoch": 0.6448733779597495, "grad_norm": 2.946586322657873, "learning_rate": 2.9589683688080216e-06, "loss": 0.4604, "step": 3721 }, { "epoch": 0.6450466844305799, "grad_norm": 2.4717972618233475, "learning_rate": 2.9564062106288314e-06, "loss": 0.5636, "step": 3722 }, { "epoch": 0.6452199909014102, "grad_norm": 2.5135116381993607, "learning_rate": 2.9538446965282804e-06, "loss": 0.5507, "step": 3723 }, { "epoch": 0.6453932973722406, "grad_norm": 2.870424836376633, "learning_rate": 2.951283827313681e-06, "loss": 0.5204, "step": 3724 }, { "epoch": 0.645566603843071, "grad_norm": 3.878823267888171, "learning_rate": 2.948723603792142e-06, "loss": 0.5396, "step": 3725 }, { "epoch": 0.6457399103139013, "grad_norm": 2.814172832809348, "learning_rate": 2.946164026770566e-06, "loss": 0.6326, "step": 3726 }, { "epoch": 0.6459132167847317, "grad_norm": 2.770637578143976, "learning_rate": 2.943605097055656e-06, "loss": 0.5353, "step": 3727 }, { "epoch": 0.646086523255562, "grad_norm": 2.359177361606575, "learning_rate": 2.9410468154539063e-06, "loss": 0.5781, "step": 3728 }, { "epoch": 0.6462598297263924, "grad_norm": 2.4841404082501746, "learning_rate": 2.9384891827716134e-06, "loss": 0.4684, "step": 3729 }, { "epoch": 0.6464331361972228, "grad_norm": 2.6650819974492728, "learning_rate": 2.9359321998148602e-06, "loss": 0.5158, "step": 3730 }, { "epoch": 0.6466064426680531, "grad_norm": 2.2948300617891437, "learning_rate": 2.933375867389534e-06, "loss": 0.5164, "step": 3731 }, { "epoch": 0.6467797491388835, "grad_norm": 2.4816396430742387, "learning_rate": 2.9308201863013113e-06, "loss": 0.5287, "step": 3732 }, { "epoch": 0.6469530556097138, "grad_norm": 2.481114744930235, "learning_rate": 2.9282651573556654e-06, "loss": 0.5017, "step": 3733 }, { "epoch": 0.6471263620805442, "grad_norm": 2.8806366179642167, "learning_rate": 2.9257107813578645e-06, "loss": 0.5802, "step": 3734 }, { "epoch": 0.6472996685513746, "grad_norm": 2.9420107427269415, "learning_rate": 2.9231570591129688e-06, "loss": 0.4387, "step": 3735 }, { "epoch": 0.6474729750222049, "grad_norm": 2.3706543791985255, "learning_rate": 2.9206039914258354e-06, "loss": 0.5726, "step": 3736 }, { "epoch": 0.6476462814930353, "grad_norm": 2.5143471256217036, "learning_rate": 2.9180515791011127e-06, "loss": 0.5572, "step": 3737 }, { "epoch": 0.6478195879638656, "grad_norm": 2.8160335569254284, "learning_rate": 2.915499822943244e-06, "loss": 0.5964, "step": 3738 }, { "epoch": 0.647992894434696, "grad_norm": 2.3337632291799273, "learning_rate": 2.9129487237564647e-06, "loss": 0.51, "step": 3739 }, { "epoch": 0.6481662009055263, "grad_norm": 2.0912662453502247, "learning_rate": 2.910398282344804e-06, "loss": 0.5279, "step": 3740 }, { "epoch": 0.6483395073763567, "grad_norm": 5.393790173523432, "learning_rate": 2.907848499512084e-06, "loss": 0.5476, "step": 3741 }, { "epoch": 0.6485128138471871, "grad_norm": 3.31071553562358, "learning_rate": 2.9052993760619188e-06, "loss": 0.5806, "step": 3742 }, { "epoch": 0.6486861203180174, "grad_norm": 2.443494126840349, "learning_rate": 2.9027509127977125e-06, "loss": 0.4836, "step": 3743 }, { "epoch": 0.6488594267888478, "grad_norm": 2.7543955343297455, "learning_rate": 2.900203110522666e-06, "loss": 0.5295, "step": 3744 }, { "epoch": 0.649032733259678, "grad_norm": 2.693287974013038, "learning_rate": 2.8976559700397677e-06, "loss": 0.5383, "step": 3745 }, { "epoch": 0.6492060397305084, "grad_norm": 2.2980629242928905, "learning_rate": 2.8951094921518e-06, "loss": 0.5711, "step": 3746 }, { "epoch": 0.6493793462013387, "grad_norm": 2.4241934850441664, "learning_rate": 2.892563677661333e-06, "loss": 0.5131, "step": 3747 }, { "epoch": 0.6495526526721691, "grad_norm": 2.4044771822890567, "learning_rate": 2.8900185273707326e-06, "loss": 0.484, "step": 3748 }, { "epoch": 0.6497259591429995, "grad_norm": 2.0102674135096534, "learning_rate": 2.8874740420821533e-06, "loss": 0.4804, "step": 3749 }, { "epoch": 0.6498992656138298, "grad_norm": 2.7502981981419214, "learning_rate": 2.8849302225975363e-06, "loss": 0.6197, "step": 3750 }, { "epoch": 0.6500725720846602, "grad_norm": 2.5511887260801602, "learning_rate": 2.8823870697186174e-06, "loss": 0.5579, "step": 3751 }, { "epoch": 0.6502458785554905, "grad_norm": 2.799001914262735, "learning_rate": 2.8798445842469214e-06, "loss": 0.6319, "step": 3752 }, { "epoch": 0.6504191850263209, "grad_norm": 2.6620884810060166, "learning_rate": 2.877302766983761e-06, "loss": 0.5491, "step": 3753 }, { "epoch": 0.6505924914971513, "grad_norm": 2.2915627797285882, "learning_rate": 2.874761618730243e-06, "loss": 0.4699, "step": 3754 }, { "epoch": 0.6507657979679816, "grad_norm": 2.7724478991569828, "learning_rate": 2.872221140287258e-06, "loss": 0.5823, "step": 3755 }, { "epoch": 0.650939104438812, "grad_norm": 2.206407045885354, "learning_rate": 2.8696813324554884e-06, "loss": 0.5566, "step": 3756 }, { "epoch": 0.6511124109096423, "grad_norm": 2.615006541024453, "learning_rate": 2.867142196035404e-06, "loss": 0.5882, "step": 3757 }, { "epoch": 0.6512857173804727, "grad_norm": 2.4878955145596278, "learning_rate": 2.864603731827263e-06, "loss": 0.643, "step": 3758 }, { "epoch": 0.651459023851303, "grad_norm": 3.521821389371027, "learning_rate": 2.8620659406311147e-06, "loss": 0.6123, "step": 3759 }, { "epoch": 0.6516323303221334, "grad_norm": 2.6573337548450198, "learning_rate": 2.859528823246789e-06, "loss": 0.5, "step": 3760 }, { "epoch": 0.6518056367929638, "grad_norm": 2.293668994465868, "learning_rate": 2.856992380473911e-06, "loss": 0.432, "step": 3761 }, { "epoch": 0.6519789432637941, "grad_norm": 3.004892829755904, "learning_rate": 2.85445661311189e-06, "loss": 0.5957, "step": 3762 }, { "epoch": 0.6521522497346245, "grad_norm": 2.508477467153402, "learning_rate": 2.851921521959923e-06, "loss": 0.5087, "step": 3763 }, { "epoch": 0.6523255562054548, "grad_norm": 2.6253521944574287, "learning_rate": 2.849387107816994e-06, "loss": 0.583, "step": 3764 }, { "epoch": 0.6524988626762852, "grad_norm": 5.289468641195197, "learning_rate": 2.8468533714818725e-06, "loss": 0.5395, "step": 3765 }, { "epoch": 0.6526721691471156, "grad_norm": 2.59303515653669, "learning_rate": 2.844320313753115e-06, "loss": 0.6297, "step": 3766 }, { "epoch": 0.6528454756179459, "grad_norm": 2.675455836818558, "learning_rate": 2.841787935429066e-06, "loss": 0.4882, "step": 3767 }, { "epoch": 0.6530187820887763, "grad_norm": 2.549391184732742, "learning_rate": 2.839256237307849e-06, "loss": 0.5651, "step": 3768 }, { "epoch": 0.6531920885596066, "grad_norm": 2.4894536147796145, "learning_rate": 2.8367252201873856e-06, "loss": 0.4723, "step": 3769 }, { "epoch": 0.653365395030437, "grad_norm": 2.743663146709627, "learning_rate": 2.8341948848653737e-06, "loss": 0.6257, "step": 3770 }, { "epoch": 0.6535387015012672, "grad_norm": 2.760665761292568, "learning_rate": 2.8316652321392946e-06, "loss": 0.6338, "step": 3771 }, { "epoch": 0.6537120079720976, "grad_norm": 2.6747043441844727, "learning_rate": 2.82913626280642e-06, "loss": 0.4724, "step": 3772 }, { "epoch": 0.653885314442928, "grad_norm": 2.2164740313209808, "learning_rate": 2.8266079776638034e-06, "loss": 0.4901, "step": 3773 }, { "epoch": 0.6540586209137583, "grad_norm": 2.7227767289257527, "learning_rate": 2.8240803775082846e-06, "loss": 0.5201, "step": 3774 }, { "epoch": 0.6542319273845887, "grad_norm": 2.498017708666198, "learning_rate": 2.8215534631364855e-06, "loss": 0.4955, "step": 3775 }, { "epoch": 0.654405233855419, "grad_norm": 2.0788262725220914, "learning_rate": 2.819027235344813e-06, "loss": 0.5442, "step": 3776 }, { "epoch": 0.6545785403262494, "grad_norm": 2.591627066248537, "learning_rate": 2.8165016949294565e-06, "loss": 0.6199, "step": 3777 }, { "epoch": 0.6547518467970798, "grad_norm": 2.3416157780496683, "learning_rate": 2.813976842686391e-06, "loss": 0.5208, "step": 3778 }, { "epoch": 0.6549251532679101, "grad_norm": 2.178257358904352, "learning_rate": 2.8114526794113726e-06, "loss": 0.5682, "step": 3779 }, { "epoch": 0.6550984597387405, "grad_norm": 2.1653536149030255, "learning_rate": 2.8089292058999406e-06, "loss": 0.5696, "step": 3780 }, { "epoch": 0.6552717662095708, "grad_norm": 2.378407216362936, "learning_rate": 2.8064064229474162e-06, "loss": 0.4926, "step": 3781 }, { "epoch": 0.6554450726804012, "grad_norm": 2.278938019187509, "learning_rate": 2.803884331348906e-06, "loss": 0.517, "step": 3782 }, { "epoch": 0.6556183791512316, "grad_norm": 2.4393003993082245, "learning_rate": 2.8013629318992956e-06, "loss": 0.5119, "step": 3783 }, { "epoch": 0.6557916856220619, "grad_norm": 3.477148072874729, "learning_rate": 2.7988422253932536e-06, "loss": 0.5503, "step": 3784 }, { "epoch": 0.6559649920928923, "grad_norm": 3.4068194584676217, "learning_rate": 2.796322212625229e-06, "loss": 0.5935, "step": 3785 }, { "epoch": 0.6561382985637226, "grad_norm": 2.791704013994546, "learning_rate": 2.7938028943894547e-06, "loss": 0.6156, "step": 3786 }, { "epoch": 0.656311605034553, "grad_norm": 2.788497991557488, "learning_rate": 2.791284271479942e-06, "loss": 0.4461, "step": 3787 }, { "epoch": 0.6564849115053834, "grad_norm": 2.522172071840347, "learning_rate": 2.7887663446904858e-06, "loss": 0.4882, "step": 3788 }, { "epoch": 0.6566582179762137, "grad_norm": 2.175636967219239, "learning_rate": 2.7862491148146583e-06, "loss": 0.5516, "step": 3789 }, { "epoch": 0.6568315244470441, "grad_norm": 2.9507230788560417, "learning_rate": 2.783732582645817e-06, "loss": 0.5961, "step": 3790 }, { "epoch": 0.6570048309178744, "grad_norm": 2.0224535588173778, "learning_rate": 2.7812167489770914e-06, "loss": 0.4787, "step": 3791 }, { "epoch": 0.6571781373887048, "grad_norm": 2.8815675717718734, "learning_rate": 2.7787016146013996e-06, "loss": 0.519, "step": 3792 }, { "epoch": 0.6573514438595351, "grad_norm": 2.2482817068731125, "learning_rate": 2.7761871803114335e-06, "loss": 0.4571, "step": 3793 }, { "epoch": 0.6575247503303655, "grad_norm": 3.6466459626549095, "learning_rate": 2.7736734468996657e-06, "loss": 0.603, "step": 3794 }, { "epoch": 0.6576980568011959, "grad_norm": 2.333586757059476, "learning_rate": 2.7711604151583514e-06, "loss": 0.483, "step": 3795 }, { "epoch": 0.6578713632720262, "grad_norm": 3.1511220990461006, "learning_rate": 2.768648085879521e-06, "loss": 0.5647, "step": 3796 }, { "epoch": 0.6580446697428565, "grad_norm": 4.413581434035559, "learning_rate": 2.766136459854984e-06, "loss": 0.5346, "step": 3797 }, { "epoch": 0.6582179762136868, "grad_norm": 2.6352403315914645, "learning_rate": 2.763625537876328e-06, "loss": 0.5784, "step": 3798 }, { "epoch": 0.6583912826845172, "grad_norm": 2.571095735834339, "learning_rate": 2.7611153207349195e-06, "loss": 0.4617, "step": 3799 }, { "epoch": 0.6585645891553475, "grad_norm": 2.3053127142097978, "learning_rate": 2.7586058092219027e-06, "loss": 0.5743, "step": 3800 }, { "epoch": 0.6587378956261779, "grad_norm": 3.4186489256948986, "learning_rate": 2.756097004128202e-06, "loss": 0.5533, "step": 3801 }, { "epoch": 0.6589112020970083, "grad_norm": 2.36595172902559, "learning_rate": 2.753588906244511e-06, "loss": 0.5056, "step": 3802 }, { "epoch": 0.6590845085678386, "grad_norm": 2.631641721445236, "learning_rate": 2.751081516361309e-06, "loss": 0.611, "step": 3803 }, { "epoch": 0.659257815038669, "grad_norm": 2.4986944011319956, "learning_rate": 2.7485748352688497e-06, "loss": 0.5424, "step": 3804 }, { "epoch": 0.6594311215094993, "grad_norm": 2.8350812422593017, "learning_rate": 2.7460688637571616e-06, "loss": 0.607, "step": 3805 }, { "epoch": 0.6596044279803297, "grad_norm": 2.3185122893411436, "learning_rate": 2.7435636026160506e-06, "loss": 0.5007, "step": 3806 }, { "epoch": 0.6597777344511601, "grad_norm": 7.6911674378990575, "learning_rate": 2.7410590526351e-06, "loss": 0.5668, "step": 3807 }, { "epoch": 0.6599510409219904, "grad_norm": 2.7582412843461324, "learning_rate": 2.7385552146036663e-06, "loss": 0.5009, "step": 3808 }, { "epoch": 0.6601243473928208, "grad_norm": 3.0105160736904892, "learning_rate": 2.7360520893108832e-06, "loss": 0.5341, "step": 3809 }, { "epoch": 0.6602976538636511, "grad_norm": 3.348710555328444, "learning_rate": 2.7335496775456614e-06, "loss": 0.5647, "step": 3810 }, { "epoch": 0.6604709603344815, "grad_norm": 3.9865170074065492, "learning_rate": 2.7310479800966873e-06, "loss": 0.5274, "step": 3811 }, { "epoch": 0.6606442668053119, "grad_norm": 2.2880838349843504, "learning_rate": 2.7285469977524138e-06, "loss": 0.5226, "step": 3812 }, { "epoch": 0.6608175732761422, "grad_norm": 2.774037890411316, "learning_rate": 2.7260467313010775e-06, "loss": 0.6438, "step": 3813 }, { "epoch": 0.6609908797469726, "grad_norm": 3.7296916929555346, "learning_rate": 2.7235471815306867e-06, "loss": 0.5346, "step": 3814 }, { "epoch": 0.6611641862178029, "grad_norm": 2.189370012972466, "learning_rate": 2.721048349229023e-06, "loss": 0.4982, "step": 3815 }, { "epoch": 0.6613374926886333, "grad_norm": 2.7972919301424652, "learning_rate": 2.7185502351836424e-06, "loss": 0.4638, "step": 3816 }, { "epoch": 0.6615107991594636, "grad_norm": 2.1861479280040608, "learning_rate": 2.7160528401818742e-06, "loss": 0.5367, "step": 3817 }, { "epoch": 0.661684105630294, "grad_norm": 2.7024647632607244, "learning_rate": 2.7135561650108224e-06, "loss": 0.5286, "step": 3818 }, { "epoch": 0.6618574121011244, "grad_norm": 2.696296535968836, "learning_rate": 2.7110602104573623e-06, "loss": 0.5754, "step": 3819 }, { "epoch": 0.6620307185719547, "grad_norm": 2.706950432746967, "learning_rate": 2.708564977308143e-06, "loss": 0.5777, "step": 3820 }, { "epoch": 0.6622040250427851, "grad_norm": 4.414983005660734, "learning_rate": 2.706070466349586e-06, "loss": 0.5227, "step": 3821 }, { "epoch": 0.6623773315136154, "grad_norm": 3.399316678260729, "learning_rate": 2.7035766783678865e-06, "loss": 0.6113, "step": 3822 }, { "epoch": 0.6625506379844457, "grad_norm": 2.6177861409179854, "learning_rate": 2.7010836141490095e-06, "loss": 0.5858, "step": 3823 }, { "epoch": 0.662723944455276, "grad_norm": 2.5860650343714915, "learning_rate": 2.6985912744786924e-06, "loss": 0.5733, "step": 3824 }, { "epoch": 0.6628972509261064, "grad_norm": 2.3198391875066307, "learning_rate": 2.696099660142446e-06, "loss": 0.5615, "step": 3825 }, { "epoch": 0.6630705573969368, "grad_norm": 2.840081122824675, "learning_rate": 2.693608771925552e-06, "loss": 0.5274, "step": 3826 }, { "epoch": 0.6632438638677671, "grad_norm": 3.0108694531908893, "learning_rate": 2.6911186106130616e-06, "loss": 0.5889, "step": 3827 }, { "epoch": 0.6634171703385975, "grad_norm": 2.4170862243264177, "learning_rate": 2.6886291769897977e-06, "loss": 0.5293, "step": 3828 }, { "epoch": 0.6635904768094278, "grad_norm": 2.928786306514627, "learning_rate": 2.686140471840355e-06, "loss": 0.544, "step": 3829 }, { "epoch": 0.6637637832802582, "grad_norm": 3.5961839980726307, "learning_rate": 2.6836524959490966e-06, "loss": 0.4367, "step": 3830 }, { "epoch": 0.6639370897510886, "grad_norm": 2.466718819732394, "learning_rate": 2.6811652501001606e-06, "loss": 0.5448, "step": 3831 }, { "epoch": 0.6641103962219189, "grad_norm": 2.533492221781685, "learning_rate": 2.678678735077446e-06, "loss": 0.5352, "step": 3832 }, { "epoch": 0.6642837026927493, "grad_norm": 3.161644493955028, "learning_rate": 2.676192951664628e-06, "loss": 0.4923, "step": 3833 }, { "epoch": 0.6644570091635796, "grad_norm": 2.5785281735650125, "learning_rate": 2.673707900645149e-06, "loss": 0.5697, "step": 3834 }, { "epoch": 0.66463031563441, "grad_norm": 2.600939075916985, "learning_rate": 2.671223582802227e-06, "loss": 0.5881, "step": 3835 }, { "epoch": 0.6648036221052404, "grad_norm": 2.806086757180733, "learning_rate": 2.668739998918839e-06, "loss": 0.5507, "step": 3836 }, { "epoch": 0.6649769285760707, "grad_norm": 2.9365631398053043, "learning_rate": 2.666257149777737e-06, "loss": 0.6035, "step": 3837 }, { "epoch": 0.6651502350469011, "grad_norm": 2.709282284635718, "learning_rate": 2.663775036161437e-06, "loss": 0.5287, "step": 3838 }, { "epoch": 0.6653235415177314, "grad_norm": 2.602004543876505, "learning_rate": 2.6612936588522286e-06, "loss": 0.5859, "step": 3839 }, { "epoch": 0.6654968479885618, "grad_norm": 2.6528159201741586, "learning_rate": 2.6588130186321655e-06, "loss": 0.5525, "step": 3840 }, { "epoch": 0.6656701544593921, "grad_norm": 2.656330292699886, "learning_rate": 2.6563331162830692e-06, "loss": 0.5296, "step": 3841 }, { "epoch": 0.6658434609302225, "grad_norm": 2.861186748148849, "learning_rate": 2.6538539525865326e-06, "loss": 0.4516, "step": 3842 }, { "epoch": 0.6660167674010529, "grad_norm": 4.732541692896969, "learning_rate": 2.6513755283239083e-06, "loss": 0.4238, "step": 3843 }, { "epoch": 0.6661900738718832, "grad_norm": 2.043884904319064, "learning_rate": 2.6488978442763224e-06, "loss": 0.5397, "step": 3844 }, { "epoch": 0.6663633803427136, "grad_norm": 2.5163751228379385, "learning_rate": 2.6464209012246654e-06, "loss": 0.5229, "step": 3845 }, { "epoch": 0.6665366868135439, "grad_norm": 4.009080493438624, "learning_rate": 2.6439446999495953e-06, "loss": 0.6132, "step": 3846 }, { "epoch": 0.6667099932843743, "grad_norm": 4.2601861706879935, "learning_rate": 2.6414692412315345e-06, "loss": 0.5011, "step": 3847 }, { "epoch": 0.6668832997552047, "grad_norm": 2.408902713245886, "learning_rate": 2.6389945258506722e-06, "loss": 0.5883, "step": 3848 }, { "epoch": 0.6670566062260349, "grad_norm": 2.78010713198286, "learning_rate": 2.636520554586962e-06, "loss": 0.5319, "step": 3849 }, { "epoch": 0.6672299126968653, "grad_norm": 2.730875104728211, "learning_rate": 2.634047328220129e-06, "loss": 0.4945, "step": 3850 }, { "epoch": 0.6674032191676956, "grad_norm": 2.9799268878193375, "learning_rate": 2.6315748475296555e-06, "loss": 0.5546, "step": 3851 }, { "epoch": 0.667576525638526, "grad_norm": 2.0888574932994417, "learning_rate": 2.629103113294795e-06, "loss": 0.5196, "step": 3852 }, { "epoch": 0.6677498321093563, "grad_norm": 3.0212063768257313, "learning_rate": 2.6266321262945606e-06, "loss": 0.4792, "step": 3853 }, { "epoch": 0.6679231385801867, "grad_norm": 2.744689904467122, "learning_rate": 2.6241618873077314e-06, "loss": 0.6546, "step": 3854 }, { "epoch": 0.6680964450510171, "grad_norm": 2.714380416850148, "learning_rate": 2.621692397112853e-06, "loss": 0.6094, "step": 3855 }, { "epoch": 0.6682697515218474, "grad_norm": 2.3875737725021007, "learning_rate": 2.6192236564882333e-06, "loss": 0.6089, "step": 3856 }, { "epoch": 0.6684430579926778, "grad_norm": 2.2073849114369475, "learning_rate": 2.6167556662119454e-06, "loss": 0.5544, "step": 3857 }, { "epoch": 0.6686163644635081, "grad_norm": 2.788327898912301, "learning_rate": 2.6142884270618234e-06, "loss": 0.5781, "step": 3858 }, { "epoch": 0.6687896709343385, "grad_norm": 1.957173941534345, "learning_rate": 2.611821939815467e-06, "loss": 0.5652, "step": 3859 }, { "epoch": 0.6689629774051689, "grad_norm": 2.2085818908649233, "learning_rate": 2.6093562052502375e-06, "loss": 0.4547, "step": 3860 }, { "epoch": 0.6691362838759992, "grad_norm": 2.9583191967729854, "learning_rate": 2.60689122414326e-06, "loss": 0.5097, "step": 3861 }, { "epoch": 0.6693095903468296, "grad_norm": 2.2653743521088123, "learning_rate": 2.6044269972714207e-06, "loss": 0.5304, "step": 3862 }, { "epoch": 0.6694828968176599, "grad_norm": 2.3793128003605455, "learning_rate": 2.601963525411371e-06, "loss": 0.5063, "step": 3863 }, { "epoch": 0.6696562032884903, "grad_norm": 2.687713950433398, "learning_rate": 2.5995008093395197e-06, "loss": 0.5122, "step": 3864 }, { "epoch": 0.6698295097593207, "grad_norm": 3.0872662864998754, "learning_rate": 2.5970388498320414e-06, "loss": 0.601, "step": 3865 }, { "epoch": 0.670002816230151, "grad_norm": 3.7266126012460803, "learning_rate": 2.594577647664871e-06, "loss": 0.5279, "step": 3866 }, { "epoch": 0.6701761227009814, "grad_norm": 4.5510171347435, "learning_rate": 2.592117203613704e-06, "loss": 0.5932, "step": 3867 }, { "epoch": 0.6703494291718117, "grad_norm": 1.7938437830215914, "learning_rate": 2.5896575184539984e-06, "loss": 0.4526, "step": 3868 }, { "epoch": 0.6705227356426421, "grad_norm": 2.2980353808845173, "learning_rate": 2.5871985929609724e-06, "loss": 0.4172, "step": 3869 }, { "epoch": 0.6706960421134724, "grad_norm": 2.470794544225552, "learning_rate": 2.5847404279096026e-06, "loss": 0.4965, "step": 3870 }, { "epoch": 0.6708693485843028, "grad_norm": 2.656347930268453, "learning_rate": 2.58228302407463e-06, "loss": 0.5328, "step": 3871 }, { "epoch": 0.6710426550551332, "grad_norm": 2.75073712216989, "learning_rate": 2.5798263822305527e-06, "loss": 0.5136, "step": 3872 }, { "epoch": 0.6712159615259635, "grad_norm": 2.4766857422031, "learning_rate": 2.577370503151631e-06, "loss": 0.547, "step": 3873 }, { "epoch": 0.6713892679967939, "grad_norm": 4.1847132884283615, "learning_rate": 2.5749153876118802e-06, "loss": 0.5018, "step": 3874 }, { "epoch": 0.6715625744676242, "grad_norm": 3.0269016263050066, "learning_rate": 2.572461036385079e-06, "loss": 0.6156, "step": 3875 }, { "epoch": 0.6717358809384545, "grad_norm": 2.51491852103409, "learning_rate": 2.570007450244766e-06, "loss": 0.5544, "step": 3876 }, { "epoch": 0.6719091874092848, "grad_norm": 2.6348833654852535, "learning_rate": 2.5675546299642363e-06, "loss": 0.5959, "step": 3877 }, { "epoch": 0.6720824938801152, "grad_norm": 3.0630904471691944, "learning_rate": 2.5651025763165437e-06, "loss": 0.547, "step": 3878 }, { "epoch": 0.6722558003509456, "grad_norm": 2.5856817682095268, "learning_rate": 2.5626512900745022e-06, "loss": 0.5227, "step": 3879 }, { "epoch": 0.6724291068217759, "grad_norm": 2.2708043952918007, "learning_rate": 2.5602007720106814e-06, "loss": 0.5109, "step": 3880 }, { "epoch": 0.6726024132926063, "grad_norm": 3.4642813715207184, "learning_rate": 2.5577510228974105e-06, "loss": 0.5793, "step": 3881 }, { "epoch": 0.6727757197634366, "grad_norm": 2.7740083095168213, "learning_rate": 2.5553020435067767e-06, "loss": 0.5421, "step": 3882 }, { "epoch": 0.672949026234267, "grad_norm": 2.5275945079439883, "learning_rate": 2.5528538346106245e-06, "loss": 0.5693, "step": 3883 }, { "epoch": 0.6731223327050974, "grad_norm": 2.271621730761273, "learning_rate": 2.5504063969805517e-06, "loss": 0.5662, "step": 3884 }, { "epoch": 0.6732956391759277, "grad_norm": 2.3482448360602466, "learning_rate": 2.547959731387918e-06, "loss": 0.4695, "step": 3885 }, { "epoch": 0.6734689456467581, "grad_norm": 2.1176988489145674, "learning_rate": 2.545513838603839e-06, "loss": 0.5445, "step": 3886 }, { "epoch": 0.6736422521175884, "grad_norm": 1.9763683502579144, "learning_rate": 2.543068719399184e-06, "loss": 0.5002, "step": 3887 }, { "epoch": 0.6738155585884188, "grad_norm": 2.7838345891482024, "learning_rate": 2.540624374544581e-06, "loss": 0.4721, "step": 3888 }, { "epoch": 0.6739888650592492, "grad_norm": 3.0151789370044177, "learning_rate": 2.5381808048104144e-06, "loss": 0.5201, "step": 3889 }, { "epoch": 0.6741621715300795, "grad_norm": 2.9776797030565847, "learning_rate": 2.5357380109668197e-06, "loss": 0.5317, "step": 3890 }, { "epoch": 0.6743354780009099, "grad_norm": 2.4377121692697603, "learning_rate": 2.5332959937836953e-06, "loss": 0.5904, "step": 3891 }, { "epoch": 0.6745087844717402, "grad_norm": 2.2158722937630104, "learning_rate": 2.5308547540306895e-06, "loss": 0.5949, "step": 3892 }, { "epoch": 0.6746820909425706, "grad_norm": 3.581732898630699, "learning_rate": 2.528414292477208e-06, "loss": 0.4855, "step": 3893 }, { "epoch": 0.674855397413401, "grad_norm": 2.46333216638239, "learning_rate": 2.5259746098924065e-06, "loss": 0.5225, "step": 3894 }, { "epoch": 0.6750287038842313, "grad_norm": 2.3201202006204094, "learning_rate": 2.5235357070452016e-06, "loss": 0.5122, "step": 3895 }, { "epoch": 0.6752020103550617, "grad_norm": 3.044245560996254, "learning_rate": 2.52109758470426e-06, "loss": 0.5947, "step": 3896 }, { "epoch": 0.675375316825892, "grad_norm": 2.599065182153735, "learning_rate": 2.5186602436380047e-06, "loss": 0.5407, "step": 3897 }, { "epoch": 0.6755486232967224, "grad_norm": 2.707607758072538, "learning_rate": 2.5162236846146106e-06, "loss": 0.5372, "step": 3898 }, { "epoch": 0.6757219297675527, "grad_norm": 2.527297822384399, "learning_rate": 2.513787908402008e-06, "loss": 0.5233, "step": 3899 }, { "epoch": 0.6758952362383831, "grad_norm": 2.7334469292418717, "learning_rate": 2.511352915767879e-06, "loss": 0.5087, "step": 3900 }, { "epoch": 0.6760685427092135, "grad_norm": 2.3075052168231545, "learning_rate": 2.5089187074796595e-06, "loss": 0.5518, "step": 3901 }, { "epoch": 0.6762418491800437, "grad_norm": 2.0535085497262804, "learning_rate": 2.506485284304537e-06, "loss": 0.5773, "step": 3902 }, { "epoch": 0.6764151556508741, "grad_norm": 2.4994680711057393, "learning_rate": 2.5040526470094543e-06, "loss": 0.5249, "step": 3903 }, { "epoch": 0.6765884621217044, "grad_norm": 2.198312872228593, "learning_rate": 2.501620796361104e-06, "loss": 0.5386, "step": 3904 }, { "epoch": 0.6767617685925348, "grad_norm": 2.399733176356959, "learning_rate": 2.49918973312593e-06, "loss": 0.515, "step": 3905 }, { "epoch": 0.6769350750633651, "grad_norm": 2.373728824383019, "learning_rate": 2.4967594580701323e-06, "loss": 0.5746, "step": 3906 }, { "epoch": 0.6771083815341955, "grad_norm": 2.2108189228062516, "learning_rate": 2.494329971959657e-06, "loss": 0.5408, "step": 3907 }, { "epoch": 0.6772816880050259, "grad_norm": 2.3234244955002388, "learning_rate": 2.491901275560206e-06, "loss": 0.5786, "step": 3908 }, { "epoch": 0.6774549944758562, "grad_norm": 2.640436727019385, "learning_rate": 2.4894733696372296e-06, "loss": 0.5648, "step": 3909 }, { "epoch": 0.6776283009466866, "grad_norm": 2.4485762833883853, "learning_rate": 2.487046254955931e-06, "loss": 0.5121, "step": 3910 }, { "epoch": 0.6778016074175169, "grad_norm": 2.7898005393462184, "learning_rate": 2.484619932281262e-06, "loss": 0.5406, "step": 3911 }, { "epoch": 0.6779749138883473, "grad_norm": 2.39249968223699, "learning_rate": 2.482194402377926e-06, "loss": 0.5537, "step": 3912 }, { "epoch": 0.6781482203591777, "grad_norm": 2.1710661369256066, "learning_rate": 2.4797696660103764e-06, "loss": 0.5035, "step": 3913 }, { "epoch": 0.678321526830008, "grad_norm": 2.4912612586898435, "learning_rate": 2.4773457239428183e-06, "loss": 0.4578, "step": 3914 }, { "epoch": 0.6784948333008384, "grad_norm": 2.292849460311659, "learning_rate": 2.474922576939201e-06, "loss": 0.5276, "step": 3915 }, { "epoch": 0.6786681397716687, "grad_norm": 2.5798834518219236, "learning_rate": 2.472500225763226e-06, "loss": 0.5481, "step": 3916 }, { "epoch": 0.6788414462424991, "grad_norm": 2.3073761074995733, "learning_rate": 2.4700786711783497e-06, "loss": 0.5275, "step": 3917 }, { "epoch": 0.6790147527133295, "grad_norm": 2.8739371568433083, "learning_rate": 2.4676579139477695e-06, "loss": 0.6276, "step": 3918 }, { "epoch": 0.6791880591841598, "grad_norm": 3.189802140034979, "learning_rate": 2.465237954834436e-06, "loss": 0.5496, "step": 3919 }, { "epoch": 0.6793613656549902, "grad_norm": 2.2695765016791065, "learning_rate": 2.4628187946010446e-06, "loss": 0.5449, "step": 3920 }, { "epoch": 0.6795346721258205, "grad_norm": 3.3793905509449025, "learning_rate": 2.4604004340100423e-06, "loss": 0.5945, "step": 3921 }, { "epoch": 0.6797079785966509, "grad_norm": 2.2890527245365093, "learning_rate": 2.4579828738236227e-06, "loss": 0.5382, "step": 3922 }, { "epoch": 0.6798812850674812, "grad_norm": 2.392205421118625, "learning_rate": 2.4555661148037275e-06, "loss": 0.5271, "step": 3923 }, { "epoch": 0.6800545915383116, "grad_norm": 2.789194271992372, "learning_rate": 2.4531501577120477e-06, "loss": 0.4932, "step": 3924 }, { "epoch": 0.680227898009142, "grad_norm": 2.59791355136209, "learning_rate": 2.4507350033100146e-06, "loss": 0.5054, "step": 3925 }, { "epoch": 0.6804012044799723, "grad_norm": 2.5065955185714257, "learning_rate": 2.448320652358815e-06, "loss": 0.6017, "step": 3926 }, { "epoch": 0.6805745109508027, "grad_norm": 2.431436746481012, "learning_rate": 2.445907105619378e-06, "loss": 0.5109, "step": 3927 }, { "epoch": 0.6807478174216329, "grad_norm": 3.1758045659056617, "learning_rate": 2.4434943638523794e-06, "loss": 0.5375, "step": 3928 }, { "epoch": 0.6809211238924633, "grad_norm": 3.1091868788005934, "learning_rate": 2.4410824278182437e-06, "loss": 0.5438, "step": 3929 }, { "epoch": 0.6810944303632936, "grad_norm": 3.43889270751829, "learning_rate": 2.4386712982771364e-06, "loss": 0.6205, "step": 3930 }, { "epoch": 0.681267736834124, "grad_norm": 2.38978648379193, "learning_rate": 2.4362609759889777e-06, "loss": 0.4749, "step": 3931 }, { "epoch": 0.6814410433049544, "grad_norm": 4.066406570562065, "learning_rate": 2.4338514617134246e-06, "loss": 0.6029, "step": 3932 }, { "epoch": 0.6816143497757847, "grad_norm": 2.4776044076362296, "learning_rate": 2.431442756209883e-06, "loss": 0.4804, "step": 3933 }, { "epoch": 0.6817876562466151, "grad_norm": 3.407214682441314, "learning_rate": 2.4290348602375065e-06, "loss": 0.5236, "step": 3934 }, { "epoch": 0.6819609627174454, "grad_norm": 2.7759849118974502, "learning_rate": 2.426627774555185e-06, "loss": 0.5856, "step": 3935 }, { "epoch": 0.6821342691882758, "grad_norm": 3.5456038363419493, "learning_rate": 2.424221499921563e-06, "loss": 0.5575, "step": 3936 }, { "epoch": 0.6823075756591062, "grad_norm": 2.5893299049752834, "learning_rate": 2.4218160370950234e-06, "loss": 0.5115, "step": 3937 }, { "epoch": 0.6824808821299365, "grad_norm": 2.4767827879017, "learning_rate": 2.419411386833696e-06, "loss": 0.5133, "step": 3938 }, { "epoch": 0.6826541886007669, "grad_norm": 3.083414954580299, "learning_rate": 2.417007549895453e-06, "loss": 0.6195, "step": 3939 }, { "epoch": 0.6828274950715972, "grad_norm": 2.6997048634824794, "learning_rate": 2.41460452703791e-06, "loss": 0.5418, "step": 3940 }, { "epoch": 0.6830008015424276, "grad_norm": 2.3136155158541207, "learning_rate": 2.412202319018427e-06, "loss": 0.6239, "step": 3941 }, { "epoch": 0.683174108013258, "grad_norm": 2.82898370718263, "learning_rate": 2.4098009265941084e-06, "loss": 0.4755, "step": 3942 }, { "epoch": 0.6833474144840883, "grad_norm": 2.2998008517877464, "learning_rate": 2.407400350521799e-06, "loss": 0.56, "step": 3943 }, { "epoch": 0.6835207209549187, "grad_norm": 3.5747100163811862, "learning_rate": 2.4050005915580875e-06, "loss": 0.5672, "step": 3944 }, { "epoch": 0.683694027425749, "grad_norm": 2.3267283556288474, "learning_rate": 2.402601650459305e-06, "loss": 0.539, "step": 3945 }, { "epoch": 0.6838673338965794, "grad_norm": 2.9233577457798225, "learning_rate": 2.4002035279815245e-06, "loss": 0.5568, "step": 3946 }, { "epoch": 0.6840406403674097, "grad_norm": 2.542800555514162, "learning_rate": 2.3978062248805618e-06, "loss": 0.4773, "step": 3947 }, { "epoch": 0.6842139468382401, "grad_norm": 2.474109386980598, "learning_rate": 2.3954097419119737e-06, "loss": 0.4643, "step": 3948 }, { "epoch": 0.6843872533090705, "grad_norm": 2.4255024958148974, "learning_rate": 2.3930140798310585e-06, "loss": 0.58, "step": 3949 }, { "epoch": 0.6845605597799008, "grad_norm": 2.167271292398908, "learning_rate": 2.390619239392857e-06, "loss": 0.5357, "step": 3950 }, { "epoch": 0.6847338662507312, "grad_norm": 2.878236660744266, "learning_rate": 2.3882252213521495e-06, "loss": 0.4842, "step": 3951 }, { "epoch": 0.6849071727215615, "grad_norm": 5.295553483683232, "learning_rate": 2.385832026463457e-06, "loss": 0.4909, "step": 3952 }, { "epoch": 0.6850804791923919, "grad_norm": 2.7923219863192834, "learning_rate": 2.383439655481043e-06, "loss": 0.6066, "step": 3953 }, { "epoch": 0.6852537856632221, "grad_norm": 2.3835764695105413, "learning_rate": 2.38104810915891e-06, "loss": 0.5026, "step": 3954 }, { "epoch": 0.6854270921340525, "grad_norm": 2.7769007618660013, "learning_rate": 2.378657388250801e-06, "loss": 0.5475, "step": 3955 }, { "epoch": 0.6856003986048829, "grad_norm": 2.238968547874418, "learning_rate": 2.376267493510195e-06, "loss": 0.4895, "step": 3956 }, { "epoch": 0.6857737050757132, "grad_norm": 2.0122620460039644, "learning_rate": 2.373878425690319e-06, "loss": 0.5841, "step": 3957 }, { "epoch": 0.6859470115465436, "grad_norm": 2.7315244810421078, "learning_rate": 2.3714901855441326e-06, "loss": 0.61, "step": 3958 }, { "epoch": 0.6861203180173739, "grad_norm": 2.9733790580766746, "learning_rate": 2.369102773824336e-06, "loss": 0.5775, "step": 3959 }, { "epoch": 0.6862936244882043, "grad_norm": 2.5903443034745797, "learning_rate": 2.366716191283369e-06, "loss": 0.3988, "step": 3960 }, { "epoch": 0.6864669309590347, "grad_norm": 2.8181419269963994, "learning_rate": 2.364330438673411e-06, "loss": 0.488, "step": 3961 }, { "epoch": 0.686640237429865, "grad_norm": 2.331144034076384, "learning_rate": 2.361945516746377e-06, "loss": 0.5615, "step": 3962 }, { "epoch": 0.6868135439006954, "grad_norm": 1.8994577335371812, "learning_rate": 2.3595614262539234e-06, "loss": 0.508, "step": 3963 }, { "epoch": 0.6869868503715257, "grad_norm": 2.698514845625854, "learning_rate": 2.3571781679474425e-06, "loss": 0.5227, "step": 3964 }, { "epoch": 0.6871601568423561, "grad_norm": 2.038359696407092, "learning_rate": 2.3547957425780664e-06, "loss": 0.519, "step": 3965 }, { "epoch": 0.6873334633131865, "grad_norm": 2.7303770293473106, "learning_rate": 2.3524141508966598e-06, "loss": 0.4982, "step": 3966 }, { "epoch": 0.6875067697840168, "grad_norm": 2.4178186919691815, "learning_rate": 2.3500333936538292e-06, "loss": 0.4728, "step": 3967 }, { "epoch": 0.6876800762548472, "grad_norm": 2.295544488547329, "learning_rate": 2.3476534715999177e-06, "loss": 0.5259, "step": 3968 }, { "epoch": 0.6878533827256775, "grad_norm": 2.5421927691688397, "learning_rate": 2.3452743854850042e-06, "loss": 0.4896, "step": 3969 }, { "epoch": 0.6880266891965079, "grad_norm": 2.825778210747383, "learning_rate": 2.3428961360589044e-06, "loss": 0.5473, "step": 3970 }, { "epoch": 0.6881999956673382, "grad_norm": 2.489867761114689, "learning_rate": 2.3405187240711673e-06, "loss": 0.5097, "step": 3971 }, { "epoch": 0.6883733021381686, "grad_norm": 2.2122096549097043, "learning_rate": 2.3381421502710865e-06, "loss": 0.4848, "step": 3972 }, { "epoch": 0.688546608608999, "grad_norm": 3.4123022552516185, "learning_rate": 2.3357664154076824e-06, "loss": 0.5686, "step": 3973 }, { "epoch": 0.6887199150798293, "grad_norm": 2.0986779505496442, "learning_rate": 2.3333915202297155e-06, "loss": 0.5591, "step": 3974 }, { "epoch": 0.6888932215506597, "grad_norm": 2.168133078733312, "learning_rate": 2.3310174654856814e-06, "loss": 0.5005, "step": 3975 }, { "epoch": 0.68906652802149, "grad_norm": 3.077970026922439, "learning_rate": 2.3286442519238073e-06, "loss": 0.5295, "step": 3976 }, { "epoch": 0.6892398344923204, "grad_norm": 3.016459548658323, "learning_rate": 2.326271880292059e-06, "loss": 0.5993, "step": 3977 }, { "epoch": 0.6894131409631508, "grad_norm": 2.4742005413491417, "learning_rate": 2.323900351338136e-06, "loss": 0.4741, "step": 3978 }, { "epoch": 0.6895864474339811, "grad_norm": 2.590142305213227, "learning_rate": 2.321529665809472e-06, "loss": 0.4656, "step": 3979 }, { "epoch": 0.6897597539048114, "grad_norm": 2.5526411888771094, "learning_rate": 2.3191598244532364e-06, "loss": 0.5986, "step": 3980 }, { "epoch": 0.6899330603756417, "grad_norm": 2.792708515549679, "learning_rate": 2.3167908280163293e-06, "loss": 0.562, "step": 3981 }, { "epoch": 0.6901063668464721, "grad_norm": 2.4830215358341605, "learning_rate": 2.314422677245388e-06, "loss": 0.5913, "step": 3982 }, { "epoch": 0.6902796733173024, "grad_norm": 2.3319477314583428, "learning_rate": 2.31205537288678e-06, "loss": 0.4859, "step": 3983 }, { "epoch": 0.6904529797881328, "grad_norm": 2.7281475139697964, "learning_rate": 2.3096889156866085e-06, "loss": 0.5943, "step": 3984 }, { "epoch": 0.6906262862589632, "grad_norm": 2.099945295970197, "learning_rate": 2.3073233063907096e-06, "loss": 0.549, "step": 3985 }, { "epoch": 0.6907995927297935, "grad_norm": 2.4261918055834477, "learning_rate": 2.3049585457446498e-06, "loss": 0.5373, "step": 3986 }, { "epoch": 0.6909728992006239, "grad_norm": 2.799861459373528, "learning_rate": 2.3025946344937317e-06, "loss": 0.5834, "step": 3987 }, { "epoch": 0.6911462056714542, "grad_norm": 2.77265088602916, "learning_rate": 2.3002315733829865e-06, "loss": 0.5479, "step": 3988 }, { "epoch": 0.6913195121422846, "grad_norm": 2.29507033283124, "learning_rate": 2.29786936315718e-06, "loss": 0.51, "step": 3989 }, { "epoch": 0.691492818613115, "grad_norm": 2.18258272435562, "learning_rate": 2.295508004560809e-06, "loss": 0.5309, "step": 3990 }, { "epoch": 0.6916661250839453, "grad_norm": 2.2758206060487107, "learning_rate": 2.2931474983381024e-06, "loss": 0.4488, "step": 3991 }, { "epoch": 0.6918394315547757, "grad_norm": 2.7806222083750174, "learning_rate": 2.290787845233019e-06, "loss": 0.5182, "step": 3992 }, { "epoch": 0.692012738025606, "grad_norm": 3.8075334938535406, "learning_rate": 2.2884290459892504e-06, "loss": 0.4743, "step": 3993 }, { "epoch": 0.6921860444964364, "grad_norm": 2.350667198118308, "learning_rate": 2.286071101350219e-06, "loss": 0.5538, "step": 3994 }, { "epoch": 0.6923593509672668, "grad_norm": 2.3689159408500053, "learning_rate": 2.283714012059076e-06, "loss": 0.5117, "step": 3995 }, { "epoch": 0.6925326574380971, "grad_norm": 2.941053169416073, "learning_rate": 2.2813577788587066e-06, "loss": 0.5058, "step": 3996 }, { "epoch": 0.6927059639089275, "grad_norm": 2.437347958872887, "learning_rate": 2.279002402491719e-06, "loss": 0.5764, "step": 3997 }, { "epoch": 0.6928792703797578, "grad_norm": 2.477652828786014, "learning_rate": 2.2766478837004608e-06, "loss": 0.5372, "step": 3998 }, { "epoch": 0.6930525768505882, "grad_norm": 2.2209743545738605, "learning_rate": 2.274294223227004e-06, "loss": 0.4966, "step": 3999 }, { "epoch": 0.6932258833214185, "grad_norm": 2.247062584991952, "learning_rate": 2.271941421813149e-06, "loss": 0.4875, "step": 4000 }, { "epoch": 0.6933991897922489, "grad_norm": 2.59277625484054, "learning_rate": 2.2695894802004294e-06, "loss": 0.5377, "step": 4001 }, { "epoch": 0.6935724962630793, "grad_norm": 2.356775942473531, "learning_rate": 2.2672383991301044e-06, "loss": 0.4953, "step": 4002 }, { "epoch": 0.6937458027339096, "grad_norm": 2.1695568115306783, "learning_rate": 2.2648881793431637e-06, "loss": 0.5607, "step": 4003 }, { "epoch": 0.69391910920474, "grad_norm": 2.468887177409443, "learning_rate": 2.262538821580325e-06, "loss": 0.607, "step": 4004 }, { "epoch": 0.6940924156755703, "grad_norm": 2.7903958004169125, "learning_rate": 2.2601903265820347e-06, "loss": 0.4746, "step": 4005 }, { "epoch": 0.6942657221464006, "grad_norm": 3.078187652963807, "learning_rate": 2.2578426950884686e-06, "loss": 0.598, "step": 4006 }, { "epoch": 0.694439028617231, "grad_norm": 2.2256151254727166, "learning_rate": 2.255495927839525e-06, "loss": 0.4447, "step": 4007 }, { "epoch": 0.6946123350880613, "grad_norm": 2.933330032223314, "learning_rate": 2.253150025574836e-06, "loss": 0.5779, "step": 4008 }, { "epoch": 0.6947856415588917, "grad_norm": 2.746808747307189, "learning_rate": 2.250804989033758e-06, "loss": 0.5796, "step": 4009 }, { "epoch": 0.694958948029722, "grad_norm": 3.1700374755293956, "learning_rate": 2.2484608189553758e-06, "loss": 0.5611, "step": 4010 }, { "epoch": 0.6951322545005524, "grad_norm": 6.159865130172189, "learning_rate": 2.2461175160785005e-06, "loss": 0.5808, "step": 4011 }, { "epoch": 0.6953055609713827, "grad_norm": 3.5402986710048903, "learning_rate": 2.243775081141667e-06, "loss": 0.5246, "step": 4012 }, { "epoch": 0.6954788674422131, "grad_norm": 2.5294544374853656, "learning_rate": 2.241433514883145e-06, "loss": 0.5083, "step": 4013 }, { "epoch": 0.6956521739130435, "grad_norm": 4.05407938963096, "learning_rate": 2.239092818040922e-06, "loss": 0.4745, "step": 4014 }, { "epoch": 0.6958254803838738, "grad_norm": 2.066575900020613, "learning_rate": 2.236752991352715e-06, "loss": 0.5236, "step": 4015 }, { "epoch": 0.6959987868547042, "grad_norm": 2.1668342901920163, "learning_rate": 2.2344140355559657e-06, "loss": 0.5254, "step": 4016 }, { "epoch": 0.6961720933255345, "grad_norm": 2.7043923724354793, "learning_rate": 2.2320759513878444e-06, "loss": 0.515, "step": 4017 }, { "epoch": 0.6963453997963649, "grad_norm": 2.5634895999425997, "learning_rate": 2.22973873958524e-06, "loss": 0.5483, "step": 4018 }, { "epoch": 0.6965187062671953, "grad_norm": 2.6300056296948715, "learning_rate": 2.227402400884773e-06, "loss": 0.5699, "step": 4019 }, { "epoch": 0.6966920127380256, "grad_norm": 1.9779643928072055, "learning_rate": 2.225066936022785e-06, "loss": 0.5439, "step": 4020 }, { "epoch": 0.696865319208856, "grad_norm": 1.9848319933313998, "learning_rate": 2.222732345735345e-06, "loss": 0.4507, "step": 4021 }, { "epoch": 0.6970386256796863, "grad_norm": 21.186918278438018, "learning_rate": 2.2203986307582443e-06, "loss": 0.6007, "step": 4022 }, { "epoch": 0.6972119321505167, "grad_norm": 2.7812734314295255, "learning_rate": 2.2180657918269994e-06, "loss": 0.4572, "step": 4023 }, { "epoch": 0.697385238621347, "grad_norm": 2.301008503676595, "learning_rate": 2.2157338296768492e-06, "loss": 0.5934, "step": 4024 }, { "epoch": 0.6975585450921774, "grad_norm": 4.56589646548941, "learning_rate": 2.213402745042758e-06, "loss": 0.5711, "step": 4025 }, { "epoch": 0.6977318515630078, "grad_norm": 2.4212825000809994, "learning_rate": 2.2110725386594105e-06, "loss": 0.5841, "step": 4026 }, { "epoch": 0.6979051580338381, "grad_norm": 2.8244821703306324, "learning_rate": 2.2087432112612235e-06, "loss": 0.5902, "step": 4027 }, { "epoch": 0.6980784645046685, "grad_norm": 2.1143131908899546, "learning_rate": 2.2064147635823233e-06, "loss": 0.4725, "step": 4028 }, { "epoch": 0.6982517709754988, "grad_norm": 2.1873784382514936, "learning_rate": 2.204087196356569e-06, "loss": 0.5914, "step": 4029 }, { "epoch": 0.6984250774463292, "grad_norm": 2.487860805065025, "learning_rate": 2.2017605103175378e-06, "loss": 0.5394, "step": 4030 }, { "epoch": 0.6985983839171596, "grad_norm": 2.5766649358181484, "learning_rate": 2.199434706198531e-06, "loss": 0.5653, "step": 4031 }, { "epoch": 0.6987716903879898, "grad_norm": 3.0355231890127614, "learning_rate": 2.1971097847325717e-06, "loss": 0.5002, "step": 4032 }, { "epoch": 0.6989449968588202, "grad_norm": 3.3234390269413163, "learning_rate": 2.1947857466524037e-06, "loss": 0.4956, "step": 4033 }, { "epoch": 0.6991183033296505, "grad_norm": 3.1117122246416615, "learning_rate": 2.1924625926904936e-06, "loss": 0.5779, "step": 4034 }, { "epoch": 0.6992916098004809, "grad_norm": 2.2496278927844435, "learning_rate": 2.1901403235790286e-06, "loss": 0.5696, "step": 4035 }, { "epoch": 0.6994649162713112, "grad_norm": 1.9317893323226414, "learning_rate": 2.187818940049918e-06, "loss": 0.5121, "step": 4036 }, { "epoch": 0.6996382227421416, "grad_norm": 3.028687636935137, "learning_rate": 2.185498442834791e-06, "loss": 0.5531, "step": 4037 }, { "epoch": 0.699811529212972, "grad_norm": 2.4583409090339634, "learning_rate": 2.183178832664997e-06, "loss": 0.4463, "step": 4038 }, { "epoch": 0.6999848356838023, "grad_norm": 2.5638015936218097, "learning_rate": 2.1808601102716076e-06, "loss": 0.5527, "step": 4039 }, { "epoch": 0.7001581421546327, "grad_norm": 2.2599450794573257, "learning_rate": 2.1785422763854136e-06, "loss": 0.5068, "step": 4040 }, { "epoch": 0.700331448625463, "grad_norm": 2.424415809314955, "learning_rate": 2.176225331736925e-06, "loss": 0.5462, "step": 4041 }, { "epoch": 0.7005047550962934, "grad_norm": 3.3772533404262375, "learning_rate": 2.1739092770563736e-06, "loss": 0.5573, "step": 4042 }, { "epoch": 0.7006780615671238, "grad_norm": 2.890997313245369, "learning_rate": 2.1715941130737078e-06, "loss": 0.521, "step": 4043 }, { "epoch": 0.7008513680379541, "grad_norm": 2.1836484277085995, "learning_rate": 2.1692798405185983e-06, "loss": 0.4465, "step": 4044 }, { "epoch": 0.7010246745087845, "grad_norm": 2.680148987626085, "learning_rate": 2.1669664601204325e-06, "loss": 0.5526, "step": 4045 }, { "epoch": 0.7011979809796148, "grad_norm": 2.2684229502679147, "learning_rate": 2.1646539726083184e-06, "loss": 0.4986, "step": 4046 }, { "epoch": 0.7013712874504452, "grad_norm": 2.192320241667633, "learning_rate": 2.1623423787110827e-06, "loss": 0.4699, "step": 4047 }, { "epoch": 0.7015445939212755, "grad_norm": 2.1574624108199068, "learning_rate": 2.160031679157267e-06, "loss": 0.5105, "step": 4048 }, { "epoch": 0.7017179003921059, "grad_norm": 2.6305266222361126, "learning_rate": 2.157721874675134e-06, "loss": 0.49, "step": 4049 }, { "epoch": 0.7018912068629363, "grad_norm": 2.5749212952011638, "learning_rate": 2.1554129659926647e-06, "loss": 0.491, "step": 4050 }, { "epoch": 0.7020645133337666, "grad_norm": 2.587117005563362, "learning_rate": 2.153104953837557e-06, "loss": 0.5499, "step": 4051 }, { "epoch": 0.702237819804597, "grad_norm": 2.3291298546940986, "learning_rate": 2.150797838937224e-06, "loss": 0.5084, "step": 4052 }, { "epoch": 0.7024111262754273, "grad_norm": 2.24822067279429, "learning_rate": 2.148491622018802e-06, "loss": 0.5422, "step": 4053 }, { "epoch": 0.7025844327462577, "grad_norm": 2.2605474980861313, "learning_rate": 2.1461863038091395e-06, "loss": 0.4856, "step": 4054 }, { "epoch": 0.7027577392170881, "grad_norm": 3.379632198720846, "learning_rate": 2.143881885034801e-06, "loss": 0.5247, "step": 4055 }, { "epoch": 0.7029310456879184, "grad_norm": 2.712246263526474, "learning_rate": 2.14157836642207e-06, "loss": 0.4462, "step": 4056 }, { "epoch": 0.7031043521587488, "grad_norm": 2.3878768422489154, "learning_rate": 2.139275748696946e-06, "loss": 0.5208, "step": 4057 }, { "epoch": 0.703277658629579, "grad_norm": 2.2769798587814987, "learning_rate": 2.136974032585146e-06, "loss": 0.4884, "step": 4058 }, { "epoch": 0.7034509651004094, "grad_norm": 2.6086836959705217, "learning_rate": 2.134673218812096e-06, "loss": 0.5108, "step": 4059 }, { "epoch": 0.7036242715712397, "grad_norm": 2.7264872335438244, "learning_rate": 2.1323733081029457e-06, "loss": 0.5613, "step": 4060 }, { "epoch": 0.7037975780420701, "grad_norm": 2.2820188072554477, "learning_rate": 2.1300743011825568e-06, "loss": 0.5291, "step": 4061 }, { "epoch": 0.7039708845129005, "grad_norm": 2.686148044805482, "learning_rate": 2.1277761987755057e-06, "loss": 0.5005, "step": 4062 }, { "epoch": 0.7041441909837308, "grad_norm": 2.7590139999634697, "learning_rate": 2.1254790016060857e-06, "loss": 0.5905, "step": 4063 }, { "epoch": 0.7043174974545612, "grad_norm": 2.860033980369294, "learning_rate": 2.123182710398302e-06, "loss": 0.5162, "step": 4064 }, { "epoch": 0.7044908039253915, "grad_norm": 2.8842986749498722, "learning_rate": 2.1208873258758765e-06, "loss": 0.4893, "step": 4065 }, { "epoch": 0.7046641103962219, "grad_norm": 2.4753621292418595, "learning_rate": 2.118592848762245e-06, "loss": 0.4906, "step": 4066 }, { "epoch": 0.7048374168670523, "grad_norm": 2.4438891646117633, "learning_rate": 2.116299279780555e-06, "loss": 0.5713, "step": 4067 }, { "epoch": 0.7050107233378826, "grad_norm": 2.34294297640603, "learning_rate": 2.1140066196536748e-06, "loss": 0.5914, "step": 4068 }, { "epoch": 0.705184029808713, "grad_norm": 2.0250640647917106, "learning_rate": 2.1117148691041754e-06, "loss": 0.4976, "step": 4069 }, { "epoch": 0.7053573362795433, "grad_norm": 3.2097515359683317, "learning_rate": 2.10942402885435e-06, "loss": 0.5477, "step": 4070 }, { "epoch": 0.7055306427503737, "grad_norm": 2.676647058912372, "learning_rate": 2.1071340996262003e-06, "loss": 0.5353, "step": 4071 }, { "epoch": 0.705703949221204, "grad_norm": 3.8544272555956156, "learning_rate": 2.1048450821414435e-06, "loss": 0.5576, "step": 4072 }, { "epoch": 0.7058772556920344, "grad_norm": 2.547097183333202, "learning_rate": 2.102556977121508e-06, "loss": 0.4847, "step": 4073 }, { "epoch": 0.7060505621628648, "grad_norm": 3.661464048640137, "learning_rate": 2.1002697852875342e-06, "loss": 0.4969, "step": 4074 }, { "epoch": 0.7062238686336951, "grad_norm": 3.161108886633675, "learning_rate": 2.097983507360376e-06, "loss": 0.5322, "step": 4075 }, { "epoch": 0.7063971751045255, "grad_norm": 2.2138859980367624, "learning_rate": 2.0956981440605994e-06, "loss": 0.4476, "step": 4076 }, { "epoch": 0.7065704815753558, "grad_norm": 3.540144595236324, "learning_rate": 2.093413696108481e-06, "loss": 0.502, "step": 4077 }, { "epoch": 0.7067437880461862, "grad_norm": 2.3984318264540674, "learning_rate": 2.091130164224009e-06, "loss": 0.5066, "step": 4078 }, { "epoch": 0.7069170945170166, "grad_norm": 2.501911113882902, "learning_rate": 2.088847549126883e-06, "loss": 0.4806, "step": 4079 }, { "epoch": 0.7070904009878469, "grad_norm": 2.410075957312402, "learning_rate": 2.0865658515365144e-06, "loss": 0.5513, "step": 4080 }, { "epoch": 0.7072637074586773, "grad_norm": 2.7311130567788466, "learning_rate": 2.0842850721720254e-06, "loss": 0.5323, "step": 4081 }, { "epoch": 0.7074370139295076, "grad_norm": 2.362620968463312, "learning_rate": 2.0820052117522475e-06, "loss": 0.5233, "step": 4082 }, { "epoch": 0.707610320400338, "grad_norm": 2.264602173229173, "learning_rate": 2.079726270995723e-06, "loss": 0.5828, "step": 4083 }, { "epoch": 0.7077836268711682, "grad_norm": 2.3226794645464826, "learning_rate": 2.0774482506207067e-06, "loss": 0.5841, "step": 4084 }, { "epoch": 0.7079569333419986, "grad_norm": 2.5680611057438574, "learning_rate": 2.0751711513451593e-06, "loss": 0.5312, "step": 4085 }, { "epoch": 0.708130239812829, "grad_norm": 10.774432787915314, "learning_rate": 2.0728949738867537e-06, "loss": 0.4968, "step": 4086 }, { "epoch": 0.7083035462836593, "grad_norm": 2.1785963366566623, "learning_rate": 2.070619718962873e-06, "loss": 0.5703, "step": 4087 }, { "epoch": 0.7084768527544897, "grad_norm": 2.3243035826782736, "learning_rate": 2.0683453872906074e-06, "loss": 0.5753, "step": 4088 }, { "epoch": 0.70865015922532, "grad_norm": 2.4198565006082364, "learning_rate": 2.0660719795867585e-06, "loss": 0.5108, "step": 4089 }, { "epoch": 0.7088234656961504, "grad_norm": 2.819387770206597, "learning_rate": 2.0637994965678326e-06, "loss": 0.5169, "step": 4090 }, { "epoch": 0.7089967721669808, "grad_norm": 2.269375892509757, "learning_rate": 2.061527938950049e-06, "loss": 0.5695, "step": 4091 }, { "epoch": 0.7091700786378111, "grad_norm": 4.772659288739512, "learning_rate": 2.0592573074493334e-06, "loss": 0.5172, "step": 4092 }, { "epoch": 0.7093433851086415, "grad_norm": 4.468578301491856, "learning_rate": 2.0569876027813173e-06, "loss": 0.5114, "step": 4093 }, { "epoch": 0.7095166915794718, "grad_norm": 2.3120834114785813, "learning_rate": 2.0547188256613476e-06, "loss": 0.5517, "step": 4094 }, { "epoch": 0.7096899980503022, "grad_norm": 2.479500288706892, "learning_rate": 2.0524509768044715e-06, "loss": 0.4671, "step": 4095 }, { "epoch": 0.7098633045211326, "grad_norm": 2.105320927083148, "learning_rate": 2.0501840569254454e-06, "loss": 0.5208, "step": 4096 }, { "epoch": 0.7100366109919629, "grad_norm": 2.1676087574161653, "learning_rate": 2.047918066738734e-06, "loss": 0.482, "step": 4097 }, { "epoch": 0.7102099174627933, "grad_norm": 2.282161798841255, "learning_rate": 2.045653006958509e-06, "loss": 0.5669, "step": 4098 }, { "epoch": 0.7103832239336236, "grad_norm": 2.6629250371020143, "learning_rate": 2.0433888782986493e-06, "loss": 0.5491, "step": 4099 }, { "epoch": 0.710556530404454, "grad_norm": 2.819702000196649, "learning_rate": 2.0411256814727364e-06, "loss": 0.541, "step": 4100 }, { "epoch": 0.7107298368752843, "grad_norm": 2.6659904504910243, "learning_rate": 2.038863417194063e-06, "loss": 0.4739, "step": 4101 }, { "epoch": 0.7109031433461147, "grad_norm": 2.558484781995716, "learning_rate": 2.0366020861756258e-06, "loss": 0.5443, "step": 4102 }, { "epoch": 0.7110764498169451, "grad_norm": 3.430005205702376, "learning_rate": 2.0343416891301276e-06, "loss": 0.5087, "step": 4103 }, { "epoch": 0.7112497562877754, "grad_norm": 2.566072270095428, "learning_rate": 2.0320822267699768e-06, "loss": 0.5338, "step": 4104 }, { "epoch": 0.7114230627586058, "grad_norm": 2.445839731942673, "learning_rate": 2.0298236998072873e-06, "loss": 0.5512, "step": 4105 }, { "epoch": 0.7115963692294361, "grad_norm": 3.3854196877401055, "learning_rate": 2.0275661089538783e-06, "loss": 0.6451, "step": 4106 }, { "epoch": 0.7117696757002665, "grad_norm": 2.3435456971196906, "learning_rate": 2.025309454921274e-06, "loss": 0.5347, "step": 4107 }, { "epoch": 0.7119429821710969, "grad_norm": 3.07045305989532, "learning_rate": 2.0230537384207004e-06, "loss": 0.4919, "step": 4108 }, { "epoch": 0.7121162886419272, "grad_norm": 2.324556002207252, "learning_rate": 2.0207989601630973e-06, "loss": 0.4733, "step": 4109 }, { "epoch": 0.7122895951127575, "grad_norm": 2.5272913764063025, "learning_rate": 2.018545120859096e-06, "loss": 0.5129, "step": 4110 }, { "epoch": 0.7124629015835878, "grad_norm": 3.2247548146030214, "learning_rate": 2.0162922212190407e-06, "loss": 0.4389, "step": 4111 }, { "epoch": 0.7126362080544182, "grad_norm": 2.2354250621025944, "learning_rate": 2.014040261952976e-06, "loss": 0.5046, "step": 4112 }, { "epoch": 0.7128095145252485, "grad_norm": 2.521044316528376, "learning_rate": 2.0117892437706517e-06, "loss": 0.538, "step": 4113 }, { "epoch": 0.7129828209960789, "grad_norm": 3.27002585728693, "learning_rate": 2.00953916738152e-06, "loss": 0.5542, "step": 4114 }, { "epoch": 0.7131561274669093, "grad_norm": 3.084864730446489, "learning_rate": 2.007290033494737e-06, "loss": 0.6112, "step": 4115 }, { "epoch": 0.7133294339377396, "grad_norm": 2.2409659670091306, "learning_rate": 2.0050418428191607e-06, "loss": 0.4827, "step": 4116 }, { "epoch": 0.71350274040857, "grad_norm": 2.257543371592492, "learning_rate": 2.0027945960633534e-06, "loss": 0.6372, "step": 4117 }, { "epoch": 0.7136760468794003, "grad_norm": 2.7913578806145285, "learning_rate": 2.0005482939355774e-06, "loss": 0.5011, "step": 4118 }, { "epoch": 0.7138493533502307, "grad_norm": 2.792566784103836, "learning_rate": 1.9983029371438e-06, "loss": 0.5476, "step": 4119 }, { "epoch": 0.7140226598210611, "grad_norm": 2.38930103625247, "learning_rate": 1.9960585263956893e-06, "loss": 0.5251, "step": 4120 }, { "epoch": 0.7141959662918914, "grad_norm": 2.388495361578158, "learning_rate": 1.993815062398615e-06, "loss": 0.5632, "step": 4121 }, { "epoch": 0.7143692727627218, "grad_norm": 2.746659293129246, "learning_rate": 1.9915725458596495e-06, "loss": 0.5327, "step": 4122 }, { "epoch": 0.7145425792335521, "grad_norm": 2.773589785888863, "learning_rate": 1.9893309774855652e-06, "loss": 0.5758, "step": 4123 }, { "epoch": 0.7147158857043825, "grad_norm": 2.3581116865338765, "learning_rate": 1.987090357982836e-06, "loss": 0.4815, "step": 4124 }, { "epoch": 0.7148891921752129, "grad_norm": 2.0249516703342394, "learning_rate": 1.9848506880576377e-06, "loss": 0.5188, "step": 4125 }, { "epoch": 0.7150624986460432, "grad_norm": 2.2307632731150746, "learning_rate": 1.9826119684158463e-06, "loss": 0.5215, "step": 4126 }, { "epoch": 0.7152358051168736, "grad_norm": 2.4960211678083843, "learning_rate": 1.9803741997630376e-06, "loss": 0.4741, "step": 4127 }, { "epoch": 0.7154091115877039, "grad_norm": 2.4535718981453716, "learning_rate": 1.9781373828044882e-06, "loss": 0.5735, "step": 4128 }, { "epoch": 0.7155824180585343, "grad_norm": 3.43428941247587, "learning_rate": 1.975901518245175e-06, "loss": 0.4525, "step": 4129 }, { "epoch": 0.7157557245293646, "grad_norm": 2.1905153299287052, "learning_rate": 1.973666606789777e-06, "loss": 0.4477, "step": 4130 }, { "epoch": 0.715929031000195, "grad_norm": 2.6226981559602542, "learning_rate": 1.9714326491426662e-06, "loss": 0.5098, "step": 4131 }, { "epoch": 0.7161023374710254, "grad_norm": 3.000847110236303, "learning_rate": 1.9691996460079192e-06, "loss": 0.5479, "step": 4132 }, { "epoch": 0.7162756439418557, "grad_norm": 2.61614647202154, "learning_rate": 1.9669675980893107e-06, "loss": 0.5021, "step": 4133 }, { "epoch": 0.7164489504126861, "grad_norm": 2.172146402281642, "learning_rate": 1.9647365060903162e-06, "loss": 0.5547, "step": 4134 }, { "epoch": 0.7166222568835164, "grad_norm": 2.796003570972996, "learning_rate": 1.9625063707141067e-06, "loss": 0.5269, "step": 4135 }, { "epoch": 0.7167955633543467, "grad_norm": 2.250220502412108, "learning_rate": 1.9602771926635537e-06, "loss": 0.4767, "step": 4136 }, { "epoch": 0.716968869825177, "grad_norm": 3.033796813576483, "learning_rate": 1.958048972641226e-06, "loss": 0.5241, "step": 4137 }, { "epoch": 0.7171421762960074, "grad_norm": 3.1541170197453554, "learning_rate": 1.955821711349391e-06, "loss": 0.5151, "step": 4138 }, { "epoch": 0.7173154827668378, "grad_norm": 2.507282261667178, "learning_rate": 1.953595409490013e-06, "loss": 0.5227, "step": 4139 }, { "epoch": 0.7174887892376681, "grad_norm": 2.2689417785349146, "learning_rate": 1.951370067764758e-06, "loss": 0.5504, "step": 4140 }, { "epoch": 0.7176620957084985, "grad_norm": 2.3051486919598143, "learning_rate": 1.949145686874981e-06, "loss": 0.541, "step": 4141 }, { "epoch": 0.7178354021793288, "grad_norm": 2.605511448633844, "learning_rate": 1.946922267521741e-06, "loss": 0.5238, "step": 4142 }, { "epoch": 0.7180087086501592, "grad_norm": 2.4709965583343014, "learning_rate": 1.9446998104057933e-06, "loss": 0.5264, "step": 4143 }, { "epoch": 0.7181820151209896, "grad_norm": 5.474570204794496, "learning_rate": 1.9424783162275886e-06, "loss": 0.5357, "step": 4144 }, { "epoch": 0.7183553215918199, "grad_norm": 3.715546192092099, "learning_rate": 1.9402577856872736e-06, "loss": 0.5725, "step": 4145 }, { "epoch": 0.7185286280626503, "grad_norm": 2.0761269865748697, "learning_rate": 1.938038219484692e-06, "loss": 0.46, "step": 4146 }, { "epoch": 0.7187019345334806, "grad_norm": 2.5007512951955304, "learning_rate": 1.9358196183193843e-06, "loss": 0.5083, "step": 4147 }, { "epoch": 0.718875241004311, "grad_norm": 2.7501902542230927, "learning_rate": 1.9336019828905846e-06, "loss": 0.5733, "step": 4148 }, { "epoch": 0.7190485474751414, "grad_norm": 2.3002536623406216, "learning_rate": 1.9313853138972267e-06, "loss": 0.5064, "step": 4149 }, { "epoch": 0.7192218539459717, "grad_norm": 2.783614557745699, "learning_rate": 1.9291696120379377e-06, "loss": 0.5402, "step": 4150 }, { "epoch": 0.7193951604168021, "grad_norm": 2.2082824730873307, "learning_rate": 1.926954878011036e-06, "loss": 0.5665, "step": 4151 }, { "epoch": 0.7195684668876324, "grad_norm": 2.5535834775843296, "learning_rate": 1.9247411125145404e-06, "loss": 0.5518, "step": 4152 }, { "epoch": 0.7197417733584628, "grad_norm": 2.2929395191977306, "learning_rate": 1.922528316246162e-06, "loss": 0.483, "step": 4153 }, { "epoch": 0.7199150798292931, "grad_norm": 2.6782469106531526, "learning_rate": 1.9203164899033078e-06, "loss": 0.4992, "step": 4154 }, { "epoch": 0.7200883863001235, "grad_norm": 2.4606345158927185, "learning_rate": 1.918105634183078e-06, "loss": 0.547, "step": 4155 }, { "epoch": 0.7202616927709539, "grad_norm": 3.8932026228237233, "learning_rate": 1.9158957497822657e-06, "loss": 0.5385, "step": 4156 }, { "epoch": 0.7204349992417842, "grad_norm": 4.060301588553822, "learning_rate": 1.913686837397361e-06, "loss": 0.5913, "step": 4157 }, { "epoch": 0.7206083057126146, "grad_norm": 2.5106434510749267, "learning_rate": 1.911478897724545e-06, "loss": 0.4936, "step": 4158 }, { "epoch": 0.7207816121834449, "grad_norm": 3.0672134390289525, "learning_rate": 1.9092719314596937e-06, "loss": 0.5, "step": 4159 }, { "epoch": 0.7209549186542753, "grad_norm": 2.63991549720969, "learning_rate": 1.9070659392983753e-06, "loss": 0.5106, "step": 4160 }, { "epoch": 0.7211282251251057, "grad_norm": 2.4688468000157466, "learning_rate": 1.9048609219358522e-06, "loss": 0.5272, "step": 4161 }, { "epoch": 0.721301531595936, "grad_norm": 2.460148774726377, "learning_rate": 1.902656880067078e-06, "loss": 0.5073, "step": 4162 }, { "epoch": 0.7214748380667663, "grad_norm": 3.7933225602950777, "learning_rate": 1.9004538143867002e-06, "loss": 0.5706, "step": 4163 }, { "epoch": 0.7216481445375966, "grad_norm": 2.2270996685669213, "learning_rate": 1.8982517255890576e-06, "loss": 0.4809, "step": 4164 }, { "epoch": 0.721821451008427, "grad_norm": 2.1173533636446376, "learning_rate": 1.8960506143681823e-06, "loss": 0.5555, "step": 4165 }, { "epoch": 0.7219947574792573, "grad_norm": 2.4447816835350045, "learning_rate": 1.8938504814177971e-06, "loss": 0.5655, "step": 4166 }, { "epoch": 0.7221680639500877, "grad_norm": 2.205219164070941, "learning_rate": 1.8916513274313176e-06, "loss": 0.5331, "step": 4167 }, { "epoch": 0.7223413704209181, "grad_norm": 2.2987968906802494, "learning_rate": 1.88945315310185e-06, "loss": 0.5001, "step": 4168 }, { "epoch": 0.7225146768917484, "grad_norm": 2.970134822879706, "learning_rate": 1.8872559591221923e-06, "loss": 0.4593, "step": 4169 }, { "epoch": 0.7226879833625788, "grad_norm": 2.7016167412373577, "learning_rate": 1.8850597461848324e-06, "loss": 0.5676, "step": 4170 }, { "epoch": 0.7228612898334091, "grad_norm": 2.4583470108342085, "learning_rate": 1.8828645149819524e-06, "loss": 0.5602, "step": 4171 }, { "epoch": 0.7230345963042395, "grad_norm": 2.3660091330138457, "learning_rate": 1.8806702662054188e-06, "loss": 0.5094, "step": 4172 }, { "epoch": 0.7232079027750699, "grad_norm": 2.4233701350490913, "learning_rate": 1.8784770005467933e-06, "loss": 0.5553, "step": 4173 }, { "epoch": 0.7233812092459002, "grad_norm": 2.589637533471657, "learning_rate": 1.876284718697326e-06, "loss": 0.6029, "step": 4174 }, { "epoch": 0.7235545157167306, "grad_norm": 2.376151619165698, "learning_rate": 1.87409342134796e-06, "loss": 0.5004, "step": 4175 }, { "epoch": 0.7237278221875609, "grad_norm": 2.6205255034109847, "learning_rate": 1.8719031091893241e-06, "loss": 0.4556, "step": 4176 }, { "epoch": 0.7239011286583913, "grad_norm": 2.4775848583259203, "learning_rate": 1.8697137829117385e-06, "loss": 0.5444, "step": 4177 }, { "epoch": 0.7240744351292216, "grad_norm": 2.9300854079302168, "learning_rate": 1.8675254432052115e-06, "loss": 0.4757, "step": 4178 }, { "epoch": 0.724247741600052, "grad_norm": 2.3050558426987116, "learning_rate": 1.8653380907594415e-06, "loss": 0.499, "step": 4179 }, { "epoch": 0.7244210480708824, "grad_norm": 2.111123215840276, "learning_rate": 1.863151726263816e-06, "loss": 0.5069, "step": 4180 }, { "epoch": 0.7245943545417127, "grad_norm": 2.935549600423966, "learning_rate": 1.8609663504074122e-06, "loss": 0.575, "step": 4181 }, { "epoch": 0.7247676610125431, "grad_norm": 2.6101977707967174, "learning_rate": 1.8587819638789907e-06, "loss": 0.5392, "step": 4182 }, { "epoch": 0.7249409674833734, "grad_norm": 2.232698919347717, "learning_rate": 1.8565985673670045e-06, "loss": 0.4903, "step": 4183 }, { "epoch": 0.7251142739542038, "grad_norm": 3.6783572704891223, "learning_rate": 1.8544161615595951e-06, "loss": 0.5498, "step": 4184 }, { "epoch": 0.7252875804250342, "grad_norm": 2.5564128894167286, "learning_rate": 1.85223474714459e-06, "loss": 0.5658, "step": 4185 }, { "epoch": 0.7254608868958645, "grad_norm": 2.451582407072763, "learning_rate": 1.850054324809505e-06, "loss": 0.5428, "step": 4186 }, { "epoch": 0.7256341933666949, "grad_norm": 4.325764847637031, "learning_rate": 1.847874895241542e-06, "loss": 0.5319, "step": 4187 }, { "epoch": 0.7258074998375252, "grad_norm": 2.739137042648259, "learning_rate": 1.8456964591275923e-06, "loss": 0.5854, "step": 4188 }, { "epoch": 0.7259808063083555, "grad_norm": 2.4260499901495254, "learning_rate": 1.8435190171542295e-06, "loss": 0.503, "step": 4189 }, { "epoch": 0.7261541127791858, "grad_norm": 3.102221481328577, "learning_rate": 1.8413425700077214e-06, "loss": 0.5523, "step": 4190 }, { "epoch": 0.7263274192500162, "grad_norm": 2.5135115507239085, "learning_rate": 1.8391671183740185e-06, "loss": 0.5918, "step": 4191 }, { "epoch": 0.7265007257208466, "grad_norm": 2.893731750604712, "learning_rate": 1.836992662938753e-06, "loss": 0.5993, "step": 4192 }, { "epoch": 0.7266740321916769, "grad_norm": 2.579924323639266, "learning_rate": 1.8348192043872482e-06, "loss": 0.5803, "step": 4193 }, { "epoch": 0.7268473386625073, "grad_norm": 2.747777050029715, "learning_rate": 1.8326467434045136e-06, "loss": 0.5174, "step": 4194 }, { "epoch": 0.7270206451333376, "grad_norm": 2.518281943835934, "learning_rate": 1.8304752806752413e-06, "loss": 0.4946, "step": 4195 }, { "epoch": 0.727193951604168, "grad_norm": 4.044829383520948, "learning_rate": 1.828304816883812e-06, "loss": 0.4629, "step": 4196 }, { "epoch": 0.7273672580749984, "grad_norm": 2.823876163668613, "learning_rate": 1.8261353527142884e-06, "loss": 0.478, "step": 4197 }, { "epoch": 0.7275405645458287, "grad_norm": 12.808159468163131, "learning_rate": 1.8239668888504208e-06, "loss": 0.5296, "step": 4198 }, { "epoch": 0.7277138710166591, "grad_norm": 1.9938097656752662, "learning_rate": 1.8217994259756422e-06, "loss": 0.4215, "step": 4199 }, { "epoch": 0.7278871774874894, "grad_norm": 2.2800096958932867, "learning_rate": 1.8196329647730716e-06, "loss": 0.4439, "step": 4200 }, { "epoch": 0.7280604839583198, "grad_norm": 6.199331269411094, "learning_rate": 1.8174675059255115e-06, "loss": 0.5527, "step": 4201 }, { "epoch": 0.7282337904291502, "grad_norm": 8.817212933943994, "learning_rate": 1.815303050115449e-06, "loss": 0.5462, "step": 4202 }, { "epoch": 0.7284070968999805, "grad_norm": 2.107239041512733, "learning_rate": 1.8131395980250543e-06, "loss": 0.5073, "step": 4203 }, { "epoch": 0.7285804033708109, "grad_norm": 3.0380862082435756, "learning_rate": 1.8109771503361817e-06, "loss": 0.5285, "step": 4204 }, { "epoch": 0.7287537098416412, "grad_norm": 3.2722063527108634, "learning_rate": 1.8088157077303697e-06, "loss": 0.4258, "step": 4205 }, { "epoch": 0.7289270163124716, "grad_norm": 2.7773966175135123, "learning_rate": 1.8066552708888384e-06, "loss": 0.48, "step": 4206 }, { "epoch": 0.729100322783302, "grad_norm": 2.881859250875839, "learning_rate": 1.804495840492493e-06, "loss": 0.6442, "step": 4207 }, { "epoch": 0.7292736292541323, "grad_norm": 2.4329432993654794, "learning_rate": 1.8023374172219188e-06, "loss": 0.5833, "step": 4208 }, { "epoch": 0.7294469357249627, "grad_norm": 2.199027519574993, "learning_rate": 1.800180001757386e-06, "loss": 0.5191, "step": 4209 }, { "epoch": 0.729620242195793, "grad_norm": 3.0497756273230054, "learning_rate": 1.7980235947788466e-06, "loss": 0.5326, "step": 4210 }, { "epoch": 0.7297935486666234, "grad_norm": 2.568022422891088, "learning_rate": 1.7958681969659342e-06, "loss": 0.4869, "step": 4211 }, { "epoch": 0.7299668551374537, "grad_norm": 2.4330079538172864, "learning_rate": 1.7937138089979662e-06, "loss": 0.6001, "step": 4212 }, { "epoch": 0.7301401616082841, "grad_norm": 2.288674303802796, "learning_rate": 1.791560431553937e-06, "loss": 0.5453, "step": 4213 }, { "epoch": 0.7303134680791145, "grad_norm": 2.2442164620215674, "learning_rate": 1.789408065312528e-06, "loss": 0.5317, "step": 4214 }, { "epoch": 0.7304867745499447, "grad_norm": 2.667470051231879, "learning_rate": 1.7872567109520967e-06, "loss": 0.5198, "step": 4215 }, { "epoch": 0.7306600810207751, "grad_norm": 2.8657292978520896, "learning_rate": 1.7851063691506893e-06, "loss": 0.5582, "step": 4216 }, { "epoch": 0.7308333874916054, "grad_norm": 3.4680733511524546, "learning_rate": 1.7829570405860252e-06, "loss": 0.517, "step": 4217 }, { "epoch": 0.7310066939624358, "grad_norm": 2.2308121476753326, "learning_rate": 1.7808087259355085e-06, "loss": 0.4652, "step": 4218 }, { "epoch": 0.7311800004332661, "grad_norm": 2.136706413146665, "learning_rate": 1.7786614258762219e-06, "loss": 0.4898, "step": 4219 }, { "epoch": 0.7313533069040965, "grad_norm": 2.389822866380587, "learning_rate": 1.7765151410849297e-06, "loss": 0.5702, "step": 4220 }, { "epoch": 0.7315266133749269, "grad_norm": 2.1137802273961075, "learning_rate": 1.7743698722380748e-06, "loss": 0.4443, "step": 4221 }, { "epoch": 0.7316999198457572, "grad_norm": 3.272438825181311, "learning_rate": 1.7722256200117832e-06, "loss": 0.5718, "step": 4222 }, { "epoch": 0.7318732263165876, "grad_norm": 3.1883093324362126, "learning_rate": 1.7700823850818545e-06, "loss": 0.5495, "step": 4223 }, { "epoch": 0.7320465327874179, "grad_norm": 2.5889299214666415, "learning_rate": 1.767940168123773e-06, "loss": 0.5862, "step": 4224 }, { "epoch": 0.7322198392582483, "grad_norm": 2.3915125102409798, "learning_rate": 1.7657989698126998e-06, "loss": 0.4726, "step": 4225 }, { "epoch": 0.7323931457290787, "grad_norm": 2.2989995337026223, "learning_rate": 1.7636587908234759e-06, "loss": 0.6103, "step": 4226 }, { "epoch": 0.732566452199909, "grad_norm": 2.686977467257815, "learning_rate": 1.7615196318306206e-06, "loss": 0.5184, "step": 4227 }, { "epoch": 0.7327397586707394, "grad_norm": 3.202111274210898, "learning_rate": 1.759381493508332e-06, "loss": 0.5967, "step": 4228 }, { "epoch": 0.7329130651415697, "grad_norm": 2.9145701362154144, "learning_rate": 1.7572443765304842e-06, "loss": 0.5113, "step": 4229 }, { "epoch": 0.7330863716124001, "grad_norm": 2.662819424911342, "learning_rate": 1.7551082815706355e-06, "loss": 0.5049, "step": 4230 }, { "epoch": 0.7332596780832304, "grad_norm": 2.961221986556017, "learning_rate": 1.7529732093020158e-06, "loss": 0.52, "step": 4231 }, { "epoch": 0.7334329845540608, "grad_norm": 2.0499806281461197, "learning_rate": 1.750839160397535e-06, "loss": 0.5081, "step": 4232 }, { "epoch": 0.7336062910248912, "grad_norm": 3.076982586313418, "learning_rate": 1.7487061355297825e-06, "loss": 0.4689, "step": 4233 }, { "epoch": 0.7337795974957215, "grad_norm": 2.348628470970524, "learning_rate": 1.746574135371019e-06, "loss": 0.4434, "step": 4234 }, { "epoch": 0.7339529039665519, "grad_norm": 2.6292928727709732, "learning_rate": 1.7444431605931889e-06, "loss": 0.5433, "step": 4235 }, { "epoch": 0.7341262104373822, "grad_norm": 2.456774984233768, "learning_rate": 1.7423132118679093e-06, "loss": 0.5298, "step": 4236 }, { "epoch": 0.7342995169082126, "grad_norm": 3.7012178866628083, "learning_rate": 1.7401842898664761e-06, "loss": 0.4905, "step": 4237 }, { "epoch": 0.734472823379043, "grad_norm": 3.6590152336299853, "learning_rate": 1.7380563952598606e-06, "loss": 0.5615, "step": 4238 }, { "epoch": 0.7346461298498733, "grad_norm": 2.335126968370226, "learning_rate": 1.7359295287187106e-06, "loss": 0.5624, "step": 4239 }, { "epoch": 0.7348194363207037, "grad_norm": 5.336199752584789, "learning_rate": 1.73380369091335e-06, "loss": 0.4895, "step": 4240 }, { "epoch": 0.7349927427915339, "grad_norm": 2.5236943122250133, "learning_rate": 1.731678882513777e-06, "loss": 0.59, "step": 4241 }, { "epoch": 0.7351660492623643, "grad_norm": 2.28610965749027, "learning_rate": 1.7295551041896686e-06, "loss": 0.5818, "step": 4242 }, { "epoch": 0.7353393557331946, "grad_norm": 4.072714667306468, "learning_rate": 1.7274323566103735e-06, "loss": 0.5574, "step": 4243 }, { "epoch": 0.735512662204025, "grad_norm": 2.322901114114035, "learning_rate": 1.7253106404449183e-06, "loss": 0.567, "step": 4244 }, { "epoch": 0.7356859686748554, "grad_norm": 2.8266254452670094, "learning_rate": 1.723189956362003e-06, "loss": 0.5793, "step": 4245 }, { "epoch": 0.7358592751456857, "grad_norm": 3.4056654155633987, "learning_rate": 1.721070305030002e-06, "loss": 0.5276, "step": 4246 }, { "epoch": 0.7360325816165161, "grad_norm": 2.8360115496052845, "learning_rate": 1.7189516871169665e-06, "loss": 0.5262, "step": 4247 }, { "epoch": 0.7362058880873464, "grad_norm": 3.329357732377558, "learning_rate": 1.7168341032906193e-06, "loss": 0.5046, "step": 4248 }, { "epoch": 0.7363791945581768, "grad_norm": 2.1489707551285044, "learning_rate": 1.7147175542183592e-06, "loss": 0.5085, "step": 4249 }, { "epoch": 0.7365525010290072, "grad_norm": 2.2504730709520686, "learning_rate": 1.7126020405672572e-06, "loss": 0.4938, "step": 4250 }, { "epoch": 0.7367258074998375, "grad_norm": 3.0965625467237037, "learning_rate": 1.7104875630040601e-06, "loss": 0.4976, "step": 4251 }, { "epoch": 0.7368991139706679, "grad_norm": 2.3570730581332855, "learning_rate": 1.708374122195186e-06, "loss": 0.6262, "step": 4252 }, { "epoch": 0.7370724204414982, "grad_norm": 2.320580465662039, "learning_rate": 1.7062617188067287e-06, "loss": 0.483, "step": 4253 }, { "epoch": 0.7372457269123286, "grad_norm": 3.3141426129938, "learning_rate": 1.7041503535044512e-06, "loss": 0.531, "step": 4254 }, { "epoch": 0.737419033383159, "grad_norm": 2.62549976566677, "learning_rate": 1.7020400269537912e-06, "loss": 0.5481, "step": 4255 }, { "epoch": 0.7375923398539893, "grad_norm": 3.2269291113330314, "learning_rate": 1.6999307398198623e-06, "loss": 0.5942, "step": 4256 }, { "epoch": 0.7377656463248197, "grad_norm": 2.1395207775085643, "learning_rate": 1.697822492767447e-06, "loss": 0.4775, "step": 4257 }, { "epoch": 0.73793895279565, "grad_norm": 2.5262140251225484, "learning_rate": 1.6957152864609993e-06, "loss": 0.5888, "step": 4258 }, { "epoch": 0.7381122592664804, "grad_norm": 2.2727986776343863, "learning_rate": 1.6936091215646484e-06, "loss": 0.4469, "step": 4259 }, { "epoch": 0.7382855657373107, "grad_norm": 2.4365195144138547, "learning_rate": 1.6915039987421916e-06, "loss": 0.5708, "step": 4260 }, { "epoch": 0.7384588722081411, "grad_norm": 2.2556073383834567, "learning_rate": 1.6893999186571008e-06, "loss": 0.5117, "step": 4261 }, { "epoch": 0.7386321786789715, "grad_norm": 2.655768280638455, "learning_rate": 1.6872968819725176e-06, "loss": 0.5388, "step": 4262 }, { "epoch": 0.7388054851498018, "grad_norm": 2.2940878853081426, "learning_rate": 1.6851948893512575e-06, "loss": 0.5231, "step": 4263 }, { "epoch": 0.7389787916206322, "grad_norm": 2.9105021362511287, "learning_rate": 1.6830939414558007e-06, "loss": 0.5114, "step": 4264 }, { "epoch": 0.7391520980914625, "grad_norm": 2.2268770509352622, "learning_rate": 1.6809940389483036e-06, "loss": 0.6371, "step": 4265 }, { "epoch": 0.7393254045622929, "grad_norm": 2.281550396587767, "learning_rate": 1.6788951824905924e-06, "loss": 0.5163, "step": 4266 }, { "epoch": 0.7394987110331231, "grad_norm": 2.524843383540634, "learning_rate": 1.6767973727441627e-06, "loss": 0.5754, "step": 4267 }, { "epoch": 0.7396720175039535, "grad_norm": 2.5215131186780066, "learning_rate": 1.6747006103701796e-06, "loss": 0.5201, "step": 4268 }, { "epoch": 0.7398453239747839, "grad_norm": 2.48472360002305, "learning_rate": 1.6726048960294793e-06, "loss": 0.5739, "step": 4269 }, { "epoch": 0.7400186304456142, "grad_norm": 3.1066297366479696, "learning_rate": 1.6705102303825666e-06, "loss": 0.4348, "step": 4270 }, { "epoch": 0.7401919369164446, "grad_norm": 2.3364955964082466, "learning_rate": 1.6684166140896179e-06, "loss": 0.4638, "step": 4271 }, { "epoch": 0.7403652433872749, "grad_norm": 2.5012685470444462, "learning_rate": 1.666324047810477e-06, "loss": 0.431, "step": 4272 }, { "epoch": 0.7405385498581053, "grad_norm": 3.084733327322988, "learning_rate": 1.664232532204657e-06, "loss": 0.5972, "step": 4273 }, { "epoch": 0.7407118563289357, "grad_norm": 2.4434751129564574, "learning_rate": 1.6621420679313415e-06, "loss": 0.5165, "step": 4274 }, { "epoch": 0.740885162799766, "grad_norm": 2.6769572062733196, "learning_rate": 1.660052655649378e-06, "loss": 0.6036, "step": 4275 }, { "epoch": 0.7410584692705964, "grad_norm": 3.0027550206131424, "learning_rate": 1.6579642960172876e-06, "loss": 0.5752, "step": 4276 }, { "epoch": 0.7412317757414267, "grad_norm": 2.9962419235045923, "learning_rate": 1.6558769896932575e-06, "loss": 0.4897, "step": 4277 }, { "epoch": 0.7414050822122571, "grad_norm": 4.88292587564731, "learning_rate": 1.653790737335143e-06, "loss": 0.5623, "step": 4278 }, { "epoch": 0.7415783886830875, "grad_norm": 2.4208668861183678, "learning_rate": 1.6517055396004677e-06, "loss": 0.6021, "step": 4279 }, { "epoch": 0.7417516951539178, "grad_norm": 2.6148258707854284, "learning_rate": 1.649621397146423e-06, "loss": 0.5176, "step": 4280 }, { "epoch": 0.7419250016247482, "grad_norm": 2.9377198464202676, "learning_rate": 1.647538310629867e-06, "loss": 0.4859, "step": 4281 }, { "epoch": 0.7420983080955785, "grad_norm": 2.4357633256801687, "learning_rate": 1.6454562807073248e-06, "loss": 0.5665, "step": 4282 }, { "epoch": 0.7422716145664089, "grad_norm": 2.3626034466124386, "learning_rate": 1.643375308034989e-06, "loss": 0.4993, "step": 4283 }, { "epoch": 0.7424449210372392, "grad_norm": 2.2026377850216496, "learning_rate": 1.6412953932687193e-06, "loss": 0.4665, "step": 4284 }, { "epoch": 0.7426182275080696, "grad_norm": 3.0158563596742693, "learning_rate": 1.6392165370640417e-06, "loss": 0.6017, "step": 4285 }, { "epoch": 0.7427915339789, "grad_norm": 3.082097609399216, "learning_rate": 1.637138740076148e-06, "loss": 0.5727, "step": 4286 }, { "epoch": 0.7429648404497303, "grad_norm": 3.358612678328367, "learning_rate": 1.6350620029598967e-06, "loss": 0.5504, "step": 4287 }, { "epoch": 0.7431381469205607, "grad_norm": 2.888936517259822, "learning_rate": 1.6329863263698125e-06, "loss": 0.5228, "step": 4288 }, { "epoch": 0.743311453391391, "grad_norm": 2.8230340153114946, "learning_rate": 1.6309117109600852e-06, "loss": 0.571, "step": 4289 }, { "epoch": 0.7434847598622214, "grad_norm": 2.5625647098577904, "learning_rate": 1.6288381573845714e-06, "loss": 0.5798, "step": 4290 }, { "epoch": 0.7436580663330518, "grad_norm": 2.464913733017393, "learning_rate": 1.6267656662967912e-06, "loss": 0.5394, "step": 4291 }, { "epoch": 0.7438313728038821, "grad_norm": 2.3220915222063123, "learning_rate": 1.6246942383499308e-06, "loss": 0.5174, "step": 4292 }, { "epoch": 0.7440046792747124, "grad_norm": 3.492664295669208, "learning_rate": 1.622623874196842e-06, "loss": 0.4912, "step": 4293 }, { "epoch": 0.7441779857455427, "grad_norm": 7.065750436161943, "learning_rate": 1.620554574490042e-06, "loss": 0.5573, "step": 4294 }, { "epoch": 0.7443512922163731, "grad_norm": 2.540842342995155, "learning_rate": 1.6184863398817074e-06, "loss": 0.5481, "step": 4295 }, { "epoch": 0.7445245986872034, "grad_norm": 2.238921324024391, "learning_rate": 1.6164191710236838e-06, "loss": 0.4909, "step": 4296 }, { "epoch": 0.7446979051580338, "grad_norm": 2.6767365383653936, "learning_rate": 1.614353068567482e-06, "loss": 0.5704, "step": 4297 }, { "epoch": 0.7448712116288642, "grad_norm": 2.525380638579862, "learning_rate": 1.6122880331642743e-06, "loss": 0.5128, "step": 4298 }, { "epoch": 0.7450445180996945, "grad_norm": 2.9930802628612803, "learning_rate": 1.6102240654648966e-06, "loss": 0.5103, "step": 4299 }, { "epoch": 0.7452178245705249, "grad_norm": 2.3459671912005486, "learning_rate": 1.6081611661198482e-06, "loss": 0.5226, "step": 4300 }, { "epoch": 0.7453911310413552, "grad_norm": 2.6471833361803636, "learning_rate": 1.6060993357792932e-06, "loss": 0.5492, "step": 4301 }, { "epoch": 0.7455644375121856, "grad_norm": 3.066774231729878, "learning_rate": 1.6040385750930577e-06, "loss": 0.5019, "step": 4302 }, { "epoch": 0.745737743983016, "grad_norm": 2.4410726140537706, "learning_rate": 1.6019788847106299e-06, "loss": 0.4775, "step": 4303 }, { "epoch": 0.7459110504538463, "grad_norm": 3.319685866635014, "learning_rate": 1.599920265281163e-06, "loss": 0.4623, "step": 4304 }, { "epoch": 0.7460843569246767, "grad_norm": 3.0556714020054767, "learning_rate": 1.5978627174534717e-06, "loss": 0.5155, "step": 4305 }, { "epoch": 0.746257663395507, "grad_norm": 2.7790597882945276, "learning_rate": 1.59580624187603e-06, "loss": 0.4617, "step": 4306 }, { "epoch": 0.7464309698663374, "grad_norm": 3.610787462755396, "learning_rate": 1.5937508391969775e-06, "loss": 0.6066, "step": 4307 }, { "epoch": 0.7466042763371677, "grad_norm": 2.5206763318488927, "learning_rate": 1.591696510064115e-06, "loss": 0.5217, "step": 4308 }, { "epoch": 0.7467775828079981, "grad_norm": 2.217861031872877, "learning_rate": 1.5896432551249053e-06, "loss": 0.5051, "step": 4309 }, { "epoch": 0.7469508892788285, "grad_norm": 2.812819117924346, "learning_rate": 1.5875910750264706e-06, "loss": 0.4884, "step": 4310 }, { "epoch": 0.7471241957496588, "grad_norm": 2.5508226800856413, "learning_rate": 1.5855399704155949e-06, "loss": 0.5583, "step": 4311 }, { "epoch": 0.7472975022204892, "grad_norm": 3.0154444549874277, "learning_rate": 1.5834899419387268e-06, "loss": 0.581, "step": 4312 }, { "epoch": 0.7474708086913195, "grad_norm": 2.6202081228003404, "learning_rate": 1.581440990241972e-06, "loss": 0.6314, "step": 4313 }, { "epoch": 0.7476441151621499, "grad_norm": 3.2952303435778045, "learning_rate": 1.5793931159710974e-06, "loss": 0.4533, "step": 4314 }, { "epoch": 0.7478174216329803, "grad_norm": 2.2629881322608316, "learning_rate": 1.5773463197715317e-06, "loss": 0.5858, "step": 4315 }, { "epoch": 0.7479907281038106, "grad_norm": 2.661896932116172, "learning_rate": 1.5753006022883606e-06, "loss": 0.5573, "step": 4316 }, { "epoch": 0.748164034574641, "grad_norm": 2.4878658129157123, "learning_rate": 1.5732559641663337e-06, "loss": 0.5477, "step": 4317 }, { "epoch": 0.7483373410454713, "grad_norm": 3.513936249375556, "learning_rate": 1.5712124060498578e-06, "loss": 0.5758, "step": 4318 }, { "epoch": 0.7485106475163016, "grad_norm": 2.3020426694682588, "learning_rate": 1.5691699285830008e-06, "loss": 0.5499, "step": 4319 }, { "epoch": 0.748683953987132, "grad_norm": 3.150182766377862, "learning_rate": 1.5671285324094904e-06, "loss": 0.5675, "step": 4320 }, { "epoch": 0.7488572604579623, "grad_norm": 2.8068179210649578, "learning_rate": 1.5650882181727112e-06, "loss": 0.6465, "step": 4321 }, { "epoch": 0.7490305669287927, "grad_norm": 2.4350324689011926, "learning_rate": 1.563048986515709e-06, "loss": 0.4831, "step": 4322 }, { "epoch": 0.749203873399623, "grad_norm": 3.83997837010388, "learning_rate": 1.5610108380811872e-06, "loss": 0.5004, "step": 4323 }, { "epoch": 0.7493771798704534, "grad_norm": 2.8035199943384095, "learning_rate": 1.5589737735115084e-06, "loss": 0.5896, "step": 4324 }, { "epoch": 0.7495504863412837, "grad_norm": 3.5219735677850195, "learning_rate": 1.556937793448694e-06, "loss": 0.601, "step": 4325 }, { "epoch": 0.7497237928121141, "grad_norm": 3.284348393354424, "learning_rate": 1.5549028985344228e-06, "loss": 0.4783, "step": 4326 }, { "epoch": 0.7498970992829445, "grad_norm": 2.472240915039975, "learning_rate": 1.5528690894100306e-06, "loss": 0.4689, "step": 4327 }, { "epoch": 0.7500704057537748, "grad_norm": 4.517240904856368, "learning_rate": 1.5508363667165139e-06, "loss": 0.546, "step": 4328 }, { "epoch": 0.7502437122246052, "grad_norm": 3.0632907349804466, "learning_rate": 1.5488047310945242e-06, "loss": 0.6512, "step": 4329 }, { "epoch": 0.7504170186954355, "grad_norm": 2.3663297106682664, "learning_rate": 1.5467741831843713e-06, "loss": 0.5457, "step": 4330 }, { "epoch": 0.7505903251662659, "grad_norm": 2.577187377255026, "learning_rate": 1.5447447236260227e-06, "loss": 0.5097, "step": 4331 }, { "epoch": 0.7507636316370963, "grad_norm": 2.6368614986133894, "learning_rate": 1.5427163530591017e-06, "loss": 0.5135, "step": 4332 }, { "epoch": 0.7509369381079266, "grad_norm": 2.589570113559507, "learning_rate": 1.5406890721228895e-06, "loss": 0.5346, "step": 4333 }, { "epoch": 0.751110244578757, "grad_norm": 3.4609311603677777, "learning_rate": 1.5386628814563232e-06, "loss": 0.5212, "step": 4334 }, { "epoch": 0.7512835510495873, "grad_norm": 2.503274951980796, "learning_rate": 1.5366377816979983e-06, "loss": 0.5243, "step": 4335 }, { "epoch": 0.7514568575204177, "grad_norm": 2.8071404966697466, "learning_rate": 1.5346137734861598e-06, "loss": 0.5483, "step": 4336 }, { "epoch": 0.751630163991248, "grad_norm": 2.502492788230706, "learning_rate": 1.5325908574587184e-06, "loss": 0.6201, "step": 4337 }, { "epoch": 0.7518034704620784, "grad_norm": 2.5597893906268085, "learning_rate": 1.5305690342532338e-06, "loss": 0.5655, "step": 4338 }, { "epoch": 0.7519767769329088, "grad_norm": 2.3755848153761523, "learning_rate": 1.5285483045069233e-06, "loss": 0.5809, "step": 4339 }, { "epoch": 0.7521500834037391, "grad_norm": 2.7085366212242166, "learning_rate": 1.52652866885666e-06, "loss": 0.5405, "step": 4340 }, { "epoch": 0.7523233898745695, "grad_norm": 2.2133348269280466, "learning_rate": 1.5245101279389711e-06, "loss": 0.4751, "step": 4341 }, { "epoch": 0.7524966963453998, "grad_norm": 2.254386696170595, "learning_rate": 1.5224926823900394e-06, "loss": 0.5246, "step": 4342 }, { "epoch": 0.7526700028162302, "grad_norm": 4.357723563483271, "learning_rate": 1.520476332845703e-06, "loss": 0.5273, "step": 4343 }, { "epoch": 0.7528433092870606, "grad_norm": 2.4318981090031064, "learning_rate": 1.5184610799414535e-06, "loss": 0.6096, "step": 4344 }, { "epoch": 0.7530166157578908, "grad_norm": 3.2290179145630336, "learning_rate": 1.516446924312437e-06, "loss": 0.5501, "step": 4345 }, { "epoch": 0.7531899222287212, "grad_norm": 2.291572281816179, "learning_rate": 1.5144338665934566e-06, "loss": 0.6011, "step": 4346 }, { "epoch": 0.7533632286995515, "grad_norm": 2.5873371992253835, "learning_rate": 1.5124219074189627e-06, "loss": 0.5185, "step": 4347 }, { "epoch": 0.7535365351703819, "grad_norm": 2.638824775237816, "learning_rate": 1.5104110474230666e-06, "loss": 0.5684, "step": 4348 }, { "epoch": 0.7537098416412122, "grad_norm": 2.4953681875639018, "learning_rate": 1.5084012872395288e-06, "loss": 0.4897, "step": 4349 }, { "epoch": 0.7538831481120426, "grad_norm": 4.446120609685949, "learning_rate": 1.5063926275017653e-06, "loss": 0.4987, "step": 4350 }, { "epoch": 0.754056454582873, "grad_norm": 2.521516956897535, "learning_rate": 1.5043850688428429e-06, "loss": 0.5801, "step": 4351 }, { "epoch": 0.7542297610537033, "grad_norm": 2.348925363775501, "learning_rate": 1.5023786118954869e-06, "loss": 0.5894, "step": 4352 }, { "epoch": 0.7544030675245337, "grad_norm": 2.4547975135304005, "learning_rate": 1.5003732572920688e-06, "loss": 0.575, "step": 4353 }, { "epoch": 0.754576373995364, "grad_norm": 3.049715380305976, "learning_rate": 1.4983690056646162e-06, "loss": 0.6302, "step": 4354 }, { "epoch": 0.7547496804661944, "grad_norm": 3.3039129204969457, "learning_rate": 1.4963658576448082e-06, "loss": 0.5151, "step": 4355 }, { "epoch": 0.7549229869370248, "grad_norm": 3.6015304983604515, "learning_rate": 1.4943638138639772e-06, "loss": 0.4852, "step": 4356 }, { "epoch": 0.7550962934078551, "grad_norm": 3.7107859283686806, "learning_rate": 1.4923628749531038e-06, "loss": 0.492, "step": 4357 }, { "epoch": 0.7552695998786855, "grad_norm": 2.300012212899236, "learning_rate": 1.4903630415428245e-06, "loss": 0.5408, "step": 4358 }, { "epoch": 0.7554429063495158, "grad_norm": 2.4390938105562854, "learning_rate": 1.4883643142634268e-06, "loss": 0.5265, "step": 4359 }, { "epoch": 0.7556162128203462, "grad_norm": 2.479924535194977, "learning_rate": 1.4863666937448474e-06, "loss": 0.548, "step": 4360 }, { "epoch": 0.7557895192911765, "grad_norm": 2.4683969787299866, "learning_rate": 1.4843701806166766e-06, "loss": 0.5723, "step": 4361 }, { "epoch": 0.7559628257620069, "grad_norm": 2.478741869962021, "learning_rate": 1.4823747755081546e-06, "loss": 0.523, "step": 4362 }, { "epoch": 0.7561361322328373, "grad_norm": 2.883638006340693, "learning_rate": 1.4803804790481719e-06, "loss": 0.5205, "step": 4363 }, { "epoch": 0.7563094387036676, "grad_norm": 2.6504952209547983, "learning_rate": 1.478387291865271e-06, "loss": 0.5817, "step": 4364 }, { "epoch": 0.756482745174498, "grad_norm": 2.214311636857132, "learning_rate": 1.4763952145876426e-06, "loss": 0.5657, "step": 4365 }, { "epoch": 0.7566560516453283, "grad_norm": 2.8105249975500306, "learning_rate": 1.4744042478431297e-06, "loss": 0.5075, "step": 4366 }, { "epoch": 0.7568293581161587, "grad_norm": 2.2471654801714167, "learning_rate": 1.4724143922592249e-06, "loss": 0.4532, "step": 4367 }, { "epoch": 0.7570026645869891, "grad_norm": 2.4311962242984357, "learning_rate": 1.4704256484630691e-06, "loss": 0.569, "step": 4368 }, { "epoch": 0.7571759710578194, "grad_norm": 2.893971229596221, "learning_rate": 1.4684380170814549e-06, "loss": 0.5064, "step": 4369 }, { "epoch": 0.7573492775286498, "grad_norm": 2.874524023847146, "learning_rate": 1.4664514987408223e-06, "loss": 0.5387, "step": 4370 }, { "epoch": 0.75752258399948, "grad_norm": 2.300717634001409, "learning_rate": 1.4644660940672628e-06, "loss": 0.5376, "step": 4371 }, { "epoch": 0.7576958904703104, "grad_norm": 2.1539970321230073, "learning_rate": 1.4624818036865146e-06, "loss": 0.475, "step": 4372 }, { "epoch": 0.7578691969411407, "grad_norm": 3.663203185003877, "learning_rate": 1.4604986282239658e-06, "loss": 0.5015, "step": 4373 }, { "epoch": 0.7580425034119711, "grad_norm": 2.803372350259602, "learning_rate": 1.4585165683046537e-06, "loss": 0.6042, "step": 4374 }, { "epoch": 0.7582158098828015, "grad_norm": 2.2191816387967673, "learning_rate": 1.4565356245532624e-06, "loss": 0.5265, "step": 4375 }, { "epoch": 0.7583891163536318, "grad_norm": 2.9521046790850067, "learning_rate": 1.4545557975941255e-06, "loss": 0.5713, "step": 4376 }, { "epoch": 0.7585624228244622, "grad_norm": 2.8082721221326628, "learning_rate": 1.4525770880512247e-06, "loss": 0.5995, "step": 4377 }, { "epoch": 0.7587357292952925, "grad_norm": 3.0693619281394438, "learning_rate": 1.4505994965481885e-06, "loss": 0.6343, "step": 4378 }, { "epoch": 0.7589090357661229, "grad_norm": 2.5483100905216407, "learning_rate": 1.4486230237082944e-06, "loss": 0.5535, "step": 4379 }, { "epoch": 0.7590823422369533, "grad_norm": 2.3910086720565475, "learning_rate": 1.4466476701544657e-06, "loss": 0.5374, "step": 4380 }, { "epoch": 0.7592556487077836, "grad_norm": 2.5130645884303813, "learning_rate": 1.444673436509274e-06, "loss": 0.5954, "step": 4381 }, { "epoch": 0.759428955178614, "grad_norm": 2.4511605811112456, "learning_rate": 1.4427003233949378e-06, "loss": 0.5104, "step": 4382 }, { "epoch": 0.7596022616494443, "grad_norm": 3.8029251898907614, "learning_rate": 1.4407283314333226e-06, "loss": 0.5799, "step": 4383 }, { "epoch": 0.7597755681202747, "grad_norm": 3.309782578960591, "learning_rate": 1.4387574612459403e-06, "loss": 0.5622, "step": 4384 }, { "epoch": 0.759948874591105, "grad_norm": 2.0267181988391463, "learning_rate": 1.4367877134539488e-06, "loss": 0.5414, "step": 4385 }, { "epoch": 0.7601221810619354, "grad_norm": 4.858117240237391, "learning_rate": 1.4348190886781532e-06, "loss": 0.544, "step": 4386 }, { "epoch": 0.7602954875327658, "grad_norm": 2.7751146749047373, "learning_rate": 1.432851587539006e-06, "loss": 0.5687, "step": 4387 }, { "epoch": 0.7604687940035961, "grad_norm": 2.861608693176261, "learning_rate": 1.4308852106565997e-06, "loss": 0.5588, "step": 4388 }, { "epoch": 0.7606421004744265, "grad_norm": 2.4892364163780556, "learning_rate": 1.4289199586506786e-06, "loss": 0.5395, "step": 4389 }, { "epoch": 0.7608154069452568, "grad_norm": 2.4258626856571373, "learning_rate": 1.4269558321406307e-06, "loss": 0.4542, "step": 4390 }, { "epoch": 0.7609887134160872, "grad_norm": 2.4231415900491258, "learning_rate": 1.4249928317454886e-06, "loss": 0.5344, "step": 4391 }, { "epoch": 0.7611620198869176, "grad_norm": 2.5595433595112813, "learning_rate": 1.4230309580839291e-06, "loss": 0.4833, "step": 4392 }, { "epoch": 0.7613353263577479, "grad_norm": 2.326563203546985, "learning_rate": 1.4210702117742776e-06, "loss": 0.4361, "step": 4393 }, { "epoch": 0.7615086328285783, "grad_norm": 3.650298388425376, "learning_rate": 1.4191105934345007e-06, "loss": 0.5556, "step": 4394 }, { "epoch": 0.7616819392994086, "grad_norm": 3.212806240960658, "learning_rate": 1.4171521036822107e-06, "loss": 0.4408, "step": 4395 }, { "epoch": 0.761855245770239, "grad_norm": 2.852330710878486, "learning_rate": 1.4151947431346635e-06, "loss": 0.4544, "step": 4396 }, { "epoch": 0.7620285522410692, "grad_norm": 2.2268664523385797, "learning_rate": 1.4132385124087616e-06, "loss": 0.5116, "step": 4397 }, { "epoch": 0.7622018587118996, "grad_norm": 2.6843378086946625, "learning_rate": 1.4112834121210462e-06, "loss": 0.5088, "step": 4398 }, { "epoch": 0.76237516518273, "grad_norm": 2.5437232472380873, "learning_rate": 1.409329442887707e-06, "loss": 0.4855, "step": 4399 }, { "epoch": 0.7625484716535603, "grad_norm": 2.4072960471502673, "learning_rate": 1.4073766053245752e-06, "loss": 0.5449, "step": 4400 }, { "epoch": 0.7627217781243907, "grad_norm": 2.299512200662011, "learning_rate": 1.405424900047127e-06, "loss": 0.5585, "step": 4401 }, { "epoch": 0.762895084595221, "grad_norm": 2.7389399862216526, "learning_rate": 1.4034743276704799e-06, "loss": 0.5434, "step": 4402 }, { "epoch": 0.7630683910660514, "grad_norm": 4.054306037663646, "learning_rate": 1.4015248888093952e-06, "loss": 0.5661, "step": 4403 }, { "epoch": 0.7632416975368818, "grad_norm": 2.3144716144039967, "learning_rate": 1.3995765840782765e-06, "loss": 0.5142, "step": 4404 }, { "epoch": 0.7634150040077121, "grad_norm": 2.2481818175995527, "learning_rate": 1.3976294140911701e-06, "loss": 0.4863, "step": 4405 }, { "epoch": 0.7635883104785425, "grad_norm": 2.384330480702454, "learning_rate": 1.3956833794617653e-06, "loss": 0.4976, "step": 4406 }, { "epoch": 0.7637616169493728, "grad_norm": 2.290191204115748, "learning_rate": 1.393738480803391e-06, "loss": 0.6102, "step": 4407 }, { "epoch": 0.7639349234202032, "grad_norm": 12.098117200350377, "learning_rate": 1.3917947187290254e-06, "loss": 0.562, "step": 4408 }, { "epoch": 0.7641082298910336, "grad_norm": 4.548808891253266, "learning_rate": 1.3898520938512783e-06, "loss": 0.5193, "step": 4409 }, { "epoch": 0.7642815363618639, "grad_norm": 2.3410376633496925, "learning_rate": 1.387910606782407e-06, "loss": 0.4518, "step": 4410 }, { "epoch": 0.7644548428326943, "grad_norm": 2.182912712428498, "learning_rate": 1.3859702581343105e-06, "loss": 0.5546, "step": 4411 }, { "epoch": 0.7646281493035246, "grad_norm": 2.5956405269865908, "learning_rate": 1.384031048518526e-06, "loss": 0.5749, "step": 4412 }, { "epoch": 0.764801455774355, "grad_norm": 2.8291991894773703, "learning_rate": 1.382092978546235e-06, "loss": 0.5018, "step": 4413 }, { "epoch": 0.7649747622451853, "grad_norm": 2.36467522895969, "learning_rate": 1.3801560488282574e-06, "loss": 0.5461, "step": 4414 }, { "epoch": 0.7651480687160157, "grad_norm": 3.983965780162436, "learning_rate": 1.3782202599750545e-06, "loss": 0.457, "step": 4415 }, { "epoch": 0.7653213751868461, "grad_norm": 3.7515504643107342, "learning_rate": 1.3762856125967283e-06, "loss": 0.5926, "step": 4416 }, { "epoch": 0.7654946816576764, "grad_norm": 2.651581853072539, "learning_rate": 1.3743521073030209e-06, "loss": 0.4709, "step": 4417 }, { "epoch": 0.7656679881285068, "grad_norm": 3.1153163514052102, "learning_rate": 1.3724197447033144e-06, "loss": 0.5088, "step": 4418 }, { "epoch": 0.7658412945993371, "grad_norm": 2.6910857462885183, "learning_rate": 1.3704885254066303e-06, "loss": 0.5166, "step": 4419 }, { "epoch": 0.7660146010701675, "grad_norm": 1.917250436793681, "learning_rate": 1.3685584500216315e-06, "loss": 0.5649, "step": 4420 }, { "epoch": 0.7661879075409979, "grad_norm": 2.6519542739814463, "learning_rate": 1.3666295191566177e-06, "loss": 0.6105, "step": 4421 }, { "epoch": 0.7663612140118282, "grad_norm": 2.6247397868012805, "learning_rate": 1.3647017334195296e-06, "loss": 0.5489, "step": 4422 }, { "epoch": 0.7665345204826586, "grad_norm": 2.528985502894174, "learning_rate": 1.3627750934179473e-06, "loss": 0.5459, "step": 4423 }, { "epoch": 0.7667078269534888, "grad_norm": 2.0745928595402314, "learning_rate": 1.360849599759088e-06, "loss": 0.5047, "step": 4424 }, { "epoch": 0.7668811334243192, "grad_norm": 2.244878502599101, "learning_rate": 1.3589252530498105e-06, "loss": 0.5667, "step": 4425 }, { "epoch": 0.7670544398951495, "grad_norm": 5.266709455084357, "learning_rate": 1.3570020538966083e-06, "loss": 0.4957, "step": 4426 }, { "epoch": 0.7672277463659799, "grad_norm": 2.206245144576036, "learning_rate": 1.3550800029056172e-06, "loss": 0.4447, "step": 4427 }, { "epoch": 0.7674010528368103, "grad_norm": 2.776652685976526, "learning_rate": 1.3531591006826096e-06, "loss": 0.4767, "step": 4428 }, { "epoch": 0.7675743593076406, "grad_norm": 2.376106054204589, "learning_rate": 1.3512393478329927e-06, "loss": 0.6027, "step": 4429 }, { "epoch": 0.767747665778471, "grad_norm": 2.422858360003444, "learning_rate": 1.3493207449618163e-06, "loss": 0.575, "step": 4430 }, { "epoch": 0.7679209722493013, "grad_norm": 3.8938697406364646, "learning_rate": 1.3474032926737646e-06, "loss": 0.4861, "step": 4431 }, { "epoch": 0.7680942787201317, "grad_norm": 2.603280694695639, "learning_rate": 1.3454869915731595e-06, "loss": 0.5489, "step": 4432 }, { "epoch": 0.768267585190962, "grad_norm": 2.975692447798805, "learning_rate": 1.3435718422639643e-06, "loss": 0.5724, "step": 4433 }, { "epoch": 0.7684408916617924, "grad_norm": 2.327705461144507, "learning_rate": 1.3416578453497737e-06, "loss": 0.5235, "step": 4434 }, { "epoch": 0.7686141981326228, "grad_norm": 3.040389434549842, "learning_rate": 1.339745001433821e-06, "loss": 0.555, "step": 4435 }, { "epoch": 0.7687875046034531, "grad_norm": 2.7347597391879086, "learning_rate": 1.3378333111189774e-06, "loss": 0.5377, "step": 4436 }, { "epoch": 0.7689608110742835, "grad_norm": 2.281963649040847, "learning_rate": 1.3359227750077486e-06, "loss": 0.5313, "step": 4437 }, { "epoch": 0.7691341175451138, "grad_norm": 2.772449953048686, "learning_rate": 1.3340133937022797e-06, "loss": 0.4937, "step": 4438 }, { "epoch": 0.7693074240159442, "grad_norm": 2.446609923758582, "learning_rate": 1.3321051678043466e-06, "loss": 0.6409, "step": 4439 }, { "epoch": 0.7694807304867746, "grad_norm": 2.7148580752195017, "learning_rate": 1.3301980979153656e-06, "loss": 0.5262, "step": 4440 }, { "epoch": 0.7696540369576049, "grad_norm": 3.3075661478112863, "learning_rate": 1.3282921846363867e-06, "loss": 0.5572, "step": 4441 }, { "epoch": 0.7698273434284353, "grad_norm": 2.3677432270274745, "learning_rate": 1.3263874285680962e-06, "loss": 0.5347, "step": 4442 }, { "epoch": 0.7700006498992656, "grad_norm": 2.4334978009993407, "learning_rate": 1.324483830310815e-06, "loss": 0.4582, "step": 4443 }, { "epoch": 0.770173956370096, "grad_norm": 4.050243492864091, "learning_rate": 1.3225813904644996e-06, "loss": 0.5631, "step": 4444 }, { "epoch": 0.7703472628409264, "grad_norm": 3.8709038437105643, "learning_rate": 1.3206801096287414e-06, "loss": 0.5359, "step": 4445 }, { "epoch": 0.7705205693117567, "grad_norm": 1.9225343035643285, "learning_rate": 1.3187799884027663e-06, "loss": 0.4881, "step": 4446 }, { "epoch": 0.7706938757825871, "grad_norm": 2.309781147275957, "learning_rate": 1.3168810273854332e-06, "loss": 0.4882, "step": 4447 }, { "epoch": 0.7708671822534174, "grad_norm": 2.5176851642755085, "learning_rate": 1.3149832271752394e-06, "loss": 0.4787, "step": 4448 }, { "epoch": 0.7710404887242478, "grad_norm": 2.427206985310478, "learning_rate": 1.3130865883703148e-06, "loss": 0.6084, "step": 4449 }, { "epoch": 0.771213795195078, "grad_norm": 5.673414624786523, "learning_rate": 1.3111911115684185e-06, "loss": 0.5485, "step": 4450 }, { "epoch": 0.7713871016659084, "grad_norm": 2.492698966031328, "learning_rate": 1.3092967973669485e-06, "loss": 0.4712, "step": 4451 }, { "epoch": 0.7715604081367388, "grad_norm": 2.5097359968717763, "learning_rate": 1.307403646362936e-06, "loss": 0.5617, "step": 4452 }, { "epoch": 0.7717337146075691, "grad_norm": 3.4034221226488475, "learning_rate": 1.3055116591530437e-06, "loss": 0.5903, "step": 4453 }, { "epoch": 0.7719070210783995, "grad_norm": 2.2403629800414913, "learning_rate": 1.3036208363335689e-06, "loss": 0.5261, "step": 4454 }, { "epoch": 0.7720803275492298, "grad_norm": 2.5861957083474336, "learning_rate": 1.3017311785004417e-06, "loss": 0.552, "step": 4455 }, { "epoch": 0.7722536340200602, "grad_norm": 2.9578191924989103, "learning_rate": 1.2998426862492247e-06, "loss": 0.5801, "step": 4456 }, { "epoch": 0.7724269404908906, "grad_norm": 6.130243097242575, "learning_rate": 1.2979553601751128e-06, "loss": 0.4366, "step": 4457 }, { "epoch": 0.7726002469617209, "grad_norm": 2.467998290091891, "learning_rate": 1.2960692008729336e-06, "loss": 0.5536, "step": 4458 }, { "epoch": 0.7727735534325513, "grad_norm": 3.277736396140317, "learning_rate": 1.294184208937148e-06, "loss": 0.5282, "step": 4459 }, { "epoch": 0.7729468599033816, "grad_norm": 2.084544745913686, "learning_rate": 1.2923003849618483e-06, "loss": 0.5179, "step": 4460 }, { "epoch": 0.773120166374212, "grad_norm": 1.8864201447248723, "learning_rate": 1.2904177295407577e-06, "loss": 0.5204, "step": 4461 }, { "epoch": 0.7732934728450424, "grad_norm": 2.5234569240387583, "learning_rate": 1.2885362432672322e-06, "loss": 0.4415, "step": 4462 }, { "epoch": 0.7734667793158727, "grad_norm": 2.472696230638982, "learning_rate": 1.2866559267342593e-06, "loss": 0.4644, "step": 4463 }, { "epoch": 0.7736400857867031, "grad_norm": 2.5080829949884436, "learning_rate": 1.284776780534458e-06, "loss": 0.6312, "step": 4464 }, { "epoch": 0.7738133922575334, "grad_norm": 2.6076209083189585, "learning_rate": 1.282898805260077e-06, "loss": 0.4672, "step": 4465 }, { "epoch": 0.7739866987283638, "grad_norm": 2.719324480754652, "learning_rate": 1.2810220015029978e-06, "loss": 0.5225, "step": 4466 }, { "epoch": 0.7741600051991941, "grad_norm": 2.9601702938019514, "learning_rate": 1.2791463698547318e-06, "loss": 0.5104, "step": 4467 }, { "epoch": 0.7743333116700245, "grad_norm": 2.3962915416852173, "learning_rate": 1.2772719109064208e-06, "loss": 0.5426, "step": 4468 }, { "epoch": 0.7745066181408549, "grad_norm": 2.881945222261984, "learning_rate": 1.2753986252488387e-06, "loss": 0.5452, "step": 4469 }, { "epoch": 0.7746799246116852, "grad_norm": 2.86328099222721, "learning_rate": 1.2735265134723861e-06, "loss": 0.5487, "step": 4470 }, { "epoch": 0.7748532310825156, "grad_norm": 2.79894315734507, "learning_rate": 1.2716555761670961e-06, "loss": 0.5847, "step": 4471 }, { "epoch": 0.7750265375533459, "grad_norm": 3.0915201567900823, "learning_rate": 1.2697858139226315e-06, "loss": 0.5324, "step": 4472 }, { "epoch": 0.7751998440241763, "grad_norm": 3.295361315959031, "learning_rate": 1.2679172273282842e-06, "loss": 0.4585, "step": 4473 }, { "epoch": 0.7753731504950067, "grad_norm": 2.3559696318021808, "learning_rate": 1.2660498169729773e-06, "loss": 0.4379, "step": 4474 }, { "epoch": 0.775546456965837, "grad_norm": 2.04158186599003, "learning_rate": 1.264183583445261e-06, "loss": 0.5776, "step": 4475 }, { "epoch": 0.7757197634366673, "grad_norm": 3.191566031686204, "learning_rate": 1.262318527333315e-06, "loss": 0.4262, "step": 4476 }, { "epoch": 0.7758930699074976, "grad_norm": 2.788579557663582, "learning_rate": 1.2604546492249486e-06, "loss": 0.539, "step": 4477 }, { "epoch": 0.776066376378328, "grad_norm": 2.076953756984357, "learning_rate": 1.2585919497076e-06, "loss": 0.4944, "step": 4478 }, { "epoch": 0.7762396828491583, "grad_norm": 3.097830838736614, "learning_rate": 1.2567304293683347e-06, "loss": 0.5998, "step": 4479 }, { "epoch": 0.7764129893199887, "grad_norm": 1.9237228661807837, "learning_rate": 1.2548700887938492e-06, "loss": 0.5059, "step": 4480 }, { "epoch": 0.7765862957908191, "grad_norm": 2.2265872371326747, "learning_rate": 1.2530109285704629e-06, "loss": 0.5744, "step": 4481 }, { "epoch": 0.7767596022616494, "grad_norm": 2.6737007268420254, "learning_rate": 1.251152949284129e-06, "loss": 0.4711, "step": 4482 }, { "epoch": 0.7769329087324798, "grad_norm": 7.764057904919486, "learning_rate": 1.2492961515204248e-06, "loss": 0.6011, "step": 4483 }, { "epoch": 0.7771062152033101, "grad_norm": 2.0644873992072332, "learning_rate": 1.2474405358645575e-06, "loss": 0.4943, "step": 4484 }, { "epoch": 0.7772795216741405, "grad_norm": 2.626000728577371, "learning_rate": 1.2455861029013605e-06, "loss": 0.5419, "step": 4485 }, { "epoch": 0.7774528281449709, "grad_norm": 2.6087663833244226, "learning_rate": 1.2437328532152943e-06, "loss": 0.5934, "step": 4486 }, { "epoch": 0.7776261346158012, "grad_norm": 2.2843333808269475, "learning_rate": 1.2418807873904477e-06, "loss": 0.5561, "step": 4487 }, { "epoch": 0.7777994410866316, "grad_norm": 2.139994641736658, "learning_rate": 1.2400299060105331e-06, "loss": 0.493, "step": 4488 }, { "epoch": 0.7779727475574619, "grad_norm": 1.9507228521005466, "learning_rate": 1.2381802096588957e-06, "loss": 0.5182, "step": 4489 }, { "epoch": 0.7781460540282923, "grad_norm": 2.2485863890214817, "learning_rate": 1.2363316989185032e-06, "loss": 0.5017, "step": 4490 }, { "epoch": 0.7783193604991226, "grad_norm": 20.814311351513062, "learning_rate": 1.234484374371947e-06, "loss": 0.6349, "step": 4491 }, { "epoch": 0.778492666969953, "grad_norm": 2.125601631073671, "learning_rate": 1.23263823660145e-06, "loss": 0.4717, "step": 4492 }, { "epoch": 0.7786659734407834, "grad_norm": 2.290906352835824, "learning_rate": 1.2307932861888571e-06, "loss": 0.5849, "step": 4493 }, { "epoch": 0.7788392799116137, "grad_norm": 2.2794289530615943, "learning_rate": 1.2289495237156418e-06, "loss": 0.5926, "step": 4494 }, { "epoch": 0.7790125863824441, "grad_norm": 2.3013218700332567, "learning_rate": 1.2271069497629018e-06, "loss": 0.5738, "step": 4495 }, { "epoch": 0.7791858928532744, "grad_norm": 2.776335275338044, "learning_rate": 1.2252655649113604e-06, "loss": 0.4825, "step": 4496 }, { "epoch": 0.7793591993241048, "grad_norm": 2.2745677695503046, "learning_rate": 1.2234253697413656e-06, "loss": 0.549, "step": 4497 }, { "epoch": 0.7795325057949352, "grad_norm": 2.2358749008024628, "learning_rate": 1.221586364832892e-06, "loss": 0.5125, "step": 4498 }, { "epoch": 0.7797058122657655, "grad_norm": 3.217977534620548, "learning_rate": 1.2197485507655366e-06, "loss": 0.5206, "step": 4499 }, { "epoch": 0.7798791187365959, "grad_norm": 2.3162054298458434, "learning_rate": 1.2179119281185237e-06, "loss": 0.5689, "step": 4500 }, { "epoch": 0.7800524252074262, "grad_norm": 2.2686448944567803, "learning_rate": 1.2160764974707007e-06, "loss": 0.5168, "step": 4501 }, { "epoch": 0.7802257316782565, "grad_norm": 2.316948198816842, "learning_rate": 1.2142422594005388e-06, "loss": 0.5401, "step": 4502 }, { "epoch": 0.7803990381490868, "grad_norm": 2.565184705252825, "learning_rate": 1.2124092144861355e-06, "loss": 0.5367, "step": 4503 }, { "epoch": 0.7805723446199172, "grad_norm": 3.04211688112069, "learning_rate": 1.2105773633052093e-06, "loss": 0.63, "step": 4504 }, { "epoch": 0.7807456510907476, "grad_norm": 3.050289984731792, "learning_rate": 1.208746706435105e-06, "loss": 0.6052, "step": 4505 }, { "epoch": 0.7809189575615779, "grad_norm": 2.3006893532030492, "learning_rate": 1.2069172444527898e-06, "loss": 0.5145, "step": 4506 }, { "epoch": 0.7810922640324083, "grad_norm": 2.5212873578492667, "learning_rate": 1.2050889779348546e-06, "loss": 0.5222, "step": 4507 }, { "epoch": 0.7812655705032386, "grad_norm": 2.3657357493240205, "learning_rate": 1.203261907457513e-06, "loss": 0.5871, "step": 4508 }, { "epoch": 0.781438876974069, "grad_norm": 2.1899625887085774, "learning_rate": 1.2014360335966024e-06, "loss": 0.5851, "step": 4509 }, { "epoch": 0.7816121834448994, "grad_norm": 5.683910257893933, "learning_rate": 1.1996113569275841e-06, "loss": 0.6185, "step": 4510 }, { "epoch": 0.7817854899157297, "grad_norm": 2.216838601716371, "learning_rate": 1.1977878780255381e-06, "loss": 0.4466, "step": 4511 }, { "epoch": 0.7819587963865601, "grad_norm": 2.4205475677167523, "learning_rate": 1.19596559746517e-06, "loss": 0.6075, "step": 4512 }, { "epoch": 0.7821321028573904, "grad_norm": 2.7916072182405993, "learning_rate": 1.1941445158208087e-06, "loss": 0.5915, "step": 4513 }, { "epoch": 0.7823054093282208, "grad_norm": 2.2279015320517157, "learning_rate": 1.1923246336664014e-06, "loss": 0.4938, "step": 4514 }, { "epoch": 0.7824787157990511, "grad_norm": 2.746307225108683, "learning_rate": 1.1905059515755224e-06, "loss": 0.4879, "step": 4515 }, { "epoch": 0.7826520222698815, "grad_norm": 2.3290053970045173, "learning_rate": 1.1886884701213642e-06, "loss": 0.4961, "step": 4516 }, { "epoch": 0.7828253287407119, "grad_norm": 2.1053367703428396, "learning_rate": 1.186872189876741e-06, "loss": 0.56, "step": 4517 }, { "epoch": 0.7829986352115422, "grad_norm": 2.501232395355261, "learning_rate": 1.1850571114140897e-06, "loss": 0.6071, "step": 4518 }, { "epoch": 0.7831719416823726, "grad_norm": 3.1939670294098006, "learning_rate": 1.183243235305468e-06, "loss": 0.6109, "step": 4519 }, { "epoch": 0.7833452481532029, "grad_norm": 2.247499303337849, "learning_rate": 1.1814305621225535e-06, "loss": 0.533, "step": 4520 }, { "epoch": 0.7835185546240333, "grad_norm": 2.664861342147532, "learning_rate": 1.179619092436648e-06, "loss": 0.5116, "step": 4521 }, { "epoch": 0.7836918610948637, "grad_norm": 2.5902290470736067, "learning_rate": 1.1778088268186688e-06, "loss": 0.5271, "step": 4522 }, { "epoch": 0.783865167565694, "grad_norm": 2.1060565452260662, "learning_rate": 1.1759997658391576e-06, "loss": 0.4715, "step": 4523 }, { "epoch": 0.7840384740365244, "grad_norm": 2.330312734968396, "learning_rate": 1.1741919100682757e-06, "loss": 0.5093, "step": 4524 }, { "epoch": 0.7842117805073547, "grad_norm": 2.112397121945849, "learning_rate": 1.172385260075804e-06, "loss": 0.4765, "step": 4525 }, { "epoch": 0.7843850869781851, "grad_norm": 2.149420401665689, "learning_rate": 1.1705798164311443e-06, "loss": 0.4329, "step": 4526 }, { "epoch": 0.7845583934490155, "grad_norm": 2.6193982419747766, "learning_rate": 1.1687755797033168e-06, "loss": 0.5242, "step": 4527 }, { "epoch": 0.7847316999198457, "grad_norm": 2.154816237759585, "learning_rate": 1.1669725504609614e-06, "loss": 0.5442, "step": 4528 }, { "epoch": 0.7849050063906761, "grad_norm": 5.226847010879825, "learning_rate": 1.1651707292723403e-06, "loss": 0.6155, "step": 4529 }, { "epoch": 0.7850783128615064, "grad_norm": 2.8370338608679058, "learning_rate": 1.1633701167053313e-06, "loss": 0.5617, "step": 4530 }, { "epoch": 0.7852516193323368, "grad_norm": 2.6740266878672974, "learning_rate": 1.1615707133274345e-06, "loss": 0.4955, "step": 4531 }, { "epoch": 0.7854249258031671, "grad_norm": 2.014967567255627, "learning_rate": 1.1597725197057646e-06, "loss": 0.4638, "step": 4532 }, { "epoch": 0.7855982322739975, "grad_norm": 2.1957147227748566, "learning_rate": 1.1579755364070584e-06, "loss": 0.5002, "step": 4533 }, { "epoch": 0.7857715387448279, "grad_norm": 2.3693994354686896, "learning_rate": 1.1561797639976708e-06, "loss": 0.5028, "step": 4534 }, { "epoch": 0.7859448452156582, "grad_norm": 2.37985269919978, "learning_rate": 1.1543852030435743e-06, "loss": 0.5995, "step": 4535 }, { "epoch": 0.7861181516864886, "grad_norm": 2.17268097917035, "learning_rate": 1.1525918541103598e-06, "loss": 0.5396, "step": 4536 }, { "epoch": 0.7862914581573189, "grad_norm": 2.170283355191315, "learning_rate": 1.1507997177632374e-06, "loss": 0.5367, "step": 4537 }, { "epoch": 0.7864647646281493, "grad_norm": 2.075512225761328, "learning_rate": 1.1490087945670331e-06, "loss": 0.4302, "step": 4538 }, { "epoch": 0.7866380710989797, "grad_norm": 2.3031624681059513, "learning_rate": 1.147219085086192e-06, "loss": 0.4705, "step": 4539 }, { "epoch": 0.78681137756981, "grad_norm": 3.9561090856945227, "learning_rate": 1.145430589884775e-06, "loss": 0.6189, "step": 4540 }, { "epoch": 0.7869846840406404, "grad_norm": 2.5079920484267566, "learning_rate": 1.1436433095264632e-06, "loss": 0.5171, "step": 4541 }, { "epoch": 0.7871579905114707, "grad_norm": 4.630460472706522, "learning_rate": 1.1418572445745513e-06, "loss": 0.6318, "step": 4542 }, { "epoch": 0.7873312969823011, "grad_norm": 2.481717938747751, "learning_rate": 1.140072395591954e-06, "loss": 0.5113, "step": 4543 }, { "epoch": 0.7875046034531314, "grad_norm": 2.303103375440085, "learning_rate": 1.1382887631412004e-06, "loss": 0.5568, "step": 4544 }, { "epoch": 0.7876779099239618, "grad_norm": 4.258454461203569, "learning_rate": 1.1365063477844384e-06, "loss": 0.5224, "step": 4545 }, { "epoch": 0.7878512163947922, "grad_norm": 2.151283510466953, "learning_rate": 1.13472515008343e-06, "loss": 0.4758, "step": 4546 }, { "epoch": 0.7880245228656225, "grad_norm": 2.983572846740944, "learning_rate": 1.1329451705995548e-06, "loss": 0.5942, "step": 4547 }, { "epoch": 0.7881978293364529, "grad_norm": 2.104104226398598, "learning_rate": 1.1311664098938092e-06, "loss": 0.5522, "step": 4548 }, { "epoch": 0.7883711358072832, "grad_norm": 4.104203706025022, "learning_rate": 1.1293888685268028e-06, "loss": 0.4881, "step": 4549 }, { "epoch": 0.7885444422781136, "grad_norm": 2.261798336199771, "learning_rate": 1.1276125470587645e-06, "loss": 0.5299, "step": 4550 }, { "epoch": 0.788717748748944, "grad_norm": 1.9266684865838004, "learning_rate": 1.1258374460495353e-06, "loss": 0.5125, "step": 4551 }, { "epoch": 0.7888910552197743, "grad_norm": 4.4104529424626975, "learning_rate": 1.1240635660585752e-06, "loss": 0.5637, "step": 4552 }, { "epoch": 0.7890643616906047, "grad_norm": 2.127879325353357, "learning_rate": 1.1222909076449546e-06, "loss": 0.5, "step": 4553 }, { "epoch": 0.7892376681614349, "grad_norm": 2.469300863394222, "learning_rate": 1.1205194713673606e-06, "loss": 0.6225, "step": 4554 }, { "epoch": 0.7894109746322653, "grad_norm": 2.31484978758659, "learning_rate": 1.1187492577840997e-06, "loss": 0.4359, "step": 4555 }, { "epoch": 0.7895842811030956, "grad_norm": 2.1834868648283017, "learning_rate": 1.1169802674530882e-06, "loss": 0.4816, "step": 4556 }, { "epoch": 0.789757587573926, "grad_norm": 2.3281366822359564, "learning_rate": 1.1152125009318575e-06, "loss": 0.5229, "step": 4557 }, { "epoch": 0.7899308940447564, "grad_norm": 2.7006374001846027, "learning_rate": 1.1134459587775543e-06, "loss": 0.5763, "step": 4558 }, { "epoch": 0.7901042005155867, "grad_norm": 2.7488583172307908, "learning_rate": 1.111680641546939e-06, "loss": 0.5188, "step": 4559 }, { "epoch": 0.7902775069864171, "grad_norm": 2.4502705382032897, "learning_rate": 1.1099165497963848e-06, "loss": 0.5359, "step": 4560 }, { "epoch": 0.7904508134572474, "grad_norm": 2.5914004221602887, "learning_rate": 1.1081536840818814e-06, "loss": 0.5114, "step": 4561 }, { "epoch": 0.7906241199280778, "grad_norm": 2.5278329424035597, "learning_rate": 1.1063920449590309e-06, "loss": 0.5392, "step": 4562 }, { "epoch": 0.7907974263989082, "grad_norm": 2.341570609149303, "learning_rate": 1.1046316329830458e-06, "loss": 0.4887, "step": 4563 }, { "epoch": 0.7909707328697385, "grad_norm": 2.105618532561286, "learning_rate": 1.1028724487087562e-06, "loss": 0.5771, "step": 4564 }, { "epoch": 0.7911440393405689, "grad_norm": 4.703867037124821, "learning_rate": 1.101114492690603e-06, "loss": 0.617, "step": 4565 }, { "epoch": 0.7913173458113992, "grad_norm": 2.4770519406864966, "learning_rate": 1.0993577654826409e-06, "loss": 0.5409, "step": 4566 }, { "epoch": 0.7914906522822296, "grad_norm": 2.3033557685926853, "learning_rate": 1.0976022676385368e-06, "loss": 0.6497, "step": 4567 }, { "epoch": 0.79166395875306, "grad_norm": 2.5637439824925896, "learning_rate": 1.09584799971157e-06, "loss": 0.4762, "step": 4568 }, { "epoch": 0.7918372652238903, "grad_norm": 2.7896631227526236, "learning_rate": 1.094094962254631e-06, "loss": 0.486, "step": 4569 }, { "epoch": 0.7920105716947207, "grad_norm": 2.7847781667448626, "learning_rate": 1.0923431558202274e-06, "loss": 0.5872, "step": 4570 }, { "epoch": 0.792183878165551, "grad_norm": 2.0995388724557555, "learning_rate": 1.0905925809604729e-06, "loss": 0.4696, "step": 4571 }, { "epoch": 0.7923571846363814, "grad_norm": 2.4632333326316878, "learning_rate": 1.088843238227098e-06, "loss": 0.6202, "step": 4572 }, { "epoch": 0.7925304911072117, "grad_norm": 2.6908170265088893, "learning_rate": 1.0870951281714387e-06, "loss": 0.5307, "step": 4573 }, { "epoch": 0.7927037975780421, "grad_norm": 2.8714631337667114, "learning_rate": 1.085348251344448e-06, "loss": 0.5595, "step": 4574 }, { "epoch": 0.7928771040488725, "grad_norm": 2.064721321362122, "learning_rate": 1.0836026082966884e-06, "loss": 0.5021, "step": 4575 }, { "epoch": 0.7930504105197028, "grad_norm": 2.7800387069137664, "learning_rate": 1.0818581995783328e-06, "loss": 0.4646, "step": 4576 }, { "epoch": 0.7932237169905332, "grad_norm": 2.608627962707757, "learning_rate": 1.0801150257391663e-06, "loss": 0.5098, "step": 4577 }, { "epoch": 0.7933970234613635, "grad_norm": 2.611831375420839, "learning_rate": 1.0783730873285846e-06, "loss": 0.5022, "step": 4578 }, { "epoch": 0.7935703299321939, "grad_norm": 2.02776256372137, "learning_rate": 1.076632384895594e-06, "loss": 0.5722, "step": 4579 }, { "epoch": 0.7937436364030241, "grad_norm": 72.08690516144397, "learning_rate": 1.0748929189888098e-06, "loss": 0.4739, "step": 4580 }, { "epoch": 0.7939169428738545, "grad_norm": 2.123376771305252, "learning_rate": 1.0731546901564598e-06, "loss": 0.576, "step": 4581 }, { "epoch": 0.7940902493446849, "grad_norm": 2.2331684644825787, "learning_rate": 1.0714176989463804e-06, "loss": 0.5061, "step": 4582 }, { "epoch": 0.7942635558155152, "grad_norm": 2.439391257257121, "learning_rate": 1.0696819459060188e-06, "loss": 0.5364, "step": 4583 }, { "epoch": 0.7944368622863456, "grad_norm": 2.4991129418151714, "learning_rate": 1.0679474315824306e-06, "loss": 0.5927, "step": 4584 }, { "epoch": 0.7946101687571759, "grad_norm": 2.6261518581676833, "learning_rate": 1.0662141565222834e-06, "loss": 0.5612, "step": 4585 }, { "epoch": 0.7947834752280063, "grad_norm": 2.524673171796535, "learning_rate": 1.064482121271852e-06, "loss": 0.5355, "step": 4586 }, { "epoch": 0.7949567816988367, "grad_norm": 2.321651377888359, "learning_rate": 1.0627513263770216e-06, "loss": 0.5284, "step": 4587 }, { "epoch": 0.795130088169667, "grad_norm": 5.354399342445472, "learning_rate": 1.0610217723832855e-06, "loss": 0.5795, "step": 4588 }, { "epoch": 0.7953033946404974, "grad_norm": 2.6896634338832013, "learning_rate": 1.0592934598357474e-06, "loss": 0.4642, "step": 4589 }, { "epoch": 0.7954767011113277, "grad_norm": 9.90607485846706, "learning_rate": 1.0575663892791183e-06, "loss": 0.4999, "step": 4590 }, { "epoch": 0.7956500075821581, "grad_norm": 2.745989184217429, "learning_rate": 1.0558405612577189e-06, "loss": 0.5641, "step": 4591 }, { "epoch": 0.7958233140529885, "grad_norm": 3.005910463232164, "learning_rate": 1.0541159763154767e-06, "loss": 0.5609, "step": 4592 }, { "epoch": 0.7959966205238188, "grad_norm": 2.3572145624674565, "learning_rate": 1.0523926349959308e-06, "loss": 0.5218, "step": 4593 }, { "epoch": 0.7961699269946492, "grad_norm": 2.055321388850212, "learning_rate": 1.0506705378422228e-06, "loss": 0.5259, "step": 4594 }, { "epoch": 0.7963432334654795, "grad_norm": 2.4187995805105005, "learning_rate": 1.0489496853971053e-06, "loss": 0.5714, "step": 4595 }, { "epoch": 0.7965165399363099, "grad_norm": 2.17257720712087, "learning_rate": 1.047230078202942e-06, "loss": 0.4949, "step": 4596 }, { "epoch": 0.7966898464071402, "grad_norm": 5.493775602809867, "learning_rate": 1.0455117168016992e-06, "loss": 0.4559, "step": 4597 }, { "epoch": 0.7968631528779706, "grad_norm": 2.593721469161726, "learning_rate": 1.0437946017349515e-06, "loss": 0.5468, "step": 4598 }, { "epoch": 0.797036459348801, "grad_norm": 2.6678764161343005, "learning_rate": 1.0420787335438826e-06, "loss": 0.6366, "step": 4599 }, { "epoch": 0.7972097658196313, "grad_norm": 2.995551958745012, "learning_rate": 1.0403641127692805e-06, "loss": 0.6177, "step": 4600 }, { "epoch": 0.7973830722904617, "grad_norm": 2.31085940351815, "learning_rate": 1.0386507399515427e-06, "loss": 0.4454, "step": 4601 }, { "epoch": 0.797556378761292, "grad_norm": 2.866864869879818, "learning_rate": 1.0369386156306721e-06, "loss": 0.5711, "step": 4602 }, { "epoch": 0.7977296852321224, "grad_norm": 2.2810151886624555, "learning_rate": 1.035227740346279e-06, "loss": 0.4639, "step": 4603 }, { "epoch": 0.7979029917029528, "grad_norm": 2.6044504479982087, "learning_rate": 1.0335181146375767e-06, "loss": 0.5355, "step": 4604 }, { "epoch": 0.7980762981737831, "grad_norm": 5.70756458092418, "learning_rate": 1.0318097390433896e-06, "loss": 0.5466, "step": 4605 }, { "epoch": 0.7982496046446134, "grad_norm": 4.587094706097963, "learning_rate": 1.0301026141021442e-06, "loss": 0.6085, "step": 4606 }, { "epoch": 0.7984229111154437, "grad_norm": 3.400774839232875, "learning_rate": 1.0283967403518757e-06, "loss": 0.5578, "step": 4607 }, { "epoch": 0.7985962175862741, "grad_norm": 2.705708653427268, "learning_rate": 1.026692118330223e-06, "loss": 0.5257, "step": 4608 }, { "epoch": 0.7987695240571044, "grad_norm": 2.8446096326155232, "learning_rate": 1.0249887485744315e-06, "loss": 0.4545, "step": 4609 }, { "epoch": 0.7989428305279348, "grad_norm": 3.694302255888456, "learning_rate": 1.0232866316213496e-06, "loss": 0.5362, "step": 4610 }, { "epoch": 0.7991161369987652, "grad_norm": 2.3719628622435427, "learning_rate": 1.0215857680074358e-06, "loss": 0.571, "step": 4611 }, { "epoch": 0.7992894434695955, "grad_norm": 3.4095892478473204, "learning_rate": 1.0198861582687497e-06, "loss": 0.7036, "step": 4612 }, { "epoch": 0.7994627499404259, "grad_norm": 2.5989446548940514, "learning_rate": 1.0181878029409575e-06, "loss": 0.483, "step": 4613 }, { "epoch": 0.7996360564112562, "grad_norm": 2.681968502394571, "learning_rate": 1.0164907025593268e-06, "loss": 0.5778, "step": 4614 }, { "epoch": 0.7998093628820866, "grad_norm": 2.49759436427338, "learning_rate": 1.014794857658733e-06, "loss": 0.5192, "step": 4615 }, { "epoch": 0.799982669352917, "grad_norm": 3.727602822889544, "learning_rate": 1.0131002687736551e-06, "loss": 0.5485, "step": 4616 }, { "epoch": 0.8001559758237473, "grad_norm": 2.91659574702851, "learning_rate": 1.0114069364381762e-06, "loss": 0.6104, "step": 4617 }, { "epoch": 0.8003292822945777, "grad_norm": 2.8305470753551227, "learning_rate": 1.009714861185983e-06, "loss": 0.5324, "step": 4618 }, { "epoch": 0.800502588765408, "grad_norm": 5.408097750045987, "learning_rate": 1.0080240435503658e-06, "loss": 0.5301, "step": 4619 }, { "epoch": 0.8006758952362384, "grad_norm": 2.4815563276135943, "learning_rate": 1.0063344840642197e-06, "loss": 0.5427, "step": 4620 }, { "epoch": 0.8008492017070687, "grad_norm": 2.074695872882027, "learning_rate": 1.0046461832600419e-06, "loss": 0.3875, "step": 4621 }, { "epoch": 0.8010225081778991, "grad_norm": 2.632142434699178, "learning_rate": 1.002959141669933e-06, "loss": 0.5928, "step": 4622 }, { "epoch": 0.8011958146487295, "grad_norm": 4.683846893302902, "learning_rate": 1.0012733598255986e-06, "loss": 0.6127, "step": 4623 }, { "epoch": 0.8013691211195598, "grad_norm": 2.2845738416294443, "learning_rate": 9.995888382583451e-07, "loss": 0.4873, "step": 4624 }, { "epoch": 0.8015424275903902, "grad_norm": 2.3770796011843673, "learning_rate": 9.97905577499082e-07, "loss": 0.4977, "step": 4625 }, { "epoch": 0.8017157340612205, "grad_norm": 2.7215854239214066, "learning_rate": 9.96223578078323e-07, "loss": 0.552, "step": 4626 }, { "epoch": 0.8018890405320509, "grad_norm": 2.1257288636503735, "learning_rate": 9.945428405261825e-07, "loss": 0.5394, "step": 4627 }, { "epoch": 0.8020623470028813, "grad_norm": 2.4178675246693775, "learning_rate": 9.92863365372378e-07, "loss": 0.589, "step": 4628 }, { "epoch": 0.8022356534737116, "grad_norm": 2.852485904461851, "learning_rate": 9.911851531462292e-07, "loss": 0.4611, "step": 4629 }, { "epoch": 0.802408959944542, "grad_norm": 2.745798820683651, "learning_rate": 9.895082043766575e-07, "loss": 0.5377, "step": 4630 }, { "epoch": 0.8025822664153723, "grad_norm": 3.6576542097237574, "learning_rate": 9.878325195921861e-07, "loss": 0.4741, "step": 4631 }, { "epoch": 0.8027555728862026, "grad_norm": 4.303195905652087, "learning_rate": 9.861580993209396e-07, "loss": 0.5236, "step": 4632 }, { "epoch": 0.8029288793570329, "grad_norm": 2.733526332388073, "learning_rate": 9.84484944090645e-07, "loss": 0.5526, "step": 4633 }, { "epoch": 0.8031021858278633, "grad_norm": 2.1385909198688986, "learning_rate": 9.828130544286307e-07, "loss": 0.4998, "step": 4634 }, { "epoch": 0.8032754922986937, "grad_norm": 2.451299772755682, "learning_rate": 9.811424308618223e-07, "loss": 0.5233, "step": 4635 }, { "epoch": 0.803448798769524, "grad_norm": 2.7433818919487307, "learning_rate": 9.794730739167524e-07, "loss": 0.5437, "step": 4636 }, { "epoch": 0.8036221052403544, "grad_norm": 2.8108275757889443, "learning_rate": 9.778049841195513e-07, "loss": 0.5489, "step": 4637 }, { "epoch": 0.8037954117111847, "grad_norm": 3.4742078557873635, "learning_rate": 9.761381619959497e-07, "loss": 0.6541, "step": 4638 }, { "epoch": 0.8039687181820151, "grad_norm": 2.11827227603222, "learning_rate": 9.74472608071279e-07, "loss": 0.3975, "step": 4639 }, { "epoch": 0.8041420246528455, "grad_norm": 4.111556176312691, "learning_rate": 9.728083228704722e-07, "loss": 0.5948, "step": 4640 }, { "epoch": 0.8043153311236758, "grad_norm": 2.559327217285122, "learning_rate": 9.711453069180598e-07, "loss": 0.5683, "step": 4641 }, { "epoch": 0.8044886375945062, "grad_norm": 2.3680408566935505, "learning_rate": 9.694835607381753e-07, "loss": 0.5822, "step": 4642 }, { "epoch": 0.8046619440653365, "grad_norm": 2.291683382370845, "learning_rate": 9.678230848545501e-07, "loss": 0.4653, "step": 4643 }, { "epoch": 0.8048352505361669, "grad_norm": 2.336769306060661, "learning_rate": 9.661638797905171e-07, "loss": 0.5027, "step": 4644 }, { "epoch": 0.8050085570069972, "grad_norm": 2.3275673055604673, "learning_rate": 9.645059460690042e-07, "loss": 0.5646, "step": 4645 }, { "epoch": 0.8051818634778276, "grad_norm": 2.447176681554535, "learning_rate": 9.628492842125436e-07, "loss": 0.5072, "step": 4646 }, { "epoch": 0.805355169948658, "grad_norm": 3.292960246275711, "learning_rate": 9.611938947432647e-07, "loss": 0.5383, "step": 4647 }, { "epoch": 0.8055284764194883, "grad_norm": 2.755995456248534, "learning_rate": 9.595397781828958e-07, "loss": 0.5327, "step": 4648 }, { "epoch": 0.8057017828903187, "grad_norm": 2.8034069381088593, "learning_rate": 9.578869350527641e-07, "loss": 0.6243, "step": 4649 }, { "epoch": 0.805875089361149, "grad_norm": 2.5716277760506077, "learning_rate": 9.562353658737938e-07, "loss": 0.5753, "step": 4650 }, { "epoch": 0.8060483958319794, "grad_norm": 2.967025349511518, "learning_rate": 9.545850711665128e-07, "loss": 0.6343, "step": 4651 }, { "epoch": 0.8062217023028098, "grad_norm": 2.2360346189242017, "learning_rate": 9.529360514510422e-07, "loss": 0.5092, "step": 4652 }, { "epoch": 0.8063950087736401, "grad_norm": 2.59236631103678, "learning_rate": 9.512883072471024e-07, "loss": 0.526, "step": 4653 }, { "epoch": 0.8065683152444705, "grad_norm": 6.075271306802962, "learning_rate": 9.496418390740142e-07, "loss": 0.5897, "step": 4654 }, { "epoch": 0.8067416217153008, "grad_norm": 2.354796283037088, "learning_rate": 9.479966474506913e-07, "loss": 0.565, "step": 4655 }, { "epoch": 0.8069149281861312, "grad_norm": 2.6669032174620906, "learning_rate": 9.463527328956496e-07, "loss": 0.4783, "step": 4656 }, { "epoch": 0.8070882346569616, "grad_norm": 2.6253881623553936, "learning_rate": 9.447100959270011e-07, "loss": 0.5316, "step": 4657 }, { "epoch": 0.8072615411277918, "grad_norm": 2.7774841390584575, "learning_rate": 9.430687370624547e-07, "loss": 0.4958, "step": 4658 }, { "epoch": 0.8074348475986222, "grad_norm": 3.0044720249804855, "learning_rate": 9.41428656819317e-07, "loss": 0.4566, "step": 4659 }, { "epoch": 0.8076081540694525, "grad_norm": 2.450353882697952, "learning_rate": 9.397898557144919e-07, "loss": 0.526, "step": 4660 }, { "epoch": 0.8077814605402829, "grad_norm": 2.460855404268615, "learning_rate": 9.381523342644788e-07, "loss": 0.5592, "step": 4661 }, { "epoch": 0.8079547670111132, "grad_norm": 2.2643286024541855, "learning_rate": 9.36516092985375e-07, "loss": 0.5521, "step": 4662 }, { "epoch": 0.8081280734819436, "grad_norm": 3.9369878544409516, "learning_rate": 9.348811323928747e-07, "loss": 0.5625, "step": 4663 }, { "epoch": 0.808301379952774, "grad_norm": 5.383987899696406, "learning_rate": 9.332474530022668e-07, "loss": 0.5047, "step": 4664 }, { "epoch": 0.8084746864236043, "grad_norm": 3.4433141547708366, "learning_rate": 9.316150553284375e-07, "loss": 0.6111, "step": 4665 }, { "epoch": 0.8086479928944347, "grad_norm": 2.8466444799112507, "learning_rate": 9.299839398858696e-07, "loss": 0.5653, "step": 4666 }, { "epoch": 0.808821299365265, "grad_norm": 2.5062723121014923, "learning_rate": 9.28354107188641e-07, "loss": 0.5934, "step": 4667 }, { "epoch": 0.8089946058360954, "grad_norm": 3.082161969248879, "learning_rate": 9.267255577504247e-07, "loss": 0.5477, "step": 4668 }, { "epoch": 0.8091679123069258, "grad_norm": 2.6626421140610015, "learning_rate": 9.250982920844904e-07, "loss": 0.478, "step": 4669 }, { "epoch": 0.8093412187777561, "grad_norm": 2.565941732555737, "learning_rate": 9.234723107037031e-07, "loss": 0.5177, "step": 4670 }, { "epoch": 0.8095145252485865, "grad_norm": 2.262328134321148, "learning_rate": 9.218476141205218e-07, "loss": 0.5492, "step": 4671 }, { "epoch": 0.8096878317194168, "grad_norm": 2.1506369009195194, "learning_rate": 9.202242028470027e-07, "loss": 0.5572, "step": 4672 }, { "epoch": 0.8098611381902472, "grad_norm": 2.9827918409176832, "learning_rate": 9.186020773947945e-07, "loss": 0.5948, "step": 4673 }, { "epoch": 0.8100344446610775, "grad_norm": 2.200524085131333, "learning_rate": 9.169812382751431e-07, "loss": 0.5338, "step": 4674 }, { "epoch": 0.8102077511319079, "grad_norm": 2.2725801034918742, "learning_rate": 9.153616859988878e-07, "loss": 0.5785, "step": 4675 }, { "epoch": 0.8103810576027383, "grad_norm": 2.5120907688316834, "learning_rate": 9.13743421076459e-07, "loss": 0.5539, "step": 4676 }, { "epoch": 0.8105543640735686, "grad_norm": 2.2413041694135445, "learning_rate": 9.121264440178884e-07, "loss": 0.508, "step": 4677 }, { "epoch": 0.810727670544399, "grad_norm": 2.1061330703682444, "learning_rate": 9.105107553327963e-07, "loss": 0.5419, "step": 4678 }, { "epoch": 0.8109009770152293, "grad_norm": 2.76147824492964, "learning_rate": 9.088963555303998e-07, "loss": 0.5193, "step": 4679 }, { "epoch": 0.8110742834860597, "grad_norm": 2.4908196486206076, "learning_rate": 9.072832451195074e-07, "loss": 0.4843, "step": 4680 }, { "epoch": 0.8112475899568901, "grad_norm": 2.389893632151061, "learning_rate": 9.056714246085224e-07, "loss": 0.4964, "step": 4681 }, { "epoch": 0.8114208964277204, "grad_norm": 2.325123700772912, "learning_rate": 9.04060894505443e-07, "loss": 0.4968, "step": 4682 }, { "epoch": 0.8115942028985508, "grad_norm": 2.3529786058718427, "learning_rate": 9.02451655317858e-07, "loss": 0.5108, "step": 4683 }, { "epoch": 0.811767509369381, "grad_norm": 2.387650565290288, "learning_rate": 9.008437075529513e-07, "loss": 0.5351, "step": 4684 }, { "epoch": 0.8119408158402114, "grad_norm": 2.4976831238999098, "learning_rate": 8.992370517175009e-07, "loss": 0.633, "step": 4685 }, { "epoch": 0.8121141223110417, "grad_norm": 2.431109690136321, "learning_rate": 8.97631688317872e-07, "loss": 0.5741, "step": 4686 }, { "epoch": 0.8122874287818721, "grad_norm": 2.2785053054004925, "learning_rate": 8.960276178600285e-07, "loss": 0.5271, "step": 4687 }, { "epoch": 0.8124607352527025, "grad_norm": 2.2054683147073906, "learning_rate": 8.944248408495242e-07, "loss": 0.551, "step": 4688 }, { "epoch": 0.8126340417235328, "grad_norm": 3.0342652341502303, "learning_rate": 8.928233577915068e-07, "loss": 0.5942, "step": 4689 }, { "epoch": 0.8128073481943632, "grad_norm": 3.033918029550227, "learning_rate": 8.912231691907136e-07, "loss": 0.5268, "step": 4690 }, { "epoch": 0.8129806546651935, "grad_norm": 2.1444893310092388, "learning_rate": 8.89624275551475e-07, "loss": 0.5187, "step": 4691 }, { "epoch": 0.8131539611360239, "grad_norm": 2.687815247575167, "learning_rate": 8.880266773777157e-07, "loss": 0.5431, "step": 4692 }, { "epoch": 0.8133272676068543, "grad_norm": 2.5733525260967944, "learning_rate": 8.864303751729497e-07, "loss": 0.5336, "step": 4693 }, { "epoch": 0.8135005740776846, "grad_norm": 2.2170174413713974, "learning_rate": 8.848353694402823e-07, "loss": 0.5611, "step": 4694 }, { "epoch": 0.813673880548515, "grad_norm": 3.2883127424930976, "learning_rate": 8.832416606824107e-07, "loss": 0.5523, "step": 4695 }, { "epoch": 0.8138471870193453, "grad_norm": 2.140830079625392, "learning_rate": 8.816492494016244e-07, "loss": 0.5423, "step": 4696 }, { "epoch": 0.8140204934901757, "grad_norm": 3.8852119688256725, "learning_rate": 8.800581360998017e-07, "loss": 0.5349, "step": 4697 }, { "epoch": 0.814193799961006, "grad_norm": 3.356118833496384, "learning_rate": 8.784683212784134e-07, "loss": 0.4933, "step": 4698 }, { "epoch": 0.8143671064318364, "grad_norm": 2.5531206232691233, "learning_rate": 8.768798054385209e-07, "loss": 0.6149, "step": 4699 }, { "epoch": 0.8145404129026668, "grad_norm": 5.779417696016379, "learning_rate": 8.752925890807768e-07, "loss": 0.537, "step": 4700 }, { "epoch": 0.8147137193734971, "grad_norm": 4.046584905208207, "learning_rate": 8.737066727054222e-07, "loss": 0.5293, "step": 4701 }, { "epoch": 0.8148870258443275, "grad_norm": 2.9212880940683927, "learning_rate": 8.721220568122912e-07, "loss": 0.5605, "step": 4702 }, { "epoch": 0.8150603323151578, "grad_norm": 2.5594568780164604, "learning_rate": 8.705387419008055e-07, "loss": 0.519, "step": 4703 }, { "epoch": 0.8152336387859882, "grad_norm": 3.760245713854555, "learning_rate": 8.689567284699779e-07, "loss": 0.5376, "step": 4704 }, { "epoch": 0.8154069452568186, "grad_norm": 2.5595549331172673, "learning_rate": 8.67376017018412e-07, "loss": 0.4492, "step": 4705 }, { "epoch": 0.8155802517276489, "grad_norm": 2.2736394361389394, "learning_rate": 8.657966080442987e-07, "loss": 0.5804, "step": 4706 }, { "epoch": 0.8157535581984793, "grad_norm": 2.304267895089158, "learning_rate": 8.642185020454202e-07, "loss": 0.6001, "step": 4707 }, { "epoch": 0.8159268646693096, "grad_norm": 2.478067208821873, "learning_rate": 8.626416995191472e-07, "loss": 0.5824, "step": 4708 }, { "epoch": 0.81610017114014, "grad_norm": 2.2785819076684786, "learning_rate": 8.61066200962441e-07, "loss": 0.5646, "step": 4709 }, { "epoch": 0.8162734776109704, "grad_norm": 2.7076308668230618, "learning_rate": 8.594920068718498e-07, "loss": 0.5528, "step": 4710 }, { "epoch": 0.8164467840818006, "grad_norm": 2.492312144320399, "learning_rate": 8.57919117743512e-07, "loss": 0.5388, "step": 4711 }, { "epoch": 0.816620090552631, "grad_norm": 2.9366463067173316, "learning_rate": 8.563475340731542e-07, "loss": 0.5134, "step": 4712 }, { "epoch": 0.8167933970234613, "grad_norm": 2.7333159709839463, "learning_rate": 8.547772563560925e-07, "loss": 0.5749, "step": 4713 }, { "epoch": 0.8169667034942917, "grad_norm": 2.8368461426215767, "learning_rate": 8.532082850872303e-07, "loss": 0.5749, "step": 4714 }, { "epoch": 0.817140009965122, "grad_norm": 2.6017479594429322, "learning_rate": 8.516406207610594e-07, "loss": 0.5297, "step": 4715 }, { "epoch": 0.8173133164359524, "grad_norm": 2.4719994733645536, "learning_rate": 8.50074263871662e-07, "loss": 0.4964, "step": 4716 }, { "epoch": 0.8174866229067828, "grad_norm": 2.699168114857137, "learning_rate": 8.485092149127011e-07, "loss": 0.6074, "step": 4717 }, { "epoch": 0.8176599293776131, "grad_norm": 2.971179648824519, "learning_rate": 8.469454743774374e-07, "loss": 0.5488, "step": 4718 }, { "epoch": 0.8178332358484435, "grad_norm": 2.5039572562345866, "learning_rate": 8.453830427587128e-07, "loss": 0.5687, "step": 4719 }, { "epoch": 0.8180065423192738, "grad_norm": 2.8421968743331463, "learning_rate": 8.438219205489578e-07, "loss": 0.459, "step": 4720 }, { "epoch": 0.8181798487901042, "grad_norm": 2.4684834178844435, "learning_rate": 8.422621082401916e-07, "loss": 0.5736, "step": 4721 }, { "epoch": 0.8183531552609346, "grad_norm": 2.0055640592447106, "learning_rate": 8.407036063240192e-07, "loss": 0.4847, "step": 4722 }, { "epoch": 0.8185264617317649, "grad_norm": 2.441656332764154, "learning_rate": 8.391464152916323e-07, "loss": 0.578, "step": 4723 }, { "epoch": 0.8186997682025953, "grad_norm": 2.2849897339259266, "learning_rate": 8.375905356338116e-07, "loss": 0.5264, "step": 4724 }, { "epoch": 0.8188730746734256, "grad_norm": 2.427033034169992, "learning_rate": 8.360359678409219e-07, "loss": 0.5146, "step": 4725 }, { "epoch": 0.819046381144256, "grad_norm": 2.125619592534205, "learning_rate": 8.344827124029176e-07, "loss": 0.5373, "step": 4726 }, { "epoch": 0.8192196876150863, "grad_norm": 2.3508191658468127, "learning_rate": 8.329307698093347e-07, "loss": 0.4664, "step": 4727 }, { "epoch": 0.8193929940859167, "grad_norm": 4.2590014840680155, "learning_rate": 8.313801405492993e-07, "loss": 0.5955, "step": 4728 }, { "epoch": 0.8195663005567471, "grad_norm": 2.214995923741962, "learning_rate": 8.298308251115239e-07, "loss": 0.5917, "step": 4729 }, { "epoch": 0.8197396070275774, "grad_norm": 2.376011786420598, "learning_rate": 8.282828239843044e-07, "loss": 0.5275, "step": 4730 }, { "epoch": 0.8199129134984078, "grad_norm": 2.6437337864511337, "learning_rate": 8.267361376555228e-07, "loss": 0.5143, "step": 4731 }, { "epoch": 0.8200862199692381, "grad_norm": 3.0073443254389094, "learning_rate": 8.251907666126502e-07, "loss": 0.5428, "step": 4732 }, { "epoch": 0.8202595264400685, "grad_norm": 2.3407818731457715, "learning_rate": 8.236467113427387e-07, "loss": 0.5858, "step": 4733 }, { "epoch": 0.8204328329108989, "grad_norm": 2.09652853111136, "learning_rate": 8.221039723324287e-07, "loss": 0.5372, "step": 4734 }, { "epoch": 0.8206061393817292, "grad_norm": 2.5256683158207536, "learning_rate": 8.205625500679442e-07, "loss": 0.5329, "step": 4735 }, { "epoch": 0.8207794458525596, "grad_norm": 2.2494626568342193, "learning_rate": 8.190224450350942e-07, "loss": 0.4951, "step": 4736 }, { "epoch": 0.8209527523233898, "grad_norm": 2.8255545280606573, "learning_rate": 8.174836577192747e-07, "loss": 0.5543, "step": 4737 }, { "epoch": 0.8211260587942202, "grad_norm": 2.134448039163949, "learning_rate": 8.15946188605462e-07, "loss": 0.5059, "step": 4738 }, { "epoch": 0.8212993652650505, "grad_norm": 2.707311353293328, "learning_rate": 8.144100381782211e-07, "loss": 0.5316, "step": 4739 }, { "epoch": 0.8214726717358809, "grad_norm": 2.542217502241921, "learning_rate": 8.128752069216994e-07, "loss": 0.5165, "step": 4740 }, { "epoch": 0.8216459782067113, "grad_norm": 2.65988929837189, "learning_rate": 8.113416953196301e-07, "loss": 0.5112, "step": 4741 }, { "epoch": 0.8218192846775416, "grad_norm": 2.9581397217035095, "learning_rate": 8.098095038553283e-07, "loss": 0.6015, "step": 4742 }, { "epoch": 0.821992591148372, "grad_norm": 2.455086407963947, "learning_rate": 8.082786330116943e-07, "loss": 0.5805, "step": 4743 }, { "epoch": 0.8221658976192023, "grad_norm": 4.575686030398901, "learning_rate": 8.067490832712128e-07, "loss": 0.5669, "step": 4744 }, { "epoch": 0.8223392040900327, "grad_norm": 2.580546129868948, "learning_rate": 8.052208551159507e-07, "loss": 0.6219, "step": 4745 }, { "epoch": 0.822512510560863, "grad_norm": 3.137815884670751, "learning_rate": 8.03693949027558e-07, "loss": 0.4975, "step": 4746 }, { "epoch": 0.8226858170316934, "grad_norm": 2.1336849153362083, "learning_rate": 8.021683654872731e-07, "loss": 0.5094, "step": 4747 }, { "epoch": 0.8228591235025238, "grad_norm": 3.237862640429512, "learning_rate": 8.006441049759094e-07, "loss": 0.5495, "step": 4748 }, { "epoch": 0.8230324299733541, "grad_norm": 2.7011739380054594, "learning_rate": 7.991211679738692e-07, "loss": 0.4204, "step": 4749 }, { "epoch": 0.8232057364441845, "grad_norm": 2.5001033633077503, "learning_rate": 7.975995549611353e-07, "loss": 0.5298, "step": 4750 }, { "epoch": 0.8233790429150148, "grad_norm": 2.4310595295173423, "learning_rate": 7.960792664172745e-07, "loss": 0.5214, "step": 4751 }, { "epoch": 0.8235523493858452, "grad_norm": 4.803768575040473, "learning_rate": 7.945603028214355e-07, "loss": 0.4767, "step": 4752 }, { "epoch": 0.8237256558566756, "grad_norm": 3.32919374051883, "learning_rate": 7.930426646523487e-07, "loss": 0.4773, "step": 4753 }, { "epoch": 0.8238989623275059, "grad_norm": 2.904799689520723, "learning_rate": 7.915263523883282e-07, "loss": 0.5035, "step": 4754 }, { "epoch": 0.8240722687983363, "grad_norm": 2.46010680614506, "learning_rate": 7.900113665072695e-07, "loss": 0.4423, "step": 4755 }, { "epoch": 0.8242455752691666, "grad_norm": 4.35002056723916, "learning_rate": 7.884977074866501e-07, "loss": 0.5162, "step": 4756 }, { "epoch": 0.824418881739997, "grad_norm": 2.2740986127437433, "learning_rate": 7.86985375803529e-07, "loss": 0.4943, "step": 4757 }, { "epoch": 0.8245921882108274, "grad_norm": 4.70615143218431, "learning_rate": 7.85474371934547e-07, "loss": 0.5151, "step": 4758 }, { "epoch": 0.8247654946816577, "grad_norm": 2.618263505512324, "learning_rate": 7.839646963559272e-07, "loss": 0.5592, "step": 4759 }, { "epoch": 0.8249388011524881, "grad_norm": 2.5736226662321093, "learning_rate": 7.824563495434734e-07, "loss": 0.5141, "step": 4760 }, { "epoch": 0.8251121076233184, "grad_norm": 2.201051025757008, "learning_rate": 7.809493319725702e-07, "loss": 0.5607, "step": 4761 }, { "epoch": 0.8252854140941488, "grad_norm": 2.694408742090774, "learning_rate": 7.794436441181846e-07, "loss": 0.5176, "step": 4762 }, { "epoch": 0.825458720564979, "grad_norm": 2.500627176046466, "learning_rate": 7.779392864548624e-07, "loss": 0.5583, "step": 4763 }, { "epoch": 0.8256320270358094, "grad_norm": 6.814166323901639, "learning_rate": 7.76436259456732e-07, "loss": 0.544, "step": 4764 }, { "epoch": 0.8258053335066398, "grad_norm": 2.572271517267156, "learning_rate": 7.74934563597502e-07, "loss": 0.5166, "step": 4765 }, { "epoch": 0.8259786399774701, "grad_norm": 2.59793084452611, "learning_rate": 7.734341993504613e-07, "loss": 0.5591, "step": 4766 }, { "epoch": 0.8261519464483005, "grad_norm": 2.28244636877731, "learning_rate": 7.719351671884783e-07, "loss": 0.4479, "step": 4767 }, { "epoch": 0.8263252529191308, "grad_norm": 2.4385888132775264, "learning_rate": 7.704374675840048e-07, "loss": 0.5833, "step": 4768 }, { "epoch": 0.8264985593899612, "grad_norm": 2.6136901862043906, "learning_rate": 7.689411010090669e-07, "loss": 0.563, "step": 4769 }, { "epoch": 0.8266718658607916, "grad_norm": 2.7055491206735844, "learning_rate": 7.674460679352758e-07, "loss": 0.6029, "step": 4770 }, { "epoch": 0.8268451723316219, "grad_norm": 2.8452630918386697, "learning_rate": 7.659523688338194e-07, "loss": 0.6053, "step": 4771 }, { "epoch": 0.8270184788024523, "grad_norm": 2.1457348017081603, "learning_rate": 7.644600041754663e-07, "loss": 0.5328, "step": 4772 }, { "epoch": 0.8271917852732826, "grad_norm": 2.127029563326598, "learning_rate": 7.629689744305657e-07, "loss": 0.4806, "step": 4773 }, { "epoch": 0.827365091744113, "grad_norm": 2.3207850086052546, "learning_rate": 7.614792800690446e-07, "loss": 0.5263, "step": 4774 }, { "epoch": 0.8275383982149433, "grad_norm": 2.82203318322702, "learning_rate": 7.599909215604085e-07, "loss": 0.4478, "step": 4775 }, { "epoch": 0.8277117046857737, "grad_norm": 2.4913805338082105, "learning_rate": 7.585038993737437e-07, "loss": 0.5513, "step": 4776 }, { "epoch": 0.8278850111566041, "grad_norm": 2.1479717527691276, "learning_rate": 7.570182139777138e-07, "loss": 0.543, "step": 4777 }, { "epoch": 0.8280583176274344, "grad_norm": 3.0840832060512122, "learning_rate": 7.555338658405625e-07, "loss": 0.5899, "step": 4778 }, { "epoch": 0.8282316240982648, "grad_norm": 3.4136374591546748, "learning_rate": 7.540508554301101e-07, "loss": 0.6412, "step": 4779 }, { "epoch": 0.8284049305690951, "grad_norm": 2.087220886687242, "learning_rate": 7.525691832137566e-07, "loss": 0.493, "step": 4780 }, { "epoch": 0.8285782370399255, "grad_norm": 2.44146931401847, "learning_rate": 7.5108884965848e-07, "loss": 0.6464, "step": 4781 }, { "epoch": 0.8287515435107559, "grad_norm": 2.6492486297904216, "learning_rate": 7.496098552308368e-07, "loss": 0.4695, "step": 4782 }, { "epoch": 0.8289248499815862, "grad_norm": 2.508029813141494, "learning_rate": 7.48132200396961e-07, "loss": 0.5159, "step": 4783 }, { "epoch": 0.8290981564524166, "grad_norm": 2.4986728901892126, "learning_rate": 7.466558856225642e-07, "loss": 0.6175, "step": 4784 }, { "epoch": 0.8292714629232469, "grad_norm": 2.6454015681114798, "learning_rate": 7.451809113729364e-07, "loss": 0.5295, "step": 4785 }, { "epoch": 0.8294447693940773, "grad_norm": 2.3131761339356, "learning_rate": 7.437072781129445e-07, "loss": 0.5253, "step": 4786 }, { "epoch": 0.8296180758649077, "grad_norm": 3.4282087161701127, "learning_rate": 7.422349863070316e-07, "loss": 0.4985, "step": 4787 }, { "epoch": 0.829791382335738, "grad_norm": 2.7308146732112952, "learning_rate": 7.407640364192226e-07, "loss": 0.4893, "step": 4788 }, { "epoch": 0.8299646888065683, "grad_norm": 2.0076415554976235, "learning_rate": 7.392944289131132e-07, "loss": 0.4161, "step": 4789 }, { "epoch": 0.8301379952773986, "grad_norm": 2.5361851891757774, "learning_rate": 7.378261642518803e-07, "loss": 0.5815, "step": 4790 }, { "epoch": 0.830311301748229, "grad_norm": 2.24709379646476, "learning_rate": 7.363592428982758e-07, "loss": 0.5389, "step": 4791 }, { "epoch": 0.8304846082190593, "grad_norm": 2.3461161427977295, "learning_rate": 7.348936653146288e-07, "loss": 0.4932, "step": 4792 }, { "epoch": 0.8306579146898897, "grad_norm": 2.523749794197157, "learning_rate": 7.334294319628449e-07, "loss": 0.5541, "step": 4793 }, { "epoch": 0.8308312211607201, "grad_norm": 4.3231931256181095, "learning_rate": 7.31966543304406e-07, "loss": 0.5582, "step": 4794 }, { "epoch": 0.8310045276315504, "grad_norm": 3.3332175330590417, "learning_rate": 7.3050499980037e-07, "loss": 0.5814, "step": 4795 }, { "epoch": 0.8311778341023808, "grad_norm": 2.978572903202629, "learning_rate": 7.290448019113705e-07, "loss": 0.5631, "step": 4796 }, { "epoch": 0.8313511405732111, "grad_norm": 2.3394911875581754, "learning_rate": 7.275859500976184e-07, "loss": 0.6442, "step": 4797 }, { "epoch": 0.8315244470440415, "grad_norm": 2.635975744383672, "learning_rate": 7.261284448188994e-07, "loss": 0.5618, "step": 4798 }, { "epoch": 0.8316977535148719, "grad_norm": 2.1854290568574406, "learning_rate": 7.246722865345745e-07, "loss": 0.5708, "step": 4799 }, { "epoch": 0.8318710599857022, "grad_norm": 2.382152256230933, "learning_rate": 7.232174757035804e-07, "loss": 0.5087, "step": 4800 }, { "epoch": 0.8320443664565326, "grad_norm": 2.445817623034806, "learning_rate": 7.217640127844289e-07, "loss": 0.5051, "step": 4801 }, { "epoch": 0.8322176729273629, "grad_norm": 3.2163060398132273, "learning_rate": 7.203118982352086e-07, "loss": 0.4871, "step": 4802 }, { "epoch": 0.8323909793981933, "grad_norm": 2.849130601857661, "learning_rate": 7.188611325135808e-07, "loss": 0.5267, "step": 4803 }, { "epoch": 0.8325642858690236, "grad_norm": 2.1441922329394765, "learning_rate": 7.17411716076783e-07, "loss": 0.5206, "step": 4804 }, { "epoch": 0.832737592339854, "grad_norm": 2.3101765346361645, "learning_rate": 7.159636493816275e-07, "loss": 0.5111, "step": 4805 }, { "epoch": 0.8329108988106844, "grad_norm": 2.033626424665614, "learning_rate": 7.145169328845003e-07, "loss": 0.4573, "step": 4806 }, { "epoch": 0.8330842052815147, "grad_norm": 2.4115066796021356, "learning_rate": 7.130715670413629e-07, "loss": 0.5897, "step": 4807 }, { "epoch": 0.8332575117523451, "grad_norm": 2.288230198072813, "learning_rate": 7.116275523077504e-07, "loss": 0.4319, "step": 4808 }, { "epoch": 0.8334308182231754, "grad_norm": 7.776842222588465, "learning_rate": 7.101848891387742e-07, "loss": 0.4647, "step": 4809 }, { "epoch": 0.8336041246940058, "grad_norm": 2.4911468684480704, "learning_rate": 7.087435779891145e-07, "loss": 0.5107, "step": 4810 }, { "epoch": 0.8337774311648362, "grad_norm": 2.205176920964177, "learning_rate": 7.073036193130306e-07, "loss": 0.5413, "step": 4811 }, { "epoch": 0.8339507376356665, "grad_norm": 3.18791526343877, "learning_rate": 7.058650135643541e-07, "loss": 0.4678, "step": 4812 }, { "epoch": 0.8341240441064969, "grad_norm": 2.395120439186704, "learning_rate": 7.044277611964878e-07, "loss": 0.4807, "step": 4813 }, { "epoch": 0.8342973505773272, "grad_norm": 2.2956915601118197, "learning_rate": 7.029918626624122e-07, "loss": 0.4539, "step": 4814 }, { "epoch": 0.8344706570481575, "grad_norm": 2.1877109346411188, "learning_rate": 7.015573184146785e-07, "loss": 0.538, "step": 4815 }, { "epoch": 0.8346439635189878, "grad_norm": 2.2425664273897308, "learning_rate": 7.001241289054111e-07, "loss": 0.508, "step": 4816 }, { "epoch": 0.8348172699898182, "grad_norm": 3.864794823561709, "learning_rate": 6.986922945863073e-07, "loss": 0.4592, "step": 4817 }, { "epoch": 0.8349905764606486, "grad_norm": 3.136358179015202, "learning_rate": 6.972618159086387e-07, "loss": 0.5309, "step": 4818 }, { "epoch": 0.8351638829314789, "grad_norm": 2.874648384843481, "learning_rate": 6.95832693323249e-07, "loss": 0.5469, "step": 4819 }, { "epoch": 0.8353371894023093, "grad_norm": 2.013342293933407, "learning_rate": 6.94404927280552e-07, "loss": 0.5691, "step": 4820 }, { "epoch": 0.8355104958731396, "grad_norm": 2.861872513635385, "learning_rate": 6.929785182305382e-07, "loss": 0.4599, "step": 4821 }, { "epoch": 0.83568380234397, "grad_norm": 4.203210043796393, "learning_rate": 6.915534666227669e-07, "loss": 0.5584, "step": 4822 }, { "epoch": 0.8358571088148004, "grad_norm": 2.3576210802928816, "learning_rate": 6.901297729063721e-07, "loss": 0.5224, "step": 4823 }, { "epoch": 0.8360304152856307, "grad_norm": 2.394214616975509, "learning_rate": 6.887074375300584e-07, "loss": 0.5293, "step": 4824 }, { "epoch": 0.8362037217564611, "grad_norm": 2.363939022330763, "learning_rate": 6.872864609421032e-07, "loss": 0.5331, "step": 4825 }, { "epoch": 0.8363770282272914, "grad_norm": 2.870680978726213, "learning_rate": 6.858668435903543e-07, "loss": 0.4986, "step": 4826 }, { "epoch": 0.8365503346981218, "grad_norm": 2.1989300017116102, "learning_rate": 6.844485859222311e-07, "loss": 0.5469, "step": 4827 }, { "epoch": 0.8367236411689521, "grad_norm": 2.3371823275248396, "learning_rate": 6.830316883847282e-07, "loss": 0.6085, "step": 4828 }, { "epoch": 0.8368969476397825, "grad_norm": 8.568577527476558, "learning_rate": 6.816161514244074e-07, "loss": 0.5024, "step": 4829 }, { "epoch": 0.8370702541106129, "grad_norm": 1.9431238863656917, "learning_rate": 6.802019754874017e-07, "loss": 0.5234, "step": 4830 }, { "epoch": 0.8372435605814432, "grad_norm": 2.7434150158296893, "learning_rate": 6.787891610194175e-07, "loss": 0.4447, "step": 4831 }, { "epoch": 0.8374168670522736, "grad_norm": 2.1004868946428186, "learning_rate": 6.773777084657302e-07, "loss": 0.5162, "step": 4832 }, { "epoch": 0.8375901735231039, "grad_norm": 2.650935831619054, "learning_rate": 6.759676182711872e-07, "loss": 0.5676, "step": 4833 }, { "epoch": 0.8377634799939343, "grad_norm": 2.2186149409679707, "learning_rate": 6.745588908802064e-07, "loss": 0.3859, "step": 4834 }, { "epoch": 0.8379367864647647, "grad_norm": 2.2404536779292124, "learning_rate": 6.73151526736775e-07, "loss": 0.4451, "step": 4835 }, { "epoch": 0.838110092935595, "grad_norm": 2.2103066498634667, "learning_rate": 6.717455262844519e-07, "loss": 0.4833, "step": 4836 }, { "epoch": 0.8382833994064254, "grad_norm": 2.2239633606800018, "learning_rate": 6.703408899663661e-07, "loss": 0.5793, "step": 4837 }, { "epoch": 0.8384567058772557, "grad_norm": 2.576014558880095, "learning_rate": 6.689376182252155e-07, "loss": 0.6377, "step": 4838 }, { "epoch": 0.8386300123480861, "grad_norm": 2.2638134082486565, "learning_rate": 6.675357115032693e-07, "loss": 0.6222, "step": 4839 }, { "epoch": 0.8388033188189165, "grad_norm": 2.1724784436071345, "learning_rate": 6.661351702423652e-07, "loss": 0.5237, "step": 4840 }, { "epoch": 0.8389766252897467, "grad_norm": 2.168800196308765, "learning_rate": 6.647359948839121e-07, "loss": 0.4981, "step": 4841 }, { "epoch": 0.8391499317605771, "grad_norm": 3.5101588953371747, "learning_rate": 6.63338185868887e-07, "loss": 0.5777, "step": 4842 }, { "epoch": 0.8393232382314074, "grad_norm": 2.3719640822400305, "learning_rate": 6.619417436378366e-07, "loss": 0.4802, "step": 4843 }, { "epoch": 0.8394965447022378, "grad_norm": 2.666990351570085, "learning_rate": 6.605466686308776e-07, "loss": 0.5859, "step": 4844 }, { "epoch": 0.8396698511730681, "grad_norm": 2.204444521680761, "learning_rate": 6.591529612876951e-07, "loss": 0.4855, "step": 4845 }, { "epoch": 0.8398431576438985, "grad_norm": 2.3055066763199443, "learning_rate": 6.57760622047543e-07, "loss": 0.5316, "step": 4846 }, { "epoch": 0.8400164641147289, "grad_norm": 2.3912141705773724, "learning_rate": 6.563696513492446e-07, "loss": 0.4901, "step": 4847 }, { "epoch": 0.8401897705855592, "grad_norm": 2.6147973022216946, "learning_rate": 6.549800496311909e-07, "loss": 0.4344, "step": 4848 }, { "epoch": 0.8403630770563896, "grad_norm": 2.3855531043491247, "learning_rate": 6.535918173313432e-07, "loss": 0.5489, "step": 4849 }, { "epoch": 0.8405363835272199, "grad_norm": 2.7220151104447528, "learning_rate": 6.522049548872305e-07, "loss": 0.446, "step": 4850 }, { "epoch": 0.8407096899980503, "grad_norm": 2.2528497801057927, "learning_rate": 6.508194627359482e-07, "loss": 0.571, "step": 4851 }, { "epoch": 0.8408829964688807, "grad_norm": 2.592659530990762, "learning_rate": 6.494353413141613e-07, "loss": 0.5722, "step": 4852 }, { "epoch": 0.841056302939711, "grad_norm": 2.0365952564809104, "learning_rate": 6.480525910581026e-07, "loss": 0.3885, "step": 4853 }, { "epoch": 0.8412296094105414, "grad_norm": 2.6094784090422434, "learning_rate": 6.466712124035745e-07, "loss": 0.5256, "step": 4854 }, { "epoch": 0.8414029158813717, "grad_norm": 2.216391060697912, "learning_rate": 6.452912057859451e-07, "loss": 0.4811, "step": 4855 }, { "epoch": 0.8415762223522021, "grad_norm": 4.003087233330461, "learning_rate": 6.439125716401501e-07, "loss": 0.6236, "step": 4856 }, { "epoch": 0.8417495288230324, "grad_norm": 2.16233912888391, "learning_rate": 6.425353104006931e-07, "loss": 0.5378, "step": 4857 }, { "epoch": 0.8419228352938628, "grad_norm": 2.3011204990064744, "learning_rate": 6.411594225016449e-07, "loss": 0.4485, "step": 4858 }, { "epoch": 0.8420961417646932, "grad_norm": 2.300244352503766, "learning_rate": 6.397849083766444e-07, "loss": 0.4917, "step": 4859 }, { "epoch": 0.8422694482355235, "grad_norm": 2.781175020652716, "learning_rate": 6.384117684588964e-07, "loss": 0.5421, "step": 4860 }, { "epoch": 0.8424427547063539, "grad_norm": 2.6424063420285693, "learning_rate": 6.370400031811718e-07, "loss": 0.598, "step": 4861 }, { "epoch": 0.8426160611771842, "grad_norm": 3.0276593327170587, "learning_rate": 6.356696129758094e-07, "loss": 0.5346, "step": 4862 }, { "epoch": 0.8427893676480146, "grad_norm": 2.603814639134441, "learning_rate": 6.343005982747158e-07, "loss": 0.5454, "step": 4863 }, { "epoch": 0.842962674118845, "grad_norm": 2.740248563250096, "learning_rate": 6.329329595093614e-07, "loss": 0.4853, "step": 4864 }, { "epoch": 0.8431359805896753, "grad_norm": 2.1392853699581553, "learning_rate": 6.315666971107859e-07, "loss": 0.4921, "step": 4865 }, { "epoch": 0.8433092870605057, "grad_norm": 2.7942750163297543, "learning_rate": 6.30201811509592e-07, "loss": 0.6264, "step": 4866 }, { "epoch": 0.8434825935313359, "grad_norm": 2.1364102521030133, "learning_rate": 6.288383031359513e-07, "loss": 0.5096, "step": 4867 }, { "epoch": 0.8436559000021663, "grad_norm": 2.5313828227753064, "learning_rate": 6.274761724195994e-07, "loss": 0.5455, "step": 4868 }, { "epoch": 0.8438292064729966, "grad_norm": 6.669347339906716, "learning_rate": 6.261154197898394e-07, "loss": 0.5137, "step": 4869 }, { "epoch": 0.844002512943827, "grad_norm": 2.533524872167709, "learning_rate": 6.247560456755397e-07, "loss": 0.4531, "step": 4870 }, { "epoch": 0.8441758194146574, "grad_norm": 2.3520181102175877, "learning_rate": 6.233980505051324e-07, "loss": 0.5136, "step": 4871 }, { "epoch": 0.8443491258854877, "grad_norm": 2.9695420523452856, "learning_rate": 6.220414347066162e-07, "loss": 0.4774, "step": 4872 }, { "epoch": 0.8445224323563181, "grad_norm": 3.10953612823507, "learning_rate": 6.206861987075552e-07, "loss": 0.4982, "step": 4873 }, { "epoch": 0.8446957388271484, "grad_norm": 2.6322179444647777, "learning_rate": 6.193323429350795e-07, "loss": 0.5041, "step": 4874 }, { "epoch": 0.8448690452979788, "grad_norm": 2.2506062748821876, "learning_rate": 6.179798678158828e-07, "loss": 0.5329, "step": 4875 }, { "epoch": 0.8450423517688092, "grad_norm": 2.688194171270484, "learning_rate": 6.166287737762239e-07, "loss": 0.5969, "step": 4876 }, { "epoch": 0.8452156582396395, "grad_norm": 2.6796272352656447, "learning_rate": 6.152790612419268e-07, "loss": 0.5091, "step": 4877 }, { "epoch": 0.8453889647104699, "grad_norm": 2.4045402511834313, "learning_rate": 6.139307306383802e-07, "loss": 0.5895, "step": 4878 }, { "epoch": 0.8455622711813002, "grad_norm": 2.619318657122292, "learning_rate": 6.125837823905362e-07, "loss": 0.5201, "step": 4879 }, { "epoch": 0.8457355776521306, "grad_norm": 2.346958020595615, "learning_rate": 6.112382169229125e-07, "loss": 0.518, "step": 4880 }, { "epoch": 0.845908884122961, "grad_norm": 2.2242159632192613, "learning_rate": 6.098940346595905e-07, "loss": 0.5204, "step": 4881 }, { "epoch": 0.8460821905937913, "grad_norm": 1.9563081060523477, "learning_rate": 6.085512360242146e-07, "loss": 0.504, "step": 4882 }, { "epoch": 0.8462554970646217, "grad_norm": 2.987148338589847, "learning_rate": 6.072098214399957e-07, "loss": 0.5643, "step": 4883 }, { "epoch": 0.846428803535452, "grad_norm": 2.5085854095222566, "learning_rate": 6.058697913297057e-07, "loss": 0.4646, "step": 4884 }, { "epoch": 0.8466021100062824, "grad_norm": 2.1356480206426274, "learning_rate": 6.045311461156811e-07, "loss": 0.5195, "step": 4885 }, { "epoch": 0.8467754164771127, "grad_norm": 4.896999768963376, "learning_rate": 6.031938862198233e-07, "loss": 0.5114, "step": 4886 }, { "epoch": 0.8469487229479431, "grad_norm": 2.1798235652818487, "learning_rate": 6.018580120635958e-07, "loss": 0.4911, "step": 4887 }, { "epoch": 0.8471220294187735, "grad_norm": 2.244121115669829, "learning_rate": 6.005235240680246e-07, "loss": 0.5583, "step": 4888 }, { "epoch": 0.8472953358896038, "grad_norm": 2.2020819522678496, "learning_rate": 5.991904226537004e-07, "loss": 0.4872, "step": 4889 }, { "epoch": 0.8474686423604342, "grad_norm": 2.3776058686221697, "learning_rate": 5.978587082407766e-07, "loss": 0.48, "step": 4890 }, { "epoch": 0.8476419488312645, "grad_norm": 2.038690569183289, "learning_rate": 5.965283812489692e-07, "loss": 0.4768, "step": 4891 }, { "epoch": 0.8478152553020949, "grad_norm": 2.399690910559451, "learning_rate": 5.95199442097556e-07, "loss": 0.5662, "step": 4892 }, { "epoch": 0.8479885617729251, "grad_norm": 4.320551029615731, "learning_rate": 5.938718912053776e-07, "loss": 0.4791, "step": 4893 }, { "epoch": 0.8481618682437555, "grad_norm": 2.2277079721216015, "learning_rate": 5.925457289908382e-07, "loss": 0.622, "step": 4894 }, { "epoch": 0.8483351747145859, "grad_norm": 2.451944148848186, "learning_rate": 5.912209558719051e-07, "loss": 0.488, "step": 4895 }, { "epoch": 0.8485084811854162, "grad_norm": 2.643183924550328, "learning_rate": 5.898975722661054e-07, "loss": 0.582, "step": 4896 }, { "epoch": 0.8486817876562466, "grad_norm": 2.2680767772292403, "learning_rate": 5.885755785905294e-07, "loss": 0.4182, "step": 4897 }, { "epoch": 0.8488550941270769, "grad_norm": 4.024698799720259, "learning_rate": 5.872549752618301e-07, "loss": 0.4465, "step": 4898 }, { "epoch": 0.8490284005979073, "grad_norm": 2.2747103572476943, "learning_rate": 5.859357626962208e-07, "loss": 0.4764, "step": 4899 }, { "epoch": 0.8492017070687377, "grad_norm": 2.5022964940106394, "learning_rate": 5.846179413094771e-07, "loss": 0.5475, "step": 4900 }, { "epoch": 0.849375013539568, "grad_norm": 2.0830386630525193, "learning_rate": 5.833015115169377e-07, "loss": 0.5185, "step": 4901 }, { "epoch": 0.8495483200103984, "grad_norm": 2.171350774655563, "learning_rate": 5.81986473733499e-07, "loss": 0.6146, "step": 4902 }, { "epoch": 0.8497216264812287, "grad_norm": 2.4932841465265585, "learning_rate": 5.806728283736223e-07, "loss": 0.6088, "step": 4903 }, { "epoch": 0.8498949329520591, "grad_norm": 2.4101531380744503, "learning_rate": 5.793605758513282e-07, "loss": 0.5898, "step": 4904 }, { "epoch": 0.8500682394228894, "grad_norm": 2.3329277018129115, "learning_rate": 5.780497165801991e-07, "loss": 0.5372, "step": 4905 }, { "epoch": 0.8502415458937198, "grad_norm": 2.515741068399809, "learning_rate": 5.767402509733771e-07, "loss": 0.5354, "step": 4906 }, { "epoch": 0.8504148523645502, "grad_norm": 2.578092285948223, "learning_rate": 5.754321794435674e-07, "loss": 0.5336, "step": 4907 }, { "epoch": 0.8505881588353805, "grad_norm": 2.532757827105763, "learning_rate": 5.74125502403034e-07, "loss": 0.4495, "step": 4908 }, { "epoch": 0.8507614653062109, "grad_norm": 3.412395953904401, "learning_rate": 5.728202202635996e-07, "loss": 0.4758, "step": 4909 }, { "epoch": 0.8509347717770412, "grad_norm": 2.6573087294837383, "learning_rate": 5.71516333436653e-07, "loss": 0.4565, "step": 4910 }, { "epoch": 0.8511080782478716, "grad_norm": 2.966054512710098, "learning_rate": 5.702138423331377e-07, "loss": 0.5116, "step": 4911 }, { "epoch": 0.851281384718702, "grad_norm": 2.3509702531940206, "learning_rate": 5.689127473635609e-07, "loss": 0.515, "step": 4912 }, { "epoch": 0.8514546911895323, "grad_norm": 2.269141114291284, "learning_rate": 5.676130489379855e-07, "loss": 0.4635, "step": 4913 }, { "epoch": 0.8516279976603627, "grad_norm": 2.133876250225931, "learning_rate": 5.663147474660391e-07, "loss": 0.4845, "step": 4914 }, { "epoch": 0.851801304131193, "grad_norm": 2.258995474184694, "learning_rate": 5.650178433569053e-07, "loss": 0.5364, "step": 4915 }, { "epoch": 0.8519746106020234, "grad_norm": 2.1667404715414436, "learning_rate": 5.637223370193301e-07, "loss": 0.5432, "step": 4916 }, { "epoch": 0.8521479170728538, "grad_norm": 2.4267778156298587, "learning_rate": 5.624282288616173e-07, "loss": 0.5586, "step": 4917 }, { "epoch": 0.8523212235436841, "grad_norm": 2.762053051338498, "learning_rate": 5.611355192916302e-07, "loss": 0.5858, "step": 4918 }, { "epoch": 0.8524945300145144, "grad_norm": 3.6176297702342692, "learning_rate": 5.598442087167921e-07, "loss": 0.5661, "step": 4919 }, { "epoch": 0.8526678364853447, "grad_norm": 2.605941691870872, "learning_rate": 5.585542975440838e-07, "loss": 0.5052, "step": 4920 }, { "epoch": 0.8528411429561751, "grad_norm": 2.3241137154076106, "learning_rate": 5.572657861800474e-07, "loss": 0.4558, "step": 4921 }, { "epoch": 0.8530144494270054, "grad_norm": 2.6937533908893885, "learning_rate": 5.559786750307822e-07, "loss": 0.4891, "step": 4922 }, { "epoch": 0.8531877558978358, "grad_norm": 2.293251668621995, "learning_rate": 5.546929645019455e-07, "loss": 0.5543, "step": 4923 }, { "epoch": 0.8533610623686662, "grad_norm": 2.1852302033568707, "learning_rate": 5.534086549987555e-07, "loss": 0.509, "step": 4924 }, { "epoch": 0.8535343688394965, "grad_norm": 3.7153553525401875, "learning_rate": 5.521257469259867e-07, "loss": 0.5387, "step": 4925 }, { "epoch": 0.8537076753103269, "grad_norm": 3.8267925678097994, "learning_rate": 5.508442406879727e-07, "loss": 0.5172, "step": 4926 }, { "epoch": 0.8538809817811572, "grad_norm": 2.615547233142806, "learning_rate": 5.495641366886062e-07, "loss": 0.4879, "step": 4927 }, { "epoch": 0.8540542882519876, "grad_norm": 4.80935180348129, "learning_rate": 5.48285435331336e-07, "loss": 0.5594, "step": 4928 }, { "epoch": 0.854227594722818, "grad_norm": 2.6627270826165863, "learning_rate": 5.470081370191705e-07, "loss": 0.5507, "step": 4929 }, { "epoch": 0.8544009011936483, "grad_norm": 2.4680539112788895, "learning_rate": 5.457322421546746e-07, "loss": 0.5487, "step": 4930 }, { "epoch": 0.8545742076644787, "grad_norm": 2.585003931352269, "learning_rate": 5.444577511399724e-07, "loss": 0.5324, "step": 4931 }, { "epoch": 0.854747514135309, "grad_norm": 2.5146798817176874, "learning_rate": 5.431846643767458e-07, "loss": 0.4588, "step": 4932 }, { "epoch": 0.8549208206061394, "grad_norm": 2.3459951686117293, "learning_rate": 5.419129822662305e-07, "loss": 0.5593, "step": 4933 }, { "epoch": 0.8550941270769697, "grad_norm": 2.5736434659406315, "learning_rate": 5.406427052092217e-07, "loss": 0.5982, "step": 4934 }, { "epoch": 0.8552674335478001, "grad_norm": 3.3702053067293773, "learning_rate": 5.39373833606075e-07, "loss": 0.5544, "step": 4935 }, { "epoch": 0.8554407400186305, "grad_norm": 2.3992213007947596, "learning_rate": 5.381063678566989e-07, "loss": 0.5729, "step": 4936 }, { "epoch": 0.8556140464894608, "grad_norm": 2.7627998242722436, "learning_rate": 5.3684030836056e-07, "loss": 0.522, "step": 4937 }, { "epoch": 0.8557873529602912, "grad_norm": 1.9878241561498515, "learning_rate": 5.355756555166813e-07, "loss": 0.4531, "step": 4938 }, { "epoch": 0.8559606594311215, "grad_norm": 2.6155369037597525, "learning_rate": 5.343124097236441e-07, "loss": 0.5501, "step": 4939 }, { "epoch": 0.8561339659019519, "grad_norm": 2.490626969046599, "learning_rate": 5.330505713795836e-07, "loss": 0.5176, "step": 4940 }, { "epoch": 0.8563072723727823, "grad_norm": 2.6267596539265305, "learning_rate": 5.317901408821941e-07, "loss": 0.547, "step": 4941 }, { "epoch": 0.8564805788436126, "grad_norm": 2.019210089270839, "learning_rate": 5.305311186287254e-07, "loss": 0.5151, "step": 4942 }, { "epoch": 0.856653885314443, "grad_norm": 2.733655289911265, "learning_rate": 5.292735050159808e-07, "loss": 0.5465, "step": 4943 }, { "epoch": 0.8568271917852733, "grad_norm": 2.2164411640313064, "learning_rate": 5.280173004403239e-07, "loss": 0.5805, "step": 4944 }, { "epoch": 0.8570004982561036, "grad_norm": 3.559969122110653, "learning_rate": 5.26762505297671e-07, "loss": 0.6531, "step": 4945 }, { "epoch": 0.8571738047269339, "grad_norm": 2.847680634074329, "learning_rate": 5.255091199834955e-07, "loss": 0.6277, "step": 4946 }, { "epoch": 0.8573471111977643, "grad_norm": 2.63965181948173, "learning_rate": 5.242571448928274e-07, "loss": 0.5941, "step": 4947 }, { "epoch": 0.8575204176685947, "grad_norm": 2.8014257755216696, "learning_rate": 5.230065804202506e-07, "loss": 0.4806, "step": 4948 }, { "epoch": 0.857693724139425, "grad_norm": 2.2836127216345647, "learning_rate": 5.217574269599035e-07, "loss": 0.4483, "step": 4949 }, { "epoch": 0.8578670306102554, "grad_norm": 9.543086882061884, "learning_rate": 5.205096849054842e-07, "loss": 0.6118, "step": 4950 }, { "epoch": 0.8580403370810857, "grad_norm": 2.521529809938678, "learning_rate": 5.192633546502412e-07, "loss": 0.5019, "step": 4951 }, { "epoch": 0.8582136435519161, "grad_norm": 5.128828727284235, "learning_rate": 5.180184365869806e-07, "loss": 0.4732, "step": 4952 }, { "epoch": 0.8583869500227465, "grad_norm": 3.0839840728764316, "learning_rate": 5.167749311080633e-07, "loss": 0.554, "step": 4953 }, { "epoch": 0.8585602564935768, "grad_norm": 2.2236015108001475, "learning_rate": 5.155328386054026e-07, "loss": 0.5554, "step": 4954 }, { "epoch": 0.8587335629644072, "grad_norm": 2.8788863871818138, "learning_rate": 5.142921594704691e-07, "loss": 0.564, "step": 4955 }, { "epoch": 0.8589068694352375, "grad_norm": 11.411757751813074, "learning_rate": 5.13052894094288e-07, "loss": 0.4927, "step": 4956 }, { "epoch": 0.8590801759060679, "grad_norm": 2.4917829090990673, "learning_rate": 5.118150428674368e-07, "loss": 0.587, "step": 4957 }, { "epoch": 0.8592534823768982, "grad_norm": 2.2047363755292784, "learning_rate": 5.105786061800494e-07, "loss": 0.6029, "step": 4958 }, { "epoch": 0.8594267888477286, "grad_norm": 2.244605170782876, "learning_rate": 5.093435844218125e-07, "loss": 0.484, "step": 4959 }, { "epoch": 0.859600095318559, "grad_norm": 2.4786216451913523, "learning_rate": 5.081099779819682e-07, "loss": 0.5182, "step": 4960 }, { "epoch": 0.8597734017893893, "grad_norm": 9.642043292826562, "learning_rate": 5.068777872493108e-07, "loss": 0.4782, "step": 4961 }, { "epoch": 0.8599467082602197, "grad_norm": 3.67636529612942, "learning_rate": 5.056470126121904e-07, "loss": 0.5553, "step": 4962 }, { "epoch": 0.86012001473105, "grad_norm": 2.3485413079092843, "learning_rate": 5.044176544585089e-07, "loss": 0.5256, "step": 4963 }, { "epoch": 0.8602933212018804, "grad_norm": 2.4752686913325443, "learning_rate": 5.03189713175723e-07, "loss": 0.5857, "step": 4964 }, { "epoch": 0.8604666276727108, "grad_norm": 2.6885945912365323, "learning_rate": 5.019631891508425e-07, "loss": 0.513, "step": 4965 }, { "epoch": 0.8606399341435411, "grad_norm": 4.046291594145545, "learning_rate": 5.0073808277043e-07, "loss": 0.6358, "step": 4966 }, { "epoch": 0.8608132406143715, "grad_norm": 2.2688088170508074, "learning_rate": 4.995143944206026e-07, "loss": 0.5187, "step": 4967 }, { "epoch": 0.8609865470852018, "grad_norm": 2.358295988095948, "learning_rate": 4.982921244870292e-07, "loss": 0.5914, "step": 4968 }, { "epoch": 0.8611598535560322, "grad_norm": 2.9824877427267076, "learning_rate": 4.970712733549321e-07, "loss": 0.447, "step": 4969 }, { "epoch": 0.8613331600268626, "grad_norm": 2.8961245058934546, "learning_rate": 4.958518414090863e-07, "loss": 0.5567, "step": 4970 }, { "epoch": 0.8615064664976928, "grad_norm": 3.851718211791473, "learning_rate": 4.946338290338204e-07, "loss": 0.564, "step": 4971 }, { "epoch": 0.8616797729685232, "grad_norm": 2.6649657573409002, "learning_rate": 4.934172366130141e-07, "loss": 0.5654, "step": 4972 }, { "epoch": 0.8618530794393535, "grad_norm": 2.484732453860253, "learning_rate": 4.922020645301013e-07, "loss": 0.5063, "step": 4973 }, { "epoch": 0.8620263859101839, "grad_norm": 2.128501547432357, "learning_rate": 4.90988313168066e-07, "loss": 0.5206, "step": 4974 }, { "epoch": 0.8621996923810142, "grad_norm": 7.0858978100980705, "learning_rate": 4.897759829094456e-07, "loss": 0.5234, "step": 4975 }, { "epoch": 0.8623729988518446, "grad_norm": 2.5530793744154487, "learning_rate": 4.885650741363312e-07, "loss": 0.527, "step": 4976 }, { "epoch": 0.862546305322675, "grad_norm": 2.145174205744644, "learning_rate": 4.873555872303637e-07, "loss": 0.4943, "step": 4977 }, { "epoch": 0.8627196117935053, "grad_norm": 2.4299170078750176, "learning_rate": 4.861475225727369e-07, "loss": 0.5255, "step": 4978 }, { "epoch": 0.8628929182643357, "grad_norm": 2.5945414537821834, "learning_rate": 4.849408805441958e-07, "loss": 0.5243, "step": 4979 }, { "epoch": 0.863066224735166, "grad_norm": 2.687082010596841, "learning_rate": 4.837356615250372e-07, "loss": 0.5109, "step": 4980 }, { "epoch": 0.8632395312059964, "grad_norm": 2.3208088143733043, "learning_rate": 4.825318658951095e-07, "loss": 0.5939, "step": 4981 }, { "epoch": 0.8634128376768267, "grad_norm": 4.420941271558549, "learning_rate": 4.813294940338126e-07, "loss": 0.5763, "step": 4982 }, { "epoch": 0.8635861441476571, "grad_norm": 2.0811567600195335, "learning_rate": 4.801285463200977e-07, "loss": 0.446, "step": 4983 }, { "epoch": 0.8637594506184875, "grad_norm": 2.3348929990148144, "learning_rate": 4.789290231324678e-07, "loss": 0.5931, "step": 4984 }, { "epoch": 0.8639327570893178, "grad_norm": 2.3596265214826397, "learning_rate": 4.777309248489742e-07, "loss": 0.4609, "step": 4985 }, { "epoch": 0.8641060635601482, "grad_norm": 2.3059065222118718, "learning_rate": 4.765342518472221e-07, "loss": 0.4891, "step": 4986 }, { "epoch": 0.8642793700309785, "grad_norm": 2.566656374859756, "learning_rate": 4.75339004504366e-07, "loss": 0.5896, "step": 4987 }, { "epoch": 0.8644526765018089, "grad_norm": 2.770397529431545, "learning_rate": 4.7414518319711276e-07, "loss": 0.4795, "step": 4988 }, { "epoch": 0.8646259829726393, "grad_norm": 3.283621304019844, "learning_rate": 4.729527883017171e-07, "loss": 0.5696, "step": 4989 }, { "epoch": 0.8647992894434696, "grad_norm": 2.883373769762215, "learning_rate": 4.7176182019398555e-07, "loss": 0.5393, "step": 4990 }, { "epoch": 0.8649725959143, "grad_norm": 2.7754854207280033, "learning_rate": 4.7057227924927673e-07, "loss": 0.5153, "step": 4991 }, { "epoch": 0.8651459023851303, "grad_norm": 2.4813162757703426, "learning_rate": 4.6938416584249693e-07, "loss": 0.587, "step": 4992 }, { "epoch": 0.8653192088559607, "grad_norm": 1.9646187519415488, "learning_rate": 4.681974803481032e-07, "loss": 0.3218, "step": 4993 }, { "epoch": 0.8654925153267911, "grad_norm": 2.075262857283083, "learning_rate": 4.6701222314010366e-07, "loss": 0.4998, "step": 4994 }, { "epoch": 0.8656658217976214, "grad_norm": 2.53450948945037, "learning_rate": 4.658283945920539e-07, "loss": 0.5559, "step": 4995 }, { "epoch": 0.8658391282684518, "grad_norm": 2.502381039804133, "learning_rate": 4.64645995077061e-07, "loss": 0.5032, "step": 4996 }, { "epoch": 0.8660124347392821, "grad_norm": 2.5739039348485324, "learning_rate": 4.634650249677819e-07, "loss": 0.5572, "step": 4997 }, { "epoch": 0.8661857412101124, "grad_norm": 2.444184577136493, "learning_rate": 4.622854846364222e-07, "loss": 0.4781, "step": 4998 }, { "epoch": 0.8663590476809427, "grad_norm": 4.151622057662184, "learning_rate": 4.611073744547373e-07, "loss": 0.5477, "step": 4999 }, { "epoch": 0.8665323541517731, "grad_norm": 2.3978032175069806, "learning_rate": 4.599306947940313e-07, "loss": 0.5295, "step": 5000 }, { "epoch": 0.8667056606226035, "grad_norm": 2.206597975736212, "learning_rate": 4.5875544602515864e-07, "loss": 0.5455, "step": 5001 }, { "epoch": 0.8668789670934338, "grad_norm": 2.311154921210284, "learning_rate": 4.5758162851852083e-07, "loss": 0.4254, "step": 5002 }, { "epoch": 0.8670522735642642, "grad_norm": 2.263373958681351, "learning_rate": 4.5640924264407083e-07, "loss": 0.5624, "step": 5003 }, { "epoch": 0.8672255800350945, "grad_norm": 2.5046547577241025, "learning_rate": 4.5523828877130807e-07, "loss": 0.5152, "step": 5004 }, { "epoch": 0.8673988865059249, "grad_norm": 2.1258145806553026, "learning_rate": 4.5406876726928227e-07, "loss": 0.515, "step": 5005 }, { "epoch": 0.8675721929767553, "grad_norm": 2.40053332851754, "learning_rate": 4.5290067850659035e-07, "loss": 0.5086, "step": 5006 }, { "epoch": 0.8677454994475856, "grad_norm": 2.395381028628214, "learning_rate": 4.5173402285137893e-07, "loss": 0.5505, "step": 5007 }, { "epoch": 0.867918805918416, "grad_norm": 4.278132676218571, "learning_rate": 4.5056880067134156e-07, "loss": 0.5442, "step": 5008 }, { "epoch": 0.8680921123892463, "grad_norm": 2.2405574115415656, "learning_rate": 4.4940501233372236e-07, "loss": 0.5293, "step": 5009 }, { "epoch": 0.8682654188600767, "grad_norm": 2.2483589149379952, "learning_rate": 4.482426582053107e-07, "loss": 0.498, "step": 5010 }, { "epoch": 0.868438725330907, "grad_norm": 2.424472451831947, "learning_rate": 4.470817386524462e-07, "loss": 0.4786, "step": 5011 }, { "epoch": 0.8686120318017374, "grad_norm": 2.099659887323435, "learning_rate": 4.4592225404101464e-07, "loss": 0.4542, "step": 5012 }, { "epoch": 0.8687853382725678, "grad_norm": 2.551213991112046, "learning_rate": 4.4476420473645033e-07, "loss": 0.5155, "step": 5013 }, { "epoch": 0.8689586447433981, "grad_norm": 2.5083263641424263, "learning_rate": 4.4360759110373685e-07, "loss": 0.5266, "step": 5014 }, { "epoch": 0.8691319512142285, "grad_norm": 3.0571483416858336, "learning_rate": 4.4245241350740155e-07, "loss": 0.5332, "step": 5015 }, { "epoch": 0.8693052576850588, "grad_norm": 3.2107043856421154, "learning_rate": 4.412986723115209e-07, "loss": 0.4581, "step": 5016 }, { "epoch": 0.8694785641558892, "grad_norm": 8.253516876683012, "learning_rate": 4.401463678797208e-07, "loss": 0.5107, "step": 5017 }, { "epoch": 0.8696518706267196, "grad_norm": 2.336894363085696, "learning_rate": 4.3899550057517225e-07, "loss": 0.5495, "step": 5018 }, { "epoch": 0.8698251770975499, "grad_norm": 3.420702447165578, "learning_rate": 4.378460707605936e-07, "loss": 0.4998, "step": 5019 }, { "epoch": 0.8699984835683803, "grad_norm": 2.2992599831548777, "learning_rate": 4.3669807879825e-07, "loss": 0.6267, "step": 5020 }, { "epoch": 0.8701717900392106, "grad_norm": 2.5744095933869477, "learning_rate": 4.355515250499531e-07, "loss": 0.6301, "step": 5021 }, { "epoch": 0.870345096510041, "grad_norm": 2.177589176026661, "learning_rate": 4.3440640987706284e-07, "loss": 0.5293, "step": 5022 }, { "epoch": 0.8705184029808714, "grad_norm": 2.113752462892023, "learning_rate": 4.332627336404843e-07, "loss": 0.5369, "step": 5023 }, { "epoch": 0.8706917094517016, "grad_norm": 4.222419520856978, "learning_rate": 4.321204967006687e-07, "loss": 0.5566, "step": 5024 }, { "epoch": 0.870865015922532, "grad_norm": 2.1219208197960304, "learning_rate": 4.309796994176163e-07, "loss": 0.5432, "step": 5025 }, { "epoch": 0.8710383223933623, "grad_norm": 2.3354308387681444, "learning_rate": 4.2984034215086955e-07, "loss": 0.4517, "step": 5026 }, { "epoch": 0.8712116288641927, "grad_norm": 2.4028764476212547, "learning_rate": 4.2870242525951954e-07, "loss": 0.5826, "step": 5027 }, { "epoch": 0.871384935335023, "grad_norm": 2.548424331984201, "learning_rate": 4.275659491022044e-07, "loss": 0.5107, "step": 5028 }, { "epoch": 0.8715582418058534, "grad_norm": 2.673793567934227, "learning_rate": 4.264309140371059e-07, "loss": 0.4693, "step": 5029 }, { "epoch": 0.8717315482766838, "grad_norm": 2.418090137817401, "learning_rate": 4.2529732042195184e-07, "loss": 0.4315, "step": 5030 }, { "epoch": 0.8719048547475141, "grad_norm": 3.1483519108819094, "learning_rate": 4.241651686140186e-07, "loss": 0.4686, "step": 5031 }, { "epoch": 0.8720781612183445, "grad_norm": 2.454830123382743, "learning_rate": 4.2303445897012406e-07, "loss": 0.5347, "step": 5032 }, { "epoch": 0.8722514676891748, "grad_norm": 2.7069608326987327, "learning_rate": 4.219051918466349e-07, "loss": 0.5038, "step": 5033 }, { "epoch": 0.8724247741600052, "grad_norm": 2.781958321941589, "learning_rate": 4.2077736759946143e-07, "loss": 0.633, "step": 5034 }, { "epoch": 0.8725980806308355, "grad_norm": 2.376029516073926, "learning_rate": 4.1965098658406035e-07, "loss": 0.4892, "step": 5035 }, { "epoch": 0.8727713871016659, "grad_norm": 2.424125715756052, "learning_rate": 4.185260491554305e-07, "loss": 0.4448, "step": 5036 }, { "epoch": 0.8729446935724963, "grad_norm": 2.4007906248942166, "learning_rate": 4.174025556681188e-07, "loss": 0.5528, "step": 5037 }, { "epoch": 0.8731180000433266, "grad_norm": 2.258312599732528, "learning_rate": 4.162805064762171e-07, "loss": 0.4823, "step": 5038 }, { "epoch": 0.873291306514157, "grad_norm": 2.513300223184153, "learning_rate": 4.151599019333608e-07, "loss": 0.5053, "step": 5039 }, { "epoch": 0.8734646129849873, "grad_norm": 1.9888367615450682, "learning_rate": 4.1404074239273007e-07, "loss": 0.4813, "step": 5040 }, { "epoch": 0.8736379194558177, "grad_norm": 2.3806656788803076, "learning_rate": 4.129230282070501e-07, "loss": 0.5026, "step": 5041 }, { "epoch": 0.8738112259266481, "grad_norm": 2.5913094103756147, "learning_rate": 4.118067597285907e-07, "loss": 0.559, "step": 5042 }, { "epoch": 0.8739845323974784, "grad_norm": 2.5113815056524134, "learning_rate": 4.1069193730916614e-07, "loss": 0.5932, "step": 5043 }, { "epoch": 0.8741578388683088, "grad_norm": 2.3839704169917817, "learning_rate": 4.0957856130013354e-07, "loss": 0.5298, "step": 5044 }, { "epoch": 0.8743311453391391, "grad_norm": 2.2935288324962175, "learning_rate": 4.0846663205239557e-07, "loss": 0.4845, "step": 5045 }, { "epoch": 0.8745044518099695, "grad_norm": 2.105700233549587, "learning_rate": 4.073561499163986e-07, "loss": 0.5558, "step": 5046 }, { "epoch": 0.8746777582807999, "grad_norm": 4.242850377875308, "learning_rate": 4.0624711524213267e-07, "loss": 0.5188, "step": 5047 }, { "epoch": 0.8748510647516302, "grad_norm": 5.399574876150263, "learning_rate": 4.051395283791321e-07, "loss": 0.4608, "step": 5048 }, { "epoch": 0.8750243712224606, "grad_norm": 2.215044049228319, "learning_rate": 4.040333896764748e-07, "loss": 0.5029, "step": 5049 }, { "epoch": 0.8751976776932908, "grad_norm": 1.782109715720924, "learning_rate": 4.0292869948278137e-07, "loss": 0.4953, "step": 5050 }, { "epoch": 0.8753709841641212, "grad_norm": 2.615479355761147, "learning_rate": 4.018254581462172e-07, "loss": 0.5963, "step": 5051 }, { "epoch": 0.8755442906349515, "grad_norm": 2.526678354945701, "learning_rate": 4.007236660144903e-07, "loss": 0.4928, "step": 5052 }, { "epoch": 0.8757175971057819, "grad_norm": 2.384190318289457, "learning_rate": 3.996233234348518e-07, "loss": 0.5511, "step": 5053 }, { "epoch": 0.8758909035766123, "grad_norm": 2.479248260202936, "learning_rate": 3.985244307540959e-07, "loss": 0.4925, "step": 5054 }, { "epoch": 0.8760642100474426, "grad_norm": 3.931091824475743, "learning_rate": 3.9742698831856106e-07, "loss": 0.5683, "step": 5055 }, { "epoch": 0.876237516518273, "grad_norm": 2.04229753193189, "learning_rate": 3.9633099647412745e-07, "loss": 0.4907, "step": 5056 }, { "epoch": 0.8764108229891033, "grad_norm": 2.7665651513247127, "learning_rate": 3.9523645556621806e-07, "loss": 0.4898, "step": 5057 }, { "epoch": 0.8765841294599337, "grad_norm": 2.156437320916034, "learning_rate": 3.941433659397992e-07, "loss": 0.5324, "step": 5058 }, { "epoch": 0.876757435930764, "grad_norm": 2.362368636954505, "learning_rate": 3.930517279393797e-07, "loss": 0.4838, "step": 5059 }, { "epoch": 0.8769307424015944, "grad_norm": 2.5143014660750684, "learning_rate": 3.9196154190901003e-07, "loss": 0.558, "step": 5060 }, { "epoch": 0.8771040488724248, "grad_norm": 2.7278302394689455, "learning_rate": 3.9087280819228413e-07, "loss": 0.515, "step": 5061 }, { "epoch": 0.8772773553432551, "grad_norm": 2.326044906734734, "learning_rate": 3.8978552713233754e-07, "loss": 0.4827, "step": 5062 }, { "epoch": 0.8774506618140855, "grad_norm": 4.426929201100923, "learning_rate": 3.8869969907184835e-07, "loss": 0.5636, "step": 5063 }, { "epoch": 0.8776239682849158, "grad_norm": 2.2021818965147575, "learning_rate": 3.876153243530367e-07, "loss": 0.5192, "step": 5064 }, { "epoch": 0.8777972747557462, "grad_norm": 2.278006398863102, "learning_rate": 3.865324033176648e-07, "loss": 0.476, "step": 5065 }, { "epoch": 0.8779705812265766, "grad_norm": 2.2664727513455816, "learning_rate": 3.854509363070363e-07, "loss": 0.5304, "step": 5066 }, { "epoch": 0.8781438876974069, "grad_norm": 2.7957515011358027, "learning_rate": 3.843709236619958e-07, "loss": 0.5419, "step": 5067 }, { "epoch": 0.8783171941682373, "grad_norm": 4.06307355729045, "learning_rate": 3.8329236572293096e-07, "loss": 0.5879, "step": 5068 }, { "epoch": 0.8784905006390676, "grad_norm": 3.889006719929551, "learning_rate": 3.8221526282977095e-07, "loss": 0.5125, "step": 5069 }, { "epoch": 0.878663807109898, "grad_norm": 2.0808975754117918, "learning_rate": 3.811396153219854e-07, "loss": 0.5053, "step": 5070 }, { "epoch": 0.8788371135807284, "grad_norm": 2.5197710032465443, "learning_rate": 3.800654235385848e-07, "loss": 0.4633, "step": 5071 }, { "epoch": 0.8790104200515587, "grad_norm": 2.7388661809893553, "learning_rate": 3.789926878181238e-07, "loss": 0.4961, "step": 5072 }, { "epoch": 0.8791837265223891, "grad_norm": 2.7043798150403755, "learning_rate": 3.7792140849869484e-07, "loss": 0.4765, "step": 5073 }, { "epoch": 0.8793570329932194, "grad_norm": 2.3515953074168934, "learning_rate": 3.768515859179328e-07, "loss": 0.5089, "step": 5074 }, { "epoch": 0.8795303394640498, "grad_norm": 2.8705177926432275, "learning_rate": 3.757832204130135e-07, "loss": 0.5646, "step": 5075 }, { "epoch": 0.87970364593488, "grad_norm": 6.383461601474681, "learning_rate": 3.7471631232065377e-07, "loss": 0.4589, "step": 5076 }, { "epoch": 0.8798769524057104, "grad_norm": 2.925078368409728, "learning_rate": 3.73650861977109e-07, "loss": 0.5085, "step": 5077 }, { "epoch": 0.8800502588765408, "grad_norm": 2.700317567779268, "learning_rate": 3.725868697181778e-07, "loss": 0.547, "step": 5078 }, { "epoch": 0.8802235653473711, "grad_norm": 2.2573741519548713, "learning_rate": 3.715243358791981e-07, "loss": 0.5314, "step": 5079 }, { "epoch": 0.8803968718182015, "grad_norm": 2.3563362585979384, "learning_rate": 3.704632607950476e-07, "loss": 0.5699, "step": 5080 }, { "epoch": 0.8805701782890318, "grad_norm": 2.2098676323298436, "learning_rate": 3.694036448001459e-07, "loss": 0.4557, "step": 5081 }, { "epoch": 0.8807434847598622, "grad_norm": 2.860925189830121, "learning_rate": 3.683454882284515e-07, "loss": 0.5227, "step": 5082 }, { "epoch": 0.8809167912306926, "grad_norm": 2.4556240291400124, "learning_rate": 3.672887914134626e-07, "loss": 0.4861, "step": 5083 }, { "epoch": 0.8810900977015229, "grad_norm": 2.8251298822353745, "learning_rate": 3.662335546882184e-07, "loss": 0.5643, "step": 5084 }, { "epoch": 0.8812634041723533, "grad_norm": 2.2153445383259127, "learning_rate": 3.6517977838529727e-07, "loss": 0.4838, "step": 5085 }, { "epoch": 0.8814367106431836, "grad_norm": 2.342301389958224, "learning_rate": 3.6412746283681797e-07, "loss": 0.5196, "step": 5086 }, { "epoch": 0.881610017114014, "grad_norm": 2.380478330002107, "learning_rate": 3.630766083744375e-07, "loss": 0.509, "step": 5087 }, { "epoch": 0.8817833235848443, "grad_norm": 2.009841853987231, "learning_rate": 3.620272153293541e-07, "loss": 0.4903, "step": 5088 }, { "epoch": 0.8819566300556747, "grad_norm": 2.5663212591183027, "learning_rate": 3.60979284032304e-07, "loss": 0.5275, "step": 5089 }, { "epoch": 0.8821299365265051, "grad_norm": 2.634824991625742, "learning_rate": 3.5993281481356335e-07, "loss": 0.5059, "step": 5090 }, { "epoch": 0.8823032429973354, "grad_norm": 2.3888419234067046, "learning_rate": 3.588878080029479e-07, "loss": 0.5145, "step": 5091 }, { "epoch": 0.8824765494681658, "grad_norm": 2.3263528909482214, "learning_rate": 3.578442639298113e-07, "loss": 0.4276, "step": 5092 }, { "epoch": 0.8826498559389961, "grad_norm": 2.906247425259122, "learning_rate": 3.5680218292304726e-07, "loss": 0.5363, "step": 5093 }, { "epoch": 0.8828231624098265, "grad_norm": 2.2365069656862997, "learning_rate": 3.5576156531108795e-07, "loss": 0.579, "step": 5094 }, { "epoch": 0.8829964688806569, "grad_norm": 2.412849017763514, "learning_rate": 3.54722411421905e-07, "loss": 0.4822, "step": 5095 }, { "epoch": 0.8831697753514872, "grad_norm": 2.612442379738673, "learning_rate": 3.536847215830075e-07, "loss": 0.5497, "step": 5096 }, { "epoch": 0.8833430818223176, "grad_norm": 2.2982510457143843, "learning_rate": 3.526484961214438e-07, "loss": 0.5604, "step": 5097 }, { "epoch": 0.8835163882931479, "grad_norm": 2.2116630803759407, "learning_rate": 3.516137353638005e-07, "loss": 0.4716, "step": 5098 }, { "epoch": 0.8836896947639783, "grad_norm": 2.1755597749706723, "learning_rate": 3.505804396362034e-07, "loss": 0.5889, "step": 5099 }, { "epoch": 0.8838630012348087, "grad_norm": 2.2464824796479297, "learning_rate": 3.4954860926431524e-07, "loss": 0.5114, "step": 5100 }, { "epoch": 0.884036307705639, "grad_norm": 2.9403286706123013, "learning_rate": 3.4851824457333816e-07, "loss": 0.5372, "step": 5101 }, { "epoch": 0.8842096141764693, "grad_norm": 3.4957539681621284, "learning_rate": 3.474893458880113e-07, "loss": 0.5485, "step": 5102 }, { "epoch": 0.8843829206472996, "grad_norm": 4.6230018243598545, "learning_rate": 3.464619135326125e-07, "loss": 0.5549, "step": 5103 }, { "epoch": 0.88455622711813, "grad_norm": 7.567687245545234, "learning_rate": 3.454359478309566e-07, "loss": 0.5376, "step": 5104 }, { "epoch": 0.8847295335889603, "grad_norm": 2.0983051566756226, "learning_rate": 3.444114491063977e-07, "loss": 0.5253, "step": 5105 }, { "epoch": 0.8849028400597907, "grad_norm": 2.3897984965710197, "learning_rate": 3.433884176818264e-07, "loss": 0.5573, "step": 5106 }, { "epoch": 0.885076146530621, "grad_norm": 2.344145021456537, "learning_rate": 3.4236685387967207e-07, "loss": 0.4419, "step": 5107 }, { "epoch": 0.8852494530014514, "grad_norm": 3.9735910032872344, "learning_rate": 3.413467580218982e-07, "loss": 0.6074, "step": 5108 }, { "epoch": 0.8854227594722818, "grad_norm": 2.364148054799075, "learning_rate": 3.403281304300093e-07, "loss": 0.4582, "step": 5109 }, { "epoch": 0.8855960659431121, "grad_norm": 2.1796583010403316, "learning_rate": 3.3931097142504586e-07, "loss": 0.5436, "step": 5110 }, { "epoch": 0.8857693724139425, "grad_norm": 1.9699556589435145, "learning_rate": 3.3829528132758527e-07, "loss": 0.4063, "step": 5111 }, { "epoch": 0.8859426788847728, "grad_norm": 2.254168654247738, "learning_rate": 3.37281060457742e-07, "loss": 0.5749, "step": 5112 }, { "epoch": 0.8861159853556032, "grad_norm": 2.523213112991357, "learning_rate": 3.362683091351682e-07, "loss": 0.5541, "step": 5113 }, { "epoch": 0.8862892918264336, "grad_norm": 2.47068092685838, "learning_rate": 3.3525702767905235e-07, "loss": 0.5201, "step": 5114 }, { "epoch": 0.8864625982972639, "grad_norm": 2.0918494758224804, "learning_rate": 3.342472164081195e-07, "loss": 0.4512, "step": 5115 }, { "epoch": 0.8866359047680943, "grad_norm": 11.014316181838506, "learning_rate": 3.332388756406313e-07, "loss": 0.5111, "step": 5116 }, { "epoch": 0.8868092112389246, "grad_norm": 2.3364123493153586, "learning_rate": 3.322320056943862e-07, "loss": 0.5098, "step": 5117 }, { "epoch": 0.886982517709755, "grad_norm": 2.710155552325023, "learning_rate": 3.312266068867187e-07, "loss": 0.483, "step": 5118 }, { "epoch": 0.8871558241805854, "grad_norm": 2.3029055013659794, "learning_rate": 3.302226795345004e-07, "loss": 0.5193, "step": 5119 }, { "epoch": 0.8873291306514157, "grad_norm": 2.5225937022229554, "learning_rate": 3.292202239541381e-07, "loss": 0.5145, "step": 5120 }, { "epoch": 0.8875024371222461, "grad_norm": 2.4294350018022004, "learning_rate": 3.2821924046157526e-07, "loss": 0.5679, "step": 5121 }, { "epoch": 0.8876757435930764, "grad_norm": 2.738382701695477, "learning_rate": 3.2721972937229227e-07, "loss": 0.5191, "step": 5122 }, { "epoch": 0.8878490500639068, "grad_norm": 2.7973477925098478, "learning_rate": 3.262216910013044e-07, "loss": 0.5684, "step": 5123 }, { "epoch": 0.8880223565347372, "grad_norm": 2.9655202870784563, "learning_rate": 3.252251256631622e-07, "loss": 0.5664, "step": 5124 }, { "epoch": 0.8881956630055675, "grad_norm": 2.8466753537837155, "learning_rate": 3.2423003367195394e-07, "loss": 0.5076, "step": 5125 }, { "epoch": 0.8883689694763979, "grad_norm": 2.1989789620910902, "learning_rate": 3.232364153413009e-07, "loss": 0.4808, "step": 5126 }, { "epoch": 0.8885422759472282, "grad_norm": 2.9197956088660435, "learning_rate": 3.222442709843632e-07, "loss": 0.4509, "step": 5127 }, { "epoch": 0.8887155824180585, "grad_norm": 2.3856084372443744, "learning_rate": 3.2125360091383397e-07, "loss": 0.5739, "step": 5128 }, { "epoch": 0.8888888888888888, "grad_norm": 4.580582491110035, "learning_rate": 3.202644054419418e-07, "loss": 0.5837, "step": 5129 }, { "epoch": 0.8890621953597192, "grad_norm": 2.9759009568684944, "learning_rate": 3.192766848804507e-07, "loss": 0.6278, "step": 5130 }, { "epoch": 0.8892355018305496, "grad_norm": 3.1797979266697007, "learning_rate": 3.1829043954066106e-07, "loss": 0.5309, "step": 5131 }, { "epoch": 0.8894088083013799, "grad_norm": 3.419192665939283, "learning_rate": 3.1730566973340706e-07, "loss": 0.6051, "step": 5132 }, { "epoch": 0.8895821147722103, "grad_norm": 2.5723401377789994, "learning_rate": 3.163223757690581e-07, "loss": 0.5101, "step": 5133 }, { "epoch": 0.8897554212430406, "grad_norm": 2.2930924948742426, "learning_rate": 3.1534055795751916e-07, "loss": 0.4994, "step": 5134 }, { "epoch": 0.889928727713871, "grad_norm": 2.9359906168996015, "learning_rate": 3.1436021660822827e-07, "loss": 0.4716, "step": 5135 }, { "epoch": 0.8901020341847014, "grad_norm": 2.9138227496507296, "learning_rate": 3.133813520301598e-07, "loss": 0.5153, "step": 5136 }, { "epoch": 0.8902753406555317, "grad_norm": 2.044862125203652, "learning_rate": 3.124039645318222e-07, "loss": 0.4712, "step": 5137 }, { "epoch": 0.8904486471263621, "grad_norm": 2.4711522881507224, "learning_rate": 3.114280544212589e-07, "loss": 0.5269, "step": 5138 }, { "epoch": 0.8906219535971924, "grad_norm": 2.2622160904899467, "learning_rate": 3.104536220060456e-07, "loss": 0.4671, "step": 5139 }, { "epoch": 0.8907952600680228, "grad_norm": 2.624175065546312, "learning_rate": 3.094806675932949e-07, "loss": 0.5762, "step": 5140 }, { "epoch": 0.8909685665388531, "grad_norm": 2.6567706767999915, "learning_rate": 3.085091914896521e-07, "loss": 0.5947, "step": 5141 }, { "epoch": 0.8911418730096835, "grad_norm": 2.5547844082870386, "learning_rate": 3.075391940012973e-07, "loss": 0.6199, "step": 5142 }, { "epoch": 0.8913151794805139, "grad_norm": 2.417920925243677, "learning_rate": 3.065706754339437e-07, "loss": 0.5498, "step": 5143 }, { "epoch": 0.8914884859513442, "grad_norm": 2.201379688571473, "learning_rate": 3.056036360928388e-07, "loss": 0.4728, "step": 5144 }, { "epoch": 0.8916617924221746, "grad_norm": 2.4202510596891504, "learning_rate": 3.0463807628276497e-07, "loss": 0.4364, "step": 5145 }, { "epoch": 0.8918350988930049, "grad_norm": 2.547095470022159, "learning_rate": 3.036739963080365e-07, "loss": 0.5332, "step": 5146 }, { "epoch": 0.8920084053638353, "grad_norm": 2.468047909356419, "learning_rate": 3.0271139647250205e-07, "loss": 0.4322, "step": 5147 }, { "epoch": 0.8921817118346657, "grad_norm": 2.602947576367038, "learning_rate": 3.01750277079545e-07, "loss": 0.5739, "step": 5148 }, { "epoch": 0.892355018305496, "grad_norm": 2.355508321517849, "learning_rate": 3.007906384320791e-07, "loss": 0.4744, "step": 5149 }, { "epoch": 0.8925283247763264, "grad_norm": 3.829146736475449, "learning_rate": 2.9983248083255424e-07, "loss": 0.542, "step": 5150 }, { "epoch": 0.8927016312471567, "grad_norm": 2.0745232481593217, "learning_rate": 2.9887580458295207e-07, "loss": 0.4685, "step": 5151 }, { "epoch": 0.8928749377179871, "grad_norm": 1.9797291457163564, "learning_rate": 2.9792060998478813e-07, "loss": 0.5233, "step": 5152 }, { "epoch": 0.8930482441888175, "grad_norm": 2.2207739677316574, "learning_rate": 2.9696689733911156e-07, "loss": 0.5656, "step": 5153 }, { "epoch": 0.8932215506596477, "grad_norm": 2.2110470189976903, "learning_rate": 2.9601466694650304e-07, "loss": 0.5472, "step": 5154 }, { "epoch": 0.8933948571304781, "grad_norm": 2.5149760939595867, "learning_rate": 2.950639191070764e-07, "loss": 0.5041, "step": 5155 }, { "epoch": 0.8935681636013084, "grad_norm": 2.832284820994014, "learning_rate": 2.9411465412047866e-07, "loss": 0.5337, "step": 5156 }, { "epoch": 0.8937414700721388, "grad_norm": 2.5810916528024093, "learning_rate": 2.931668722858888e-07, "loss": 0.5618, "step": 5157 }, { "epoch": 0.8939147765429691, "grad_norm": 2.28526487534916, "learning_rate": 2.9222057390202065e-07, "loss": 0.5608, "step": 5158 }, { "epoch": 0.8940880830137995, "grad_norm": 2.54241325852983, "learning_rate": 2.9127575926711615e-07, "loss": 0.5246, "step": 5159 }, { "epoch": 0.8942613894846299, "grad_norm": 2.4912514392000307, "learning_rate": 2.903324286789533e-07, "loss": 0.6131, "step": 5160 }, { "epoch": 0.8944346959554602, "grad_norm": 1.933437450834187, "learning_rate": 2.8939058243484143e-07, "loss": 0.497, "step": 5161 }, { "epoch": 0.8946080024262906, "grad_norm": 2.772461341140139, "learning_rate": 2.88450220831622e-07, "loss": 0.475, "step": 5162 }, { "epoch": 0.8947813088971209, "grad_norm": 2.0043536109445497, "learning_rate": 2.875113441656674e-07, "loss": 0.4961, "step": 5163 }, { "epoch": 0.8949546153679513, "grad_norm": 2.2394601742006697, "learning_rate": 2.865739527328837e-07, "loss": 0.509, "step": 5164 }, { "epoch": 0.8951279218387816, "grad_norm": 3.0349259823630055, "learning_rate": 2.8563804682870846e-07, "loss": 0.5343, "step": 5165 }, { "epoch": 0.895301228309612, "grad_norm": 2.5912751480621616, "learning_rate": 2.847036267481107e-07, "loss": 0.5955, "step": 5166 }, { "epoch": 0.8954745347804424, "grad_norm": 2.7078782973688775, "learning_rate": 2.837706927855899e-07, "loss": 0.5324, "step": 5167 }, { "epoch": 0.8956478412512727, "grad_norm": 2.3163626149959513, "learning_rate": 2.8283924523518026e-07, "loss": 0.5891, "step": 5168 }, { "epoch": 0.8958211477221031, "grad_norm": 3.9965546690433604, "learning_rate": 2.8190928439044583e-07, "loss": 0.5218, "step": 5169 }, { "epoch": 0.8959944541929334, "grad_norm": 2.2303607883992047, "learning_rate": 2.8098081054448055e-07, "loss": 0.4631, "step": 5170 }, { "epoch": 0.8961677606637638, "grad_norm": 9.861621923198182, "learning_rate": 2.8005382398991197e-07, "loss": 0.5315, "step": 5171 }, { "epoch": 0.8963410671345942, "grad_norm": 3.572152612144318, "learning_rate": 2.791283250188975e-07, "loss": 0.5642, "step": 5172 }, { "epoch": 0.8965143736054245, "grad_norm": 3.8189493551764246, "learning_rate": 2.782043139231272e-07, "loss": 0.5765, "step": 5173 }, { "epoch": 0.8966876800762549, "grad_norm": 2.2153311707385925, "learning_rate": 2.7728179099382025e-07, "loss": 0.5251, "step": 5174 }, { "epoch": 0.8968609865470852, "grad_norm": 4.634360857588675, "learning_rate": 2.763607565217286e-07, "loss": 0.5603, "step": 5175 }, { "epoch": 0.8970342930179156, "grad_norm": 3.054979289524831, "learning_rate": 2.754412107971344e-07, "loss": 0.549, "step": 5176 }, { "epoch": 0.897207599488746, "grad_norm": 2.5965175444314728, "learning_rate": 2.7452315410984975e-07, "loss": 0.5537, "step": 5177 }, { "epoch": 0.8973809059595763, "grad_norm": 2.221061151779459, "learning_rate": 2.7360658674921883e-07, "loss": 0.4998, "step": 5178 }, { "epoch": 0.8975542124304067, "grad_norm": 2.357900401439302, "learning_rate": 2.726915090041154e-07, "loss": 0.5133, "step": 5179 }, { "epoch": 0.8977275189012369, "grad_norm": 2.2997776547778286, "learning_rate": 2.717779211629451e-07, "loss": 0.5098, "step": 5180 }, { "epoch": 0.8979008253720673, "grad_norm": 2.3015561501179653, "learning_rate": 2.7086582351364187e-07, "loss": 0.5459, "step": 5181 }, { "epoch": 0.8980741318428976, "grad_norm": 3.4418194144900034, "learning_rate": 2.6995521634367136e-07, "loss": 0.5346, "step": 5182 }, { "epoch": 0.898247438313728, "grad_norm": 2.256642973503946, "learning_rate": 2.690460999400296e-07, "loss": 0.5098, "step": 5183 }, { "epoch": 0.8984207447845584, "grad_norm": 2.114304105179342, "learning_rate": 2.681384745892424e-07, "loss": 0.5687, "step": 5184 }, { "epoch": 0.8985940512553887, "grad_norm": 2.3684777396544403, "learning_rate": 2.672323405773658e-07, "loss": 0.5302, "step": 5185 }, { "epoch": 0.8987673577262191, "grad_norm": 2.8293460937982724, "learning_rate": 2.663276981899854e-07, "loss": 0.5137, "step": 5186 }, { "epoch": 0.8989406641970494, "grad_norm": 4.483322176990422, "learning_rate": 2.6542454771221747e-07, "loss": 0.5178, "step": 5187 }, { "epoch": 0.8991139706678798, "grad_norm": 2.3750877889360296, "learning_rate": 2.645228894287072e-07, "loss": 0.5733, "step": 5188 }, { "epoch": 0.8992872771387102, "grad_norm": 2.7234985048791907, "learning_rate": 2.636227236236305e-07, "loss": 0.5532, "step": 5189 }, { "epoch": 0.8994605836095405, "grad_norm": 3.441813256161006, "learning_rate": 2.627240505806905e-07, "loss": 0.5327, "step": 5190 }, { "epoch": 0.8996338900803709, "grad_norm": 3.473548673205181, "learning_rate": 2.618268705831234e-07, "loss": 0.6094, "step": 5191 }, { "epoch": 0.8998071965512012, "grad_norm": 2.4388410609904163, "learning_rate": 2.609311839136924e-07, "loss": 0.5114, "step": 5192 }, { "epoch": 0.8999805030220316, "grad_norm": 2.1066873230342686, "learning_rate": 2.600369908546896e-07, "loss": 0.558, "step": 5193 }, { "epoch": 0.900153809492862, "grad_norm": 2.732193472138457, "learning_rate": 2.5914429168793997e-07, "loss": 0.4971, "step": 5194 }, { "epoch": 0.9003271159636923, "grad_norm": 2.481682227123931, "learning_rate": 2.5825308669479354e-07, "loss": 0.5277, "step": 5195 }, { "epoch": 0.9005004224345227, "grad_norm": 2.6572439596892137, "learning_rate": 2.5736337615613114e-07, "loss": 0.4954, "step": 5196 }, { "epoch": 0.900673728905353, "grad_norm": 2.753708435694724, "learning_rate": 2.5647516035236295e-07, "loss": 0.4767, "step": 5197 }, { "epoch": 0.9008470353761834, "grad_norm": 2.943171394020592, "learning_rate": 2.555884395634273e-07, "loss": 0.4301, "step": 5198 }, { "epoch": 0.9010203418470137, "grad_norm": 2.1021211668082973, "learning_rate": 2.547032140687916e-07, "loss": 0.5159, "step": 5199 }, { "epoch": 0.9011936483178441, "grad_norm": 2.6180617686568737, "learning_rate": 2.5381948414745283e-07, "loss": 0.44, "step": 5200 }, { "epoch": 0.9013669547886745, "grad_norm": 2.3777887889598723, "learning_rate": 2.5293725007793425e-07, "loss": 0.5661, "step": 5201 }, { "epoch": 0.9015402612595048, "grad_norm": 2.6909097012303973, "learning_rate": 2.520565121382895e-07, "loss": 0.4754, "step": 5202 }, { "epoch": 0.9017135677303352, "grad_norm": 2.3290828707278473, "learning_rate": 2.511772706061016e-07, "loss": 0.5176, "step": 5203 }, { "epoch": 0.9018868742011655, "grad_norm": 2.202721227773013, "learning_rate": 2.5029952575848003e-07, "loss": 0.538, "step": 5204 }, { "epoch": 0.9020601806719959, "grad_norm": 2.447785699395613, "learning_rate": 2.4942327787206287e-07, "loss": 0.5254, "step": 5205 }, { "epoch": 0.9022334871428261, "grad_norm": 2.7132069901772597, "learning_rate": 2.4854852722301757e-07, "loss": 0.6017, "step": 5206 }, { "epoch": 0.9024067936136565, "grad_norm": 2.948002526900421, "learning_rate": 2.4767527408703805e-07, "loss": 0.5324, "step": 5207 }, { "epoch": 0.9025801000844869, "grad_norm": 2.9512799844916575, "learning_rate": 2.468035187393475e-07, "loss": 0.5257, "step": 5208 }, { "epoch": 0.9027534065553172, "grad_norm": 1.9685399204943932, "learning_rate": 2.4593326145469777e-07, "loss": 0.4759, "step": 5209 }, { "epoch": 0.9029267130261476, "grad_norm": 2.0736041670345737, "learning_rate": 2.4506450250736725e-07, "loss": 0.4455, "step": 5210 }, { "epoch": 0.9031000194969779, "grad_norm": 3.3629381653954784, "learning_rate": 2.441972421711608e-07, "loss": 0.5977, "step": 5211 }, { "epoch": 0.9032733259678083, "grad_norm": 2.1461366142535594, "learning_rate": 2.433314807194137e-07, "loss": 0.5277, "step": 5212 }, { "epoch": 0.9034466324386387, "grad_norm": 2.3971935952555006, "learning_rate": 2.4246721842498766e-07, "loss": 0.571, "step": 5213 }, { "epoch": 0.903619938909469, "grad_norm": 2.733895110354058, "learning_rate": 2.4160445556027143e-07, "loss": 0.4679, "step": 5214 }, { "epoch": 0.9037932453802994, "grad_norm": 2.326686566881556, "learning_rate": 2.407431923971815e-07, "loss": 0.5593, "step": 5215 }, { "epoch": 0.9039665518511297, "grad_norm": 2.242555993431943, "learning_rate": 2.3988342920716276e-07, "loss": 0.5403, "step": 5216 }, { "epoch": 0.9041398583219601, "grad_norm": 2.735932983859324, "learning_rate": 2.3902516626118534e-07, "loss": 0.542, "step": 5217 }, { "epoch": 0.9043131647927904, "grad_norm": 2.4640290791305435, "learning_rate": 2.3816840382974826e-07, "loss": 0.512, "step": 5218 }, { "epoch": 0.9044864712636208, "grad_norm": 3.155440919241109, "learning_rate": 2.3731314218287726e-07, "loss": 0.5268, "step": 5219 }, { "epoch": 0.9046597777344512, "grad_norm": 2.3217081688780703, "learning_rate": 2.3645938159012393e-07, "loss": 0.519, "step": 5220 }, { "epoch": 0.9048330842052815, "grad_norm": 2.576571808264183, "learning_rate": 2.3560712232056858e-07, "loss": 0.522, "step": 5221 }, { "epoch": 0.9050063906761119, "grad_norm": 2.6984287567033163, "learning_rate": 2.3475636464281691e-07, "loss": 0.5401, "step": 5222 }, { "epoch": 0.9051796971469422, "grad_norm": 3.1402871950741513, "learning_rate": 2.3390710882500223e-07, "loss": 0.5583, "step": 5223 }, { "epoch": 0.9053530036177726, "grad_norm": 2.612017506296852, "learning_rate": 2.3305935513478318e-07, "loss": 0.5041, "step": 5224 }, { "epoch": 0.905526310088603, "grad_norm": 2.4462413930370515, "learning_rate": 2.3221310383934715e-07, "loss": 0.4831, "step": 5225 }, { "epoch": 0.9056996165594333, "grad_norm": 2.5864951175659354, "learning_rate": 2.3136835520540634e-07, "loss": 0.654, "step": 5226 }, { "epoch": 0.9058729230302637, "grad_norm": 2.5170173049059215, "learning_rate": 2.3052510949920004e-07, "loss": 0.5504, "step": 5227 }, { "epoch": 0.906046229501094, "grad_norm": 2.476094171981422, "learning_rate": 2.2968336698649286e-07, "loss": 0.5377, "step": 5228 }, { "epoch": 0.9062195359719244, "grad_norm": 5.170205328486356, "learning_rate": 2.2884312793257757e-07, "loss": 0.5475, "step": 5229 }, { "epoch": 0.9063928424427548, "grad_norm": 2.403253252669153, "learning_rate": 2.2800439260227182e-07, "loss": 0.5841, "step": 5230 }, { "epoch": 0.9065661489135851, "grad_norm": 2.2306904807117025, "learning_rate": 2.2716716125991856e-07, "loss": 0.5545, "step": 5231 }, { "epoch": 0.9067394553844154, "grad_norm": 3.9753930180375354, "learning_rate": 2.263314341693884e-07, "loss": 0.5755, "step": 5232 }, { "epoch": 0.9069127618552457, "grad_norm": 2.533905784663638, "learning_rate": 2.2549721159407621e-07, "loss": 0.5138, "step": 5233 }, { "epoch": 0.9070860683260761, "grad_norm": 4.5212118469466125, "learning_rate": 2.2466449379690503e-07, "loss": 0.5763, "step": 5234 }, { "epoch": 0.9072593747969064, "grad_norm": 2.5540118201281747, "learning_rate": 2.2383328104032152e-07, "loss": 0.4431, "step": 5235 }, { "epoch": 0.9074326812677368, "grad_norm": 2.508377628876888, "learning_rate": 2.2300357358629843e-07, "loss": 0.5134, "step": 5236 }, { "epoch": 0.9076059877385672, "grad_norm": 2.6686823950937315, "learning_rate": 2.2217537169633484e-07, "loss": 0.5636, "step": 5237 }, { "epoch": 0.9077792942093975, "grad_norm": 2.328364691599175, "learning_rate": 2.2134867563145424e-07, "loss": 0.556, "step": 5238 }, { "epoch": 0.9079526006802279, "grad_norm": 2.5286874603839387, "learning_rate": 2.2052348565220648e-07, "loss": 0.5472, "step": 5239 }, { "epoch": 0.9081259071510582, "grad_norm": 2.3334556709901313, "learning_rate": 2.1969980201866637e-07, "loss": 0.4078, "step": 5240 }, { "epoch": 0.9082992136218886, "grad_norm": 2.984939522789434, "learning_rate": 2.1887762499043454e-07, "loss": 0.5597, "step": 5241 }, { "epoch": 0.908472520092719, "grad_norm": 2.39791537286681, "learning_rate": 2.1805695482663425e-07, "loss": 0.5288, "step": 5242 }, { "epoch": 0.9086458265635493, "grad_norm": 2.238150982424516, "learning_rate": 2.1723779178591752e-07, "loss": 0.4706, "step": 5243 }, { "epoch": 0.9088191330343797, "grad_norm": 2.3268397826334075, "learning_rate": 2.1642013612645897e-07, "loss": 0.4842, "step": 5244 }, { "epoch": 0.90899243950521, "grad_norm": 3.19511755180037, "learning_rate": 2.1560398810595963e-07, "loss": 0.5155, "step": 5245 }, { "epoch": 0.9091657459760404, "grad_norm": 2.482574904645316, "learning_rate": 2.147893479816432e-07, "loss": 0.4847, "step": 5246 }, { "epoch": 0.9093390524468707, "grad_norm": 11.178189803098652, "learning_rate": 2.1397621601026043e-07, "loss": 0.4701, "step": 5247 }, { "epoch": 0.9095123589177011, "grad_norm": 2.4067429127357616, "learning_rate": 2.1316459244808463e-07, "loss": 0.5809, "step": 5248 }, { "epoch": 0.9096856653885315, "grad_norm": 2.3625912861613534, "learning_rate": 2.1235447755091675e-07, "loss": 0.4614, "step": 5249 }, { "epoch": 0.9098589718593618, "grad_norm": 2.8555840443482374, "learning_rate": 2.1154587157407924e-07, "loss": 0.5003, "step": 5250 }, { "epoch": 0.9100322783301922, "grad_norm": 2.2580361095981436, "learning_rate": 2.1073877477242098e-07, "loss": 0.5781, "step": 5251 }, { "epoch": 0.9102055848010225, "grad_norm": 2.6918980370669376, "learning_rate": 2.0993318740031298e-07, "loss": 0.4898, "step": 5252 }, { "epoch": 0.9103788912718529, "grad_norm": 2.422244080949549, "learning_rate": 2.0912910971165267e-07, "loss": 0.5327, "step": 5253 }, { "epoch": 0.9105521977426833, "grad_norm": 2.5037909717546083, "learning_rate": 2.0832654195986124e-07, "loss": 0.519, "step": 5254 }, { "epoch": 0.9107255042135136, "grad_norm": 9.956941183229373, "learning_rate": 2.0752548439788246e-07, "loss": 0.5226, "step": 5255 }, { "epoch": 0.910898810684344, "grad_norm": 5.7340744945282225, "learning_rate": 2.0672593727818656e-07, "loss": 0.5781, "step": 5256 }, { "epoch": 0.9110721171551743, "grad_norm": 2.4758653481531163, "learning_rate": 2.0592790085276593e-07, "loss": 0.5279, "step": 5257 }, { "epoch": 0.9112454236260047, "grad_norm": 2.8647317275993145, "learning_rate": 2.0513137537313765e-07, "loss": 0.5462, "step": 5258 }, { "epoch": 0.9114187300968349, "grad_norm": 2.510197997339405, "learning_rate": 2.043363610903415e-07, "loss": 0.5699, "step": 5259 }, { "epoch": 0.9115920365676653, "grad_norm": 2.7405832807452173, "learning_rate": 2.0354285825494313e-07, "loss": 0.5162, "step": 5260 }, { "epoch": 0.9117653430384957, "grad_norm": 3.006666088487711, "learning_rate": 2.0275086711702973e-07, "loss": 0.5859, "step": 5261 }, { "epoch": 0.911938649509326, "grad_norm": 2.3460996081575343, "learning_rate": 2.0196038792621276e-07, "loss": 0.5051, "step": 5262 }, { "epoch": 0.9121119559801564, "grad_norm": 2.084404935563285, "learning_rate": 2.0117142093162733e-07, "loss": 0.4749, "step": 5263 }, { "epoch": 0.9122852624509867, "grad_norm": 2.411388053227741, "learning_rate": 2.0038396638193236e-07, "loss": 0.5366, "step": 5264 }, { "epoch": 0.9124585689218171, "grad_norm": 2.5020132823883743, "learning_rate": 1.9959802452530929e-07, "loss": 0.4332, "step": 5265 }, { "epoch": 0.9126318753926475, "grad_norm": 2.0354830702189313, "learning_rate": 1.9881359560946224e-07, "loss": 0.4633, "step": 5266 }, { "epoch": 0.9128051818634778, "grad_norm": 2.0712473384184573, "learning_rate": 1.9803067988162062e-07, "loss": 0.3934, "step": 5267 }, { "epoch": 0.9129784883343082, "grad_norm": 2.557145618905791, "learning_rate": 1.972492775885354e-07, "loss": 0.5456, "step": 5268 }, { "epoch": 0.9131517948051385, "grad_norm": 3.118378216589819, "learning_rate": 1.9646938897648016e-07, "loss": 0.5259, "step": 5269 }, { "epoch": 0.9133251012759689, "grad_norm": 2.895538778999666, "learning_rate": 1.956910142912527e-07, "loss": 0.5944, "step": 5270 }, { "epoch": 0.9134984077467992, "grad_norm": 2.6801434621319324, "learning_rate": 1.9491415377817292e-07, "loss": 0.5714, "step": 5271 }, { "epoch": 0.9136717142176296, "grad_norm": 3.4303018575293187, "learning_rate": 1.9413880768208382e-07, "loss": 0.5162, "step": 5272 }, { "epoch": 0.91384502068846, "grad_norm": 2.6199188074365263, "learning_rate": 1.933649762473505e-07, "loss": 0.5663, "step": 5273 }, { "epoch": 0.9140183271592903, "grad_norm": 1.8794134885099685, "learning_rate": 1.9259265971786068e-07, "loss": 0.4713, "step": 5274 }, { "epoch": 0.9141916336301207, "grad_norm": 4.026163651976146, "learning_rate": 1.9182185833702572e-07, "loss": 0.5243, "step": 5275 }, { "epoch": 0.914364940100951, "grad_norm": 2.360582072324159, "learning_rate": 1.9105257234777907e-07, "loss": 0.3978, "step": 5276 }, { "epoch": 0.9145382465717814, "grad_norm": 2.693214594435546, "learning_rate": 1.9028480199257627e-07, "loss": 0.5247, "step": 5277 }, { "epoch": 0.9147115530426118, "grad_norm": 1.9824766688279836, "learning_rate": 1.895185475133948e-07, "loss": 0.5192, "step": 5278 }, { "epoch": 0.9148848595134421, "grad_norm": 4.081711599972186, "learning_rate": 1.8875380915173435e-07, "loss": 0.5859, "step": 5279 }, { "epoch": 0.9150581659842725, "grad_norm": 2.135213851528516, "learning_rate": 1.8799058714861817e-07, "loss": 0.4977, "step": 5280 }, { "epoch": 0.9152314724551028, "grad_norm": 2.466370329634417, "learning_rate": 1.8722888174458998e-07, "loss": 0.5816, "step": 5281 }, { "epoch": 0.9154047789259332, "grad_norm": 3.2761836241336417, "learning_rate": 1.8646869317971718e-07, "loss": 0.4889, "step": 5282 }, { "epoch": 0.9155780853967636, "grad_norm": 2.8116806655149262, "learning_rate": 1.8571002169358643e-07, "loss": 0.5019, "step": 5283 }, { "epoch": 0.9157513918675939, "grad_norm": 2.3251539856685914, "learning_rate": 1.8495286752530873e-07, "loss": 0.503, "step": 5284 }, { "epoch": 0.9159246983384242, "grad_norm": 2.5412555861163604, "learning_rate": 1.8419723091351593e-07, "loss": 0.587, "step": 5285 }, { "epoch": 0.9160980048092545, "grad_norm": 2.358728010939113, "learning_rate": 1.8344311209636256e-07, "loss": 0.4509, "step": 5286 }, { "epoch": 0.9162713112800849, "grad_norm": 2.2586633332922275, "learning_rate": 1.826905113115224e-07, "loss": 0.5421, "step": 5287 }, { "epoch": 0.9164446177509152, "grad_norm": 2.427857538888809, "learning_rate": 1.8193942879619397e-07, "loss": 0.5164, "step": 5288 }, { "epoch": 0.9166179242217456, "grad_norm": 3.12453222693218, "learning_rate": 1.811898647870941e-07, "loss": 0.5464, "step": 5289 }, { "epoch": 0.916791230692576, "grad_norm": 4.292072605880031, "learning_rate": 1.804418195204638e-07, "loss": 0.5189, "step": 5290 }, { "epoch": 0.9169645371634063, "grad_norm": 2.3885820861053717, "learning_rate": 1.7969529323206392e-07, "loss": 0.4647, "step": 5291 }, { "epoch": 0.9171378436342367, "grad_norm": 2.93680316877987, "learning_rate": 1.789502861571768e-07, "loss": 0.5138, "step": 5292 }, { "epoch": 0.917311150105067, "grad_norm": 3.1629188388623843, "learning_rate": 1.7820679853060575e-07, "loss": 0.5482, "step": 5293 }, { "epoch": 0.9174844565758974, "grad_norm": 2.6630673663422315, "learning_rate": 1.774648305866755e-07, "loss": 0.5131, "step": 5294 }, { "epoch": 0.9176577630467277, "grad_norm": 2.8256538095026213, "learning_rate": 1.767243825592324e-07, "loss": 0.5114, "step": 5295 }, { "epoch": 0.9178310695175581, "grad_norm": 2.070323899404312, "learning_rate": 1.7598545468164251e-07, "loss": 0.5409, "step": 5296 }, { "epoch": 0.9180043759883885, "grad_norm": 2.5556897482048377, "learning_rate": 1.7524804718679344e-07, "loss": 0.5834, "step": 5297 }, { "epoch": 0.9181776824592188, "grad_norm": 2.3786479441357855, "learning_rate": 1.7451216030709372e-07, "loss": 0.4521, "step": 5298 }, { "epoch": 0.9183509889300492, "grad_norm": 2.9938376287645654, "learning_rate": 1.7377779427447227e-07, "loss": 0.4785, "step": 5299 }, { "epoch": 0.9185242954008795, "grad_norm": 2.8502776475645963, "learning_rate": 1.7304494932038007e-07, "loss": 0.6862, "step": 5300 }, { "epoch": 0.9186976018717099, "grad_norm": 2.6546516875241726, "learning_rate": 1.7231362567578624e-07, "loss": 0.5518, "step": 5301 }, { "epoch": 0.9188709083425403, "grad_norm": 2.4339554059653996, "learning_rate": 1.7158382357118254e-07, "loss": 0.4986, "step": 5302 }, { "epoch": 0.9190442148133706, "grad_norm": 1.895773475457079, "learning_rate": 1.7085554323657994e-07, "loss": 0.462, "step": 5303 }, { "epoch": 0.919217521284201, "grad_norm": 2.677609601144003, "learning_rate": 1.701287849015104e-07, "loss": 0.5536, "step": 5304 }, { "epoch": 0.9193908277550313, "grad_norm": 2.9087998146602767, "learning_rate": 1.694035487950263e-07, "loss": 0.524, "step": 5305 }, { "epoch": 0.9195641342258617, "grad_norm": 2.3322480140021056, "learning_rate": 1.6867983514569973e-07, "loss": 0.5232, "step": 5306 }, { "epoch": 0.919737440696692, "grad_norm": 5.551839631006191, "learning_rate": 1.679576441816233e-07, "loss": 0.5891, "step": 5307 }, { "epoch": 0.9199107471675224, "grad_norm": 2.442103691924595, "learning_rate": 1.672369761304099e-07, "loss": 0.4731, "step": 5308 }, { "epoch": 0.9200840536383528, "grad_norm": 2.3346347694394884, "learning_rate": 1.665178312191923e-07, "loss": 0.5552, "step": 5309 }, { "epoch": 0.9202573601091831, "grad_norm": 2.3046240406255922, "learning_rate": 1.6580020967462252e-07, "loss": 0.4762, "step": 5310 }, { "epoch": 0.9204306665800134, "grad_norm": 2.497568538747797, "learning_rate": 1.6508411172287298e-07, "loss": 0.5088, "step": 5311 }, { "epoch": 0.9206039730508437, "grad_norm": 2.345184905304916, "learning_rate": 1.6436953758963702e-07, "loss": 0.5159, "step": 5312 }, { "epoch": 0.9207772795216741, "grad_norm": 2.1119373541472406, "learning_rate": 1.636564875001262e-07, "loss": 0.5435, "step": 5313 }, { "epoch": 0.9209505859925045, "grad_norm": 3.3447590723439466, "learning_rate": 1.6294496167907126e-07, "loss": 0.4734, "step": 5314 }, { "epoch": 0.9211238924633348, "grad_norm": 2.416635195567826, "learning_rate": 1.6223496035072395e-07, "loss": 0.5441, "step": 5315 }, { "epoch": 0.9212971989341652, "grad_norm": 2.1493188882795184, "learning_rate": 1.6152648373885582e-07, "loss": 0.5082, "step": 5316 }, { "epoch": 0.9214705054049955, "grad_norm": 2.41886521722123, "learning_rate": 1.6081953206675716e-07, "loss": 0.5774, "step": 5317 }, { "epoch": 0.9216438118758259, "grad_norm": 2.325014665892806, "learning_rate": 1.601141055572375e-07, "loss": 0.5997, "step": 5318 }, { "epoch": 0.9218171183466563, "grad_norm": 2.647388036625108, "learning_rate": 1.5941020443262512e-07, "loss": 0.5588, "step": 5319 }, { "epoch": 0.9219904248174866, "grad_norm": 2.413213481065631, "learning_rate": 1.5870782891476867e-07, "loss": 0.5332, "step": 5320 }, { "epoch": 0.922163731288317, "grad_norm": 2.11154656607897, "learning_rate": 1.5800697922503606e-07, "loss": 0.5271, "step": 5321 }, { "epoch": 0.9223370377591473, "grad_norm": 3.0073132840969197, "learning_rate": 1.5730765558431283e-07, "loss": 0.4186, "step": 5322 }, { "epoch": 0.9225103442299777, "grad_norm": 2.775313370559452, "learning_rate": 1.56609858213006e-07, "loss": 0.5117, "step": 5323 }, { "epoch": 0.922683650700808, "grad_norm": 3.5274806124646, "learning_rate": 1.5591358733103857e-07, "loss": 0.4902, "step": 5324 }, { "epoch": 0.9228569571716384, "grad_norm": 2.581615747744934, "learning_rate": 1.5521884315785496e-07, "loss": 0.4471, "step": 5325 }, { "epoch": 0.9230302636424688, "grad_norm": 2.351790586250348, "learning_rate": 1.545256259124167e-07, "loss": 0.5671, "step": 5326 }, { "epoch": 0.9232035701132991, "grad_norm": 2.8850320149901956, "learning_rate": 1.538339358132046e-07, "loss": 0.5314, "step": 5327 }, { "epoch": 0.9233768765841295, "grad_norm": 3.484106195843199, "learning_rate": 1.5314377307821982e-07, "loss": 0.5633, "step": 5328 }, { "epoch": 0.9235501830549598, "grad_norm": 2.8705572973830855, "learning_rate": 1.5245513792497834e-07, "loss": 0.4726, "step": 5329 }, { "epoch": 0.9237234895257902, "grad_norm": 2.1626791342017824, "learning_rate": 1.5176803057051937e-07, "loss": 0.5546, "step": 5330 }, { "epoch": 0.9238967959966206, "grad_norm": 2.3504359710352514, "learning_rate": 1.510824512313974e-07, "loss": 0.5519, "step": 5331 }, { "epoch": 0.9240701024674509, "grad_norm": 2.1015307556022904, "learning_rate": 1.5039840012368578e-07, "loss": 0.4565, "step": 5332 }, { "epoch": 0.9242434089382813, "grad_norm": 2.3493267033267595, "learning_rate": 1.4971587746297756e-07, "loss": 0.5543, "step": 5333 }, { "epoch": 0.9244167154091116, "grad_norm": 3.1078308636496543, "learning_rate": 1.4903488346438233e-07, "loss": 0.5713, "step": 5334 }, { "epoch": 0.924590021879942, "grad_norm": 2.74397825854587, "learning_rate": 1.483554183425284e-07, "loss": 0.456, "step": 5335 }, { "epoch": 0.9247633283507724, "grad_norm": 2.475641079575082, "learning_rate": 1.4767748231156287e-07, "loss": 0.5237, "step": 5336 }, { "epoch": 0.9249366348216026, "grad_norm": 3.7386760485961403, "learning_rate": 1.4700107558515142e-07, "loss": 0.5134, "step": 5337 }, { "epoch": 0.925109941292433, "grad_norm": 2.540679596963818, "learning_rate": 1.4632619837647577e-07, "loss": 0.5058, "step": 5338 }, { "epoch": 0.9252832477632633, "grad_norm": 2.10589795710991, "learning_rate": 1.4565285089823688e-07, "loss": 0.4845, "step": 5339 }, { "epoch": 0.9254565542340937, "grad_norm": 3.0732666217254505, "learning_rate": 1.449810333626539e-07, "loss": 0.5028, "step": 5340 }, { "epoch": 0.925629860704924, "grad_norm": 2.7099599086982646, "learning_rate": 1.44310745981463e-07, "loss": 0.5422, "step": 5341 }, { "epoch": 0.9258031671757544, "grad_norm": 2.598318859353581, "learning_rate": 1.4364198896591851e-07, "loss": 0.5514, "step": 5342 }, { "epoch": 0.9259764736465848, "grad_norm": 2.297676703267842, "learning_rate": 1.4297476252679187e-07, "loss": 0.5752, "step": 5343 }, { "epoch": 0.9261497801174151, "grad_norm": 17.10196642087237, "learning_rate": 1.4230906687437317e-07, "loss": 0.557, "step": 5344 }, { "epoch": 0.9263230865882455, "grad_norm": 2.8924572630722127, "learning_rate": 1.4164490221846905e-07, "loss": 0.5311, "step": 5345 }, { "epoch": 0.9264963930590758, "grad_norm": 2.485184258338045, "learning_rate": 1.4098226876840426e-07, "loss": 0.4204, "step": 5346 }, { "epoch": 0.9266696995299062, "grad_norm": 2.779484615588407, "learning_rate": 1.4032116673302064e-07, "loss": 0.5775, "step": 5347 }, { "epoch": 0.9268430060007365, "grad_norm": 2.5049462867303958, "learning_rate": 1.396615963206771e-07, "loss": 0.5822, "step": 5348 }, { "epoch": 0.9270163124715669, "grad_norm": 2.787296924877901, "learning_rate": 1.3900355773925123e-07, "loss": 0.5661, "step": 5349 }, { "epoch": 0.9271896189423973, "grad_norm": 2.7847793204125564, "learning_rate": 1.3834705119613544e-07, "loss": 0.5608, "step": 5350 }, { "epoch": 0.9273629254132276, "grad_norm": 2.7407835615272, "learning_rate": 1.3769207689824149e-07, "loss": 0.5334, "step": 5351 }, { "epoch": 0.927536231884058, "grad_norm": 4.188651275617886, "learning_rate": 1.3703863505199698e-07, "loss": 0.5215, "step": 5352 }, { "epoch": 0.9277095383548883, "grad_norm": 3.296611019947904, "learning_rate": 1.3638672586334722e-07, "loss": 0.4496, "step": 5353 }, { "epoch": 0.9278828448257187, "grad_norm": 5.014593296396135, "learning_rate": 1.357363495377545e-07, "loss": 0.4457, "step": 5354 }, { "epoch": 0.9280561512965491, "grad_norm": 2.6939965682145948, "learning_rate": 1.3508750628019596e-07, "loss": 0.5415, "step": 5355 }, { "epoch": 0.9282294577673794, "grad_norm": 2.324775245478916, "learning_rate": 1.344401962951697e-07, "loss": 0.5962, "step": 5356 }, { "epoch": 0.9284027642382098, "grad_norm": 2.2062644253684653, "learning_rate": 1.337944197866864e-07, "loss": 0.4373, "step": 5357 }, { "epoch": 0.9285760707090401, "grad_norm": 2.3483591332183615, "learning_rate": 1.3315017695827604e-07, "loss": 0.5469, "step": 5358 }, { "epoch": 0.9287493771798705, "grad_norm": 2.207461314133086, "learning_rate": 1.3250746801298341e-07, "loss": 0.4904, "step": 5359 }, { "epoch": 0.9289226836507009, "grad_norm": 2.5911459471885503, "learning_rate": 1.3186629315337207e-07, "loss": 0.52, "step": 5360 }, { "epoch": 0.9290959901215312, "grad_norm": 2.472350011751315, "learning_rate": 1.3122665258152035e-07, "loss": 0.3794, "step": 5361 }, { "epoch": 0.9292692965923616, "grad_norm": 3.639094936707887, "learning_rate": 1.3058854649902308e-07, "loss": 0.5134, "step": 5362 }, { "epoch": 0.9294426030631918, "grad_norm": 2.3553708074656066, "learning_rate": 1.2995197510699277e-07, "loss": 0.5147, "step": 5363 }, { "epoch": 0.9296159095340222, "grad_norm": 2.3503450433289883, "learning_rate": 1.2931693860605666e-07, "loss": 0.5214, "step": 5364 }, { "epoch": 0.9297892160048525, "grad_norm": 2.403108632231205, "learning_rate": 1.286834371963591e-07, "loss": 0.5134, "step": 5365 }, { "epoch": 0.9299625224756829, "grad_norm": 2.886946538263679, "learning_rate": 1.2805147107756045e-07, "loss": 0.5209, "step": 5366 }, { "epoch": 0.9301358289465133, "grad_norm": 1.9993391904097522, "learning_rate": 1.274210404488374e-07, "loss": 0.4224, "step": 5367 }, { "epoch": 0.9303091354173436, "grad_norm": 2.634717799921486, "learning_rate": 1.2679214550888219e-07, "loss": 0.5795, "step": 5368 }, { "epoch": 0.930482441888174, "grad_norm": 2.181016682163737, "learning_rate": 1.2616478645590403e-07, "loss": 0.5258, "step": 5369 }, { "epoch": 0.9306557483590043, "grad_norm": 2.4953655206173964, "learning_rate": 1.255389634876264e-07, "loss": 0.5614, "step": 5370 }, { "epoch": 0.9308290548298347, "grad_norm": 2.1925511442491556, "learning_rate": 1.2491467680129043e-07, "loss": 0.5003, "step": 5371 }, { "epoch": 0.931002361300665, "grad_norm": 2.170495324907208, "learning_rate": 1.2429192659365208e-07, "loss": 0.535, "step": 5372 }, { "epoch": 0.9311756677714954, "grad_norm": 5.47225981860641, "learning_rate": 1.2367071306098377e-07, "loss": 0.5281, "step": 5373 }, { "epoch": 0.9313489742423258, "grad_norm": 3.226438171042464, "learning_rate": 1.2305103639907333e-07, "loss": 0.5297, "step": 5374 }, { "epoch": 0.9315222807131561, "grad_norm": 2.0759731060100326, "learning_rate": 1.2243289680322235e-07, "loss": 0.5635, "step": 5375 }, { "epoch": 0.9316955871839865, "grad_norm": 2.488344451914104, "learning_rate": 1.2181629446825105e-07, "loss": 0.5595, "step": 5376 }, { "epoch": 0.9318688936548168, "grad_norm": 2.3823178107760987, "learning_rate": 1.2120122958849346e-07, "loss": 0.4705, "step": 5377 }, { "epoch": 0.9320422001256472, "grad_norm": 1.9154869463112154, "learning_rate": 1.2058770235779892e-07, "loss": 0.4955, "step": 5378 }, { "epoch": 0.9322155065964776, "grad_norm": 2.8610195534209, "learning_rate": 1.1997571296953281e-07, "loss": 0.6372, "step": 5379 }, { "epoch": 0.9323888130673079, "grad_norm": 2.894165773173908, "learning_rate": 1.193652616165758e-07, "loss": 0.593, "step": 5380 }, { "epoch": 0.9325621195381383, "grad_norm": 2.3523126747590215, "learning_rate": 1.18756348491324e-07, "loss": 0.5336, "step": 5381 }, { "epoch": 0.9327354260089686, "grad_norm": 2.4576443936552725, "learning_rate": 1.1814897378568724e-07, "loss": 0.5646, "step": 5382 }, { "epoch": 0.932908732479799, "grad_norm": 3.0626128737017893, "learning_rate": 1.1754313769109183e-07, "loss": 0.4769, "step": 5383 }, { "epoch": 0.9330820389506294, "grad_norm": 2.1884178507167595, "learning_rate": 1.1693884039847946e-07, "loss": 0.4783, "step": 5384 }, { "epoch": 0.9332553454214597, "grad_norm": 2.71887934943535, "learning_rate": 1.1633608209830616e-07, "loss": 0.5801, "step": 5385 }, { "epoch": 0.9334286518922901, "grad_norm": 2.433483714990038, "learning_rate": 1.157348629805427e-07, "loss": 0.5629, "step": 5386 }, { "epoch": 0.9336019583631204, "grad_norm": 2.708238085936895, "learning_rate": 1.1513518323467532e-07, "loss": 0.5553, "step": 5387 }, { "epoch": 0.9337752648339508, "grad_norm": 2.888563455161665, "learning_rate": 1.1453704304970447e-07, "loss": 0.5494, "step": 5388 }, { "epoch": 0.933948571304781, "grad_norm": 2.671401059815968, "learning_rate": 1.1394044261414606e-07, "loss": 0.4778, "step": 5389 }, { "epoch": 0.9341218777756114, "grad_norm": 2.3756679719396443, "learning_rate": 1.1334538211603074e-07, "loss": 0.6059, "step": 5390 }, { "epoch": 0.9342951842464418, "grad_norm": 2.7087403207529968, "learning_rate": 1.1275186174290298e-07, "loss": 0.4738, "step": 5391 }, { "epoch": 0.9344684907172721, "grad_norm": 2.528261325720928, "learning_rate": 1.1215988168182201e-07, "loss": 0.5547, "step": 5392 }, { "epoch": 0.9346417971881025, "grad_norm": 3.2617890228679425, "learning_rate": 1.1156944211936305e-07, "loss": 0.5126, "step": 5393 }, { "epoch": 0.9348151036589328, "grad_norm": 2.376574643916056, "learning_rate": 1.1098054324161333e-07, "loss": 0.5598, "step": 5394 }, { "epoch": 0.9349884101297632, "grad_norm": 3.041875659452686, "learning_rate": 1.1039318523417774e-07, "loss": 0.5254, "step": 5395 }, { "epoch": 0.9351617166005936, "grad_norm": 3.0341572005052573, "learning_rate": 1.0980736828217154e-07, "loss": 0.5288, "step": 5396 }, { "epoch": 0.9353350230714239, "grad_norm": 2.3149299667710297, "learning_rate": 1.0922309257022756e-07, "loss": 0.492, "step": 5397 }, { "epoch": 0.9355083295422543, "grad_norm": 2.503702109394596, "learning_rate": 1.0864035828249187e-07, "loss": 0.4934, "step": 5398 }, { "epoch": 0.9356816360130846, "grad_norm": 2.252747761547259, "learning_rate": 1.0805916560262419e-07, "loss": 0.5183, "step": 5399 }, { "epoch": 0.935854942483915, "grad_norm": 2.5435593536379026, "learning_rate": 1.0747951471379914e-07, "loss": 0.6012, "step": 5400 }, { "epoch": 0.9360282489547453, "grad_norm": 2.0222468253759773, "learning_rate": 1.06901405798705e-07, "loss": 0.4527, "step": 5401 }, { "epoch": 0.9362015554255757, "grad_norm": 2.6791641371356443, "learning_rate": 1.0632483903954383e-07, "loss": 0.5528, "step": 5402 }, { "epoch": 0.9363748618964061, "grad_norm": 2.7836703952158124, "learning_rate": 1.0574981461803246e-07, "loss": 0.4885, "step": 5403 }, { "epoch": 0.9365481683672364, "grad_norm": 2.746820028024477, "learning_rate": 1.051763327154004e-07, "loss": 0.5263, "step": 5404 }, { "epoch": 0.9367214748380668, "grad_norm": 2.8271416759135213, "learning_rate": 1.0460439351239249e-07, "loss": 0.5072, "step": 5405 }, { "epoch": 0.9368947813088971, "grad_norm": 2.480544500020671, "learning_rate": 1.0403399718926565e-07, "loss": 0.5068, "step": 5406 }, { "epoch": 0.9370680877797275, "grad_norm": 2.4628722306740207, "learning_rate": 1.0346514392579221e-07, "loss": 0.5344, "step": 5407 }, { "epoch": 0.9372413942505579, "grad_norm": 2.5284536341550354, "learning_rate": 1.0289783390125707e-07, "loss": 0.5675, "step": 5408 }, { "epoch": 0.9374147007213882, "grad_norm": 2.3615997341880064, "learning_rate": 1.0233206729445888e-07, "loss": 0.5439, "step": 5409 }, { "epoch": 0.9375880071922186, "grad_norm": 2.466487648855726, "learning_rate": 1.0176784428371111e-07, "loss": 0.4558, "step": 5410 }, { "epoch": 0.9377613136630489, "grad_norm": 2.6403833876766405, "learning_rate": 1.012051650468382e-07, "loss": 0.6013, "step": 5411 }, { "epoch": 0.9379346201338793, "grad_norm": 2.214295993695553, "learning_rate": 1.0064402976118103e-07, "loss": 0.5549, "step": 5412 }, { "epoch": 0.9381079266047097, "grad_norm": 2.1505989316107486, "learning_rate": 1.0008443860359152e-07, "loss": 0.4965, "step": 5413 }, { "epoch": 0.93828123307554, "grad_norm": 2.5072564398844217, "learning_rate": 9.952639175043576e-08, "loss": 0.5166, "step": 5414 }, { "epoch": 0.9384545395463703, "grad_norm": 2.470627710134664, "learning_rate": 9.896988937759367e-08, "loss": 0.5586, "step": 5415 }, { "epoch": 0.9386278460172006, "grad_norm": 2.6620034775269183, "learning_rate": 9.841493166045767e-08, "loss": 0.5506, "step": 5416 }, { "epoch": 0.938801152488031, "grad_norm": 3.2134398230348022, "learning_rate": 9.786151877393347e-08, "loss": 0.5601, "step": 5417 }, { "epoch": 0.9389744589588613, "grad_norm": 2.839307342799691, "learning_rate": 9.730965089243982e-08, "loss": 0.564, "step": 5418 }, { "epoch": 0.9391477654296917, "grad_norm": 2.0740823317850055, "learning_rate": 9.67593281899093e-08, "loss": 0.4449, "step": 5419 }, { "epoch": 0.939321071900522, "grad_norm": 2.2450534608178994, "learning_rate": 9.621055083978591e-08, "loss": 0.551, "step": 5420 }, { "epoch": 0.9394943783713524, "grad_norm": 2.287693220943316, "learning_rate": 9.566331901502857e-08, "loss": 0.4369, "step": 5421 }, { "epoch": 0.9396676848421828, "grad_norm": 2.738932598560584, "learning_rate": 9.511763288810705e-08, "loss": 0.6017, "step": 5422 }, { "epoch": 0.9398409913130131, "grad_norm": 4.615037784441327, "learning_rate": 9.457349263100602e-08, "loss": 0.5115, "step": 5423 }, { "epoch": 0.9400142977838435, "grad_norm": 2.667884413001153, "learning_rate": 9.403089841522217e-08, "loss": 0.5749, "step": 5424 }, { "epoch": 0.9401876042546738, "grad_norm": 2.4568705631891437, "learning_rate": 9.348985041176317e-08, "loss": 0.5186, "step": 5425 }, { "epoch": 0.9403609107255042, "grad_norm": 2.154166691697928, "learning_rate": 9.295034879115261e-08, "loss": 0.4996, "step": 5426 }, { "epoch": 0.9405342171963346, "grad_norm": 2.507325348723724, "learning_rate": 9.241239372342392e-08, "loss": 0.5232, "step": 5427 }, { "epoch": 0.9407075236671649, "grad_norm": 2.3584058434210826, "learning_rate": 9.18759853781248e-08, "loss": 0.4933, "step": 5428 }, { "epoch": 0.9408808301379953, "grad_norm": 2.906745160970425, "learning_rate": 9.134112392431449e-08, "loss": 0.5266, "step": 5429 }, { "epoch": 0.9410541366088256, "grad_norm": 2.385951587038653, "learning_rate": 9.080780953056479e-08, "loss": 0.5758, "step": 5430 }, { "epoch": 0.941227443079656, "grad_norm": 2.5688058440225463, "learning_rate": 9.027604236496068e-08, "loss": 0.5263, "step": 5431 }, { "epoch": 0.9414007495504864, "grad_norm": 2.4756292481509643, "learning_rate": 8.974582259509923e-08, "loss": 0.5206, "step": 5432 }, { "epoch": 0.9415740560213167, "grad_norm": 2.483238219156017, "learning_rate": 8.921715038808898e-08, "loss": 0.5495, "step": 5433 }, { "epoch": 0.9417473624921471, "grad_norm": 3.1748424525435874, "learning_rate": 8.869002591055165e-08, "loss": 0.4493, "step": 5434 }, { "epoch": 0.9419206689629774, "grad_norm": 3.7384023628714824, "learning_rate": 8.8164449328621e-08, "loss": 0.5511, "step": 5435 }, { "epoch": 0.9420939754338078, "grad_norm": 2.6032835615079333, "learning_rate": 8.764042080794233e-08, "loss": 0.5267, "step": 5436 }, { "epoch": 0.9422672819046382, "grad_norm": 2.493830375319564, "learning_rate": 8.71179405136735e-08, "loss": 0.5346, "step": 5437 }, { "epoch": 0.9424405883754685, "grad_norm": 2.3764889899941406, "learning_rate": 8.659700861048503e-08, "loss": 0.5023, "step": 5438 }, { "epoch": 0.9426138948462989, "grad_norm": 1.9418967365957376, "learning_rate": 8.607762526255836e-08, "loss": 0.4029, "step": 5439 }, { "epoch": 0.9427872013171292, "grad_norm": 2.3057200486661658, "learning_rate": 8.555979063358755e-08, "loss": 0.626, "step": 5440 }, { "epoch": 0.9429605077879595, "grad_norm": 2.1471997966359813, "learning_rate": 8.504350488677815e-08, "loss": 0.6246, "step": 5441 }, { "epoch": 0.9431338142587898, "grad_norm": 2.428950776152051, "learning_rate": 8.452876818484778e-08, "loss": 0.5547, "step": 5442 }, { "epoch": 0.9433071207296202, "grad_norm": 2.650229794352754, "learning_rate": 8.401558069002613e-08, "loss": 0.5392, "step": 5443 }, { "epoch": 0.9434804272004506, "grad_norm": 2.2171553364554955, "learning_rate": 8.35039425640538e-08, "loss": 0.4158, "step": 5444 }, { "epoch": 0.9436537336712809, "grad_norm": 2.1290226269373576, "learning_rate": 8.299385396818349e-08, "loss": 0.6186, "step": 5445 }, { "epoch": 0.9438270401421113, "grad_norm": 2.1188919224536664, "learning_rate": 8.248531506318047e-08, "loss": 0.4709, "step": 5446 }, { "epoch": 0.9440003466129416, "grad_norm": 3.66345553329775, "learning_rate": 8.197832600931988e-08, "loss": 0.5407, "step": 5447 }, { "epoch": 0.944173653083772, "grad_norm": 2.237493191520357, "learning_rate": 8.147288696638945e-08, "loss": 0.4238, "step": 5448 }, { "epoch": 0.9443469595546023, "grad_norm": 1.893892575348246, "learning_rate": 8.096899809368842e-08, "loss": 0.5047, "step": 5449 }, { "epoch": 0.9445202660254327, "grad_norm": 2.768178227122904, "learning_rate": 8.046665955002697e-08, "loss": 0.4917, "step": 5450 }, { "epoch": 0.9446935724962631, "grad_norm": 4.09285641323715, "learning_rate": 7.996587149372681e-08, "loss": 0.6054, "step": 5451 }, { "epoch": 0.9448668789670934, "grad_norm": 2.136215621336084, "learning_rate": 7.946663408262167e-08, "loss": 0.5114, "step": 5452 }, { "epoch": 0.9450401854379238, "grad_norm": 2.0128561514977257, "learning_rate": 7.89689474740557e-08, "loss": 0.5838, "step": 5453 }, { "epoch": 0.9452134919087541, "grad_norm": 3.522250025613126, "learning_rate": 7.847281182488508e-08, "loss": 0.5356, "step": 5454 }, { "epoch": 0.9453867983795845, "grad_norm": 2.6406029658727155, "learning_rate": 7.797822729147586e-08, "loss": 0.5276, "step": 5455 }, { "epoch": 0.9455601048504149, "grad_norm": 3.8090784420400743, "learning_rate": 7.748519402970612e-08, "loss": 0.5095, "step": 5456 }, { "epoch": 0.9457334113212452, "grad_norm": 2.2824674220590735, "learning_rate": 7.699371219496599e-08, "loss": 0.4856, "step": 5457 }, { "epoch": 0.9459067177920756, "grad_norm": 2.1458548212485384, "learning_rate": 7.650378194215436e-08, "loss": 0.5086, "step": 5458 }, { "epoch": 0.9460800242629059, "grad_norm": 2.2461407099721398, "learning_rate": 7.601540342568325e-08, "loss": 0.5315, "step": 5459 }, { "epoch": 0.9462533307337363, "grad_norm": 9.622487326730907, "learning_rate": 7.5528576799474e-08, "loss": 0.5416, "step": 5460 }, { "epoch": 0.9464266372045667, "grad_norm": 2.185189451221443, "learning_rate": 7.504330221695999e-08, "loss": 0.5707, "step": 5461 }, { "epoch": 0.946599943675397, "grad_norm": 2.317855764462349, "learning_rate": 7.455957983108497e-08, "loss": 0.5613, "step": 5462 }, { "epoch": 0.9467732501462274, "grad_norm": 2.6249320525035045, "learning_rate": 7.407740979430312e-08, "loss": 0.4977, "step": 5463 }, { "epoch": 0.9469465566170577, "grad_norm": 2.3753839493563187, "learning_rate": 7.359679225858062e-08, "loss": 0.5201, "step": 5464 }, { "epoch": 0.9471198630878881, "grad_norm": 2.4758793962978762, "learning_rate": 7.311772737539246e-08, "loss": 0.5258, "step": 5465 }, { "epoch": 0.9472931695587185, "grad_norm": 3.325780959075498, "learning_rate": 7.264021529572618e-08, "loss": 0.6503, "step": 5466 }, { "epoch": 0.9474664760295487, "grad_norm": 2.192547615825498, "learning_rate": 7.216425617007916e-08, "loss": 0.4965, "step": 5467 }, { "epoch": 0.9476397825003791, "grad_norm": 1.9840318298747062, "learning_rate": 7.168985014845864e-08, "loss": 0.451, "step": 5468 }, { "epoch": 0.9478130889712094, "grad_norm": 2.589353248711029, "learning_rate": 7.121699738038279e-08, "loss": 0.5296, "step": 5469 }, { "epoch": 0.9479863954420398, "grad_norm": 5.562861169722227, "learning_rate": 7.074569801488129e-08, "loss": 0.5143, "step": 5470 }, { "epoch": 0.9481597019128701, "grad_norm": 2.490701194437729, "learning_rate": 7.027595220049311e-08, "loss": 0.4866, "step": 5471 }, { "epoch": 0.9483330083837005, "grad_norm": 2.244744580794734, "learning_rate": 6.980776008526758e-08, "loss": 0.4683, "step": 5472 }, { "epoch": 0.9485063148545309, "grad_norm": 2.170360068748367, "learning_rate": 6.934112181676445e-08, "loss": 0.4186, "step": 5473 }, { "epoch": 0.9486796213253612, "grad_norm": 2.4163433935015917, "learning_rate": 6.887603754205441e-08, "loss": 0.5352, "step": 5474 }, { "epoch": 0.9488529277961916, "grad_norm": 2.2940583727056434, "learning_rate": 6.841250740771743e-08, "loss": 0.5081, "step": 5475 }, { "epoch": 0.9490262342670219, "grad_norm": 2.3205421684467353, "learning_rate": 6.795053155984443e-08, "loss": 0.5487, "step": 5476 }, { "epoch": 0.9491995407378523, "grad_norm": 2.570409659443094, "learning_rate": 6.749011014403617e-08, "loss": 0.5434, "step": 5477 }, { "epoch": 0.9493728472086826, "grad_norm": 3.4926858393824323, "learning_rate": 6.703124330540323e-08, "loss": 0.6013, "step": 5478 }, { "epoch": 0.949546153679513, "grad_norm": 2.1916925170282973, "learning_rate": 6.657393118856659e-08, "loss": 0.5924, "step": 5479 }, { "epoch": 0.9497194601503434, "grad_norm": 2.024045621682779, "learning_rate": 6.611817393765763e-08, "loss": 0.557, "step": 5480 }, { "epoch": 0.9498927666211737, "grad_norm": 3.191976538833741, "learning_rate": 6.566397169631644e-08, "loss": 0.5071, "step": 5481 }, { "epoch": 0.9500660730920041, "grad_norm": 2.5901682615699184, "learning_rate": 6.521132460769408e-08, "loss": 0.4761, "step": 5482 }, { "epoch": 0.9502393795628344, "grad_norm": 2.5633829423404744, "learning_rate": 6.476023281445087e-08, "loss": 0.5284, "step": 5483 }, { "epoch": 0.9504126860336648, "grad_norm": 2.996214498011624, "learning_rate": 6.43106964587581e-08, "loss": 0.5771, "step": 5484 }, { "epoch": 0.9505859925044952, "grad_norm": 3.2599326965168176, "learning_rate": 6.386271568229463e-08, "loss": 0.5127, "step": 5485 }, { "epoch": 0.9507592989753255, "grad_norm": 2.8996347302290397, "learning_rate": 6.341629062625144e-08, "loss": 0.5599, "step": 5486 }, { "epoch": 0.9509326054461559, "grad_norm": 3.729287325120241, "learning_rate": 6.297142143132818e-08, "loss": 0.501, "step": 5487 }, { "epoch": 0.9511059119169862, "grad_norm": 2.6782425617000647, "learning_rate": 6.252810823773381e-08, "loss": 0.5438, "step": 5488 }, { "epoch": 0.9512792183878166, "grad_norm": 2.6712904290296215, "learning_rate": 6.208635118518658e-08, "loss": 0.5823, "step": 5489 }, { "epoch": 0.951452524858647, "grad_norm": 2.5035966792365767, "learning_rate": 6.164615041291511e-08, "loss": 0.5022, "step": 5490 }, { "epoch": 0.9516258313294773, "grad_norm": 3.2139904475296825, "learning_rate": 6.120750605965786e-08, "loss": 0.5391, "step": 5491 }, { "epoch": 0.9517991378003077, "grad_norm": 2.59781875538798, "learning_rate": 6.077041826366148e-08, "loss": 0.5324, "step": 5492 }, { "epoch": 0.9519724442711379, "grad_norm": 2.1751191125418803, "learning_rate": 6.03348871626841e-08, "loss": 0.5363, "step": 5493 }, { "epoch": 0.9521457507419683, "grad_norm": 2.799380672534815, "learning_rate": 5.990091289399036e-08, "loss": 0.5128, "step": 5494 }, { "epoch": 0.9523190572127986, "grad_norm": 2.293723665093168, "learning_rate": 5.946849559435641e-08, "loss": 0.5444, "step": 5495 }, { "epoch": 0.952492363683629, "grad_norm": 3.5676816460255583, "learning_rate": 5.903763540006713e-08, "loss": 0.4577, "step": 5496 }, { "epoch": 0.9526656701544594, "grad_norm": 2.1919024087604866, "learning_rate": 5.860833244691666e-08, "loss": 0.5297, "step": 5497 }, { "epoch": 0.9528389766252897, "grad_norm": 2.3979129815607596, "learning_rate": 5.818058687020789e-08, "loss": 0.545, "step": 5498 }, { "epoch": 0.9530122830961201, "grad_norm": 2.701919928126786, "learning_rate": 5.775439880475353e-08, "loss": 0.4782, "step": 5499 }, { "epoch": 0.9531855895669504, "grad_norm": 2.393595753331888, "learning_rate": 5.7329768384874477e-08, "loss": 0.4777, "step": 5500 }, { "epoch": 0.9533588960377808, "grad_norm": 4.594938673833325, "learning_rate": 5.6906695744401995e-08, "loss": 0.5016, "step": 5501 }, { "epoch": 0.9535322025086111, "grad_norm": 2.9100135528511277, "learning_rate": 5.6485181016674996e-08, "loss": 0.5696, "step": 5502 }, { "epoch": 0.9537055089794415, "grad_norm": 2.515197452355653, "learning_rate": 5.606522433454331e-08, "loss": 0.5799, "step": 5503 }, { "epoch": 0.9538788154502719, "grad_norm": 2.066911391976492, "learning_rate": 5.5646825830363295e-08, "loss": 0.4988, "step": 5504 }, { "epoch": 0.9540521219211022, "grad_norm": 2.355244686364865, "learning_rate": 5.5229985636002236e-08, "loss": 0.5142, "step": 5505 }, { "epoch": 0.9542254283919326, "grad_norm": 2.187427758956532, "learning_rate": 5.481470388283505e-08, "loss": 0.5298, "step": 5506 }, { "epoch": 0.9543987348627629, "grad_norm": 2.485672246208888, "learning_rate": 5.440098070174593e-08, "loss": 0.52, "step": 5507 }, { "epoch": 0.9545720413335933, "grad_norm": 2.4262545351749774, "learning_rate": 5.3988816223128346e-08, "loss": 0.5364, "step": 5508 }, { "epoch": 0.9547453478044237, "grad_norm": 2.19277249527138, "learning_rate": 5.3578210576883395e-08, "loss": 0.4892, "step": 5509 }, { "epoch": 0.954918654275254, "grad_norm": 2.5021427516647945, "learning_rate": 5.3169163892421995e-08, "loss": 0.4831, "step": 5510 }, { "epoch": 0.9550919607460844, "grad_norm": 2.4126914363046086, "learning_rate": 5.276167629866269e-08, "loss": 0.5598, "step": 5511 }, { "epoch": 0.9552652672169147, "grad_norm": 4.391739327989187, "learning_rate": 5.2355747924033865e-08, "loss": 0.6026, "step": 5512 }, { "epoch": 0.9554385736877451, "grad_norm": 3.1803456828019, "learning_rate": 5.195137889647095e-08, "loss": 0.5426, "step": 5513 }, { "epoch": 0.9556118801585755, "grad_norm": 3.129286662828675, "learning_rate": 5.1548569343419786e-08, "loss": 0.4381, "step": 5514 }, { "epoch": 0.9557851866294058, "grad_norm": 2.808851648095399, "learning_rate": 5.1147319391832704e-08, "loss": 0.6074, "step": 5515 }, { "epoch": 0.9559584931002362, "grad_norm": 2.3767811026214285, "learning_rate": 5.074762916817244e-08, "loss": 0.5018, "step": 5516 }, { "epoch": 0.9561317995710665, "grad_norm": 3.089413297654003, "learning_rate": 5.034949879840822e-08, "loss": 0.5448, "step": 5517 }, { "epoch": 0.9563051060418969, "grad_norm": 2.5795378815503307, "learning_rate": 4.995292840801913e-08, "loss": 0.6026, "step": 5518 }, { "epoch": 0.9564784125127271, "grad_norm": 2.209351061922253, "learning_rate": 4.95579181219924e-08, "loss": 0.522, "step": 5519 }, { "epoch": 0.9566517189835575, "grad_norm": 2.461828137351232, "learning_rate": 4.916446806482289e-08, "loss": 0.491, "step": 5520 }, { "epoch": 0.9568250254543879, "grad_norm": 3.224008493421145, "learning_rate": 4.877257836051419e-08, "loss": 0.5311, "step": 5521 }, { "epoch": 0.9569983319252182, "grad_norm": 2.891014577358532, "learning_rate": 4.838224913257805e-08, "loss": 0.506, "step": 5522 }, { "epoch": 0.9571716383960486, "grad_norm": 2.0628550286608394, "learning_rate": 4.799348050403385e-08, "loss": 0.5175, "step": 5523 }, { "epoch": 0.9573449448668789, "grad_norm": 2.3579151896684394, "learning_rate": 4.76062725974108e-08, "loss": 0.4913, "step": 5524 }, { "epoch": 0.9575182513377093, "grad_norm": 2.661286745519147, "learning_rate": 4.722062553474405e-08, "loss": 0.4476, "step": 5525 }, { "epoch": 0.9576915578085397, "grad_norm": 2.5324117319447508, "learning_rate": 4.683653943757804e-08, "loss": 0.4503, "step": 5526 }, { "epoch": 0.95786486427937, "grad_norm": 11.935348551249433, "learning_rate": 4.64540144269654e-08, "loss": 0.456, "step": 5527 }, { "epoch": 0.9580381707502004, "grad_norm": 2.437671649313457, "learning_rate": 4.607305062346579e-08, "loss": 0.4944, "step": 5528 }, { "epoch": 0.9582114772210307, "grad_norm": 4.091619306020384, "learning_rate": 4.5693648147148164e-08, "loss": 0.567, "step": 5529 }, { "epoch": 0.9583847836918611, "grad_norm": 3.095075252526002, "learning_rate": 4.531580711758799e-08, "loss": 0.51, "step": 5530 }, { "epoch": 0.9585580901626914, "grad_norm": 2.0195140989822007, "learning_rate": 4.493952765387e-08, "loss": 0.5177, "step": 5531 }, { "epoch": 0.9587313966335218, "grad_norm": 2.086821639216498, "learning_rate": 4.456480987458489e-08, "loss": 0.4748, "step": 5532 }, { "epoch": 0.9589047031043522, "grad_norm": 2.9380266296574074, "learning_rate": 4.41916538978332e-08, "loss": 0.5804, "step": 5533 }, { "epoch": 0.9590780095751825, "grad_norm": 2.4454225204813347, "learning_rate": 4.382005984122195e-08, "loss": 0.5277, "step": 5534 }, { "epoch": 0.9592513160460129, "grad_norm": 2.6368004140596772, "learning_rate": 4.345002782186691e-08, "loss": 0.487, "step": 5535 }, { "epoch": 0.9594246225168432, "grad_norm": 2.6629005952914526, "learning_rate": 4.308155795639035e-08, "loss": 0.3698, "step": 5536 }, { "epoch": 0.9595979289876736, "grad_norm": 2.114636675073851, "learning_rate": 4.271465036092326e-08, "loss": 0.5418, "step": 5537 }, { "epoch": 0.959771235458504, "grad_norm": 2.532660994834779, "learning_rate": 4.234930515110314e-08, "loss": 0.4961, "step": 5538 }, { "epoch": 0.9599445419293343, "grad_norm": 2.3480199035730323, "learning_rate": 4.198552244207621e-08, "loss": 0.5247, "step": 5539 }, { "epoch": 0.9601178484001647, "grad_norm": 2.304302601204765, "learning_rate": 4.162330234849521e-08, "loss": 0.4686, "step": 5540 }, { "epoch": 0.960291154870995, "grad_norm": 2.3412491988323465, "learning_rate": 4.126264498452104e-08, "loss": 0.4961, "step": 5541 }, { "epoch": 0.9604644613418254, "grad_norm": 3.783459214119322, "learning_rate": 4.090355046382277e-08, "loss": 0.6075, "step": 5542 }, { "epoch": 0.9606377678126558, "grad_norm": 1.8331777148214734, "learning_rate": 4.054601889957488e-08, "loss": 0.3884, "step": 5543 }, { "epoch": 0.9608110742834861, "grad_norm": 2.362207769956409, "learning_rate": 4.0190050404460556e-08, "loss": 0.52, "step": 5544 }, { "epoch": 0.9609843807543165, "grad_norm": 3.3467303164936464, "learning_rate": 3.9835645090671174e-08, "loss": 0.5868, "step": 5545 }, { "epoch": 0.9611576872251467, "grad_norm": 4.012120501159104, "learning_rate": 3.948280306990404e-08, "loss": 0.4843, "step": 5546 }, { "epoch": 0.9613309936959771, "grad_norm": 2.911165120398505, "learning_rate": 3.91315244533641e-08, "loss": 0.5259, "step": 5547 }, { "epoch": 0.9615043001668074, "grad_norm": 7.598908519236776, "learning_rate": 3.8781809351763345e-08, "loss": 0.567, "step": 5548 }, { "epoch": 0.9616776066376378, "grad_norm": 2.464744786299639, "learning_rate": 3.8433657875322496e-08, "loss": 0.5582, "step": 5549 }, { "epoch": 0.9618509131084682, "grad_norm": 2.2189580119468535, "learning_rate": 3.808707013376767e-08, "loss": 0.4731, "step": 5550 }, { "epoch": 0.9620242195792985, "grad_norm": 2.6833746527583293, "learning_rate": 3.774204623633204e-08, "loss": 0.5003, "step": 5551 }, { "epoch": 0.9621975260501289, "grad_norm": 2.2639285177668427, "learning_rate": 3.7398586291757524e-08, "loss": 0.4532, "step": 5552 }, { "epoch": 0.9623708325209592, "grad_norm": 2.837946433752603, "learning_rate": 3.705669040829252e-08, "loss": 0.5243, "step": 5553 }, { "epoch": 0.9625441389917896, "grad_norm": 3.1771099989441525, "learning_rate": 3.671635869369139e-08, "loss": 0.5308, "step": 5554 }, { "epoch": 0.96271744546262, "grad_norm": 2.711490238337068, "learning_rate": 3.6377591255216674e-08, "loss": 0.6008, "step": 5555 }, { "epoch": 0.9628907519334503, "grad_norm": 1.9889994496500856, "learning_rate": 3.60403881996374e-08, "loss": 0.416, "step": 5556 }, { "epoch": 0.9630640584042807, "grad_norm": 2.3752978533414866, "learning_rate": 3.570474963323023e-08, "loss": 0.5174, "step": 5557 }, { "epoch": 0.963237364875111, "grad_norm": 3.0775478720349003, "learning_rate": 3.537067566177832e-08, "loss": 0.458, "step": 5558 }, { "epoch": 0.9634106713459414, "grad_norm": 2.2863026461596556, "learning_rate": 3.503816639057078e-08, "loss": 0.5197, "step": 5559 }, { "epoch": 0.9635839778167717, "grad_norm": 4.791817335642214, "learning_rate": 3.4707221924404896e-08, "loss": 0.5574, "step": 5560 }, { "epoch": 0.9637572842876021, "grad_norm": 2.6257961045080545, "learning_rate": 3.437784236758446e-08, "loss": 0.5123, "step": 5561 }, { "epoch": 0.9639305907584325, "grad_norm": 2.561146314090149, "learning_rate": 3.405002782391975e-08, "loss": 0.5792, "step": 5562 }, { "epoch": 0.9641038972292628, "grad_norm": 6.04072445445426, "learning_rate": 3.372377839672758e-08, "loss": 0.498, "step": 5563 }, { "epoch": 0.9642772037000932, "grad_norm": 2.3277139980370833, "learning_rate": 3.339909418883236e-08, "loss": 0.5725, "step": 5564 }, { "epoch": 0.9644505101709235, "grad_norm": 2.8503271078485652, "learning_rate": 3.3075975302564457e-08, "loss": 0.5657, "step": 5565 }, { "epoch": 0.9646238166417539, "grad_norm": 2.7857009470189187, "learning_rate": 3.27544218397613e-08, "loss": 0.6247, "step": 5566 }, { "epoch": 0.9647971231125843, "grad_norm": 2.4609349581686772, "learning_rate": 3.2434433901766814e-08, "loss": 0.5331, "step": 5567 }, { "epoch": 0.9649704295834146, "grad_norm": 3.2070624994572783, "learning_rate": 3.211601158943145e-08, "loss": 0.475, "step": 5568 }, { "epoch": 0.965143736054245, "grad_norm": 2.365935277616741, "learning_rate": 3.1799155003111594e-08, "loss": 0.5012, "step": 5569 }, { "epoch": 0.9653170425250753, "grad_norm": 3.188256433015105, "learning_rate": 3.1483864242671805e-08, "loss": 0.5824, "step": 5570 }, { "epoch": 0.9654903489959057, "grad_norm": 2.5067838745263247, "learning_rate": 3.117013940748148e-08, "loss": 0.4857, "step": 5571 }, { "epoch": 0.9656636554667359, "grad_norm": 2.358201821413501, "learning_rate": 3.085798059641709e-08, "loss": 0.5214, "step": 5572 }, { "epoch": 0.9658369619375663, "grad_norm": 2.294768242538187, "learning_rate": 3.054738790786216e-08, "loss": 0.5827, "step": 5573 }, { "epoch": 0.9660102684083967, "grad_norm": 2.0140717068648044, "learning_rate": 3.02383614397056e-08, "loss": 0.5618, "step": 5574 }, { "epoch": 0.966183574879227, "grad_norm": 2.753826444082488, "learning_rate": 2.9930901289343394e-08, "loss": 0.5001, "step": 5575 }, { "epoch": 0.9663568813500574, "grad_norm": 2.9956721416414287, "learning_rate": 2.9625007553677477e-08, "loss": 0.4889, "step": 5576 }, { "epoch": 0.9665301878208877, "grad_norm": 2.788218770581959, "learning_rate": 2.932068032911628e-08, "loss": 0.5001, "step": 5577 }, { "epoch": 0.9667034942917181, "grad_norm": 2.924893404384479, "learning_rate": 2.9017919711574194e-08, "loss": 0.5376, "step": 5578 }, { "epoch": 0.9668768007625484, "grad_norm": 1.8130076526081955, "learning_rate": 2.8716725796473223e-08, "loss": 0.4515, "step": 5579 }, { "epoch": 0.9670501072333788, "grad_norm": 2.5434641603401844, "learning_rate": 2.841709867873965e-08, "loss": 0.5569, "step": 5580 }, { "epoch": 0.9672234137042092, "grad_norm": 3.7864765668782088, "learning_rate": 2.8119038452806834e-08, "loss": 0.592, "step": 5581 }, { "epoch": 0.9673967201750395, "grad_norm": 2.2608738384176186, "learning_rate": 2.7822545212614072e-08, "loss": 0.4662, "step": 5582 }, { "epoch": 0.9675700266458699, "grad_norm": 2.357388950677134, "learning_rate": 2.752761905160828e-08, "loss": 0.4803, "step": 5583 }, { "epoch": 0.9677433331167002, "grad_norm": 2.2450670080597934, "learning_rate": 2.723426006273955e-08, "loss": 0.5198, "step": 5584 }, { "epoch": 0.9679166395875306, "grad_norm": 2.263257459032796, "learning_rate": 2.6942468338466697e-08, "loss": 0.4993, "step": 5585 }, { "epoch": 0.968089946058361, "grad_norm": 2.4015714419638376, "learning_rate": 2.6652243970753945e-08, "loss": 0.4779, "step": 5586 }, { "epoch": 0.9682632525291913, "grad_norm": 3.2753908762083297, "learning_rate": 2.6363587051070337e-08, "loss": 0.5064, "step": 5587 }, { "epoch": 0.9684365590000217, "grad_norm": 2.2545287906035334, "learning_rate": 2.6076497670391443e-08, "loss": 0.4575, "step": 5588 }, { "epoch": 0.968609865470852, "grad_norm": 2.040590622894489, "learning_rate": 2.5790975919200433e-08, "loss": 0.4565, "step": 5589 }, { "epoch": 0.9687831719416824, "grad_norm": 2.6389425944164655, "learning_rate": 2.5507021887484774e-08, "loss": 0.5429, "step": 5590 }, { "epoch": 0.9689564784125128, "grad_norm": 2.7347840322277515, "learning_rate": 2.5224635664737318e-08, "loss": 0.5372, "step": 5591 }, { "epoch": 0.9691297848833431, "grad_norm": 2.233517346903552, "learning_rate": 2.4943817339957986e-08, "loss": 0.5364, "step": 5592 }, { "epoch": 0.9693030913541735, "grad_norm": 2.4246005651052815, "learning_rate": 2.466456700165265e-08, "loss": 0.6084, "step": 5593 }, { "epoch": 0.9694763978250038, "grad_norm": 2.539562594379428, "learning_rate": 2.4386884737831463e-08, "loss": 0.5896, "step": 5594 }, { "epoch": 0.9696497042958342, "grad_norm": 3.0203602134147705, "learning_rate": 2.4110770636012747e-08, "loss": 0.5904, "step": 5595 }, { "epoch": 0.9698230107666646, "grad_norm": 2.333658586570997, "learning_rate": 2.3836224783218565e-08, "loss": 0.4468, "step": 5596 }, { "epoch": 0.9699963172374949, "grad_norm": 2.3617303949730153, "learning_rate": 2.3563247265977474e-08, "loss": 0.4807, "step": 5597 }, { "epoch": 0.9701696237083252, "grad_norm": 2.5253619340688305, "learning_rate": 2.3291838170323432e-08, "loss": 0.5644, "step": 5598 }, { "epoch": 0.9703429301791555, "grad_norm": 2.7872689793626013, "learning_rate": 2.3021997581796906e-08, "loss": 0.4169, "step": 5599 }, { "epoch": 0.9705162366499859, "grad_norm": 2.219322349965591, "learning_rate": 2.2753725585443198e-08, "loss": 0.5351, "step": 5600 }, { "epoch": 0.9706895431208162, "grad_norm": 2.9569423408570845, "learning_rate": 2.248702226581301e-08, "loss": 0.5298, "step": 5601 }, { "epoch": 0.9708628495916466, "grad_norm": 2.3983174850461144, "learning_rate": 2.2221887706964097e-08, "loss": 0.5188, "step": 5602 }, { "epoch": 0.971036156062477, "grad_norm": 1.9990297650286146, "learning_rate": 2.1958321992457953e-08, "loss": 0.477, "step": 5603 }, { "epoch": 0.9712094625333073, "grad_norm": 3.2025342365007425, "learning_rate": 2.1696325205363134e-08, "loss": 0.5679, "step": 5604 }, { "epoch": 0.9713827690041377, "grad_norm": 2.2075204158836836, "learning_rate": 2.143589742825247e-08, "loss": 0.5353, "step": 5605 }, { "epoch": 0.971556075474968, "grad_norm": 3.1987295221905456, "learning_rate": 2.1177038743205313e-08, "loss": 0.5983, "step": 5606 }, { "epoch": 0.9717293819457984, "grad_norm": 2.552455513891099, "learning_rate": 2.091974923180584e-08, "loss": 0.5491, "step": 5607 }, { "epoch": 0.9719026884166287, "grad_norm": 2.3073429628221507, "learning_rate": 2.0664028975143634e-08, "loss": 0.5316, "step": 5608 }, { "epoch": 0.9720759948874591, "grad_norm": 2.9711286125992404, "learning_rate": 2.0409878053814226e-08, "loss": 0.5497, "step": 5609 }, { "epoch": 0.9722493013582895, "grad_norm": 2.3855184437039894, "learning_rate": 2.0157296547918536e-08, "loss": 0.6129, "step": 5610 }, { "epoch": 0.9724226078291198, "grad_norm": 2.221859246004241, "learning_rate": 1.9906284537062892e-08, "loss": 0.4998, "step": 5611 }, { "epoch": 0.9725959142999502, "grad_norm": 2.746603317562389, "learning_rate": 1.965684210035734e-08, "loss": 0.59, "step": 5612 }, { "epoch": 0.9727692207707805, "grad_norm": 2.2807041218633244, "learning_rate": 1.9408969316419555e-08, "loss": 0.4396, "step": 5613 }, { "epoch": 0.9729425272416109, "grad_norm": 2.0985659777111207, "learning_rate": 1.9162666263370932e-08, "loss": 0.4627, "step": 5614 }, { "epoch": 0.9731158337124413, "grad_norm": 2.3997414246363493, "learning_rate": 1.891793301883882e-08, "loss": 0.6168, "step": 5615 }, { "epoch": 0.9732891401832716, "grad_norm": 3.2535033852363004, "learning_rate": 1.8674769659956515e-08, "loss": 0.6147, "step": 5616 }, { "epoch": 0.973462446654102, "grad_norm": 2.5945227164237594, "learning_rate": 1.8433176263360497e-08, "loss": 0.5702, "step": 5617 }, { "epoch": 0.9736357531249323, "grad_norm": 2.661059124282773, "learning_rate": 1.8193152905194857e-08, "loss": 0.5438, "step": 5618 }, { "epoch": 0.9738090595957627, "grad_norm": 2.6398410945459787, "learning_rate": 1.7954699661106857e-08, "loss": 0.537, "step": 5619 }, { "epoch": 0.973982366066593, "grad_norm": 2.432613174441886, "learning_rate": 1.7717816606249715e-08, "loss": 0.5152, "step": 5620 }, { "epoch": 0.9741556725374234, "grad_norm": 2.6879432776395094, "learning_rate": 1.748250381528205e-08, "loss": 0.5177, "step": 5621 }, { "epoch": 0.9743289790082538, "grad_norm": 2.0510182909329697, "learning_rate": 1.7248761362367307e-08, "loss": 0.4758, "step": 5622 }, { "epoch": 0.9745022854790841, "grad_norm": 2.7595743706994016, "learning_rate": 1.701658932117378e-08, "loss": 0.4515, "step": 5623 }, { "epoch": 0.9746755919499144, "grad_norm": 2.179909938941603, "learning_rate": 1.6785987764875167e-08, "loss": 0.5694, "step": 5624 }, { "epoch": 0.9748488984207447, "grad_norm": 2.7905846226747757, "learning_rate": 1.655695676614999e-08, "loss": 0.5101, "step": 5625 }, { "epoch": 0.9750222048915751, "grad_norm": 3.567920379450725, "learning_rate": 1.6329496397182176e-08, "loss": 0.4803, "step": 5626 }, { "epoch": 0.9751955113624055, "grad_norm": 2.5106275484997598, "learning_rate": 1.6103606729659936e-08, "loss": 0.5383, "step": 5627 }, { "epoch": 0.9753688178332358, "grad_norm": 2.4142042050325743, "learning_rate": 1.587928783477688e-08, "loss": 0.5361, "step": 5628 }, { "epoch": 0.9755421243040662, "grad_norm": 2.84501420164471, "learning_rate": 1.565653978323145e-08, "loss": 0.4935, "step": 5629 }, { "epoch": 0.9757154307748965, "grad_norm": 3.3040627423312636, "learning_rate": 1.5435362645227492e-08, "loss": 0.4043, "step": 5630 }, { "epoch": 0.9758887372457269, "grad_norm": 2.3625941650486024, "learning_rate": 1.521575649047313e-08, "loss": 0.496, "step": 5631 }, { "epoch": 0.9760620437165572, "grad_norm": 2.1829732427463697, "learning_rate": 1.499772138818134e-08, "loss": 0.499, "step": 5632 }, { "epoch": 0.9762353501873876, "grad_norm": 2.5356943011728084, "learning_rate": 1.4781257407070481e-08, "loss": 0.5362, "step": 5633 }, { "epoch": 0.976408656658218, "grad_norm": 9.328532558604087, "learning_rate": 1.456636461536265e-08, "loss": 0.5067, "step": 5634 }, { "epoch": 0.9765819631290483, "grad_norm": 3.7820379106481563, "learning_rate": 1.4353043080786444e-08, "loss": 0.5688, "step": 5635 }, { "epoch": 0.9767552695998787, "grad_norm": 2.029377532946576, "learning_rate": 1.4141292870574198e-08, "loss": 0.5106, "step": 5636 }, { "epoch": 0.976928576070709, "grad_norm": 2.7528060199603894, "learning_rate": 1.3931114051462525e-08, "loss": 0.4714, "step": 5637 }, { "epoch": 0.9771018825415394, "grad_norm": 3.199338954871279, "learning_rate": 1.3722506689693439e-08, "loss": 0.472, "step": 5638 }, { "epoch": 0.9772751890123698, "grad_norm": 4.316708545369012, "learning_rate": 1.3515470851014344e-08, "loss": 0.5666, "step": 5639 }, { "epoch": 0.9774484954832001, "grad_norm": 2.6150903042952165, "learning_rate": 1.3310006600676384e-08, "loss": 0.5852, "step": 5640 }, { "epoch": 0.9776218019540305, "grad_norm": 2.83312591307017, "learning_rate": 1.3106114003435533e-08, "loss": 0.5691, "step": 5641 }, { "epoch": 0.9777951084248608, "grad_norm": 2.8349544547286367, "learning_rate": 1.2903793123552611e-08, "loss": 0.5192, "step": 5642 }, { "epoch": 0.9779684148956912, "grad_norm": 3.281712736391903, "learning_rate": 1.2703044024792721e-08, "loss": 0.575, "step": 5643 }, { "epoch": 0.9781417213665216, "grad_norm": 2.6515288828885013, "learning_rate": 1.2503866770426365e-08, "loss": 0.569, "step": 5644 }, { "epoch": 0.9783150278373519, "grad_norm": 2.989762411135211, "learning_rate": 1.2306261423227217e-08, "loss": 0.5429, "step": 5645 }, { "epoch": 0.9784883343081823, "grad_norm": 2.6838333769697864, "learning_rate": 1.2110228045476013e-08, "loss": 0.5451, "step": 5646 }, { "epoch": 0.9786616407790126, "grad_norm": 2.530831555475262, "learning_rate": 1.1915766698955e-08, "loss": 0.6046, "step": 5647 }, { "epoch": 0.978834947249843, "grad_norm": 2.198490439203994, "learning_rate": 1.1722877444953484e-08, "loss": 0.5463, "step": 5648 }, { "epoch": 0.9790082537206733, "grad_norm": 1.8919351840296394, "learning_rate": 1.1531560344263947e-08, "loss": 0.4637, "step": 5649 }, { "epoch": 0.9791815601915036, "grad_norm": 2.3134340632794594, "learning_rate": 1.1341815457183714e-08, "loss": 0.5453, "step": 5650 }, { "epoch": 0.979354866662334, "grad_norm": 2.3296958165519634, "learning_rate": 1.1153642843514389e-08, "loss": 0.5194, "step": 5651 }, { "epoch": 0.9795281731331643, "grad_norm": 3.2540448429658717, "learning_rate": 1.0967042562562424e-08, "loss": 0.5001, "step": 5652 }, { "epoch": 0.9797014796039947, "grad_norm": 2.9229729954559978, "learning_rate": 1.0782014673139108e-08, "loss": 0.464, "step": 5653 }, { "epoch": 0.979874786074825, "grad_norm": 2.2041979144957904, "learning_rate": 1.0598559233558348e-08, "loss": 0.4818, "step": 5654 }, { "epoch": 0.9800480925456554, "grad_norm": 2.226692784379591, "learning_rate": 1.0416676301641116e-08, "loss": 0.4947, "step": 5655 }, { "epoch": 0.9802213990164858, "grad_norm": 2.3279178270068903, "learning_rate": 1.0236365934710446e-08, "loss": 0.5414, "step": 5656 }, { "epoch": 0.9803947054873161, "grad_norm": 2.090855173884322, "learning_rate": 1.0057628189595326e-08, "loss": 0.4793, "step": 5657 }, { "epoch": 0.9805680119581465, "grad_norm": 3.3053300599068085, "learning_rate": 9.880463122627915e-09, "loss": 0.4606, "step": 5658 }, { "epoch": 0.9807413184289768, "grad_norm": 2.890874262880854, "learning_rate": 9.704870789645216e-09, "loss": 0.5396, "step": 5659 }, { "epoch": 0.9809146248998072, "grad_norm": 2.67738224357483, "learning_rate": 9.53085124598907e-09, "loss": 0.4929, "step": 5660 }, { "epoch": 0.9810879313706375, "grad_norm": 2.849765671936577, "learning_rate": 9.35840454650505e-09, "loss": 0.4895, "step": 5661 }, { "epoch": 0.9812612378414679, "grad_norm": 1.9879980804171142, "learning_rate": 9.187530745543016e-09, "loss": 0.5307, "step": 5662 }, { "epoch": 0.9814345443122983, "grad_norm": 2.347674016522096, "learning_rate": 9.018229896957109e-09, "loss": 0.4962, "step": 5663 }, { "epoch": 0.9816078507831286, "grad_norm": 2.3147478674483977, "learning_rate": 8.85050205410576e-09, "loss": 0.4707, "step": 5664 }, { "epoch": 0.981781157253959, "grad_norm": 2.9035068640500596, "learning_rate": 8.684347269851679e-09, "loss": 0.5878, "step": 5665 }, { "epoch": 0.9819544637247893, "grad_norm": 2.0947776668687674, "learning_rate": 8.51976559656187e-09, "loss": 0.5422, "step": 5666 }, { "epoch": 0.9821277701956197, "grad_norm": 2.459887328813207, "learning_rate": 8.356757086107614e-09, "loss": 0.4318, "step": 5667 }, { "epoch": 0.9823010766664501, "grad_norm": 3.498873454571163, "learning_rate": 8.195321789864485e-09, "loss": 0.544, "step": 5668 }, { "epoch": 0.9824743831372804, "grad_norm": 2.7813264287670623, "learning_rate": 8.035459758711228e-09, "loss": 0.5441, "step": 5669 }, { "epoch": 0.9826476896081108, "grad_norm": 2.0585041724622326, "learning_rate": 7.877171043031983e-09, "loss": 0.4247, "step": 5670 }, { "epoch": 0.9828209960789411, "grad_norm": 2.3976969153993033, "learning_rate": 7.720455692714624e-09, "loss": 0.5965, "step": 5671 }, { "epoch": 0.9829943025497715, "grad_norm": 3.263344515981283, "learning_rate": 7.56531375715075e-09, "loss": 0.5811, "step": 5672 }, { "epoch": 0.9831676090206019, "grad_norm": 2.406427358554152, "learning_rate": 7.411745285236804e-09, "loss": 0.6316, "step": 5673 }, { "epoch": 0.9833409154914322, "grad_norm": 2.575825419439037, "learning_rate": 7.259750325372405e-09, "loss": 0.4778, "step": 5674 }, { "epoch": 0.9835142219622626, "grad_norm": 2.6452780970356757, "learning_rate": 7.109328925462011e-09, "loss": 0.5286, "step": 5675 }, { "epoch": 0.9836875284330928, "grad_norm": 2.7470327243108783, "learning_rate": 6.96048113291381e-09, "loss": 0.5772, "step": 5676 }, { "epoch": 0.9838608349039232, "grad_norm": 2.9136520990808124, "learning_rate": 6.813206994640276e-09, "loss": 0.5494, "step": 5677 }, { "epoch": 0.9840341413747535, "grad_norm": 2.8828118345565654, "learning_rate": 6.667506557057057e-09, "loss": 0.5833, "step": 5678 }, { "epoch": 0.9842074478455839, "grad_norm": 2.1444379068327173, "learning_rate": 6.523379866085755e-09, "loss": 0.4958, "step": 5679 }, { "epoch": 0.9843807543164143, "grad_norm": 2.273471288602509, "learning_rate": 6.380826967149478e-09, "loss": 0.5521, "step": 5680 }, { "epoch": 0.9845540607872446, "grad_norm": 3.0965416390344527, "learning_rate": 6.239847905177287e-09, "loss": 0.5033, "step": 5681 }, { "epoch": 0.984727367258075, "grad_norm": 2.3784135707320035, "learning_rate": 6.100442724601419e-09, "loss": 0.5393, "step": 5682 }, { "epoch": 0.9849006737289053, "grad_norm": 2.890541862244872, "learning_rate": 5.9626114693578374e-09, "loss": 0.5755, "step": 5683 }, { "epoch": 0.9850739801997357, "grad_norm": 2.000355873988073, "learning_rate": 5.82635418288735e-09, "loss": 0.4974, "step": 5684 }, { "epoch": 0.985247286670566, "grad_norm": 2.0762605094104494, "learning_rate": 5.691670908133384e-09, "loss": 0.5019, "step": 5685 }, { "epoch": 0.9854205931413964, "grad_norm": 2.382508396813469, "learning_rate": 5.558561687544206e-09, "loss": 0.4823, "step": 5686 }, { "epoch": 0.9855938996122268, "grad_norm": 2.202154582513856, "learning_rate": 5.4270265630723684e-09, "loss": 0.5235, "step": 5687 }, { "epoch": 0.9857672060830571, "grad_norm": 2.3327316749753875, "learning_rate": 5.297065576173044e-09, "loss": 0.556, "step": 5688 }, { "epoch": 0.9859405125538875, "grad_norm": 2.263786074319025, "learning_rate": 5.1686787678062455e-09, "loss": 0.5618, "step": 5689 }, { "epoch": 0.9861138190247178, "grad_norm": 3.4648053478642162, "learning_rate": 5.041866178435717e-09, "loss": 0.5273, "step": 5690 }, { "epoch": 0.9862871254955482, "grad_norm": 2.811165674820997, "learning_rate": 4.916627848028932e-09, "loss": 0.4906, "step": 5691 }, { "epoch": 0.9864604319663786, "grad_norm": 2.3054273338299343, "learning_rate": 4.792963816057095e-09, "loss": 0.5485, "step": 5692 }, { "epoch": 0.9866337384372089, "grad_norm": 2.631837230050439, "learning_rate": 4.670874121495139e-09, "loss": 0.501, "step": 5693 }, { "epoch": 0.9868070449080393, "grad_norm": 2.878467420021929, "learning_rate": 4.550358802822285e-09, "loss": 0.4769, "step": 5694 }, { "epoch": 0.9869803513788696, "grad_norm": 2.379486370487666, "learning_rate": 4.431417898020929e-09, "loss": 0.5588, "step": 5695 }, { "epoch": 0.9871536578497, "grad_norm": 2.421169422328221, "learning_rate": 4.314051444578305e-09, "loss": 0.5531, "step": 5696 }, { "epoch": 0.9873269643205304, "grad_norm": 2.4179248344084465, "learning_rate": 4.198259479484268e-09, "loss": 0.5457, "step": 5697 }, { "epoch": 0.9875002707913607, "grad_norm": 2.150654984919235, "learning_rate": 4.084042039233515e-09, "loss": 0.5079, "step": 5698 }, { "epoch": 0.9876735772621911, "grad_norm": 2.6255069650899965, "learning_rate": 3.971399159822809e-09, "loss": 0.5569, "step": 5699 }, { "epoch": 0.9878468837330214, "grad_norm": 2.6573908380088374, "learning_rate": 3.860330876754859e-09, "loss": 0.5579, "step": 5700 }, { "epoch": 0.9880201902038518, "grad_norm": 2.797834201199765, "learning_rate": 3.750837225034443e-09, "loss": 0.5307, "step": 5701 }, { "epoch": 0.988193496674682, "grad_norm": 2.865076360207601, "learning_rate": 3.642918239170623e-09, "loss": 0.5109, "step": 5702 }, { "epoch": 0.9883668031455124, "grad_norm": 2.9054927236899712, "learning_rate": 3.5365739531761923e-09, "loss": 0.5546, "step": 5703 }, { "epoch": 0.9885401096163428, "grad_norm": 2.335227583749937, "learning_rate": 3.431804400567673e-09, "loss": 0.515, "step": 5704 }, { "epoch": 0.9887134160871731, "grad_norm": 3.5748597585631066, "learning_rate": 3.328609614365874e-09, "loss": 0.5365, "step": 5705 }, { "epoch": 0.9888867225580035, "grad_norm": 3.739224794389751, "learning_rate": 3.2269896270936683e-09, "loss": 0.5433, "step": 5706 }, { "epoch": 0.9890600290288338, "grad_norm": 2.345634518686447, "learning_rate": 3.12694447077877e-09, "loss": 0.4743, "step": 5707 }, { "epoch": 0.9892333354996642, "grad_norm": 2.281191734453, "learning_rate": 3.028474176953178e-09, "loss": 0.4656, "step": 5708 }, { "epoch": 0.9894066419704945, "grad_norm": 2.5324080448309045, "learning_rate": 2.931578776650956e-09, "loss": 0.5776, "step": 5709 }, { "epoch": 0.9895799484413249, "grad_norm": 6.513400964858157, "learning_rate": 2.836258300410455e-09, "loss": 0.5467, "step": 5710 }, { "epoch": 0.9897532549121553, "grad_norm": 2.539111134894222, "learning_rate": 2.7425127782743088e-09, "loss": 0.5828, "step": 5711 }, { "epoch": 0.9899265613829856, "grad_norm": 2.7185237724000317, "learning_rate": 2.6503422397883284e-09, "loss": 0.5171, "step": 5712 }, { "epoch": 0.990099867853816, "grad_norm": 2.956038868252268, "learning_rate": 2.5597467140014986e-09, "loss": 0.5434, "step": 5713 }, { "epoch": 0.9902731743246463, "grad_norm": 2.756478386731311, "learning_rate": 2.4707262294670907e-09, "loss": 0.483, "step": 5714 }, { "epoch": 0.9904464807954767, "grad_norm": 2.525783882000441, "learning_rate": 2.3832808142415507e-09, "loss": 0.4681, "step": 5715 }, { "epoch": 0.9906197872663071, "grad_norm": 3.5165920089173235, "learning_rate": 2.2974104958845e-09, "loss": 0.5923, "step": 5716 }, { "epoch": 0.9907930937371374, "grad_norm": 2.949427621141639, "learning_rate": 2.2131153014604003e-09, "loss": 0.6461, "step": 5717 }, { "epoch": 0.9909664002079678, "grad_norm": 9.078525138422313, "learning_rate": 2.1303952575357777e-09, "loss": 0.5215, "step": 5718 }, { "epoch": 0.9911397066787981, "grad_norm": 2.4260802038264218, "learning_rate": 2.0492503901820004e-09, "loss": 0.4673, "step": 5719 }, { "epoch": 0.9913130131496285, "grad_norm": 2.4789511347153352, "learning_rate": 1.9696807249736104e-09, "loss": 0.5553, "step": 5720 }, { "epoch": 0.9914863196204589, "grad_norm": 2.426538595825305, "learning_rate": 1.8916862869883255e-09, "loss": 0.4985, "step": 5721 }, { "epoch": 0.9916596260912892, "grad_norm": 3.571586770406323, "learning_rate": 1.815267100807594e-09, "loss": 0.468, "step": 5722 }, { "epoch": 0.9918329325621196, "grad_norm": 3.2901370570050648, "learning_rate": 1.7404231905165935e-09, "loss": 0.5593, "step": 5723 }, { "epoch": 0.9920062390329499, "grad_norm": 3.6893406717233996, "learning_rate": 1.6671545797036782e-09, "loss": 0.5127, "step": 5724 }, { "epoch": 0.9921795455037803, "grad_norm": 4.412990418096532, "learning_rate": 1.5954612914609314e-09, "loss": 0.5659, "step": 5725 }, { "epoch": 0.9923528519746107, "grad_norm": 2.630265307044605, "learning_rate": 1.5253433483836122e-09, "loss": 0.4896, "step": 5726 }, { "epoch": 0.992526158445441, "grad_norm": 2.444037944653164, "learning_rate": 1.4568007725707101e-09, "loss": 0.4919, "step": 5727 }, { "epoch": 0.9926994649162713, "grad_norm": 2.527719591657687, "learning_rate": 1.3898335856254996e-09, "loss": 0.4534, "step": 5728 }, { "epoch": 0.9928727713871016, "grad_norm": 3.050027935045058, "learning_rate": 1.3244418086538757e-09, "loss": 0.482, "step": 5729 }, { "epoch": 0.993046077857932, "grad_norm": 2.43888292025094, "learning_rate": 1.2606254622643532e-09, "loss": 0.4782, "step": 5730 }, { "epoch": 0.9932193843287623, "grad_norm": 2.514637582209195, "learning_rate": 1.1983845665713978e-09, "loss": 0.4612, "step": 5731 }, { "epoch": 0.9933926907995927, "grad_norm": 4.585141840532927, "learning_rate": 1.1377191411898747e-09, "loss": 0.4485, "step": 5732 }, { "epoch": 0.993565997270423, "grad_norm": 2.9059659563341325, "learning_rate": 1.0786292052406e-09, "loss": 0.5112, "step": 5733 }, { "epoch": 0.9937393037412534, "grad_norm": 3.349647302274822, "learning_rate": 1.0211147773470099e-09, "loss": 0.5386, "step": 5734 }, { "epoch": 0.9939126102120838, "grad_norm": 1.9244492881028323, "learning_rate": 9.651758756357155e-10, "loss": 0.5365, "step": 5735 }, { "epoch": 0.9940859166829141, "grad_norm": 2.313711758030875, "learning_rate": 9.108125177370586e-10, "loss": 0.5534, "step": 5736 }, { "epoch": 0.9942592231537445, "grad_norm": 2.814476191272522, "learning_rate": 8.580247207840009e-10, "loss": 0.5401, "step": 5737 }, { "epoch": 0.9944325296245748, "grad_norm": 2.481974602382777, "learning_rate": 8.068125014143446e-10, "loss": 0.4445, "step": 5738 }, { "epoch": 0.9946058360954052, "grad_norm": 2.429829715247168, "learning_rate": 7.571758757690672e-10, "loss": 0.5378, "step": 5739 }, { "epoch": 0.9947791425662356, "grad_norm": 2.5755367671672347, "learning_rate": 7.091148594912112e-10, "loss": 0.4989, "step": 5740 }, { "epoch": 0.9949524490370659, "grad_norm": 2.4562559428150004, "learning_rate": 6.626294677281042e-10, "loss": 0.5372, "step": 5741 }, { "epoch": 0.9951257555078963, "grad_norm": 2.293436389583493, "learning_rate": 6.177197151313597e-10, "loss": 0.5473, "step": 5742 }, { "epoch": 0.9952990619787266, "grad_norm": 3.090387277423129, "learning_rate": 5.743856158546557e-10, "loss": 0.4605, "step": 5743 }, { "epoch": 0.995472368449557, "grad_norm": 2.1269465678272033, "learning_rate": 5.326271835554008e-10, "loss": 0.5329, "step": 5744 }, { "epoch": 0.9956456749203874, "grad_norm": 3.0009088281378213, "learning_rate": 4.924444313952892e-10, "loss": 0.5135, "step": 5745 }, { "epoch": 0.9958189813912177, "grad_norm": 2.5921544558981373, "learning_rate": 4.5383737203807953e-10, "loss": 0.4355, "step": 5746 }, { "epoch": 0.9959922878620481, "grad_norm": 2.2974987870524464, "learning_rate": 4.1680601765181626e-10, "loss": 0.4123, "step": 5747 }, { "epoch": 0.9961655943328784, "grad_norm": 2.378691528498749, "learning_rate": 3.8135037990716383e-10, "loss": 0.5092, "step": 5748 }, { "epoch": 0.9963389008037088, "grad_norm": 2.69123947433364, "learning_rate": 3.474704699790721e-10, "loss": 0.5244, "step": 5749 }, { "epoch": 0.9965122072745392, "grad_norm": 9.235399709818074, "learning_rate": 3.1516629854566606e-10, "loss": 0.5171, "step": 5750 }, { "epoch": 0.9966855137453695, "grad_norm": 3.0753121123858804, "learning_rate": 2.84437875788246e-10, "loss": 0.5439, "step": 5751 }, { "epoch": 0.9968588202161999, "grad_norm": 3.0208745774256855, "learning_rate": 2.5528521139073227e-10, "loss": 0.5022, "step": 5752 }, { "epoch": 0.9970321266870302, "grad_norm": 2.8878323081337034, "learning_rate": 2.277083145418857e-10, "loss": 0.5805, "step": 5753 }, { "epoch": 0.9972054331578605, "grad_norm": 2.316581301640426, "learning_rate": 2.0170719393308723e-10, "loss": 0.4666, "step": 5754 }, { "epoch": 0.9973787396286908, "grad_norm": 2.078758235951213, "learning_rate": 1.7728185775833795e-10, "loss": 0.4795, "step": 5755 }, { "epoch": 0.9975520460995212, "grad_norm": 3.092997994180183, "learning_rate": 1.544323137170345e-10, "loss": 0.5216, "step": 5756 }, { "epoch": 0.9977253525703516, "grad_norm": 2.617344364179748, "learning_rate": 1.3315856900952827e-10, "loss": 0.4618, "step": 5757 }, { "epoch": 0.9978986590411819, "grad_norm": 8.011462342025899, "learning_rate": 1.1346063034101129e-10, "loss": 0.4506, "step": 5758 }, { "epoch": 0.9980719655120123, "grad_norm": 2.538550717569075, "learning_rate": 9.533850391985066e-11, "loss": 0.5652, "step": 5759 }, { "epoch": 0.9982452719828426, "grad_norm": 2.3701638893096795, "learning_rate": 7.879219545758875e-11, "loss": 0.5217, "step": 5760 }, { "epoch": 0.998418578453673, "grad_norm": 6.134794796525318, "learning_rate": 6.382171016894312e-11, "loss": 0.4836, "step": 5761 }, { "epoch": 0.9985918849245033, "grad_norm": 2.4964361011237357, "learning_rate": 5.0427052771806485e-11, "loss": 0.5546, "step": 5762 }, { "epoch": 0.9987651913953337, "grad_norm": 2.7255988315908524, "learning_rate": 3.860822748891213e-11, "loss": 0.5375, "step": 5763 }, { "epoch": 0.9989384978661641, "grad_norm": 2.411151420242504, "learning_rate": 2.8365238043948085e-11, "loss": 0.4614, "step": 5764 }, { "epoch": 0.9991118043369944, "grad_norm": 2.664909610577485, "learning_rate": 1.9698087665998012e-11, "loss": 0.5072, "step": 5765 }, { "epoch": 0.9992851108078248, "grad_norm": 2.6204266109702687, "learning_rate": 1.2606779086210553e-11, "loss": 0.4787, "step": 5766 }, { "epoch": 0.9994584172786551, "grad_norm": 3.8266716869776527, "learning_rate": 7.0913145394646555e-12, "loss": 0.5666, "step": 5767 }, { "epoch": 0.9996317237494855, "grad_norm": 2.3717307126852853, "learning_rate": 3.1516957649246893e-12, "loss": 0.515, "step": 5768 }, { "epoch": 0.9998050302203159, "grad_norm": 2.3757338378932813, "learning_rate": 7.879240032648838e-13, "loss": 0.567, "step": 5769 }, { "epoch": 0.9999783366911462, "grad_norm": 2.3144209481537454, "learning_rate": 0.0, "loss": 0.6358, "step": 5770 }, { "epoch": 0.9999783366911462, "step": 5770, "total_flos": 2281912387698688.0, "train_loss": 0.3518706146931111, "train_runtime": 74679.0977, "train_samples_per_second": 9.89, "train_steps_per_second": 0.077 } ], "logging_steps": 1.0, "max_steps": 5770, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 2000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2281912387698688.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }