{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.999691643539932, "eval_steps": 500, "global_step": 14592, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00020557097337855896, "grad_norm": 6.1312150955200195, "learning_rate": 2.0547945205479452e-07, "loss": 1.5536, "step": 1 }, { "epoch": 0.0004111419467571179, "grad_norm": 6.5972065925598145, "learning_rate": 4.1095890410958903e-07, "loss": 1.5863, "step": 2 }, { "epoch": 0.0006167129201356768, "grad_norm": 5.653270244598389, "learning_rate": 6.164383561643835e-07, "loss": 1.491, "step": 3 }, { "epoch": 0.0008222838935142358, "grad_norm": 6.296363830566406, "learning_rate": 8.219178082191781e-07, "loss": 1.5794, "step": 4 }, { "epoch": 0.0010278548668927947, "grad_norm": 1.6855748891830444, "learning_rate": 1.0273972602739727e-06, "loss": 0.8016, "step": 5 }, { "epoch": 0.0012334258402713536, "grad_norm": 5.977898120880127, "learning_rate": 1.232876712328767e-06, "loss": 1.5352, "step": 6 }, { "epoch": 0.0014389968136499125, "grad_norm": 1.6171352863311768, "learning_rate": 1.4383561643835616e-06, "loss": 0.7893, "step": 7 }, { "epoch": 0.0016445677870284717, "grad_norm": 5.830033302307129, "learning_rate": 1.6438356164383561e-06, "loss": 1.5201, "step": 8 }, { "epoch": 0.0018501387604070306, "grad_norm": 5.7432990074157715, "learning_rate": 1.8493150684931507e-06, "loss": 1.5304, "step": 9 }, { "epoch": 0.0020557097337855893, "grad_norm": 5.845605373382568, "learning_rate": 2.0547945205479454e-06, "loss": 1.5429, "step": 10 }, { "epoch": 0.0022612807071641485, "grad_norm": 5.0305399894714355, "learning_rate": 2.2602739726027396e-06, "loss": 1.4662, "step": 11 }, { "epoch": 0.002466851680542707, "grad_norm": 5.128103256225586, "learning_rate": 2.465753424657534e-06, "loss": 1.4793, "step": 12 }, { "epoch": 0.0026724226539212663, "grad_norm": 4.975289344787598, "learning_rate": 2.6712328767123286e-06, "loss": 1.4448, "step": 13 }, { "epoch": 0.002877993627299825, "grad_norm": 4.8694987297058105, "learning_rate": 2.876712328767123e-06, "loss": 1.4351, "step": 14 }, { "epoch": 0.003083564600678384, "grad_norm": 1.9994945526123047, "learning_rate": 3.0821917808219177e-06, "loss": 0.7466, "step": 15 }, { "epoch": 0.0032891355740569434, "grad_norm": 5.214486598968506, "learning_rate": 3.2876712328767123e-06, "loss": 1.293, "step": 16 }, { "epoch": 0.003494706547435502, "grad_norm": 5.771518707275391, "learning_rate": 3.493150684931507e-06, "loss": 1.2649, "step": 17 }, { "epoch": 0.0037002775208140612, "grad_norm": 5.648902893066406, "learning_rate": 3.6986301369863014e-06, "loss": 1.2114, "step": 18 }, { "epoch": 0.00390584849419262, "grad_norm": 2.0775961875915527, "learning_rate": 3.904109589041096e-06, "loss": 1.1923, "step": 19 }, { "epoch": 0.004111419467571179, "grad_norm": 1.4989817142486572, "learning_rate": 4.109589041095891e-06, "loss": 1.1838, "step": 20 }, { "epoch": 0.004316990440949738, "grad_norm": 1.3304322957992554, "learning_rate": 4.315068493150685e-06, "loss": 1.1334, "step": 21 }, { "epoch": 0.004522561414328297, "grad_norm": 1.2907218933105469, "learning_rate": 4.520547945205479e-06, "loss": 1.126, "step": 22 }, { "epoch": 0.004728132387706856, "grad_norm": 1.3639134168624878, "learning_rate": 4.726027397260274e-06, "loss": 1.1328, "step": 23 }, { "epoch": 0.004933703361085414, "grad_norm": 1.2764439582824707, "learning_rate": 4.931506849315068e-06, "loss": 1.1774, "step": 24 }, { "epoch": 0.005139274334463974, "grad_norm": 1.016863465309143, "learning_rate": 5.136986301369863e-06, "loss": 0.7168, "step": 25 }, { "epoch": 0.005344845307842533, "grad_norm": 0.9651275277137756, "learning_rate": 5.342465753424657e-06, "loss": 0.7096, "step": 26 }, { "epoch": 0.005550416281221091, "grad_norm": 0.922505795955658, "learning_rate": 5.547945205479452e-06, "loss": 1.0915, "step": 27 }, { "epoch": 0.00575598725459965, "grad_norm": 1.0005972385406494, "learning_rate": 5.753424657534246e-06, "loss": 1.1167, "step": 28 }, { "epoch": 0.00596155822797821, "grad_norm": 1.006510615348816, "learning_rate": 5.958904109589041e-06, "loss": 1.1025, "step": 29 }, { "epoch": 0.006167129201356768, "grad_norm": 1.069066047668457, "learning_rate": 6.1643835616438354e-06, "loss": 1.0833, "step": 30 }, { "epoch": 0.006372700174735327, "grad_norm": 1.1197434663772583, "learning_rate": 6.36986301369863e-06, "loss": 0.7086, "step": 31 }, { "epoch": 0.006578271148113887, "grad_norm": 1.1849225759506226, "learning_rate": 6.5753424657534245e-06, "loss": 0.7198, "step": 32 }, { "epoch": 0.0067838421214924454, "grad_norm": 1.0908714532852173, "learning_rate": 6.7808219178082195e-06, "loss": 1.0882, "step": 33 }, { "epoch": 0.006989413094871004, "grad_norm": 1.1033886671066284, "learning_rate": 6.986301369863014e-06, "loss": 1.0619, "step": 34 }, { "epoch": 0.007194984068249563, "grad_norm": 0.9067010283470154, "learning_rate": 7.191780821917809e-06, "loss": 1.0383, "step": 35 }, { "epoch": 0.0074005550416281225, "grad_norm": 0.7680827379226685, "learning_rate": 7.397260273972603e-06, "loss": 1.0172, "step": 36 }, { "epoch": 0.007606126015006681, "grad_norm": 0.6832679510116577, "learning_rate": 7.602739726027398e-06, "loss": 1.0656, "step": 37 }, { "epoch": 0.00781169698838524, "grad_norm": 0.631285548210144, "learning_rate": 7.808219178082192e-06, "loss": 1.0222, "step": 38 }, { "epoch": 0.008017267961763799, "grad_norm": 0.6489036083221436, "learning_rate": 8.013698630136987e-06, "loss": 1.0526, "step": 39 }, { "epoch": 0.008222838935142357, "grad_norm": 0.7755447626113892, "learning_rate": 8.219178082191782e-06, "loss": 0.7246, "step": 40 }, { "epoch": 0.008428409908520916, "grad_norm": 0.7579307556152344, "learning_rate": 8.424657534246575e-06, "loss": 1.0303, "step": 41 }, { "epoch": 0.008633980881899476, "grad_norm": 0.746900200843811, "learning_rate": 8.63013698630137e-06, "loss": 1.0118, "step": 42 }, { "epoch": 0.008839551855278035, "grad_norm": 0.6753754615783691, "learning_rate": 8.835616438356165e-06, "loss": 1.0531, "step": 43 }, { "epoch": 0.009045122828656594, "grad_norm": 0.6792585253715515, "learning_rate": 9.041095890410958e-06, "loss": 1.0317, "step": 44 }, { "epoch": 0.009250693802035153, "grad_norm": 0.6036022305488586, "learning_rate": 9.246575342465753e-06, "loss": 1.0008, "step": 45 }, { "epoch": 0.009456264775413711, "grad_norm": 0.5249003767967224, "learning_rate": 9.452054794520548e-06, "loss": 1.0103, "step": 46 }, { "epoch": 0.00966183574879227, "grad_norm": 0.48237892985343933, "learning_rate": 9.657534246575343e-06, "loss": 1.0129, "step": 47 }, { "epoch": 0.009867406722170829, "grad_norm": 0.4669821858406067, "learning_rate": 9.863013698630136e-06, "loss": 0.6748, "step": 48 }, { "epoch": 0.01007297769554939, "grad_norm": 0.7257899045944214, "learning_rate": 1.0068493150684931e-05, "loss": 1.0394, "step": 49 }, { "epoch": 0.010278548668927948, "grad_norm": 0.5101274847984314, "learning_rate": 1.0273972602739726e-05, "loss": 0.9956, "step": 50 }, { "epoch": 0.010484119642306507, "grad_norm": 0.4904460906982422, "learning_rate": 1.0479452054794521e-05, "loss": 1.0081, "step": 51 }, { "epoch": 0.010689690615685065, "grad_norm": 0.49294978380203247, "learning_rate": 1.0684931506849315e-05, "loss": 0.9707, "step": 52 }, { "epoch": 0.010895261589063624, "grad_norm": 0.5110352039337158, "learning_rate": 1.089041095890411e-05, "loss": 0.9684, "step": 53 }, { "epoch": 0.011100832562442183, "grad_norm": 0.44021663069725037, "learning_rate": 1.1095890410958904e-05, "loss": 0.9872, "step": 54 }, { "epoch": 0.011306403535820742, "grad_norm": 0.5229463577270508, "learning_rate": 1.13013698630137e-05, "loss": 0.9821, "step": 55 }, { "epoch": 0.0115119745091993, "grad_norm": 0.4633481502532959, "learning_rate": 1.1506849315068493e-05, "loss": 0.9858, "step": 56 }, { "epoch": 0.01171754548257786, "grad_norm": 0.43951645493507385, "learning_rate": 1.1712328767123288e-05, "loss": 0.9608, "step": 57 }, { "epoch": 0.01192311645595642, "grad_norm": 0.46415814757347107, "learning_rate": 1.1917808219178083e-05, "loss": 0.9831, "step": 58 }, { "epoch": 0.012128687429334978, "grad_norm": 0.35238775610923767, "learning_rate": 1.2123287671232878e-05, "loss": 0.671, "step": 59 }, { "epoch": 0.012334258402713537, "grad_norm": 0.4979459047317505, "learning_rate": 1.2328767123287671e-05, "loss": 0.9634, "step": 60 }, { "epoch": 0.012539829376092096, "grad_norm": 0.40928781032562256, "learning_rate": 1.2534246575342466e-05, "loss": 0.9618, "step": 61 }, { "epoch": 0.012745400349470654, "grad_norm": 0.35449472069740295, "learning_rate": 1.273972602739726e-05, "loss": 0.6745, "step": 62 }, { "epoch": 0.012950971322849213, "grad_norm": 0.5600117444992065, "learning_rate": 1.2945205479452054e-05, "loss": 0.9651, "step": 63 }, { "epoch": 0.013156542296227773, "grad_norm": 0.4429936110973358, "learning_rate": 1.3150684931506849e-05, "loss": 0.9478, "step": 64 }, { "epoch": 0.013362113269606332, "grad_norm": 0.47870925068855286, "learning_rate": 1.3356164383561644e-05, "loss": 0.9631, "step": 65 }, { "epoch": 0.013567684242984891, "grad_norm": 0.4984883964061737, "learning_rate": 1.3561643835616439e-05, "loss": 0.9612, "step": 66 }, { "epoch": 0.01377325521636345, "grad_norm": 0.43905192613601685, "learning_rate": 1.3767123287671232e-05, "loss": 0.9495, "step": 67 }, { "epoch": 0.013978826189742008, "grad_norm": 0.4528709650039673, "learning_rate": 1.3972602739726027e-05, "loss": 0.9597, "step": 68 }, { "epoch": 0.014184397163120567, "grad_norm": 0.2834670841693878, "learning_rate": 1.4178082191780822e-05, "loss": 0.6768, "step": 69 }, { "epoch": 0.014389968136499126, "grad_norm": 0.736508846282959, "learning_rate": 1.4383561643835617e-05, "loss": 0.9616, "step": 70 }, { "epoch": 0.014595539109877684, "grad_norm": 0.2635529935359955, "learning_rate": 1.458904109589041e-05, "loss": 0.6671, "step": 71 }, { "epoch": 0.014801110083256245, "grad_norm": 0.5397729873657227, "learning_rate": 1.4794520547945205e-05, "loss": 0.9488, "step": 72 }, { "epoch": 0.015006681056634804, "grad_norm": 0.23914408683776855, "learning_rate": 1.5e-05, "loss": 0.6537, "step": 73 }, { "epoch": 0.015212252030013362, "grad_norm": 0.6451640129089355, "learning_rate": 1.5205479452054795e-05, "loss": 0.954, "step": 74 }, { "epoch": 0.015417823003391921, "grad_norm": 0.37705564498901367, "learning_rate": 1.541095890410959e-05, "loss": 0.9367, "step": 75 }, { "epoch": 0.01562339397677048, "grad_norm": 0.5562038421630859, "learning_rate": 1.5616438356164384e-05, "loss": 0.9374, "step": 76 }, { "epoch": 0.01582896495014904, "grad_norm": 0.2332352101802826, "learning_rate": 1.582191780821918e-05, "loss": 0.6542, "step": 77 }, { "epoch": 0.016034535923527597, "grad_norm": 0.5999805331230164, "learning_rate": 1.6027397260273974e-05, "loss": 0.9342, "step": 78 }, { "epoch": 0.016240106896906158, "grad_norm": 0.3581260144710541, "learning_rate": 1.623287671232877e-05, "loss": 0.9542, "step": 79 }, { "epoch": 0.016445677870284715, "grad_norm": 0.5643858909606934, "learning_rate": 1.6438356164383563e-05, "loss": 0.9312, "step": 80 }, { "epoch": 0.016651248843663275, "grad_norm": 0.5196654200553894, "learning_rate": 1.6643835616438355e-05, "loss": 0.9256, "step": 81 }, { "epoch": 0.016856819817041832, "grad_norm": 0.37860536575317383, "learning_rate": 1.684931506849315e-05, "loss": 0.9139, "step": 82 }, { "epoch": 0.017062390790420393, "grad_norm": 0.6562532186508179, "learning_rate": 1.7054794520547945e-05, "loss": 0.8984, "step": 83 }, { "epoch": 0.017267961763798953, "grad_norm": 0.4133750796318054, "learning_rate": 1.726027397260274e-05, "loss": 0.905, "step": 84 }, { "epoch": 0.01747353273717751, "grad_norm": 0.38232654333114624, "learning_rate": 1.7465753424657535e-05, "loss": 0.9202, "step": 85 }, { "epoch": 0.01767910371055607, "grad_norm": 0.5049018859863281, "learning_rate": 1.767123287671233e-05, "loss": 0.9235, "step": 86 }, { "epoch": 0.017884674683934627, "grad_norm": 0.4014778137207031, "learning_rate": 1.7876712328767125e-05, "loss": 0.9272, "step": 87 }, { "epoch": 0.018090245657313188, "grad_norm": 0.45734459161758423, "learning_rate": 1.8082191780821916e-05, "loss": 0.9312, "step": 88 }, { "epoch": 0.018295816630691745, "grad_norm": 0.46464303135871887, "learning_rate": 1.828767123287671e-05, "loss": 0.9394, "step": 89 }, { "epoch": 0.018501387604070305, "grad_norm": 0.39655131101608276, "learning_rate": 1.8493150684931506e-05, "loss": 0.9133, "step": 90 }, { "epoch": 0.018706958577448866, "grad_norm": 0.36367830634117126, "learning_rate": 1.86986301369863e-05, "loss": 0.9085, "step": 91 }, { "epoch": 0.018912529550827423, "grad_norm": 0.4867264926433563, "learning_rate": 1.8904109589041096e-05, "loss": 0.8848, "step": 92 }, { "epoch": 0.019118100524205983, "grad_norm": 0.3669883906841278, "learning_rate": 1.910958904109589e-05, "loss": 0.8986, "step": 93 }, { "epoch": 0.01932367149758454, "grad_norm": 0.4508739411830902, "learning_rate": 1.9315068493150686e-05, "loss": 0.9478, "step": 94 }, { "epoch": 0.0195292424709631, "grad_norm": 0.4065166711807251, "learning_rate": 1.952054794520548e-05, "loss": 0.9318, "step": 95 }, { "epoch": 0.019734813444341658, "grad_norm": 0.21278417110443115, "learning_rate": 1.9726027397260273e-05, "loss": 0.6272, "step": 96 }, { "epoch": 0.019940384417720218, "grad_norm": 0.5677651762962341, "learning_rate": 1.9931506849315068e-05, "loss": 0.9209, "step": 97 }, { "epoch": 0.02014595539109878, "grad_norm": 0.4079231023788452, "learning_rate": 2.0136986301369863e-05, "loss": 0.9189, "step": 98 }, { "epoch": 0.020351526364477335, "grad_norm": 0.3942011892795563, "learning_rate": 2.0342465753424658e-05, "loss": 0.8827, "step": 99 }, { "epoch": 0.020557097337855896, "grad_norm": 0.5771577954292297, "learning_rate": 2.0547945205479453e-05, "loss": 0.885, "step": 100 }, { "epoch": 0.020762668311234453, "grad_norm": 0.35876256227493286, "learning_rate": 2.0753424657534248e-05, "loss": 0.867, "step": 101 }, { "epoch": 0.020968239284613013, "grad_norm": 0.47500577569007874, "learning_rate": 2.0958904109589043e-05, "loss": 0.8921, "step": 102 }, { "epoch": 0.02117381025799157, "grad_norm": 0.4215965270996094, "learning_rate": 2.1164383561643834e-05, "loss": 0.883, "step": 103 }, { "epoch": 0.02137938123137013, "grad_norm": 0.41377994418144226, "learning_rate": 2.136986301369863e-05, "loss": 0.9116, "step": 104 }, { "epoch": 0.021584952204748688, "grad_norm": 0.4422590434551239, "learning_rate": 2.1575342465753424e-05, "loss": 0.9215, "step": 105 }, { "epoch": 0.021790523178127248, "grad_norm": 0.39756667613983154, "learning_rate": 2.178082191780822e-05, "loss": 0.8749, "step": 106 }, { "epoch": 0.02199609415150581, "grad_norm": 0.3924627900123596, "learning_rate": 2.1986301369863014e-05, "loss": 0.9013, "step": 107 }, { "epoch": 0.022201665124884366, "grad_norm": 0.4422127306461334, "learning_rate": 2.219178082191781e-05, "loss": 0.8741, "step": 108 }, { "epoch": 0.022407236098262926, "grad_norm": 0.37621861696243286, "learning_rate": 2.2397260273972604e-05, "loss": 0.8726, "step": 109 }, { "epoch": 0.022612807071641483, "grad_norm": 0.38060134649276733, "learning_rate": 2.26027397260274e-05, "loss": 0.8584, "step": 110 }, { "epoch": 0.022818378045020044, "grad_norm": 0.2121458202600479, "learning_rate": 2.2808219178082194e-05, "loss": 0.6438, "step": 111 }, { "epoch": 0.0230239490183986, "grad_norm": 0.5301511883735657, "learning_rate": 2.3013698630136985e-05, "loss": 0.894, "step": 112 }, { "epoch": 0.02322951999177716, "grad_norm": 0.3643994629383087, "learning_rate": 2.3219178082191784e-05, "loss": 0.8608, "step": 113 }, { "epoch": 0.02343509096515572, "grad_norm": 0.4830370843410492, "learning_rate": 2.3424657534246575e-05, "loss": 0.9062, "step": 114 }, { "epoch": 0.02364066193853428, "grad_norm": 0.384884774684906, "learning_rate": 2.3630136986301374e-05, "loss": 0.8855, "step": 115 }, { "epoch": 0.02384623291191284, "grad_norm": 0.3976382315158844, "learning_rate": 2.3835616438356165e-05, "loss": 0.8806, "step": 116 }, { "epoch": 0.024051803885291396, "grad_norm": 0.1835232675075531, "learning_rate": 2.404109589041096e-05, "loss": 0.611, "step": 117 }, { "epoch": 0.024257374858669956, "grad_norm": 0.5072860717773438, "learning_rate": 2.4246575342465755e-05, "loss": 0.9086, "step": 118 }, { "epoch": 0.024462945832048513, "grad_norm": 0.3984593152999878, "learning_rate": 2.445205479452055e-05, "loss": 0.8694, "step": 119 }, { "epoch": 0.024668516805427074, "grad_norm": 0.4669335186481476, "learning_rate": 2.4657534246575342e-05, "loss": 0.8798, "step": 120 }, { "epoch": 0.024874087778805634, "grad_norm": 0.4184141159057617, "learning_rate": 2.486301369863014e-05, "loss": 0.8805, "step": 121 }, { "epoch": 0.02507965875218419, "grad_norm": 0.4648849070072174, "learning_rate": 2.5068493150684932e-05, "loss": 0.8941, "step": 122 }, { "epoch": 0.02528522972556275, "grad_norm": 0.503567636013031, "learning_rate": 2.527397260273973e-05, "loss": 0.9006, "step": 123 }, { "epoch": 0.02549080069894131, "grad_norm": 0.4252830445766449, "learning_rate": 2.547945205479452e-05, "loss": 0.8887, "step": 124 }, { "epoch": 0.02569637167231987, "grad_norm": 0.4380176067352295, "learning_rate": 2.5684931506849317e-05, "loss": 0.8662, "step": 125 }, { "epoch": 0.025901942645698426, "grad_norm": 0.3882461488246918, "learning_rate": 2.5890410958904108e-05, "loss": 0.8969, "step": 126 }, { "epoch": 0.026107513619076986, "grad_norm": 0.43722933530807495, "learning_rate": 2.6095890410958907e-05, "loss": 0.8589, "step": 127 }, { "epoch": 0.026313084592455547, "grad_norm": 0.46026188135147095, "learning_rate": 2.6301369863013698e-05, "loss": 0.8831, "step": 128 }, { "epoch": 0.026518655565834104, "grad_norm": 0.36106160283088684, "learning_rate": 2.6506849315068496e-05, "loss": 0.8433, "step": 129 }, { "epoch": 0.026724226539212664, "grad_norm": 0.19909483194351196, "learning_rate": 2.6712328767123288e-05, "loss": 0.6199, "step": 130 }, { "epoch": 0.02692979751259122, "grad_norm": 0.5032296180725098, "learning_rate": 2.6917808219178086e-05, "loss": 0.9036, "step": 131 }, { "epoch": 0.027135368485969782, "grad_norm": 0.40603938698768616, "learning_rate": 2.7123287671232878e-05, "loss": 0.8892, "step": 132 }, { "epoch": 0.02734093945934834, "grad_norm": 0.43442800641059875, "learning_rate": 2.7328767123287673e-05, "loss": 0.8975, "step": 133 }, { "epoch": 0.0275465104327269, "grad_norm": 0.442852258682251, "learning_rate": 2.7534246575342465e-05, "loss": 0.8509, "step": 134 }, { "epoch": 0.027752081406105456, "grad_norm": 0.4811699688434601, "learning_rate": 2.7739726027397263e-05, "loss": 0.8496, "step": 135 }, { "epoch": 0.027957652379484017, "grad_norm": 0.38817986845970154, "learning_rate": 2.7945205479452054e-05, "loss": 0.8383, "step": 136 }, { "epoch": 0.028163223352862577, "grad_norm": 0.41808751225471497, "learning_rate": 2.8150684931506853e-05, "loss": 0.8662, "step": 137 }, { "epoch": 0.028368794326241134, "grad_norm": 0.49768969416618347, "learning_rate": 2.8356164383561644e-05, "loss": 0.8526, "step": 138 }, { "epoch": 0.028574365299619695, "grad_norm": 0.3861895203590393, "learning_rate": 2.856164383561644e-05, "loss": 0.8454, "step": 139 }, { "epoch": 0.02877993627299825, "grad_norm": 0.4545285999774933, "learning_rate": 2.8767123287671234e-05, "loss": 0.8717, "step": 140 }, { "epoch": 0.028985507246376812, "grad_norm": 0.20150704681873322, "learning_rate": 2.897260273972603e-05, "loss": 0.6377, "step": 141 }, { "epoch": 0.02919107821975537, "grad_norm": 0.42400142550468445, "learning_rate": 2.917808219178082e-05, "loss": 0.8583, "step": 142 }, { "epoch": 0.02939664919313393, "grad_norm": 0.3788576126098633, "learning_rate": 2.938356164383562e-05, "loss": 0.8476, "step": 143 }, { "epoch": 0.02960222016651249, "grad_norm": 0.17580586671829224, "learning_rate": 2.958904109589041e-05, "loss": 0.6334, "step": 144 }, { "epoch": 0.029807791139891047, "grad_norm": 0.17598563432693481, "learning_rate": 2.979452054794521e-05, "loss": 0.6251, "step": 145 }, { "epoch": 0.030013362113269607, "grad_norm": 0.7843010425567627, "learning_rate": 3e-05, "loss": 0.8495, "step": 146 }, { "epoch": 0.030218933086648164, "grad_norm": 0.478127121925354, "learning_rate": 3.0205479452054796e-05, "loss": 0.8733, "step": 147 }, { "epoch": 0.030424504060026725, "grad_norm": 0.6210460066795349, "learning_rate": 3.041095890410959e-05, "loss": 0.8513, "step": 148 }, { "epoch": 0.03063007503340528, "grad_norm": 0.5364311337471008, "learning_rate": 3.061643835616439e-05, "loss": 0.8532, "step": 149 }, { "epoch": 0.030835646006783842, "grad_norm": 0.5108141899108887, "learning_rate": 3.082191780821918e-05, "loss": 0.852, "step": 150 }, { "epoch": 0.031041216980162403, "grad_norm": 0.4817136228084564, "learning_rate": 3.102739726027397e-05, "loss": 0.8431, "step": 151 }, { "epoch": 0.03124678795354096, "grad_norm": 0.5212568044662476, "learning_rate": 3.123287671232877e-05, "loss": 0.8591, "step": 152 }, { "epoch": 0.03145235892691952, "grad_norm": 0.4288831949234009, "learning_rate": 3.143835616438356e-05, "loss": 0.8614, "step": 153 }, { "epoch": 0.03165792990029808, "grad_norm": 0.1943136751651764, "learning_rate": 3.164383561643836e-05, "loss": 0.6347, "step": 154 }, { "epoch": 0.031863500873676634, "grad_norm": 0.7128695249557495, "learning_rate": 3.184931506849315e-05, "loss": 0.87, "step": 155 }, { "epoch": 0.032069071847055194, "grad_norm": 0.40213656425476074, "learning_rate": 3.205479452054795e-05, "loss": 0.8425, "step": 156 }, { "epoch": 0.032274642820433755, "grad_norm": 0.4853759706020355, "learning_rate": 3.226027397260274e-05, "loss": 0.8643, "step": 157 }, { "epoch": 0.032480213793812315, "grad_norm": 0.5050686001777649, "learning_rate": 3.246575342465754e-05, "loss": 0.8628, "step": 158 }, { "epoch": 0.032685784767190876, "grad_norm": 0.5028424263000488, "learning_rate": 3.267123287671233e-05, "loss": 0.8267, "step": 159 }, { "epoch": 0.03289135574056943, "grad_norm": 0.4855990707874298, "learning_rate": 3.287671232876713e-05, "loss": 0.8549, "step": 160 }, { "epoch": 0.03309692671394799, "grad_norm": 0.40553873777389526, "learning_rate": 3.308219178082192e-05, "loss": 0.8548, "step": 161 }, { "epoch": 0.03330249768732655, "grad_norm": 0.22181855142116547, "learning_rate": 3.328767123287671e-05, "loss": 0.6371, "step": 162 }, { "epoch": 0.03350806866070511, "grad_norm": 0.7873424887657166, "learning_rate": 3.349315068493151e-05, "loss": 0.8876, "step": 163 }, { "epoch": 0.033713639634083664, "grad_norm": 0.4477074444293976, "learning_rate": 3.36986301369863e-05, "loss": 0.8418, "step": 164 }, { "epoch": 0.033919210607462225, "grad_norm": 0.6497864127159119, "learning_rate": 3.39041095890411e-05, "loss": 0.8605, "step": 165 }, { "epoch": 0.034124781580840785, "grad_norm": 0.41493016481399536, "learning_rate": 3.410958904109589e-05, "loss": 0.8276, "step": 166 }, { "epoch": 0.034330352554219346, "grad_norm": 0.5347689390182495, "learning_rate": 3.4315068493150685e-05, "loss": 0.8809, "step": 167 }, { "epoch": 0.034535923527597906, "grad_norm": 0.4067676365375519, "learning_rate": 3.452054794520548e-05, "loss": 0.8329, "step": 168 }, { "epoch": 0.03474149450097646, "grad_norm": 0.4063913822174072, "learning_rate": 3.4726027397260275e-05, "loss": 0.8556, "step": 169 }, { "epoch": 0.03494706547435502, "grad_norm": 0.4246818721294403, "learning_rate": 3.493150684931507e-05, "loss": 0.8664, "step": 170 }, { "epoch": 0.03515263644773358, "grad_norm": 0.41586360335350037, "learning_rate": 3.5136986301369865e-05, "loss": 0.842, "step": 171 }, { "epoch": 0.03535820742111214, "grad_norm": 0.3807069659233093, "learning_rate": 3.534246575342466e-05, "loss": 0.824, "step": 172 }, { "epoch": 0.0355637783944907, "grad_norm": 0.7290697693824768, "learning_rate": 3.5547945205479455e-05, "loss": 0.6189, "step": 173 }, { "epoch": 0.035769349367869255, "grad_norm": 0.19204974174499512, "learning_rate": 3.575342465753425e-05, "loss": 0.6093, "step": 174 }, { "epoch": 0.035974920341247815, "grad_norm": 0.6416502594947815, "learning_rate": 3.5958904109589045e-05, "loss": 0.8379, "step": 175 }, { "epoch": 0.036180491314626376, "grad_norm": 0.3935816287994385, "learning_rate": 3.616438356164383e-05, "loss": 0.8263, "step": 176 }, { "epoch": 0.036386062288004936, "grad_norm": 0.47259315848350525, "learning_rate": 3.6369863013698635e-05, "loss": 0.8132, "step": 177 }, { "epoch": 0.03659163326138349, "grad_norm": 0.47834697365760803, "learning_rate": 3.657534246575342e-05, "loss": 0.8393, "step": 178 }, { "epoch": 0.03679720423476205, "grad_norm": 0.3470703363418579, "learning_rate": 3.6780821917808224e-05, "loss": 0.6182, "step": 179 }, { "epoch": 0.03700277520814061, "grad_norm": 0.5120542645454407, "learning_rate": 3.698630136986301e-05, "loss": 0.8336, "step": 180 }, { "epoch": 0.03720834618151917, "grad_norm": 0.42222753167152405, "learning_rate": 3.719178082191781e-05, "loss": 0.837, "step": 181 }, { "epoch": 0.03741391715489773, "grad_norm": 0.38363730907440186, "learning_rate": 3.73972602739726e-05, "loss": 0.8651, "step": 182 }, { "epoch": 0.037619488128276285, "grad_norm": 0.4108883738517761, "learning_rate": 3.76027397260274e-05, "loss": 0.8175, "step": 183 }, { "epoch": 0.037825059101654845, "grad_norm": 0.41021236777305603, "learning_rate": 3.780821917808219e-05, "loss": 0.8412, "step": 184 }, { "epoch": 0.038030630075033406, "grad_norm": 0.24833433330059052, "learning_rate": 3.801369863013699e-05, "loss": 0.6215, "step": 185 }, { "epoch": 0.038236201048411966, "grad_norm": 0.465718537569046, "learning_rate": 3.821917808219178e-05, "loss": 0.842, "step": 186 }, { "epoch": 0.03844177202179052, "grad_norm": 0.41596537828445435, "learning_rate": 3.842465753424658e-05, "loss": 0.8296, "step": 187 }, { "epoch": 0.03864734299516908, "grad_norm": 0.3815116286277771, "learning_rate": 3.863013698630137e-05, "loss": 0.8131, "step": 188 }, { "epoch": 0.03885291396854764, "grad_norm": 0.38065505027770996, "learning_rate": 3.883561643835617e-05, "loss": 0.8227, "step": 189 }, { "epoch": 0.0390584849419262, "grad_norm": 0.40238457918167114, "learning_rate": 3.904109589041096e-05, "loss": 0.829, "step": 190 }, { "epoch": 0.03926405591530476, "grad_norm": 0.39533552527427673, "learning_rate": 3.924657534246576e-05, "loss": 0.8062, "step": 191 }, { "epoch": 0.039469626888683315, "grad_norm": 0.2254960983991623, "learning_rate": 3.9452054794520546e-05, "loss": 0.6202, "step": 192 }, { "epoch": 0.039675197862061876, "grad_norm": 0.5490075945854187, "learning_rate": 3.965753424657535e-05, "loss": 0.8587, "step": 193 }, { "epoch": 0.039880768835440436, "grad_norm": 0.3820808231830597, "learning_rate": 3.9863013698630135e-05, "loss": 0.8461, "step": 194 }, { "epoch": 0.040086339808818996, "grad_norm": 0.48500680923461914, "learning_rate": 4.006849315068494e-05, "loss": 0.8319, "step": 195 }, { "epoch": 0.04029191078219756, "grad_norm": 0.20103423297405243, "learning_rate": 4.0273972602739725e-05, "loss": 0.6231, "step": 196 }, { "epoch": 0.04049748175557611, "grad_norm": 0.5550208687782288, "learning_rate": 4.047945205479452e-05, "loss": 0.8343, "step": 197 }, { "epoch": 0.04070305272895467, "grad_norm": 0.37427324056625366, "learning_rate": 4.0684931506849315e-05, "loss": 0.8292, "step": 198 }, { "epoch": 0.04090862370233323, "grad_norm": 0.2106785923242569, "learning_rate": 4.089041095890411e-05, "loss": 0.603, "step": 199 }, { "epoch": 0.04111419467571179, "grad_norm": 0.7520186305046082, "learning_rate": 4.1095890410958905e-05, "loss": 0.86, "step": 200 }, { "epoch": 0.041319765649090345, "grad_norm": 0.38897809386253357, "learning_rate": 4.13013698630137e-05, "loss": 0.82, "step": 201 }, { "epoch": 0.041525336622468906, "grad_norm": 0.5800373554229736, "learning_rate": 4.1506849315068495e-05, "loss": 0.8282, "step": 202 }, { "epoch": 0.041730907595847466, "grad_norm": 0.46717479825019836, "learning_rate": 4.171232876712329e-05, "loss": 0.8268, "step": 203 }, { "epoch": 0.04193647856922603, "grad_norm": 0.45258304476737976, "learning_rate": 4.1917808219178085e-05, "loss": 0.8178, "step": 204 }, { "epoch": 0.04214204954260459, "grad_norm": 0.44093188643455505, "learning_rate": 4.212328767123288e-05, "loss": 0.8507, "step": 205 }, { "epoch": 0.04234762051598314, "grad_norm": 0.38282710313796997, "learning_rate": 4.232876712328767e-05, "loss": 0.823, "step": 206 }, { "epoch": 0.0425531914893617, "grad_norm": 0.21601058542728424, "learning_rate": 4.253424657534247e-05, "loss": 0.6133, "step": 207 }, { "epoch": 0.04275876246274026, "grad_norm": 0.6589162945747375, "learning_rate": 4.273972602739726e-05, "loss": 0.8517, "step": 208 }, { "epoch": 0.04296433343611882, "grad_norm": 0.39537516236305237, "learning_rate": 4.294520547945206e-05, "loss": 0.8297, "step": 209 }, { "epoch": 0.043169904409497376, "grad_norm": 0.5449748039245605, "learning_rate": 4.315068493150685e-05, "loss": 0.8329, "step": 210 }, { "epoch": 0.043375475382875936, "grad_norm": 0.4801601767539978, "learning_rate": 4.335616438356165e-05, "loss": 0.8263, "step": 211 }, { "epoch": 0.043581046356254496, "grad_norm": 0.3884707987308502, "learning_rate": 4.356164383561644e-05, "loss": 0.8392, "step": 212 }, { "epoch": 0.04378661732963306, "grad_norm": 0.4665462374687195, "learning_rate": 4.376712328767123e-05, "loss": 0.8319, "step": 213 }, { "epoch": 0.04399218830301162, "grad_norm": 0.3869108557701111, "learning_rate": 4.397260273972603e-05, "loss": 0.8207, "step": 214 }, { "epoch": 0.04419775927639017, "grad_norm": 0.38586127758026123, "learning_rate": 4.417808219178082e-05, "loss": 0.8035, "step": 215 }, { "epoch": 0.04440333024976873, "grad_norm": 0.41265037655830383, "learning_rate": 4.438356164383562e-05, "loss": 0.8578, "step": 216 }, { "epoch": 0.04460890122314729, "grad_norm": 0.3726780116558075, "learning_rate": 4.458904109589041e-05, "loss": 0.8103, "step": 217 }, { "epoch": 0.04481447219652585, "grad_norm": 0.21903295814990997, "learning_rate": 4.479452054794521e-05, "loss": 0.6149, "step": 218 }, { "epoch": 0.04502004316990441, "grad_norm": 0.470803439617157, "learning_rate": 4.5e-05, "loss": 0.8187, "step": 219 }, { "epoch": 0.045225614143282966, "grad_norm": 0.3907180726528168, "learning_rate": 4.52054794520548e-05, "loss": 0.843, "step": 220 }, { "epoch": 0.04543118511666153, "grad_norm": 0.3910331726074219, "learning_rate": 4.54109589041096e-05, "loss": 0.8228, "step": 221 }, { "epoch": 0.04563675609004009, "grad_norm": 0.4238927364349365, "learning_rate": 4.561643835616439e-05, "loss": 0.8287, "step": 222 }, { "epoch": 0.04584232706341865, "grad_norm": 0.38111889362335205, "learning_rate": 4.582191780821918e-05, "loss": 0.8375, "step": 223 }, { "epoch": 0.0460478980367972, "grad_norm": 0.17004454135894775, "learning_rate": 4.602739726027397e-05, "loss": 0.6103, "step": 224 }, { "epoch": 0.04625346901017576, "grad_norm": 0.5066764950752258, "learning_rate": 4.623287671232877e-05, "loss": 0.8377, "step": 225 }, { "epoch": 0.04645903998355432, "grad_norm": 0.16975145041942596, "learning_rate": 4.643835616438357e-05, "loss": 0.6379, "step": 226 }, { "epoch": 0.04666461095693288, "grad_norm": 0.17714980244636536, "learning_rate": 4.6643835616438356e-05, "loss": 0.6246, "step": 227 }, { "epoch": 0.04687018193031144, "grad_norm": 0.44060373306274414, "learning_rate": 4.684931506849315e-05, "loss": 0.8455, "step": 228 }, { "epoch": 0.047075752903689996, "grad_norm": 0.41871070861816406, "learning_rate": 4.705479452054795e-05, "loss": 0.8438, "step": 229 }, { "epoch": 0.04728132387706856, "grad_norm": 0.20235472917556763, "learning_rate": 4.726027397260275e-05, "loss": 0.6155, "step": 230 }, { "epoch": 0.04748689485044712, "grad_norm": 0.4988607168197632, "learning_rate": 4.7465753424657536e-05, "loss": 0.8098, "step": 231 }, { "epoch": 0.04769246582382568, "grad_norm": 0.41510388255119324, "learning_rate": 4.767123287671233e-05, "loss": 0.8214, "step": 232 }, { "epoch": 0.04789803679720424, "grad_norm": 0.3907022178173065, "learning_rate": 4.787671232876713e-05, "loss": 0.8112, "step": 233 }, { "epoch": 0.04810360777058279, "grad_norm": 0.40868282318115234, "learning_rate": 4.808219178082192e-05, "loss": 0.8161, "step": 234 }, { "epoch": 0.04830917874396135, "grad_norm": 0.3888959288597107, "learning_rate": 4.8287671232876716e-05, "loss": 0.803, "step": 235 }, { "epoch": 0.04851474971733991, "grad_norm": 0.38003799319267273, "learning_rate": 4.849315068493151e-05, "loss": 0.8293, "step": 236 }, { "epoch": 0.04872032069071847, "grad_norm": 0.2189408391714096, "learning_rate": 4.869863013698631e-05, "loss": 0.601, "step": 237 }, { "epoch": 0.048925891664097027, "grad_norm": 0.44841453433036804, "learning_rate": 4.89041095890411e-05, "loss": 0.8239, "step": 238 }, { "epoch": 0.04913146263747559, "grad_norm": 0.41675901412963867, "learning_rate": 4.9109589041095895e-05, "loss": 0.8041, "step": 239 }, { "epoch": 0.04933703361085415, "grad_norm": 0.3353470265865326, "learning_rate": 4.9315068493150684e-05, "loss": 0.8233, "step": 240 }, { "epoch": 0.04954260458423271, "grad_norm": 0.38614898920059204, "learning_rate": 4.9520547945205485e-05, "loss": 0.8202, "step": 241 }, { "epoch": 0.04974817555761127, "grad_norm": 0.3578384220600128, "learning_rate": 4.972602739726028e-05, "loss": 0.8155, "step": 242 }, { "epoch": 0.04995374653098982, "grad_norm": 0.3806624114513397, "learning_rate": 4.993150684931507e-05, "loss": 0.8475, "step": 243 }, { "epoch": 0.05015931750436838, "grad_norm": 0.23930180072784424, "learning_rate": 5.0136986301369863e-05, "loss": 0.6126, "step": 244 }, { "epoch": 0.05036488847774694, "grad_norm": 0.4321422278881073, "learning_rate": 5.0342465753424665e-05, "loss": 0.8145, "step": 245 }, { "epoch": 0.0505704594511255, "grad_norm": 0.3582285940647125, "learning_rate": 5.054794520547946e-05, "loss": 0.8384, "step": 246 }, { "epoch": 0.05077603042450406, "grad_norm": 0.3378206491470337, "learning_rate": 5.075342465753425e-05, "loss": 0.8189, "step": 247 }, { "epoch": 0.05098160139788262, "grad_norm": 0.3585507571697235, "learning_rate": 5.095890410958904e-05, "loss": 0.8379, "step": 248 }, { "epoch": 0.05118717237126118, "grad_norm": 0.36620137095451355, "learning_rate": 5.1164383561643845e-05, "loss": 0.8059, "step": 249 }, { "epoch": 0.05139274334463974, "grad_norm": 0.348910391330719, "learning_rate": 5.136986301369863e-05, "loss": 0.8231, "step": 250 }, { "epoch": 0.0515983143180183, "grad_norm": 0.37466245889663696, "learning_rate": 5.157534246575343e-05, "loss": 0.8263, "step": 251 }, { "epoch": 0.05180388529139685, "grad_norm": 0.3923078775405884, "learning_rate": 5.1780821917808216e-05, "loss": 0.8142, "step": 252 }, { "epoch": 0.05200945626477541, "grad_norm": 0.3668658435344696, "learning_rate": 5.1986301369863025e-05, "loss": 0.815, "step": 253 }, { "epoch": 0.05221502723815397, "grad_norm": 0.34352773427963257, "learning_rate": 5.219178082191781e-05, "loss": 0.8103, "step": 254 }, { "epoch": 0.05242059821153253, "grad_norm": 0.35997268557548523, "learning_rate": 5.239726027397261e-05, "loss": 0.8021, "step": 255 }, { "epoch": 0.052626169184911094, "grad_norm": 0.4281958043575287, "learning_rate": 5.2602739726027396e-05, "loss": 0.613, "step": 256 }, { "epoch": 0.05283174015828965, "grad_norm": 0.40191400051116943, "learning_rate": 5.28082191780822e-05, "loss": 0.8114, "step": 257 }, { "epoch": 0.05303731113166821, "grad_norm": 0.2332005500793457, "learning_rate": 5.301369863013699e-05, "loss": 0.6145, "step": 258 }, { "epoch": 0.05324288210504677, "grad_norm": 0.3814218044281006, "learning_rate": 5.321917808219178e-05, "loss": 0.8322, "step": 259 }, { "epoch": 0.05344845307842533, "grad_norm": 0.8000903129577637, "learning_rate": 5.3424657534246576e-05, "loss": 0.8061, "step": 260 }, { "epoch": 0.05365402405180388, "grad_norm": 0.3613252341747284, "learning_rate": 5.363013698630138e-05, "loss": 0.817, "step": 261 }, { "epoch": 0.05385959502518244, "grad_norm": 0.3710997998714447, "learning_rate": 5.383561643835617e-05, "loss": 0.847, "step": 262 }, { "epoch": 0.054065165998561, "grad_norm": 0.36693164706230164, "learning_rate": 5.404109589041096e-05, "loss": 0.6174, "step": 263 }, { "epoch": 0.054270736971939564, "grad_norm": 0.4523719251155853, "learning_rate": 5.4246575342465756e-05, "loss": 0.8234, "step": 264 }, { "epoch": 0.054476307945318124, "grad_norm": 0.3696235120296478, "learning_rate": 5.445205479452056e-05, "loss": 0.7997, "step": 265 }, { "epoch": 0.05468187891869668, "grad_norm": 0.3745763599872589, "learning_rate": 5.4657534246575346e-05, "loss": 0.8098, "step": 266 }, { "epoch": 0.05488744989207524, "grad_norm": 0.36916518211364746, "learning_rate": 5.486301369863014e-05, "loss": 0.788, "step": 267 }, { "epoch": 0.0550930208654538, "grad_norm": 0.351854532957077, "learning_rate": 5.506849315068493e-05, "loss": 0.8124, "step": 268 }, { "epoch": 0.05529859183883236, "grad_norm": 0.3717731535434723, "learning_rate": 5.527397260273973e-05, "loss": 0.8166, "step": 269 }, { "epoch": 0.05550416281221091, "grad_norm": 0.3277188837528229, "learning_rate": 5.5479452054794526e-05, "loss": 0.6006, "step": 270 }, { "epoch": 0.05570973378558947, "grad_norm": 0.39217084646224976, "learning_rate": 5.568493150684932e-05, "loss": 0.8076, "step": 271 }, { "epoch": 0.05591530475896803, "grad_norm": 0.37465596199035645, "learning_rate": 5.589041095890411e-05, "loss": 0.8196, "step": 272 }, { "epoch": 0.056120875732346594, "grad_norm": 0.37113896012306213, "learning_rate": 5.609589041095891e-05, "loss": 0.8206, "step": 273 }, { "epoch": 0.056326446705725154, "grad_norm": 0.3641659915447235, "learning_rate": 5.6301369863013706e-05, "loss": 0.8372, "step": 274 }, { "epoch": 0.05653201767910371, "grad_norm": 0.3738704025745392, "learning_rate": 5.6506849315068494e-05, "loss": 0.8201, "step": 275 }, { "epoch": 0.05673758865248227, "grad_norm": 0.35747018456459045, "learning_rate": 5.671232876712329e-05, "loss": 0.8082, "step": 276 }, { "epoch": 0.05694315962586083, "grad_norm": 0.29701605439186096, "learning_rate": 5.691780821917809e-05, "loss": 0.6105, "step": 277 }, { "epoch": 0.05714873059923939, "grad_norm": 0.4180268347263336, "learning_rate": 5.712328767123288e-05, "loss": 0.8325, "step": 278 }, { "epoch": 0.05735430157261795, "grad_norm": 0.36010023951530457, "learning_rate": 5.7328767123287674e-05, "loss": 0.8403, "step": 279 }, { "epoch": 0.0575598725459965, "grad_norm": 0.35812970995903015, "learning_rate": 5.753424657534247e-05, "loss": 0.8201, "step": 280 }, { "epoch": 0.05776544351937506, "grad_norm": 0.35655659437179565, "learning_rate": 5.773972602739727e-05, "loss": 0.8104, "step": 281 }, { "epoch": 0.057971014492753624, "grad_norm": 0.3628866970539093, "learning_rate": 5.794520547945206e-05, "loss": 0.8011, "step": 282 }, { "epoch": 0.058176585466132184, "grad_norm": 0.33707040548324585, "learning_rate": 5.8150684931506854e-05, "loss": 0.7863, "step": 283 }, { "epoch": 0.05838215643951074, "grad_norm": 0.25686392188072205, "learning_rate": 5.835616438356164e-05, "loss": 0.605, "step": 284 }, { "epoch": 0.0585877274128893, "grad_norm": 0.4549000859260559, "learning_rate": 5.8561643835616444e-05, "loss": 0.7871, "step": 285 }, { "epoch": 0.05879329838626786, "grad_norm": 0.17129164934158325, "learning_rate": 5.876712328767124e-05, "loss": 0.6043, "step": 286 }, { "epoch": 0.05899886935964642, "grad_norm": 0.4582807719707489, "learning_rate": 5.8972602739726033e-05, "loss": 0.7943, "step": 287 }, { "epoch": 0.05920444033302498, "grad_norm": 0.3587150573730469, "learning_rate": 5.917808219178082e-05, "loss": 0.818, "step": 288 }, { "epoch": 0.05941001130640353, "grad_norm": 0.35766854882240295, "learning_rate": 5.9383561643835623e-05, "loss": 0.8084, "step": 289 }, { "epoch": 0.059615582279782094, "grad_norm": 0.24981027841567993, "learning_rate": 5.958904109589042e-05, "loss": 0.6123, "step": 290 }, { "epoch": 0.059821153253160654, "grad_norm": 0.4611298143863678, "learning_rate": 5.9794520547945207e-05, "loss": 0.7859, "step": 291 }, { "epoch": 0.060026724226539215, "grad_norm": 0.1829315423965454, "learning_rate": 6e-05, "loss": 0.6047, "step": 292 }, { "epoch": 0.060232295199917775, "grad_norm": 0.432064026594162, "learning_rate": 6.02054794520548e-05, "loss": 0.8252, "step": 293 }, { "epoch": 0.06043786617329633, "grad_norm": 0.3626839518547058, "learning_rate": 6.041095890410959e-05, "loss": 0.8004, "step": 294 }, { "epoch": 0.06064343714667489, "grad_norm": 0.3860291838645935, "learning_rate": 6.0616438356164386e-05, "loss": 0.8287, "step": 295 }, { "epoch": 0.06084900812005345, "grad_norm": 0.2607959806919098, "learning_rate": 6.082191780821918e-05, "loss": 0.617, "step": 296 }, { "epoch": 0.06105457909343201, "grad_norm": 0.494211882352829, "learning_rate": 6.102739726027398e-05, "loss": 0.8062, "step": 297 }, { "epoch": 0.06126015006681056, "grad_norm": 0.37032371759414673, "learning_rate": 6.123287671232878e-05, "loss": 0.7842, "step": 298 }, { "epoch": 0.061465721040189124, "grad_norm": 0.3706514835357666, "learning_rate": 6.143835616438357e-05, "loss": 0.8076, "step": 299 }, { "epoch": 0.061671292013567684, "grad_norm": 0.41590166091918945, "learning_rate": 6.164383561643835e-05, "loss": 0.8142, "step": 300 }, { "epoch": 0.061876862986946245, "grad_norm": 0.4085366129875183, "learning_rate": 6.184931506849316e-05, "loss": 0.8583, "step": 301 }, { "epoch": 0.062082433960324805, "grad_norm": 0.3671876788139343, "learning_rate": 6.205479452054794e-05, "loss": 0.7891, "step": 302 }, { "epoch": 0.06228800493370336, "grad_norm": 0.39252158999443054, "learning_rate": 6.226027397260275e-05, "loss": 0.8023, "step": 303 }, { "epoch": 0.06249357590708192, "grad_norm": 0.35324522852897644, "learning_rate": 6.246575342465753e-05, "loss": 0.7921, "step": 304 }, { "epoch": 0.06269914688046048, "grad_norm": 0.28854769468307495, "learning_rate": 6.267123287671234e-05, "loss": 0.6309, "step": 305 }, { "epoch": 0.06290471785383904, "grad_norm": 0.48670095205307007, "learning_rate": 6.287671232876712e-05, "loss": 0.7814, "step": 306 }, { "epoch": 0.0631102888272176, "grad_norm": 0.3746386170387268, "learning_rate": 6.308219178082193e-05, "loss": 0.8142, "step": 307 }, { "epoch": 0.06331585980059616, "grad_norm": 0.42179784178733826, "learning_rate": 6.328767123287671e-05, "loss": 0.8312, "step": 308 }, { "epoch": 0.06352143077397472, "grad_norm": 0.37425556778907776, "learning_rate": 6.349315068493152e-05, "loss": 0.8397, "step": 309 }, { "epoch": 0.06372700174735327, "grad_norm": 0.42048847675323486, "learning_rate": 6.36986301369863e-05, "loss": 0.7864, "step": 310 }, { "epoch": 0.06393257272073183, "grad_norm": 0.34095990657806396, "learning_rate": 6.390410958904109e-05, "loss": 0.8275, "step": 311 }, { "epoch": 0.06413814369411039, "grad_norm": 0.3992113769054413, "learning_rate": 6.41095890410959e-05, "loss": 0.8037, "step": 312 }, { "epoch": 0.06434371466748895, "grad_norm": 0.3752027451992035, "learning_rate": 6.43150684931507e-05, "loss": 0.8096, "step": 313 }, { "epoch": 0.06454928564086751, "grad_norm": 0.3788531422615051, "learning_rate": 6.452054794520548e-05, "loss": 0.8148, "step": 314 }, { "epoch": 0.06475485661424607, "grad_norm": 0.34858015179634094, "learning_rate": 6.472602739726027e-05, "loss": 0.7865, "step": 315 }, { "epoch": 0.06496042758762463, "grad_norm": 0.3562847375869751, "learning_rate": 6.493150684931507e-05, "loss": 0.7953, "step": 316 }, { "epoch": 0.06516599856100319, "grad_norm": 0.3146650493144989, "learning_rate": 6.513698630136988e-05, "loss": 0.5924, "step": 317 }, { "epoch": 0.06537156953438175, "grad_norm": 0.21578195691108704, "learning_rate": 6.534246575342466e-05, "loss": 0.6165, "step": 318 }, { "epoch": 0.0655771405077603, "grad_norm": 0.19480906426906586, "learning_rate": 6.554794520547945e-05, "loss": 0.6254, "step": 319 }, { "epoch": 0.06578271148113886, "grad_norm": 0.8668273091316223, "learning_rate": 6.575342465753425e-05, "loss": 0.8364, "step": 320 }, { "epoch": 0.06598828245451742, "grad_norm": 0.5889570116996765, "learning_rate": 6.595890410958906e-05, "loss": 0.8205, "step": 321 }, { "epoch": 0.06619385342789598, "grad_norm": 0.3477165102958679, "learning_rate": 6.616438356164384e-05, "loss": 0.6104, "step": 322 }, { "epoch": 0.06639942440127454, "grad_norm": 1.1917229890823364, "learning_rate": 6.636986301369863e-05, "loss": 0.8402, "step": 323 }, { "epoch": 0.0666049953746531, "grad_norm": 0.5916200876235962, "learning_rate": 6.657534246575342e-05, "loss": 0.8265, "step": 324 }, { "epoch": 0.06681056634803166, "grad_norm": 0.6326993107795715, "learning_rate": 6.678082191780822e-05, "loss": 0.822, "step": 325 }, { "epoch": 0.06701613732141022, "grad_norm": 0.545361340045929, "learning_rate": 6.698630136986302e-05, "loss": 0.8369, "step": 326 }, { "epoch": 0.06722170829478878, "grad_norm": 0.5392776727676392, "learning_rate": 6.719178082191781e-05, "loss": 0.8009, "step": 327 }, { "epoch": 0.06742727926816733, "grad_norm": 0.2618131637573242, "learning_rate": 6.73972602739726e-05, "loss": 0.6182, "step": 328 }, { "epoch": 0.06763285024154589, "grad_norm": 0.6088753342628479, "learning_rate": 6.76027397260274e-05, "loss": 0.8189, "step": 329 }, { "epoch": 0.06783842121492445, "grad_norm": 0.5107940435409546, "learning_rate": 6.78082191780822e-05, "loss": 0.8304, "step": 330 }, { "epoch": 0.06804399218830301, "grad_norm": 0.38624778389930725, "learning_rate": 6.801369863013699e-05, "loss": 0.8361, "step": 331 }, { "epoch": 0.06824956316168157, "grad_norm": 0.41758957505226135, "learning_rate": 6.821917808219178e-05, "loss": 0.7881, "step": 332 }, { "epoch": 0.06845513413506013, "grad_norm": 0.41675320267677307, "learning_rate": 6.842465753424658e-05, "loss": 0.8297, "step": 333 }, { "epoch": 0.06866070510843869, "grad_norm": 0.3944019079208374, "learning_rate": 6.863013698630137e-05, "loss": 0.8154, "step": 334 }, { "epoch": 0.06886627608181725, "grad_norm": 0.3403918743133545, "learning_rate": 6.883561643835617e-05, "loss": 0.6183, "step": 335 }, { "epoch": 0.06907184705519581, "grad_norm": 0.5603693127632141, "learning_rate": 6.904109589041096e-05, "loss": 0.8398, "step": 336 }, { "epoch": 0.06927741802857436, "grad_norm": 0.3981553912162781, "learning_rate": 6.924657534246576e-05, "loss": 0.8122, "step": 337 }, { "epoch": 0.06948298900195292, "grad_norm": 0.4603327214717865, "learning_rate": 6.945205479452055e-05, "loss": 0.8305, "step": 338 }, { "epoch": 0.06968855997533148, "grad_norm": 0.43689751625061035, "learning_rate": 6.965753424657535e-05, "loss": 0.828, "step": 339 }, { "epoch": 0.06989413094871004, "grad_norm": 0.41511690616607666, "learning_rate": 6.986301369863014e-05, "loss": 0.7844, "step": 340 }, { "epoch": 0.0700997019220886, "grad_norm": 0.3534780740737915, "learning_rate": 7.006849315068494e-05, "loss": 0.7882, "step": 341 }, { "epoch": 0.07030527289546716, "grad_norm": 0.33764714002609253, "learning_rate": 7.027397260273973e-05, "loss": 0.6009, "step": 342 }, { "epoch": 0.07051084386884572, "grad_norm": 0.4741517901420593, "learning_rate": 7.047945205479452e-05, "loss": 0.7903, "step": 343 }, { "epoch": 0.07071641484222428, "grad_norm": 0.19411741197109222, "learning_rate": 7.068493150684932e-05, "loss": 0.6019, "step": 344 }, { "epoch": 0.07092198581560284, "grad_norm": 0.2023278921842575, "learning_rate": 7.089041095890412e-05, "loss": 0.6041, "step": 345 }, { "epoch": 0.0711275567889814, "grad_norm": 0.18110667169094086, "learning_rate": 7.109589041095891e-05, "loss": 0.6082, "step": 346 }, { "epoch": 0.07133312776235995, "grad_norm": 0.6595879197120667, "learning_rate": 7.13013698630137e-05, "loss": 0.8487, "step": 347 }, { "epoch": 0.07153869873573851, "grad_norm": 0.3792790472507477, "learning_rate": 7.15068493150685e-05, "loss": 0.8155, "step": 348 }, { "epoch": 0.07174426970911707, "grad_norm": 0.553161084651947, "learning_rate": 7.17123287671233e-05, "loss": 0.8172, "step": 349 }, { "epoch": 0.07194984068249563, "grad_norm": 0.3672430217266083, "learning_rate": 7.191780821917809e-05, "loss": 0.7855, "step": 350 }, { "epoch": 0.07215541165587419, "grad_norm": 0.5036430358886719, "learning_rate": 7.212328767123288e-05, "loss": 0.8164, "step": 351 }, { "epoch": 0.07236098262925275, "grad_norm": 0.3772536814212799, "learning_rate": 7.232876712328767e-05, "loss": 0.7894, "step": 352 }, { "epoch": 0.07256655360263131, "grad_norm": 0.37201905250549316, "learning_rate": 7.253424657534247e-05, "loss": 0.8306, "step": 353 }, { "epoch": 0.07277212457600987, "grad_norm": 0.4128398597240448, "learning_rate": 7.273972602739727e-05, "loss": 0.8272, "step": 354 }, { "epoch": 0.07297769554938843, "grad_norm": 0.3522986173629761, "learning_rate": 7.294520547945206e-05, "loss": 0.8075, "step": 355 }, { "epoch": 0.07318326652276698, "grad_norm": 0.3743478059768677, "learning_rate": 7.315068493150685e-05, "loss": 0.8188, "step": 356 }, { "epoch": 0.07338883749614554, "grad_norm": 0.4586912989616394, "learning_rate": 7.335616438356165e-05, "loss": 0.6061, "step": 357 }, { "epoch": 0.0735944084695241, "grad_norm": 0.21246209740638733, "learning_rate": 7.356164383561645e-05, "loss": 0.6243, "step": 358 }, { "epoch": 0.07379997944290266, "grad_norm": 0.5889565944671631, "learning_rate": 7.376712328767124e-05, "loss": 0.8188, "step": 359 }, { "epoch": 0.07400555041628122, "grad_norm": 0.37973251938819885, "learning_rate": 7.397260273972603e-05, "loss": 0.8092, "step": 360 }, { "epoch": 0.07421112138965978, "grad_norm": 0.45936939120292664, "learning_rate": 7.417808219178083e-05, "loss": 0.6085, "step": 361 }, { "epoch": 0.07441669236303834, "grad_norm": 0.33185017108917236, "learning_rate": 7.438356164383562e-05, "loss": 0.5758, "step": 362 }, { "epoch": 0.0746222633364169, "grad_norm": 0.7869192361831665, "learning_rate": 7.458904109589042e-05, "loss": 0.8316, "step": 363 }, { "epoch": 0.07482783430979546, "grad_norm": 0.5039427876472473, "learning_rate": 7.47945205479452e-05, "loss": 0.8197, "step": 364 }, { "epoch": 0.07503340528317401, "grad_norm": 0.4809415340423584, "learning_rate": 7.500000000000001e-05, "loss": 0.8023, "step": 365 }, { "epoch": 0.07523897625655257, "grad_norm": 0.5067195296287537, "learning_rate": 7.52054794520548e-05, "loss": 0.8258, "step": 366 }, { "epoch": 0.07544454722993113, "grad_norm": 0.44106048345565796, "learning_rate": 7.54109589041096e-05, "loss": 0.8063, "step": 367 }, { "epoch": 0.07565011820330969, "grad_norm": 0.40639805793762207, "learning_rate": 7.561643835616439e-05, "loss": 0.8315, "step": 368 }, { "epoch": 0.07585568917668825, "grad_norm": 0.44400423765182495, "learning_rate": 7.582191780821919e-05, "loss": 0.8053, "step": 369 }, { "epoch": 0.07606126015006681, "grad_norm": 0.3997926414012909, "learning_rate": 7.602739726027398e-05, "loss": 0.8118, "step": 370 }, { "epoch": 0.07626683112344537, "grad_norm": 0.36897820234298706, "learning_rate": 7.623287671232878e-05, "loss": 0.8377, "step": 371 }, { "epoch": 0.07647240209682393, "grad_norm": 0.40449821949005127, "learning_rate": 7.643835616438356e-05, "loss": 0.8115, "step": 372 }, { "epoch": 0.0766779730702025, "grad_norm": 0.39014002680778503, "learning_rate": 7.664383561643837e-05, "loss": 0.8149, "step": 373 }, { "epoch": 0.07688354404358104, "grad_norm": 0.3730955421924591, "learning_rate": 7.684931506849315e-05, "loss": 0.8019, "step": 374 }, { "epoch": 0.0770891150169596, "grad_norm": 0.36292803287506104, "learning_rate": 7.705479452054794e-05, "loss": 0.8305, "step": 375 }, { "epoch": 0.07729468599033816, "grad_norm": 0.8635247349739075, "learning_rate": 7.726027397260274e-05, "loss": 0.6601, "step": 376 }, { "epoch": 0.07750025696371672, "grad_norm": 0.4957028925418854, "learning_rate": 7.746575342465755e-05, "loss": 0.8365, "step": 377 }, { "epoch": 0.07770582793709528, "grad_norm": 0.400206983089447, "learning_rate": 7.767123287671233e-05, "loss": 0.8128, "step": 378 }, { "epoch": 0.07791139891047384, "grad_norm": 0.3647255301475525, "learning_rate": 7.787671232876712e-05, "loss": 0.7968, "step": 379 }, { "epoch": 0.0781169698838524, "grad_norm": 0.39965569972991943, "learning_rate": 7.808219178082192e-05, "loss": 0.8015, "step": 380 }, { "epoch": 0.07832254085723096, "grad_norm": 0.3467910885810852, "learning_rate": 7.828767123287673e-05, "loss": 0.7904, "step": 381 }, { "epoch": 0.07852811183060952, "grad_norm": 0.33436062932014465, "learning_rate": 7.849315068493151e-05, "loss": 0.7647, "step": 382 }, { "epoch": 0.07873368280398808, "grad_norm": 0.3548223376274109, "learning_rate": 7.86986301369863e-05, "loss": 0.7939, "step": 383 }, { "epoch": 0.07893925377736663, "grad_norm": 0.7502946853637695, "learning_rate": 7.890410958904109e-05, "loss": 0.6747, "step": 384 }, { "epoch": 0.07914482475074519, "grad_norm": 0.3931428790092468, "learning_rate": 7.910958904109589e-05, "loss": 0.8237, "step": 385 }, { "epoch": 0.07935039572412375, "grad_norm": 0.30833980441093445, "learning_rate": 7.93150684931507e-05, "loss": 0.64, "step": 386 }, { "epoch": 0.07955596669750231, "grad_norm": 0.43092408776283264, "learning_rate": 7.952054794520548e-05, "loss": 0.8138, "step": 387 }, { "epoch": 0.07976153767088087, "grad_norm": 0.26460933685302734, "learning_rate": 7.972602739726027e-05, "loss": 0.6153, "step": 388 }, { "epoch": 0.07996710864425943, "grad_norm": 0.4149387776851654, "learning_rate": 7.993150684931507e-05, "loss": 0.7809, "step": 389 }, { "epoch": 0.08017267961763799, "grad_norm": 0.35397103428840637, "learning_rate": 8.013698630136987e-05, "loss": 0.8249, "step": 390 }, { "epoch": 0.08037825059101655, "grad_norm": 0.34258702397346497, "learning_rate": 8.034246575342466e-05, "loss": 0.8259, "step": 391 }, { "epoch": 0.08058382156439511, "grad_norm": 0.3488398790359497, "learning_rate": 8.054794520547945e-05, "loss": 0.7772, "step": 392 }, { "epoch": 0.08078939253777366, "grad_norm": 0.3264416456222534, "learning_rate": 8.075342465753425e-05, "loss": 0.7751, "step": 393 }, { "epoch": 0.08099496351115222, "grad_norm": 0.3270927965641022, "learning_rate": 8.095890410958904e-05, "loss": 0.7992, "step": 394 }, { "epoch": 0.08120053448453078, "grad_norm": 0.2641488313674927, "learning_rate": 8.116438356164384e-05, "loss": 0.6224, "step": 395 }, { "epoch": 0.08140610545790934, "grad_norm": 0.3740901052951813, "learning_rate": 8.136986301369863e-05, "loss": 0.8118, "step": 396 }, { "epoch": 0.0816116764312879, "grad_norm": 0.328571081161499, "learning_rate": 8.157534246575343e-05, "loss": 0.7969, "step": 397 }, { "epoch": 0.08181724740466646, "grad_norm": 0.2278534322977066, "learning_rate": 8.178082191780822e-05, "loss": 0.6215, "step": 398 }, { "epoch": 0.08202281837804502, "grad_norm": 0.3593691885471344, "learning_rate": 8.198630136986302e-05, "loss": 0.7949, "step": 399 }, { "epoch": 0.08222838935142358, "grad_norm": 0.3530971109867096, "learning_rate": 8.219178082191781e-05, "loss": 0.8042, "step": 400 }, { "epoch": 0.08243396032480214, "grad_norm": 0.17606891691684723, "learning_rate": 8.239726027397261e-05, "loss": 0.638, "step": 401 }, { "epoch": 0.08263953129818069, "grad_norm": 0.1690833419561386, "learning_rate": 8.26027397260274e-05, "loss": 0.5895, "step": 402 }, { "epoch": 0.08284510227155925, "grad_norm": 0.17045153677463531, "learning_rate": 8.280821917808219e-05, "loss": 0.5924, "step": 403 }, { "epoch": 0.08305067324493781, "grad_norm": 0.5894138813018799, "learning_rate": 8.301369863013699e-05, "loss": 0.8156, "step": 404 }, { "epoch": 0.08325624421831637, "grad_norm": 0.3428020477294922, "learning_rate": 8.321917808219179e-05, "loss": 0.8131, "step": 405 }, { "epoch": 0.08346181519169493, "grad_norm": 0.4333934783935547, "learning_rate": 8.342465753424658e-05, "loss": 0.8106, "step": 406 }, { "epoch": 0.08366738616507349, "grad_norm": 0.4093782901763916, "learning_rate": 8.363013698630137e-05, "loss": 0.8158, "step": 407 }, { "epoch": 0.08387295713845205, "grad_norm": 0.3554767668247223, "learning_rate": 8.383561643835617e-05, "loss": 0.805, "step": 408 }, { "epoch": 0.08407852811183061, "grad_norm": 0.35396429896354675, "learning_rate": 8.404109589041097e-05, "loss": 0.787, "step": 409 }, { "epoch": 0.08428409908520917, "grad_norm": 0.36389169096946716, "learning_rate": 8.424657534246576e-05, "loss": 0.8378, "step": 410 }, { "epoch": 0.08448967005858772, "grad_norm": 0.3563280999660492, "learning_rate": 8.445205479452055e-05, "loss": 0.7844, "step": 411 }, { "epoch": 0.08469524103196628, "grad_norm": 0.340190589427948, "learning_rate": 8.465753424657534e-05, "loss": 0.8288, "step": 412 }, { "epoch": 0.08490081200534484, "grad_norm": 0.2419368475675583, "learning_rate": 8.486301369863015e-05, "loss": 0.6281, "step": 413 }, { "epoch": 0.0851063829787234, "grad_norm": 0.37181293964385986, "learning_rate": 8.506849315068494e-05, "loss": 0.8016, "step": 414 }, { "epoch": 0.08531195395210196, "grad_norm": 0.34155288338661194, "learning_rate": 8.527397260273973e-05, "loss": 0.7963, "step": 415 }, { "epoch": 0.08551752492548052, "grad_norm": 0.3259139358997345, "learning_rate": 8.547945205479452e-05, "loss": 0.8013, "step": 416 }, { "epoch": 0.08572309589885908, "grad_norm": 0.3541535437107086, "learning_rate": 8.568493150684932e-05, "loss": 0.7988, "step": 417 }, { "epoch": 0.08592866687223764, "grad_norm": 0.20659230649471283, "learning_rate": 8.589041095890412e-05, "loss": 0.6026, "step": 418 }, { "epoch": 0.0861342378456162, "grad_norm": 0.1695416420698166, "learning_rate": 8.609589041095891e-05, "loss": 0.5905, "step": 419 }, { "epoch": 0.08633980881899475, "grad_norm": 0.48443859815597534, "learning_rate": 8.63013698630137e-05, "loss": 0.8179, "step": 420 }, { "epoch": 0.08654537979237331, "grad_norm": 0.33505165576934814, "learning_rate": 8.65068493150685e-05, "loss": 0.7979, "step": 421 }, { "epoch": 0.08675095076575187, "grad_norm": 0.19388127326965332, "learning_rate": 8.67123287671233e-05, "loss": 0.6141, "step": 422 }, { "epoch": 0.08695652173913043, "grad_norm": 0.19659045338630676, "learning_rate": 8.691780821917809e-05, "loss": 0.5968, "step": 423 }, { "epoch": 0.08716209271250899, "grad_norm": 0.5674632787704468, "learning_rate": 8.712328767123288e-05, "loss": 0.8258, "step": 424 }, { "epoch": 0.08736766368588755, "grad_norm": 0.17561140656471252, "learning_rate": 8.732876712328768e-05, "loss": 0.5972, "step": 425 }, { "epoch": 0.08757323465926611, "grad_norm": 0.48669886589050293, "learning_rate": 8.753424657534247e-05, "loss": 0.7975, "step": 426 }, { "epoch": 0.08777880563264467, "grad_norm": 0.3487796187400818, "learning_rate": 8.773972602739727e-05, "loss": 0.7713, "step": 427 }, { "epoch": 0.08798437660602323, "grad_norm": 0.3712750971317291, "learning_rate": 8.794520547945206e-05, "loss": 0.7665, "step": 428 }, { "epoch": 0.0881899475794018, "grad_norm": 0.23141850531101227, "learning_rate": 8.815068493150686e-05, "loss": 0.6171, "step": 429 }, { "epoch": 0.08839551855278034, "grad_norm": 0.43884536623954773, "learning_rate": 8.835616438356165e-05, "loss": 0.7922, "step": 430 }, { "epoch": 0.0886010895261589, "grad_norm": 0.17824266850948334, "learning_rate": 8.856164383561645e-05, "loss": 0.616, "step": 431 }, { "epoch": 0.08880666049953746, "grad_norm": 0.4101521670818329, "learning_rate": 8.876712328767124e-05, "loss": 0.8083, "step": 432 }, { "epoch": 0.08901223147291602, "grad_norm": 0.3446323275566101, "learning_rate": 8.897260273972604e-05, "loss": 0.813, "step": 433 }, { "epoch": 0.08921780244629458, "grad_norm": 0.17695310711860657, "learning_rate": 8.917808219178083e-05, "loss": 0.5855, "step": 434 }, { "epoch": 0.08942337341967314, "grad_norm": 0.41505882143974304, "learning_rate": 8.938356164383561e-05, "loss": 0.7966, "step": 435 }, { "epoch": 0.0896289443930517, "grad_norm": 0.3373473286628723, "learning_rate": 8.958904109589042e-05, "loss": 0.7981, "step": 436 }, { "epoch": 0.08983451536643026, "grad_norm": 0.1881159394979477, "learning_rate": 8.979452054794522e-05, "loss": 0.5907, "step": 437 }, { "epoch": 0.09004008633980883, "grad_norm": 0.570391058921814, "learning_rate": 9e-05, "loss": 0.8141, "step": 438 }, { "epoch": 0.09024565731318737, "grad_norm": 0.34099552035331726, "learning_rate": 8.999999889153016e-05, "loss": 0.7716, "step": 439 }, { "epoch": 0.09045122828656593, "grad_norm": 0.4682377576828003, "learning_rate": 8.999999556612072e-05, "loss": 0.8084, "step": 440 }, { "epoch": 0.09065679925994449, "grad_norm": 0.36160755157470703, "learning_rate": 8.999999002377183e-05, "loss": 0.7883, "step": 441 }, { "epoch": 0.09086237023332305, "grad_norm": 0.42005038261413574, "learning_rate": 8.999998226448373e-05, "loss": 0.794, "step": 442 }, { "epoch": 0.09106794120670161, "grad_norm": 0.32100972533226013, "learning_rate": 8.999997228825685e-05, "loss": 0.7767, "step": 443 }, { "epoch": 0.09127351218008017, "grad_norm": 0.35609909892082214, "learning_rate": 8.999996009509166e-05, "loss": 0.7735, "step": 444 }, { "epoch": 0.09147908315345873, "grad_norm": 0.3225650191307068, "learning_rate": 8.999994568498878e-05, "loss": 0.7805, "step": 445 }, { "epoch": 0.0916846541268373, "grad_norm": 0.5321671962738037, "learning_rate": 8.999992905794889e-05, "loss": 0.8085, "step": 446 }, { "epoch": 0.09189022510021586, "grad_norm": 0.22884899377822876, "learning_rate": 8.999991021397283e-05, "loss": 0.6043, "step": 447 }, { "epoch": 0.0920957960735944, "grad_norm": 0.4308418333530426, "learning_rate": 8.999988915306154e-05, "loss": 0.7922, "step": 448 }, { "epoch": 0.09230136704697296, "grad_norm": 0.33842045068740845, "learning_rate": 8.999986587521601e-05, "loss": 0.8081, "step": 449 }, { "epoch": 0.09250693802035152, "grad_norm": 0.18722039461135864, "learning_rate": 8.999984038043744e-05, "loss": 0.5795, "step": 450 }, { "epoch": 0.09271250899373008, "grad_norm": 0.4215300679206848, "learning_rate": 8.999981266872705e-05, "loss": 0.7982, "step": 451 }, { "epoch": 0.09291807996710864, "grad_norm": 0.16856899857521057, "learning_rate": 8.999978274008622e-05, "loss": 0.5915, "step": 452 }, { "epoch": 0.0931236509404872, "grad_norm": 0.40007540583610535, "learning_rate": 8.999975059451644e-05, "loss": 0.7934, "step": 453 }, { "epoch": 0.09332922191386576, "grad_norm": 0.3234069049358368, "learning_rate": 8.999971623201925e-05, "loss": 0.7963, "step": 454 }, { "epoch": 0.09353479288724433, "grad_norm": 0.33642691373825073, "learning_rate": 8.999967965259639e-05, "loss": 0.7909, "step": 455 }, { "epoch": 0.09374036386062289, "grad_norm": 0.33508196473121643, "learning_rate": 8.999964085624962e-05, "loss": 0.7777, "step": 456 }, { "epoch": 0.09394593483400143, "grad_norm": 0.2953488826751709, "learning_rate": 8.999959984298089e-05, "loss": 0.7596, "step": 457 }, { "epoch": 0.09415150580737999, "grad_norm": 0.32082295417785645, "learning_rate": 8.99995566127922e-05, "loss": 0.7774, "step": 458 }, { "epoch": 0.09435707678075855, "grad_norm": 0.31374961137771606, "learning_rate": 8.999951116568568e-05, "loss": 0.7898, "step": 459 }, { "epoch": 0.09456264775413711, "grad_norm": 0.29701462388038635, "learning_rate": 8.999946350166357e-05, "loss": 0.7725, "step": 460 }, { "epoch": 0.09476821872751567, "grad_norm": 0.3302834630012512, "learning_rate": 8.999941362072822e-05, "loss": 0.7727, "step": 461 }, { "epoch": 0.09497378970089423, "grad_norm": 0.28933510184288025, "learning_rate": 8.99993615228821e-05, "loss": 0.8082, "step": 462 }, { "epoch": 0.0951793606742728, "grad_norm": 0.28469645977020264, "learning_rate": 8.999930720812776e-05, "loss": 0.78, "step": 463 }, { "epoch": 0.09538493164765136, "grad_norm": 0.30801114439964294, "learning_rate": 8.999925067646787e-05, "loss": 0.8154, "step": 464 }, { "epoch": 0.09559050262102992, "grad_norm": 0.28879374265670776, "learning_rate": 8.999919192790524e-05, "loss": 0.6174, "step": 465 }, { "epoch": 0.09579607359440848, "grad_norm": 0.35134953260421753, "learning_rate": 8.999913096244273e-05, "loss": 0.7819, "step": 466 }, { "epoch": 0.09600164456778702, "grad_norm": 0.31098031997680664, "learning_rate": 8.999906778008339e-05, "loss": 0.7876, "step": 467 }, { "epoch": 0.09620721554116558, "grad_norm": 0.31209641695022583, "learning_rate": 8.999900238083028e-05, "loss": 0.823, "step": 468 }, { "epoch": 0.09641278651454414, "grad_norm": 0.3438270688056946, "learning_rate": 8.999893476468666e-05, "loss": 0.7994, "step": 469 }, { "epoch": 0.0966183574879227, "grad_norm": 0.303815096616745, "learning_rate": 8.999886493165584e-05, "loss": 0.8183, "step": 470 }, { "epoch": 0.09682392846130126, "grad_norm": 0.31640782952308655, "learning_rate": 8.999879288174128e-05, "loss": 0.7947, "step": 471 }, { "epoch": 0.09702949943467983, "grad_norm": 0.31044483184814453, "learning_rate": 8.999871861494651e-05, "loss": 0.7867, "step": 472 }, { "epoch": 0.09723507040805839, "grad_norm": 0.3066295385360718, "learning_rate": 8.999864213127521e-05, "loss": 0.782, "step": 473 }, { "epoch": 0.09744064138143695, "grad_norm": 0.32025477290153503, "learning_rate": 8.999856343073111e-05, "loss": 0.7756, "step": 474 }, { "epoch": 0.0976462123548155, "grad_norm": 0.3043205440044403, "learning_rate": 8.999848251331813e-05, "loss": 0.8049, "step": 475 }, { "epoch": 0.09785178332819405, "grad_norm": 0.3142707943916321, "learning_rate": 8.999839937904024e-05, "loss": 0.7967, "step": 476 }, { "epoch": 0.09805735430157261, "grad_norm": 0.2932131886482239, "learning_rate": 8.999831402790153e-05, "loss": 0.8031, "step": 477 }, { "epoch": 0.09826292527495117, "grad_norm": 0.30467313528060913, "learning_rate": 8.999822645990621e-05, "loss": 0.7804, "step": 478 }, { "epoch": 0.09846849624832973, "grad_norm": 0.2950557768344879, "learning_rate": 8.99981366750586e-05, "loss": 0.8004, "step": 479 }, { "epoch": 0.0986740672217083, "grad_norm": 0.2995617091655731, "learning_rate": 8.99980446733631e-05, "loss": 0.8044, "step": 480 }, { "epoch": 0.09887963819508686, "grad_norm": 0.29080766439437866, "learning_rate": 8.999795045482429e-05, "loss": 0.7603, "step": 481 }, { "epoch": 0.09908520916846542, "grad_norm": 0.29487237334251404, "learning_rate": 8.999785401944675e-05, "loss": 0.8036, "step": 482 }, { "epoch": 0.09929078014184398, "grad_norm": 0.30198103189468384, "learning_rate": 8.999775536723527e-05, "loss": 0.7993, "step": 483 }, { "epoch": 0.09949635111522254, "grad_norm": 0.30626240372657776, "learning_rate": 8.999765449819471e-05, "loss": 0.7928, "step": 484 }, { "epoch": 0.09970192208860108, "grad_norm": 0.3268794119358063, "learning_rate": 8.999755141233002e-05, "loss": 0.7797, "step": 485 }, { "epoch": 0.09990749306197964, "grad_norm": 0.41261476278305054, "learning_rate": 8.99974461096463e-05, "loss": 0.628, "step": 486 }, { "epoch": 0.1001130640353582, "grad_norm": 0.2068365067243576, "learning_rate": 8.999733859014873e-05, "loss": 0.6014, "step": 487 }, { "epoch": 0.10031863500873676, "grad_norm": 0.6694285869598389, "learning_rate": 8.99972288538426e-05, "loss": 0.8168, "step": 488 }, { "epoch": 0.10052420598211532, "grad_norm": 0.3849710524082184, "learning_rate": 8.999711690073331e-05, "loss": 0.7958, "step": 489 }, { "epoch": 0.10072977695549389, "grad_norm": 0.4657621383666992, "learning_rate": 8.99970027308264e-05, "loss": 0.7877, "step": 490 }, { "epoch": 0.10093534792887245, "grad_norm": 0.3709288537502289, "learning_rate": 8.999688634412747e-05, "loss": 0.781, "step": 491 }, { "epoch": 0.101140918902251, "grad_norm": 0.3850356340408325, "learning_rate": 8.999676774064228e-05, "loss": 0.7822, "step": 492 }, { "epoch": 0.10134648987562957, "grad_norm": 0.32711490988731384, "learning_rate": 8.999664692037665e-05, "loss": 0.7903, "step": 493 }, { "epoch": 0.10155206084900811, "grad_norm": 0.35332190990448, "learning_rate": 8.999652388333654e-05, "loss": 0.7746, "step": 494 }, { "epoch": 0.10175763182238667, "grad_norm": 0.6354550719261169, "learning_rate": 8.999639862952801e-05, "loss": 0.6377, "step": 495 }, { "epoch": 0.10196320279576523, "grad_norm": 0.4530143737792969, "learning_rate": 8.999627115895724e-05, "loss": 0.8012, "step": 496 }, { "epoch": 0.1021687737691438, "grad_norm": 0.38917437195777893, "learning_rate": 8.99961414716305e-05, "loss": 0.7772, "step": 497 }, { "epoch": 0.10237434474252236, "grad_norm": 0.3817954361438751, "learning_rate": 8.999600956755417e-05, "loss": 0.769, "step": 498 }, { "epoch": 0.10257991571590092, "grad_norm": 0.3404269814491272, "learning_rate": 8.999587544673475e-05, "loss": 0.7832, "step": 499 }, { "epoch": 0.10278548668927948, "grad_norm": 0.29421180486679077, "learning_rate": 8.99957391091789e-05, "loss": 0.6173, "step": 500 }, { "epoch": 0.10299105766265804, "grad_norm": 0.4653105139732361, "learning_rate": 8.999560055489324e-05, "loss": 0.7835, "step": 501 }, { "epoch": 0.1031966286360366, "grad_norm": 0.3839401304721832, "learning_rate": 8.99954597838847e-05, "loss": 0.7978, "step": 502 }, { "epoch": 0.10340219960941516, "grad_norm": 0.3156857192516327, "learning_rate": 8.999531679616013e-05, "loss": 0.7589, "step": 503 }, { "epoch": 0.1036077705827937, "grad_norm": 0.3422304391860962, "learning_rate": 8.999517159172662e-05, "loss": 0.7809, "step": 504 }, { "epoch": 0.10381334155617226, "grad_norm": 0.340270072221756, "learning_rate": 8.999502417059132e-05, "loss": 0.7981, "step": 505 }, { "epoch": 0.10401891252955082, "grad_norm": 0.30371013283729553, "learning_rate": 8.999487453276148e-05, "loss": 0.7967, "step": 506 }, { "epoch": 0.10422448350292939, "grad_norm": 0.2999022901058197, "learning_rate": 8.999472267824447e-05, "loss": 0.7964, "step": 507 }, { "epoch": 0.10443005447630795, "grad_norm": 0.3306732475757599, "learning_rate": 8.999456860704778e-05, "loss": 0.7903, "step": 508 }, { "epoch": 0.1046356254496865, "grad_norm": 0.3183232843875885, "learning_rate": 8.999441231917901e-05, "loss": 0.7773, "step": 509 }, { "epoch": 0.10484119642306507, "grad_norm": 0.29510068893432617, "learning_rate": 8.999425381464582e-05, "loss": 0.7812, "step": 510 }, { "epoch": 0.10504676739644363, "grad_norm": 0.30512964725494385, "learning_rate": 8.999409309345609e-05, "loss": 0.8054, "step": 511 }, { "epoch": 0.10525233836982219, "grad_norm": 0.30337393283843994, "learning_rate": 8.999393015561767e-05, "loss": 0.767, "step": 512 }, { "epoch": 0.10545790934320073, "grad_norm": 0.32128670811653137, "learning_rate": 8.999376500113861e-05, "loss": 0.7576, "step": 513 }, { "epoch": 0.1056634803165793, "grad_norm": 0.22419625520706177, "learning_rate": 8.999359763002704e-05, "loss": 0.6232, "step": 514 }, { "epoch": 0.10586905128995786, "grad_norm": 0.35744601488113403, "learning_rate": 8.999342804229125e-05, "loss": 0.7999, "step": 515 }, { "epoch": 0.10607462226333642, "grad_norm": 0.31676504015922546, "learning_rate": 8.999325623793952e-05, "loss": 0.7892, "step": 516 }, { "epoch": 0.10628019323671498, "grad_norm": 0.3098521828651428, "learning_rate": 8.999308221698038e-05, "loss": 0.7892, "step": 517 }, { "epoch": 0.10648576421009354, "grad_norm": 0.32372260093688965, "learning_rate": 8.999290597942237e-05, "loss": 0.7697, "step": 518 }, { "epoch": 0.1066913351834721, "grad_norm": 0.3482767343521118, "learning_rate": 8.999272752527417e-05, "loss": 0.8299, "step": 519 }, { "epoch": 0.10689690615685066, "grad_norm": 0.17404678463935852, "learning_rate": 8.999254685454459e-05, "loss": 0.5814, "step": 520 }, { "epoch": 0.10710247713022922, "grad_norm": 0.36048364639282227, "learning_rate": 8.999236396724252e-05, "loss": 0.7881, "step": 521 }, { "epoch": 0.10730804810360776, "grad_norm": 0.30838942527770996, "learning_rate": 8.999217886337696e-05, "loss": 0.7818, "step": 522 }, { "epoch": 0.10751361907698632, "grad_norm": 0.3079747259616852, "learning_rate": 8.999199154295705e-05, "loss": 0.7732, "step": 523 }, { "epoch": 0.10771919005036489, "grad_norm": 0.3467218577861786, "learning_rate": 8.9991802005992e-05, "loss": 0.7969, "step": 524 }, { "epoch": 0.10792476102374345, "grad_norm": 0.29866865277290344, "learning_rate": 8.999161025249117e-05, "loss": 0.7996, "step": 525 }, { "epoch": 0.108130331997122, "grad_norm": 0.17642079293727875, "learning_rate": 8.999141628246398e-05, "loss": 0.5753, "step": 526 }, { "epoch": 0.10833590297050057, "grad_norm": 0.3251280188560486, "learning_rate": 8.999122009592002e-05, "loss": 0.7962, "step": 527 }, { "epoch": 0.10854147394387913, "grad_norm": 0.316807359457016, "learning_rate": 8.999102169286891e-05, "loss": 0.7592, "step": 528 }, { "epoch": 0.10874704491725769, "grad_norm": 0.16698336601257324, "learning_rate": 8.999082107332046e-05, "loss": 0.5955, "step": 529 }, { "epoch": 0.10895261589063625, "grad_norm": 0.30919867753982544, "learning_rate": 8.999061823728455e-05, "loss": 0.7481, "step": 530 }, { "epoch": 0.1091581868640148, "grad_norm": 0.2959042489528656, "learning_rate": 8.999041318477114e-05, "loss": 0.7795, "step": 531 }, { "epoch": 0.10936375783739335, "grad_norm": 0.15893301367759705, "learning_rate": 8.999020591579038e-05, "loss": 0.5953, "step": 532 }, { "epoch": 0.10956932881077192, "grad_norm": 0.16407330334186554, "learning_rate": 8.998999643035244e-05, "loss": 0.5873, "step": 533 }, { "epoch": 0.10977489978415048, "grad_norm": 0.3498159348964691, "learning_rate": 8.998978472846768e-05, "loss": 0.7825, "step": 534 }, { "epoch": 0.10998047075752904, "grad_norm": 0.3068999946117401, "learning_rate": 8.99895708101465e-05, "loss": 0.8112, "step": 535 }, { "epoch": 0.1101860417309076, "grad_norm": 0.28588443994522095, "learning_rate": 8.998935467539944e-05, "loss": 0.7778, "step": 536 }, { "epoch": 0.11039161270428616, "grad_norm": 0.31996187567710876, "learning_rate": 8.998913632423716e-05, "loss": 0.7736, "step": 537 }, { "epoch": 0.11059718367766472, "grad_norm": 0.3105761408805847, "learning_rate": 8.998891575667041e-05, "loss": 0.7683, "step": 538 }, { "epoch": 0.11080275465104328, "grad_norm": 0.3134320378303528, "learning_rate": 8.998869297271006e-05, "loss": 0.7877, "step": 539 }, { "epoch": 0.11100832562442182, "grad_norm": 0.2837049067020416, "learning_rate": 8.998846797236708e-05, "loss": 0.7664, "step": 540 }, { "epoch": 0.11121389659780039, "grad_norm": 0.2891695499420166, "learning_rate": 8.998824075565258e-05, "loss": 0.7862, "step": 541 }, { "epoch": 0.11141946757117895, "grad_norm": 0.2949972450733185, "learning_rate": 8.99880113225777e-05, "loss": 0.7551, "step": 542 }, { "epoch": 0.1116250385445575, "grad_norm": 0.2788076400756836, "learning_rate": 8.99877796731538e-05, "loss": 0.7657, "step": 543 }, { "epoch": 0.11183060951793607, "grad_norm": 0.237320676445961, "learning_rate": 8.998754580739225e-05, "loss": 0.6081, "step": 544 }, { "epoch": 0.11203618049131463, "grad_norm": 0.3368750810623169, "learning_rate": 8.99873097253046e-05, "loss": 0.7962, "step": 545 }, { "epoch": 0.11224175146469319, "grad_norm": 0.16897863149642944, "learning_rate": 8.998707142690247e-05, "loss": 0.5933, "step": 546 }, { "epoch": 0.11244732243807175, "grad_norm": 0.31463444232940674, "learning_rate": 8.99868309121976e-05, "loss": 0.778, "step": 547 }, { "epoch": 0.11265289341145031, "grad_norm": 0.28116437792778015, "learning_rate": 8.998658818120184e-05, "loss": 0.7677, "step": 548 }, { "epoch": 0.11285846438482887, "grad_norm": 0.2780570685863495, "learning_rate": 8.998634323392714e-05, "loss": 0.7736, "step": 549 }, { "epoch": 0.11306403535820742, "grad_norm": 0.18777993321418762, "learning_rate": 8.998609607038558e-05, "loss": 0.5928, "step": 550 }, { "epoch": 0.11326960633158598, "grad_norm": 0.3512813150882721, "learning_rate": 8.998584669058933e-05, "loss": 0.7971, "step": 551 }, { "epoch": 0.11347517730496454, "grad_norm": 0.1571076214313507, "learning_rate": 8.998559509455066e-05, "loss": 0.6026, "step": 552 }, { "epoch": 0.1136807482783431, "grad_norm": 0.1699524074792862, "learning_rate": 8.9985341282282e-05, "loss": 0.5835, "step": 553 }, { "epoch": 0.11388631925172166, "grad_norm": 0.38411441445350647, "learning_rate": 8.998508525379584e-05, "loss": 0.7829, "step": 554 }, { "epoch": 0.11409189022510022, "grad_norm": 0.2952065169811249, "learning_rate": 8.998482700910478e-05, "loss": 0.7878, "step": 555 }, { "epoch": 0.11429746119847878, "grad_norm": 0.3076973557472229, "learning_rate": 8.998456654822156e-05, "loss": 0.7988, "step": 556 }, { "epoch": 0.11450303217185734, "grad_norm": 0.30433389544487, "learning_rate": 8.9984303871159e-05, "loss": 0.78, "step": 557 }, { "epoch": 0.1147086031452359, "grad_norm": 0.30562445521354675, "learning_rate": 8.998403897793004e-05, "loss": 0.7832, "step": 558 }, { "epoch": 0.11491417411861445, "grad_norm": 0.3120015561580658, "learning_rate": 8.998377186854774e-05, "loss": 0.7989, "step": 559 }, { "epoch": 0.115119745091993, "grad_norm": 0.26990431547164917, "learning_rate": 8.998350254302524e-05, "loss": 0.7471, "step": 560 }, { "epoch": 0.11532531606537157, "grad_norm": 0.2938286364078522, "learning_rate": 8.998323100137585e-05, "loss": 0.7667, "step": 561 }, { "epoch": 0.11553088703875013, "grad_norm": 0.32502278685569763, "learning_rate": 8.998295724361289e-05, "loss": 0.7618, "step": 562 }, { "epoch": 0.11573645801212869, "grad_norm": 0.296321839094162, "learning_rate": 8.998268126974988e-05, "loss": 0.7828, "step": 563 }, { "epoch": 0.11594202898550725, "grad_norm": 0.30217137932777405, "learning_rate": 8.998240307980042e-05, "loss": 0.765, "step": 564 }, { "epoch": 0.11614759995888581, "grad_norm": 0.2876279950141907, "learning_rate": 8.998212267377822e-05, "loss": 0.7687, "step": 565 }, { "epoch": 0.11635317093226437, "grad_norm": 0.2792581021785736, "learning_rate": 8.998184005169706e-05, "loss": 0.785, "step": 566 }, { "epoch": 0.11655874190564293, "grad_norm": 0.28941112756729126, "learning_rate": 8.99815552135709e-05, "loss": 0.7732, "step": 567 }, { "epoch": 0.11676431287902148, "grad_norm": 0.28016045689582825, "learning_rate": 8.998126815941376e-05, "loss": 0.8033, "step": 568 }, { "epoch": 0.11696988385240004, "grad_norm": 0.27612999081611633, "learning_rate": 8.998097888923977e-05, "loss": 0.7811, "step": 569 }, { "epoch": 0.1171754548257786, "grad_norm": 0.2725747525691986, "learning_rate": 8.99806874030632e-05, "loss": 0.7426, "step": 570 }, { "epoch": 0.11738102579915716, "grad_norm": 0.23188281059265137, "learning_rate": 8.998039370089838e-05, "loss": 0.6119, "step": 571 }, { "epoch": 0.11758659677253572, "grad_norm": 0.329795777797699, "learning_rate": 8.998009778275982e-05, "loss": 0.7774, "step": 572 }, { "epoch": 0.11779216774591428, "grad_norm": 0.292244017124176, "learning_rate": 8.997979964866208e-05, "loss": 0.7684, "step": 573 }, { "epoch": 0.11799773871929284, "grad_norm": 0.2874715030193329, "learning_rate": 8.997949929861984e-05, "loss": 0.7606, "step": 574 }, { "epoch": 0.1182033096926714, "grad_norm": 0.3013349175453186, "learning_rate": 8.99791967326479e-05, "loss": 0.7686, "step": 575 }, { "epoch": 0.11840888066604996, "grad_norm": 0.2986513674259186, "learning_rate": 8.997889195076117e-05, "loss": 0.7651, "step": 576 }, { "epoch": 0.1186144516394285, "grad_norm": 0.2857048809528351, "learning_rate": 8.997858495297467e-05, "loss": 0.7875, "step": 577 }, { "epoch": 0.11882002261280707, "grad_norm": 0.27221107482910156, "learning_rate": 8.997827573930351e-05, "loss": 0.785, "step": 578 }, { "epoch": 0.11902559358618563, "grad_norm": 0.29440751671791077, "learning_rate": 8.997796430976294e-05, "loss": 0.7703, "step": 579 }, { "epoch": 0.11923116455956419, "grad_norm": 0.28240329027175903, "learning_rate": 8.99776506643683e-05, "loss": 0.7901, "step": 580 }, { "epoch": 0.11943673553294275, "grad_norm": 0.27463993430137634, "learning_rate": 8.997733480313503e-05, "loss": 0.7616, "step": 581 }, { "epoch": 0.11964230650632131, "grad_norm": 0.2833562195301056, "learning_rate": 8.99770167260787e-05, "loss": 0.7512, "step": 582 }, { "epoch": 0.11984787747969987, "grad_norm": 0.22366029024124146, "learning_rate": 8.997669643321496e-05, "loss": 0.6235, "step": 583 }, { "epoch": 0.12005344845307843, "grad_norm": 0.17241071164608002, "learning_rate": 8.997637392455963e-05, "loss": 0.5989, "step": 584 }, { "epoch": 0.12025901942645699, "grad_norm": 0.15749235451221466, "learning_rate": 8.997604920012856e-05, "loss": 0.5973, "step": 585 }, { "epoch": 0.12046459039983555, "grad_norm": 0.42778778076171875, "learning_rate": 8.997572225993778e-05, "loss": 0.7722, "step": 586 }, { "epoch": 0.1206701613732141, "grad_norm": 0.3165600597858429, "learning_rate": 8.997539310400337e-05, "loss": 0.7524, "step": 587 }, { "epoch": 0.12087573234659266, "grad_norm": 0.3048163950443268, "learning_rate": 8.997506173234156e-05, "loss": 0.7699, "step": 588 }, { "epoch": 0.12108130331997122, "grad_norm": 0.3166545331478119, "learning_rate": 8.997472814496867e-05, "loss": 0.7819, "step": 589 }, { "epoch": 0.12128687429334978, "grad_norm": 0.3150469958782196, "learning_rate": 8.997439234190113e-05, "loss": 0.7419, "step": 590 }, { "epoch": 0.12149244526672834, "grad_norm": 0.3222194015979767, "learning_rate": 8.99740543231555e-05, "loss": 0.7808, "step": 591 }, { "epoch": 0.1216980162401069, "grad_norm": 0.3114274740219116, "learning_rate": 8.99737140887484e-05, "loss": 0.7859, "step": 592 }, { "epoch": 0.12190358721348546, "grad_norm": 0.2929398715496063, "learning_rate": 8.997337163869665e-05, "loss": 0.8025, "step": 593 }, { "epoch": 0.12210915818686402, "grad_norm": 0.2900030016899109, "learning_rate": 8.997302697301706e-05, "loss": 0.7914, "step": 594 }, { "epoch": 0.12231472916024258, "grad_norm": 0.2980877459049225, "learning_rate": 8.997268009172664e-05, "loss": 0.7548, "step": 595 }, { "epoch": 0.12252030013362113, "grad_norm": 0.280519962310791, "learning_rate": 8.997233099484247e-05, "loss": 0.7923, "step": 596 }, { "epoch": 0.12272587110699969, "grad_norm": 0.27224200963974, "learning_rate": 8.997197968238175e-05, "loss": 0.7935, "step": 597 }, { "epoch": 0.12293144208037825, "grad_norm": 0.2736833691596985, "learning_rate": 8.99716261543618e-05, "loss": 0.7409, "step": 598 }, { "epoch": 0.12313701305375681, "grad_norm": 0.28164225816726685, "learning_rate": 8.99712704108e-05, "loss": 0.7855, "step": 599 }, { "epoch": 0.12334258402713537, "grad_norm": 0.27927008271217346, "learning_rate": 8.997091245171394e-05, "loss": 0.7768, "step": 600 }, { "epoch": 0.12354815500051393, "grad_norm": 0.2606373429298401, "learning_rate": 8.997055227712119e-05, "loss": 0.764, "step": 601 }, { "epoch": 0.12375372597389249, "grad_norm": 0.32072070240974426, "learning_rate": 8.997018988703953e-05, "loss": 0.8124, "step": 602 }, { "epoch": 0.12395929694727105, "grad_norm": 0.4943363666534424, "learning_rate": 8.996982528148682e-05, "loss": 0.6366, "step": 603 }, { "epoch": 0.12416486792064961, "grad_norm": 0.3180435299873352, "learning_rate": 8.996945846048098e-05, "loss": 0.7723, "step": 604 }, { "epoch": 0.12437043889402816, "grad_norm": 0.29927217960357666, "learning_rate": 8.996908942404012e-05, "loss": 0.7608, "step": 605 }, { "epoch": 0.12457600986740672, "grad_norm": 0.2776423990726471, "learning_rate": 8.99687181721824e-05, "loss": 0.775, "step": 606 }, { "epoch": 0.12478158084078528, "grad_norm": 0.3051820397377014, "learning_rate": 8.996834470492613e-05, "loss": 0.7923, "step": 607 }, { "epoch": 0.12498715181416384, "grad_norm": 0.2759751081466675, "learning_rate": 8.99679690222897e-05, "loss": 0.7486, "step": 608 }, { "epoch": 0.1251927227875424, "grad_norm": 0.2878243923187256, "learning_rate": 8.99675911242916e-05, "loss": 0.7774, "step": 609 }, { "epoch": 0.12539829376092096, "grad_norm": 0.2739849090576172, "learning_rate": 8.996721101095048e-05, "loss": 0.771, "step": 610 }, { "epoch": 0.12560386473429952, "grad_norm": 0.2817218601703644, "learning_rate": 8.996682868228505e-05, "loss": 0.761, "step": 611 }, { "epoch": 0.12580943570767808, "grad_norm": 0.2750679552555084, "learning_rate": 8.996644413831412e-05, "loss": 0.7739, "step": 612 }, { "epoch": 0.12601500668105664, "grad_norm": 0.26886436343193054, "learning_rate": 8.996605737905669e-05, "loss": 0.7585, "step": 613 }, { "epoch": 0.1262205776544352, "grad_norm": 0.2675554156303406, "learning_rate": 8.996566840453178e-05, "loss": 0.7639, "step": 614 }, { "epoch": 0.12642614862781376, "grad_norm": 0.2672448456287384, "learning_rate": 8.996527721475855e-05, "loss": 0.7687, "step": 615 }, { "epoch": 0.12663171960119232, "grad_norm": 0.27541592717170715, "learning_rate": 8.996488380975626e-05, "loss": 0.7702, "step": 616 }, { "epoch": 0.12683729057457088, "grad_norm": 1.3074686527252197, "learning_rate": 8.996448818954434e-05, "loss": 0.6375, "step": 617 }, { "epoch": 0.12704286154794944, "grad_norm": 0.2855135202407837, "learning_rate": 8.996409035414224e-05, "loss": 0.7633, "step": 618 }, { "epoch": 0.12724843252132798, "grad_norm": 0.6012619137763977, "learning_rate": 8.996369030356957e-05, "loss": 0.6213, "step": 619 }, { "epoch": 0.12745400349470654, "grad_norm": 0.30922386050224304, "learning_rate": 8.996328803784604e-05, "loss": 0.7827, "step": 620 }, { "epoch": 0.1276595744680851, "grad_norm": 0.29752808809280396, "learning_rate": 8.996288355699146e-05, "loss": 0.773, "step": 621 }, { "epoch": 0.12786514544146366, "grad_norm": 0.31884685158729553, "learning_rate": 8.996247686102577e-05, "loss": 0.7656, "step": 622 }, { "epoch": 0.12807071641484222, "grad_norm": 0.2772408425807953, "learning_rate": 8.996206794996899e-05, "loss": 0.7898, "step": 623 }, { "epoch": 0.12827628738822078, "grad_norm": 0.2835623323917389, "learning_rate": 8.996165682384129e-05, "loss": 0.7631, "step": 624 }, { "epoch": 0.12848185836159934, "grad_norm": 0.3379913568496704, "learning_rate": 8.996124348266291e-05, "loss": 0.7805, "step": 625 }, { "epoch": 0.1286874293349779, "grad_norm": 0.26578038930892944, "learning_rate": 8.996082792645419e-05, "loss": 0.608, "step": 626 }, { "epoch": 0.12889300030835646, "grad_norm": 0.29912567138671875, "learning_rate": 8.996041015523563e-05, "loss": 0.7565, "step": 627 }, { "epoch": 0.12909857128173502, "grad_norm": 0.3043285608291626, "learning_rate": 8.995999016902781e-05, "loss": 0.7787, "step": 628 }, { "epoch": 0.12930414225511358, "grad_norm": 0.1923503428697586, "learning_rate": 8.995956796785143e-05, "loss": 0.6051, "step": 629 }, { "epoch": 0.12950971322849214, "grad_norm": 0.29241567850112915, "learning_rate": 8.995914355172726e-05, "loss": 0.7742, "step": 630 }, { "epoch": 0.1297152842018707, "grad_norm": 0.1634470671415329, "learning_rate": 8.995871692067622e-05, "loss": 0.6009, "step": 631 }, { "epoch": 0.12992085517524926, "grad_norm": 0.1948513388633728, "learning_rate": 8.995828807471935e-05, "loss": 0.6038, "step": 632 }, { "epoch": 0.13012642614862782, "grad_norm": 0.34593167901039124, "learning_rate": 8.995785701387774e-05, "loss": 0.7712, "step": 633 }, { "epoch": 0.13033199712200638, "grad_norm": 0.2905696630477905, "learning_rate": 8.995742373817268e-05, "loss": 0.7745, "step": 634 }, { "epoch": 0.13053756809538494, "grad_norm": 0.28553932905197144, "learning_rate": 8.995698824762547e-05, "loss": 0.779, "step": 635 }, { "epoch": 0.1307431390687635, "grad_norm": 0.18538178503513336, "learning_rate": 8.995655054225757e-05, "loss": 0.623, "step": 636 }, { "epoch": 0.13094871004214204, "grad_norm": 0.32950466871261597, "learning_rate": 8.995611062209054e-05, "loss": 0.7682, "step": 637 }, { "epoch": 0.1311542810155206, "grad_norm": 0.28783705830574036, "learning_rate": 8.995566848714609e-05, "loss": 0.7534, "step": 638 }, { "epoch": 0.13135985198889916, "grad_norm": 0.2871015667915344, "learning_rate": 8.995522413744596e-05, "loss": 0.7315, "step": 639 }, { "epoch": 0.13156542296227772, "grad_norm": 0.18547143042087555, "learning_rate": 8.995477757301207e-05, "loss": 0.5805, "step": 640 }, { "epoch": 0.13177099393565628, "grad_norm": 0.34090474247932434, "learning_rate": 8.99543287938664e-05, "loss": 0.7783, "step": 641 }, { "epoch": 0.13197656490903484, "grad_norm": 0.2930915355682373, "learning_rate": 8.995387780003107e-05, "loss": 0.768, "step": 642 }, { "epoch": 0.1321821358824134, "grad_norm": 0.28531643748283386, "learning_rate": 8.995342459152827e-05, "loss": 0.7627, "step": 643 }, { "epoch": 0.13238770685579196, "grad_norm": 0.2844246029853821, "learning_rate": 8.995296916838038e-05, "loss": 0.7588, "step": 644 }, { "epoch": 0.13259327782917052, "grad_norm": 0.2866900861263275, "learning_rate": 8.99525115306098e-05, "loss": 0.7569, "step": 645 }, { "epoch": 0.13279884880254908, "grad_norm": 0.2860448360443115, "learning_rate": 8.995205167823908e-05, "loss": 0.7614, "step": 646 }, { "epoch": 0.13300441977592764, "grad_norm": 0.2673685848712921, "learning_rate": 8.995158961129088e-05, "loss": 0.7753, "step": 647 }, { "epoch": 0.1332099907493062, "grad_norm": 0.2862294316291809, "learning_rate": 8.995112532978798e-05, "loss": 0.7682, "step": 648 }, { "epoch": 0.13341556172268476, "grad_norm": 0.27633753418922424, "learning_rate": 8.995065883375321e-05, "loss": 0.7726, "step": 649 }, { "epoch": 0.13362113269606332, "grad_norm": 0.26780807971954346, "learning_rate": 8.995019012320959e-05, "loss": 0.8017, "step": 650 }, { "epoch": 0.13382670366944188, "grad_norm": 0.27239716053009033, "learning_rate": 8.99497191981802e-05, "loss": 0.7479, "step": 651 }, { "epoch": 0.13403227464282044, "grad_norm": 0.2104814648628235, "learning_rate": 8.994924605868824e-05, "loss": 0.5866, "step": 652 }, { "epoch": 0.134237845616199, "grad_norm": 0.30780890583992004, "learning_rate": 8.994877070475701e-05, "loss": 0.7577, "step": 653 }, { "epoch": 0.13444341658957756, "grad_norm": 0.2910194993019104, "learning_rate": 8.994829313640995e-05, "loss": 0.779, "step": 654 }, { "epoch": 0.13464898756295612, "grad_norm": 0.277893602848053, "learning_rate": 8.994781335367057e-05, "loss": 0.77, "step": 655 }, { "epoch": 0.13485455853633466, "grad_norm": 0.28844013810157776, "learning_rate": 8.994733135656252e-05, "loss": 0.7746, "step": 656 }, { "epoch": 0.13506012950971322, "grad_norm": 0.28865233063697815, "learning_rate": 8.994684714510954e-05, "loss": 0.7825, "step": 657 }, { "epoch": 0.13526570048309178, "grad_norm": 0.3075569272041321, "learning_rate": 8.994636071933546e-05, "loss": 0.753, "step": 658 }, { "epoch": 0.13547127145647034, "grad_norm": 0.2790246903896332, "learning_rate": 8.994587207926429e-05, "loss": 0.7341, "step": 659 }, { "epoch": 0.1356768424298489, "grad_norm": 0.27742037177085876, "learning_rate": 8.994538122492006e-05, "loss": 0.7631, "step": 660 }, { "epoch": 0.13588241340322746, "grad_norm": 0.266181617975235, "learning_rate": 8.994488815632699e-05, "loss": 0.7381, "step": 661 }, { "epoch": 0.13608798437660602, "grad_norm": 0.2639121413230896, "learning_rate": 8.994439287350932e-05, "loss": 0.7634, "step": 662 }, { "epoch": 0.13629355534998458, "grad_norm": 0.271953821182251, "learning_rate": 8.994389537649151e-05, "loss": 0.7902, "step": 663 }, { "epoch": 0.13649912632336314, "grad_norm": 0.2754836082458496, "learning_rate": 8.994339566529804e-05, "loss": 0.7708, "step": 664 }, { "epoch": 0.1367046972967417, "grad_norm": 0.30965548753738403, "learning_rate": 8.994289373995352e-05, "loss": 0.7607, "step": 665 }, { "epoch": 0.13691026827012026, "grad_norm": 0.28129950165748596, "learning_rate": 8.99423896004827e-05, "loss": 0.7701, "step": 666 }, { "epoch": 0.13711583924349882, "grad_norm": 0.23147864639759064, "learning_rate": 8.99418832469104e-05, "loss": 0.6085, "step": 667 }, { "epoch": 0.13732141021687738, "grad_norm": 0.3050214648246765, "learning_rate": 8.994137467926156e-05, "loss": 0.7704, "step": 668 }, { "epoch": 0.13752698119025594, "grad_norm": 0.15223456919193268, "learning_rate": 8.994086389756126e-05, "loss": 0.6074, "step": 669 }, { "epoch": 0.1377325521636345, "grad_norm": 0.2975500226020813, "learning_rate": 8.994035090183464e-05, "loss": 0.7422, "step": 670 }, { "epoch": 0.13793812313701306, "grad_norm": 0.28416451811790466, "learning_rate": 8.993983569210698e-05, "loss": 0.7575, "step": 671 }, { "epoch": 0.13814369411039162, "grad_norm": 0.25423794984817505, "learning_rate": 8.993931826840368e-05, "loss": 0.7617, "step": 672 }, { "epoch": 0.13834926508377018, "grad_norm": 0.2733759582042694, "learning_rate": 8.993879863075019e-05, "loss": 0.7478, "step": 673 }, { "epoch": 0.13855483605714872, "grad_norm": 0.2590562105178833, "learning_rate": 8.993827677917215e-05, "loss": 0.7578, "step": 674 }, { "epoch": 0.13876040703052728, "grad_norm": 0.26819926500320435, "learning_rate": 8.993775271369525e-05, "loss": 0.7485, "step": 675 }, { "epoch": 0.13896597800390584, "grad_norm": 0.261787474155426, "learning_rate": 8.993722643434532e-05, "loss": 0.7623, "step": 676 }, { "epoch": 0.1391715489772844, "grad_norm": 0.27696770429611206, "learning_rate": 8.993669794114828e-05, "loss": 0.5995, "step": 677 }, { "epoch": 0.13937711995066296, "grad_norm": 0.1687610000371933, "learning_rate": 8.993616723413015e-05, "loss": 0.5993, "step": 678 }, { "epoch": 0.13958269092404152, "grad_norm": 0.34388282895088196, "learning_rate": 8.993563431331711e-05, "loss": 0.7844, "step": 679 }, { "epoch": 0.13978826189742008, "grad_norm": 0.3012101948261261, "learning_rate": 8.993509917873539e-05, "loss": 0.806, "step": 680 }, { "epoch": 0.13999383287079864, "grad_norm": 0.27226656675338745, "learning_rate": 8.993456183041135e-05, "loss": 0.7302, "step": 681 }, { "epoch": 0.1401994038441772, "grad_norm": 0.2889186143875122, "learning_rate": 8.993402226837148e-05, "loss": 0.7609, "step": 682 }, { "epoch": 0.14040497481755576, "grad_norm": 0.33441823720932007, "learning_rate": 8.993348049264235e-05, "loss": 0.6023, "step": 683 }, { "epoch": 0.14061054579093432, "grad_norm": 0.21067148447036743, "learning_rate": 8.993293650325066e-05, "loss": 0.6154, "step": 684 }, { "epoch": 0.14081611676431288, "grad_norm": 0.4340059459209442, "learning_rate": 8.99323903002232e-05, "loss": 0.7965, "step": 685 }, { "epoch": 0.14102168773769144, "grad_norm": 0.3370809853076935, "learning_rate": 8.993184188358688e-05, "loss": 0.7557, "step": 686 }, { "epoch": 0.14122725871107, "grad_norm": 0.31289970874786377, "learning_rate": 8.993129125336873e-05, "loss": 0.7804, "step": 687 }, { "epoch": 0.14143282968444856, "grad_norm": 0.31972143054008484, "learning_rate": 8.993073840959587e-05, "loss": 0.7438, "step": 688 }, { "epoch": 0.14163840065782712, "grad_norm": 0.31906935572624207, "learning_rate": 8.993018335229552e-05, "loss": 0.7564, "step": 689 }, { "epoch": 0.14184397163120568, "grad_norm": 0.3015035390853882, "learning_rate": 8.992962608149505e-05, "loss": 0.7668, "step": 690 }, { "epoch": 0.14204954260458424, "grad_norm": 0.3022618591785431, "learning_rate": 8.99290665972219e-05, "loss": 0.775, "step": 691 }, { "epoch": 0.1422551135779628, "grad_norm": 0.3151668906211853, "learning_rate": 8.992850489950365e-05, "loss": 0.7715, "step": 692 }, { "epoch": 0.14246068455134134, "grad_norm": 0.29301926493644714, "learning_rate": 8.992794098836794e-05, "loss": 0.7472, "step": 693 }, { "epoch": 0.1426662555247199, "grad_norm": 0.2793315649032593, "learning_rate": 8.992737486384257e-05, "loss": 0.7795, "step": 694 }, { "epoch": 0.14287182649809846, "grad_norm": 0.28239625692367554, "learning_rate": 8.992680652595544e-05, "loss": 0.7649, "step": 695 }, { "epoch": 0.14307739747147702, "grad_norm": 0.2796134352684021, "learning_rate": 8.992623597473455e-05, "loss": 0.7207, "step": 696 }, { "epoch": 0.14328296844485558, "grad_norm": 0.2902660369873047, "learning_rate": 8.992566321020799e-05, "loss": 0.767, "step": 697 }, { "epoch": 0.14348853941823414, "grad_norm": 0.28000608086586, "learning_rate": 8.992508823240397e-05, "loss": 0.7655, "step": 698 }, { "epoch": 0.1436941103916127, "grad_norm": 0.28330516815185547, "learning_rate": 8.992451104135084e-05, "loss": 0.787, "step": 699 }, { "epoch": 0.14389968136499126, "grad_norm": 0.28026729822158813, "learning_rate": 8.992393163707704e-05, "loss": 0.774, "step": 700 }, { "epoch": 0.14410525233836982, "grad_norm": 0.5302313566207886, "learning_rate": 8.99233500196111e-05, "loss": 0.6421, "step": 701 }, { "epoch": 0.14431082331174838, "grad_norm": 0.3879426419734955, "learning_rate": 8.992276618898167e-05, "loss": 0.7804, "step": 702 }, { "epoch": 0.14451639428512694, "grad_norm": 0.34966281056404114, "learning_rate": 8.992218014521752e-05, "loss": 0.7597, "step": 703 }, { "epoch": 0.1447219652585055, "grad_norm": 0.31454893946647644, "learning_rate": 8.99215918883475e-05, "loss": 0.7709, "step": 704 }, { "epoch": 0.14492753623188406, "grad_norm": 0.3002963066101074, "learning_rate": 8.992100141840064e-05, "loss": 0.7689, "step": 705 }, { "epoch": 0.14513310720526262, "grad_norm": 0.2704041600227356, "learning_rate": 8.992040873540599e-05, "loss": 0.5956, "step": 706 }, { "epoch": 0.14533867817864118, "grad_norm": 0.37959620356559753, "learning_rate": 8.991981383939275e-05, "loss": 0.7709, "step": 707 }, { "epoch": 0.14554424915201974, "grad_norm": 0.21092139184474945, "learning_rate": 8.991921673039024e-05, "loss": 0.6133, "step": 708 }, { "epoch": 0.1457498201253983, "grad_norm": 0.3205825686454773, "learning_rate": 8.991861740842789e-05, "loss": 0.7759, "step": 709 }, { "epoch": 0.14595539109877687, "grad_norm": 0.3055117428302765, "learning_rate": 8.99180158735352e-05, "loss": 0.7601, "step": 710 }, { "epoch": 0.1461609620721554, "grad_norm": 0.2790381908416748, "learning_rate": 8.991741212574182e-05, "loss": 0.7473, "step": 711 }, { "epoch": 0.14636653304553396, "grad_norm": 0.22031188011169434, "learning_rate": 8.991680616507747e-05, "loss": 0.6042, "step": 712 }, { "epoch": 0.14657210401891252, "grad_norm": 0.18893392384052277, "learning_rate": 8.991619799157203e-05, "loss": 0.579, "step": 713 }, { "epoch": 0.14677767499229108, "grad_norm": 0.409572571516037, "learning_rate": 8.991558760525546e-05, "loss": 0.7456, "step": 714 }, { "epoch": 0.14698324596566964, "grad_norm": 0.30903562903404236, "learning_rate": 8.991497500615781e-05, "loss": 0.7597, "step": 715 }, { "epoch": 0.1471888169390482, "grad_norm": 0.3029564917087555, "learning_rate": 8.991436019430928e-05, "loss": 0.7574, "step": 716 }, { "epoch": 0.14739438791242676, "grad_norm": 0.40293097496032715, "learning_rate": 8.991374316974016e-05, "loss": 0.7726, "step": 717 }, { "epoch": 0.14759995888580532, "grad_norm": 0.2837783992290497, "learning_rate": 8.991312393248083e-05, "loss": 0.7345, "step": 718 }, { "epoch": 0.14780552985918388, "grad_norm": 0.31906503438949585, "learning_rate": 8.991250248256181e-05, "loss": 0.7493, "step": 719 }, { "epoch": 0.14801110083256244, "grad_norm": 0.28739094734191895, "learning_rate": 8.991187882001371e-05, "loss": 0.7527, "step": 720 }, { "epoch": 0.148216671805941, "grad_norm": 0.28792694211006165, "learning_rate": 8.991125294486727e-05, "loss": 0.7758, "step": 721 }, { "epoch": 0.14842224277931956, "grad_norm": 0.30004221200942993, "learning_rate": 8.99106248571533e-05, "loss": 0.774, "step": 722 }, { "epoch": 0.14862781375269812, "grad_norm": 0.2681220471858978, "learning_rate": 8.990999455690276e-05, "loss": 0.7636, "step": 723 }, { "epoch": 0.14883338472607668, "grad_norm": 0.2687060534954071, "learning_rate": 8.990936204414669e-05, "loss": 0.7763, "step": 724 }, { "epoch": 0.14903895569945524, "grad_norm": 0.3481808602809906, "learning_rate": 8.990872731891628e-05, "loss": 0.6129, "step": 725 }, { "epoch": 0.1492445266728338, "grad_norm": 0.31415244936943054, "learning_rate": 8.990809038124275e-05, "loss": 0.7789, "step": 726 }, { "epoch": 0.14945009764621237, "grad_norm": 0.2992306649684906, "learning_rate": 8.990745123115752e-05, "loss": 0.7361, "step": 727 }, { "epoch": 0.14965566861959093, "grad_norm": 0.2780331075191498, "learning_rate": 8.990680986869206e-05, "loss": 0.7657, "step": 728 }, { "epoch": 0.1498612395929695, "grad_norm": 0.20312556624412537, "learning_rate": 8.990616629387798e-05, "loss": 0.5755, "step": 729 }, { "epoch": 0.15006681056634802, "grad_norm": 0.32418328523635864, "learning_rate": 8.990552050674697e-05, "loss": 0.7537, "step": 730 }, { "epoch": 0.15027238153972658, "grad_norm": 0.30750200152397156, "learning_rate": 8.990487250733086e-05, "loss": 0.7585, "step": 731 }, { "epoch": 0.15047795251310514, "grad_norm": 0.2661309242248535, "learning_rate": 8.990422229566156e-05, "loss": 0.7454, "step": 732 }, { "epoch": 0.1506835234864837, "grad_norm": 0.2825012803077698, "learning_rate": 8.99035698717711e-05, "loss": 0.7466, "step": 733 }, { "epoch": 0.15088909445986226, "grad_norm": 0.27984434366226196, "learning_rate": 8.990291523569166e-05, "loss": 0.7558, "step": 734 }, { "epoch": 0.15109466543324082, "grad_norm": 0.20815995335578918, "learning_rate": 8.990225838745544e-05, "loss": 0.6112, "step": 735 }, { "epoch": 0.15130023640661938, "grad_norm": 0.3687712848186493, "learning_rate": 8.990159932709483e-05, "loss": 0.7705, "step": 736 }, { "epoch": 0.15150580737999794, "grad_norm": 0.28203409910202026, "learning_rate": 8.990093805464227e-05, "loss": 0.7658, "step": 737 }, { "epoch": 0.1517113783533765, "grad_norm": 0.26725029945373535, "learning_rate": 8.990027457013039e-05, "loss": 0.7545, "step": 738 }, { "epoch": 0.15191694932675506, "grad_norm": 0.27890896797180176, "learning_rate": 8.989960887359183e-05, "loss": 0.7713, "step": 739 }, { "epoch": 0.15212252030013362, "grad_norm": 0.2642592191696167, "learning_rate": 8.98989409650594e-05, "loss": 0.7418, "step": 740 }, { "epoch": 0.15232809127351218, "grad_norm": 0.28167617321014404, "learning_rate": 8.9898270844566e-05, "loss": 0.7641, "step": 741 }, { "epoch": 0.15253366224689074, "grad_norm": 0.2627207338809967, "learning_rate": 8.989759851214465e-05, "loss": 0.7453, "step": 742 }, { "epoch": 0.1527392332202693, "grad_norm": 0.28408879041671753, "learning_rate": 8.98969239678285e-05, "loss": 0.7596, "step": 743 }, { "epoch": 0.15294480419364787, "grad_norm": 0.2735441327095032, "learning_rate": 8.989624721165072e-05, "loss": 0.7715, "step": 744 }, { "epoch": 0.15315037516702643, "grad_norm": 0.18697437644004822, "learning_rate": 8.989556824364469e-05, "loss": 0.5824, "step": 745 }, { "epoch": 0.153355946140405, "grad_norm": 0.2745780646800995, "learning_rate": 8.989488706384386e-05, "loss": 0.7615, "step": 746 }, { "epoch": 0.15356151711378355, "grad_norm": 0.14835397899150848, "learning_rate": 8.989420367228179e-05, "loss": 0.5817, "step": 747 }, { "epoch": 0.15376708808716208, "grad_norm": 0.272223562002182, "learning_rate": 8.989351806899213e-05, "loss": 0.7756, "step": 748 }, { "epoch": 0.15397265906054064, "grad_norm": 0.1476040929555893, "learning_rate": 8.989283025400868e-05, "loss": 0.5714, "step": 749 }, { "epoch": 0.1541782300339192, "grad_norm": 0.29153406620025635, "learning_rate": 8.98921402273653e-05, "loss": 0.766, "step": 750 }, { "epoch": 0.15438380100729776, "grad_norm": 0.1418268382549286, "learning_rate": 8.989144798909598e-05, "loss": 0.6128, "step": 751 }, { "epoch": 0.15458937198067632, "grad_norm": 0.2692977786064148, "learning_rate": 8.989075353923487e-05, "loss": 0.7622, "step": 752 }, { "epoch": 0.15479494295405488, "grad_norm": 0.26004138588905334, "learning_rate": 8.989005687781615e-05, "loss": 0.7816, "step": 753 }, { "epoch": 0.15500051392743344, "grad_norm": 0.2757778465747833, "learning_rate": 8.988935800487412e-05, "loss": 0.7434, "step": 754 }, { "epoch": 0.155206084900812, "grad_norm": 0.255287766456604, "learning_rate": 8.988865692044326e-05, "loss": 0.7624, "step": 755 }, { "epoch": 0.15541165587419056, "grad_norm": 0.25884950160980225, "learning_rate": 8.988795362455807e-05, "loss": 0.7563, "step": 756 }, { "epoch": 0.15561722684756912, "grad_norm": 0.2563144266605377, "learning_rate": 8.988724811725321e-05, "loss": 0.7714, "step": 757 }, { "epoch": 0.15582279782094768, "grad_norm": 0.2678104639053345, "learning_rate": 8.988654039856344e-05, "loss": 0.7474, "step": 758 }, { "epoch": 0.15602836879432624, "grad_norm": 0.24936316907405853, "learning_rate": 8.98858304685236e-05, "loss": 0.7673, "step": 759 }, { "epoch": 0.1562339397677048, "grad_norm": 0.26165440678596497, "learning_rate": 8.988511832716873e-05, "loss": 0.7601, "step": 760 }, { "epoch": 0.15643951074108337, "grad_norm": 0.26390373706817627, "learning_rate": 8.988440397453385e-05, "loss": 0.771, "step": 761 }, { "epoch": 0.15664508171446193, "grad_norm": 0.2585375905036926, "learning_rate": 8.988368741065418e-05, "loss": 0.7544, "step": 762 }, { "epoch": 0.15685065268784049, "grad_norm": 0.2905960977077484, "learning_rate": 8.9882968635565e-05, "loss": 0.7778, "step": 763 }, { "epoch": 0.15705622366121905, "grad_norm": 0.25519707798957825, "learning_rate": 8.988224764930176e-05, "loss": 0.7575, "step": 764 }, { "epoch": 0.1572617946345976, "grad_norm": 0.19228395819664001, "learning_rate": 8.988152445189995e-05, "loss": 0.5991, "step": 765 }, { "epoch": 0.15746736560797617, "grad_norm": 0.3007056713104248, "learning_rate": 8.988079904339521e-05, "loss": 0.7521, "step": 766 }, { "epoch": 0.1576729365813547, "grad_norm": 0.2646825611591339, "learning_rate": 8.988007142382328e-05, "loss": 0.7681, "step": 767 }, { "epoch": 0.15787850755473326, "grad_norm": 0.25301775336265564, "learning_rate": 8.987934159321998e-05, "loss": 0.7559, "step": 768 }, { "epoch": 0.15808407852811182, "grad_norm": 0.2603342533111572, "learning_rate": 8.987860955162129e-05, "loss": 0.7328, "step": 769 }, { "epoch": 0.15828964950149038, "grad_norm": 0.2716013491153717, "learning_rate": 8.987787529906327e-05, "loss": 0.7904, "step": 770 }, { "epoch": 0.15849522047486894, "grad_norm": 0.2763035297393799, "learning_rate": 8.98771388355821e-05, "loss": 0.7466, "step": 771 }, { "epoch": 0.1587007914482475, "grad_norm": 0.20483554899692535, "learning_rate": 8.987640016121405e-05, "loss": 0.6064, "step": 772 }, { "epoch": 0.15890636242162606, "grad_norm": 0.2952456474304199, "learning_rate": 8.987565927599552e-05, "loss": 0.7767, "step": 773 }, { "epoch": 0.15911193339500462, "grad_norm": 0.262829452753067, "learning_rate": 8.9874916179963e-05, "loss": 0.7453, "step": 774 }, { "epoch": 0.15931750436838318, "grad_norm": 0.27599036693573, "learning_rate": 8.987417087315311e-05, "loss": 0.7633, "step": 775 }, { "epoch": 0.15952307534176174, "grad_norm": 0.2878960371017456, "learning_rate": 8.987342335560257e-05, "loss": 0.7264, "step": 776 }, { "epoch": 0.1597286463151403, "grad_norm": 0.27682632207870483, "learning_rate": 8.98726736273482e-05, "loss": 0.7599, "step": 777 }, { "epoch": 0.15993421728851887, "grad_norm": 0.28773486614227295, "learning_rate": 8.98719216884269e-05, "loss": 0.749, "step": 778 }, { "epoch": 0.16013978826189743, "grad_norm": 0.18678279221057892, "learning_rate": 8.987116753887578e-05, "loss": 0.5898, "step": 779 }, { "epoch": 0.16034535923527599, "grad_norm": 0.2946769595146179, "learning_rate": 8.987041117873195e-05, "loss": 0.7631, "step": 780 }, { "epoch": 0.16055093020865455, "grad_norm": 0.2669578492641449, "learning_rate": 8.98696526080327e-05, "loss": 0.7401, "step": 781 }, { "epoch": 0.1607565011820331, "grad_norm": 0.2495296746492386, "learning_rate": 8.986889182681537e-05, "loss": 0.7548, "step": 782 }, { "epoch": 0.16096207215541167, "grad_norm": 0.1537548452615738, "learning_rate": 8.986812883511746e-05, "loss": 0.5952, "step": 783 }, { "epoch": 0.16116764312879023, "grad_norm": 0.3242528736591339, "learning_rate": 8.986736363297657e-05, "loss": 0.7621, "step": 784 }, { "epoch": 0.16137321410216876, "grad_norm": 0.2763916254043579, "learning_rate": 8.986659622043038e-05, "loss": 0.7518, "step": 785 }, { "epoch": 0.16157878507554732, "grad_norm": 0.27918627858161926, "learning_rate": 8.986582659751668e-05, "loss": 0.759, "step": 786 }, { "epoch": 0.16178435604892588, "grad_norm": 0.1745089888572693, "learning_rate": 8.986505476427342e-05, "loss": 0.6015, "step": 787 }, { "epoch": 0.16198992702230444, "grad_norm": 0.2984016239643097, "learning_rate": 8.986428072073861e-05, "loss": 0.7422, "step": 788 }, { "epoch": 0.162195497995683, "grad_norm": 0.27629682421684265, "learning_rate": 8.986350446695038e-05, "loss": 0.7691, "step": 789 }, { "epoch": 0.16240106896906156, "grad_norm": 0.15922513604164124, "learning_rate": 8.986272600294698e-05, "loss": 0.594, "step": 790 }, { "epoch": 0.16260663994244012, "grad_norm": 0.14948177337646484, "learning_rate": 8.986194532876676e-05, "loss": 0.5879, "step": 791 }, { "epoch": 0.16281221091581868, "grad_norm": 0.33852294087409973, "learning_rate": 8.986116244444816e-05, "loss": 0.788, "step": 792 }, { "epoch": 0.16301778188919724, "grad_norm": 0.29658934473991394, "learning_rate": 8.986037735002979e-05, "loss": 0.7502, "step": 793 }, { "epoch": 0.1632233528625758, "grad_norm": 0.27061983942985535, "learning_rate": 8.98595900455503e-05, "loss": 0.7444, "step": 794 }, { "epoch": 0.16342892383595437, "grad_norm": 0.28159090876579285, "learning_rate": 8.985880053104848e-05, "loss": 0.7497, "step": 795 }, { "epoch": 0.16363449480933293, "grad_norm": 0.27150630950927734, "learning_rate": 8.985800880656322e-05, "loss": 0.7283, "step": 796 }, { "epoch": 0.16384006578271149, "grad_norm": 0.26862168312072754, "learning_rate": 8.985721487213353e-05, "loss": 0.7492, "step": 797 }, { "epoch": 0.16404563675609005, "grad_norm": 0.284452885389328, "learning_rate": 8.985641872779853e-05, "loss": 0.7864, "step": 798 }, { "epoch": 0.1642512077294686, "grad_norm": 0.19958379864692688, "learning_rate": 8.985562037359745e-05, "loss": 0.585, "step": 799 }, { "epoch": 0.16445677870284717, "grad_norm": 0.1591620147228241, "learning_rate": 8.985481980956959e-05, "loss": 0.5937, "step": 800 }, { "epoch": 0.16466234967622573, "grad_norm": 0.15034611523151398, "learning_rate": 8.985401703575444e-05, "loss": 0.6034, "step": 801 }, { "epoch": 0.1648679206496043, "grad_norm": 0.4189755618572235, "learning_rate": 8.985321205219149e-05, "loss": 0.7696, "step": 802 }, { "epoch": 0.16507349162298285, "grad_norm": 0.17588938772678375, "learning_rate": 8.985240485892043e-05, "loss": 0.5819, "step": 803 }, { "epoch": 0.16527906259636138, "grad_norm": 0.17400261759757996, "learning_rate": 8.985159545598102e-05, "loss": 0.5878, "step": 804 }, { "epoch": 0.16548463356973994, "grad_norm": 0.5819520354270935, "learning_rate": 8.985078384341314e-05, "loss": 0.7724, "step": 805 }, { "epoch": 0.1656902045431185, "grad_norm": 0.3000738322734833, "learning_rate": 8.984997002125677e-05, "loss": 0.7544, "step": 806 }, { "epoch": 0.16589577551649706, "grad_norm": 0.5194309949874878, "learning_rate": 8.984915398955201e-05, "loss": 0.7717, "step": 807 }, { "epoch": 0.16610134648987562, "grad_norm": 0.24588865041732788, "learning_rate": 8.984833574833905e-05, "loss": 0.5959, "step": 808 }, { "epoch": 0.16630691746325418, "grad_norm": 0.3617485761642456, "learning_rate": 8.984751529765823e-05, "loss": 0.7641, "step": 809 }, { "epoch": 0.16651248843663274, "grad_norm": 0.1757216602563858, "learning_rate": 8.984669263754993e-05, "loss": 0.5963, "step": 810 }, { "epoch": 0.1667180594100113, "grad_norm": 0.37562620639801025, "learning_rate": 8.98458677680547e-05, "loss": 0.7765, "step": 811 }, { "epoch": 0.16692363038338986, "grad_norm": 0.19446802139282227, "learning_rate": 8.984504068921317e-05, "loss": 0.5991, "step": 812 }, { "epoch": 0.16712920135676843, "grad_norm": 0.2953244149684906, "learning_rate": 8.98442114010661e-05, "loss": 0.7816, "step": 813 }, { "epoch": 0.16733477233014699, "grad_norm": 0.3022470772266388, "learning_rate": 8.984337990365433e-05, "loss": 0.7426, "step": 814 }, { "epoch": 0.16754034330352555, "grad_norm": 0.268697053194046, "learning_rate": 8.984254619701882e-05, "loss": 0.7798, "step": 815 }, { "epoch": 0.1677459142769041, "grad_norm": 0.2634507119655609, "learning_rate": 8.984171028120066e-05, "loss": 0.7499, "step": 816 }, { "epoch": 0.16795148525028267, "grad_norm": 0.2637363374233246, "learning_rate": 8.984087215624102e-05, "loss": 0.7244, "step": 817 }, { "epoch": 0.16815705622366123, "grad_norm": 0.25045761466026306, "learning_rate": 8.984003182218121e-05, "loss": 0.7206, "step": 818 }, { "epoch": 0.1683626271970398, "grad_norm": 0.24836835265159607, "learning_rate": 8.983918927906259e-05, "loss": 0.7381, "step": 819 }, { "epoch": 0.16856819817041835, "grad_norm": 0.26156720519065857, "learning_rate": 8.983834452692671e-05, "loss": 0.748, "step": 820 }, { "epoch": 0.1687737691437969, "grad_norm": 0.2660123407840729, "learning_rate": 8.983749756581517e-05, "loss": 0.7349, "step": 821 }, { "epoch": 0.16897934011717544, "grad_norm": 0.20181813836097717, "learning_rate": 8.983664839576969e-05, "loss": 0.6089, "step": 822 }, { "epoch": 0.169184911090554, "grad_norm": 0.16823935508728027, "learning_rate": 8.98357970168321e-05, "loss": 0.6203, "step": 823 }, { "epoch": 0.16939048206393256, "grad_norm": 0.36333969235420227, "learning_rate": 8.983494342904437e-05, "loss": 0.7704, "step": 824 }, { "epoch": 0.16959605303731112, "grad_norm": 0.2901283800601959, "learning_rate": 8.983408763244853e-05, "loss": 0.7484, "step": 825 }, { "epoch": 0.16980162401068968, "grad_norm": 0.2594255805015564, "learning_rate": 8.983322962708673e-05, "loss": 0.7726, "step": 826 }, { "epoch": 0.17000719498406824, "grad_norm": 0.2951291799545288, "learning_rate": 8.983236941300128e-05, "loss": 0.743, "step": 827 }, { "epoch": 0.1702127659574468, "grad_norm": 0.23186159133911133, "learning_rate": 8.983150699023453e-05, "loss": 0.6015, "step": 828 }, { "epoch": 0.17041833693082536, "grad_norm": 0.2974048852920532, "learning_rate": 8.983064235882896e-05, "loss": 0.7689, "step": 829 }, { "epoch": 0.17062390790420393, "grad_norm": 0.2741788327693939, "learning_rate": 8.982977551882719e-05, "loss": 0.7825, "step": 830 }, { "epoch": 0.17082947887758249, "grad_norm": 0.2528201639652252, "learning_rate": 8.982890647027191e-05, "loss": 0.7549, "step": 831 }, { "epoch": 0.17103504985096105, "grad_norm": 0.27328386902809143, "learning_rate": 8.982803521320593e-05, "loss": 0.7433, "step": 832 }, { "epoch": 0.1712406208243396, "grad_norm": 0.18332356214523315, "learning_rate": 8.98271617476722e-05, "loss": 0.6056, "step": 833 }, { "epoch": 0.17144619179771817, "grad_norm": 0.2897491753101349, "learning_rate": 8.982628607371373e-05, "loss": 0.7229, "step": 834 }, { "epoch": 0.17165176277109673, "grad_norm": 0.27189579606056213, "learning_rate": 8.982540819137363e-05, "loss": 0.7409, "step": 835 }, { "epoch": 0.1718573337444753, "grad_norm": 0.2686000168323517, "learning_rate": 8.982452810069521e-05, "loss": 0.7622, "step": 836 }, { "epoch": 0.17206290471785385, "grad_norm": 0.2843405306339264, "learning_rate": 8.98236458017218e-05, "loss": 0.7774, "step": 837 }, { "epoch": 0.1722684756912324, "grad_norm": 0.249932661652565, "learning_rate": 8.982276129449687e-05, "loss": 0.758, "step": 838 }, { "epoch": 0.17247404666461097, "grad_norm": 0.1650909036397934, "learning_rate": 8.982187457906399e-05, "loss": 0.6026, "step": 839 }, { "epoch": 0.1726796176379895, "grad_norm": 0.2688060700893402, "learning_rate": 8.982098565546684e-05, "loss": 0.74, "step": 840 }, { "epoch": 0.17288518861136806, "grad_norm": 0.2702515423297882, "learning_rate": 8.982009452374921e-05, "loss": 0.7454, "step": 841 }, { "epoch": 0.17309075958474662, "grad_norm": 0.2621611952781677, "learning_rate": 8.981920118395502e-05, "loss": 0.741, "step": 842 }, { "epoch": 0.17329633055812518, "grad_norm": 0.26395297050476074, "learning_rate": 8.981830563612828e-05, "loss": 0.7634, "step": 843 }, { "epoch": 0.17350190153150374, "grad_norm": 0.1796771138906479, "learning_rate": 8.981740788031309e-05, "loss": 0.5774, "step": 844 }, { "epoch": 0.1737074725048823, "grad_norm": 0.28493568301200867, "learning_rate": 8.98165079165537e-05, "loss": 0.744, "step": 845 }, { "epoch": 0.17391304347826086, "grad_norm": 0.14998356997966766, "learning_rate": 8.981560574489442e-05, "loss": 0.583, "step": 846 }, { "epoch": 0.17411861445163943, "grad_norm": 0.28660815954208374, "learning_rate": 8.981470136537973e-05, "loss": 0.7648, "step": 847 }, { "epoch": 0.17432418542501799, "grad_norm": 0.26909613609313965, "learning_rate": 8.981379477805416e-05, "loss": 0.7621, "step": 848 }, { "epoch": 0.17452975639839655, "grad_norm": 0.2543969750404358, "learning_rate": 8.981288598296238e-05, "loss": 0.7383, "step": 849 }, { "epoch": 0.1747353273717751, "grad_norm": 0.27695950865745544, "learning_rate": 8.981197498014916e-05, "loss": 0.7567, "step": 850 }, { "epoch": 0.17494089834515367, "grad_norm": 0.2635768949985504, "learning_rate": 8.98110617696594e-05, "loss": 0.7627, "step": 851 }, { "epoch": 0.17514646931853223, "grad_norm": 0.31927260756492615, "learning_rate": 8.981014635153806e-05, "loss": 0.7376, "step": 852 }, { "epoch": 0.1753520402919108, "grad_norm": 0.25446754693984985, "learning_rate": 8.980922872583025e-05, "loss": 0.7415, "step": 853 }, { "epoch": 0.17555761126528935, "grad_norm": 0.2923116683959961, "learning_rate": 8.980830889258118e-05, "loss": 0.7375, "step": 854 }, { "epoch": 0.1757631822386679, "grad_norm": 0.17673562467098236, "learning_rate": 8.980738685183617e-05, "loss": 0.5944, "step": 855 }, { "epoch": 0.17596875321204647, "grad_norm": 0.2569844424724579, "learning_rate": 8.980646260364063e-05, "loss": 0.7681, "step": 856 }, { "epoch": 0.17617432418542503, "grad_norm": 0.2668174207210541, "learning_rate": 8.98055361480401e-05, "loss": 0.753, "step": 857 }, { "epoch": 0.1763798951588036, "grad_norm": 0.15782947838306427, "learning_rate": 8.980460748508023e-05, "loss": 0.5973, "step": 858 }, { "epoch": 0.17658546613218212, "grad_norm": 0.27562811970710754, "learning_rate": 8.980367661480678e-05, "loss": 0.7613, "step": 859 }, { "epoch": 0.17679103710556068, "grad_norm": 0.2562348544597626, "learning_rate": 8.980274353726556e-05, "loss": 0.7451, "step": 860 }, { "epoch": 0.17699660807893924, "grad_norm": 0.25293174386024475, "learning_rate": 8.980180825250261e-05, "loss": 0.7285, "step": 861 }, { "epoch": 0.1772021790523178, "grad_norm": 0.2638672888278961, "learning_rate": 8.980087076056394e-05, "loss": 0.7539, "step": 862 }, { "epoch": 0.17740775002569636, "grad_norm": 0.1891278624534607, "learning_rate": 8.979993106149579e-05, "loss": 0.58, "step": 863 }, { "epoch": 0.17761332099907493, "grad_norm": 0.27774450182914734, "learning_rate": 8.979898915534442e-05, "loss": 0.7754, "step": 864 }, { "epoch": 0.17781889197245349, "grad_norm": 0.26496121287345886, "learning_rate": 8.979804504215624e-05, "loss": 0.7595, "step": 865 }, { "epoch": 0.17802446294583205, "grad_norm": 0.26245352625846863, "learning_rate": 8.979709872197778e-05, "loss": 0.7565, "step": 866 }, { "epoch": 0.1782300339192106, "grad_norm": 0.2624642252922058, "learning_rate": 8.979615019485564e-05, "loss": 0.7556, "step": 867 }, { "epoch": 0.17843560489258917, "grad_norm": 0.16684101521968842, "learning_rate": 8.979519946083656e-05, "loss": 0.6104, "step": 868 }, { "epoch": 0.17864117586596773, "grad_norm": 0.26087847352027893, "learning_rate": 8.979424651996738e-05, "loss": 0.7496, "step": 869 }, { "epoch": 0.1788467468393463, "grad_norm": 0.2627946436405182, "learning_rate": 8.979329137229502e-05, "loss": 0.7471, "step": 870 }, { "epoch": 0.17905231781272485, "grad_norm": 0.2528480887413025, "learning_rate": 8.979233401786657e-05, "loss": 0.7645, "step": 871 }, { "epoch": 0.1792578887861034, "grad_norm": 0.26880887150764465, "learning_rate": 8.97913744567292e-05, "loss": 0.7492, "step": 872 }, { "epoch": 0.17946345975948197, "grad_norm": 0.25951650738716125, "learning_rate": 8.979041268893014e-05, "loss": 0.7428, "step": 873 }, { "epoch": 0.17966903073286053, "grad_norm": 0.15437857806682587, "learning_rate": 8.97894487145168e-05, "loss": 0.5812, "step": 874 }, { "epoch": 0.1798746017062391, "grad_norm": 0.28139808773994446, "learning_rate": 8.978848253353668e-05, "loss": 0.7438, "step": 875 }, { "epoch": 0.18008017267961765, "grad_norm": 0.14730799198150635, "learning_rate": 8.978751414603735e-05, "loss": 0.5816, "step": 876 }, { "epoch": 0.18028574365299618, "grad_norm": 0.2632145285606384, "learning_rate": 8.978654355206654e-05, "loss": 0.7467, "step": 877 }, { "epoch": 0.18049131462637474, "grad_norm": 0.2908996045589447, "learning_rate": 8.978557075167206e-05, "loss": 0.74, "step": 878 }, { "epoch": 0.1806968855997533, "grad_norm": 0.24691736698150635, "learning_rate": 8.978459574490184e-05, "loss": 0.7718, "step": 879 }, { "epoch": 0.18090245657313186, "grad_norm": 0.25215819478034973, "learning_rate": 8.978361853180392e-05, "loss": 0.7481, "step": 880 }, { "epoch": 0.18110802754651043, "grad_norm": 0.2547704577445984, "learning_rate": 8.978263911242642e-05, "loss": 0.7508, "step": 881 }, { "epoch": 0.18131359851988899, "grad_norm": 0.184767946600914, "learning_rate": 8.97816574868176e-05, "loss": 0.5983, "step": 882 }, { "epoch": 0.18151916949326755, "grad_norm": 0.1742323487997055, "learning_rate": 8.978067365502583e-05, "loss": 0.6079, "step": 883 }, { "epoch": 0.1817247404666461, "grad_norm": 0.15977798402309418, "learning_rate": 8.977968761709958e-05, "loss": 0.5984, "step": 884 }, { "epoch": 0.18193031144002467, "grad_norm": 0.36065980792045593, "learning_rate": 8.977869937308742e-05, "loss": 0.7727, "step": 885 }, { "epoch": 0.18213588241340323, "grad_norm": 0.28331291675567627, "learning_rate": 8.977770892303802e-05, "loss": 0.753, "step": 886 }, { "epoch": 0.1823414533867818, "grad_norm": 0.2905336022377014, "learning_rate": 8.977671626700021e-05, "loss": 0.7554, "step": 887 }, { "epoch": 0.18254702436016035, "grad_norm": 0.2962552309036255, "learning_rate": 8.977572140502286e-05, "loss": 0.7432, "step": 888 }, { "epoch": 0.1827525953335389, "grad_norm": 0.2991376519203186, "learning_rate": 8.977472433715502e-05, "loss": 0.7562, "step": 889 }, { "epoch": 0.18295816630691747, "grad_norm": 0.22425773739814758, "learning_rate": 8.977372506344578e-05, "loss": 0.5851, "step": 890 }, { "epoch": 0.18316373728029603, "grad_norm": 0.32990381121635437, "learning_rate": 8.977272358394437e-05, "loss": 0.7482, "step": 891 }, { "epoch": 0.1833693082536746, "grad_norm": 0.17806373536586761, "learning_rate": 8.977171989870013e-05, "loss": 0.6074, "step": 892 }, { "epoch": 0.18357487922705315, "grad_norm": 0.318367063999176, "learning_rate": 8.977071400776253e-05, "loss": 0.7526, "step": 893 }, { "epoch": 0.1837804502004317, "grad_norm": 0.17434534430503845, "learning_rate": 8.97697059111811e-05, "loss": 0.5821, "step": 894 }, { "epoch": 0.18398602117381027, "grad_norm": 0.29355406761169434, "learning_rate": 8.976869560900552e-05, "loss": 0.7531, "step": 895 }, { "epoch": 0.1841915921471888, "grad_norm": 0.2709575593471527, "learning_rate": 8.976768310128555e-05, "loss": 0.7768, "step": 896 }, { "epoch": 0.18439716312056736, "grad_norm": 0.252112478017807, "learning_rate": 8.976666838807107e-05, "loss": 0.7173, "step": 897 }, { "epoch": 0.18460273409394592, "grad_norm": 0.2750721573829651, "learning_rate": 8.976565146941209e-05, "loss": 0.7365, "step": 898 }, { "epoch": 0.18480830506732449, "grad_norm": 0.2349645495414734, "learning_rate": 8.97646323453587e-05, "loss": 0.5986, "step": 899 }, { "epoch": 0.18501387604070305, "grad_norm": 0.268477201461792, "learning_rate": 8.976361101596108e-05, "loss": 0.7779, "step": 900 }, { "epoch": 0.1852194470140816, "grad_norm": 0.2666422426700592, "learning_rate": 8.976258748126959e-05, "loss": 0.7536, "step": 901 }, { "epoch": 0.18542501798746017, "grad_norm": 0.2692512571811676, "learning_rate": 8.976156174133462e-05, "loss": 0.7737, "step": 902 }, { "epoch": 0.18563058896083873, "grad_norm": 0.25315481424331665, "learning_rate": 8.976053379620673e-05, "loss": 0.7359, "step": 903 }, { "epoch": 0.1858361599342173, "grad_norm": 0.2516801953315735, "learning_rate": 8.975950364593655e-05, "loss": 0.7381, "step": 904 }, { "epoch": 0.18604173090759585, "grad_norm": 0.2789689600467682, "learning_rate": 8.975847129057482e-05, "loss": 0.7466, "step": 905 }, { "epoch": 0.1862473018809744, "grad_norm": 0.1855190098285675, "learning_rate": 8.975743673017243e-05, "loss": 0.5948, "step": 906 }, { "epoch": 0.18645287285435297, "grad_norm": 0.27560868859291077, "learning_rate": 8.975639996478032e-05, "loss": 0.737, "step": 907 }, { "epoch": 0.18665844382773153, "grad_norm": 0.26743271946907043, "learning_rate": 8.975536099444957e-05, "loss": 0.7585, "step": 908 }, { "epoch": 0.1868640148011101, "grad_norm": 0.2512650191783905, "learning_rate": 8.975431981923137e-05, "loss": 0.7318, "step": 909 }, { "epoch": 0.18706958577448865, "grad_norm": 0.2596076726913452, "learning_rate": 8.9753276439177e-05, "loss": 0.7641, "step": 910 }, { "epoch": 0.1872751567478672, "grad_norm": 0.20333601534366608, "learning_rate": 8.97522308543379e-05, "loss": 0.6016, "step": 911 }, { "epoch": 0.18748072772124577, "grad_norm": 0.2744527757167816, "learning_rate": 8.975118306476554e-05, "loss": 0.7522, "step": 912 }, { "epoch": 0.18768629869462433, "grad_norm": 0.2788070738315582, "learning_rate": 8.975013307051157e-05, "loss": 0.7487, "step": 913 }, { "epoch": 0.18789186966800286, "grad_norm": 0.25242358446121216, "learning_rate": 8.97490808716277e-05, "loss": 0.7345, "step": 914 }, { "epoch": 0.18809744064138142, "grad_norm": 0.2651404142379761, "learning_rate": 8.974802646816578e-05, "loss": 0.7281, "step": 915 }, { "epoch": 0.18830301161475999, "grad_norm": 0.2696022689342499, "learning_rate": 8.974696986017773e-05, "loss": 0.7516, "step": 916 }, { "epoch": 0.18850858258813855, "grad_norm": 0.24874137341976166, "learning_rate": 8.974591104771564e-05, "loss": 0.7413, "step": 917 }, { "epoch": 0.1887141535615171, "grad_norm": 0.2631874084472656, "learning_rate": 8.974485003083164e-05, "loss": 0.7562, "step": 918 }, { "epoch": 0.18891972453489567, "grad_norm": 0.26414451003074646, "learning_rate": 8.974378680957802e-05, "loss": 0.5997, "step": 919 }, { "epoch": 0.18912529550827423, "grad_norm": 0.28355100750923157, "learning_rate": 8.974272138400716e-05, "loss": 0.756, "step": 920 }, { "epoch": 0.1893308664816528, "grad_norm": 0.26617303490638733, "learning_rate": 8.974165375417155e-05, "loss": 0.7841, "step": 921 }, { "epoch": 0.18953643745503135, "grad_norm": 0.2054712474346161, "learning_rate": 8.974058392012375e-05, "loss": 0.575, "step": 922 }, { "epoch": 0.1897420084284099, "grad_norm": 0.27742794156074524, "learning_rate": 8.973951188191652e-05, "loss": 0.7585, "step": 923 }, { "epoch": 0.18994757940178847, "grad_norm": 0.1530211716890335, "learning_rate": 8.973843763960267e-05, "loss": 0.5826, "step": 924 }, { "epoch": 0.19015315037516703, "grad_norm": 0.2896377444267273, "learning_rate": 8.973736119323508e-05, "loss": 0.7741, "step": 925 }, { "epoch": 0.1903587213485456, "grad_norm": 0.16760393977165222, "learning_rate": 8.973628254286681e-05, "loss": 0.5857, "step": 926 }, { "epoch": 0.19056429232192415, "grad_norm": 0.26283350586891174, "learning_rate": 8.9735201688551e-05, "loss": 0.7505, "step": 927 }, { "epoch": 0.1907698632953027, "grad_norm": 0.24747183918952942, "learning_rate": 8.97341186303409e-05, "loss": 0.7227, "step": 928 }, { "epoch": 0.19097543426868127, "grad_norm": 0.27605384588241577, "learning_rate": 8.973303336828985e-05, "loss": 0.7628, "step": 929 }, { "epoch": 0.19118100524205983, "grad_norm": 0.2601989507675171, "learning_rate": 8.973194590245132e-05, "loss": 0.7559, "step": 930 }, { "epoch": 0.1913865762154384, "grad_norm": 0.18584440648555756, "learning_rate": 8.973085623287892e-05, "loss": 0.5884, "step": 931 }, { "epoch": 0.19159214718881695, "grad_norm": 0.17022742331027985, "learning_rate": 8.972976435962629e-05, "loss": 0.5944, "step": 932 }, { "epoch": 0.19179771816219549, "grad_norm": 0.34249716997146606, "learning_rate": 8.972867028274723e-05, "loss": 0.767, "step": 933 }, { "epoch": 0.19200328913557405, "grad_norm": 0.26959505677223206, "learning_rate": 8.972757400229565e-05, "loss": 0.7707, "step": 934 }, { "epoch": 0.1922088601089526, "grad_norm": 0.2650569975376129, "learning_rate": 8.972647551832556e-05, "loss": 0.7181, "step": 935 }, { "epoch": 0.19241443108233117, "grad_norm": 0.20763760805130005, "learning_rate": 8.972537483089107e-05, "loss": 0.5857, "step": 936 }, { "epoch": 0.19262000205570973, "grad_norm": 0.1736496388912201, "learning_rate": 8.97242719400464e-05, "loss": 0.5943, "step": 937 }, { "epoch": 0.1928255730290883, "grad_norm": 0.3711773157119751, "learning_rate": 8.97231668458459e-05, "loss": 0.7748, "step": 938 }, { "epoch": 0.19303114400246685, "grad_norm": 0.2923683226108551, "learning_rate": 8.9722059548344e-05, "loss": 0.756, "step": 939 }, { "epoch": 0.1932367149758454, "grad_norm": 0.2692539393901825, "learning_rate": 8.972095004759527e-05, "loss": 0.7795, "step": 940 }, { "epoch": 0.19344228594922397, "grad_norm": 0.2933458387851715, "learning_rate": 8.971983834365434e-05, "loss": 0.7411, "step": 941 }, { "epoch": 0.19364785692260253, "grad_norm": 0.25706520676612854, "learning_rate": 8.9718724436576e-05, "loss": 0.6123, "step": 942 }, { "epoch": 0.1938534278959811, "grad_norm": 0.2033473253250122, "learning_rate": 8.971760832641513e-05, "loss": 0.5855, "step": 943 }, { "epoch": 0.19405899886935965, "grad_norm": 0.3263876140117645, "learning_rate": 8.97164900132267e-05, "loss": 0.7315, "step": 944 }, { "epoch": 0.1942645698427382, "grad_norm": 0.3143511414527893, "learning_rate": 8.971536949706582e-05, "loss": 0.761, "step": 945 }, { "epoch": 0.19447014081611677, "grad_norm": 0.26773688197135925, "learning_rate": 8.971424677798768e-05, "loss": 0.7457, "step": 946 }, { "epoch": 0.19467571178949533, "grad_norm": 0.29603666067123413, "learning_rate": 8.971312185604759e-05, "loss": 0.7635, "step": 947 }, { "epoch": 0.1948812827628739, "grad_norm": 0.27570641040802, "learning_rate": 8.971199473130097e-05, "loss": 0.7524, "step": 948 }, { "epoch": 0.19508685373625245, "grad_norm": 0.2680298984050751, "learning_rate": 8.971086540380337e-05, "loss": 0.723, "step": 949 }, { "epoch": 0.195292424709631, "grad_norm": 0.2859373390674591, "learning_rate": 8.970973387361039e-05, "loss": 0.7422, "step": 950 }, { "epoch": 0.19549799568300955, "grad_norm": 0.28261512517929077, "learning_rate": 8.97086001407778e-05, "loss": 0.7666, "step": 951 }, { "epoch": 0.1957035666563881, "grad_norm": 0.2570000886917114, "learning_rate": 8.970746420536146e-05, "loss": 0.7278, "step": 952 }, { "epoch": 0.19590913762976667, "grad_norm": 0.25783413648605347, "learning_rate": 8.97063260674173e-05, "loss": 0.7684, "step": 953 }, { "epoch": 0.19611470860314523, "grad_norm": 0.36918801069259644, "learning_rate": 8.970518572700143e-05, "loss": 0.6265, "step": 954 }, { "epoch": 0.1963202795765238, "grad_norm": 0.32823050022125244, "learning_rate": 8.970404318417e-05, "loss": 0.7552, "step": 955 }, { "epoch": 0.19652585054990235, "grad_norm": 0.31253358721733093, "learning_rate": 8.970289843897933e-05, "loss": 0.7588, "step": 956 }, { "epoch": 0.1967314215232809, "grad_norm": 0.25706982612609863, "learning_rate": 8.970175149148577e-05, "loss": 0.7432, "step": 957 }, { "epoch": 0.19693699249665947, "grad_norm": 0.2800324559211731, "learning_rate": 8.970060234174586e-05, "loss": 0.7389, "step": 958 }, { "epoch": 0.19714256347003803, "grad_norm": 0.29499801993370056, "learning_rate": 8.969945098981621e-05, "loss": 0.7663, "step": 959 }, { "epoch": 0.1973481344434166, "grad_norm": 0.2643605172634125, "learning_rate": 8.969829743575351e-05, "loss": 0.7446, "step": 960 }, { "epoch": 0.19755370541679515, "grad_norm": 0.2712821662425995, "learning_rate": 8.969714167961463e-05, "loss": 0.7657, "step": 961 }, { "epoch": 0.1977592763901737, "grad_norm": 0.31495070457458496, "learning_rate": 8.96959837214565e-05, "loss": 0.6014, "step": 962 }, { "epoch": 0.19796484736355227, "grad_norm": 0.2913089394569397, "learning_rate": 8.969482356133615e-05, "loss": 0.7527, "step": 963 }, { "epoch": 0.19817041833693083, "grad_norm": 0.172258198261261, "learning_rate": 8.969366119931075e-05, "loss": 0.6048, "step": 964 }, { "epoch": 0.1983759893103094, "grad_norm": 0.29237228631973267, "learning_rate": 8.969249663543756e-05, "loss": 0.7519, "step": 965 }, { "epoch": 0.19858156028368795, "grad_norm": 0.27603963017463684, "learning_rate": 8.969132986977396e-05, "loss": 0.731, "step": 966 }, { "epoch": 0.1987871312570665, "grad_norm": 0.2580612003803253, "learning_rate": 8.969016090237742e-05, "loss": 0.723, "step": 967 }, { "epoch": 0.19899270223044507, "grad_norm": 0.27025994658470154, "learning_rate": 8.968898973330552e-05, "loss": 0.7453, "step": 968 }, { "epoch": 0.19919827320382363, "grad_norm": 0.27253222465515137, "learning_rate": 8.968781636261599e-05, "loss": 0.7455, "step": 969 }, { "epoch": 0.19940384417720217, "grad_norm": 0.25386548042297363, "learning_rate": 8.96866407903666e-05, "loss": 0.753, "step": 970 }, { "epoch": 0.19960941515058073, "grad_norm": 0.2759700417518616, "learning_rate": 8.96854630166153e-05, "loss": 0.5741, "step": 971 }, { "epoch": 0.1998149861239593, "grad_norm": 0.28211307525634766, "learning_rate": 8.96842830414201e-05, "loss": 0.7339, "step": 972 }, { "epoch": 0.20002055709733785, "grad_norm": 0.27216947078704834, "learning_rate": 8.96831008648391e-05, "loss": 0.7405, "step": 973 }, { "epoch": 0.2002261280707164, "grad_norm": 0.24992568790912628, "learning_rate": 8.96819164869306e-05, "loss": 0.7186, "step": 974 }, { "epoch": 0.20043169904409497, "grad_norm": 0.181453675031662, "learning_rate": 8.96807299077529e-05, "loss": 0.5892, "step": 975 }, { "epoch": 0.20063727001747353, "grad_norm": 0.2908715307712555, "learning_rate": 8.967954112736448e-05, "loss": 0.7462, "step": 976 }, { "epoch": 0.2008428409908521, "grad_norm": 0.2695624828338623, "learning_rate": 8.96783501458239e-05, "loss": 0.7669, "step": 977 }, { "epoch": 0.20104841196423065, "grad_norm": 0.2560322880744934, "learning_rate": 8.967715696318983e-05, "loss": 0.7682, "step": 978 }, { "epoch": 0.2012539829376092, "grad_norm": 0.25563281774520874, "learning_rate": 8.967596157952106e-05, "loss": 0.7246, "step": 979 }, { "epoch": 0.20145955391098777, "grad_norm": 0.24063649773597717, "learning_rate": 8.967476399487649e-05, "loss": 0.7328, "step": 980 }, { "epoch": 0.20166512488436633, "grad_norm": 0.2495402842760086, "learning_rate": 8.967356420931509e-05, "loss": 0.722, "step": 981 }, { "epoch": 0.2018706958577449, "grad_norm": 0.25746145844459534, "learning_rate": 8.9672362222896e-05, "loss": 0.7357, "step": 982 }, { "epoch": 0.20207626683112345, "grad_norm": 0.2592317461967468, "learning_rate": 8.96711580356784e-05, "loss": 0.746, "step": 983 }, { "epoch": 0.202281837804502, "grad_norm": 0.25513893365859985, "learning_rate": 8.966995164772166e-05, "loss": 0.7486, "step": 984 }, { "epoch": 0.20248740877788057, "grad_norm": 0.40953561663627625, "learning_rate": 8.966874305908516e-05, "loss": 0.5957, "step": 985 }, { "epoch": 0.20269297975125913, "grad_norm": 0.255729079246521, "learning_rate": 8.96675322698285e-05, "loss": 0.748, "step": 986 }, { "epoch": 0.2028985507246377, "grad_norm": 0.26324090361595154, "learning_rate": 8.966631928001129e-05, "loss": 0.7387, "step": 987 }, { "epoch": 0.20310412169801623, "grad_norm": 0.24772094190120697, "learning_rate": 8.966510408969329e-05, "loss": 0.7252, "step": 988 }, { "epoch": 0.2033096926713948, "grad_norm": 0.27024003863334656, "learning_rate": 8.96638866989344e-05, "loss": 0.7716, "step": 989 }, { "epoch": 0.20351526364477335, "grad_norm": 0.2622278928756714, "learning_rate": 8.966266710779454e-05, "loss": 0.7678, "step": 990 }, { "epoch": 0.2037208346181519, "grad_norm": 0.252861350774765, "learning_rate": 8.966144531633384e-05, "loss": 0.7769, "step": 991 }, { "epoch": 0.20392640559153047, "grad_norm": 0.3397926390171051, "learning_rate": 8.966022132461248e-05, "loss": 0.742, "step": 992 }, { "epoch": 0.20413197656490903, "grad_norm": 0.2550930380821228, "learning_rate": 8.965899513269076e-05, "loss": 0.7205, "step": 993 }, { "epoch": 0.2043375475382876, "grad_norm": 0.2502458393573761, "learning_rate": 8.965776674062906e-05, "loss": 0.7368, "step": 994 }, { "epoch": 0.20454311851166615, "grad_norm": 0.25033867359161377, "learning_rate": 8.965653614848793e-05, "loss": 0.758, "step": 995 }, { "epoch": 0.2047486894850447, "grad_norm": 0.24429009854793549, "learning_rate": 8.965530335632801e-05, "loss": 0.7466, "step": 996 }, { "epoch": 0.20495426045842327, "grad_norm": 0.24865779280662537, "learning_rate": 8.965406836421e-05, "loss": 0.7741, "step": 997 }, { "epoch": 0.20515983143180183, "grad_norm": 0.2573890686035156, "learning_rate": 8.965283117219475e-05, "loss": 0.7486, "step": 998 }, { "epoch": 0.2053654024051804, "grad_norm": 0.2486078292131424, "learning_rate": 8.965159178034322e-05, "loss": 0.7277, "step": 999 }, { "epoch": 0.20557097337855895, "grad_norm": 0.2717074155807495, "learning_rate": 8.965035018871647e-05, "loss": 0.6086, "step": 1000 }, { "epoch": 0.2057765443519375, "grad_norm": 0.2679359018802643, "learning_rate": 8.964910639737566e-05, "loss": 0.7664, "step": 1001 }, { "epoch": 0.20598211532531607, "grad_norm": 0.16115225851535797, "learning_rate": 8.964786040638205e-05, "loss": 0.5977, "step": 1002 }, { "epoch": 0.20618768629869463, "grad_norm": 0.2807529866695404, "learning_rate": 8.964661221579706e-05, "loss": 0.7348, "step": 1003 }, { "epoch": 0.2063932572720732, "grad_norm": 0.25754019618034363, "learning_rate": 8.964536182568215e-05, "loss": 0.7283, "step": 1004 }, { "epoch": 0.20659882824545175, "grad_norm": 0.2526054382324219, "learning_rate": 8.964410923609894e-05, "loss": 0.7144, "step": 1005 }, { "epoch": 0.20680439921883031, "grad_norm": 0.2148108184337616, "learning_rate": 8.964285444710914e-05, "loss": 0.5871, "step": 1006 }, { "epoch": 0.20700997019220885, "grad_norm": 0.18252213299274445, "learning_rate": 8.964159745877456e-05, "loss": 0.5956, "step": 1007 }, { "epoch": 0.2072155411655874, "grad_norm": 0.3090805113315582, "learning_rate": 8.964033827115713e-05, "loss": 0.7496, "step": 1008 }, { "epoch": 0.20742111213896597, "grad_norm": 0.2703743278980255, "learning_rate": 8.963907688431887e-05, "loss": 0.7492, "step": 1009 }, { "epoch": 0.20762668311234453, "grad_norm": 0.26899415254592896, "learning_rate": 8.963781329832194e-05, "loss": 0.7468, "step": 1010 }, { "epoch": 0.2078322540857231, "grad_norm": 0.2887749969959259, "learning_rate": 8.963654751322858e-05, "loss": 0.765, "step": 1011 }, { "epoch": 0.20803782505910165, "grad_norm": 0.2602989077568054, "learning_rate": 8.963527952910116e-05, "loss": 0.7749, "step": 1012 }, { "epoch": 0.2082433960324802, "grad_norm": 0.22857093811035156, "learning_rate": 8.963400934600215e-05, "loss": 0.5989, "step": 1013 }, { "epoch": 0.20844896700585877, "grad_norm": 0.29049423336982727, "learning_rate": 8.963273696399411e-05, "loss": 0.7406, "step": 1014 }, { "epoch": 0.20865453797923733, "grad_norm": 0.27531930804252625, "learning_rate": 8.963146238313975e-05, "loss": 0.7575, "step": 1015 }, { "epoch": 0.2088601089526159, "grad_norm": 0.2683233320713043, "learning_rate": 8.963018560350181e-05, "loss": 0.7572, "step": 1016 }, { "epoch": 0.20906567992599445, "grad_norm": 0.26720771193504333, "learning_rate": 8.962890662514325e-05, "loss": 0.7537, "step": 1017 }, { "epoch": 0.209271250899373, "grad_norm": 0.26178407669067383, "learning_rate": 8.962762544812705e-05, "loss": 0.7226, "step": 1018 }, { "epoch": 0.20947682187275157, "grad_norm": 0.25852060317993164, "learning_rate": 8.962634207251633e-05, "loss": 0.7401, "step": 1019 }, { "epoch": 0.20968239284613013, "grad_norm": 0.25970616936683655, "learning_rate": 8.962505649837432e-05, "loss": 0.7277, "step": 1020 }, { "epoch": 0.2098879638195087, "grad_norm": 0.2682318687438965, "learning_rate": 8.962376872576436e-05, "loss": 0.7638, "step": 1021 }, { "epoch": 0.21009353479288725, "grad_norm": 0.24570779502391815, "learning_rate": 8.962247875474989e-05, "loss": 0.7256, "step": 1022 }, { "epoch": 0.21029910576626581, "grad_norm": 0.2523082196712494, "learning_rate": 8.962118658539446e-05, "loss": 0.7288, "step": 1023 }, { "epoch": 0.21050467673964438, "grad_norm": 0.24562524259090424, "learning_rate": 8.96198922177617e-05, "loss": 0.7292, "step": 1024 }, { "epoch": 0.2107102477130229, "grad_norm": 0.23262366652488708, "learning_rate": 8.961859565191543e-05, "loss": 0.7401, "step": 1025 }, { "epoch": 0.21091581868640147, "grad_norm": 0.21075837314128876, "learning_rate": 8.961729688791949e-05, "loss": 0.5854, "step": 1026 }, { "epoch": 0.21112138965978003, "grad_norm": 0.2659233808517456, "learning_rate": 8.961599592583785e-05, "loss": 0.742, "step": 1027 }, { "epoch": 0.2113269606331586, "grad_norm": 0.2612632215023041, "learning_rate": 8.961469276573466e-05, "loss": 0.7212, "step": 1028 }, { "epoch": 0.21153253160653715, "grad_norm": 0.24459590017795563, "learning_rate": 8.961338740767407e-05, "loss": 0.7445, "step": 1029 }, { "epoch": 0.2117381025799157, "grad_norm": 0.2455456703901291, "learning_rate": 8.96120798517204e-05, "loss": 0.7469, "step": 1030 }, { "epoch": 0.21194367355329427, "grad_norm": 0.25947311520576477, "learning_rate": 8.961077009793809e-05, "loss": 0.7578, "step": 1031 }, { "epoch": 0.21214924452667283, "grad_norm": 0.26415055990219116, "learning_rate": 8.960945814639162e-05, "loss": 0.7453, "step": 1032 }, { "epoch": 0.2123548155000514, "grad_norm": 0.2478688508272171, "learning_rate": 8.960814399714568e-05, "loss": 0.7246, "step": 1033 }, { "epoch": 0.21256038647342995, "grad_norm": 0.21988952159881592, "learning_rate": 8.960682765026497e-05, "loss": 0.6062, "step": 1034 }, { "epoch": 0.2127659574468085, "grad_norm": 0.16625165939331055, "learning_rate": 8.960550910581436e-05, "loss": 0.5704, "step": 1035 }, { "epoch": 0.21297152842018707, "grad_norm": 0.2860580086708069, "learning_rate": 8.960418836385879e-05, "loss": 0.747, "step": 1036 }, { "epoch": 0.21317709939356563, "grad_norm": 0.2644577920436859, "learning_rate": 8.960286542446335e-05, "loss": 0.7268, "step": 1037 }, { "epoch": 0.2133826703669442, "grad_norm": 0.2598789930343628, "learning_rate": 8.960154028769319e-05, "loss": 0.7645, "step": 1038 }, { "epoch": 0.21358824134032275, "grad_norm": 0.2992006540298462, "learning_rate": 8.960021295361363e-05, "loss": 0.5999, "step": 1039 }, { "epoch": 0.21379381231370131, "grad_norm": 0.27868691086769104, "learning_rate": 8.959888342229001e-05, "loss": 0.7472, "step": 1040 }, { "epoch": 0.21399938328707988, "grad_norm": 0.2707647979259491, "learning_rate": 8.959755169378788e-05, "loss": 0.7158, "step": 1041 }, { "epoch": 0.21420495426045844, "grad_norm": 0.2671177089214325, "learning_rate": 8.959621776817281e-05, "loss": 0.7573, "step": 1042 }, { "epoch": 0.214410525233837, "grad_norm": 0.24762409925460815, "learning_rate": 8.959488164551055e-05, "loss": 0.7353, "step": 1043 }, { "epoch": 0.21461609620721553, "grad_norm": 0.24137498438358307, "learning_rate": 8.959354332586689e-05, "loss": 0.7476, "step": 1044 }, { "epoch": 0.2148216671805941, "grad_norm": 0.2598249614238739, "learning_rate": 8.959220280930779e-05, "loss": 0.7397, "step": 1045 }, { "epoch": 0.21502723815397265, "grad_norm": 0.2500339448451996, "learning_rate": 8.959086009589929e-05, "loss": 0.7525, "step": 1046 }, { "epoch": 0.2152328091273512, "grad_norm": 0.25262802839279175, "learning_rate": 8.958951518570753e-05, "loss": 0.759, "step": 1047 }, { "epoch": 0.21543838010072977, "grad_norm": 0.2515556216239929, "learning_rate": 8.958816807879875e-05, "loss": 0.7321, "step": 1048 }, { "epoch": 0.21564395107410833, "grad_norm": 0.24297581613063812, "learning_rate": 8.958681877523935e-05, "loss": 0.7444, "step": 1049 }, { "epoch": 0.2158495220474869, "grad_norm": 0.2649231255054474, "learning_rate": 8.958546727509578e-05, "loss": 0.7458, "step": 1050 }, { "epoch": 0.21605509302086545, "grad_norm": 0.2701459527015686, "learning_rate": 8.958411357843461e-05, "loss": 0.595, "step": 1051 }, { "epoch": 0.216260663994244, "grad_norm": 0.2653101682662964, "learning_rate": 8.958275768532258e-05, "loss": 0.7544, "step": 1052 }, { "epoch": 0.21646623496762257, "grad_norm": 0.2633649408817291, "learning_rate": 8.958139959582645e-05, "loss": 0.7403, "step": 1053 }, { "epoch": 0.21667180594100113, "grad_norm": 0.25117960572242737, "learning_rate": 8.958003931001312e-05, "loss": 0.7427, "step": 1054 }, { "epoch": 0.2168773769143797, "grad_norm": 0.24553567171096802, "learning_rate": 8.957867682794963e-05, "loss": 0.7264, "step": 1055 }, { "epoch": 0.21708294788775825, "grad_norm": 0.23510022461414337, "learning_rate": 8.95773121497031e-05, "loss": 0.7413, "step": 1056 }, { "epoch": 0.21728851886113681, "grad_norm": 0.2532014846801758, "learning_rate": 8.957594527534075e-05, "loss": 0.735, "step": 1057 }, { "epoch": 0.21749408983451538, "grad_norm": 0.25079968571662903, "learning_rate": 8.957457620492993e-05, "loss": 0.7478, "step": 1058 }, { "epoch": 0.21769966080789394, "grad_norm": 0.23813451826572418, "learning_rate": 8.957320493853805e-05, "loss": 0.7238, "step": 1059 }, { "epoch": 0.2179052317812725, "grad_norm": 0.24865779280662537, "learning_rate": 8.957183147623273e-05, "loss": 0.7369, "step": 1060 }, { "epoch": 0.21811080275465106, "grad_norm": 0.24684272706508636, "learning_rate": 8.957045581808159e-05, "loss": 0.7008, "step": 1061 }, { "epoch": 0.2183163737280296, "grad_norm": 0.24000217020511627, "learning_rate": 8.956907796415241e-05, "loss": 0.5949, "step": 1062 }, { "epoch": 0.21852194470140815, "grad_norm": 0.266008198261261, "learning_rate": 8.956769791451309e-05, "loss": 0.7161, "step": 1063 }, { "epoch": 0.2187275156747867, "grad_norm": 0.14858698844909668, "learning_rate": 8.956631566923159e-05, "loss": 0.5948, "step": 1064 }, { "epoch": 0.21893308664816527, "grad_norm": 0.2638164162635803, "learning_rate": 8.956493122837601e-05, "loss": 0.7347, "step": 1065 }, { "epoch": 0.21913865762154383, "grad_norm": 0.2497703582048416, "learning_rate": 8.956354459201459e-05, "loss": 0.7458, "step": 1066 }, { "epoch": 0.2193442285949224, "grad_norm": 0.22499538958072662, "learning_rate": 8.95621557602156e-05, "loss": 0.5748, "step": 1067 }, { "epoch": 0.21954979956830095, "grad_norm": 0.2625332176685333, "learning_rate": 8.956076473304748e-05, "loss": 0.748, "step": 1068 }, { "epoch": 0.2197553705416795, "grad_norm": 0.2666896879673004, "learning_rate": 8.955937151057876e-05, "loss": 0.7547, "step": 1069 }, { "epoch": 0.21996094151505807, "grad_norm": 0.25993168354034424, "learning_rate": 8.955797609287807e-05, "loss": 0.7593, "step": 1070 }, { "epoch": 0.22016651248843663, "grad_norm": 0.248934805393219, "learning_rate": 8.955657848001417e-05, "loss": 0.753, "step": 1071 }, { "epoch": 0.2203720834618152, "grad_norm": 0.24592526257038116, "learning_rate": 8.95551786720559e-05, "loss": 0.7335, "step": 1072 }, { "epoch": 0.22057765443519375, "grad_norm": 0.2522546052932739, "learning_rate": 8.955377666907224e-05, "loss": 0.7287, "step": 1073 }, { "epoch": 0.22078322540857231, "grad_norm": 0.24097007513046265, "learning_rate": 8.955237247113222e-05, "loss": 0.7178, "step": 1074 }, { "epoch": 0.22098879638195087, "grad_norm": 0.26036760210990906, "learning_rate": 8.955096607830506e-05, "loss": 0.7528, "step": 1075 }, { "epoch": 0.22119436735532944, "grad_norm": 0.2414807826280594, "learning_rate": 8.954955749066005e-05, "loss": 0.7121, "step": 1076 }, { "epoch": 0.221399938328708, "grad_norm": 0.2436942607164383, "learning_rate": 8.954814670826654e-05, "loss": 0.744, "step": 1077 }, { "epoch": 0.22160550930208656, "grad_norm": 0.2534603774547577, "learning_rate": 8.954673373119407e-05, "loss": 0.7627, "step": 1078 }, { "epoch": 0.22181108027546512, "grad_norm": 0.21081526577472687, "learning_rate": 8.954531855951224e-05, "loss": 0.5921, "step": 1079 }, { "epoch": 0.22201665124884365, "grad_norm": 0.26541346311569214, "learning_rate": 8.954390119329077e-05, "loss": 0.7452, "step": 1080 }, { "epoch": 0.2222222222222222, "grad_norm": 0.24794277548789978, "learning_rate": 8.954248163259949e-05, "loss": 0.7196, "step": 1081 }, { "epoch": 0.22242779319560077, "grad_norm": 0.25889837741851807, "learning_rate": 8.954105987750832e-05, "loss": 0.7674, "step": 1082 }, { "epoch": 0.22263336416897933, "grad_norm": 0.24961018562316895, "learning_rate": 8.953963592808733e-05, "loss": 0.7232, "step": 1083 }, { "epoch": 0.2228389351423579, "grad_norm": 0.2539832293987274, "learning_rate": 8.953820978440664e-05, "loss": 0.7559, "step": 1084 }, { "epoch": 0.22304450611573645, "grad_norm": 0.23905551433563232, "learning_rate": 8.953678144653653e-05, "loss": 0.7211, "step": 1085 }, { "epoch": 0.223250077089115, "grad_norm": 0.24047812819480896, "learning_rate": 8.953535091454735e-05, "loss": 0.7367, "step": 1086 }, { "epoch": 0.22345564806249357, "grad_norm": 0.25583919882774353, "learning_rate": 8.953391818850961e-05, "loss": 0.7573, "step": 1087 }, { "epoch": 0.22366121903587213, "grad_norm": 0.20065194368362427, "learning_rate": 8.953248326849386e-05, "loss": 0.5804, "step": 1088 }, { "epoch": 0.2238667900092507, "grad_norm": 0.18610531091690063, "learning_rate": 8.953104615457081e-05, "loss": 0.5888, "step": 1089 }, { "epoch": 0.22407236098262925, "grad_norm": 0.15629194676876068, "learning_rate": 8.952960684681125e-05, "loss": 0.5884, "step": 1090 }, { "epoch": 0.22427793195600781, "grad_norm": 0.3306218683719635, "learning_rate": 8.952816534528609e-05, "loss": 0.7454, "step": 1091 }, { "epoch": 0.22448350292938637, "grad_norm": 0.26848849654197693, "learning_rate": 8.952672165006635e-05, "loss": 0.7336, "step": 1092 }, { "epoch": 0.22468907390276494, "grad_norm": 0.20548087358474731, "learning_rate": 8.952527576122315e-05, "loss": 0.5992, "step": 1093 }, { "epoch": 0.2248946448761435, "grad_norm": 0.18607185781002045, "learning_rate": 8.952382767882773e-05, "loss": 0.5666, "step": 1094 }, { "epoch": 0.22510021584952206, "grad_norm": 0.16436809301376343, "learning_rate": 8.952237740295141e-05, "loss": 0.595, "step": 1095 }, { "epoch": 0.22530578682290062, "grad_norm": 0.46899160742759705, "learning_rate": 8.952092493366567e-05, "loss": 0.7777, "step": 1096 }, { "epoch": 0.22551135779627918, "grad_norm": 0.2985895276069641, "learning_rate": 8.951947027104205e-05, "loss": 0.7495, "step": 1097 }, { "epoch": 0.22571692876965774, "grad_norm": 0.319159597158432, "learning_rate": 8.95180134151522e-05, "loss": 0.7469, "step": 1098 }, { "epoch": 0.22592249974303627, "grad_norm": 0.324747771024704, "learning_rate": 8.95165543660679e-05, "loss": 0.7239, "step": 1099 }, { "epoch": 0.22612807071641483, "grad_norm": 0.5259039402008057, "learning_rate": 8.951509312386105e-05, "loss": 0.6189, "step": 1100 }, { "epoch": 0.2263336416897934, "grad_norm": 0.2236146181821823, "learning_rate": 8.951362968860361e-05, "loss": 0.5996, "step": 1101 }, { "epoch": 0.22653921266317195, "grad_norm": 0.4835422933101654, "learning_rate": 8.95121640603677e-05, "loss": 0.7662, "step": 1102 }, { "epoch": 0.2267447836365505, "grad_norm": 0.371629923582077, "learning_rate": 8.951069623922552e-05, "loss": 0.7393, "step": 1103 }, { "epoch": 0.22695035460992907, "grad_norm": 0.2967519164085388, "learning_rate": 8.950922622524938e-05, "loss": 0.7547, "step": 1104 }, { "epoch": 0.22715592558330763, "grad_norm": 0.3473425507545471, "learning_rate": 8.950775401851169e-05, "loss": 0.7603, "step": 1105 }, { "epoch": 0.2273614965566862, "grad_norm": 0.3515138030052185, "learning_rate": 8.950627961908499e-05, "loss": 0.729, "step": 1106 }, { "epoch": 0.22756706753006475, "grad_norm": 0.3210054039955139, "learning_rate": 8.950480302704193e-05, "loss": 0.7565, "step": 1107 }, { "epoch": 0.22777263850344331, "grad_norm": 0.5195302367210388, "learning_rate": 8.950332424245522e-05, "loss": 0.6351, "step": 1108 }, { "epoch": 0.22797820947682187, "grad_norm": 0.3467387557029724, "learning_rate": 8.950184326539775e-05, "loss": 0.7554, "step": 1109 }, { "epoch": 0.22818378045020044, "grad_norm": 0.33716848492622375, "learning_rate": 8.950036009594245e-05, "loss": 0.7558, "step": 1110 }, { "epoch": 0.228389351423579, "grad_norm": 0.27896901965141296, "learning_rate": 8.94988747341624e-05, "loss": 0.7455, "step": 1111 }, { "epoch": 0.22859492239695756, "grad_norm": 0.27595579624176025, "learning_rate": 8.949738718013078e-05, "loss": 0.7425, "step": 1112 }, { "epoch": 0.22880049337033612, "grad_norm": 0.29621824622154236, "learning_rate": 8.949589743392089e-05, "loss": 0.7416, "step": 1113 }, { "epoch": 0.22900606434371468, "grad_norm": 0.28054726123809814, "learning_rate": 8.94944054956061e-05, "loss": 0.7538, "step": 1114 }, { "epoch": 0.22921163531709324, "grad_norm": 0.25396206974983215, "learning_rate": 8.949291136525991e-05, "loss": 0.7479, "step": 1115 }, { "epoch": 0.2294172062904718, "grad_norm": 0.2706109881401062, "learning_rate": 8.949141504295594e-05, "loss": 0.7475, "step": 1116 }, { "epoch": 0.22962277726385033, "grad_norm": 0.26184260845184326, "learning_rate": 8.94899165287679e-05, "loss": 0.7383, "step": 1117 }, { "epoch": 0.2298283482372289, "grad_norm": 0.2610413134098053, "learning_rate": 8.948841582276963e-05, "loss": 0.7384, "step": 1118 }, { "epoch": 0.23003391921060745, "grad_norm": 0.2537980079650879, "learning_rate": 8.948691292503504e-05, "loss": 0.7444, "step": 1119 }, { "epoch": 0.230239490183986, "grad_norm": 0.2602024972438812, "learning_rate": 8.948540783563817e-05, "loss": 0.7306, "step": 1120 }, { "epoch": 0.23044506115736457, "grad_norm": 0.3567192256450653, "learning_rate": 8.94839005546532e-05, "loss": 0.604, "step": 1121 }, { "epoch": 0.23065063213074313, "grad_norm": 0.49138790369033813, "learning_rate": 8.948239108215437e-05, "loss": 0.7303, "step": 1122 }, { "epoch": 0.2308562031041217, "grad_norm": 0.30943894386291504, "learning_rate": 8.948087941821603e-05, "loss": 0.7535, "step": 1123 }, { "epoch": 0.23106177407750025, "grad_norm": 0.25115516781806946, "learning_rate": 8.947936556291267e-05, "loss": 0.7416, "step": 1124 }, { "epoch": 0.23126734505087881, "grad_norm": 0.24797074496746063, "learning_rate": 8.947784951631886e-05, "loss": 0.7328, "step": 1125 }, { "epoch": 0.23147291602425737, "grad_norm": 0.25195595622062683, "learning_rate": 8.94763312785093e-05, "loss": 0.7375, "step": 1126 }, { "epoch": 0.23167848699763594, "grad_norm": 0.20428021252155304, "learning_rate": 8.947481084955877e-05, "loss": 0.61, "step": 1127 }, { "epoch": 0.2318840579710145, "grad_norm": 0.27424702048301697, "learning_rate": 8.947328822954218e-05, "loss": 0.7512, "step": 1128 }, { "epoch": 0.23208962894439306, "grad_norm": 0.26351961493492126, "learning_rate": 8.947176341853455e-05, "loss": 0.7584, "step": 1129 }, { "epoch": 0.23229519991777162, "grad_norm": 0.25228413939476013, "learning_rate": 8.947023641661101e-05, "loss": 0.7629, "step": 1130 }, { "epoch": 0.23250077089115018, "grad_norm": 0.24488292634487152, "learning_rate": 8.946870722384676e-05, "loss": 0.7501, "step": 1131 }, { "epoch": 0.23270634186452874, "grad_norm": 0.2597258388996124, "learning_rate": 8.946717584031716e-05, "loss": 0.7408, "step": 1132 }, { "epoch": 0.2329119128379073, "grad_norm": 0.25343239307403564, "learning_rate": 8.946564226609764e-05, "loss": 0.7186, "step": 1133 }, { "epoch": 0.23311748381128586, "grad_norm": 0.24788786470890045, "learning_rate": 8.946410650126376e-05, "loss": 0.6838, "step": 1134 }, { "epoch": 0.23332305478466442, "grad_norm": 0.18649965524673462, "learning_rate": 8.946256854589118e-05, "loss": 0.6325, "step": 1135 }, { "epoch": 0.23352862575804295, "grad_norm": 0.26197314262390137, "learning_rate": 8.946102840005568e-05, "loss": 0.7428, "step": 1136 }, { "epoch": 0.2337341967314215, "grad_norm": 0.25486642122268677, "learning_rate": 8.94594860638331e-05, "loss": 0.7505, "step": 1137 }, { "epoch": 0.23393976770480007, "grad_norm": 0.2388404756784439, "learning_rate": 8.945794153729945e-05, "loss": 0.7296, "step": 1138 }, { "epoch": 0.23414533867817863, "grad_norm": 0.2506440579891205, "learning_rate": 8.945639482053081e-05, "loss": 0.7501, "step": 1139 }, { "epoch": 0.2343509096515572, "grad_norm": 0.2521236538887024, "learning_rate": 8.94548459136034e-05, "loss": 0.7488, "step": 1140 }, { "epoch": 0.23455648062493575, "grad_norm": 0.25158312916755676, "learning_rate": 8.94532948165935e-05, "loss": 0.7274, "step": 1141 }, { "epoch": 0.23476205159831431, "grad_norm": 0.23634850978851318, "learning_rate": 8.945174152957755e-05, "loss": 0.7306, "step": 1142 }, { "epoch": 0.23496762257169287, "grad_norm": 0.1795545369386673, "learning_rate": 8.945018605263205e-05, "loss": 0.5908, "step": 1143 }, { "epoch": 0.23517319354507144, "grad_norm": 0.26744595170021057, "learning_rate": 8.944862838583364e-05, "loss": 0.747, "step": 1144 }, { "epoch": 0.23537876451845, "grad_norm": 0.23531249165534973, "learning_rate": 8.944706852925908e-05, "loss": 0.7097, "step": 1145 }, { "epoch": 0.23558433549182856, "grad_norm": 0.2423231452703476, "learning_rate": 8.944550648298519e-05, "loss": 0.7536, "step": 1146 }, { "epoch": 0.23578990646520712, "grad_norm": 0.24406969547271729, "learning_rate": 8.944394224708892e-05, "loss": 0.7459, "step": 1147 }, { "epoch": 0.23599547743858568, "grad_norm": 0.2516055405139923, "learning_rate": 8.944237582164736e-05, "loss": 0.748, "step": 1148 }, { "epoch": 0.23620104841196424, "grad_norm": 0.23662374913692474, "learning_rate": 8.944080720673766e-05, "loss": 0.7272, "step": 1149 }, { "epoch": 0.2364066193853428, "grad_norm": 0.25914058089256287, "learning_rate": 8.943923640243712e-05, "loss": 0.7286, "step": 1150 }, { "epoch": 0.23661219035872136, "grad_norm": 0.16088080406188965, "learning_rate": 8.943766340882309e-05, "loss": 0.5913, "step": 1151 }, { "epoch": 0.23681776133209992, "grad_norm": 0.15930064022541046, "learning_rate": 8.943608822597309e-05, "loss": 0.5927, "step": 1152 }, { "epoch": 0.23702333230547848, "grad_norm": 0.2877768576145172, "learning_rate": 8.943451085396473e-05, "loss": 0.7462, "step": 1153 }, { "epoch": 0.237228903278857, "grad_norm": 0.2618594169616699, "learning_rate": 8.94329312928757e-05, "loss": 0.7506, "step": 1154 }, { "epoch": 0.23743447425223557, "grad_norm": 0.24599005281925201, "learning_rate": 8.943134954278383e-05, "loss": 0.7052, "step": 1155 }, { "epoch": 0.23764004522561413, "grad_norm": 0.2675454318523407, "learning_rate": 8.942976560376703e-05, "loss": 0.7396, "step": 1156 }, { "epoch": 0.2378456161989927, "grad_norm": 0.2358483374118805, "learning_rate": 8.942817947590333e-05, "loss": 0.7131, "step": 1157 }, { "epoch": 0.23805118717237125, "grad_norm": 0.24510863423347473, "learning_rate": 8.94265911592709e-05, "loss": 0.735, "step": 1158 }, { "epoch": 0.23825675814574981, "grad_norm": 0.24396325647830963, "learning_rate": 8.942500065394798e-05, "loss": 0.7286, "step": 1159 }, { "epoch": 0.23846232911912837, "grad_norm": 0.24989542365074158, "learning_rate": 8.942340796001291e-05, "loss": 0.7614, "step": 1160 }, { "epoch": 0.23866790009250693, "grad_norm": 0.22477596998214722, "learning_rate": 8.942181307754416e-05, "loss": 0.7065, "step": 1161 }, { "epoch": 0.2388734710658855, "grad_norm": 0.27181369066238403, "learning_rate": 8.942021600662033e-05, "loss": 0.7612, "step": 1162 }, { "epoch": 0.23907904203926406, "grad_norm": 0.2516171336174011, "learning_rate": 8.941861674732005e-05, "loss": 0.7506, "step": 1163 }, { "epoch": 0.23928461301264262, "grad_norm": 0.23005805909633636, "learning_rate": 8.941701529972216e-05, "loss": 0.7287, "step": 1164 }, { "epoch": 0.23949018398602118, "grad_norm": 0.24049928784370422, "learning_rate": 8.941541166390549e-05, "loss": 0.7337, "step": 1165 }, { "epoch": 0.23969575495939974, "grad_norm": 0.2356685847043991, "learning_rate": 8.941380583994912e-05, "loss": 0.7066, "step": 1166 }, { "epoch": 0.2399013259327783, "grad_norm": 0.21500107645988464, "learning_rate": 8.941219782793211e-05, "loss": 0.5845, "step": 1167 }, { "epoch": 0.24010689690615686, "grad_norm": 0.24245062470436096, "learning_rate": 8.941058762793371e-05, "loss": 0.7339, "step": 1168 }, { "epoch": 0.24031246787953542, "grad_norm": 0.24114523828029633, "learning_rate": 8.940897524003322e-05, "loss": 0.7167, "step": 1169 }, { "epoch": 0.24051803885291398, "grad_norm": 0.2341417521238327, "learning_rate": 8.94073606643101e-05, "loss": 0.7557, "step": 1170 }, { "epoch": 0.24072360982629254, "grad_norm": 0.24253100156784058, "learning_rate": 8.940574390084385e-05, "loss": 0.7522, "step": 1171 }, { "epoch": 0.2409291807996711, "grad_norm": 0.17679694294929504, "learning_rate": 8.940412494971418e-05, "loss": 0.5978, "step": 1172 }, { "epoch": 0.24113475177304963, "grad_norm": 0.2966403067111969, "learning_rate": 8.940250381100081e-05, "loss": 0.7489, "step": 1173 }, { "epoch": 0.2413403227464282, "grad_norm": 0.2602713108062744, "learning_rate": 8.94008804847836e-05, "loss": 0.737, "step": 1174 }, { "epoch": 0.24154589371980675, "grad_norm": 0.24620187282562256, "learning_rate": 8.939925497114255e-05, "loss": 0.7612, "step": 1175 }, { "epoch": 0.24175146469318531, "grad_norm": 1.3907586336135864, "learning_rate": 8.939762727015773e-05, "loss": 0.7424, "step": 1176 }, { "epoch": 0.24195703566656387, "grad_norm": 0.25489339232444763, "learning_rate": 8.939599738190933e-05, "loss": 0.7292, "step": 1177 }, { "epoch": 0.24216260663994243, "grad_norm": 0.24630793929100037, "learning_rate": 8.939436530647765e-05, "loss": 0.7201, "step": 1178 }, { "epoch": 0.242368177613321, "grad_norm": 0.2420111447572708, "learning_rate": 8.939273104394307e-05, "loss": 0.7593, "step": 1179 }, { "epoch": 0.24257374858669956, "grad_norm": 0.24446842074394226, "learning_rate": 8.939109459438614e-05, "loss": 0.7191, "step": 1180 }, { "epoch": 0.24277931956007812, "grad_norm": 0.2652778625488281, "learning_rate": 8.938945595788746e-05, "loss": 0.7417, "step": 1181 }, { "epoch": 0.24298489053345668, "grad_norm": 0.2472565621137619, "learning_rate": 8.938781513452775e-05, "loss": 0.7128, "step": 1182 }, { "epoch": 0.24319046150683524, "grad_norm": 0.25744304060935974, "learning_rate": 8.938617212438786e-05, "loss": 0.7433, "step": 1183 }, { "epoch": 0.2433960324802138, "grad_norm": 0.2481434941291809, "learning_rate": 8.938452692754874e-05, "loss": 0.6043, "step": 1184 }, { "epoch": 0.24360160345359236, "grad_norm": 0.27799829840660095, "learning_rate": 8.938287954409143e-05, "loss": 0.7457, "step": 1185 }, { "epoch": 0.24380717442697092, "grad_norm": 0.1753695160150528, "learning_rate": 8.938122997409709e-05, "loss": 0.5978, "step": 1186 }, { "epoch": 0.24401274540034948, "grad_norm": 0.16633495688438416, "learning_rate": 8.937957821764698e-05, "loss": 0.6047, "step": 1187 }, { "epoch": 0.24421831637372804, "grad_norm": 0.2707998752593994, "learning_rate": 8.937792427482249e-05, "loss": 0.7181, "step": 1188 }, { "epoch": 0.2444238873471066, "grad_norm": 0.1617717742919922, "learning_rate": 8.937626814570507e-05, "loss": 0.6032, "step": 1189 }, { "epoch": 0.24462945832048516, "grad_norm": 0.15513579547405243, "learning_rate": 8.937460983037636e-05, "loss": 0.5983, "step": 1190 }, { "epoch": 0.2448350292938637, "grad_norm": 0.2588478624820709, "learning_rate": 8.9372949328918e-05, "loss": 0.7395, "step": 1191 }, { "epoch": 0.24504060026724225, "grad_norm": 0.2583847939968109, "learning_rate": 8.937128664141184e-05, "loss": 0.7442, "step": 1192 }, { "epoch": 0.2452461712406208, "grad_norm": 0.23951515555381775, "learning_rate": 8.936962176793979e-05, "loss": 0.7309, "step": 1193 }, { "epoch": 0.24545174221399937, "grad_norm": 0.23284120857715607, "learning_rate": 8.936795470858385e-05, "loss": 0.7122, "step": 1194 }, { "epoch": 0.24565731318737793, "grad_norm": 0.2364392876625061, "learning_rate": 8.936628546342617e-05, "loss": 0.7452, "step": 1195 }, { "epoch": 0.2458628841607565, "grad_norm": 0.19968503713607788, "learning_rate": 8.936461403254895e-05, "loss": 0.6054, "step": 1196 }, { "epoch": 0.24606845513413506, "grad_norm": 0.25698399543762207, "learning_rate": 8.936294041603457e-05, "loss": 0.7542, "step": 1197 }, { "epoch": 0.24627402610751362, "grad_norm": 0.2551160454750061, "learning_rate": 8.936126461396545e-05, "loss": 0.729, "step": 1198 }, { "epoch": 0.24647959708089218, "grad_norm": 0.2407594472169876, "learning_rate": 8.935958662642419e-05, "loss": 0.7331, "step": 1199 }, { "epoch": 0.24668516805427074, "grad_norm": 0.19667823612689972, "learning_rate": 8.935790645349342e-05, "loss": 0.5818, "step": 1200 }, { "epoch": 0.2468907390276493, "grad_norm": 0.25005340576171875, "learning_rate": 8.935622409525593e-05, "loss": 0.7355, "step": 1201 }, { "epoch": 0.24709631000102786, "grad_norm": 0.15851576626300812, "learning_rate": 8.93545395517946e-05, "loss": 0.6147, "step": 1202 }, { "epoch": 0.24730188097440642, "grad_norm": 0.2595955431461334, "learning_rate": 8.935285282319242e-05, "loss": 0.7344, "step": 1203 }, { "epoch": 0.24750745194778498, "grad_norm": 0.2531373202800751, "learning_rate": 8.935116390953249e-05, "loss": 0.7206, "step": 1204 }, { "epoch": 0.24771302292116354, "grad_norm": 0.2330513596534729, "learning_rate": 8.9349472810898e-05, "loss": 0.7487, "step": 1205 }, { "epoch": 0.2479185938945421, "grad_norm": 0.23262523114681244, "learning_rate": 8.934777952737228e-05, "loss": 0.7268, "step": 1206 }, { "epoch": 0.24812416486792066, "grad_norm": 0.2461225688457489, "learning_rate": 8.934608405903875e-05, "loss": 0.7272, "step": 1207 }, { "epoch": 0.24832973584129922, "grad_norm": 0.23531411588191986, "learning_rate": 8.934438640598092e-05, "loss": 0.7249, "step": 1208 }, { "epoch": 0.24853530681467778, "grad_norm": 0.19100695848464966, "learning_rate": 8.934268656828244e-05, "loss": 0.6049, "step": 1209 }, { "epoch": 0.2487408777880563, "grad_norm": 0.25513240694999695, "learning_rate": 8.934098454602704e-05, "loss": 0.7281, "step": 1210 }, { "epoch": 0.24894644876143487, "grad_norm": 0.24409835040569305, "learning_rate": 8.93392803392986e-05, "loss": 0.7533, "step": 1211 }, { "epoch": 0.24915201973481343, "grad_norm": 0.24540594220161438, "learning_rate": 8.933757394818104e-05, "loss": 0.7218, "step": 1212 }, { "epoch": 0.249357590708192, "grad_norm": 0.24975821375846863, "learning_rate": 8.933586537275846e-05, "loss": 0.7528, "step": 1213 }, { "epoch": 0.24956316168157056, "grad_norm": 0.17961885035037994, "learning_rate": 8.933415461311502e-05, "loss": 0.5881, "step": 1214 }, { "epoch": 0.24976873265494912, "grad_norm": 0.26504039764404297, "learning_rate": 8.9332441669335e-05, "loss": 0.7393, "step": 1215 }, { "epoch": 0.24997430362832768, "grad_norm": 0.24959856271743774, "learning_rate": 8.933072654150277e-05, "loss": 0.7333, "step": 1216 }, { "epoch": 0.25017987460170626, "grad_norm": 0.25788456201553345, "learning_rate": 8.932900922970287e-05, "loss": 0.7524, "step": 1217 }, { "epoch": 0.2503854455750848, "grad_norm": 0.2299453467130661, "learning_rate": 8.932728973401986e-05, "loss": 0.7532, "step": 1218 }, { "epoch": 0.25059101654846333, "grad_norm": 0.23602120578289032, "learning_rate": 8.932556805453847e-05, "loss": 0.7446, "step": 1219 }, { "epoch": 0.2507965875218419, "grad_norm": 0.24988947808742523, "learning_rate": 8.932384419134352e-05, "loss": 0.7275, "step": 1220 }, { "epoch": 0.25100215849522045, "grad_norm": 0.22750410437583923, "learning_rate": 8.932211814451995e-05, "loss": 0.7284, "step": 1221 }, { "epoch": 0.25120772946859904, "grad_norm": 0.22385790944099426, "learning_rate": 8.932038991415277e-05, "loss": 0.753, "step": 1222 }, { "epoch": 0.25141330044197757, "grad_norm": 0.22648993134498596, "learning_rate": 8.931865950032713e-05, "loss": 0.7171, "step": 1223 }, { "epoch": 0.25161887141535616, "grad_norm": 0.22896623611450195, "learning_rate": 8.931692690312828e-05, "loss": 0.7164, "step": 1224 }, { "epoch": 0.2518244423887347, "grad_norm": 0.2378738969564438, "learning_rate": 8.931519212264157e-05, "loss": 0.6969, "step": 1225 }, { "epoch": 0.2520300133621133, "grad_norm": 0.23377791047096252, "learning_rate": 8.931345515895248e-05, "loss": 0.7102, "step": 1226 }, { "epoch": 0.2522355843354918, "grad_norm": 0.23156873881816864, "learning_rate": 8.93117160121466e-05, "loss": 0.7426, "step": 1227 }, { "epoch": 0.2524411553088704, "grad_norm": 0.2447620928287506, "learning_rate": 8.930997468230956e-05, "loss": 0.7254, "step": 1228 }, { "epoch": 0.25264672628224893, "grad_norm": 0.24257569015026093, "learning_rate": 8.930823116952717e-05, "loss": 0.7551, "step": 1229 }, { "epoch": 0.2528522972556275, "grad_norm": 0.23060962557792664, "learning_rate": 8.930648547388534e-05, "loss": 0.7411, "step": 1230 }, { "epoch": 0.25305786822900606, "grad_norm": 0.23297728598117828, "learning_rate": 8.930473759547005e-05, "loss": 0.731, "step": 1231 }, { "epoch": 0.25326343920238464, "grad_norm": 0.18401369452476501, "learning_rate": 8.930298753436741e-05, "loss": 0.6025, "step": 1232 }, { "epoch": 0.2534690101757632, "grad_norm": 0.25541701912879944, "learning_rate": 8.930123529066365e-05, "loss": 0.7314, "step": 1233 }, { "epoch": 0.25367458114914176, "grad_norm": 0.2430264949798584, "learning_rate": 8.929948086444512e-05, "loss": 0.7115, "step": 1234 }, { "epoch": 0.2538801521225203, "grad_norm": 0.2397884875535965, "learning_rate": 8.929772425579818e-05, "loss": 0.7065, "step": 1235 }, { "epoch": 0.2540857230958989, "grad_norm": 0.2442830502986908, "learning_rate": 8.929596546480944e-05, "loss": 0.7252, "step": 1236 }, { "epoch": 0.2542912940692774, "grad_norm": 0.2494584023952484, "learning_rate": 8.92942044915655e-05, "loss": 0.7292, "step": 1237 }, { "epoch": 0.25449686504265595, "grad_norm": 0.23975245654582977, "learning_rate": 8.929244133615314e-05, "loss": 0.7256, "step": 1238 }, { "epoch": 0.25470243601603454, "grad_norm": 0.24557578563690186, "learning_rate": 8.929067599865924e-05, "loss": 0.7126, "step": 1239 }, { "epoch": 0.25490800698941307, "grad_norm": 0.2466876208782196, "learning_rate": 8.928890847917073e-05, "loss": 0.7397, "step": 1240 }, { "epoch": 0.25511357796279166, "grad_norm": 0.236251562833786, "learning_rate": 8.92871387777747e-05, "loss": 0.7578, "step": 1241 }, { "epoch": 0.2553191489361702, "grad_norm": 0.23271340131759644, "learning_rate": 8.928536689455835e-05, "loss": 0.7126, "step": 1242 }, { "epoch": 0.2555247199095488, "grad_norm": 0.2597436010837555, "learning_rate": 8.928359282960896e-05, "loss": 0.7506, "step": 1243 }, { "epoch": 0.2557302908829273, "grad_norm": 0.2491491734981537, "learning_rate": 8.928181658301394e-05, "loss": 0.7396, "step": 1244 }, { "epoch": 0.2559358618563059, "grad_norm": 0.2302912026643753, "learning_rate": 8.928003815486078e-05, "loss": 0.7074, "step": 1245 }, { "epoch": 0.25614143282968443, "grad_norm": 0.22792287170886993, "learning_rate": 8.927825754523711e-05, "loss": 0.705, "step": 1246 }, { "epoch": 0.256347003803063, "grad_norm": 0.20026971399784088, "learning_rate": 8.927647475423064e-05, "loss": 0.597, "step": 1247 }, { "epoch": 0.25655257477644156, "grad_norm": 0.2631547749042511, "learning_rate": 8.92746897819292e-05, "loss": 0.7552, "step": 1248 }, { "epoch": 0.25675814574982014, "grad_norm": 0.24641458690166473, "learning_rate": 8.927290262842075e-05, "loss": 0.7049, "step": 1249 }, { "epoch": 0.2569637167231987, "grad_norm": 0.24111877381801605, "learning_rate": 8.927111329379331e-05, "loss": 0.7467, "step": 1250 }, { "epoch": 0.25716928769657726, "grad_norm": 0.23682504892349243, "learning_rate": 8.926932177813505e-05, "loss": 0.7529, "step": 1251 }, { "epoch": 0.2573748586699558, "grad_norm": 0.2335578352212906, "learning_rate": 8.92675280815342e-05, "loss": 0.7186, "step": 1252 }, { "epoch": 0.2575804296433344, "grad_norm": 0.25901028513908386, "learning_rate": 8.926573220407918e-05, "loss": 0.7339, "step": 1253 }, { "epoch": 0.2577860006167129, "grad_norm": 0.2469077706336975, "learning_rate": 8.92639341458584e-05, "loss": 0.744, "step": 1254 }, { "epoch": 0.2579915715900915, "grad_norm": 0.17402611672878265, "learning_rate": 8.926213390696048e-05, "loss": 0.5948, "step": 1255 }, { "epoch": 0.25819714256347004, "grad_norm": 0.2638707160949707, "learning_rate": 8.926033148747412e-05, "loss": 0.7456, "step": 1256 }, { "epoch": 0.25840271353684857, "grad_norm": 0.15191468596458435, "learning_rate": 8.925852688748808e-05, "loss": 0.6055, "step": 1257 }, { "epoch": 0.25860828451022716, "grad_norm": 0.25375521183013916, "learning_rate": 8.92567201070913e-05, "loss": 0.7441, "step": 1258 }, { "epoch": 0.2588138554836057, "grad_norm": 0.24398963153362274, "learning_rate": 8.925491114637277e-05, "loss": 0.7551, "step": 1259 }, { "epoch": 0.2590194264569843, "grad_norm": 0.15817205607891083, "learning_rate": 8.925310000542161e-05, "loss": 0.5987, "step": 1260 }, { "epoch": 0.2592249974303628, "grad_norm": 0.15531690418720245, "learning_rate": 8.925128668432705e-05, "loss": 0.5948, "step": 1261 }, { "epoch": 0.2594305684037414, "grad_norm": 0.25315144658088684, "learning_rate": 8.924947118317844e-05, "loss": 0.7374, "step": 1262 }, { "epoch": 0.25963613937711993, "grad_norm": 0.24230562150478363, "learning_rate": 8.924765350206519e-05, "loss": 0.7363, "step": 1263 }, { "epoch": 0.2598417103504985, "grad_norm": 0.22478878498077393, "learning_rate": 8.924583364107687e-05, "loss": 0.7269, "step": 1264 }, { "epoch": 0.26004728132387706, "grad_norm": 0.24388407170772552, "learning_rate": 8.924401160030313e-05, "loss": 0.7349, "step": 1265 }, { "epoch": 0.26025285229725564, "grad_norm": 0.24955937266349792, "learning_rate": 8.924218737983373e-05, "loss": 0.73, "step": 1266 }, { "epoch": 0.2604584232706342, "grad_norm": 0.24500887095928192, "learning_rate": 8.924036097975856e-05, "loss": 0.7247, "step": 1267 }, { "epoch": 0.26066399424401276, "grad_norm": 0.20046253502368927, "learning_rate": 8.923853240016757e-05, "loss": 0.5842, "step": 1268 }, { "epoch": 0.2608695652173913, "grad_norm": 0.25663238763809204, "learning_rate": 8.923670164115087e-05, "loss": 0.7296, "step": 1269 }, { "epoch": 0.2610751361907699, "grad_norm": 0.25753530859947205, "learning_rate": 8.923486870279863e-05, "loss": 0.7367, "step": 1270 }, { "epoch": 0.2612807071641484, "grad_norm": 0.23126912117004395, "learning_rate": 8.923303358520117e-05, "loss": 0.7257, "step": 1271 }, { "epoch": 0.261486278137527, "grad_norm": 0.24083848297595978, "learning_rate": 8.923119628844889e-05, "loss": 0.7335, "step": 1272 }, { "epoch": 0.26169184911090554, "grad_norm": 0.17281857132911682, "learning_rate": 8.92293568126323e-05, "loss": 0.5799, "step": 1273 }, { "epoch": 0.26189742008428407, "grad_norm": 0.1575915813446045, "learning_rate": 8.922751515784204e-05, "loss": 0.5796, "step": 1274 }, { "epoch": 0.26210299105766266, "grad_norm": 0.31265151500701904, "learning_rate": 8.922567132416881e-05, "loss": 0.7426, "step": 1275 }, { "epoch": 0.2623085620310412, "grad_norm": 0.257569819688797, "learning_rate": 8.922382531170347e-05, "loss": 0.7183, "step": 1276 }, { "epoch": 0.2625141330044198, "grad_norm": 0.23766203224658966, "learning_rate": 8.922197712053697e-05, "loss": 0.7331, "step": 1277 }, { "epoch": 0.2627197039777983, "grad_norm": 0.25914183259010315, "learning_rate": 8.922012675076034e-05, "loss": 0.7342, "step": 1278 }, { "epoch": 0.2629252749511769, "grad_norm": 0.26477503776550293, "learning_rate": 8.921827420246473e-05, "loss": 0.7313, "step": 1279 }, { "epoch": 0.26313084592455543, "grad_norm": 0.3233232796192169, "learning_rate": 8.921641947574145e-05, "loss": 0.7345, "step": 1280 }, { "epoch": 0.263336416897934, "grad_norm": 0.20394398272037506, "learning_rate": 8.921456257068186e-05, "loss": 0.5848, "step": 1281 }, { "epoch": 0.26354198787131256, "grad_norm": 0.28951147198677063, "learning_rate": 8.921270348737741e-05, "loss": 0.7507, "step": 1282 }, { "epoch": 0.26374755884469114, "grad_norm": 0.26492390036582947, "learning_rate": 8.921084222591971e-05, "loss": 0.7124, "step": 1283 }, { "epoch": 0.2639531298180697, "grad_norm": 0.2661970555782318, "learning_rate": 8.920897878640046e-05, "loss": 0.7556, "step": 1284 }, { "epoch": 0.26415870079144826, "grad_norm": 0.17668524384498596, "learning_rate": 8.920711316891145e-05, "loss": 0.5874, "step": 1285 }, { "epoch": 0.2643642717648268, "grad_norm": 0.2812560796737671, "learning_rate": 8.92052453735446e-05, "loss": 0.744, "step": 1286 }, { "epoch": 0.2645698427382054, "grad_norm": 0.25487664341926575, "learning_rate": 8.920337540039193e-05, "loss": 0.7414, "step": 1287 }, { "epoch": 0.2647754137115839, "grad_norm": 0.26109081506729126, "learning_rate": 8.920150324954557e-05, "loss": 0.7305, "step": 1288 }, { "epoch": 0.2649809846849625, "grad_norm": 0.2654556334018707, "learning_rate": 8.919962892109772e-05, "loss": 0.7105, "step": 1289 }, { "epoch": 0.26518655565834104, "grad_norm": 0.25440090894699097, "learning_rate": 8.919775241514075e-05, "loss": 0.7567, "step": 1290 }, { "epoch": 0.2653921266317196, "grad_norm": 0.26158374547958374, "learning_rate": 8.91958737317671e-05, "loss": 0.7656, "step": 1291 }, { "epoch": 0.26559769760509816, "grad_norm": 0.25178900361061096, "learning_rate": 8.919399287106933e-05, "loss": 0.7342, "step": 1292 }, { "epoch": 0.2658032685784767, "grad_norm": 0.2315172553062439, "learning_rate": 8.91921098331401e-05, "loss": 0.7527, "step": 1293 }, { "epoch": 0.2660088395518553, "grad_norm": 0.2387528419494629, "learning_rate": 8.919022461807215e-05, "loss": 0.7414, "step": 1294 }, { "epoch": 0.2662144105252338, "grad_norm": 0.24964243173599243, "learning_rate": 8.918833722595838e-05, "loss": 0.7538, "step": 1295 }, { "epoch": 0.2664199814986124, "grad_norm": 0.43933603167533875, "learning_rate": 8.918644765689179e-05, "loss": 0.738, "step": 1296 }, { "epoch": 0.26662555247199093, "grad_norm": 0.23242905735969543, "learning_rate": 8.918455591096543e-05, "loss": 0.7456, "step": 1297 }, { "epoch": 0.2668311234453695, "grad_norm": 0.2441163808107376, "learning_rate": 8.918266198827252e-05, "loss": 0.7278, "step": 1298 }, { "epoch": 0.26703669441874806, "grad_norm": 0.2470923811197281, "learning_rate": 8.918076588890637e-05, "loss": 0.7274, "step": 1299 }, { "epoch": 0.26724226539212664, "grad_norm": 0.23086468875408173, "learning_rate": 8.917886761296039e-05, "loss": 0.7503, "step": 1300 }, { "epoch": 0.2674478363655052, "grad_norm": 0.24466407299041748, "learning_rate": 8.917696716052808e-05, "loss": 0.6128, "step": 1301 }, { "epoch": 0.26765340733888376, "grad_norm": 0.24658440053462982, "learning_rate": 8.91750645317031e-05, "loss": 0.7356, "step": 1302 }, { "epoch": 0.2678589783122623, "grad_norm": 0.24751920998096466, "learning_rate": 8.917315972657915e-05, "loss": 0.7394, "step": 1303 }, { "epoch": 0.2680645492856409, "grad_norm": 0.2545618414878845, "learning_rate": 8.91712527452501e-05, "loss": 0.7412, "step": 1304 }, { "epoch": 0.2682701202590194, "grad_norm": 0.23690831661224365, "learning_rate": 8.916934358780986e-05, "loss": 0.7224, "step": 1305 }, { "epoch": 0.268475691232398, "grad_norm": 0.24612128734588623, "learning_rate": 8.916743225435252e-05, "loss": 0.7441, "step": 1306 }, { "epoch": 0.26868126220577654, "grad_norm": 0.24375763535499573, "learning_rate": 8.916551874497223e-05, "loss": 0.735, "step": 1307 }, { "epoch": 0.2688868331791551, "grad_norm": 0.22968213260173798, "learning_rate": 8.916360305976326e-05, "loss": 0.7453, "step": 1308 }, { "epoch": 0.26909240415253366, "grad_norm": 0.23660656809806824, "learning_rate": 8.916168519881999e-05, "loss": 0.7201, "step": 1309 }, { "epoch": 0.26929797512591225, "grad_norm": 0.2977808713912964, "learning_rate": 8.915976516223691e-05, "loss": 0.6098, "step": 1310 }, { "epoch": 0.2695035460992908, "grad_norm": 0.2509056031703949, "learning_rate": 8.915784295010859e-05, "loss": 0.7539, "step": 1311 }, { "epoch": 0.2697091170726693, "grad_norm": 0.2543947696685791, "learning_rate": 8.915591856252973e-05, "loss": 0.7508, "step": 1312 }, { "epoch": 0.2699146880460479, "grad_norm": 0.24036121368408203, "learning_rate": 8.915399199959516e-05, "loss": 0.7149, "step": 1313 }, { "epoch": 0.27012025901942643, "grad_norm": 0.2512202560901642, "learning_rate": 8.915206326139978e-05, "loss": 0.6823, "step": 1314 }, { "epoch": 0.270325829992805, "grad_norm": 0.24787308275699615, "learning_rate": 8.915013234803863e-05, "loss": 0.7399, "step": 1315 }, { "epoch": 0.27053140096618356, "grad_norm": 0.24503572285175323, "learning_rate": 8.914819925960679e-05, "loss": 0.7347, "step": 1316 }, { "epoch": 0.27073697193956214, "grad_norm": 0.23503392934799194, "learning_rate": 8.914626399619951e-05, "loss": 0.7262, "step": 1317 }, { "epoch": 0.2709425429129407, "grad_norm": 0.23490577936172485, "learning_rate": 8.914432655791217e-05, "loss": 0.7333, "step": 1318 }, { "epoch": 0.27114811388631926, "grad_norm": 0.2428707480430603, "learning_rate": 8.914238694484016e-05, "loss": 0.7087, "step": 1319 }, { "epoch": 0.2713536848596978, "grad_norm": 0.24492257833480835, "learning_rate": 8.91404451570791e-05, "loss": 0.7164, "step": 1320 }, { "epoch": 0.2715592558330764, "grad_norm": 0.2504068911075592, "learning_rate": 8.913850119472461e-05, "loss": 0.7406, "step": 1321 }, { "epoch": 0.2717648268064549, "grad_norm": 0.24984775483608246, "learning_rate": 8.913655505787246e-05, "loss": 0.7324, "step": 1322 }, { "epoch": 0.2719703977798335, "grad_norm": 0.23938335478305817, "learning_rate": 8.913460674661854e-05, "loss": 0.7147, "step": 1323 }, { "epoch": 0.27217596875321204, "grad_norm": 0.24494026601314545, "learning_rate": 8.913265626105883e-05, "loss": 0.7476, "step": 1324 }, { "epoch": 0.2723815397265906, "grad_norm": 0.23465509712696075, "learning_rate": 8.913070360128941e-05, "loss": 0.7203, "step": 1325 }, { "epoch": 0.27258711069996916, "grad_norm": 0.2233608067035675, "learning_rate": 8.912874876740651e-05, "loss": 0.7189, "step": 1326 }, { "epoch": 0.27279268167334775, "grad_norm": 0.23633797466754913, "learning_rate": 8.912679175950641e-05, "loss": 0.7257, "step": 1327 }, { "epoch": 0.2729982526467263, "grad_norm": 0.22821030020713806, "learning_rate": 8.912483257768551e-05, "loss": 0.726, "step": 1328 }, { "epoch": 0.27320382362010487, "grad_norm": 0.2244369387626648, "learning_rate": 8.912287122204038e-05, "loss": 0.709, "step": 1329 }, { "epoch": 0.2734093945934834, "grad_norm": 0.23471800982952118, "learning_rate": 8.912090769266758e-05, "loss": 0.7163, "step": 1330 }, { "epoch": 0.27361496556686193, "grad_norm": 0.23954612016677856, "learning_rate": 8.911894198966391e-05, "loss": 0.7477, "step": 1331 }, { "epoch": 0.2738205365402405, "grad_norm": 0.33054718375205994, "learning_rate": 8.911697411312616e-05, "loss": 0.616, "step": 1332 }, { "epoch": 0.27402610751361905, "grad_norm": 0.26455309987068176, "learning_rate": 8.91150040631513e-05, "loss": 0.7477, "step": 1333 }, { "epoch": 0.27423167848699764, "grad_norm": 0.15511548519134521, "learning_rate": 8.911303183983639e-05, "loss": 0.5804, "step": 1334 }, { "epoch": 0.2744372494603762, "grad_norm": 0.2723095715045929, "learning_rate": 8.911105744327858e-05, "loss": 0.7527, "step": 1335 }, { "epoch": 0.27464282043375476, "grad_norm": 0.2615657150745392, "learning_rate": 8.910908087357515e-05, "loss": 0.7228, "step": 1336 }, { "epoch": 0.2748483914071333, "grad_norm": 0.2343035191297531, "learning_rate": 8.910710213082346e-05, "loss": 0.7435, "step": 1337 }, { "epoch": 0.2750539623805119, "grad_norm": 0.27343472838401794, "learning_rate": 8.910512121512101e-05, "loss": 0.7415, "step": 1338 }, { "epoch": 0.2752595333538904, "grad_norm": 0.2690789997577667, "learning_rate": 8.910313812656539e-05, "loss": 0.7301, "step": 1339 }, { "epoch": 0.275465104327269, "grad_norm": 0.23863738775253296, "learning_rate": 8.910115286525428e-05, "loss": 0.7114, "step": 1340 }, { "epoch": 0.27567067530064754, "grad_norm": 0.26206308603286743, "learning_rate": 8.909916543128551e-05, "loss": 0.5967, "step": 1341 }, { "epoch": 0.2758762462740261, "grad_norm": 0.27798014879226685, "learning_rate": 8.909717582475695e-05, "loss": 0.7337, "step": 1342 }, { "epoch": 0.27608181724740466, "grad_norm": 0.23681025207042694, "learning_rate": 8.909518404576668e-05, "loss": 0.7287, "step": 1343 }, { "epoch": 0.27628738822078325, "grad_norm": 0.2664317786693573, "learning_rate": 8.90931900944128e-05, "loss": 0.7151, "step": 1344 }, { "epoch": 0.2764929591941618, "grad_norm": 0.2881788909435272, "learning_rate": 8.909119397079349e-05, "loss": 0.7289, "step": 1345 }, { "epoch": 0.27669853016754037, "grad_norm": 0.248192697763443, "learning_rate": 8.908919567500718e-05, "loss": 0.7233, "step": 1346 }, { "epoch": 0.2769041011409189, "grad_norm": 0.2383420318365097, "learning_rate": 8.908719520715224e-05, "loss": 0.7178, "step": 1347 }, { "epoch": 0.27710967211429743, "grad_norm": 0.23679983615875244, "learning_rate": 8.908519256732727e-05, "loss": 0.717, "step": 1348 }, { "epoch": 0.277315243087676, "grad_norm": 0.2335837185382843, "learning_rate": 8.908318775563092e-05, "loss": 0.7167, "step": 1349 }, { "epoch": 0.27752081406105455, "grad_norm": 0.247580885887146, "learning_rate": 8.908118077216194e-05, "loss": 0.7467, "step": 1350 }, { "epoch": 0.27772638503443314, "grad_norm": 0.24042358994483948, "learning_rate": 8.907917161701923e-05, "loss": 0.7615, "step": 1351 }, { "epoch": 0.2779319560078117, "grad_norm": 0.24658474326133728, "learning_rate": 8.907716029030174e-05, "loss": 0.7096, "step": 1352 }, { "epoch": 0.27813752698119026, "grad_norm": 0.24043896794319153, "learning_rate": 8.90751467921086e-05, "loss": 0.735, "step": 1353 }, { "epoch": 0.2783430979545688, "grad_norm": 0.2515980303287506, "learning_rate": 8.907313112253898e-05, "loss": 0.7167, "step": 1354 }, { "epoch": 0.2785486689279474, "grad_norm": 0.23116926848888397, "learning_rate": 8.907111328169219e-05, "loss": 0.6996, "step": 1355 }, { "epoch": 0.2787542399013259, "grad_norm": 0.23852792382240295, "learning_rate": 8.906909326966762e-05, "loss": 0.7252, "step": 1356 }, { "epoch": 0.2789598108747045, "grad_norm": 0.2699477970600128, "learning_rate": 8.906707108656481e-05, "loss": 0.5933, "step": 1357 }, { "epoch": 0.27916538184808304, "grad_norm": 0.171479269862175, "learning_rate": 8.906504673248338e-05, "loss": 0.583, "step": 1358 }, { "epoch": 0.2793709528214616, "grad_norm": 0.1635981947183609, "learning_rate": 8.906302020752306e-05, "loss": 0.592, "step": 1359 }, { "epoch": 0.27957652379484016, "grad_norm": 0.3277224898338318, "learning_rate": 8.906099151178368e-05, "loss": 0.7403, "step": 1360 }, { "epoch": 0.27978209476821875, "grad_norm": 0.27374133467674255, "learning_rate": 8.905896064536519e-05, "loss": 0.7438, "step": 1361 }, { "epoch": 0.2799876657415973, "grad_norm": 0.2909560203552246, "learning_rate": 8.905692760836765e-05, "loss": 0.5838, "step": 1362 }, { "epoch": 0.28019323671497587, "grad_norm": 0.34569621086120605, "learning_rate": 8.905489240089119e-05, "loss": 0.7456, "step": 1363 }, { "epoch": 0.2803988076883544, "grad_norm": 0.32318931818008423, "learning_rate": 8.90528550230361e-05, "loss": 0.7337, "step": 1364 }, { "epoch": 0.280604378661733, "grad_norm": 0.24782495200634003, "learning_rate": 8.905081547490276e-05, "loss": 0.7135, "step": 1365 }, { "epoch": 0.2808099496351115, "grad_norm": 0.25972336530685425, "learning_rate": 8.904877375659163e-05, "loss": 0.7076, "step": 1366 }, { "epoch": 0.28101552060849005, "grad_norm": 0.28636348247528076, "learning_rate": 8.904672986820328e-05, "loss": 0.7406, "step": 1367 }, { "epoch": 0.28122109158186864, "grad_norm": 0.21100643277168274, "learning_rate": 8.904468380983843e-05, "loss": 0.6081, "step": 1368 }, { "epoch": 0.2814266625552472, "grad_norm": 0.2907034456729889, "learning_rate": 8.904263558159788e-05, "loss": 0.7046, "step": 1369 }, { "epoch": 0.28163223352862576, "grad_norm": 0.2622237205505371, "learning_rate": 8.904058518358253e-05, "loss": 0.7578, "step": 1370 }, { "epoch": 0.2818378045020043, "grad_norm": 0.2604566812515259, "learning_rate": 8.903853261589339e-05, "loss": 0.75, "step": 1371 }, { "epoch": 0.2820433754753829, "grad_norm": 0.27299514412879944, "learning_rate": 8.90364778786316e-05, "loss": 0.7491, "step": 1372 }, { "epoch": 0.2822489464487614, "grad_norm": 0.25931867957115173, "learning_rate": 8.903442097189835e-05, "loss": 0.6978, "step": 1373 }, { "epoch": 0.28245451742214, "grad_norm": 0.2450464367866516, "learning_rate": 8.9032361895795e-05, "loss": 0.7276, "step": 1374 }, { "epoch": 0.28266008839551854, "grad_norm": 0.20911885797977448, "learning_rate": 8.903030065042298e-05, "loss": 0.5984, "step": 1375 }, { "epoch": 0.2828656593688971, "grad_norm": 0.2976955473423004, "learning_rate": 8.902823723588385e-05, "loss": 0.7332, "step": 1376 }, { "epoch": 0.28307123034227566, "grad_norm": 0.2745811641216278, "learning_rate": 8.902617165227928e-05, "loss": 0.7369, "step": 1377 }, { "epoch": 0.28327680131565425, "grad_norm": 0.23596425354480743, "learning_rate": 8.902410389971099e-05, "loss": 0.7253, "step": 1378 }, { "epoch": 0.2834823722890328, "grad_norm": 0.25958871841430664, "learning_rate": 8.902203397828086e-05, "loss": 0.7494, "step": 1379 }, { "epoch": 0.28368794326241137, "grad_norm": 0.2587198317050934, "learning_rate": 8.901996188809088e-05, "loss": 0.7001, "step": 1380 }, { "epoch": 0.2838935142357899, "grad_norm": 0.2621273696422577, "learning_rate": 8.901788762924313e-05, "loss": 0.728, "step": 1381 }, { "epoch": 0.2840990852091685, "grad_norm": 0.18734264373779297, "learning_rate": 8.901581120183979e-05, "loss": 0.6061, "step": 1382 }, { "epoch": 0.284304656182547, "grad_norm": 0.16175542771816254, "learning_rate": 8.901373260598317e-05, "loss": 0.6072, "step": 1383 }, { "epoch": 0.2845102271559256, "grad_norm": 0.30578863620758057, "learning_rate": 8.901165184177567e-05, "loss": 0.7373, "step": 1384 }, { "epoch": 0.28471579812930414, "grad_norm": 0.26835259795188904, "learning_rate": 8.900956890931979e-05, "loss": 0.7249, "step": 1385 }, { "epoch": 0.2849213691026827, "grad_norm": 0.221610888838768, "learning_rate": 8.900748380871814e-05, "loss": 0.5865, "step": 1386 }, { "epoch": 0.28512694007606126, "grad_norm": 0.27838990092277527, "learning_rate": 8.900539654007346e-05, "loss": 0.7224, "step": 1387 }, { "epoch": 0.2853325110494398, "grad_norm": 0.24998264014720917, "learning_rate": 8.900330710348857e-05, "loss": 0.7112, "step": 1388 }, { "epoch": 0.2855380820228184, "grad_norm": 0.2573053240776062, "learning_rate": 8.900121549906642e-05, "loss": 0.7395, "step": 1389 }, { "epoch": 0.2857436529961969, "grad_norm": 0.24121756851673126, "learning_rate": 8.899912172691004e-05, "loss": 0.747, "step": 1390 }, { "epoch": 0.2859492239695755, "grad_norm": 0.2541133463382721, "learning_rate": 8.899702578712256e-05, "loss": 0.7226, "step": 1391 }, { "epoch": 0.28615479494295404, "grad_norm": 0.24340660870075226, "learning_rate": 8.899492767980729e-05, "loss": 0.698, "step": 1392 }, { "epoch": 0.2863603659163326, "grad_norm": 0.24495667219161987, "learning_rate": 8.899282740506756e-05, "loss": 0.7535, "step": 1393 }, { "epoch": 0.28656593688971116, "grad_norm": 0.2280047982931137, "learning_rate": 8.899072496300684e-05, "loss": 0.7219, "step": 1394 }, { "epoch": 0.28677150786308975, "grad_norm": 0.23093637824058533, "learning_rate": 8.898862035372872e-05, "loss": 0.7135, "step": 1395 }, { "epoch": 0.2869770788364683, "grad_norm": 0.24832944571971893, "learning_rate": 8.898651357733686e-05, "loss": 0.7522, "step": 1396 }, { "epoch": 0.28718264980984687, "grad_norm": 0.23297333717346191, "learning_rate": 8.898440463393508e-05, "loss": 0.7546, "step": 1397 }, { "epoch": 0.2873882207832254, "grad_norm": 0.21482457220554352, "learning_rate": 8.898229352362727e-05, "loss": 0.5847, "step": 1398 }, { "epoch": 0.287593791756604, "grad_norm": 0.16317768394947052, "learning_rate": 8.898018024651742e-05, "loss": 0.5954, "step": 1399 }, { "epoch": 0.2877993627299825, "grad_norm": 0.3127588629722595, "learning_rate": 8.897806480270967e-05, "loss": 0.7413, "step": 1400 }, { "epoch": 0.2880049337033611, "grad_norm": 0.2599581182003021, "learning_rate": 8.897594719230821e-05, "loss": 0.7315, "step": 1401 }, { "epoch": 0.28821050467673964, "grad_norm": 0.23986676335334778, "learning_rate": 8.897382741541737e-05, "loss": 0.7528, "step": 1402 }, { "epoch": 0.28841607565011823, "grad_norm": 0.2908901870250702, "learning_rate": 8.897170547214159e-05, "loss": 0.7404, "step": 1403 }, { "epoch": 0.28862164662349676, "grad_norm": 0.3151310682296753, "learning_rate": 8.896958136258541e-05, "loss": 0.6033, "step": 1404 }, { "epoch": 0.2888272175968753, "grad_norm": 0.2576965391635895, "learning_rate": 8.896745508685346e-05, "loss": 0.7326, "step": 1405 }, { "epoch": 0.2890327885702539, "grad_norm": 0.2626875340938568, "learning_rate": 8.896532664505051e-05, "loss": 0.7408, "step": 1406 }, { "epoch": 0.2892383595436324, "grad_norm": 0.24406549334526062, "learning_rate": 8.896319603728141e-05, "loss": 0.7326, "step": 1407 }, { "epoch": 0.289443930517011, "grad_norm": 0.24385593831539154, "learning_rate": 8.896106326365112e-05, "loss": 0.7503, "step": 1408 }, { "epoch": 0.28964950149038954, "grad_norm": 0.24427802860736847, "learning_rate": 8.89589283242647e-05, "loss": 0.7341, "step": 1409 }, { "epoch": 0.2898550724637681, "grad_norm": 0.24131245911121368, "learning_rate": 8.895679121922738e-05, "loss": 0.7313, "step": 1410 }, { "epoch": 0.29006064343714666, "grad_norm": 0.24251912534236908, "learning_rate": 8.895465194864439e-05, "loss": 0.7138, "step": 1411 }, { "epoch": 0.29026621441052525, "grad_norm": 0.22263044118881226, "learning_rate": 8.895251051262115e-05, "loss": 0.6891, "step": 1412 }, { "epoch": 0.2904717853839038, "grad_norm": 0.23494918644428253, "learning_rate": 8.895036691126314e-05, "loss": 0.732, "step": 1413 }, { "epoch": 0.29067735635728237, "grad_norm": 0.22686836123466492, "learning_rate": 8.894822114467598e-05, "loss": 0.7274, "step": 1414 }, { "epoch": 0.2908829273306609, "grad_norm": 0.24379804730415344, "learning_rate": 8.894607321296538e-05, "loss": 0.74, "step": 1415 }, { "epoch": 0.2910884983040395, "grad_norm": 0.23114730417728424, "learning_rate": 8.894392311623714e-05, "loss": 0.7377, "step": 1416 }, { "epoch": 0.291294069277418, "grad_norm": 0.23655329644680023, "learning_rate": 8.894177085459722e-05, "loss": 0.7493, "step": 1417 }, { "epoch": 0.2914996402507966, "grad_norm": 0.2256159633398056, "learning_rate": 8.893961642815163e-05, "loss": 0.6974, "step": 1418 }, { "epoch": 0.29170521122417514, "grad_norm": 0.20934060215950012, "learning_rate": 8.893745983700652e-05, "loss": 0.5891, "step": 1419 }, { "epoch": 0.29191078219755373, "grad_norm": 0.1600976139307022, "learning_rate": 8.893530108126811e-05, "loss": 0.6138, "step": 1420 }, { "epoch": 0.29211635317093226, "grad_norm": 0.1524209976196289, "learning_rate": 8.893314016104278e-05, "loss": 0.5702, "step": 1421 }, { "epoch": 0.2923219241443108, "grad_norm": 0.31443774700164795, "learning_rate": 8.893097707643697e-05, "loss": 0.6969, "step": 1422 }, { "epoch": 0.2925274951176894, "grad_norm": 0.2652696669101715, "learning_rate": 8.892881182755727e-05, "loss": 0.7177, "step": 1423 }, { "epoch": 0.2927330660910679, "grad_norm": 0.23116344213485718, "learning_rate": 8.892664441451031e-05, "loss": 0.6064, "step": 1424 }, { "epoch": 0.2929386370644465, "grad_norm": 0.2783909738063812, "learning_rate": 8.892447483740291e-05, "loss": 0.7301, "step": 1425 }, { "epoch": 0.29314420803782504, "grad_norm": 0.2517321705818176, "learning_rate": 8.892230309634192e-05, "loss": 0.7447, "step": 1426 }, { "epoch": 0.2933497790112036, "grad_norm": 0.2492847889661789, "learning_rate": 8.892012919143436e-05, "loss": 0.7529, "step": 1427 }, { "epoch": 0.29355534998458216, "grad_norm": 0.23372922837734222, "learning_rate": 8.891795312278732e-05, "loss": 0.7302, "step": 1428 }, { "epoch": 0.29376092095796075, "grad_norm": 0.260433167219162, "learning_rate": 8.8915774890508e-05, "loss": 0.7388, "step": 1429 }, { "epoch": 0.2939664919313393, "grad_norm": 0.24735549092292786, "learning_rate": 8.89135944947037e-05, "loss": 0.6851, "step": 1430 }, { "epoch": 0.29417206290471787, "grad_norm": 0.24530264735221863, "learning_rate": 8.891141193548188e-05, "loss": 0.7483, "step": 1431 }, { "epoch": 0.2943776338780964, "grad_norm": 0.24232807755470276, "learning_rate": 8.890922721295e-05, "loss": 0.7272, "step": 1432 }, { "epoch": 0.294583204851475, "grad_norm": 0.23810634016990662, "learning_rate": 8.890704032721575e-05, "loss": 0.6853, "step": 1433 }, { "epoch": 0.2947887758248535, "grad_norm": 0.23144571483135223, "learning_rate": 8.890485127838684e-05, "loss": 0.7317, "step": 1434 }, { "epoch": 0.2949943467982321, "grad_norm": 0.23867613077163696, "learning_rate": 8.890266006657111e-05, "loss": 0.7378, "step": 1435 }, { "epoch": 0.29519991777161064, "grad_norm": 0.2355402261018753, "learning_rate": 8.890046669187653e-05, "loss": 0.7183, "step": 1436 }, { "epoch": 0.29540548874498923, "grad_norm": 0.2344846874475479, "learning_rate": 8.889827115441114e-05, "loss": 0.6113, "step": 1437 }, { "epoch": 0.29561105971836776, "grad_norm": 0.25104036927223206, "learning_rate": 8.88960734542831e-05, "loss": 0.716, "step": 1438 }, { "epoch": 0.29581663069174635, "grad_norm": 0.2465832382440567, "learning_rate": 8.88938735916007e-05, "loss": 0.7588, "step": 1439 }, { "epoch": 0.2960222016651249, "grad_norm": 0.24674251675605774, "learning_rate": 8.889167156647231e-05, "loss": 0.7221, "step": 1440 }, { "epoch": 0.2962277726385034, "grad_norm": 0.25955334305763245, "learning_rate": 8.888946737900642e-05, "loss": 0.742, "step": 1441 }, { "epoch": 0.296433343611882, "grad_norm": 0.2384418547153473, "learning_rate": 8.888726102931159e-05, "loss": 0.7298, "step": 1442 }, { "epoch": 0.29663891458526054, "grad_norm": 0.2418283224105835, "learning_rate": 8.888505251749655e-05, "loss": 0.7149, "step": 1443 }, { "epoch": 0.2968444855586391, "grad_norm": 0.2591508626937866, "learning_rate": 8.88828418436701e-05, "loss": 0.7281, "step": 1444 }, { "epoch": 0.29705005653201766, "grad_norm": 0.2347528338432312, "learning_rate": 8.888062900794113e-05, "loss": 0.741, "step": 1445 }, { "epoch": 0.29725562750539625, "grad_norm": 0.22745028138160706, "learning_rate": 8.887841401041865e-05, "loss": 0.7347, "step": 1446 }, { "epoch": 0.2974611984787748, "grad_norm": 0.236216738820076, "learning_rate": 8.887619685121183e-05, "loss": 0.7229, "step": 1447 }, { "epoch": 0.29766676945215337, "grad_norm": 0.22409434616565704, "learning_rate": 8.887397753042985e-05, "loss": 0.5921, "step": 1448 }, { "epoch": 0.2978723404255319, "grad_norm": 0.24046771228313446, "learning_rate": 8.887175604818206e-05, "loss": 0.6934, "step": 1449 }, { "epoch": 0.2980779113989105, "grad_norm": 0.25511425733566284, "learning_rate": 8.886953240457791e-05, "loss": 0.7177, "step": 1450 }, { "epoch": 0.298283482372289, "grad_norm": 0.23517939448356628, "learning_rate": 8.886730659972696e-05, "loss": 0.744, "step": 1451 }, { "epoch": 0.2984890533456676, "grad_norm": 0.23165474832057953, "learning_rate": 8.886507863373883e-05, "loss": 0.72, "step": 1452 }, { "epoch": 0.29869462431904614, "grad_norm": 0.22487609088420868, "learning_rate": 8.88628485067233e-05, "loss": 0.6993, "step": 1453 }, { "epoch": 0.29890019529242473, "grad_norm": 0.2359279990196228, "learning_rate": 8.886061621879024e-05, "loss": 0.7148, "step": 1454 }, { "epoch": 0.29910576626580326, "grad_norm": 0.23191282153129578, "learning_rate": 8.885838177004964e-05, "loss": 0.73, "step": 1455 }, { "epoch": 0.29931133723918185, "grad_norm": 0.2255670130252838, "learning_rate": 8.885614516061156e-05, "loss": 0.7192, "step": 1456 }, { "epoch": 0.2995169082125604, "grad_norm": 0.21794365346431732, "learning_rate": 8.885390639058617e-05, "loss": 0.7126, "step": 1457 }, { "epoch": 0.299722479185939, "grad_norm": 0.22137753665447235, "learning_rate": 8.88516654600838e-05, "loss": 0.6953, "step": 1458 }, { "epoch": 0.2999280501593175, "grad_norm": 0.23347578942775726, "learning_rate": 8.884942236921483e-05, "loss": 0.7275, "step": 1459 }, { "epoch": 0.30013362113269604, "grad_norm": 0.22592391073703766, "learning_rate": 8.884717711808976e-05, "loss": 0.7011, "step": 1460 }, { "epoch": 0.3003391921060746, "grad_norm": 0.2333751916885376, "learning_rate": 8.884492970681924e-05, "loss": 0.5993, "step": 1461 }, { "epoch": 0.30054476307945316, "grad_norm": 0.23949290812015533, "learning_rate": 8.884268013551395e-05, "loss": 0.7246, "step": 1462 }, { "epoch": 0.30075033405283175, "grad_norm": 0.22439618408679962, "learning_rate": 8.884042840428473e-05, "loss": 0.7257, "step": 1463 }, { "epoch": 0.3009559050262103, "grad_norm": 0.2332451343536377, "learning_rate": 8.883817451324253e-05, "loss": 0.7344, "step": 1464 }, { "epoch": 0.30116147599958887, "grad_norm": 0.2470991313457489, "learning_rate": 8.883591846249834e-05, "loss": 0.7396, "step": 1465 }, { "epoch": 0.3013670469729674, "grad_norm": 0.23062336444854736, "learning_rate": 8.883366025216336e-05, "loss": 0.715, "step": 1466 }, { "epoch": 0.301572617946346, "grad_norm": 0.2705153226852417, "learning_rate": 8.88313998823488e-05, "loss": 0.7202, "step": 1467 }, { "epoch": 0.3017781889197245, "grad_norm": 0.2432517409324646, "learning_rate": 8.882913735316604e-05, "loss": 0.7346, "step": 1468 }, { "epoch": 0.3019837598931031, "grad_norm": 0.20731572806835175, "learning_rate": 8.882687266472655e-05, "loss": 0.6029, "step": 1469 }, { "epoch": 0.30218933086648164, "grad_norm": 0.24890613555908203, "learning_rate": 8.882460581714188e-05, "loss": 0.743, "step": 1470 }, { "epoch": 0.30239490183986023, "grad_norm": 0.23934966325759888, "learning_rate": 8.882233681052371e-05, "loss": 0.7102, "step": 1471 }, { "epoch": 0.30260047281323876, "grad_norm": 0.2529708743095398, "learning_rate": 8.882006564498385e-05, "loss": 0.7366, "step": 1472 }, { "epoch": 0.30280604378661735, "grad_norm": 0.22400988638401031, "learning_rate": 8.881779232063416e-05, "loss": 0.7295, "step": 1473 }, { "epoch": 0.3030116147599959, "grad_norm": 0.23044519126415253, "learning_rate": 8.881551683758664e-05, "loss": 0.7332, "step": 1474 }, { "epoch": 0.3032171857333745, "grad_norm": 0.2295847088098526, "learning_rate": 8.881323919595341e-05, "loss": 0.6939, "step": 1475 }, { "epoch": 0.303422756706753, "grad_norm": 0.22964751720428467, "learning_rate": 8.881095939584667e-05, "loss": 0.7197, "step": 1476 }, { "epoch": 0.30362832768013154, "grad_norm": 0.2278130203485489, "learning_rate": 8.880867743737873e-05, "loss": 0.7366, "step": 1477 }, { "epoch": 0.3038338986535101, "grad_norm": 0.17138256132602692, "learning_rate": 8.8806393320662e-05, "loss": 0.585, "step": 1478 }, { "epoch": 0.30403946962688866, "grad_norm": 0.23692992329597473, "learning_rate": 8.880410704580904e-05, "loss": 0.7368, "step": 1479 }, { "epoch": 0.30424504060026725, "grad_norm": 0.23937001824378967, "learning_rate": 8.880181861293245e-05, "loss": 0.7465, "step": 1480 }, { "epoch": 0.3044506115736458, "grad_norm": 0.2425798624753952, "learning_rate": 8.879952802214498e-05, "loss": 0.7235, "step": 1481 }, { "epoch": 0.30465618254702437, "grad_norm": 0.22199256718158722, "learning_rate": 8.87972352735595e-05, "loss": 0.7266, "step": 1482 }, { "epoch": 0.3048617535204029, "grad_norm": 0.22652393579483032, "learning_rate": 8.879494036728895e-05, "loss": 0.7196, "step": 1483 }, { "epoch": 0.3050673244937815, "grad_norm": 0.23339220881462097, "learning_rate": 8.879264330344637e-05, "loss": 0.6907, "step": 1484 }, { "epoch": 0.30527289546716, "grad_norm": 0.17793652415275574, "learning_rate": 8.879034408214495e-05, "loss": 0.5843, "step": 1485 }, { "epoch": 0.3054784664405386, "grad_norm": 0.14778107404708862, "learning_rate": 8.878804270349794e-05, "loss": 0.5915, "step": 1486 }, { "epoch": 0.30568403741391714, "grad_norm": 0.25510430335998535, "learning_rate": 8.878573916761875e-05, "loss": 0.7359, "step": 1487 }, { "epoch": 0.30588960838729573, "grad_norm": 0.245680570602417, "learning_rate": 8.878343347462083e-05, "loss": 0.7232, "step": 1488 }, { "epoch": 0.30609517936067426, "grad_norm": 0.22665980458259583, "learning_rate": 8.878112562461781e-05, "loss": 0.72, "step": 1489 }, { "epoch": 0.30630075033405285, "grad_norm": 0.23110273480415344, "learning_rate": 8.877881561772334e-05, "loss": 0.7333, "step": 1490 }, { "epoch": 0.3065063213074314, "grad_norm": 0.2374107986688614, "learning_rate": 8.877650345405124e-05, "loss": 0.7047, "step": 1491 }, { "epoch": 0.30671189228081, "grad_norm": 0.23222175240516663, "learning_rate": 8.877418913371543e-05, "loss": 0.7247, "step": 1492 }, { "epoch": 0.3069174632541885, "grad_norm": 0.2248169332742691, "learning_rate": 8.877187265682993e-05, "loss": 0.731, "step": 1493 }, { "epoch": 0.3071230342275671, "grad_norm": 0.22877496480941772, "learning_rate": 8.876955402350885e-05, "loss": 0.7317, "step": 1494 }, { "epoch": 0.3073286052009456, "grad_norm": 0.23524411022663116, "learning_rate": 8.876723323386642e-05, "loss": 0.7243, "step": 1495 }, { "epoch": 0.30753417617432416, "grad_norm": 0.23392078280448914, "learning_rate": 8.876491028801698e-05, "loss": 0.7291, "step": 1496 }, { "epoch": 0.30773974714770275, "grad_norm": 0.23218654096126556, "learning_rate": 8.876258518607496e-05, "loss": 0.7185, "step": 1497 }, { "epoch": 0.3079453181210813, "grad_norm": 0.22467701137065887, "learning_rate": 8.876025792815493e-05, "loss": 0.6027, "step": 1498 }, { "epoch": 0.30815088909445987, "grad_norm": 0.16272898018360138, "learning_rate": 8.875792851437153e-05, "loss": 0.5879, "step": 1499 }, { "epoch": 0.3083564600678384, "grad_norm": 0.3116845190525055, "learning_rate": 8.875559694483949e-05, "loss": 0.7104, "step": 1500 }, { "epoch": 0.308562031041217, "grad_norm": 0.27991852164268494, "learning_rate": 8.875326321967371e-05, "loss": 0.7103, "step": 1501 }, { "epoch": 0.3087676020145955, "grad_norm": 0.2318386435508728, "learning_rate": 8.875092733898917e-05, "loss": 0.7377, "step": 1502 }, { "epoch": 0.3089731729879741, "grad_norm": 0.2598876655101776, "learning_rate": 8.874858930290091e-05, "loss": 0.6015, "step": 1503 }, { "epoch": 0.30917874396135264, "grad_norm": 0.29758408665657043, "learning_rate": 8.874624911152415e-05, "loss": 0.7181, "step": 1504 }, { "epoch": 0.30938431493473123, "grad_norm": 0.27736955881118774, "learning_rate": 8.874390676497416e-05, "loss": 0.7206, "step": 1505 }, { "epoch": 0.30958988590810976, "grad_norm": 0.2458835244178772, "learning_rate": 8.874156226336634e-05, "loss": 0.7499, "step": 1506 }, { "epoch": 0.30979545688148835, "grad_norm": 0.22762452065944672, "learning_rate": 8.873921560681619e-05, "loss": 0.5821, "step": 1507 }, { "epoch": 0.3100010278548669, "grad_norm": 0.27454984188079834, "learning_rate": 8.873686679543934e-05, "loss": 0.7146, "step": 1508 }, { "epoch": 0.3102065988282455, "grad_norm": 0.26772287487983704, "learning_rate": 8.873451582935148e-05, "loss": 0.7536, "step": 1509 }, { "epoch": 0.310412169801624, "grad_norm": 0.23362015187740326, "learning_rate": 8.873216270866843e-05, "loss": 0.6984, "step": 1510 }, { "epoch": 0.3106177407750026, "grad_norm": 0.23610959947109222, "learning_rate": 8.872980743350613e-05, "loss": 0.7171, "step": 1511 }, { "epoch": 0.3108233117483811, "grad_norm": 0.25894349813461304, "learning_rate": 8.872745000398062e-05, "loss": 0.7187, "step": 1512 }, { "epoch": 0.3110288827217597, "grad_norm": 0.26054081320762634, "learning_rate": 8.872509042020803e-05, "loss": 0.7203, "step": 1513 }, { "epoch": 0.31123445369513825, "grad_norm": 0.2335205376148224, "learning_rate": 8.872272868230461e-05, "loss": 0.7009, "step": 1514 }, { "epoch": 0.3114400246685168, "grad_norm": 0.24587051570415497, "learning_rate": 8.872036479038669e-05, "loss": 0.7399, "step": 1515 }, { "epoch": 0.31164559564189537, "grad_norm": 0.24924126267433167, "learning_rate": 8.871799874457075e-05, "loss": 0.7493, "step": 1516 }, { "epoch": 0.3118511666152739, "grad_norm": 0.24950510263442993, "learning_rate": 8.871563054497335e-05, "loss": 0.7178, "step": 1517 }, { "epoch": 0.3120567375886525, "grad_norm": 0.25369346141815186, "learning_rate": 8.871326019171117e-05, "loss": 0.6963, "step": 1518 }, { "epoch": 0.312262308562031, "grad_norm": 0.2488810122013092, "learning_rate": 8.871088768490098e-05, "loss": 0.7619, "step": 1519 }, { "epoch": 0.3124678795354096, "grad_norm": 0.24383045732975006, "learning_rate": 8.870851302465962e-05, "loss": 0.711, "step": 1520 }, { "epoch": 0.31267345050878814, "grad_norm": 0.2425009161233902, "learning_rate": 8.870613621110415e-05, "loss": 0.7177, "step": 1521 }, { "epoch": 0.31287902148216673, "grad_norm": 0.240753635764122, "learning_rate": 8.870375724435162e-05, "loss": 0.7244, "step": 1522 }, { "epoch": 0.31308459245554526, "grad_norm": 0.23214225471019745, "learning_rate": 8.870137612451926e-05, "loss": 0.7576, "step": 1523 }, { "epoch": 0.31329016342892385, "grad_norm": 0.2381378412246704, "learning_rate": 8.869899285172435e-05, "loss": 0.7379, "step": 1524 }, { "epoch": 0.3134957344023024, "grad_norm": 0.24119152128696442, "learning_rate": 8.869660742608429e-05, "loss": 0.5884, "step": 1525 }, { "epoch": 0.31370130537568097, "grad_norm": 0.1588635891675949, "learning_rate": 8.869421984771664e-05, "loss": 0.5977, "step": 1526 }, { "epoch": 0.3139068763490595, "grad_norm": 0.30175936222076416, "learning_rate": 8.869183011673899e-05, "loss": 0.7523, "step": 1527 }, { "epoch": 0.3141124473224381, "grad_norm": 0.2720763385295868, "learning_rate": 8.868943823326911e-05, "loss": 0.7369, "step": 1528 }, { "epoch": 0.3143180182958166, "grad_norm": 0.25000452995300293, "learning_rate": 8.868704419742477e-05, "loss": 0.7248, "step": 1529 }, { "epoch": 0.3145235892691952, "grad_norm": 0.24794606864452362, "learning_rate": 8.8684648009324e-05, "loss": 0.716, "step": 1530 }, { "epoch": 0.31472916024257375, "grad_norm": 0.2837069630622864, "learning_rate": 8.868224966908477e-05, "loss": 0.7167, "step": 1531 }, { "epoch": 0.31493473121595233, "grad_norm": 0.2553151845932007, "learning_rate": 8.867984917682529e-05, "loss": 0.728, "step": 1532 }, { "epoch": 0.31514030218933087, "grad_norm": 0.2584458589553833, "learning_rate": 8.86774465326638e-05, "loss": 0.7546, "step": 1533 }, { "epoch": 0.3153458731627094, "grad_norm": 0.3400932252407074, "learning_rate": 8.867504173671866e-05, "loss": 0.6503, "step": 1534 }, { "epoch": 0.315551444136088, "grad_norm": 0.22265098989009857, "learning_rate": 8.867263478910834e-05, "loss": 0.6126, "step": 1535 }, { "epoch": 0.3157570151094665, "grad_norm": 0.3153107464313507, "learning_rate": 8.867022568995144e-05, "loss": 0.7263, "step": 1536 }, { "epoch": 0.3159625860828451, "grad_norm": 0.2766020596027374, "learning_rate": 8.866781443936664e-05, "loss": 0.7219, "step": 1537 }, { "epoch": 0.31616815705622364, "grad_norm": 0.24225422739982605, "learning_rate": 8.866540103747273e-05, "loss": 0.7171, "step": 1538 }, { "epoch": 0.31637372802960223, "grad_norm": 0.25176170468330383, "learning_rate": 8.866298548438859e-05, "loss": 0.7344, "step": 1539 }, { "epoch": 0.31657929900298076, "grad_norm": 0.25651928782463074, "learning_rate": 8.866056778023322e-05, "loss": 0.7413, "step": 1540 }, { "epoch": 0.31678486997635935, "grad_norm": 0.2334342896938324, "learning_rate": 8.865814792512578e-05, "loss": 0.7253, "step": 1541 }, { "epoch": 0.3169904409497379, "grad_norm": 0.2274434119462967, "learning_rate": 8.865572591918542e-05, "loss": 0.7159, "step": 1542 }, { "epoch": 0.31719601192311647, "grad_norm": 0.2403416633605957, "learning_rate": 8.86533017625315e-05, "loss": 0.7181, "step": 1543 }, { "epoch": 0.317401582896495, "grad_norm": 0.4360656142234802, "learning_rate": 8.865087545528343e-05, "loss": 0.621, "step": 1544 }, { "epoch": 0.3176071538698736, "grad_norm": 0.267894983291626, "learning_rate": 8.864844699756077e-05, "loss": 0.7211, "step": 1545 }, { "epoch": 0.3178127248432521, "grad_norm": 0.28000763058662415, "learning_rate": 8.864601638948313e-05, "loss": 0.7417, "step": 1546 }, { "epoch": 0.3180182958166307, "grad_norm": 0.25448542833328247, "learning_rate": 8.864358363117026e-05, "loss": 0.7456, "step": 1547 }, { "epoch": 0.31822386679000925, "grad_norm": 0.22277960181236267, "learning_rate": 8.864114872274201e-05, "loss": 0.7509, "step": 1548 }, { "epoch": 0.31842943776338783, "grad_norm": 0.25154295563697815, "learning_rate": 8.863871166431835e-05, "loss": 0.7561, "step": 1549 }, { "epoch": 0.31863500873676637, "grad_norm": 0.24481630325317383, "learning_rate": 8.863627245601933e-05, "loss": 0.7205, "step": 1550 }, { "epoch": 0.3188405797101449, "grad_norm": 0.2636171877384186, "learning_rate": 8.863383109796514e-05, "loss": 0.6225, "step": 1551 }, { "epoch": 0.3190461506835235, "grad_norm": 0.24895146489143372, "learning_rate": 8.863138759027601e-05, "loss": 0.713, "step": 1552 }, { "epoch": 0.319251721656902, "grad_norm": 0.23717238008975983, "learning_rate": 8.862894193307234e-05, "loss": 0.7009, "step": 1553 }, { "epoch": 0.3194572926302806, "grad_norm": 0.17063067853450775, "learning_rate": 8.862649412647463e-05, "loss": 0.609, "step": 1554 }, { "epoch": 0.31966286360365914, "grad_norm": 0.24430248141288757, "learning_rate": 8.862404417060348e-05, "loss": 0.7329, "step": 1555 }, { "epoch": 0.31986843457703773, "grad_norm": 0.22696368396282196, "learning_rate": 8.862159206557955e-05, "loss": 0.7189, "step": 1556 }, { "epoch": 0.32007400555041626, "grad_norm": 0.23269693553447723, "learning_rate": 8.861913781152368e-05, "loss": 0.72, "step": 1557 }, { "epoch": 0.32027957652379485, "grad_norm": 0.23606634140014648, "learning_rate": 8.861668140855677e-05, "loss": 0.7273, "step": 1558 }, { "epoch": 0.3204851474971734, "grad_norm": 0.2232600301504135, "learning_rate": 8.861422285679982e-05, "loss": 0.7271, "step": 1559 }, { "epoch": 0.32069071847055197, "grad_norm": 0.22926129400730133, "learning_rate": 8.861176215637396e-05, "loss": 0.7046, "step": 1560 }, { "epoch": 0.3208962894439305, "grad_norm": 0.21815744042396545, "learning_rate": 8.860929930740043e-05, "loss": 0.7145, "step": 1561 }, { "epoch": 0.3211018604173091, "grad_norm": 0.2220899611711502, "learning_rate": 8.860683431000055e-05, "loss": 0.7517, "step": 1562 }, { "epoch": 0.3213074313906876, "grad_norm": 0.23148676753044128, "learning_rate": 8.860436716429576e-05, "loss": 0.7425, "step": 1563 }, { "epoch": 0.3215130023640662, "grad_norm": 0.2475571632385254, "learning_rate": 8.86018978704076e-05, "loss": 0.7373, "step": 1564 }, { "epoch": 0.32171857333744475, "grad_norm": 0.22201502323150635, "learning_rate": 8.859942642845773e-05, "loss": 0.739, "step": 1565 }, { "epoch": 0.32192414431082333, "grad_norm": 0.23228532075881958, "learning_rate": 8.859695283856791e-05, "loss": 0.7181, "step": 1566 }, { "epoch": 0.32212971528420187, "grad_norm": 0.22633086144924164, "learning_rate": 8.859447710085998e-05, "loss": 0.7264, "step": 1567 }, { "epoch": 0.32233528625758046, "grad_norm": 0.2289307564496994, "learning_rate": 8.859199921545595e-05, "loss": 0.6861, "step": 1568 }, { "epoch": 0.322540857230959, "grad_norm": 0.2249763160943985, "learning_rate": 8.858951918247784e-05, "loss": 0.7251, "step": 1569 }, { "epoch": 0.3227464282043375, "grad_norm": 0.21789641678333282, "learning_rate": 8.858703700204787e-05, "loss": 0.5872, "step": 1570 }, { "epoch": 0.3229519991777161, "grad_norm": 0.32843679189682007, "learning_rate": 8.85845526742883e-05, "loss": 0.7297, "step": 1571 }, { "epoch": 0.32315757015109464, "grad_norm": 0.2552517354488373, "learning_rate": 8.858206619932154e-05, "loss": 0.7297, "step": 1572 }, { "epoch": 0.32336314112447323, "grad_norm": 0.1595383882522583, "learning_rate": 8.857957757727008e-05, "loss": 0.5928, "step": 1573 }, { "epoch": 0.32356871209785176, "grad_norm": 0.23427622020244598, "learning_rate": 8.857708680825654e-05, "loss": 0.7416, "step": 1574 }, { "epoch": 0.32377428307123035, "grad_norm": 0.2303827553987503, "learning_rate": 8.85745938924036e-05, "loss": 0.7506, "step": 1575 }, { "epoch": 0.3239798540446089, "grad_norm": 0.2222229540348053, "learning_rate": 8.857209882983408e-05, "loss": 0.7212, "step": 1576 }, { "epoch": 0.32418542501798747, "grad_norm": 0.21901166439056396, "learning_rate": 8.856960162067091e-05, "loss": 0.7307, "step": 1577 }, { "epoch": 0.324390995991366, "grad_norm": 1.646615743637085, "learning_rate": 8.85671022650371e-05, "loss": 0.7284, "step": 1578 }, { "epoch": 0.3245965669647446, "grad_norm": 0.22739437222480774, "learning_rate": 8.856460076305581e-05, "loss": 0.7468, "step": 1579 }, { "epoch": 0.3248021379381231, "grad_norm": 0.22001872956752777, "learning_rate": 8.856209711485026e-05, "loss": 0.6801, "step": 1580 }, { "epoch": 0.3250077089115017, "grad_norm": 0.2490796595811844, "learning_rate": 8.855959132054379e-05, "loss": 0.7225, "step": 1581 }, { "epoch": 0.32521327988488025, "grad_norm": 0.23509925603866577, "learning_rate": 8.855708338025985e-05, "loss": 0.7126, "step": 1582 }, { "epoch": 0.32541885085825883, "grad_norm": 0.26781192421913147, "learning_rate": 8.8554573294122e-05, "loss": 0.7345, "step": 1583 }, { "epoch": 0.32562442183163737, "grad_norm": 0.23214460909366608, "learning_rate": 8.85520610622539e-05, "loss": 0.7287, "step": 1584 }, { "epoch": 0.32582999280501596, "grad_norm": 0.24188122153282166, "learning_rate": 8.854954668477931e-05, "loss": 0.7169, "step": 1585 }, { "epoch": 0.3260355637783945, "grad_norm": 0.22148127853870392, "learning_rate": 8.85470301618221e-05, "loss": 0.7128, "step": 1586 }, { "epoch": 0.3262411347517731, "grad_norm": 0.6666994690895081, "learning_rate": 8.854451149350625e-05, "loss": 0.6192, "step": 1587 }, { "epoch": 0.3264467057251516, "grad_norm": 0.24034947156906128, "learning_rate": 8.854199067995585e-05, "loss": 0.724, "step": 1588 }, { "epoch": 0.32665227669853014, "grad_norm": 0.23072193562984467, "learning_rate": 8.85394677212951e-05, "loss": 0.727, "step": 1589 }, { "epoch": 0.32685784767190873, "grad_norm": 0.23429062962532043, "learning_rate": 8.853694261764826e-05, "loss": 0.7165, "step": 1590 }, { "epoch": 0.32706341864528726, "grad_norm": 0.23310211300849915, "learning_rate": 8.853441536913976e-05, "loss": 0.7284, "step": 1591 }, { "epoch": 0.32726898961866585, "grad_norm": 0.2373618483543396, "learning_rate": 8.853188597589409e-05, "loss": 0.7347, "step": 1592 }, { "epoch": 0.3274745605920444, "grad_norm": 0.22494561970233917, "learning_rate": 8.852935443803587e-05, "loss": 0.73, "step": 1593 }, { "epoch": 0.32768013156542297, "grad_norm": 0.22148995101451874, "learning_rate": 8.85268207556898e-05, "loss": 0.7105, "step": 1594 }, { "epoch": 0.3278857025388015, "grad_norm": 0.23605044186115265, "learning_rate": 8.852428492898071e-05, "loss": 0.7147, "step": 1595 }, { "epoch": 0.3280912735121801, "grad_norm": 0.23167657852172852, "learning_rate": 8.852174695803355e-05, "loss": 0.7129, "step": 1596 }, { "epoch": 0.3282968444855586, "grad_norm": 0.2309151291847229, "learning_rate": 8.851920684297333e-05, "loss": 0.7087, "step": 1597 }, { "epoch": 0.3285024154589372, "grad_norm": 0.22455458343029022, "learning_rate": 8.85166645839252e-05, "loss": 0.7316, "step": 1598 }, { "epoch": 0.32870798643231575, "grad_norm": 0.2276565134525299, "learning_rate": 8.85141201810144e-05, "loss": 0.719, "step": 1599 }, { "epoch": 0.32891355740569433, "grad_norm": 0.23086774349212646, "learning_rate": 8.851157363436628e-05, "loss": 0.7065, "step": 1600 }, { "epoch": 0.32911912837907287, "grad_norm": 0.23493504524230957, "learning_rate": 8.850902494410631e-05, "loss": 0.7245, "step": 1601 }, { "epoch": 0.32932469935245146, "grad_norm": 0.24357451498508453, "learning_rate": 8.850647411036003e-05, "loss": 0.7151, "step": 1602 }, { "epoch": 0.32953027032583, "grad_norm": 0.24102084338665009, "learning_rate": 8.850392113325312e-05, "loss": 0.7389, "step": 1603 }, { "epoch": 0.3297358412992086, "grad_norm": 0.2216963768005371, "learning_rate": 8.850136601291137e-05, "loss": 0.703, "step": 1604 }, { "epoch": 0.3299414122725871, "grad_norm": 0.22978007793426514, "learning_rate": 8.849880874946062e-05, "loss": 0.7402, "step": 1605 }, { "epoch": 0.3301469832459657, "grad_norm": 0.23540645837783813, "learning_rate": 8.849624934302689e-05, "loss": 0.6975, "step": 1606 }, { "epoch": 0.33035255421934423, "grad_norm": 1.370906949043274, "learning_rate": 8.849368779373625e-05, "loss": 0.8282, "step": 1607 }, { "epoch": 0.33055812519272276, "grad_norm": 0.2301483154296875, "learning_rate": 8.84911241017149e-05, "loss": 0.7083, "step": 1608 }, { "epoch": 0.33076369616610135, "grad_norm": 0.24278217554092407, "learning_rate": 8.848855826708914e-05, "loss": 0.724, "step": 1609 }, { "epoch": 0.3309692671394799, "grad_norm": 0.25511378049850464, "learning_rate": 8.848599028998538e-05, "loss": 0.7214, "step": 1610 }, { "epoch": 0.33117483811285847, "grad_norm": 0.2384072244167328, "learning_rate": 8.848342017053015e-05, "loss": 0.7211, "step": 1611 }, { "epoch": 0.331380409086237, "grad_norm": 0.31351780891418457, "learning_rate": 8.848084790885003e-05, "loss": 0.6297, "step": 1612 }, { "epoch": 0.3315859800596156, "grad_norm": 0.262350469827652, "learning_rate": 8.847827350507177e-05, "loss": 0.7176, "step": 1613 }, { "epoch": 0.3317915510329941, "grad_norm": 0.2178378701210022, "learning_rate": 8.847569695932219e-05, "loss": 0.5897, "step": 1614 }, { "epoch": 0.3319971220063727, "grad_norm": 0.2447414994239807, "learning_rate": 8.847311827172822e-05, "loss": 0.7119, "step": 1615 }, { "epoch": 0.33220269297975125, "grad_norm": 0.23930813372135162, "learning_rate": 8.84705374424169e-05, "loss": 0.7297, "step": 1616 }, { "epoch": 0.33240826395312983, "grad_norm": 0.18309295177459717, "learning_rate": 8.846795447151539e-05, "loss": 0.6059, "step": 1617 }, { "epoch": 0.33261383492650837, "grad_norm": 0.23922927677631378, "learning_rate": 8.846536935915093e-05, "loss": 0.709, "step": 1618 }, { "epoch": 0.33281940589988696, "grad_norm": 0.24151726067066193, "learning_rate": 8.846278210545089e-05, "loss": 0.7009, "step": 1619 }, { "epoch": 0.3330249768732655, "grad_norm": 0.23320122063159943, "learning_rate": 8.846019271054272e-05, "loss": 0.702, "step": 1620 }, { "epoch": 0.3332305478466441, "grad_norm": 0.24178290367126465, "learning_rate": 8.845760117455397e-05, "loss": 0.7359, "step": 1621 }, { "epoch": 0.3334361188200226, "grad_norm": 0.6629179120063782, "learning_rate": 8.845500749761233e-05, "loss": 0.7394, "step": 1622 }, { "epoch": 0.3336416897934012, "grad_norm": 0.2403455376625061, "learning_rate": 8.84524116798456e-05, "loss": 0.7285, "step": 1623 }, { "epoch": 0.33384726076677973, "grad_norm": 0.19743573665618896, "learning_rate": 8.844981372138162e-05, "loss": 0.6283, "step": 1624 }, { "epoch": 0.33405283174015826, "grad_norm": 0.2429579198360443, "learning_rate": 8.844721362234841e-05, "loss": 0.7409, "step": 1625 }, { "epoch": 0.33425840271353685, "grad_norm": 0.24667932093143463, "learning_rate": 8.844461138287406e-05, "loss": 0.7242, "step": 1626 }, { "epoch": 0.3344639736869154, "grad_norm": 0.2274756133556366, "learning_rate": 8.844200700308677e-05, "loss": 0.7241, "step": 1627 }, { "epoch": 0.33466954466029397, "grad_norm": 0.24319452047348022, "learning_rate": 8.843940048311484e-05, "loss": 0.7248, "step": 1628 }, { "epoch": 0.3348751156336725, "grad_norm": 0.23962891101837158, "learning_rate": 8.843679182308668e-05, "loss": 0.7236, "step": 1629 }, { "epoch": 0.3350806866070511, "grad_norm": 0.23430408537387848, "learning_rate": 8.84341810231308e-05, "loss": 0.7255, "step": 1630 }, { "epoch": 0.3352862575804296, "grad_norm": 0.2336353361606598, "learning_rate": 8.843156808337585e-05, "loss": 0.7229, "step": 1631 }, { "epoch": 0.3354918285538082, "grad_norm": 0.22381432354450226, "learning_rate": 8.842895300395054e-05, "loss": 0.7248, "step": 1632 }, { "epoch": 0.33569739952718675, "grad_norm": 0.2316228300333023, "learning_rate": 8.842633578498368e-05, "loss": 0.7343, "step": 1633 }, { "epoch": 0.33590297050056533, "grad_norm": 0.22491221129894257, "learning_rate": 8.842371642660424e-05, "loss": 0.718, "step": 1634 }, { "epoch": 0.33610854147394387, "grad_norm": 0.2314968854188919, "learning_rate": 8.842109492894127e-05, "loss": 0.7289, "step": 1635 }, { "epoch": 0.33631411244732246, "grad_norm": 0.23885907232761383, "learning_rate": 8.841847129212389e-05, "loss": 0.7338, "step": 1636 }, { "epoch": 0.336519683420701, "grad_norm": 0.22755815088748932, "learning_rate": 8.841584551628136e-05, "loss": 0.7238, "step": 1637 }, { "epoch": 0.3367252543940796, "grad_norm": 0.2223365604877472, "learning_rate": 8.841321760154306e-05, "loss": 0.729, "step": 1638 }, { "epoch": 0.3369308253674581, "grad_norm": 0.23648889362812042, "learning_rate": 8.841058754803844e-05, "loss": 0.7479, "step": 1639 }, { "epoch": 0.3371363963408367, "grad_norm": 0.22464527189731598, "learning_rate": 8.840795535589706e-05, "loss": 0.7364, "step": 1640 }, { "epoch": 0.33734196731421523, "grad_norm": 0.22983680665493011, "learning_rate": 8.840532102524861e-05, "loss": 0.7288, "step": 1641 }, { "epoch": 0.3375475382875938, "grad_norm": 0.22532789409160614, "learning_rate": 8.840268455622288e-05, "loss": 0.7626, "step": 1642 }, { "epoch": 0.33775310926097235, "grad_norm": 0.22486740350723267, "learning_rate": 8.840004594894974e-05, "loss": 0.7198, "step": 1643 }, { "epoch": 0.3379586802343509, "grad_norm": 0.220737487077713, "learning_rate": 8.839740520355918e-05, "loss": 0.7467, "step": 1644 }, { "epoch": 0.33816425120772947, "grad_norm": 0.23781028389930725, "learning_rate": 8.839476232018131e-05, "loss": 0.7162, "step": 1645 }, { "epoch": 0.338369822181108, "grad_norm": 0.22306212782859802, "learning_rate": 8.839211729894634e-05, "loss": 0.7024, "step": 1646 }, { "epoch": 0.3385753931544866, "grad_norm": 0.22637905180454254, "learning_rate": 8.838947013998454e-05, "loss": 0.7227, "step": 1647 }, { "epoch": 0.3387809641278651, "grad_norm": 0.21539071202278137, "learning_rate": 8.838682084342637e-05, "loss": 0.715, "step": 1648 }, { "epoch": 0.3389865351012437, "grad_norm": 0.21236176788806915, "learning_rate": 8.838416940940232e-05, "loss": 0.6935, "step": 1649 }, { "epoch": 0.33919210607462225, "grad_norm": 0.21903282403945923, "learning_rate": 8.838151583804302e-05, "loss": 0.6875, "step": 1650 }, { "epoch": 0.33939767704800083, "grad_norm": 0.22233720123767853, "learning_rate": 8.83788601294792e-05, "loss": 0.7196, "step": 1651 }, { "epoch": 0.33960324802137937, "grad_norm": 0.21296600997447968, "learning_rate": 8.837620228384169e-05, "loss": 0.7383, "step": 1652 }, { "epoch": 0.33980881899475796, "grad_norm": 0.21336333453655243, "learning_rate": 8.837354230126144e-05, "loss": 0.7222, "step": 1653 }, { "epoch": 0.3400143899681365, "grad_norm": 0.22977587580680847, "learning_rate": 8.837088018186948e-05, "loss": 0.7053, "step": 1654 }, { "epoch": 0.3402199609415151, "grad_norm": 0.22435788810253143, "learning_rate": 8.836821592579697e-05, "loss": 0.6154, "step": 1655 }, { "epoch": 0.3404255319148936, "grad_norm": 0.23182466626167297, "learning_rate": 8.836554953317518e-05, "loss": 0.7294, "step": 1656 }, { "epoch": 0.3406311028882722, "grad_norm": 0.2296569049358368, "learning_rate": 8.836288100413543e-05, "loss": 0.7147, "step": 1657 }, { "epoch": 0.34083667386165073, "grad_norm": 0.22955302894115448, "learning_rate": 8.836021033880922e-05, "loss": 0.7228, "step": 1658 }, { "epoch": 0.3410422448350293, "grad_norm": 0.28406065702438354, "learning_rate": 8.83575375373281e-05, "loss": 0.722, "step": 1659 }, { "epoch": 0.34124781580840785, "grad_norm": 0.22933915257453918, "learning_rate": 8.835486259982378e-05, "loss": 0.7365, "step": 1660 }, { "epoch": 0.34145338678178644, "grad_norm": 0.18561038374900818, "learning_rate": 8.835218552642801e-05, "loss": 0.6073, "step": 1661 }, { "epoch": 0.34165895775516497, "grad_norm": 0.22962850332260132, "learning_rate": 8.834950631727269e-05, "loss": 0.7329, "step": 1662 }, { "epoch": 0.3418645287285435, "grad_norm": 0.22192583978176117, "learning_rate": 8.83468249724898e-05, "loss": 0.6966, "step": 1663 }, { "epoch": 0.3420700997019221, "grad_norm": 0.2303367406129837, "learning_rate": 8.834414149221145e-05, "loss": 0.7083, "step": 1664 }, { "epoch": 0.3422756706753006, "grad_norm": 0.21235564351081848, "learning_rate": 8.834145587656984e-05, "loss": 0.7054, "step": 1665 }, { "epoch": 0.3424812416486792, "grad_norm": 0.22414252161979675, "learning_rate": 8.833876812569728e-05, "loss": 0.7094, "step": 1666 }, { "epoch": 0.34268681262205775, "grad_norm": 0.21854104101657867, "learning_rate": 8.833607823972617e-05, "loss": 0.7009, "step": 1667 }, { "epoch": 0.34289238359543633, "grad_norm": 0.21945634484291077, "learning_rate": 8.833338621878904e-05, "loss": 0.7214, "step": 1668 }, { "epoch": 0.34309795456881487, "grad_norm": 0.22008635103702545, "learning_rate": 8.833069206301852e-05, "loss": 0.7231, "step": 1669 }, { "epoch": 0.34330352554219346, "grad_norm": 0.2222408652305603, "learning_rate": 8.832799577254734e-05, "loss": 0.7249, "step": 1670 }, { "epoch": 0.343509096515572, "grad_norm": 0.22058893740177155, "learning_rate": 8.83252973475083e-05, "loss": 0.7196, "step": 1671 }, { "epoch": 0.3437146674889506, "grad_norm": 0.2201676368713379, "learning_rate": 8.832259678803437e-05, "loss": 0.7226, "step": 1672 }, { "epoch": 0.3439202384623291, "grad_norm": 0.21815598011016846, "learning_rate": 8.831989409425857e-05, "loss": 0.6943, "step": 1673 }, { "epoch": 0.3441258094357077, "grad_norm": 0.22216841578483582, "learning_rate": 8.831718926631409e-05, "loss": 0.7259, "step": 1674 }, { "epoch": 0.34433138040908623, "grad_norm": 0.21504633128643036, "learning_rate": 8.831448230433415e-05, "loss": 0.7286, "step": 1675 }, { "epoch": 0.3445369513824648, "grad_norm": 0.20685335993766785, "learning_rate": 8.83117732084521e-05, "loss": 0.6891, "step": 1676 }, { "epoch": 0.34474252235584335, "grad_norm": 0.1763618290424347, "learning_rate": 8.830906197880146e-05, "loss": 0.6218, "step": 1677 }, { "epoch": 0.34494809332922194, "grad_norm": 0.24009843170642853, "learning_rate": 8.830634861551573e-05, "loss": 0.7337, "step": 1678 }, { "epoch": 0.34515366430260047, "grad_norm": 0.21924906969070435, "learning_rate": 8.830363311872862e-05, "loss": 0.7194, "step": 1679 }, { "epoch": 0.345359235275979, "grad_norm": 0.22524218261241913, "learning_rate": 8.830091548857392e-05, "loss": 0.728, "step": 1680 }, { "epoch": 0.3455648062493576, "grad_norm": 0.15049724280834198, "learning_rate": 8.829819572518549e-05, "loss": 0.5879, "step": 1681 }, { "epoch": 0.3457703772227361, "grad_norm": 0.23018436133861542, "learning_rate": 8.829547382869734e-05, "loss": 0.7318, "step": 1682 }, { "epoch": 0.3459759481961147, "grad_norm": 0.14980974793434143, "learning_rate": 8.829274979924355e-05, "loss": 0.6082, "step": 1683 }, { "epoch": 0.34618151916949325, "grad_norm": 0.23299898207187653, "learning_rate": 8.829002363695834e-05, "loss": 0.6979, "step": 1684 }, { "epoch": 0.34638709014287183, "grad_norm": 0.22874654829502106, "learning_rate": 8.828729534197599e-05, "loss": 0.7117, "step": 1685 }, { "epoch": 0.34659266111625037, "grad_norm": 0.14617690443992615, "learning_rate": 8.828456491443093e-05, "loss": 0.5823, "step": 1686 }, { "epoch": 0.34679823208962895, "grad_norm": 0.14507731795310974, "learning_rate": 8.828183235445767e-05, "loss": 0.6002, "step": 1687 }, { "epoch": 0.3470038030630075, "grad_norm": 0.15053583681583405, "learning_rate": 8.827909766219082e-05, "loss": 0.6047, "step": 1688 }, { "epoch": 0.3472093740363861, "grad_norm": 0.1374531388282776, "learning_rate": 8.827636083776512e-05, "loss": 0.6148, "step": 1689 }, { "epoch": 0.3474149450097646, "grad_norm": 0.2662424147129059, "learning_rate": 8.827362188131539e-05, "loss": 0.7147, "step": 1690 }, { "epoch": 0.3476205159831432, "grad_norm": 0.24824592471122742, "learning_rate": 8.827088079297658e-05, "loss": 0.749, "step": 1691 }, { "epoch": 0.34782608695652173, "grad_norm": 0.17181143164634705, "learning_rate": 8.826813757288371e-05, "loss": 0.605, "step": 1692 }, { "epoch": 0.3480316579299003, "grad_norm": 0.2484540492296219, "learning_rate": 8.826539222117195e-05, "loss": 0.7012, "step": 1693 }, { "epoch": 0.34823722890327885, "grad_norm": 0.17473895847797394, "learning_rate": 8.826264473797651e-05, "loss": 0.5969, "step": 1694 }, { "epoch": 0.34844279987665744, "grad_norm": 0.14865082502365112, "learning_rate": 8.825989512343281e-05, "loss": 0.6109, "step": 1695 }, { "epoch": 0.34864837085003597, "grad_norm": 0.26978155970573425, "learning_rate": 8.825714337767625e-05, "loss": 0.7122, "step": 1696 }, { "epoch": 0.34885394182341456, "grad_norm": 0.15846404433250427, "learning_rate": 8.825438950084241e-05, "loss": 0.5924, "step": 1697 }, { "epoch": 0.3490595127967931, "grad_norm": 0.23453454673290253, "learning_rate": 8.8251633493067e-05, "loss": 0.7328, "step": 1698 }, { "epoch": 0.3492650837701716, "grad_norm": 0.22266656160354614, "learning_rate": 8.824887535448574e-05, "loss": 0.7041, "step": 1699 }, { "epoch": 0.3494706547435502, "grad_norm": 0.2392280548810959, "learning_rate": 8.824611508523455e-05, "loss": 0.7133, "step": 1700 }, { "epoch": 0.34967622571692875, "grad_norm": 0.22809362411499023, "learning_rate": 8.82433526854494e-05, "loss": 0.7258, "step": 1701 }, { "epoch": 0.34988179669030733, "grad_norm": 0.2222517728805542, "learning_rate": 8.824058815526637e-05, "loss": 0.7114, "step": 1702 }, { "epoch": 0.35008736766368587, "grad_norm": 0.23900644481182098, "learning_rate": 8.823782149482169e-05, "loss": 0.7146, "step": 1703 }, { "epoch": 0.35029293863706445, "grad_norm": 0.2216804325580597, "learning_rate": 8.823505270425162e-05, "loss": 0.712, "step": 1704 }, { "epoch": 0.350498509610443, "grad_norm": 0.22626622021198273, "learning_rate": 8.823228178369259e-05, "loss": 0.7145, "step": 1705 }, { "epoch": 0.3507040805838216, "grad_norm": 0.23051661252975464, "learning_rate": 8.82295087332811e-05, "loss": 0.7246, "step": 1706 }, { "epoch": 0.3509096515572001, "grad_norm": 0.19165797531604767, "learning_rate": 8.822673355315376e-05, "loss": 0.6022, "step": 1707 }, { "epoch": 0.3511152225305787, "grad_norm": 0.15455321967601776, "learning_rate": 8.822395624344733e-05, "loss": 0.5952, "step": 1708 }, { "epoch": 0.35132079350395723, "grad_norm": 0.25851893424987793, "learning_rate": 8.822117680429856e-05, "loss": 0.7155, "step": 1709 }, { "epoch": 0.3515263644773358, "grad_norm": 0.14911410212516785, "learning_rate": 8.821839523584446e-05, "loss": 0.6002, "step": 1710 }, { "epoch": 0.35173193545071435, "grad_norm": 0.2250581830739975, "learning_rate": 8.821561153822202e-05, "loss": 0.694, "step": 1711 }, { "epoch": 0.35193750642409294, "grad_norm": 0.17733228206634521, "learning_rate": 8.821282571156838e-05, "loss": 0.5743, "step": 1712 }, { "epoch": 0.35214307739747147, "grad_norm": 0.23851247131824493, "learning_rate": 8.82100377560208e-05, "loss": 0.7278, "step": 1713 }, { "epoch": 0.35234864837085006, "grad_norm": 0.23099485039710999, "learning_rate": 8.820724767171662e-05, "loss": 0.7387, "step": 1714 }, { "epoch": 0.3525542193442286, "grad_norm": 0.22473661601543427, "learning_rate": 8.82044554587933e-05, "loss": 0.7185, "step": 1715 }, { "epoch": 0.3527597903176072, "grad_norm": 0.22726485133171082, "learning_rate": 8.820166111738839e-05, "loss": 0.7141, "step": 1716 }, { "epoch": 0.3529653612909857, "grad_norm": 0.2528528869152069, "learning_rate": 8.819886464763958e-05, "loss": 0.725, "step": 1717 }, { "epoch": 0.35317093226436425, "grad_norm": 0.1892632395029068, "learning_rate": 8.81960660496846e-05, "loss": 0.5938, "step": 1718 }, { "epoch": 0.35337650323774283, "grad_norm": 0.22239932417869568, "learning_rate": 8.819326532366134e-05, "loss": 0.7044, "step": 1719 }, { "epoch": 0.35358207421112137, "grad_norm": 0.22476689517498016, "learning_rate": 8.81904624697078e-05, "loss": 0.7243, "step": 1720 }, { "epoch": 0.35378764518449995, "grad_norm": 0.2231576144695282, "learning_rate": 8.818765748796204e-05, "loss": 0.7159, "step": 1721 }, { "epoch": 0.3539932161578785, "grad_norm": 0.21081259846687317, "learning_rate": 8.818485037856224e-05, "loss": 0.7144, "step": 1722 }, { "epoch": 0.3541987871312571, "grad_norm": 0.22331789135932922, "learning_rate": 8.818204114164673e-05, "loss": 0.7398, "step": 1723 }, { "epoch": 0.3544043581046356, "grad_norm": 0.1838466078042984, "learning_rate": 8.817922977735387e-05, "loss": 0.6238, "step": 1724 }, { "epoch": 0.3546099290780142, "grad_norm": 0.2340015321969986, "learning_rate": 8.81764162858222e-05, "loss": 0.7226, "step": 1725 }, { "epoch": 0.35481550005139273, "grad_norm": 0.14466704428195953, "learning_rate": 8.817360066719027e-05, "loss": 0.5699, "step": 1726 }, { "epoch": 0.3550210710247713, "grad_norm": 0.23499037325382233, "learning_rate": 8.817078292159686e-05, "loss": 0.71, "step": 1727 }, { "epoch": 0.35522664199814985, "grad_norm": 0.24169334769248962, "learning_rate": 8.816796304918072e-05, "loss": 0.7195, "step": 1728 }, { "epoch": 0.35543221297152844, "grad_norm": 0.16424809396266937, "learning_rate": 8.816514105008086e-05, "loss": 0.5792, "step": 1729 }, { "epoch": 0.35563778394490697, "grad_norm": 0.2632940113544464, "learning_rate": 8.816231692443621e-05, "loss": 0.7313, "step": 1730 }, { "epoch": 0.35584335491828556, "grad_norm": 0.23430821299552917, "learning_rate": 8.815949067238596e-05, "loss": 0.7073, "step": 1731 }, { "epoch": 0.3560489258916641, "grad_norm": 0.22487561404705048, "learning_rate": 8.815666229406932e-05, "loss": 0.7182, "step": 1732 }, { "epoch": 0.3562544968650427, "grad_norm": 0.24197392165660858, "learning_rate": 8.815383178962566e-05, "loss": 0.7196, "step": 1733 }, { "epoch": 0.3564600678384212, "grad_norm": 0.22599098086357117, "learning_rate": 8.81509991591944e-05, "loss": 0.7165, "step": 1734 }, { "epoch": 0.3566656388117998, "grad_norm": 0.22369571030139923, "learning_rate": 8.814816440291509e-05, "loss": 0.7385, "step": 1735 }, { "epoch": 0.35687120978517833, "grad_norm": 0.23025518655776978, "learning_rate": 8.81453275209274e-05, "loss": 0.7184, "step": 1736 }, { "epoch": 0.35707678075855687, "grad_norm": 0.22964996099472046, "learning_rate": 8.81424885133711e-05, "loss": 0.7192, "step": 1737 }, { "epoch": 0.35728235173193545, "grad_norm": 0.19159770011901855, "learning_rate": 8.813964738038602e-05, "loss": 0.6025, "step": 1738 }, { "epoch": 0.357487922705314, "grad_norm": 0.2504747211933136, "learning_rate": 8.813680412211216e-05, "loss": 0.6964, "step": 1739 }, { "epoch": 0.3576934936786926, "grad_norm": 0.23766383528709412, "learning_rate": 8.813395873868956e-05, "loss": 0.7021, "step": 1740 }, { "epoch": 0.3578990646520711, "grad_norm": 0.2447771579027176, "learning_rate": 8.813111123025844e-05, "loss": 0.7185, "step": 1741 }, { "epoch": 0.3581046356254497, "grad_norm": 0.23200775682926178, "learning_rate": 8.812826159695907e-05, "loss": 0.7188, "step": 1742 }, { "epoch": 0.35831020659882823, "grad_norm": 0.22907336056232452, "learning_rate": 8.812540983893181e-05, "loss": 0.6909, "step": 1743 }, { "epoch": 0.3585157775722068, "grad_norm": 0.22600993514060974, "learning_rate": 8.812255595631719e-05, "loss": 0.7074, "step": 1744 }, { "epoch": 0.35872134854558535, "grad_norm": 0.2269076704978943, "learning_rate": 8.811969994925578e-05, "loss": 0.6814, "step": 1745 }, { "epoch": 0.35892691951896394, "grad_norm": 0.21256834268569946, "learning_rate": 8.811684181788831e-05, "loss": 0.7353, "step": 1746 }, { "epoch": 0.35913249049234247, "grad_norm": 0.22337260842323303, "learning_rate": 8.811398156235557e-05, "loss": 0.7398, "step": 1747 }, { "epoch": 0.35933806146572106, "grad_norm": 0.2335451990365982, "learning_rate": 8.811111918279847e-05, "loss": 0.7205, "step": 1748 }, { "epoch": 0.3595436324390996, "grad_norm": 0.21998728811740875, "learning_rate": 8.810825467935802e-05, "loss": 0.6947, "step": 1749 }, { "epoch": 0.3597492034124782, "grad_norm": 0.272847443819046, "learning_rate": 8.810538805217535e-05, "loss": 0.6987, "step": 1750 }, { "epoch": 0.3599547743858567, "grad_norm": 0.22549496591091156, "learning_rate": 8.810251930139169e-05, "loss": 0.7159, "step": 1751 }, { "epoch": 0.3601603453592353, "grad_norm": 0.21950645744800568, "learning_rate": 8.809964842714837e-05, "loss": 0.7493, "step": 1752 }, { "epoch": 0.36036591633261383, "grad_norm": 0.21935752034187317, "learning_rate": 8.809677542958681e-05, "loss": 0.6923, "step": 1753 }, { "epoch": 0.36057148730599237, "grad_norm": 0.2425873726606369, "learning_rate": 8.809390030884856e-05, "loss": 0.7055, "step": 1754 }, { "epoch": 0.36077705827937095, "grad_norm": 0.21217839419841766, "learning_rate": 8.809102306507527e-05, "loss": 0.7261, "step": 1755 }, { "epoch": 0.3609826292527495, "grad_norm": 0.22305883467197418, "learning_rate": 8.808814369840867e-05, "loss": 0.6804, "step": 1756 }, { "epoch": 0.3611882002261281, "grad_norm": 0.23050794005393982, "learning_rate": 8.808526220899063e-05, "loss": 0.7209, "step": 1757 }, { "epoch": 0.3613937711995066, "grad_norm": 0.21624812483787537, "learning_rate": 8.80823785969631e-05, "loss": 0.733, "step": 1758 }, { "epoch": 0.3615993421728852, "grad_norm": 0.2256494164466858, "learning_rate": 8.807949286246814e-05, "loss": 0.7133, "step": 1759 }, { "epoch": 0.36180491314626373, "grad_norm": 0.2232973873615265, "learning_rate": 8.807660500564793e-05, "loss": 0.7099, "step": 1760 }, { "epoch": 0.3620104841196423, "grad_norm": 0.21484389901161194, "learning_rate": 8.807371502664473e-05, "loss": 0.7089, "step": 1761 }, { "epoch": 0.36221605509302085, "grad_norm": 0.22121310234069824, "learning_rate": 8.807082292560089e-05, "loss": 0.7098, "step": 1762 }, { "epoch": 0.36242162606639944, "grad_norm": 0.22262440621852875, "learning_rate": 8.806792870265895e-05, "loss": 0.7494, "step": 1763 }, { "epoch": 0.36262719703977797, "grad_norm": 0.22367548942565918, "learning_rate": 8.806503235796145e-05, "loss": 0.7334, "step": 1764 }, { "epoch": 0.36283276801315656, "grad_norm": 0.22336241602897644, "learning_rate": 8.806213389165109e-05, "loss": 0.7028, "step": 1765 }, { "epoch": 0.3630383389865351, "grad_norm": 0.21695300936698914, "learning_rate": 8.805923330387067e-05, "loss": 0.7131, "step": 1766 }, { "epoch": 0.3632439099599137, "grad_norm": 0.2211865484714508, "learning_rate": 8.805633059476307e-05, "loss": 0.7493, "step": 1767 }, { "epoch": 0.3634494809332922, "grad_norm": 0.2145841121673584, "learning_rate": 8.80534257644713e-05, "loss": 0.5885, "step": 1768 }, { "epoch": 0.3636550519066708, "grad_norm": 0.23112855851650238, "learning_rate": 8.805051881313849e-05, "loss": 0.6836, "step": 1769 }, { "epoch": 0.36386062288004933, "grad_norm": 0.226564422249794, "learning_rate": 8.804760974090785e-05, "loss": 0.7297, "step": 1770 }, { "epoch": 0.3640661938534279, "grad_norm": 0.15169551968574524, "learning_rate": 8.804469854792266e-05, "loss": 0.6113, "step": 1771 }, { "epoch": 0.36427176482680645, "grad_norm": 0.23821888864040375, "learning_rate": 8.804178523432637e-05, "loss": 0.7175, "step": 1772 }, { "epoch": 0.364477335800185, "grad_norm": 0.23416121304035187, "learning_rate": 8.80388698002625e-05, "loss": 0.7214, "step": 1773 }, { "epoch": 0.3646829067735636, "grad_norm": 0.1642165631055832, "learning_rate": 8.803595224587467e-05, "loss": 0.5792, "step": 1774 }, { "epoch": 0.3648884777469421, "grad_norm": 0.2228156477212906, "learning_rate": 8.803303257130662e-05, "loss": 0.7051, "step": 1775 }, { "epoch": 0.3650940487203207, "grad_norm": 0.2340465635061264, "learning_rate": 8.80301107767022e-05, "loss": 0.7373, "step": 1776 }, { "epoch": 0.36529961969369923, "grad_norm": 0.2198680192232132, "learning_rate": 8.802718686220535e-05, "loss": 0.71, "step": 1777 }, { "epoch": 0.3655051906670778, "grad_norm": 0.2116042524576187, "learning_rate": 8.80242608279601e-05, "loss": 0.7465, "step": 1778 }, { "epoch": 0.36571076164045635, "grad_norm": 0.22121259570121765, "learning_rate": 8.802133267411062e-05, "loss": 0.7352, "step": 1779 }, { "epoch": 0.36591633261383494, "grad_norm": 0.23157210648059845, "learning_rate": 8.801840240080117e-05, "loss": 0.6896, "step": 1780 }, { "epoch": 0.36612190358721347, "grad_norm": 0.22456520795822144, "learning_rate": 8.801547000817609e-05, "loss": 0.7449, "step": 1781 }, { "epoch": 0.36632747456059206, "grad_norm": 0.15871234238147736, "learning_rate": 8.801253549637985e-05, "loss": 0.5766, "step": 1782 }, { "epoch": 0.3665330455339706, "grad_norm": 0.23135414719581604, "learning_rate": 8.800959886555704e-05, "loss": 0.7021, "step": 1783 }, { "epoch": 0.3667386165073492, "grad_norm": 0.1448424756526947, "learning_rate": 8.80066601158523e-05, "loss": 0.6072, "step": 1784 }, { "epoch": 0.3669441874807277, "grad_norm": 0.14697474241256714, "learning_rate": 8.800371924741044e-05, "loss": 0.6064, "step": 1785 }, { "epoch": 0.3671497584541063, "grad_norm": 0.22950981557369232, "learning_rate": 8.800077626037634e-05, "loss": 0.7119, "step": 1786 }, { "epoch": 0.36735532942748483, "grad_norm": 0.21077360212802887, "learning_rate": 8.799783115489497e-05, "loss": 0.7119, "step": 1787 }, { "epoch": 0.3675609004008634, "grad_norm": 0.21831658482551575, "learning_rate": 8.799488393111144e-05, "loss": 0.6915, "step": 1788 }, { "epoch": 0.36776647137424195, "grad_norm": 0.2097778469324112, "learning_rate": 8.799193458917092e-05, "loss": 0.7103, "step": 1789 }, { "epoch": 0.36797204234762054, "grad_norm": 0.21712899208068848, "learning_rate": 8.798898312921874e-05, "loss": 0.7155, "step": 1790 }, { "epoch": 0.3681776133209991, "grad_norm": 0.21277742087841034, "learning_rate": 8.798602955140029e-05, "loss": 0.7349, "step": 1791 }, { "epoch": 0.3683831842943776, "grad_norm": 0.2360071986913681, "learning_rate": 8.798307385586107e-05, "loss": 0.7345, "step": 1792 }, { "epoch": 0.3685887552677562, "grad_norm": 0.200873002409935, "learning_rate": 8.798011604274671e-05, "loss": 0.5943, "step": 1793 }, { "epoch": 0.36879432624113473, "grad_norm": 0.23026502132415771, "learning_rate": 8.797715611220293e-05, "loss": 0.7188, "step": 1794 }, { "epoch": 0.3689998972145133, "grad_norm": 0.22256635129451752, "learning_rate": 8.797419406437553e-05, "loss": 0.7152, "step": 1795 }, { "epoch": 0.36920546818789185, "grad_norm": 0.21542035043239594, "learning_rate": 8.797122989941045e-05, "loss": 0.7055, "step": 1796 }, { "epoch": 0.36941103916127044, "grad_norm": 0.22514380514621735, "learning_rate": 8.796826361745374e-05, "loss": 0.7028, "step": 1797 }, { "epoch": 0.36961661013464897, "grad_norm": 0.2098117172718048, "learning_rate": 8.796529521865149e-05, "loss": 0.7223, "step": 1798 }, { "epoch": 0.36982218110802756, "grad_norm": 0.2132442593574524, "learning_rate": 8.796232470314997e-05, "loss": 0.6883, "step": 1799 }, { "epoch": 0.3700277520814061, "grad_norm": 0.17681948840618134, "learning_rate": 8.795935207109552e-05, "loss": 0.5999, "step": 1800 }, { "epoch": 0.3702333230547847, "grad_norm": 0.23800528049468994, "learning_rate": 8.795637732263459e-05, "loss": 0.7058, "step": 1801 }, { "epoch": 0.3704388940281632, "grad_norm": 0.2394934594631195, "learning_rate": 8.795340045791371e-05, "loss": 0.7371, "step": 1802 }, { "epoch": 0.3706444650015418, "grad_norm": 0.21029235422611237, "learning_rate": 8.795042147707957e-05, "loss": 0.6879, "step": 1803 }, { "epoch": 0.37085003597492033, "grad_norm": 0.2209658920764923, "learning_rate": 8.79474403802789e-05, "loss": 0.7145, "step": 1804 }, { "epoch": 0.3710556069482989, "grad_norm": 0.2265157699584961, "learning_rate": 8.79444571676586e-05, "loss": 0.7329, "step": 1805 }, { "epoch": 0.37126117792167745, "grad_norm": 0.17411258816719055, "learning_rate": 8.79414718393656e-05, "loss": 0.5861, "step": 1806 }, { "epoch": 0.37146674889505604, "grad_norm": 0.235770583152771, "learning_rate": 8.793848439554699e-05, "loss": 0.7168, "step": 1807 }, { "epoch": 0.3716723198684346, "grad_norm": 0.24390238523483276, "learning_rate": 8.793549483634995e-05, "loss": 0.7242, "step": 1808 }, { "epoch": 0.37187789084181316, "grad_norm": 0.22740136086940765, "learning_rate": 8.793250316192175e-05, "loss": 0.7064, "step": 1809 }, { "epoch": 0.3720834618151917, "grad_norm": 0.1567818820476532, "learning_rate": 8.79295093724098e-05, "loss": 0.6035, "step": 1810 }, { "epoch": 0.37228903278857023, "grad_norm": 0.23284457623958588, "learning_rate": 8.792651346796157e-05, "loss": 0.7145, "step": 1811 }, { "epoch": 0.3724946037619488, "grad_norm": 0.21928593516349792, "learning_rate": 8.792351544872467e-05, "loss": 0.7015, "step": 1812 }, { "epoch": 0.37270017473532735, "grad_norm": 0.2226940542459488, "learning_rate": 8.792051531484678e-05, "loss": 0.7032, "step": 1813 }, { "epoch": 0.37290574570870594, "grad_norm": 0.1569989025592804, "learning_rate": 8.791751306647572e-05, "loss": 0.6043, "step": 1814 }, { "epoch": 0.37311131668208447, "grad_norm": 0.231995090842247, "learning_rate": 8.791450870375936e-05, "loss": 0.7066, "step": 1815 }, { "epoch": 0.37331688765546306, "grad_norm": 0.2193315476179123, "learning_rate": 8.791150222684576e-05, "loss": 0.7099, "step": 1816 }, { "epoch": 0.3735224586288416, "grad_norm": 0.2191406637430191, "learning_rate": 8.790849363588301e-05, "loss": 0.7082, "step": 1817 }, { "epoch": 0.3737280296022202, "grad_norm": 0.24114836752414703, "learning_rate": 8.790548293101932e-05, "loss": 0.6951, "step": 1818 }, { "epoch": 0.3739336005755987, "grad_norm": 0.21961726248264313, "learning_rate": 8.790247011240304e-05, "loss": 0.7007, "step": 1819 }, { "epoch": 0.3741391715489773, "grad_norm": 0.22864870727062225, "learning_rate": 8.789945518018259e-05, "loss": 0.7172, "step": 1820 }, { "epoch": 0.37434474252235583, "grad_norm": 0.2318045198917389, "learning_rate": 8.789643813450647e-05, "loss": 0.7168, "step": 1821 }, { "epoch": 0.3745503134957344, "grad_norm": 0.21737788617610931, "learning_rate": 8.789341897552336e-05, "loss": 0.7251, "step": 1822 }, { "epoch": 0.37475588446911295, "grad_norm": 0.21853739023208618, "learning_rate": 8.789039770338197e-05, "loss": 0.7059, "step": 1823 }, { "epoch": 0.37496145544249154, "grad_norm": 0.21663320064544678, "learning_rate": 8.788737431823116e-05, "loss": 0.6916, "step": 1824 }, { "epoch": 0.3751670264158701, "grad_norm": 0.17142772674560547, "learning_rate": 8.788434882021987e-05, "loss": 0.594, "step": 1825 }, { "epoch": 0.37537259738924866, "grad_norm": 0.2234950065612793, "learning_rate": 8.788132120949716e-05, "loss": 0.7175, "step": 1826 }, { "epoch": 0.3755781683626272, "grad_norm": 0.21172864735126495, "learning_rate": 8.787829148621218e-05, "loss": 0.6872, "step": 1827 }, { "epoch": 0.37578373933600573, "grad_norm": 0.22408267855644226, "learning_rate": 8.787525965051418e-05, "loss": 0.7375, "step": 1828 }, { "epoch": 0.3759893103093843, "grad_norm": 0.21300190687179565, "learning_rate": 8.787222570255256e-05, "loss": 0.7224, "step": 1829 }, { "epoch": 0.37619488128276285, "grad_norm": 0.22645267844200134, "learning_rate": 8.786918964247674e-05, "loss": 0.6957, "step": 1830 }, { "epoch": 0.37640045225614144, "grad_norm": 0.1754547655582428, "learning_rate": 8.786615147043633e-05, "loss": 0.5798, "step": 1831 }, { "epoch": 0.37660602322951997, "grad_norm": 0.2161412239074707, "learning_rate": 8.786311118658097e-05, "loss": 0.7041, "step": 1832 }, { "epoch": 0.37681159420289856, "grad_norm": 0.14319129288196564, "learning_rate": 8.78600687910605e-05, "loss": 0.5844, "step": 1833 }, { "epoch": 0.3770171651762771, "grad_norm": 0.22690434753894806, "learning_rate": 8.785702428402475e-05, "loss": 0.7024, "step": 1834 }, { "epoch": 0.3772227361496557, "grad_norm": 0.2214747965335846, "learning_rate": 8.785397766562371e-05, "loss": 0.7269, "step": 1835 }, { "epoch": 0.3774283071230342, "grad_norm": 0.2654751241207123, "learning_rate": 8.785092893600751e-05, "loss": 0.7037, "step": 1836 }, { "epoch": 0.3776338780964128, "grad_norm": 0.21953707933425903, "learning_rate": 8.784787809532632e-05, "loss": 0.7217, "step": 1837 }, { "epoch": 0.37783944906979133, "grad_norm": 0.22590485215187073, "learning_rate": 8.784482514373045e-05, "loss": 0.7056, "step": 1838 }, { "epoch": 0.3780450200431699, "grad_norm": 0.22106105089187622, "learning_rate": 8.78417700813703e-05, "loss": 0.7058, "step": 1839 }, { "epoch": 0.37825059101654845, "grad_norm": 0.1933329850435257, "learning_rate": 8.783871290839637e-05, "loss": 0.5885, "step": 1840 }, { "epoch": 0.37845616198992704, "grad_norm": 0.18652944266796112, "learning_rate": 8.78356536249593e-05, "loss": 0.5857, "step": 1841 }, { "epoch": 0.3786617329633056, "grad_norm": 0.2601449191570282, "learning_rate": 8.783259223120979e-05, "loss": 0.7123, "step": 1842 }, { "epoch": 0.37886730393668416, "grad_norm": 0.246074840426445, "learning_rate": 8.782952872729864e-05, "loss": 0.7277, "step": 1843 }, { "epoch": 0.3790728749100627, "grad_norm": 0.2558608949184418, "learning_rate": 8.78264631133768e-05, "loss": 0.7006, "step": 1844 }, { "epoch": 0.3792784458834413, "grad_norm": 0.21807844936847687, "learning_rate": 8.78233953895953e-05, "loss": 0.5869, "step": 1845 }, { "epoch": 0.3794840168568198, "grad_norm": 0.25354549288749695, "learning_rate": 8.782032555610526e-05, "loss": 0.7129, "step": 1846 }, { "epoch": 0.37968958783019835, "grad_norm": 0.243685781955719, "learning_rate": 8.781725361305793e-05, "loss": 0.7217, "step": 1847 }, { "epoch": 0.37989515880357694, "grad_norm": 0.16930992901325226, "learning_rate": 8.781417956060464e-05, "loss": 0.6007, "step": 1848 }, { "epoch": 0.38010072977695547, "grad_norm": 0.24475498497486115, "learning_rate": 8.781110339889682e-05, "loss": 0.7114, "step": 1849 }, { "epoch": 0.38030630075033406, "grad_norm": 0.24792300164699554, "learning_rate": 8.780802512808605e-05, "loss": 0.7409, "step": 1850 }, { "epoch": 0.3805118717237126, "grad_norm": 0.2320515662431717, "learning_rate": 8.780494474832395e-05, "loss": 0.7163, "step": 1851 }, { "epoch": 0.3807174426970912, "grad_norm": 0.24166975915431976, "learning_rate": 8.780186225976232e-05, "loss": 0.7304, "step": 1852 }, { "epoch": 0.3809230136704697, "grad_norm": 0.23629960417747498, "learning_rate": 8.779877766255297e-05, "loss": 0.7155, "step": 1853 }, { "epoch": 0.3811285846438483, "grad_norm": 0.22916334867477417, "learning_rate": 8.77956909568479e-05, "loss": 0.7263, "step": 1854 }, { "epoch": 0.38133415561722683, "grad_norm": 0.24127478897571564, "learning_rate": 8.779260214279915e-05, "loss": 0.6936, "step": 1855 }, { "epoch": 0.3815397265906054, "grad_norm": 0.22905930876731873, "learning_rate": 8.778951122055891e-05, "loss": 0.718, "step": 1856 }, { "epoch": 0.38174529756398395, "grad_norm": 0.21907439827919006, "learning_rate": 8.778641819027946e-05, "loss": 0.7082, "step": 1857 }, { "epoch": 0.38195086853736254, "grad_norm": 0.2231978327035904, "learning_rate": 8.778332305211315e-05, "loss": 0.5978, "step": 1858 }, { "epoch": 0.3821564395107411, "grad_norm": 0.2434241622686386, "learning_rate": 8.778022580621249e-05, "loss": 0.7043, "step": 1859 }, { "epoch": 0.38236201048411966, "grad_norm": 0.22279253602027893, "learning_rate": 8.777712645273005e-05, "loss": 0.728, "step": 1860 }, { "epoch": 0.3825675814574982, "grad_norm": 0.22146545350551605, "learning_rate": 8.777402499181854e-05, "loss": 0.7035, "step": 1861 }, { "epoch": 0.3827731524308768, "grad_norm": 0.1629379838705063, "learning_rate": 8.777092142363074e-05, "loss": 0.5911, "step": 1862 }, { "epoch": 0.3829787234042553, "grad_norm": 0.24716326594352722, "learning_rate": 8.776781574831956e-05, "loss": 0.7466, "step": 1863 }, { "epoch": 0.3831842943776339, "grad_norm": 0.21958030760288239, "learning_rate": 8.776470796603799e-05, "loss": 0.7112, "step": 1864 }, { "epoch": 0.38338986535101244, "grad_norm": 0.22167621552944183, "learning_rate": 8.776159807693914e-05, "loss": 0.7076, "step": 1865 }, { "epoch": 0.38359543632439097, "grad_norm": 0.22505022585391998, "learning_rate": 8.775848608117621e-05, "loss": 0.7383, "step": 1866 }, { "epoch": 0.38380100729776956, "grad_norm": 0.2208850234746933, "learning_rate": 8.775537197890254e-05, "loss": 0.7371, "step": 1867 }, { "epoch": 0.3840065782711481, "grad_norm": 0.21698389947414398, "learning_rate": 8.775225577027154e-05, "loss": 0.7226, "step": 1868 }, { "epoch": 0.3842121492445267, "grad_norm": 0.22070789337158203, "learning_rate": 8.774913745543668e-05, "loss": 0.712, "step": 1869 }, { "epoch": 0.3844177202179052, "grad_norm": 0.22153621912002563, "learning_rate": 8.774601703455166e-05, "loss": 0.7102, "step": 1870 }, { "epoch": 0.3846232911912838, "grad_norm": 0.21667127311229706, "learning_rate": 8.774289450777017e-05, "loss": 0.705, "step": 1871 }, { "epoch": 0.38482886216466233, "grad_norm": 0.22485022246837616, "learning_rate": 8.773976987524604e-05, "loss": 0.7232, "step": 1872 }, { "epoch": 0.3850344331380409, "grad_norm": 0.19532062113285065, "learning_rate": 8.77366431371332e-05, "loss": 0.61, "step": 1873 }, { "epoch": 0.38524000411141945, "grad_norm": 0.2282322347164154, "learning_rate": 8.773351429358574e-05, "loss": 0.721, "step": 1874 }, { "epoch": 0.38544557508479804, "grad_norm": 0.141531839966774, "learning_rate": 8.773038334475774e-05, "loss": 0.5959, "step": 1875 }, { "epoch": 0.3856511460581766, "grad_norm": 0.22724571824073792, "learning_rate": 8.772725029080349e-05, "loss": 0.7027, "step": 1876 }, { "epoch": 0.38585671703155516, "grad_norm": 0.22629983723163605, "learning_rate": 8.772411513187731e-05, "loss": 0.7021, "step": 1877 }, { "epoch": 0.3860622880049337, "grad_norm": 0.18585380911827087, "learning_rate": 8.772097786813368e-05, "loss": 0.5524, "step": 1878 }, { "epoch": 0.3862678589783123, "grad_norm": 0.25130245089530945, "learning_rate": 8.771783849972714e-05, "loss": 0.7274, "step": 1879 }, { "epoch": 0.3864734299516908, "grad_norm": 0.22500745952129364, "learning_rate": 8.771469702681236e-05, "loss": 0.725, "step": 1880 }, { "epoch": 0.3866790009250694, "grad_norm": 0.20772625505924225, "learning_rate": 8.771155344954412e-05, "loss": 0.7155, "step": 1881 }, { "epoch": 0.38688457189844794, "grad_norm": 0.2251315712928772, "learning_rate": 8.770840776807726e-05, "loss": 0.6973, "step": 1882 }, { "epoch": 0.3870901428718265, "grad_norm": 0.2260076254606247, "learning_rate": 8.770525998256677e-05, "loss": 0.7128, "step": 1883 }, { "epoch": 0.38729571384520506, "grad_norm": 0.16973739862442017, "learning_rate": 8.770211009316772e-05, "loss": 0.5794, "step": 1884 }, { "epoch": 0.3875012848185836, "grad_norm": 0.2505844831466675, "learning_rate": 8.76989581000353e-05, "loss": 0.7359, "step": 1885 }, { "epoch": 0.3877068557919622, "grad_norm": 0.28069007396698, "learning_rate": 8.769580400332479e-05, "loss": 0.7233, "step": 1886 }, { "epoch": 0.3879124267653407, "grad_norm": 0.13608971238136292, "learning_rate": 8.769264780319158e-05, "loss": 0.5905, "step": 1887 }, { "epoch": 0.3881179977387193, "grad_norm": 0.25234588980674744, "learning_rate": 8.768948949979116e-05, "loss": 0.7122, "step": 1888 }, { "epoch": 0.38832356871209783, "grad_norm": 0.234871044754982, "learning_rate": 8.768632909327912e-05, "loss": 0.7299, "step": 1889 }, { "epoch": 0.3885291396854764, "grad_norm": 0.2207827866077423, "learning_rate": 8.768316658381114e-05, "loss": 0.7086, "step": 1890 }, { "epoch": 0.38873471065885495, "grad_norm": 0.25734445452690125, "learning_rate": 8.768000197154306e-05, "loss": 0.7071, "step": 1891 }, { "epoch": 0.38894028163223354, "grad_norm": 0.2389577329158783, "learning_rate": 8.767683525663077e-05, "loss": 0.733, "step": 1892 }, { "epoch": 0.3891458526056121, "grad_norm": 0.17553114891052246, "learning_rate": 8.767366643923028e-05, "loss": 0.5974, "step": 1893 }, { "epoch": 0.38935142357899066, "grad_norm": 0.23687125742435455, "learning_rate": 8.76704955194977e-05, "loss": 0.7148, "step": 1894 }, { "epoch": 0.3895569945523692, "grad_norm": 0.25215673446655273, "learning_rate": 8.766732249758925e-05, "loss": 0.7338, "step": 1895 }, { "epoch": 0.3897625655257478, "grad_norm": 0.16502924263477325, "learning_rate": 8.766414737366124e-05, "loss": 0.584, "step": 1896 }, { "epoch": 0.3899681364991263, "grad_norm": 0.1485537588596344, "learning_rate": 8.76609701478701e-05, "loss": 0.6049, "step": 1897 }, { "epoch": 0.3901737074725049, "grad_norm": 0.3515810966491699, "learning_rate": 8.765779082037235e-05, "loss": 0.7529, "step": 1898 }, { "epoch": 0.39037927844588344, "grad_norm": 0.23719021677970886, "learning_rate": 8.765460939132464e-05, "loss": 0.728, "step": 1899 }, { "epoch": 0.390584849419262, "grad_norm": 0.17814306914806366, "learning_rate": 8.76514258608837e-05, "loss": 0.601, "step": 1900 }, { "epoch": 0.39079042039264056, "grad_norm": 0.4228149652481079, "learning_rate": 8.764824022920636e-05, "loss": 0.7195, "step": 1901 }, { "epoch": 0.3909959913660191, "grad_norm": 0.16185280680656433, "learning_rate": 8.764505249644953e-05, "loss": 0.5728, "step": 1902 }, { "epoch": 0.3912015623393977, "grad_norm": 0.23503097891807556, "learning_rate": 8.764186266277032e-05, "loss": 0.71, "step": 1903 }, { "epoch": 0.3914071333127762, "grad_norm": 0.23683130741119385, "learning_rate": 8.763867072832583e-05, "loss": 0.7351, "step": 1904 }, { "epoch": 0.3916127042861548, "grad_norm": 0.2431173473596573, "learning_rate": 8.763547669327334e-05, "loss": 0.72, "step": 1905 }, { "epoch": 0.39181827525953333, "grad_norm": 0.2246868759393692, "learning_rate": 8.763228055777016e-05, "loss": 0.7136, "step": 1906 }, { "epoch": 0.3920238462329119, "grad_norm": 0.17881381511688232, "learning_rate": 8.762908232197379e-05, "loss": 0.6021, "step": 1907 }, { "epoch": 0.39222941720629045, "grad_norm": 0.25456559658050537, "learning_rate": 8.76258819860418e-05, "loss": 0.7192, "step": 1908 }, { "epoch": 0.39243498817966904, "grad_norm": 0.2538883686065674, "learning_rate": 8.762267955013185e-05, "loss": 0.6971, "step": 1909 }, { "epoch": 0.3926405591530476, "grad_norm": 0.21888628602027893, "learning_rate": 8.761947501440166e-05, "loss": 0.7097, "step": 1910 }, { "epoch": 0.39284613012642616, "grad_norm": 0.2221071869134903, "learning_rate": 8.761626837900916e-05, "loss": 0.7004, "step": 1911 }, { "epoch": 0.3930517010998047, "grad_norm": 0.23489388823509216, "learning_rate": 8.761305964411228e-05, "loss": 0.6935, "step": 1912 }, { "epoch": 0.3932572720731833, "grad_norm": 0.23386436700820923, "learning_rate": 8.760984880986915e-05, "loss": 0.695, "step": 1913 }, { "epoch": 0.3934628430465618, "grad_norm": 0.22081080079078674, "learning_rate": 8.760663587643792e-05, "loss": 0.6939, "step": 1914 }, { "epoch": 0.3936684140199404, "grad_norm": 0.2191271334886551, "learning_rate": 8.760342084397688e-05, "loss": 0.7055, "step": 1915 }, { "epoch": 0.39387398499331894, "grad_norm": 0.16592054069042206, "learning_rate": 8.760020371264442e-05, "loss": 0.5968, "step": 1916 }, { "epoch": 0.3940795559666975, "grad_norm": 0.2341727763414383, "learning_rate": 8.759698448259905e-05, "loss": 0.7216, "step": 1917 }, { "epoch": 0.39428512694007606, "grad_norm": 0.2350844144821167, "learning_rate": 8.759376315399935e-05, "loss": 0.7036, "step": 1918 }, { "epoch": 0.39449069791345465, "grad_norm": 0.1551404446363449, "learning_rate": 8.759053972700401e-05, "loss": 0.6018, "step": 1919 }, { "epoch": 0.3946962688868332, "grad_norm": 0.2272733896970749, "learning_rate": 8.758731420177186e-05, "loss": 0.7132, "step": 1920 }, { "epoch": 0.3949018398602117, "grad_norm": 0.22375091910362244, "learning_rate": 8.758408657846177e-05, "loss": 0.6917, "step": 1921 }, { "epoch": 0.3951074108335903, "grad_norm": 0.14521102607250214, "learning_rate": 8.758085685723279e-05, "loss": 0.5774, "step": 1922 }, { "epoch": 0.39531298180696883, "grad_norm": 0.2234261929988861, "learning_rate": 8.757762503824401e-05, "loss": 0.7322, "step": 1923 }, { "epoch": 0.3955185527803474, "grad_norm": 0.2137596607208252, "learning_rate": 8.757439112165465e-05, "loss": 0.7094, "step": 1924 }, { "epoch": 0.39572412375372595, "grad_norm": 0.15637266635894775, "learning_rate": 8.757115510762404e-05, "loss": 0.599, "step": 1925 }, { "epoch": 0.39592969472710454, "grad_norm": 0.21594415605068207, "learning_rate": 8.756791699631159e-05, "loss": 0.7096, "step": 1926 }, { "epoch": 0.3961352657004831, "grad_norm": 0.21532535552978516, "learning_rate": 8.756467678787683e-05, "loss": 0.7331, "step": 1927 }, { "epoch": 0.39634083667386166, "grad_norm": 0.14360411465168, "learning_rate": 8.756143448247938e-05, "loss": 0.5832, "step": 1928 }, { "epoch": 0.3965464076472402, "grad_norm": 0.2573210597038269, "learning_rate": 8.7558190080279e-05, "loss": 0.7116, "step": 1929 }, { "epoch": 0.3967519786206188, "grad_norm": 0.22037194669246674, "learning_rate": 8.755494358143552e-05, "loss": 0.6988, "step": 1930 }, { "epoch": 0.3969575495939973, "grad_norm": 0.1471826732158661, "learning_rate": 8.755169498610885e-05, "loss": 0.6081, "step": 1931 }, { "epoch": 0.3971631205673759, "grad_norm": 0.24475279450416565, "learning_rate": 8.754844429445906e-05, "loss": 0.7527, "step": 1932 }, { "epoch": 0.39736869154075444, "grad_norm": 0.21802780032157898, "learning_rate": 8.754519150664629e-05, "loss": 0.6628, "step": 1933 }, { "epoch": 0.397574262514133, "grad_norm": 0.14480328559875488, "learning_rate": 8.75419366228308e-05, "loss": 0.597, "step": 1934 }, { "epoch": 0.39777983348751156, "grad_norm": 0.21927644312381744, "learning_rate": 8.753867964317292e-05, "loss": 0.7108, "step": 1935 }, { "epoch": 0.39798540446089015, "grad_norm": 0.20976369082927704, "learning_rate": 8.753542056783312e-05, "loss": 0.7283, "step": 1936 }, { "epoch": 0.3981909754342687, "grad_norm": 0.22052782773971558, "learning_rate": 8.753215939697198e-05, "loss": 0.7261, "step": 1937 }, { "epoch": 0.39839654640764727, "grad_norm": 0.21982043981552124, "learning_rate": 8.752889613075012e-05, "loss": 0.6902, "step": 1938 }, { "epoch": 0.3986021173810258, "grad_norm": 0.24831879138946533, "learning_rate": 8.752563076932833e-05, "loss": 0.7175, "step": 1939 }, { "epoch": 0.39880768835440433, "grad_norm": 0.22775912284851074, "learning_rate": 8.75223633128675e-05, "loss": 0.7126, "step": 1940 }, { "epoch": 0.3990132593277829, "grad_norm": 0.21900929510593414, "learning_rate": 8.751909376152854e-05, "loss": 0.6947, "step": 1941 }, { "epoch": 0.39921883030116145, "grad_norm": 0.22170402109622955, "learning_rate": 8.751582211547259e-05, "loss": 0.7201, "step": 1942 }, { "epoch": 0.39942440127454004, "grad_norm": 0.22413894534111023, "learning_rate": 8.751254837486079e-05, "loss": 0.7205, "step": 1943 }, { "epoch": 0.3996299722479186, "grad_norm": 0.22276797890663147, "learning_rate": 8.750927253985443e-05, "loss": 0.714, "step": 1944 }, { "epoch": 0.39983554322129716, "grad_norm": 0.21520061790943146, "learning_rate": 8.750599461061492e-05, "loss": 0.7147, "step": 1945 }, { "epoch": 0.4000411141946757, "grad_norm": 0.16708485782146454, "learning_rate": 8.750271458730372e-05, "loss": 0.5976, "step": 1946 }, { "epoch": 0.4002466851680543, "grad_norm": 0.24202388525009155, "learning_rate": 8.74994324700824e-05, "loss": 0.7329, "step": 1947 }, { "epoch": 0.4004522561414328, "grad_norm": 0.13979558646678925, "learning_rate": 8.749614825911274e-05, "loss": 0.5932, "step": 1948 }, { "epoch": 0.4006578271148114, "grad_norm": 0.13720543682575226, "learning_rate": 8.749286195455645e-05, "loss": 0.564, "step": 1949 }, { "epoch": 0.40086339808818994, "grad_norm": 0.22568507492542267, "learning_rate": 8.748957355657546e-05, "loss": 0.7259, "step": 1950 }, { "epoch": 0.4010689690615685, "grad_norm": 0.2142673283815384, "learning_rate": 8.748628306533178e-05, "loss": 0.7024, "step": 1951 }, { "epoch": 0.40127454003494706, "grad_norm": 0.2180175483226776, "learning_rate": 8.748299048098751e-05, "loss": 0.7488, "step": 1952 }, { "epoch": 0.40148011100832565, "grad_norm": 0.21027667820453644, "learning_rate": 8.747969580370488e-05, "loss": 0.708, "step": 1953 }, { "epoch": 0.4016856819817042, "grad_norm": 0.21340122818946838, "learning_rate": 8.747639903364617e-05, "loss": 0.7076, "step": 1954 }, { "epoch": 0.40189125295508277, "grad_norm": 0.22183535993099213, "learning_rate": 8.747310017097382e-05, "loss": 0.6994, "step": 1955 }, { "epoch": 0.4020968239284613, "grad_norm": 0.21292465925216675, "learning_rate": 8.746979921585035e-05, "loss": 0.675, "step": 1956 }, { "epoch": 0.40230239490183983, "grad_norm": 0.2158004343509674, "learning_rate": 8.746649616843837e-05, "loss": 0.727, "step": 1957 }, { "epoch": 0.4025079658752184, "grad_norm": 0.20767906308174133, "learning_rate": 8.746319102890061e-05, "loss": 0.7034, "step": 1958 }, { "epoch": 0.40271353684859695, "grad_norm": 0.21342967450618744, "learning_rate": 8.74598837973999e-05, "loss": 0.7249, "step": 1959 }, { "epoch": 0.40291910782197554, "grad_norm": 0.22150453925132751, "learning_rate": 8.745657447409917e-05, "loss": 0.7209, "step": 1960 }, { "epoch": 0.4031246787953541, "grad_norm": 0.20457392930984497, "learning_rate": 8.745326305916145e-05, "loss": 0.6967, "step": 1961 }, { "epoch": 0.40333024976873266, "grad_norm": 0.2096332609653473, "learning_rate": 8.744994955274992e-05, "loss": 0.7295, "step": 1962 }, { "epoch": 0.4035358207421112, "grad_norm": 0.20849314332008362, "learning_rate": 8.744663395502776e-05, "loss": 0.6962, "step": 1963 }, { "epoch": 0.4037413917154898, "grad_norm": 0.21918678283691406, "learning_rate": 8.744331626615835e-05, "loss": 0.6026, "step": 1964 }, { "epoch": 0.4039469626888683, "grad_norm": 0.21508684754371643, "learning_rate": 8.743999648630511e-05, "loss": 0.7116, "step": 1965 }, { "epoch": 0.4041525336622469, "grad_norm": 0.23266804218292236, "learning_rate": 8.743667461563161e-05, "loss": 0.7314, "step": 1966 }, { "epoch": 0.40435810463562544, "grad_norm": 0.21796725690364838, "learning_rate": 8.743335065430151e-05, "loss": 0.7151, "step": 1967 }, { "epoch": 0.404563675609004, "grad_norm": 0.21634382009506226, "learning_rate": 8.743002460247855e-05, "loss": 0.7272, "step": 1968 }, { "epoch": 0.40476924658238256, "grad_norm": 0.21737129986286163, "learning_rate": 8.74266964603266e-05, "loss": 0.748, "step": 1969 }, { "epoch": 0.40497481755576115, "grad_norm": 0.20188266038894653, "learning_rate": 8.742336622800962e-05, "loss": 0.6833, "step": 1970 }, { "epoch": 0.4051803885291397, "grad_norm": 0.21718573570251465, "learning_rate": 8.742003390569166e-05, "loss": 0.7016, "step": 1971 }, { "epoch": 0.40538595950251827, "grad_norm": 0.2084118276834488, "learning_rate": 8.741669949353692e-05, "loss": 0.6989, "step": 1972 }, { "epoch": 0.4055915304758968, "grad_norm": 0.21882924437522888, "learning_rate": 8.741336299170963e-05, "loss": 0.6893, "step": 1973 }, { "epoch": 0.4057971014492754, "grad_norm": 0.2056969553232193, "learning_rate": 8.741002440037421e-05, "loss": 0.7163, "step": 1974 }, { "epoch": 0.4060026724226539, "grad_norm": 0.22237667441368103, "learning_rate": 8.740668371969509e-05, "loss": 0.7379, "step": 1975 }, { "epoch": 0.40620824339603245, "grad_norm": 0.2131538689136505, "learning_rate": 8.740334094983688e-05, "loss": 0.7185, "step": 1976 }, { "epoch": 0.40641381436941104, "grad_norm": 0.20948132872581482, "learning_rate": 8.739999609096425e-05, "loss": 0.5797, "step": 1977 }, { "epoch": 0.4066193853427896, "grad_norm": 0.1722819209098816, "learning_rate": 8.7396649143242e-05, "loss": 0.5985, "step": 1978 }, { "epoch": 0.40682495631616816, "grad_norm": 0.15967948734760284, "learning_rate": 8.739330010683498e-05, "loss": 0.5984, "step": 1979 }, { "epoch": 0.4070305272895467, "grad_norm": 0.29981619119644165, "learning_rate": 8.738994898190825e-05, "loss": 0.6891, "step": 1980 }, { "epoch": 0.4072360982629253, "grad_norm": 0.17661848664283752, "learning_rate": 8.738659576862684e-05, "loss": 0.5816, "step": 1981 }, { "epoch": 0.4074416692363038, "grad_norm": 0.23567262291908264, "learning_rate": 8.738324046715597e-05, "loss": 0.6944, "step": 1982 }, { "epoch": 0.4076472402096824, "grad_norm": 0.23192854225635529, "learning_rate": 8.737988307766094e-05, "loss": 0.7268, "step": 1983 }, { "epoch": 0.40785281118306094, "grad_norm": 0.2210889458656311, "learning_rate": 8.737652360030715e-05, "loss": 0.711, "step": 1984 }, { "epoch": 0.4080583821564395, "grad_norm": 0.22944270074367523, "learning_rate": 8.737316203526013e-05, "loss": 0.7187, "step": 1985 }, { "epoch": 0.40826395312981806, "grad_norm": 0.2202499508857727, "learning_rate": 8.736979838268545e-05, "loss": 0.6949, "step": 1986 }, { "epoch": 0.40846952410319665, "grad_norm": 0.22138486802577972, "learning_rate": 8.736643264274885e-05, "loss": 0.7328, "step": 1987 }, { "epoch": 0.4086750950765752, "grad_norm": 0.22516939043998718, "learning_rate": 8.736306481561613e-05, "loss": 0.7106, "step": 1988 }, { "epoch": 0.40888066604995377, "grad_norm": 0.22086863219738007, "learning_rate": 8.735969490145321e-05, "loss": 0.6854, "step": 1989 }, { "epoch": 0.4090862370233323, "grad_norm": 0.2156277447938919, "learning_rate": 8.73563229004261e-05, "loss": 0.7179, "step": 1990 }, { "epoch": 0.4092918079967109, "grad_norm": 0.26995977759361267, "learning_rate": 8.735294881270095e-05, "loss": 0.59, "step": 1991 }, { "epoch": 0.4094973789700894, "grad_norm": 0.2523725926876068, "learning_rate": 8.734957263844397e-05, "loss": 0.7057, "step": 1992 }, { "epoch": 0.409702949943468, "grad_norm": 0.2281750589609146, "learning_rate": 8.734619437782148e-05, "loss": 0.7269, "step": 1993 }, { "epoch": 0.40990852091684654, "grad_norm": 0.23070600628852844, "learning_rate": 8.734281403099992e-05, "loss": 0.724, "step": 1994 }, { "epoch": 0.4101140918902251, "grad_norm": 0.22441944479942322, "learning_rate": 8.733943159814583e-05, "loss": 0.7058, "step": 1995 }, { "epoch": 0.41031966286360366, "grad_norm": 0.1988096684217453, "learning_rate": 8.733604707942584e-05, "loss": 0.5961, "step": 1996 }, { "epoch": 0.4105252338369822, "grad_norm": 0.16709105670452118, "learning_rate": 8.733266047500667e-05, "loss": 0.5956, "step": 1997 }, { "epoch": 0.4107308048103608, "grad_norm": 0.258070170879364, "learning_rate": 8.73292717850552e-05, "loss": 0.6959, "step": 1998 }, { "epoch": 0.4109363757837393, "grad_norm": 0.24676097929477692, "learning_rate": 8.732588100973834e-05, "loss": 0.7152, "step": 1999 }, { "epoch": 0.4111419467571179, "grad_norm": 0.2049533575773239, "learning_rate": 8.732248814922317e-05, "loss": 0.603, "step": 2000 }, { "epoch": 0.41134751773049644, "grad_norm": 0.24677561223506927, "learning_rate": 8.73190932036768e-05, "loss": 0.7021, "step": 2001 }, { "epoch": 0.411553088703875, "grad_norm": 0.24673065543174744, "learning_rate": 8.731569617326652e-05, "loss": 0.7424, "step": 2002 }, { "epoch": 0.41175865967725356, "grad_norm": 0.23665191233158112, "learning_rate": 8.731229705815968e-05, "loss": 0.7199, "step": 2003 }, { "epoch": 0.41196423065063215, "grad_norm": 0.21852630376815796, "learning_rate": 8.730889585852371e-05, "loss": 0.7065, "step": 2004 }, { "epoch": 0.4121698016240107, "grad_norm": 0.22494211792945862, "learning_rate": 8.730549257452622e-05, "loss": 0.7032, "step": 2005 }, { "epoch": 0.41237537259738927, "grad_norm": 0.21385926008224487, "learning_rate": 8.730208720633483e-05, "loss": 0.6929, "step": 2006 }, { "epoch": 0.4125809435707678, "grad_norm": 0.19130924344062805, "learning_rate": 8.729867975411734e-05, "loss": 0.5725, "step": 2007 }, { "epoch": 0.4127865145441464, "grad_norm": 0.226227268576622, "learning_rate": 8.729527021804158e-05, "loss": 0.6859, "step": 2008 }, { "epoch": 0.4129920855175249, "grad_norm": 0.22433815896511078, "learning_rate": 8.729185859827555e-05, "loss": 0.7239, "step": 2009 }, { "epoch": 0.4131976564909035, "grad_norm": 0.2165122628211975, "learning_rate": 8.728844489498733e-05, "loss": 0.7045, "step": 2010 }, { "epoch": 0.41340322746428204, "grad_norm": 0.21789471805095673, "learning_rate": 8.728502910834506e-05, "loss": 0.7185, "step": 2011 }, { "epoch": 0.41360879843766063, "grad_norm": 0.2177097499370575, "learning_rate": 8.728161123851708e-05, "loss": 0.7074, "step": 2012 }, { "epoch": 0.41381436941103916, "grad_norm": 0.22537820041179657, "learning_rate": 8.727819128567171e-05, "loss": 0.706, "step": 2013 }, { "epoch": 0.4140199403844177, "grad_norm": 0.21425795555114746, "learning_rate": 8.727476924997747e-05, "loss": 0.6974, "step": 2014 }, { "epoch": 0.4142255113577963, "grad_norm": 0.23247577250003815, "learning_rate": 8.727134513160296e-05, "loss": 0.7111, "step": 2015 }, { "epoch": 0.4144310823311748, "grad_norm": 0.21180875599384308, "learning_rate": 8.726791893071683e-05, "loss": 0.6801, "step": 2016 }, { "epoch": 0.4146366533045534, "grad_norm": 0.21250028908252716, "learning_rate": 8.72644906474879e-05, "loss": 0.7447, "step": 2017 }, { "epoch": 0.41484222427793194, "grad_norm": 0.21931192278862, "learning_rate": 8.726106028208505e-05, "loss": 0.7272, "step": 2018 }, { "epoch": 0.4150477952513105, "grad_norm": 0.21856500208377838, "learning_rate": 8.72576278346773e-05, "loss": 0.7224, "step": 2019 }, { "epoch": 0.41525336622468906, "grad_norm": 0.21037447452545166, "learning_rate": 8.725419330543373e-05, "loss": 0.727, "step": 2020 }, { "epoch": 0.41545893719806765, "grad_norm": 0.21209198236465454, "learning_rate": 8.725075669452356e-05, "loss": 0.7019, "step": 2021 }, { "epoch": 0.4156645081714462, "grad_norm": 0.20165219902992249, "learning_rate": 8.724731800211608e-05, "loss": 0.6149, "step": 2022 }, { "epoch": 0.41587007914482477, "grad_norm": 0.22927507758140564, "learning_rate": 8.72438772283807e-05, "loss": 0.7089, "step": 2023 }, { "epoch": 0.4160756501182033, "grad_norm": 0.2256333827972412, "learning_rate": 8.724043437348695e-05, "loss": 0.7093, "step": 2024 }, { "epoch": 0.4162812210915819, "grad_norm": 0.21047276258468628, "learning_rate": 8.723698943760443e-05, "loss": 0.7246, "step": 2025 }, { "epoch": 0.4164867920649604, "grad_norm": 0.21218207478523254, "learning_rate": 8.723354242090285e-05, "loss": 0.6883, "step": 2026 }, { "epoch": 0.416692363038339, "grad_norm": 0.21619375050067902, "learning_rate": 8.723009332355203e-05, "loss": 0.7068, "step": 2027 }, { "epoch": 0.41689793401171754, "grad_norm": 0.215839222073555, "learning_rate": 8.72266421457219e-05, "loss": 0.6964, "step": 2028 }, { "epoch": 0.41710350498509613, "grad_norm": 0.22797274589538574, "learning_rate": 8.722318888758248e-05, "loss": 0.6966, "step": 2029 }, { "epoch": 0.41730907595847466, "grad_norm": 0.2232465296983719, "learning_rate": 8.72197335493039e-05, "loss": 0.611, "step": 2030 }, { "epoch": 0.4175146469318532, "grad_norm": 0.2285899519920349, "learning_rate": 8.721627613105637e-05, "loss": 0.7202, "step": 2031 }, { "epoch": 0.4177202179052318, "grad_norm": 0.23706313967704773, "learning_rate": 8.721281663301024e-05, "loss": 0.7267, "step": 2032 }, { "epoch": 0.4179257888786103, "grad_norm": 0.21476082503795624, "learning_rate": 8.720935505533593e-05, "loss": 0.7026, "step": 2033 }, { "epoch": 0.4181313598519889, "grad_norm": 0.20751173794269562, "learning_rate": 8.720589139820399e-05, "loss": 0.726, "step": 2034 }, { "epoch": 0.41833693082536744, "grad_norm": 0.19482995569705963, "learning_rate": 8.720242566178504e-05, "loss": 0.5893, "step": 2035 }, { "epoch": 0.418542501798746, "grad_norm": 0.2433481514453888, "learning_rate": 8.719895784624985e-05, "loss": 0.6991, "step": 2036 }, { "epoch": 0.41874807277212456, "grad_norm": 0.22105759382247925, "learning_rate": 8.719548795176922e-05, "loss": 0.7016, "step": 2037 }, { "epoch": 0.41895364374550315, "grad_norm": 0.14366379380226135, "learning_rate": 8.719201597851414e-05, "loss": 0.5847, "step": 2038 }, { "epoch": 0.4191592147188817, "grad_norm": 0.15119072794914246, "learning_rate": 8.718854192665563e-05, "loss": 0.599, "step": 2039 }, { "epoch": 0.41936478569226027, "grad_norm": 0.2527151107788086, "learning_rate": 8.718506579636484e-05, "loss": 0.6794, "step": 2040 }, { "epoch": 0.4195703566656388, "grad_norm": 0.1412784457206726, "learning_rate": 8.718158758781305e-05, "loss": 0.5728, "step": 2041 }, { "epoch": 0.4197759276390174, "grad_norm": 0.2282373160123825, "learning_rate": 8.717810730117158e-05, "loss": 0.7497, "step": 2042 }, { "epoch": 0.4199814986123959, "grad_norm": 0.2128640115261078, "learning_rate": 8.717462493661192e-05, "loss": 0.7085, "step": 2043 }, { "epoch": 0.4201870695857745, "grad_norm": 0.22235573828220367, "learning_rate": 8.717114049430558e-05, "loss": 0.7508, "step": 2044 }, { "epoch": 0.42039264055915304, "grad_norm": 0.21980416774749756, "learning_rate": 8.716765397442428e-05, "loss": 0.7091, "step": 2045 }, { "epoch": 0.42059821153253163, "grad_norm": 0.20546141266822815, "learning_rate": 8.716416537713978e-05, "loss": 0.7008, "step": 2046 }, { "epoch": 0.42080378250591016, "grad_norm": 0.2216566503047943, "learning_rate": 8.71606747026239e-05, "loss": 0.6921, "step": 2047 }, { "epoch": 0.42100935347928875, "grad_norm": 0.2280108779668808, "learning_rate": 8.715718195104863e-05, "loss": 0.7094, "step": 2048 }, { "epoch": 0.4212149244526673, "grad_norm": 0.18423175811767578, "learning_rate": 8.715368712258605e-05, "loss": 0.6069, "step": 2049 }, { "epoch": 0.4214204954260458, "grad_norm": 0.22304539382457733, "learning_rate": 8.715019021740834e-05, "loss": 0.7094, "step": 2050 }, { "epoch": 0.4216260663994244, "grad_norm": 0.2160019874572754, "learning_rate": 8.714669123568776e-05, "loss": 0.7204, "step": 2051 }, { "epoch": 0.42183163737280294, "grad_norm": 0.21349206566810608, "learning_rate": 8.714319017759671e-05, "loss": 0.7041, "step": 2052 }, { "epoch": 0.4220372083461815, "grad_norm": 0.2105959951877594, "learning_rate": 8.713968704330766e-05, "loss": 0.7152, "step": 2053 }, { "epoch": 0.42224277931956006, "grad_norm": 0.21072207391262054, "learning_rate": 8.713618183299318e-05, "loss": 0.7148, "step": 2054 }, { "epoch": 0.42244835029293865, "grad_norm": 0.2207954227924347, "learning_rate": 8.713267454682595e-05, "loss": 0.7272, "step": 2055 }, { "epoch": 0.4226539212663172, "grad_norm": 0.21951311826705933, "learning_rate": 8.712916518497877e-05, "loss": 0.7121, "step": 2056 }, { "epoch": 0.42285949223969577, "grad_norm": 0.21501171588897705, "learning_rate": 8.712565374762456e-05, "loss": 0.7086, "step": 2057 }, { "epoch": 0.4230650632130743, "grad_norm": 0.21046118438243866, "learning_rate": 8.712214023493628e-05, "loss": 0.6967, "step": 2058 }, { "epoch": 0.4232706341864529, "grad_norm": 0.1807229071855545, "learning_rate": 8.711862464708701e-05, "loss": 0.5913, "step": 2059 }, { "epoch": 0.4234762051598314, "grad_norm": 0.22645685076713562, "learning_rate": 8.711510698424999e-05, "loss": 0.7036, "step": 2060 }, { "epoch": 0.42368177613321, "grad_norm": 0.22503720223903656, "learning_rate": 8.711158724659848e-05, "loss": 0.7092, "step": 2061 }, { "epoch": 0.42388734710658854, "grad_norm": 0.21952955424785614, "learning_rate": 8.71080654343059e-05, "loss": 0.7028, "step": 2062 }, { "epoch": 0.42409291807996713, "grad_norm": 0.21978265047073364, "learning_rate": 8.710454154754574e-05, "loss": 0.6954, "step": 2063 }, { "epoch": 0.42429848905334566, "grad_norm": 0.21806906163692474, "learning_rate": 8.710101558649162e-05, "loss": 0.6992, "step": 2064 }, { "epoch": 0.42450406002672425, "grad_norm": 0.14885424077510834, "learning_rate": 8.709748755131724e-05, "loss": 0.5892, "step": 2065 }, { "epoch": 0.4247096310001028, "grad_norm": 0.230007603764534, "learning_rate": 8.709395744219641e-05, "loss": 0.7061, "step": 2066 }, { "epoch": 0.42491520197348137, "grad_norm": 0.21456275880336761, "learning_rate": 8.709042525930305e-05, "loss": 0.699, "step": 2067 }, { "epoch": 0.4251207729468599, "grad_norm": 0.21649466454982758, "learning_rate": 8.708689100281116e-05, "loss": 0.6888, "step": 2068 }, { "epoch": 0.42532634392023844, "grad_norm": 0.2111383080482483, "learning_rate": 8.708335467289487e-05, "loss": 0.7007, "step": 2069 }, { "epoch": 0.425531914893617, "grad_norm": 0.2149335891008377, "learning_rate": 8.707981626972839e-05, "loss": 0.6819, "step": 2070 }, { "epoch": 0.42573748586699556, "grad_norm": 0.14442218840122223, "learning_rate": 8.707627579348605e-05, "loss": 0.5817, "step": 2071 }, { "epoch": 0.42594305684037415, "grad_norm": 0.21797578036785126, "learning_rate": 8.707273324434225e-05, "loss": 0.693, "step": 2072 }, { "epoch": 0.4261486278137527, "grad_norm": 0.2137763351202011, "learning_rate": 8.706918862247155e-05, "loss": 0.7087, "step": 2073 }, { "epoch": 0.42635419878713127, "grad_norm": 0.21722511947155, "learning_rate": 8.706564192804854e-05, "loss": 0.7327, "step": 2074 }, { "epoch": 0.4265597697605098, "grad_norm": 0.21744219958782196, "learning_rate": 8.706209316124798e-05, "loss": 0.7024, "step": 2075 }, { "epoch": 0.4267653407338884, "grad_norm": 0.21922947466373444, "learning_rate": 8.705854232224467e-05, "loss": 0.7089, "step": 2076 }, { "epoch": 0.4269709117072669, "grad_norm": 0.20731019973754883, "learning_rate": 8.705498941121357e-05, "loss": 0.7112, "step": 2077 }, { "epoch": 0.4271764826806455, "grad_norm": 0.15655431151390076, "learning_rate": 8.705143442832973e-05, "loss": 0.5976, "step": 2078 }, { "epoch": 0.42738205365402404, "grad_norm": 0.22649213671684265, "learning_rate": 8.704787737376822e-05, "loss": 0.7271, "step": 2079 }, { "epoch": 0.42758762462740263, "grad_norm": 0.2306176871061325, "learning_rate": 8.704431824770436e-05, "loss": 0.7294, "step": 2080 }, { "epoch": 0.42779319560078116, "grad_norm": 0.21303272247314453, "learning_rate": 8.704075705031344e-05, "loss": 0.703, "step": 2081 }, { "epoch": 0.42799876657415975, "grad_norm": 0.2082429826259613, "learning_rate": 8.70371937817709e-05, "loss": 0.7122, "step": 2082 }, { "epoch": 0.4282043375475383, "grad_norm": 0.21812103688716888, "learning_rate": 8.703362844225233e-05, "loss": 0.6854, "step": 2083 }, { "epoch": 0.42840990852091687, "grad_norm": 0.22010985016822815, "learning_rate": 8.703006103193334e-05, "loss": 0.7085, "step": 2084 }, { "epoch": 0.4286154794942954, "grad_norm": 0.21230296790599823, "learning_rate": 8.70264915509897e-05, "loss": 0.6915, "step": 2085 }, { "epoch": 0.428821050467674, "grad_norm": 0.22726766765117645, "learning_rate": 8.702291999959725e-05, "loss": 0.7325, "step": 2086 }, { "epoch": 0.4290266214410525, "grad_norm": 0.22241102159023285, "learning_rate": 8.701934637793194e-05, "loss": 0.7029, "step": 2087 }, { "epoch": 0.42923219241443106, "grad_norm": 0.1587475687265396, "learning_rate": 8.701577068616984e-05, "loss": 0.5836, "step": 2088 }, { "epoch": 0.42943776338780965, "grad_norm": 0.2406635880470276, "learning_rate": 8.701219292448708e-05, "loss": 0.6863, "step": 2089 }, { "epoch": 0.4296433343611882, "grad_norm": 0.21944580972194672, "learning_rate": 8.700861309305995e-05, "loss": 0.6938, "step": 2090 }, { "epoch": 0.42984890533456677, "grad_norm": 0.21135850250720978, "learning_rate": 8.700503119206481e-05, "loss": 0.685, "step": 2091 }, { "epoch": 0.4300544763079453, "grad_norm": 0.20949722826480865, "learning_rate": 8.700144722167811e-05, "loss": 0.6967, "step": 2092 }, { "epoch": 0.4302600472813239, "grad_norm": 0.21594803035259247, "learning_rate": 8.699786118207642e-05, "loss": 0.7037, "step": 2093 }, { "epoch": 0.4304656182547024, "grad_norm": 0.16418609023094177, "learning_rate": 8.69942730734364e-05, "loss": 0.5692, "step": 2094 }, { "epoch": 0.430671189228081, "grad_norm": 0.23615112900733948, "learning_rate": 8.699068289593483e-05, "loss": 0.7278, "step": 2095 }, { "epoch": 0.43087676020145954, "grad_norm": 0.22218084335327148, "learning_rate": 8.698709064974858e-05, "loss": 0.677, "step": 2096 }, { "epoch": 0.43108233117483813, "grad_norm": 0.21628277003765106, "learning_rate": 8.698349633505462e-05, "loss": 0.6902, "step": 2097 }, { "epoch": 0.43128790214821666, "grad_norm": 0.21895258128643036, "learning_rate": 8.697989995203002e-05, "loss": 0.6952, "step": 2098 }, { "epoch": 0.43149347312159525, "grad_norm": 0.21633300185203552, "learning_rate": 8.697630150085197e-05, "loss": 0.7332, "step": 2099 }, { "epoch": 0.4316990440949738, "grad_norm": 0.2174568474292755, "learning_rate": 8.697270098169774e-05, "loss": 0.6904, "step": 2100 }, { "epoch": 0.43190461506835237, "grad_norm": 0.22629016637802124, "learning_rate": 8.696909839474473e-05, "loss": 0.7198, "step": 2101 }, { "epoch": 0.4321101860417309, "grad_norm": 0.20996680855751038, "learning_rate": 8.696549374017038e-05, "loss": 0.6932, "step": 2102 }, { "epoch": 0.4323157570151095, "grad_norm": 0.20978742837905884, "learning_rate": 8.696188701815231e-05, "loss": 0.684, "step": 2103 }, { "epoch": 0.432521327988488, "grad_norm": 0.21533238887786865, "learning_rate": 8.695827822886818e-05, "loss": 0.7218, "step": 2104 }, { "epoch": 0.43272689896186656, "grad_norm": 0.20759303867816925, "learning_rate": 8.695466737249582e-05, "loss": 0.6742, "step": 2105 }, { "epoch": 0.43293246993524515, "grad_norm": 0.17055755853652954, "learning_rate": 8.695105444921307e-05, "loss": 0.5937, "step": 2106 }, { "epoch": 0.4331380409086237, "grad_norm": 0.1438744068145752, "learning_rate": 8.694743945919796e-05, "loss": 0.5962, "step": 2107 }, { "epoch": 0.43334361188200227, "grad_norm": 0.23514226078987122, "learning_rate": 8.694382240262857e-05, "loss": 0.7071, "step": 2108 }, { "epoch": 0.4335491828553808, "grad_norm": 0.16390731930732727, "learning_rate": 8.694020327968309e-05, "loss": 0.597, "step": 2109 }, { "epoch": 0.4337547538287594, "grad_norm": 0.21311801671981812, "learning_rate": 8.693658209053983e-05, "loss": 0.7061, "step": 2110 }, { "epoch": 0.4339603248021379, "grad_norm": 0.21026752889156342, "learning_rate": 8.693295883537717e-05, "loss": 0.7125, "step": 2111 }, { "epoch": 0.4341658957755165, "grad_norm": 0.21940794587135315, "learning_rate": 8.692933351437362e-05, "loss": 0.7429, "step": 2112 }, { "epoch": 0.43437146674889504, "grad_norm": 0.22087624669075012, "learning_rate": 8.69257061277078e-05, "loss": 0.7089, "step": 2113 }, { "epoch": 0.43457703772227363, "grad_norm": 0.21447579562664032, "learning_rate": 8.69220766755584e-05, "loss": 0.7126, "step": 2114 }, { "epoch": 0.43478260869565216, "grad_norm": 0.18616484105587006, "learning_rate": 8.691844515810422e-05, "loss": 0.5893, "step": 2115 }, { "epoch": 0.43498817966903075, "grad_norm": 0.2412138730287552, "learning_rate": 8.691481157552418e-05, "loss": 0.6838, "step": 2116 }, { "epoch": 0.4351937506424093, "grad_norm": 0.2211569845676422, "learning_rate": 8.691117592799726e-05, "loss": 0.7146, "step": 2117 }, { "epoch": 0.43539932161578787, "grad_norm": 0.22833772003650665, "learning_rate": 8.690753821570261e-05, "loss": 0.6909, "step": 2118 }, { "epoch": 0.4356048925891664, "grad_norm": 0.22425860166549683, "learning_rate": 8.690389843881944e-05, "loss": 0.7387, "step": 2119 }, { "epoch": 0.435810463562545, "grad_norm": 0.20990809798240662, "learning_rate": 8.690025659752702e-05, "loss": 0.7058, "step": 2120 }, { "epoch": 0.4360160345359235, "grad_norm": 0.21391835808753967, "learning_rate": 8.689661269200483e-05, "loss": 0.706, "step": 2121 }, { "epoch": 0.4362216055093021, "grad_norm": 0.21198540925979614, "learning_rate": 8.689296672243234e-05, "loss": 0.6776, "step": 2122 }, { "epoch": 0.43642717648268065, "grad_norm": 0.22344285249710083, "learning_rate": 8.68893186889892e-05, "loss": 0.6062, "step": 2123 }, { "epoch": 0.4366327474560592, "grad_norm": 0.23118963837623596, "learning_rate": 8.68856685918551e-05, "loss": 0.7088, "step": 2124 }, { "epoch": 0.43683831842943777, "grad_norm": 0.14518238604068756, "learning_rate": 8.68820164312099e-05, "loss": 0.5962, "step": 2125 }, { "epoch": 0.4370438894028163, "grad_norm": 0.22062361240386963, "learning_rate": 8.68783622072335e-05, "loss": 0.7169, "step": 2126 }, { "epoch": 0.4372494603761949, "grad_norm": 0.21670423448085785, "learning_rate": 8.687470592010593e-05, "loss": 0.6916, "step": 2127 }, { "epoch": 0.4374550313495734, "grad_norm": 0.21488401293754578, "learning_rate": 8.687104757000733e-05, "loss": 0.7139, "step": 2128 }, { "epoch": 0.437660602322952, "grad_norm": 0.22047607600688934, "learning_rate": 8.686738715711791e-05, "loss": 0.6969, "step": 2129 }, { "epoch": 0.43786617329633054, "grad_norm": 0.21157632768154144, "learning_rate": 8.686372468161802e-05, "loss": 0.7293, "step": 2130 }, { "epoch": 0.43807174426970913, "grad_norm": 0.2109154462814331, "learning_rate": 8.686006014368806e-05, "loss": 0.7178, "step": 2131 }, { "epoch": 0.43827731524308766, "grad_norm": 0.2221369594335556, "learning_rate": 8.685639354350862e-05, "loss": 0.7315, "step": 2132 }, { "epoch": 0.43848288621646625, "grad_norm": 0.2168595790863037, "learning_rate": 8.68527248812603e-05, "loss": 0.7079, "step": 2133 }, { "epoch": 0.4386884571898448, "grad_norm": 0.2099953144788742, "learning_rate": 8.684905415712383e-05, "loss": 0.7007, "step": 2134 }, { "epoch": 0.43889402816322337, "grad_norm": 0.21563635766506195, "learning_rate": 8.684538137128008e-05, "loss": 0.716, "step": 2135 }, { "epoch": 0.4390995991366019, "grad_norm": 0.2030235230922699, "learning_rate": 8.684170652390996e-05, "loss": 0.7029, "step": 2136 }, { "epoch": 0.4393051701099805, "grad_norm": 0.21220625936985016, "learning_rate": 8.683802961519454e-05, "loss": 0.7057, "step": 2137 }, { "epoch": 0.439510741083359, "grad_norm": 0.2082281857728958, "learning_rate": 8.683435064531496e-05, "loss": 0.6924, "step": 2138 }, { "epoch": 0.4397163120567376, "grad_norm": 0.2149658501148224, "learning_rate": 8.683066961445245e-05, "loss": 0.7082, "step": 2139 }, { "epoch": 0.43992188303011615, "grad_norm": 0.21991075575351715, "learning_rate": 8.682698652278836e-05, "loss": 0.7101, "step": 2140 }, { "epoch": 0.44012745400349473, "grad_norm": 0.21779777109622955, "learning_rate": 8.682330137050415e-05, "loss": 0.6922, "step": 2141 }, { "epoch": 0.44033302497687327, "grad_norm": 0.21721771359443665, "learning_rate": 8.681961415778134e-05, "loss": 0.7198, "step": 2142 }, { "epoch": 0.4405385959502518, "grad_norm": 0.21693062782287598, "learning_rate": 8.681592488480163e-05, "loss": 0.74, "step": 2143 }, { "epoch": 0.4407441669236304, "grad_norm": 0.21777969598770142, "learning_rate": 8.681223355174673e-05, "loss": 0.6871, "step": 2144 }, { "epoch": 0.4409497378970089, "grad_norm": 0.2129591703414917, "learning_rate": 8.680854015879852e-05, "loss": 0.6949, "step": 2145 }, { "epoch": 0.4411553088703875, "grad_norm": 0.20881325006484985, "learning_rate": 8.680484470613896e-05, "loss": 0.6919, "step": 2146 }, { "epoch": 0.44136087984376604, "grad_norm": 0.21094316244125366, "learning_rate": 8.680114719395007e-05, "loss": 0.7102, "step": 2147 }, { "epoch": 0.44156645081714463, "grad_norm": 0.2205977588891983, "learning_rate": 8.679744762241407e-05, "loss": 0.6933, "step": 2148 }, { "epoch": 0.44177202179052316, "grad_norm": 0.2161235362291336, "learning_rate": 8.679374599171317e-05, "loss": 0.7472, "step": 2149 }, { "epoch": 0.44197759276390175, "grad_norm": 0.2870723009109497, "learning_rate": 8.679004230202973e-05, "loss": 0.5985, "step": 2150 }, { "epoch": 0.4421831637372803, "grad_norm": 0.22053900361061096, "learning_rate": 8.678633655354627e-05, "loss": 0.7013, "step": 2151 }, { "epoch": 0.44238873471065887, "grad_norm": 0.22010482847690582, "learning_rate": 8.67826287464453e-05, "loss": 0.7361, "step": 2152 }, { "epoch": 0.4425943056840374, "grad_norm": 0.2220645248889923, "learning_rate": 8.677891888090949e-05, "loss": 0.7354, "step": 2153 }, { "epoch": 0.442799876657416, "grad_norm": 0.22568100690841675, "learning_rate": 8.677520695712164e-05, "loss": 0.6069, "step": 2154 }, { "epoch": 0.4430054476307945, "grad_norm": 0.21187719702720642, "learning_rate": 8.677149297526459e-05, "loss": 0.6829, "step": 2155 }, { "epoch": 0.4432110186041731, "grad_norm": 0.22478394210338593, "learning_rate": 8.676777693552132e-05, "loss": 0.6992, "step": 2156 }, { "epoch": 0.44341658957755165, "grad_norm": 0.2064889669418335, "learning_rate": 8.67640588380749e-05, "loss": 0.6845, "step": 2157 }, { "epoch": 0.44362216055093023, "grad_norm": 0.21473796665668488, "learning_rate": 8.67603386831085e-05, "loss": 0.706, "step": 2158 }, { "epoch": 0.44382773152430877, "grad_norm": 0.22386027872562408, "learning_rate": 8.675661647080541e-05, "loss": 0.7064, "step": 2159 }, { "epoch": 0.4440333024976873, "grad_norm": 0.21549421548843384, "learning_rate": 8.675289220134901e-05, "loss": 0.6826, "step": 2160 }, { "epoch": 0.4442388734710659, "grad_norm": 0.1654203236103058, "learning_rate": 8.674916587492274e-05, "loss": 0.5987, "step": 2161 }, { "epoch": 0.4444444444444444, "grad_norm": 0.23500193655490875, "learning_rate": 8.674543749171023e-05, "loss": 0.7202, "step": 2162 }, { "epoch": 0.444650015417823, "grad_norm": 0.22905461490154266, "learning_rate": 8.67417070518951e-05, "loss": 0.7066, "step": 2163 }, { "epoch": 0.44485558639120154, "grad_norm": 0.1377820372581482, "learning_rate": 8.673797455566118e-05, "loss": 0.5963, "step": 2164 }, { "epoch": 0.44506115736458013, "grad_norm": 0.21596823632717133, "learning_rate": 8.673424000319233e-05, "loss": 0.6887, "step": 2165 }, { "epoch": 0.44526672833795866, "grad_norm": 0.13856928050518036, "learning_rate": 8.673050339467255e-05, "loss": 0.5903, "step": 2166 }, { "epoch": 0.44547229931133725, "grad_norm": 0.22425222396850586, "learning_rate": 8.672676473028591e-05, "loss": 0.696, "step": 2167 }, { "epoch": 0.4456778702847158, "grad_norm": 0.20974132418632507, "learning_rate": 8.672302401021662e-05, "loss": 0.6882, "step": 2168 }, { "epoch": 0.44588344125809437, "grad_norm": 0.20939786732196808, "learning_rate": 8.671928123464893e-05, "loss": 0.6787, "step": 2169 }, { "epoch": 0.4460890122314729, "grad_norm": 0.21304769814014435, "learning_rate": 8.671553640376724e-05, "loss": 0.6775, "step": 2170 }, { "epoch": 0.4462945832048515, "grad_norm": 0.21474890410900116, "learning_rate": 8.671178951775607e-05, "loss": 0.6984, "step": 2171 }, { "epoch": 0.44650015417823, "grad_norm": 0.2142523229122162, "learning_rate": 8.670804057679999e-05, "loss": 0.6975, "step": 2172 }, { "epoch": 0.4467057251516086, "grad_norm": 0.21635667979717255, "learning_rate": 8.670428958108367e-05, "loss": 0.6998, "step": 2173 }, { "epoch": 0.44691129612498715, "grad_norm": 0.18972234427928925, "learning_rate": 8.670053653079194e-05, "loss": 0.5905, "step": 2174 }, { "epoch": 0.44711686709836573, "grad_norm": 0.22437618672847748, "learning_rate": 8.669678142610969e-05, "loss": 0.7078, "step": 2175 }, { "epoch": 0.44732243807174427, "grad_norm": 0.22813966870307922, "learning_rate": 8.669302426722192e-05, "loss": 0.6999, "step": 2176 }, { "epoch": 0.44752800904512285, "grad_norm": 0.14738696813583374, "learning_rate": 8.66892650543137e-05, "loss": 0.5654, "step": 2177 }, { "epoch": 0.4477335800185014, "grad_norm": 0.2084706872701645, "learning_rate": 8.668550378757024e-05, "loss": 0.7261, "step": 2178 }, { "epoch": 0.4479391509918799, "grad_norm": 0.22098992764949799, "learning_rate": 8.668174046717686e-05, "loss": 0.7273, "step": 2179 }, { "epoch": 0.4481447219652585, "grad_norm": 0.20854520797729492, "learning_rate": 8.667797509331895e-05, "loss": 0.7197, "step": 2180 }, { "epoch": 0.44835029293863704, "grad_norm": 0.2072971910238266, "learning_rate": 8.667420766618198e-05, "loss": 0.6683, "step": 2181 }, { "epoch": 0.44855586391201563, "grad_norm": 0.20528066158294678, "learning_rate": 8.667043818595162e-05, "loss": 0.7181, "step": 2182 }, { "epoch": 0.44876143488539416, "grad_norm": 0.21476523578166962, "learning_rate": 8.666666665281352e-05, "loss": 0.72, "step": 2183 }, { "epoch": 0.44896700585877275, "grad_norm": 0.20512348413467407, "learning_rate": 8.666289306695351e-05, "loss": 0.6984, "step": 2184 }, { "epoch": 0.4491725768321513, "grad_norm": 0.21752099692821503, "learning_rate": 8.665911742855748e-05, "loss": 0.6836, "step": 2185 }, { "epoch": 0.44937814780552987, "grad_norm": 0.21713502705097198, "learning_rate": 8.665533973781145e-05, "loss": 0.6965, "step": 2186 }, { "epoch": 0.4495837187789084, "grad_norm": 0.22159411013126373, "learning_rate": 8.665155999490153e-05, "loss": 0.7348, "step": 2187 }, { "epoch": 0.449789289752287, "grad_norm": 0.20660369098186493, "learning_rate": 8.664777820001394e-05, "loss": 0.6958, "step": 2188 }, { "epoch": 0.4499948607256655, "grad_norm": 0.1848221719264984, "learning_rate": 8.664399435333497e-05, "loss": 0.5917, "step": 2189 }, { "epoch": 0.4502004316990441, "grad_norm": 0.15177948772907257, "learning_rate": 8.664020845505104e-05, "loss": 0.5976, "step": 2190 }, { "epoch": 0.45040600267242265, "grad_norm": 0.23266561329364777, "learning_rate": 8.663642050534867e-05, "loss": 0.7185, "step": 2191 }, { "epoch": 0.45061157364580123, "grad_norm": 0.2253771871328354, "learning_rate": 8.663263050441446e-05, "loss": 0.6928, "step": 2192 }, { "epoch": 0.45081714461917977, "grad_norm": 0.20975717902183533, "learning_rate": 8.662883845243515e-05, "loss": 0.7157, "step": 2193 }, { "epoch": 0.45102271559255835, "grad_norm": 0.23472397029399872, "learning_rate": 8.662504434959753e-05, "loss": 0.7103, "step": 2194 }, { "epoch": 0.4512282865659369, "grad_norm": 0.22584107518196106, "learning_rate": 8.662124819608853e-05, "loss": 0.7278, "step": 2195 }, { "epoch": 0.4514338575393155, "grad_norm": 0.22365206480026245, "learning_rate": 8.661744999209518e-05, "loss": 0.599, "step": 2196 }, { "epoch": 0.451639428512694, "grad_norm": 0.24951714277267456, "learning_rate": 8.661364973780458e-05, "loss": 0.7315, "step": 2197 }, { "epoch": 0.45184499948607254, "grad_norm": 0.22680872678756714, "learning_rate": 8.660984743340396e-05, "loss": 0.7005, "step": 2198 }, { "epoch": 0.45205057045945113, "grad_norm": 0.22146962583065033, "learning_rate": 8.660604307908063e-05, "loss": 0.6956, "step": 2199 }, { "epoch": 0.45225614143282966, "grad_norm": 0.16175302863121033, "learning_rate": 8.660223667502205e-05, "loss": 0.5844, "step": 2200 }, { "epoch": 0.45246171240620825, "grad_norm": 0.24984121322631836, "learning_rate": 8.65984282214157e-05, "loss": 0.7104, "step": 2201 }, { "epoch": 0.4526672833795868, "grad_norm": 0.23822738230228424, "learning_rate": 8.659461771844923e-05, "loss": 0.7287, "step": 2202 }, { "epoch": 0.45287285435296537, "grad_norm": 0.21192102134227753, "learning_rate": 8.659080516631036e-05, "loss": 0.714, "step": 2203 }, { "epoch": 0.4530784253263439, "grad_norm": 0.23573461174964905, "learning_rate": 8.65869905651869e-05, "loss": 0.7125, "step": 2204 }, { "epoch": 0.4532839962997225, "grad_norm": 0.22849269211292267, "learning_rate": 8.658317391526678e-05, "loss": 0.7213, "step": 2205 }, { "epoch": 0.453489567273101, "grad_norm": 0.2162596434354782, "learning_rate": 8.657935521673808e-05, "loss": 0.7036, "step": 2206 }, { "epoch": 0.4536951382464796, "grad_norm": 0.22291293740272522, "learning_rate": 8.657553446978885e-05, "loss": 0.7055, "step": 2207 }, { "epoch": 0.45390070921985815, "grad_norm": 0.23885302245616913, "learning_rate": 8.657171167460738e-05, "loss": 0.7177, "step": 2208 }, { "epoch": 0.45410628019323673, "grad_norm": 0.1670546680688858, "learning_rate": 8.656788683138198e-05, "loss": 0.5963, "step": 2209 }, { "epoch": 0.45431185116661527, "grad_norm": 0.26193171739578247, "learning_rate": 8.656405994030109e-05, "loss": 0.6881, "step": 2210 }, { "epoch": 0.45451742213999385, "grad_norm": 0.2238868772983551, "learning_rate": 8.656023100155324e-05, "loss": 0.6955, "step": 2211 }, { "epoch": 0.4547229931133724, "grad_norm": 0.22464968264102936, "learning_rate": 8.655640001532704e-05, "loss": 0.6937, "step": 2212 }, { "epoch": 0.454928564086751, "grad_norm": 0.2210894376039505, "learning_rate": 8.655256698181125e-05, "loss": 0.7033, "step": 2213 }, { "epoch": 0.4551341350601295, "grad_norm": 0.2309311479330063, "learning_rate": 8.654873190119472e-05, "loss": 0.6877, "step": 2214 }, { "epoch": 0.4553397060335081, "grad_norm": 0.15510539710521698, "learning_rate": 8.654489477366635e-05, "loss": 0.6074, "step": 2215 }, { "epoch": 0.45554527700688663, "grad_norm": 0.1340515911579132, "learning_rate": 8.654105559941519e-05, "loss": 0.5916, "step": 2216 }, { "epoch": 0.45575084798026516, "grad_norm": 0.3258119225502014, "learning_rate": 8.653721437863041e-05, "loss": 0.6729, "step": 2217 }, { "epoch": 0.45595641895364375, "grad_norm": 0.24723531305789948, "learning_rate": 8.653337111150121e-05, "loss": 0.6963, "step": 2218 }, { "epoch": 0.4561619899270223, "grad_norm": 0.16881807148456573, "learning_rate": 8.652952579821693e-05, "loss": 0.5994, "step": 2219 }, { "epoch": 0.45636756090040087, "grad_norm": 0.16700582206249237, "learning_rate": 8.652567843896702e-05, "loss": 0.5822, "step": 2220 }, { "epoch": 0.4565731318737794, "grad_norm": 0.1435755044221878, "learning_rate": 8.652182903394105e-05, "loss": 0.5809, "step": 2221 }, { "epoch": 0.456778702847158, "grad_norm": 0.14672505855560303, "learning_rate": 8.651797758332862e-05, "loss": 0.5943, "step": 2222 }, { "epoch": 0.4569842738205365, "grad_norm": 0.3784264922142029, "learning_rate": 8.651412408731949e-05, "loss": 0.7184, "step": 2223 }, { "epoch": 0.4571898447939151, "grad_norm": 0.24264433979988098, "learning_rate": 8.651026854610348e-05, "loss": 0.6976, "step": 2224 }, { "epoch": 0.45739541576729364, "grad_norm": 0.26151180267333984, "learning_rate": 8.650641095987059e-05, "loss": 0.6998, "step": 2225 }, { "epoch": 0.45760098674067223, "grad_norm": 0.33650773763656616, "learning_rate": 8.650255132881082e-05, "loss": 0.7366, "step": 2226 }, { "epoch": 0.45780655771405077, "grad_norm": 0.27262553572654724, "learning_rate": 8.649868965311432e-05, "loss": 0.7319, "step": 2227 }, { "epoch": 0.45801212868742935, "grad_norm": 0.2205299288034439, "learning_rate": 8.649482593297135e-05, "loss": 0.6905, "step": 2228 }, { "epoch": 0.4582176996608079, "grad_norm": 0.2557431757450104, "learning_rate": 8.649096016857226e-05, "loss": 0.6974, "step": 2229 }, { "epoch": 0.4584232706341865, "grad_norm": 0.27587607502937317, "learning_rate": 8.648709236010749e-05, "loss": 0.7024, "step": 2230 }, { "epoch": 0.458628841607565, "grad_norm": 0.32615306973457336, "learning_rate": 8.64832225077676e-05, "loss": 0.6211, "step": 2231 }, { "epoch": 0.4588344125809436, "grad_norm": 0.24620257318019867, "learning_rate": 8.647935061174321e-05, "loss": 0.7277, "step": 2232 }, { "epoch": 0.45903998355432213, "grad_norm": 0.2339821755886078, "learning_rate": 8.647547667222509e-05, "loss": 0.7122, "step": 2233 }, { "epoch": 0.45924555452770066, "grad_norm": 0.21899057924747467, "learning_rate": 8.647160068940411e-05, "loss": 0.7294, "step": 2234 }, { "epoch": 0.45945112550107925, "grad_norm": 0.21356239914894104, "learning_rate": 8.646772266347119e-05, "loss": 0.7077, "step": 2235 }, { "epoch": 0.4596566964744578, "grad_norm": 0.21990163624286652, "learning_rate": 8.646384259461737e-05, "loss": 0.6991, "step": 2236 }, { "epoch": 0.45986226744783637, "grad_norm": 0.2190622240304947, "learning_rate": 8.645996048303385e-05, "loss": 0.7178, "step": 2237 }, { "epoch": 0.4600678384212149, "grad_norm": 0.20803511142730713, "learning_rate": 8.645607632891187e-05, "loss": 0.6785, "step": 2238 }, { "epoch": 0.4602734093945935, "grad_norm": 0.20758850872516632, "learning_rate": 8.645219013244277e-05, "loss": 0.6661, "step": 2239 }, { "epoch": 0.460478980367972, "grad_norm": 0.21537218987941742, "learning_rate": 8.6448301893818e-05, "loss": 0.7075, "step": 2240 }, { "epoch": 0.4606845513413506, "grad_norm": 0.2241329848766327, "learning_rate": 8.644441161322912e-05, "loss": 0.7014, "step": 2241 }, { "epoch": 0.46089012231472914, "grad_norm": 0.20497076213359833, "learning_rate": 8.64405192908678e-05, "loss": 0.6964, "step": 2242 }, { "epoch": 0.46109569328810773, "grad_norm": 0.20961910486221313, "learning_rate": 8.643662492692578e-05, "loss": 0.6976, "step": 2243 }, { "epoch": 0.46130126426148627, "grad_norm": 0.2163321077823639, "learning_rate": 8.643272852159493e-05, "loss": 0.7253, "step": 2244 }, { "epoch": 0.46150683523486485, "grad_norm": 0.21539649367332458, "learning_rate": 8.642883007506721e-05, "loss": 0.6848, "step": 2245 }, { "epoch": 0.4617124062082434, "grad_norm": 0.2067098766565323, "learning_rate": 8.642492958753465e-05, "loss": 0.7156, "step": 2246 }, { "epoch": 0.461917977181622, "grad_norm": 0.21964769065380096, "learning_rate": 8.642102705918945e-05, "loss": 0.6989, "step": 2247 }, { "epoch": 0.4621235481550005, "grad_norm": 0.2275928258895874, "learning_rate": 8.641712249022384e-05, "loss": 0.6847, "step": 2248 }, { "epoch": 0.4623291191283791, "grad_norm": 0.2040269672870636, "learning_rate": 8.641321588083018e-05, "loss": 0.6973, "step": 2249 }, { "epoch": 0.46253469010175763, "grad_norm": 0.23092588782310486, "learning_rate": 8.640930723120093e-05, "loss": 0.7266, "step": 2250 }, { "epoch": 0.4627402610751362, "grad_norm": 0.2156527191400528, "learning_rate": 8.640539654152868e-05, "loss": 0.7062, "step": 2251 }, { "epoch": 0.46294583204851475, "grad_norm": 0.2142401933670044, "learning_rate": 8.640148381200607e-05, "loss": 0.7047, "step": 2252 }, { "epoch": 0.4631514030218933, "grad_norm": 0.31457456946372986, "learning_rate": 8.639756904282586e-05, "loss": 0.6032, "step": 2253 }, { "epoch": 0.46335697399527187, "grad_norm": 0.23436057567596436, "learning_rate": 8.639365223418091e-05, "loss": 0.7436, "step": 2254 }, { "epoch": 0.4635625449686504, "grad_norm": 0.14833630621433258, "learning_rate": 8.638973338626418e-05, "loss": 0.588, "step": 2255 }, { "epoch": 0.463768115942029, "grad_norm": 0.24190352857112885, "learning_rate": 8.638581249926876e-05, "loss": 0.7079, "step": 2256 }, { "epoch": 0.4639736869154075, "grad_norm": 0.2287464588880539, "learning_rate": 8.638188957338778e-05, "loss": 0.6983, "step": 2257 }, { "epoch": 0.4641792578887861, "grad_norm": 0.24814251065254211, "learning_rate": 8.637796460881454e-05, "loss": 0.707, "step": 2258 }, { "epoch": 0.46438482886216464, "grad_norm": 0.22504420578479767, "learning_rate": 8.637403760574236e-05, "loss": 0.7045, "step": 2259 }, { "epoch": 0.46459039983554323, "grad_norm": 0.21358801424503326, "learning_rate": 8.637010856436475e-05, "loss": 0.7027, "step": 2260 }, { "epoch": 0.46479597080892177, "grad_norm": 0.21219758689403534, "learning_rate": 8.636617748487523e-05, "loss": 0.689, "step": 2261 }, { "epoch": 0.46500154178230035, "grad_norm": 0.21138092875480652, "learning_rate": 8.63622443674675e-05, "loss": 0.7208, "step": 2262 }, { "epoch": 0.4652071127556789, "grad_norm": 0.27241116762161255, "learning_rate": 8.635830921233532e-05, "loss": 0.5964, "step": 2263 }, { "epoch": 0.4654126837290575, "grad_norm": 0.2141522914171219, "learning_rate": 8.635437201967255e-05, "loss": 0.7362, "step": 2264 }, { "epoch": 0.465618254702436, "grad_norm": 0.2085803896188736, "learning_rate": 8.635043278967317e-05, "loss": 0.6859, "step": 2265 }, { "epoch": 0.4658238256758146, "grad_norm": 0.21698498725891113, "learning_rate": 8.634649152253123e-05, "loss": 0.7078, "step": 2266 }, { "epoch": 0.46602939664919313, "grad_norm": 0.19954286515712738, "learning_rate": 8.63425482184409e-05, "loss": 0.6877, "step": 2267 }, { "epoch": 0.4662349676225717, "grad_norm": 0.18924130499362946, "learning_rate": 8.633860287759646e-05, "loss": 0.6001, "step": 2268 }, { "epoch": 0.46644053859595025, "grad_norm": 0.15498289465904236, "learning_rate": 8.633465550019227e-05, "loss": 0.5894, "step": 2269 }, { "epoch": 0.46664610956932884, "grad_norm": 0.2448817938566208, "learning_rate": 8.633070608642282e-05, "loss": 0.6883, "step": 2270 }, { "epoch": 0.46685168054270737, "grad_norm": 0.24218863248825073, "learning_rate": 8.632675463648264e-05, "loss": 0.7305, "step": 2271 }, { "epoch": 0.4670572515160859, "grad_norm": 0.21386098861694336, "learning_rate": 8.632280115056642e-05, "loss": 0.703, "step": 2272 }, { "epoch": 0.4672628224894645, "grad_norm": 0.20794478058815002, "learning_rate": 8.631884562886894e-05, "loss": 0.7054, "step": 2273 }, { "epoch": 0.467468393462843, "grad_norm": 0.22331750392913818, "learning_rate": 8.631488807158505e-05, "loss": 0.7116, "step": 2274 }, { "epoch": 0.4676739644362216, "grad_norm": 0.22476287186145782, "learning_rate": 8.631092847890973e-05, "loss": 0.7001, "step": 2275 }, { "epoch": 0.46787953540960014, "grad_norm": 0.23165211081504822, "learning_rate": 8.630696685103806e-05, "loss": 0.5924, "step": 2276 }, { "epoch": 0.46808510638297873, "grad_norm": 0.17003892362117767, "learning_rate": 8.63030031881652e-05, "loss": 0.5951, "step": 2277 }, { "epoch": 0.46829067735635727, "grad_norm": 0.14959658682346344, "learning_rate": 8.629903749048642e-05, "loss": 0.5875, "step": 2278 }, { "epoch": 0.46849624832973585, "grad_norm": 0.28558462858200073, "learning_rate": 8.629506975819709e-05, "loss": 0.7339, "step": 2279 }, { "epoch": 0.4687018193031144, "grad_norm": 0.2474449872970581, "learning_rate": 8.629109999149268e-05, "loss": 0.7125, "step": 2280 }, { "epoch": 0.468907390276493, "grad_norm": 0.22551508247852325, "learning_rate": 8.628712819056878e-05, "loss": 0.7266, "step": 2281 }, { "epoch": 0.4691129612498715, "grad_norm": 0.23484089970588684, "learning_rate": 8.628315435562105e-05, "loss": 0.686, "step": 2282 }, { "epoch": 0.4693185322232501, "grad_norm": 0.2324771285057068, "learning_rate": 8.627917848684525e-05, "loss": 0.7387, "step": 2283 }, { "epoch": 0.46952410319662863, "grad_norm": 0.28548941016197205, "learning_rate": 8.627520058443727e-05, "loss": 0.6007, "step": 2284 }, { "epoch": 0.4697296741700072, "grad_norm": 0.1830257922410965, "learning_rate": 8.627122064859307e-05, "loss": 0.5817, "step": 2285 }, { "epoch": 0.46993524514338575, "grad_norm": 0.2828942835330963, "learning_rate": 8.626723867950875e-05, "loss": 0.6864, "step": 2286 }, { "epoch": 0.47014081611676434, "grad_norm": 0.20021386444568634, "learning_rate": 8.626325467738045e-05, "loss": 0.5965, "step": 2287 }, { "epoch": 0.47034638709014287, "grad_norm": 0.2412208914756775, "learning_rate": 8.625926864240445e-05, "loss": 0.7398, "step": 2288 }, { "epoch": 0.47055195806352146, "grad_norm": 0.2284758985042572, "learning_rate": 8.625528057477714e-05, "loss": 0.7037, "step": 2289 }, { "epoch": 0.4707575290369, "grad_norm": 0.22256653010845184, "learning_rate": 8.625129047469498e-05, "loss": 0.6852, "step": 2290 }, { "epoch": 0.4709631000102785, "grad_norm": 0.21506358683109283, "learning_rate": 8.624729834235455e-05, "loss": 0.6848, "step": 2291 }, { "epoch": 0.4711686709836571, "grad_norm": 0.2219688594341278, "learning_rate": 8.624330417795251e-05, "loss": 0.7025, "step": 2292 }, { "epoch": 0.47137424195703564, "grad_norm": 0.22017613053321838, "learning_rate": 8.623930798168564e-05, "loss": 0.6911, "step": 2293 }, { "epoch": 0.47157981293041423, "grad_norm": 0.2322702705860138, "learning_rate": 8.623530975375084e-05, "loss": 0.6266, "step": 2294 }, { "epoch": 0.47178538390379277, "grad_norm": 0.25697195529937744, "learning_rate": 8.623130949434505e-05, "loss": 0.7211, "step": 2295 }, { "epoch": 0.47199095487717135, "grad_norm": 0.16440944373607635, "learning_rate": 8.622730720366535e-05, "loss": 0.6019, "step": 2296 }, { "epoch": 0.4721965258505499, "grad_norm": 0.2459285408258438, "learning_rate": 8.622330288190893e-05, "loss": 0.6854, "step": 2297 }, { "epoch": 0.4724020968239285, "grad_norm": 0.25851428508758545, "learning_rate": 8.621929652927306e-05, "loss": 0.6919, "step": 2298 }, { "epoch": 0.472607667797307, "grad_norm": 0.17177143692970276, "learning_rate": 8.621528814595508e-05, "loss": 0.5922, "step": 2299 }, { "epoch": 0.4728132387706856, "grad_norm": 0.22151097655296326, "learning_rate": 8.621127773215252e-05, "loss": 0.6958, "step": 2300 }, { "epoch": 0.47301880974406413, "grad_norm": 0.2228916585445404, "learning_rate": 8.620726528806292e-05, "loss": 0.7062, "step": 2301 }, { "epoch": 0.4732243807174427, "grad_norm": 0.17388984560966492, "learning_rate": 8.620325081388396e-05, "loss": 0.5868, "step": 2302 }, { "epoch": 0.47342995169082125, "grad_norm": 0.22164839506149292, "learning_rate": 8.61992343098134e-05, "loss": 0.6753, "step": 2303 }, { "epoch": 0.47363552266419984, "grad_norm": 0.2175762802362442, "learning_rate": 8.619521577604915e-05, "loss": 0.7057, "step": 2304 }, { "epoch": 0.47384109363757837, "grad_norm": 0.21533454954624176, "learning_rate": 8.619119521278916e-05, "loss": 0.6798, "step": 2305 }, { "epoch": 0.47404666461095696, "grad_norm": 0.23147819936275482, "learning_rate": 8.618717262023151e-05, "loss": 0.7162, "step": 2306 }, { "epoch": 0.4742522355843355, "grad_norm": 0.21729323267936707, "learning_rate": 8.618314799857437e-05, "loss": 0.7169, "step": 2307 }, { "epoch": 0.474457806557714, "grad_norm": 0.19784866273403168, "learning_rate": 8.617912134801603e-05, "loss": 0.6863, "step": 2308 }, { "epoch": 0.4746633775310926, "grad_norm": 0.20950141549110413, "learning_rate": 8.617509266875484e-05, "loss": 0.6784, "step": 2309 }, { "epoch": 0.47486894850447114, "grad_norm": 0.2207701951265335, "learning_rate": 8.617106196098928e-05, "loss": 0.7182, "step": 2310 }, { "epoch": 0.47507451947784973, "grad_norm": 0.21060660481452942, "learning_rate": 8.616702922491794e-05, "loss": 0.7051, "step": 2311 }, { "epoch": 0.47528009045122827, "grad_norm": 0.21560098230838776, "learning_rate": 8.616299446073948e-05, "loss": 0.7186, "step": 2312 }, { "epoch": 0.47548566142460685, "grad_norm": 0.20710930228233337, "learning_rate": 8.615895766865268e-05, "loss": 0.6939, "step": 2313 }, { "epoch": 0.4756912323979854, "grad_norm": 0.20942838490009308, "learning_rate": 8.615491884885642e-05, "loss": 0.6854, "step": 2314 }, { "epoch": 0.475896803371364, "grad_norm": 0.21396920084953308, "learning_rate": 8.615087800154966e-05, "loss": 0.6919, "step": 2315 }, { "epoch": 0.4761023743447425, "grad_norm": 0.20860084891319275, "learning_rate": 8.614683512693147e-05, "loss": 0.715, "step": 2316 }, { "epoch": 0.4763079453181211, "grad_norm": 0.19696597754955292, "learning_rate": 8.614279022520105e-05, "loss": 0.7004, "step": 2317 }, { "epoch": 0.47651351629149963, "grad_norm": 0.214441180229187, "learning_rate": 8.613874329655765e-05, "loss": 0.695, "step": 2318 }, { "epoch": 0.4767190872648782, "grad_norm": 0.20082063972949982, "learning_rate": 8.613469434120065e-05, "loss": 0.69, "step": 2319 }, { "epoch": 0.47692465823825675, "grad_norm": 0.20159681141376495, "learning_rate": 8.613064335932952e-05, "loss": 0.6772, "step": 2320 }, { "epoch": 0.47713022921163534, "grad_norm": 0.20627199113368988, "learning_rate": 8.612659035114383e-05, "loss": 0.6884, "step": 2321 }, { "epoch": 0.47733580018501387, "grad_norm": 0.19715279340744019, "learning_rate": 8.612253531684328e-05, "loss": 0.5856, "step": 2322 }, { "epoch": 0.47754137115839246, "grad_norm": 0.21673934161663055, "learning_rate": 8.61184782566276e-05, "loss": 0.7141, "step": 2323 }, { "epoch": 0.477746942131771, "grad_norm": 0.21236567199230194, "learning_rate": 8.611441917069668e-05, "loss": 0.7081, "step": 2324 }, { "epoch": 0.4779525131051496, "grad_norm": 0.22194881737232208, "learning_rate": 8.61103580592505e-05, "loss": 0.725, "step": 2325 }, { "epoch": 0.4781580840785281, "grad_norm": 0.20836539566516876, "learning_rate": 8.610629492248915e-05, "loss": 0.6872, "step": 2326 }, { "epoch": 0.47836365505190664, "grad_norm": 0.20728257298469543, "learning_rate": 8.610222976061275e-05, "loss": 0.6898, "step": 2327 }, { "epoch": 0.47856922602528523, "grad_norm": 0.2103557288646698, "learning_rate": 8.609816257382162e-05, "loss": 0.6939, "step": 2328 }, { "epoch": 0.47877479699866377, "grad_norm": 0.18069760501384735, "learning_rate": 8.609409336231611e-05, "loss": 0.5892, "step": 2329 }, { "epoch": 0.47898036797204235, "grad_norm": 0.21599088609218597, "learning_rate": 8.609002212629668e-05, "loss": 0.7186, "step": 2330 }, { "epoch": 0.4791859389454209, "grad_norm": 0.22007983922958374, "learning_rate": 8.608594886596392e-05, "loss": 0.6984, "step": 2331 }, { "epoch": 0.4793915099187995, "grad_norm": 0.13403122127056122, "learning_rate": 8.608187358151852e-05, "loss": 0.5937, "step": 2332 }, { "epoch": 0.479597080892178, "grad_norm": 0.21932478249073029, "learning_rate": 8.607779627316119e-05, "loss": 0.6969, "step": 2333 }, { "epoch": 0.4798026518655566, "grad_norm": 0.22216017544269562, "learning_rate": 8.607371694109285e-05, "loss": 0.7011, "step": 2334 }, { "epoch": 0.48000822283893513, "grad_norm": 0.20484335720539093, "learning_rate": 8.606963558551445e-05, "loss": 0.6637, "step": 2335 }, { "epoch": 0.4802137938123137, "grad_norm": 0.22132568061351776, "learning_rate": 8.606555220662707e-05, "loss": 0.7098, "step": 2336 }, { "epoch": 0.48041936478569225, "grad_norm": 0.15403473377227783, "learning_rate": 8.606146680463187e-05, "loss": 0.5913, "step": 2337 }, { "epoch": 0.48062493575907084, "grad_norm": 0.21559444069862366, "learning_rate": 8.605737937973011e-05, "loss": 0.7038, "step": 2338 }, { "epoch": 0.48083050673244937, "grad_norm": 0.13026109337806702, "learning_rate": 8.605328993212317e-05, "loss": 0.5778, "step": 2339 }, { "epoch": 0.48103607770582796, "grad_norm": 0.2200099676847458, "learning_rate": 8.604919846201255e-05, "loss": 0.7091, "step": 2340 }, { "epoch": 0.4812416486792065, "grad_norm": 0.21221928298473358, "learning_rate": 8.604510496959975e-05, "loss": 0.7062, "step": 2341 }, { "epoch": 0.4814472196525851, "grad_norm": 0.20801213383674622, "learning_rate": 8.604100945508648e-05, "loss": 0.6884, "step": 2342 }, { "epoch": 0.4816527906259636, "grad_norm": 0.23321124911308289, "learning_rate": 8.603691191867451e-05, "loss": 0.6849, "step": 2343 }, { "epoch": 0.4818583615993422, "grad_norm": 0.1625455915927887, "learning_rate": 8.603281236056569e-05, "loss": 0.5854, "step": 2344 }, { "epoch": 0.48206393257272073, "grad_norm": 0.14913566410541534, "learning_rate": 8.602871078096198e-05, "loss": 0.5857, "step": 2345 }, { "epoch": 0.48226950354609927, "grad_norm": 0.23094283044338226, "learning_rate": 8.602460718006548e-05, "loss": 0.6814, "step": 2346 }, { "epoch": 0.48247507451947785, "grad_norm": 0.21578393876552582, "learning_rate": 8.602050155807832e-05, "loss": 0.6983, "step": 2347 }, { "epoch": 0.4826806454928564, "grad_norm": 0.21311207115650177, "learning_rate": 8.601639391520278e-05, "loss": 0.714, "step": 2348 }, { "epoch": 0.482886216466235, "grad_norm": 0.20807845890522003, "learning_rate": 8.601228425164123e-05, "loss": 0.6955, "step": 2349 }, { "epoch": 0.4830917874396135, "grad_norm": 0.2071390300989151, "learning_rate": 8.600817256759611e-05, "loss": 0.6911, "step": 2350 }, { "epoch": 0.4832973584129921, "grad_norm": 0.20365330576896667, "learning_rate": 8.600405886327001e-05, "loss": 0.5981, "step": 2351 }, { "epoch": 0.48350292938637063, "grad_norm": 0.21439498662948608, "learning_rate": 8.599994313886558e-05, "loss": 0.7061, "step": 2352 }, { "epoch": 0.4837085003597492, "grad_norm": 0.22116196155548096, "learning_rate": 8.599582539458558e-05, "loss": 0.719, "step": 2353 }, { "epoch": 0.48391407133312775, "grad_norm": 0.14612843096256256, "learning_rate": 8.599170563063289e-05, "loss": 0.5788, "step": 2354 }, { "epoch": 0.48411964230650634, "grad_norm": 0.20347650349140167, "learning_rate": 8.598758384721045e-05, "loss": 0.6891, "step": 2355 }, { "epoch": 0.48432521327988487, "grad_norm": 0.13734294474124908, "learning_rate": 8.598346004452132e-05, "loss": 0.5705, "step": 2356 }, { "epoch": 0.48453078425326346, "grad_norm": 0.21844719350337982, "learning_rate": 8.597933422276868e-05, "loss": 0.7261, "step": 2357 }, { "epoch": 0.484736355226642, "grad_norm": 0.20626910030841827, "learning_rate": 8.597520638215578e-05, "loss": 0.6712, "step": 2358 }, { "epoch": 0.4849419262000206, "grad_norm": 0.2096855491399765, "learning_rate": 8.597107652288598e-05, "loss": 0.6777, "step": 2359 }, { "epoch": 0.4851474971733991, "grad_norm": 0.20726048946380615, "learning_rate": 8.596694464516273e-05, "loss": 0.7194, "step": 2360 }, { "epoch": 0.4853530681467777, "grad_norm": 0.2092740535736084, "learning_rate": 8.59628107491896e-05, "loss": 0.6859, "step": 2361 }, { "epoch": 0.48555863912015623, "grad_norm": 0.20741955935955048, "learning_rate": 8.595867483517025e-05, "loss": 0.7095, "step": 2362 }, { "epoch": 0.4857642100935348, "grad_norm": 0.1959150731563568, "learning_rate": 8.595453690330843e-05, "loss": 0.7032, "step": 2363 }, { "epoch": 0.48596978106691335, "grad_norm": 0.20496924221515656, "learning_rate": 8.5950396953808e-05, "loss": 0.714, "step": 2364 }, { "epoch": 0.4861753520402919, "grad_norm": 0.1742028295993805, "learning_rate": 8.59462549868729e-05, "loss": 0.5882, "step": 2365 }, { "epoch": 0.4863809230136705, "grad_norm": 0.14946137368679047, "learning_rate": 8.59421110027072e-05, "loss": 0.5834, "step": 2366 }, { "epoch": 0.486586493987049, "grad_norm": 0.22946619987487793, "learning_rate": 8.593796500151507e-05, "loss": 0.6916, "step": 2367 }, { "epoch": 0.4867920649604276, "grad_norm": 0.2186809778213501, "learning_rate": 8.593381698350074e-05, "loss": 0.695, "step": 2368 }, { "epoch": 0.48699763593380613, "grad_norm": 0.21201607584953308, "learning_rate": 8.592966694886857e-05, "loss": 0.6895, "step": 2369 }, { "epoch": 0.4872032069071847, "grad_norm": 0.20772308111190796, "learning_rate": 8.592551489782302e-05, "loss": 0.6752, "step": 2370 }, { "epoch": 0.48740877788056325, "grad_norm": 0.2207845002412796, "learning_rate": 8.592136083056862e-05, "loss": 0.7037, "step": 2371 }, { "epoch": 0.48761434885394184, "grad_norm": 0.20530985295772552, "learning_rate": 8.591720474731006e-05, "loss": 0.6922, "step": 2372 }, { "epoch": 0.48781991982732037, "grad_norm": 0.2157611846923828, "learning_rate": 8.591304664825205e-05, "loss": 0.7053, "step": 2373 }, { "epoch": 0.48802549080069896, "grad_norm": 0.2080930769443512, "learning_rate": 8.590888653359947e-05, "loss": 0.6036, "step": 2374 }, { "epoch": 0.4882310617740775, "grad_norm": 0.22034066915512085, "learning_rate": 8.590472440355725e-05, "loss": 0.6732, "step": 2375 }, { "epoch": 0.4884366327474561, "grad_norm": 0.21666774153709412, "learning_rate": 8.590056025833045e-05, "loss": 0.6879, "step": 2376 }, { "epoch": 0.4886422037208346, "grad_norm": 0.21656173467636108, "learning_rate": 8.589639409812422e-05, "loss": 0.7001, "step": 2377 }, { "epoch": 0.4888477746942132, "grad_norm": 0.2207968384027481, "learning_rate": 8.589222592314381e-05, "loss": 0.6988, "step": 2378 }, { "epoch": 0.48905334566759173, "grad_norm": 0.21282252669334412, "learning_rate": 8.588805573359454e-05, "loss": 0.6686, "step": 2379 }, { "epoch": 0.4892589166409703, "grad_norm": 0.21024645864963531, "learning_rate": 8.588388352968188e-05, "loss": 0.6777, "step": 2380 }, { "epoch": 0.48946448761434885, "grad_norm": 0.21151992678642273, "learning_rate": 8.587970931161137e-05, "loss": 0.6922, "step": 2381 }, { "epoch": 0.4896700585877274, "grad_norm": 0.2125832885503769, "learning_rate": 8.587553307958865e-05, "loss": 0.6968, "step": 2382 }, { "epoch": 0.489875629561106, "grad_norm": 0.22030989825725555, "learning_rate": 8.587135483381948e-05, "loss": 0.6913, "step": 2383 }, { "epoch": 0.4900812005344845, "grad_norm": 0.2217807024717331, "learning_rate": 8.586717457450967e-05, "loss": 0.7198, "step": 2384 }, { "epoch": 0.4902867715078631, "grad_norm": 0.20852632820606232, "learning_rate": 8.586299230186519e-05, "loss": 0.6752, "step": 2385 }, { "epoch": 0.4904923424812416, "grad_norm": 0.20621474087238312, "learning_rate": 8.585880801609208e-05, "loss": 0.6783, "step": 2386 }, { "epoch": 0.4906979134546202, "grad_norm": 0.21134278178215027, "learning_rate": 8.585462171739647e-05, "loss": 0.5887, "step": 2387 }, { "epoch": 0.49090348442799875, "grad_norm": 0.2228272408246994, "learning_rate": 8.58504334059846e-05, "loss": 0.6875, "step": 2388 }, { "epoch": 0.49110905540137734, "grad_norm": 0.2240232229232788, "learning_rate": 8.584624308206281e-05, "loss": 0.6768, "step": 2389 }, { "epoch": 0.49131462637475587, "grad_norm": 0.21626600623130798, "learning_rate": 8.584205074583754e-05, "loss": 0.7107, "step": 2390 }, { "epoch": 0.49152019734813446, "grad_norm": 0.21161963045597076, "learning_rate": 8.583785639751532e-05, "loss": 0.6794, "step": 2391 }, { "epoch": 0.491725768321513, "grad_norm": 0.21978048980236053, "learning_rate": 8.583366003730278e-05, "loss": 0.6772, "step": 2392 }, { "epoch": 0.4919313392948916, "grad_norm": 0.20937666296958923, "learning_rate": 8.582946166540668e-05, "loss": 0.6825, "step": 2393 }, { "epoch": 0.4921369102682701, "grad_norm": 0.21978282928466797, "learning_rate": 8.582526128203385e-05, "loss": 0.7231, "step": 2394 }, { "epoch": 0.4923424812416487, "grad_norm": 0.21103829145431519, "learning_rate": 8.582105888739121e-05, "loss": 0.6941, "step": 2395 }, { "epoch": 0.49254805221502723, "grad_norm": 0.20812061429023743, "learning_rate": 8.581685448168579e-05, "loss": 0.6734, "step": 2396 }, { "epoch": 0.4927536231884058, "grad_norm": 0.2180771827697754, "learning_rate": 8.581264806512471e-05, "loss": 0.6817, "step": 2397 }, { "epoch": 0.49295919416178435, "grad_norm": 0.20335964858531952, "learning_rate": 8.580843963791524e-05, "loss": 0.7109, "step": 2398 }, { "epoch": 0.49316476513516294, "grad_norm": 0.22317105531692505, "learning_rate": 8.580422920026468e-05, "loss": 0.6899, "step": 2399 }, { "epoch": 0.4933703361085415, "grad_norm": 0.2043156623840332, "learning_rate": 8.580001675238047e-05, "loss": 0.7072, "step": 2400 }, { "epoch": 0.49357590708192, "grad_norm": 0.22758691012859344, "learning_rate": 8.579580229447013e-05, "loss": 0.5851, "step": 2401 }, { "epoch": 0.4937814780552986, "grad_norm": 0.21011817455291748, "learning_rate": 8.579158582674129e-05, "loss": 0.6755, "step": 2402 }, { "epoch": 0.4939870490286771, "grad_norm": 0.14406029880046844, "learning_rate": 8.578736734940168e-05, "loss": 0.5801, "step": 2403 }, { "epoch": 0.4941926200020557, "grad_norm": 0.21777774393558502, "learning_rate": 8.578314686265911e-05, "loss": 0.6707, "step": 2404 }, { "epoch": 0.49439819097543425, "grad_norm": 0.21820279955863953, "learning_rate": 8.577892436672152e-05, "loss": 0.6942, "step": 2405 }, { "epoch": 0.49460376194881284, "grad_norm": 0.2069522887468338, "learning_rate": 8.577469986179693e-05, "loss": 0.6923, "step": 2406 }, { "epoch": 0.49480933292219137, "grad_norm": 0.202153280377388, "learning_rate": 8.577047334809346e-05, "loss": 0.7045, "step": 2407 }, { "epoch": 0.49501490389556996, "grad_norm": 0.22939299046993256, "learning_rate": 8.576624482581932e-05, "loss": 0.6958, "step": 2408 }, { "epoch": 0.4952204748689485, "grad_norm": 0.19599145650863647, "learning_rate": 8.576201429518283e-05, "loss": 0.6101, "step": 2409 }, { "epoch": 0.4954260458423271, "grad_norm": 0.2155923992395401, "learning_rate": 8.575778175639245e-05, "loss": 0.7045, "step": 2410 }, { "epoch": 0.4956316168157056, "grad_norm": 0.13790921866893768, "learning_rate": 8.575354720965663e-05, "loss": 0.5729, "step": 2411 }, { "epoch": 0.4958371877890842, "grad_norm": 0.23278020322322845, "learning_rate": 8.574931065518403e-05, "loss": 0.7441, "step": 2412 }, { "epoch": 0.49604275876246273, "grad_norm": 0.15767961740493774, "learning_rate": 8.574507209318337e-05, "loss": 0.617, "step": 2413 }, { "epoch": 0.4962483297358413, "grad_norm": 0.21228386461734772, "learning_rate": 8.574083152386344e-05, "loss": 0.6849, "step": 2414 }, { "epoch": 0.49645390070921985, "grad_norm": 0.20901069045066833, "learning_rate": 8.573658894743316e-05, "loss": 0.6881, "step": 2415 }, { "epoch": 0.49665947168259844, "grad_norm": 0.20342102646827698, "learning_rate": 8.573234436410155e-05, "loss": 0.7173, "step": 2416 }, { "epoch": 0.496865042655977, "grad_norm": 0.22326229512691498, "learning_rate": 8.572809777407771e-05, "loss": 0.7265, "step": 2417 }, { "epoch": 0.49707061362935556, "grad_norm": 0.2064063847064972, "learning_rate": 8.572384917757086e-05, "loss": 0.6939, "step": 2418 }, { "epoch": 0.4972761846027341, "grad_norm": 0.2083250731229782, "learning_rate": 8.57195985747903e-05, "loss": 0.7009, "step": 2419 }, { "epoch": 0.4974817555761126, "grad_norm": 0.20397667586803436, "learning_rate": 8.571534596594544e-05, "loss": 0.6835, "step": 2420 }, { "epoch": 0.4976873265494912, "grad_norm": 0.2096882462501526, "learning_rate": 8.571109135124579e-05, "loss": 0.714, "step": 2421 }, { "epoch": 0.49789289752286975, "grad_norm": 0.2030659317970276, "learning_rate": 8.570683473090095e-05, "loss": 0.6971, "step": 2422 }, { "epoch": 0.49809846849624834, "grad_norm": 0.202758327126503, "learning_rate": 8.570257610512064e-05, "loss": 0.6856, "step": 2423 }, { "epoch": 0.49830403946962687, "grad_norm": 0.20229479670524597, "learning_rate": 8.569831547411464e-05, "loss": 0.7063, "step": 2424 }, { "epoch": 0.49850961044300546, "grad_norm": 0.2144801914691925, "learning_rate": 8.569405283809285e-05, "loss": 0.7056, "step": 2425 }, { "epoch": 0.498715181416384, "grad_norm": 0.19797521829605103, "learning_rate": 8.56897881972653e-05, "loss": 0.6035, "step": 2426 }, { "epoch": 0.4989207523897626, "grad_norm": 0.21914798021316528, "learning_rate": 8.568552155184204e-05, "loss": 0.6789, "step": 2427 }, { "epoch": 0.4991263233631411, "grad_norm": 0.2153196483850479, "learning_rate": 8.568125290203332e-05, "loss": 0.7026, "step": 2428 }, { "epoch": 0.4993318943365197, "grad_norm": 0.1549125760793686, "learning_rate": 8.567698224804941e-05, "loss": 0.5727, "step": 2429 }, { "epoch": 0.49953746530989823, "grad_norm": 0.2103041261434555, "learning_rate": 8.567270959010071e-05, "loss": 0.7001, "step": 2430 }, { "epoch": 0.4997430362832768, "grad_norm": 0.20346547663211823, "learning_rate": 8.566843492839769e-05, "loss": 0.6998, "step": 2431 }, { "epoch": 0.49994860725665535, "grad_norm": 0.16657423973083496, "learning_rate": 8.5664158263151e-05, "loss": 0.5893, "step": 2432 }, { "epoch": 0.5001541782300339, "grad_norm": 0.2198108732700348, "learning_rate": 8.565987959457128e-05, "loss": 0.692, "step": 2433 }, { "epoch": 0.5003597492034125, "grad_norm": 0.21006634831428528, "learning_rate": 8.565559892286934e-05, "loss": 0.7012, "step": 2434 }, { "epoch": 0.500565320176791, "grad_norm": 0.20093873143196106, "learning_rate": 8.565131624825605e-05, "loss": 0.6853, "step": 2435 }, { "epoch": 0.5007708911501696, "grad_norm": 0.21130932867527008, "learning_rate": 8.564703157094242e-05, "loss": 0.7092, "step": 2436 }, { "epoch": 0.5009764621235482, "grad_norm": 0.21420711278915405, "learning_rate": 8.564274489113954e-05, "loss": 0.7132, "step": 2437 }, { "epoch": 0.5011820330969267, "grad_norm": 0.2129506766796112, "learning_rate": 8.563845620905856e-05, "loss": 0.6958, "step": 2438 }, { "epoch": 0.5013876040703052, "grad_norm": 0.20229041576385498, "learning_rate": 8.563416552491081e-05, "loss": 0.6567, "step": 2439 }, { "epoch": 0.5015931750436838, "grad_norm": 0.21202024817466736, "learning_rate": 8.562987283890764e-05, "loss": 0.7095, "step": 2440 }, { "epoch": 0.5017987460170624, "grad_norm": 0.20876267552375793, "learning_rate": 8.562557815126053e-05, "loss": 0.6786, "step": 2441 }, { "epoch": 0.5020043169904409, "grad_norm": 0.20050349831581116, "learning_rate": 8.562128146218108e-05, "loss": 0.6929, "step": 2442 }, { "epoch": 0.5022098879638195, "grad_norm": 0.2047853022813797, "learning_rate": 8.561698277188095e-05, "loss": 0.6934, "step": 2443 }, { "epoch": 0.5024154589371981, "grad_norm": 0.18259146809577942, "learning_rate": 8.561268208057192e-05, "loss": 0.6199, "step": 2444 }, { "epoch": 0.5026210299105767, "grad_norm": 0.1506025195121765, "learning_rate": 8.560837938846587e-05, "loss": 0.6148, "step": 2445 }, { "epoch": 0.5028266008839551, "grad_norm": 0.22317710518836975, "learning_rate": 8.560407469577477e-05, "loss": 0.7029, "step": 2446 }, { "epoch": 0.5030321718573337, "grad_norm": 0.21875528991222382, "learning_rate": 8.55997680027107e-05, "loss": 0.7086, "step": 2447 }, { "epoch": 0.5032377428307123, "grad_norm": 0.2068042755126953, "learning_rate": 8.559545930948581e-05, "loss": 0.6979, "step": 2448 }, { "epoch": 0.5034433138040909, "grad_norm": 0.20604568719863892, "learning_rate": 8.559114861631239e-05, "loss": 0.6828, "step": 2449 }, { "epoch": 0.5036488847774694, "grad_norm": 0.20887784659862518, "learning_rate": 8.55868359234028e-05, "loss": 0.7186, "step": 2450 }, { "epoch": 0.503854455750848, "grad_norm": 0.23300114274024963, "learning_rate": 8.55825212309695e-05, "loss": 0.6772, "step": 2451 }, { "epoch": 0.5040600267242266, "grad_norm": 0.2133777141571045, "learning_rate": 8.557820453922507e-05, "loss": 0.5952, "step": 2452 }, { "epoch": 0.5042655976976052, "grad_norm": 0.23336206376552582, "learning_rate": 8.557388584838216e-05, "loss": 0.6794, "step": 2453 }, { "epoch": 0.5044711686709836, "grad_norm": 0.22460931539535522, "learning_rate": 8.556956515865353e-05, "loss": 0.6914, "step": 2454 }, { "epoch": 0.5046767396443622, "grad_norm": 0.21478697657585144, "learning_rate": 8.556524247025206e-05, "loss": 0.7215, "step": 2455 }, { "epoch": 0.5048823106177408, "grad_norm": 0.22004112601280212, "learning_rate": 8.556091778339068e-05, "loss": 0.6831, "step": 2456 }, { "epoch": 0.5050878815911193, "grad_norm": 0.21334481239318848, "learning_rate": 8.555659109828247e-05, "loss": 0.6868, "step": 2457 }, { "epoch": 0.5052934525644979, "grad_norm": 0.20527870953083038, "learning_rate": 8.555226241514059e-05, "loss": 0.7008, "step": 2458 }, { "epoch": 0.5054990235378765, "grad_norm": 0.2052440643310547, "learning_rate": 8.554793173417825e-05, "loss": 0.6851, "step": 2459 }, { "epoch": 0.505704594511255, "grad_norm": 0.20601294934749603, "learning_rate": 8.554359905560886e-05, "loss": 0.7074, "step": 2460 }, { "epoch": 0.5059101654846335, "grad_norm": 0.20732106268405914, "learning_rate": 8.553926437964584e-05, "loss": 0.7022, "step": 2461 }, { "epoch": 0.5061157364580121, "grad_norm": 0.20242151618003845, "learning_rate": 8.553492770650275e-05, "loss": 0.7151, "step": 2462 }, { "epoch": 0.5063213074313907, "grad_norm": 0.2136530876159668, "learning_rate": 8.553058903639322e-05, "loss": 0.6944, "step": 2463 }, { "epoch": 0.5065268784047693, "grad_norm": 0.20471519231796265, "learning_rate": 8.552624836953102e-05, "loss": 0.7044, "step": 2464 }, { "epoch": 0.5067324493781478, "grad_norm": 0.2073119431734085, "learning_rate": 8.552190570612998e-05, "loss": 0.7084, "step": 2465 }, { "epoch": 0.5069380203515264, "grad_norm": 0.20517416298389435, "learning_rate": 8.551756104640403e-05, "loss": 0.7044, "step": 2466 }, { "epoch": 0.5071435913249049, "grad_norm": 0.20278342068195343, "learning_rate": 8.551321439056722e-05, "loss": 0.724, "step": 2467 }, { "epoch": 0.5073491622982835, "grad_norm": 0.20847640931606293, "learning_rate": 8.550886573883371e-05, "loss": 0.6805, "step": 2468 }, { "epoch": 0.507554733271662, "grad_norm": 0.21068242192268372, "learning_rate": 8.550451509141772e-05, "loss": 0.6878, "step": 2469 }, { "epoch": 0.5077603042450406, "grad_norm": 0.19965562224388123, "learning_rate": 8.55001624485336e-05, "loss": 0.6728, "step": 2470 }, { "epoch": 0.5079658752184192, "grad_norm": 0.28934335708618164, "learning_rate": 8.549580781039576e-05, "loss": 0.6096, "step": 2471 }, { "epoch": 0.5081714461917978, "grad_norm": 0.21150463819503784, "learning_rate": 8.549145117721875e-05, "loss": 0.7202, "step": 2472 }, { "epoch": 0.5083770171651762, "grad_norm": 0.17131322622299194, "learning_rate": 8.548709254921721e-05, "loss": 0.5992, "step": 2473 }, { "epoch": 0.5085825881385548, "grad_norm": 0.1621021330356598, "learning_rate": 8.548273192660585e-05, "loss": 0.5971, "step": 2474 }, { "epoch": 0.5087881591119334, "grad_norm": 0.22314049303531647, "learning_rate": 8.547836930959949e-05, "loss": 0.7129, "step": 2475 }, { "epoch": 0.5089937300853119, "grad_norm": 0.21151074767112732, "learning_rate": 8.547400469841307e-05, "loss": 0.6885, "step": 2476 }, { "epoch": 0.5091993010586905, "grad_norm": 0.20470760762691498, "learning_rate": 8.546963809326162e-05, "loss": 0.7107, "step": 2477 }, { "epoch": 0.5094048720320691, "grad_norm": 0.20865213871002197, "learning_rate": 8.546526949436025e-05, "loss": 0.7328, "step": 2478 }, { "epoch": 0.5096104430054477, "grad_norm": 0.24143381416797638, "learning_rate": 8.546089890192422e-05, "loss": 0.5784, "step": 2479 }, { "epoch": 0.5098160139788261, "grad_norm": 0.21726645529270172, "learning_rate": 8.545652631616878e-05, "loss": 0.7009, "step": 2480 }, { "epoch": 0.5100215849522047, "grad_norm": 0.24358177185058594, "learning_rate": 8.545215173730938e-05, "loss": 0.7017, "step": 2481 }, { "epoch": 0.5102271559255833, "grad_norm": 0.21474173665046692, "learning_rate": 8.544777516556155e-05, "loss": 0.6889, "step": 2482 }, { "epoch": 0.5104327268989619, "grad_norm": 0.2038557231426239, "learning_rate": 8.54433966011409e-05, "loss": 0.7172, "step": 2483 }, { "epoch": 0.5106382978723404, "grad_norm": 0.22823157906532288, "learning_rate": 8.54390160442631e-05, "loss": 0.7173, "step": 2484 }, { "epoch": 0.510843868845719, "grad_norm": 0.20391489565372467, "learning_rate": 8.5434633495144e-05, "loss": 0.7198, "step": 2485 }, { "epoch": 0.5110494398190976, "grad_norm": 0.1981978565454483, "learning_rate": 8.543024895399953e-05, "loss": 0.6856, "step": 2486 }, { "epoch": 0.5112550107924761, "grad_norm": 0.2035714089870453, "learning_rate": 8.542586242104563e-05, "loss": 0.6885, "step": 2487 }, { "epoch": 0.5114605817658546, "grad_norm": 0.20313310623168945, "learning_rate": 8.542147389649847e-05, "loss": 0.7015, "step": 2488 }, { "epoch": 0.5116661527392332, "grad_norm": 0.20469297468662262, "learning_rate": 8.541708338057419e-05, "loss": 0.7098, "step": 2489 }, { "epoch": 0.5118717237126118, "grad_norm": 0.2113511860370636, "learning_rate": 8.541269087348913e-05, "loss": 0.7239, "step": 2490 }, { "epoch": 0.5120772946859904, "grad_norm": 0.20842553675174713, "learning_rate": 8.540829637545969e-05, "loss": 0.7047, "step": 2491 }, { "epoch": 0.5122828656593689, "grad_norm": 0.2060026377439499, "learning_rate": 8.540389988670234e-05, "loss": 0.6655, "step": 2492 }, { "epoch": 0.5124884366327475, "grad_norm": 0.21950404345989227, "learning_rate": 8.53995014074337e-05, "loss": 0.6143, "step": 2493 }, { "epoch": 0.512694007606126, "grad_norm": 0.21250604093074799, "learning_rate": 8.539510093787044e-05, "loss": 0.6995, "step": 2494 }, { "epoch": 0.5128995785795045, "grad_norm": 0.21519462764263153, "learning_rate": 8.539069847822938e-05, "loss": 0.6877, "step": 2495 }, { "epoch": 0.5131051495528831, "grad_norm": 0.21637707948684692, "learning_rate": 8.538629402872738e-05, "loss": 0.7088, "step": 2496 }, { "epoch": 0.5133107205262617, "grad_norm": 0.2197788506746292, "learning_rate": 8.538188758958144e-05, "loss": 0.6753, "step": 2497 }, { "epoch": 0.5135162914996403, "grad_norm": 0.22371014952659607, "learning_rate": 8.537747916100865e-05, "loss": 0.7074, "step": 2498 }, { "epoch": 0.5137218624730188, "grad_norm": 0.16387100517749786, "learning_rate": 8.537306874322618e-05, "loss": 0.5846, "step": 2499 }, { "epoch": 0.5139274334463974, "grad_norm": 0.24268200993537903, "learning_rate": 8.536865633645132e-05, "loss": 0.6932, "step": 2500 }, { "epoch": 0.5141330044197759, "grad_norm": 0.23605839908123016, "learning_rate": 8.536424194090144e-05, "loss": 0.6874, "step": 2501 }, { "epoch": 0.5143385753931545, "grad_norm": 0.20614401996135712, "learning_rate": 8.535982555679402e-05, "loss": 0.6704, "step": 2502 }, { "epoch": 0.514544146366533, "grad_norm": 0.20825539529323578, "learning_rate": 8.535540718434665e-05, "loss": 0.7012, "step": 2503 }, { "epoch": 0.5147497173399116, "grad_norm": 0.2111969292163849, "learning_rate": 8.535098682377698e-05, "loss": 0.6834, "step": 2504 }, { "epoch": 0.5149552883132902, "grad_norm": 0.21059072017669678, "learning_rate": 8.534656447530278e-05, "loss": 0.7163, "step": 2505 }, { "epoch": 0.5151608592866688, "grad_norm": 0.20956206321716309, "learning_rate": 8.534214013914193e-05, "loss": 0.6897, "step": 2506 }, { "epoch": 0.5153664302600472, "grad_norm": 0.16276654601097107, "learning_rate": 8.53377138155124e-05, "loss": 0.5806, "step": 2507 }, { "epoch": 0.5155720012334258, "grad_norm": 0.14373748004436493, "learning_rate": 8.533328550463226e-05, "loss": 0.5802, "step": 2508 }, { "epoch": 0.5157775722068044, "grad_norm": 0.14410528540611267, "learning_rate": 8.532885520671963e-05, "loss": 0.5905, "step": 2509 }, { "epoch": 0.515983143180183, "grad_norm": 0.25100046396255493, "learning_rate": 8.532442292199283e-05, "loss": 0.7222, "step": 2510 }, { "epoch": 0.5161887141535615, "grad_norm": 0.1554838865995407, "learning_rate": 8.531998865067017e-05, "loss": 0.5799, "step": 2511 }, { "epoch": 0.5163942851269401, "grad_norm": 0.21566714346408844, "learning_rate": 8.531555239297013e-05, "loss": 0.7103, "step": 2512 }, { "epoch": 0.5165998561003187, "grad_norm": 0.1622397005558014, "learning_rate": 8.531111414911126e-05, "loss": 0.5907, "step": 2513 }, { "epoch": 0.5168054270736971, "grad_norm": 0.2527947723865509, "learning_rate": 8.530667391931221e-05, "loss": 0.6972, "step": 2514 }, { "epoch": 0.5170109980470757, "grad_norm": 0.14436852931976318, "learning_rate": 8.530223170379174e-05, "loss": 0.5834, "step": 2515 }, { "epoch": 0.5172165690204543, "grad_norm": 0.22850194573402405, "learning_rate": 8.529778750276866e-05, "loss": 0.7095, "step": 2516 }, { "epoch": 0.5174221399938329, "grad_norm": 0.21069450676441193, "learning_rate": 8.529334131646196e-05, "loss": 0.6754, "step": 2517 }, { "epoch": 0.5176277109672114, "grad_norm": 0.16173620522022247, "learning_rate": 8.528889314509066e-05, "loss": 0.6033, "step": 2518 }, { "epoch": 0.51783328194059, "grad_norm": 0.23078560829162598, "learning_rate": 8.528444298887391e-05, "loss": 0.6971, "step": 2519 }, { "epoch": 0.5180388529139686, "grad_norm": 0.21634352207183838, "learning_rate": 8.527999084803092e-05, "loss": 0.6821, "step": 2520 }, { "epoch": 0.5182444238873471, "grad_norm": 0.20838621258735657, "learning_rate": 8.527553672278107e-05, "loss": 0.7123, "step": 2521 }, { "epoch": 0.5184499948607256, "grad_norm": 0.20532085001468658, "learning_rate": 8.527108061334378e-05, "loss": 0.7199, "step": 2522 }, { "epoch": 0.5186555658341042, "grad_norm": 0.20181244611740112, "learning_rate": 8.526662251993856e-05, "loss": 0.6995, "step": 2523 }, { "epoch": 0.5188611368074828, "grad_norm": 0.1562027484178543, "learning_rate": 8.526216244278505e-05, "loss": 0.5845, "step": 2524 }, { "epoch": 0.5190667077808614, "grad_norm": 0.22398139536380768, "learning_rate": 8.5257700382103e-05, "loss": 0.7083, "step": 2525 }, { "epoch": 0.5192722787542399, "grad_norm": 0.206566721200943, "learning_rate": 8.52532363381122e-05, "loss": 0.7012, "step": 2526 }, { "epoch": 0.5194778497276185, "grad_norm": 0.20333848893642426, "learning_rate": 8.524877031103259e-05, "loss": 0.7052, "step": 2527 }, { "epoch": 0.519683420700997, "grad_norm": 0.1408892273902893, "learning_rate": 8.524430230108419e-05, "loss": 0.5717, "step": 2528 }, { "epoch": 0.5198889916743756, "grad_norm": 0.21199721097946167, "learning_rate": 8.523983230848712e-05, "loss": 0.6796, "step": 2529 }, { "epoch": 0.5200945626477541, "grad_norm": 0.21294069290161133, "learning_rate": 8.523536033346159e-05, "loss": 0.6961, "step": 2530 }, { "epoch": 0.5203001336211327, "grad_norm": 0.2040695995092392, "learning_rate": 8.523088637622793e-05, "loss": 0.7192, "step": 2531 }, { "epoch": 0.5205057045945113, "grad_norm": 0.13950461149215698, "learning_rate": 8.522641043700653e-05, "loss": 0.5966, "step": 2532 }, { "epoch": 0.5207112755678898, "grad_norm": 0.22141605615615845, "learning_rate": 8.52219325160179e-05, "loss": 0.7104, "step": 2533 }, { "epoch": 0.5209168465412684, "grad_norm": 0.13655850291252136, "learning_rate": 8.521745261348264e-05, "loss": 0.5766, "step": 2534 }, { "epoch": 0.5211224175146469, "grad_norm": 0.21564966440200806, "learning_rate": 8.521297072962148e-05, "loss": 0.7378, "step": 2535 }, { "epoch": 0.5213279884880255, "grad_norm": 0.13964693248271942, "learning_rate": 8.520848686465521e-05, "loss": 0.5763, "step": 2536 }, { "epoch": 0.521533559461404, "grad_norm": 0.20813791453838348, "learning_rate": 8.520400101880472e-05, "loss": 0.6768, "step": 2537 }, { "epoch": 0.5217391304347826, "grad_norm": 0.20774829387664795, "learning_rate": 8.519951319229101e-05, "loss": 0.7078, "step": 2538 }, { "epoch": 0.5219447014081612, "grad_norm": 0.14507782459259033, "learning_rate": 8.519502338533519e-05, "loss": 0.6009, "step": 2539 }, { "epoch": 0.5221502723815398, "grad_norm": 0.21281610429286957, "learning_rate": 8.519053159815843e-05, "loss": 0.6951, "step": 2540 }, { "epoch": 0.5223558433549182, "grad_norm": 0.21360744535923004, "learning_rate": 8.518603783098203e-05, "loss": 0.7098, "step": 2541 }, { "epoch": 0.5225614143282968, "grad_norm": 0.20327754318714142, "learning_rate": 8.518154208402736e-05, "loss": 0.7009, "step": 2542 }, { "epoch": 0.5227669853016754, "grad_norm": 0.200285404920578, "learning_rate": 8.517704435751594e-05, "loss": 0.6858, "step": 2543 }, { "epoch": 0.522972556275054, "grad_norm": 0.13732387125492096, "learning_rate": 8.517254465166932e-05, "loss": 0.5735, "step": 2544 }, { "epoch": 0.5231781272484325, "grad_norm": 0.21144580841064453, "learning_rate": 8.516804296670919e-05, "loss": 0.7217, "step": 2545 }, { "epoch": 0.5233836982218111, "grad_norm": 0.20281550288200378, "learning_rate": 8.516353930285735e-05, "loss": 0.7018, "step": 2546 }, { "epoch": 0.5235892691951897, "grad_norm": 0.1997842639684677, "learning_rate": 8.515903366033563e-05, "loss": 0.6991, "step": 2547 }, { "epoch": 0.5237948401685681, "grad_norm": 0.13998793065547943, "learning_rate": 8.515452603936603e-05, "loss": 0.5788, "step": 2548 }, { "epoch": 0.5240004111419467, "grad_norm": 0.2052655965089798, "learning_rate": 8.51500164401706e-05, "loss": 0.7221, "step": 2549 }, { "epoch": 0.5242059821153253, "grad_norm": 0.21158649027347565, "learning_rate": 8.514550486297155e-05, "loss": 0.7077, "step": 2550 }, { "epoch": 0.5244115530887039, "grad_norm": 0.2046501189470291, "learning_rate": 8.51409913079911e-05, "loss": 0.6898, "step": 2551 }, { "epoch": 0.5246171240620824, "grad_norm": 0.13471710681915283, "learning_rate": 8.513647577545163e-05, "loss": 0.5809, "step": 2552 }, { "epoch": 0.524822695035461, "grad_norm": 0.21416522562503815, "learning_rate": 8.51319582655756e-05, "loss": 0.6954, "step": 2553 }, { "epoch": 0.5250282660088396, "grad_norm": 0.21434451639652252, "learning_rate": 8.512743877858554e-05, "loss": 0.6864, "step": 2554 }, { "epoch": 0.5252338369822181, "grad_norm": 0.2164076715707779, "learning_rate": 8.512291731470415e-05, "loss": 0.7236, "step": 2555 }, { "epoch": 0.5254394079555966, "grad_norm": 0.2215905487537384, "learning_rate": 8.511839387415415e-05, "loss": 0.6808, "step": 2556 }, { "epoch": 0.5256449789289752, "grad_norm": 0.212999165058136, "learning_rate": 8.51138684571584e-05, "loss": 0.6986, "step": 2557 }, { "epoch": 0.5258505499023538, "grad_norm": 0.20863129198551178, "learning_rate": 8.510934106393983e-05, "loss": 0.708, "step": 2558 }, { "epoch": 0.5260561208757324, "grad_norm": 0.14516817033290863, "learning_rate": 8.51048116947215e-05, "loss": 0.574, "step": 2559 }, { "epoch": 0.5262616918491109, "grad_norm": 0.2149210274219513, "learning_rate": 8.510028034972656e-05, "loss": 0.6872, "step": 2560 }, { "epoch": 0.5264672628224895, "grad_norm": 0.21908272802829742, "learning_rate": 8.509574702917823e-05, "loss": 0.6847, "step": 2561 }, { "epoch": 0.526672833795868, "grad_norm": 0.1989137828350067, "learning_rate": 8.509121173329985e-05, "loss": 0.6807, "step": 2562 }, { "epoch": 0.5268784047692466, "grad_norm": 0.14854271709918976, "learning_rate": 8.508667446231486e-05, "loss": 0.5931, "step": 2563 }, { "epoch": 0.5270839757426251, "grad_norm": 0.21540796756744385, "learning_rate": 8.508213521644677e-05, "loss": 0.6948, "step": 2564 }, { "epoch": 0.5272895467160037, "grad_norm": 0.21465127170085907, "learning_rate": 8.507759399591922e-05, "loss": 0.7256, "step": 2565 }, { "epoch": 0.5274951176893823, "grad_norm": 0.2020212709903717, "learning_rate": 8.507305080095595e-05, "loss": 0.6946, "step": 2566 }, { "epoch": 0.5277006886627608, "grad_norm": 0.21125240623950958, "learning_rate": 8.506850563178077e-05, "loss": 0.6756, "step": 2567 }, { "epoch": 0.5279062596361394, "grad_norm": 0.17571476101875305, "learning_rate": 8.506395848861759e-05, "loss": 0.5914, "step": 2568 }, { "epoch": 0.5281118306095179, "grad_norm": 0.22128242254257202, "learning_rate": 8.505940937169044e-05, "loss": 0.6772, "step": 2569 }, { "epoch": 0.5283174015828965, "grad_norm": 0.13210316002368927, "learning_rate": 8.505485828122341e-05, "loss": 0.5798, "step": 2570 }, { "epoch": 0.528522972556275, "grad_norm": 0.22432683408260345, "learning_rate": 8.505030521744074e-05, "loss": 0.693, "step": 2571 }, { "epoch": 0.5287285435296536, "grad_norm": 0.15919888019561768, "learning_rate": 8.504575018056672e-05, "loss": 0.5888, "step": 2572 }, { "epoch": 0.5289341145030322, "grad_norm": 0.21992851793766022, "learning_rate": 8.504119317082577e-05, "loss": 0.6978, "step": 2573 }, { "epoch": 0.5291396854764108, "grad_norm": 0.2072344422340393, "learning_rate": 8.503663418844238e-05, "loss": 0.7253, "step": 2574 }, { "epoch": 0.5293452564497892, "grad_norm": 0.14406660199165344, "learning_rate": 8.503207323364117e-05, "loss": 0.5729, "step": 2575 }, { "epoch": 0.5295508274231678, "grad_norm": 0.21171186864376068, "learning_rate": 8.50275103066468e-05, "loss": 0.7078, "step": 2576 }, { "epoch": 0.5297563983965464, "grad_norm": 0.22379416227340698, "learning_rate": 8.502294540768409e-05, "loss": 0.6871, "step": 2577 }, { "epoch": 0.529961969369925, "grad_norm": 0.2064572423696518, "learning_rate": 8.501837853697792e-05, "loss": 0.7041, "step": 2578 }, { "epoch": 0.5301675403433035, "grad_norm": 0.20695674419403076, "learning_rate": 8.501380969475331e-05, "loss": 0.7138, "step": 2579 }, { "epoch": 0.5303731113166821, "grad_norm": 0.21721471846103668, "learning_rate": 8.50092388812353e-05, "loss": 0.7119, "step": 2580 }, { "epoch": 0.5305786822900607, "grad_norm": 0.20023848116397858, "learning_rate": 8.50046660966491e-05, "loss": 0.6828, "step": 2581 }, { "epoch": 0.5307842532634393, "grad_norm": 0.22572509944438934, "learning_rate": 8.500009134121998e-05, "loss": 0.7025, "step": 2582 }, { "epoch": 0.5309898242368177, "grad_norm": 0.20377467572689056, "learning_rate": 8.499551461517332e-05, "loss": 0.6907, "step": 2583 }, { "epoch": 0.5311953952101963, "grad_norm": 0.2061266154050827, "learning_rate": 8.499093591873459e-05, "loss": 0.7025, "step": 2584 }, { "epoch": 0.5314009661835749, "grad_norm": 0.20886844396591187, "learning_rate": 8.498635525212937e-05, "loss": 0.689, "step": 2585 }, { "epoch": 0.5316065371569534, "grad_norm": 0.21331052482128143, "learning_rate": 8.498177261558332e-05, "loss": 0.7088, "step": 2586 }, { "epoch": 0.531812108130332, "grad_norm": 0.2123933583498001, "learning_rate": 8.49771880093222e-05, "loss": 0.6907, "step": 2587 }, { "epoch": 0.5320176791037106, "grad_norm": 0.20878660678863525, "learning_rate": 8.49726014335719e-05, "loss": 0.724, "step": 2588 }, { "epoch": 0.5322232500770891, "grad_norm": 0.1978175789117813, "learning_rate": 8.496801288855835e-05, "loss": 0.6824, "step": 2589 }, { "epoch": 0.5324288210504676, "grad_norm": 0.21396887302398682, "learning_rate": 8.496342237450761e-05, "loss": 0.712, "step": 2590 }, { "epoch": 0.5326343920238462, "grad_norm": 0.21784614026546478, "learning_rate": 8.495882989164584e-05, "loss": 0.6793, "step": 2591 }, { "epoch": 0.5328399629972248, "grad_norm": 0.20604658126831055, "learning_rate": 8.495423544019928e-05, "loss": 0.7158, "step": 2592 }, { "epoch": 0.5330455339706034, "grad_norm": 0.21813294291496277, "learning_rate": 8.49496390203943e-05, "loss": 0.6887, "step": 2593 }, { "epoch": 0.5332511049439819, "grad_norm": 0.1722048819065094, "learning_rate": 8.494504063245733e-05, "loss": 0.6013, "step": 2594 }, { "epoch": 0.5334566759173605, "grad_norm": 0.2043728232383728, "learning_rate": 8.49404402766149e-05, "loss": 0.684, "step": 2595 }, { "epoch": 0.533662246890739, "grad_norm": 0.20982548594474792, "learning_rate": 8.493583795309364e-05, "loss": 0.6776, "step": 2596 }, { "epoch": 0.5338678178641176, "grad_norm": 0.20805718004703522, "learning_rate": 8.493123366212034e-05, "loss": 0.7061, "step": 2597 }, { "epoch": 0.5340733888374961, "grad_norm": 0.1766945868730545, "learning_rate": 8.492662740392178e-05, "loss": 0.595, "step": 2598 }, { "epoch": 0.5342789598108747, "grad_norm": 0.22322477400302887, "learning_rate": 8.49220191787249e-05, "loss": 0.665, "step": 2599 }, { "epoch": 0.5344845307842533, "grad_norm": 0.22785376012325287, "learning_rate": 8.491740898675675e-05, "loss": 0.7141, "step": 2600 }, { "epoch": 0.5346901017576319, "grad_norm": 0.2232331484556198, "learning_rate": 8.491279682824441e-05, "loss": 0.7175, "step": 2601 }, { "epoch": 0.5348956727310104, "grad_norm": 0.2167566865682602, "learning_rate": 8.490818270341514e-05, "loss": 0.6922, "step": 2602 }, { "epoch": 0.5351012437043889, "grad_norm": 0.20170411467552185, "learning_rate": 8.490356661249623e-05, "loss": 0.6809, "step": 2603 }, { "epoch": 0.5353068146777675, "grad_norm": 0.21896955370903015, "learning_rate": 8.48989485557151e-05, "loss": 0.6952, "step": 2604 }, { "epoch": 0.535512385651146, "grad_norm": 0.17013712227344513, "learning_rate": 8.489432853329927e-05, "loss": 0.5891, "step": 2605 }, { "epoch": 0.5357179566245246, "grad_norm": 0.23494184017181396, "learning_rate": 8.488970654547632e-05, "loss": 0.6739, "step": 2606 }, { "epoch": 0.5359235275979032, "grad_norm": 0.21912021934986115, "learning_rate": 8.4885082592474e-05, "loss": 0.7035, "step": 2607 }, { "epoch": 0.5361290985712818, "grad_norm": 0.14512377977371216, "learning_rate": 8.488045667452006e-05, "loss": 0.569, "step": 2608 }, { "epoch": 0.5363346695446602, "grad_norm": 0.14050711691379547, "learning_rate": 8.487582879184242e-05, "loss": 0.5772, "step": 2609 }, { "epoch": 0.5365402405180388, "grad_norm": 0.25031203031539917, "learning_rate": 8.48711989446691e-05, "loss": 0.6868, "step": 2610 }, { "epoch": 0.5367458114914174, "grad_norm": 0.2108568251132965, "learning_rate": 8.486656713322814e-05, "loss": 0.6894, "step": 2611 }, { "epoch": 0.536951382464796, "grad_norm": 0.22467973828315735, "learning_rate": 8.486193335774777e-05, "loss": 0.692, "step": 2612 }, { "epoch": 0.5371569534381745, "grad_norm": 0.2571062743663788, "learning_rate": 8.485729761845625e-05, "loss": 0.705, "step": 2613 }, { "epoch": 0.5373625244115531, "grad_norm": 0.21951597929000854, "learning_rate": 8.485265991558196e-05, "loss": 0.6824, "step": 2614 }, { "epoch": 0.5375680953849317, "grad_norm": 0.22675755620002747, "learning_rate": 8.48480202493534e-05, "loss": 0.7114, "step": 2615 }, { "epoch": 0.5377736663583103, "grad_norm": 0.2269049733877182, "learning_rate": 8.484337861999912e-05, "loss": 0.6641, "step": 2616 }, { "epoch": 0.5379792373316887, "grad_norm": 0.21990883350372314, "learning_rate": 8.48387350277478e-05, "loss": 0.7275, "step": 2617 }, { "epoch": 0.5381848083050673, "grad_norm": 0.21468190848827362, "learning_rate": 8.483408947282823e-05, "loss": 0.7202, "step": 2618 }, { "epoch": 0.5383903792784459, "grad_norm": 0.21018457412719727, "learning_rate": 8.482944195546925e-05, "loss": 0.6831, "step": 2619 }, { "epoch": 0.5385959502518245, "grad_norm": 0.2128850817680359, "learning_rate": 8.482479247589982e-05, "loss": 0.6809, "step": 2620 }, { "epoch": 0.538801521225203, "grad_norm": 0.23084747791290283, "learning_rate": 8.4820141034349e-05, "loss": 0.6099, "step": 2621 }, { "epoch": 0.5390070921985816, "grad_norm": 0.22527490556240082, "learning_rate": 8.481548763104597e-05, "loss": 0.7123, "step": 2622 }, { "epoch": 0.5392126631719601, "grad_norm": 0.22562628984451294, "learning_rate": 8.481083226621994e-05, "loss": 0.707, "step": 2623 }, { "epoch": 0.5394182341453386, "grad_norm": 0.21400360763072968, "learning_rate": 8.48061749401003e-05, "loss": 0.7019, "step": 2624 }, { "epoch": 0.5396238051187172, "grad_norm": 0.20809048414230347, "learning_rate": 8.480151565291646e-05, "loss": 0.7188, "step": 2625 }, { "epoch": 0.5398293760920958, "grad_norm": 0.21414582431316376, "learning_rate": 8.479685440489798e-05, "loss": 0.6698, "step": 2626 }, { "epoch": 0.5400349470654744, "grad_norm": 0.19604355096817017, "learning_rate": 8.47921911962745e-05, "loss": 0.6728, "step": 2627 }, { "epoch": 0.5402405180388529, "grad_norm": 0.2081209272146225, "learning_rate": 8.478752602727573e-05, "loss": 0.6839, "step": 2628 }, { "epoch": 0.5404460890122315, "grad_norm": 0.21594710648059845, "learning_rate": 8.478285889813153e-05, "loss": 0.6845, "step": 2629 }, { "epoch": 0.54065165998561, "grad_norm": 0.21320217847824097, "learning_rate": 8.477818980907183e-05, "loss": 0.7046, "step": 2630 }, { "epoch": 0.5408572309589886, "grad_norm": 0.20672303438186646, "learning_rate": 8.477351876032662e-05, "loss": 0.7343, "step": 2631 }, { "epoch": 0.5410628019323671, "grad_norm": 0.1888507753610611, "learning_rate": 8.476884575212606e-05, "loss": 0.6666, "step": 2632 }, { "epoch": 0.5412683729057457, "grad_norm": 0.19607265293598175, "learning_rate": 8.476417078470032e-05, "loss": 0.6881, "step": 2633 }, { "epoch": 0.5414739438791243, "grad_norm": 0.20374587178230286, "learning_rate": 8.475949385827977e-05, "loss": 0.6748, "step": 2634 }, { "epoch": 0.5416795148525029, "grad_norm": 0.2075163573026657, "learning_rate": 8.475481497309478e-05, "loss": 0.7178, "step": 2635 }, { "epoch": 0.5418850858258814, "grad_norm": 0.20457369089126587, "learning_rate": 8.475013412937587e-05, "loss": 0.6713, "step": 2636 }, { "epoch": 0.5420906567992599, "grad_norm": 0.22288042306900024, "learning_rate": 8.474545132735365e-05, "loss": 0.593, "step": 2637 }, { "epoch": 0.5422962277726385, "grad_norm": 0.2154739946126938, "learning_rate": 8.474076656725881e-05, "loss": 0.6944, "step": 2638 }, { "epoch": 0.5425017987460171, "grad_norm": 0.21423187851905823, "learning_rate": 8.473607984932215e-05, "loss": 0.6635, "step": 2639 }, { "epoch": 0.5427073697193956, "grad_norm": 0.24016740918159485, "learning_rate": 8.473139117377456e-05, "loss": 0.7088, "step": 2640 }, { "epoch": 0.5429129406927742, "grad_norm": 0.2100851833820343, "learning_rate": 8.472670054084704e-05, "loss": 0.6737, "step": 2641 }, { "epoch": 0.5431185116661528, "grad_norm": 0.20590589940547943, "learning_rate": 8.472200795077065e-05, "loss": 0.7015, "step": 2642 }, { "epoch": 0.5433240826395312, "grad_norm": 0.20215122401714325, "learning_rate": 8.47173134037766e-05, "loss": 0.6834, "step": 2643 }, { "epoch": 0.5435296536129098, "grad_norm": 0.17897242307662964, "learning_rate": 8.471261690009615e-05, "loss": 0.5736, "step": 2644 }, { "epoch": 0.5437352245862884, "grad_norm": 0.1412929892539978, "learning_rate": 8.470791843996068e-05, "loss": 0.5684, "step": 2645 }, { "epoch": 0.543940795559667, "grad_norm": 0.23520296812057495, "learning_rate": 8.470321802360167e-05, "loss": 0.6979, "step": 2646 }, { "epoch": 0.5441463665330455, "grad_norm": 0.22806185483932495, "learning_rate": 8.469851565125068e-05, "loss": 0.6768, "step": 2647 }, { "epoch": 0.5443519375064241, "grad_norm": 0.20918670296669006, "learning_rate": 8.469381132313938e-05, "loss": 0.669, "step": 2648 }, { "epoch": 0.5445575084798027, "grad_norm": 0.21143250167369843, "learning_rate": 8.468910503949951e-05, "loss": 0.7044, "step": 2649 }, { "epoch": 0.5447630794531813, "grad_norm": 0.21474787592887878, "learning_rate": 8.468439680056295e-05, "loss": 0.7171, "step": 2650 }, { "epoch": 0.5449686504265597, "grad_norm": 0.20778292417526245, "learning_rate": 8.467968660656164e-05, "loss": 0.6719, "step": 2651 }, { "epoch": 0.5451742213999383, "grad_norm": 0.20223721861839294, "learning_rate": 8.467497445772764e-05, "loss": 0.5761, "step": 2652 }, { "epoch": 0.5453797923733169, "grad_norm": 0.16389262676239014, "learning_rate": 8.467026035429308e-05, "loss": 0.6203, "step": 2653 }, { "epoch": 0.5455853633466955, "grad_norm": 0.23996488749980927, "learning_rate": 8.466554429649022e-05, "loss": 0.7091, "step": 2654 }, { "epoch": 0.545790934320074, "grad_norm": 0.22990204393863678, "learning_rate": 8.466082628455138e-05, "loss": 0.6889, "step": 2655 }, { "epoch": 0.5459965052934526, "grad_norm": 0.20042270421981812, "learning_rate": 8.4656106318709e-05, "loss": 0.6864, "step": 2656 }, { "epoch": 0.5462020762668311, "grad_norm": 0.2556054890155792, "learning_rate": 8.465138439919563e-05, "loss": 0.6858, "step": 2657 }, { "epoch": 0.5464076472402097, "grad_norm": 0.20988969504833221, "learning_rate": 8.464666052624386e-05, "loss": 0.6907, "step": 2658 }, { "epoch": 0.5466132182135882, "grad_norm": 0.21028688549995422, "learning_rate": 8.464193470008646e-05, "loss": 0.7199, "step": 2659 }, { "epoch": 0.5468187891869668, "grad_norm": 0.20908872783184052, "learning_rate": 8.463720692095621e-05, "loss": 0.6965, "step": 2660 }, { "epoch": 0.5470243601603454, "grad_norm": 0.20974692702293396, "learning_rate": 8.463247718908604e-05, "loss": 0.6913, "step": 2661 }, { "epoch": 0.5472299311337239, "grad_norm": 0.3178030550479889, "learning_rate": 8.462774550470894e-05, "loss": 0.5966, "step": 2662 }, { "epoch": 0.5474355021071025, "grad_norm": 0.23371629416942596, "learning_rate": 8.462301186805807e-05, "loss": 0.6999, "step": 2663 }, { "epoch": 0.547641073080481, "grad_norm": 0.2393561601638794, "learning_rate": 8.461827627936658e-05, "loss": 0.6981, "step": 2664 }, { "epoch": 0.5478466440538596, "grad_norm": 0.21029163897037506, "learning_rate": 8.46135387388678e-05, "loss": 0.6925, "step": 2665 }, { "epoch": 0.5480522150272381, "grad_norm": 0.20427922904491425, "learning_rate": 8.460879924679513e-05, "loss": 0.648, "step": 2666 }, { "epoch": 0.5482577860006167, "grad_norm": 0.20650714635849, "learning_rate": 8.460405780338205e-05, "loss": 0.5918, "step": 2667 }, { "epoch": 0.5484633569739953, "grad_norm": 0.24088306725025177, "learning_rate": 8.459931440886214e-05, "loss": 0.7039, "step": 2668 }, { "epoch": 0.5486689279473739, "grad_norm": 0.22175416350364685, "learning_rate": 8.45945690634691e-05, "loss": 0.7038, "step": 2669 }, { "epoch": 0.5488744989207524, "grad_norm": 0.21606440842151642, "learning_rate": 8.45898217674367e-05, "loss": 0.6745, "step": 2670 }, { "epoch": 0.5490800698941309, "grad_norm": 0.22006148099899292, "learning_rate": 8.458507252099884e-05, "loss": 0.7169, "step": 2671 }, { "epoch": 0.5492856408675095, "grad_norm": 0.2132798433303833, "learning_rate": 8.458032132438947e-05, "loss": 0.6769, "step": 2672 }, { "epoch": 0.5494912118408881, "grad_norm": 0.2083420604467392, "learning_rate": 8.457556817784266e-05, "loss": 0.6845, "step": 2673 }, { "epoch": 0.5496967828142666, "grad_norm": 0.16094450652599335, "learning_rate": 8.457081308159259e-05, "loss": 0.573, "step": 2674 }, { "epoch": 0.5499023537876452, "grad_norm": 0.23418548703193665, "learning_rate": 8.456605603587351e-05, "loss": 0.6743, "step": 2675 }, { "epoch": 0.5501079247610238, "grad_norm": 0.2129811942577362, "learning_rate": 8.456129704091978e-05, "loss": 0.6956, "step": 2676 }, { "epoch": 0.5503134957344022, "grad_norm": 0.14898192882537842, "learning_rate": 8.455653609696585e-05, "loss": 0.5923, "step": 2677 }, { "epoch": 0.5505190667077808, "grad_norm": 0.22483858466148376, "learning_rate": 8.455177320424627e-05, "loss": 0.6918, "step": 2678 }, { "epoch": 0.5507246376811594, "grad_norm": 0.22401611506938934, "learning_rate": 8.454700836299571e-05, "loss": 0.6985, "step": 2679 }, { "epoch": 0.550930208654538, "grad_norm": 0.19923460483551025, "learning_rate": 8.454224157344887e-05, "loss": 0.729, "step": 2680 }, { "epoch": 0.5511357796279165, "grad_norm": 0.21183621883392334, "learning_rate": 8.453747283584061e-05, "loss": 0.677, "step": 2681 }, { "epoch": 0.5513413506012951, "grad_norm": 0.16109618544578552, "learning_rate": 8.453270215040588e-05, "loss": 0.5949, "step": 2682 }, { "epoch": 0.5515469215746737, "grad_norm": 0.21456550061702728, "learning_rate": 8.452792951737966e-05, "loss": 0.7069, "step": 2683 }, { "epoch": 0.5517524925480523, "grad_norm": 0.19927652180194855, "learning_rate": 8.452315493699713e-05, "loss": 0.6762, "step": 2684 }, { "epoch": 0.5519580635214307, "grad_norm": 0.19462721049785614, "learning_rate": 8.451837840949347e-05, "loss": 0.701, "step": 2685 }, { "epoch": 0.5521636344948093, "grad_norm": 0.22193773090839386, "learning_rate": 8.451359993510403e-05, "loss": 0.6949, "step": 2686 }, { "epoch": 0.5523692054681879, "grad_norm": 0.22146186232566833, "learning_rate": 8.450881951406419e-05, "loss": 0.7208, "step": 2687 }, { "epoch": 0.5525747764415665, "grad_norm": 0.19484825432300568, "learning_rate": 8.45040371466095e-05, "loss": 0.6823, "step": 2688 }, { "epoch": 0.552780347414945, "grad_norm": 0.20109498500823975, "learning_rate": 8.449925283297551e-05, "loss": 0.7008, "step": 2689 }, { "epoch": 0.5529859183883236, "grad_norm": 0.1965745985507965, "learning_rate": 8.449446657339798e-05, "loss": 0.7047, "step": 2690 }, { "epoch": 0.5531914893617021, "grad_norm": 0.19609376788139343, "learning_rate": 8.448967836811266e-05, "loss": 0.6856, "step": 2691 }, { "epoch": 0.5533970603350807, "grad_norm": 0.19566380977630615, "learning_rate": 8.448488821735546e-05, "loss": 0.6883, "step": 2692 }, { "epoch": 0.5536026313084592, "grad_norm": 0.18993543088436127, "learning_rate": 8.448009612136238e-05, "loss": 0.5882, "step": 2693 }, { "epoch": 0.5538082022818378, "grad_norm": 0.22677689790725708, "learning_rate": 8.44753020803695e-05, "loss": 0.695, "step": 2694 }, { "epoch": 0.5540137732552164, "grad_norm": 0.21654780209064484, "learning_rate": 8.447050609461299e-05, "loss": 0.7006, "step": 2695 }, { "epoch": 0.5542193442285949, "grad_norm": 0.1987585723400116, "learning_rate": 8.446570816432911e-05, "loss": 0.6786, "step": 2696 }, { "epoch": 0.5544249152019735, "grad_norm": 0.21320489048957825, "learning_rate": 8.446090828975427e-05, "loss": 0.7029, "step": 2697 }, { "epoch": 0.554630486175352, "grad_norm": 0.16352033615112305, "learning_rate": 8.445610647112492e-05, "loss": 0.5938, "step": 2698 }, { "epoch": 0.5548360571487306, "grad_norm": 0.21454685926437378, "learning_rate": 8.44513027086776e-05, "loss": 0.6759, "step": 2699 }, { "epoch": 0.5550416281221091, "grad_norm": 0.20842206478118896, "learning_rate": 8.444649700264902e-05, "loss": 0.6922, "step": 2700 }, { "epoch": 0.5552471990954877, "grad_norm": 0.1389513611793518, "learning_rate": 8.444168935327589e-05, "loss": 0.5826, "step": 2701 }, { "epoch": 0.5554527700688663, "grad_norm": 0.20907482504844666, "learning_rate": 8.443687976079507e-05, "loss": 0.6838, "step": 2702 }, { "epoch": 0.5556583410422449, "grad_norm": 0.21713374555110931, "learning_rate": 8.443206822544352e-05, "loss": 0.7058, "step": 2703 }, { "epoch": 0.5558639120156234, "grad_norm": 0.1558568924665451, "learning_rate": 8.442725474745827e-05, "loss": 0.5847, "step": 2704 }, { "epoch": 0.5560694829890019, "grad_norm": 0.20640867948532104, "learning_rate": 8.442243932707647e-05, "loss": 0.7049, "step": 2705 }, { "epoch": 0.5562750539623805, "grad_norm": 0.12573988735675812, "learning_rate": 8.441762196453534e-05, "loss": 0.5863, "step": 2706 }, { "epoch": 0.5564806249357591, "grad_norm": 0.21294710040092468, "learning_rate": 8.441280266007221e-05, "loss": 0.6913, "step": 2707 }, { "epoch": 0.5566861959091376, "grad_norm": 0.2014019787311554, "learning_rate": 8.44079814139245e-05, "loss": 0.6954, "step": 2708 }, { "epoch": 0.5568917668825162, "grad_norm": 0.2047373652458191, "learning_rate": 8.440315822632974e-05, "loss": 0.6976, "step": 2709 }, { "epoch": 0.5570973378558948, "grad_norm": 0.21064162254333496, "learning_rate": 8.439833309752556e-05, "loss": 0.6994, "step": 2710 }, { "epoch": 0.5573029088292734, "grad_norm": 0.21300119161605835, "learning_rate": 8.439350602774964e-05, "loss": 0.6748, "step": 2711 }, { "epoch": 0.5575084798026518, "grad_norm": 0.17572659254074097, "learning_rate": 8.438867701723982e-05, "loss": 0.5906, "step": 2712 }, { "epoch": 0.5577140507760304, "grad_norm": 0.13898785412311554, "learning_rate": 8.438384606623397e-05, "loss": 0.5679, "step": 2713 }, { "epoch": 0.557919621749409, "grad_norm": 0.24983015656471252, "learning_rate": 8.437901317497011e-05, "loss": 0.6696, "step": 2714 }, { "epoch": 0.5581251927227875, "grad_norm": 0.21426579356193542, "learning_rate": 8.437417834368632e-05, "loss": 0.6824, "step": 2715 }, { "epoch": 0.5583307636961661, "grad_norm": 0.20514623820781708, "learning_rate": 8.436934157262082e-05, "loss": 0.708, "step": 2716 }, { "epoch": 0.5585363346695447, "grad_norm": 0.21398288011550903, "learning_rate": 8.436450286201184e-05, "loss": 0.7051, "step": 2717 }, { "epoch": 0.5587419056429233, "grad_norm": 0.2091488540172577, "learning_rate": 8.435966221209782e-05, "loss": 0.6671, "step": 2718 }, { "epoch": 0.5589474766163017, "grad_norm": 0.21767988801002502, "learning_rate": 8.43548196231172e-05, "loss": 0.724, "step": 2719 }, { "epoch": 0.5591530475896803, "grad_norm": 0.2218277007341385, "learning_rate": 8.434997509530855e-05, "loss": 0.6924, "step": 2720 }, { "epoch": 0.5593586185630589, "grad_norm": 0.2099279761314392, "learning_rate": 8.434512862891058e-05, "loss": 0.6847, "step": 2721 }, { "epoch": 0.5595641895364375, "grad_norm": 0.2063916176557541, "learning_rate": 8.434028022416199e-05, "loss": 0.669, "step": 2722 }, { "epoch": 0.559769760509816, "grad_norm": 0.2331087738275528, "learning_rate": 8.433542988130168e-05, "loss": 0.6039, "step": 2723 }, { "epoch": 0.5599753314831946, "grad_norm": 0.22927048802375793, "learning_rate": 8.433057760056858e-05, "loss": 0.6982, "step": 2724 }, { "epoch": 0.5601809024565731, "grad_norm": 0.22356915473937988, "learning_rate": 8.432572338220177e-05, "loss": 0.6676, "step": 2725 }, { "epoch": 0.5603864734299517, "grad_norm": 0.21038733422756195, "learning_rate": 8.432086722644038e-05, "loss": 0.6922, "step": 2726 }, { "epoch": 0.5605920444033302, "grad_norm": 0.21845050156116486, "learning_rate": 8.431600913352363e-05, "loss": 0.6809, "step": 2727 }, { "epoch": 0.5607976153767088, "grad_norm": 0.20335665345191956, "learning_rate": 8.431114910369087e-05, "loss": 0.6561, "step": 2728 }, { "epoch": 0.5610031863500874, "grad_norm": 0.20789889991283417, "learning_rate": 8.430628713718156e-05, "loss": 0.7282, "step": 2729 }, { "epoch": 0.561208757323466, "grad_norm": 0.21542754769325256, "learning_rate": 8.430142323423518e-05, "loss": 0.6794, "step": 2730 }, { "epoch": 0.5614143282968445, "grad_norm": 0.19883479177951813, "learning_rate": 8.429655739509137e-05, "loss": 0.7022, "step": 2731 }, { "epoch": 0.561619899270223, "grad_norm": 0.2027217149734497, "learning_rate": 8.429168961998987e-05, "loss": 0.7122, "step": 2732 }, { "epoch": 0.5618254702436016, "grad_norm": 0.20962925255298615, "learning_rate": 8.428681990917045e-05, "loss": 0.702, "step": 2733 }, { "epoch": 0.5620310412169801, "grad_norm": 0.2032438963651657, "learning_rate": 8.428194826287304e-05, "loss": 0.6828, "step": 2734 }, { "epoch": 0.5622366121903587, "grad_norm": 0.19384074211120605, "learning_rate": 8.427707468133766e-05, "loss": 0.6693, "step": 2735 }, { "epoch": 0.5624421831637373, "grad_norm": 0.20118926465511322, "learning_rate": 8.427219916480437e-05, "loss": 0.7003, "step": 2736 }, { "epoch": 0.5626477541371159, "grad_norm": 0.21019205451011658, "learning_rate": 8.426732171351338e-05, "loss": 0.7088, "step": 2737 }, { "epoch": 0.5628533251104944, "grad_norm": 0.19624383747577667, "learning_rate": 8.426244232770501e-05, "loss": 0.6929, "step": 2738 }, { "epoch": 0.5630588960838729, "grad_norm": 0.20001311600208282, "learning_rate": 8.425756100761961e-05, "loss": 0.6641, "step": 2739 }, { "epoch": 0.5632644670572515, "grad_norm": 0.20031724870204926, "learning_rate": 8.425267775349766e-05, "loss": 0.7202, "step": 2740 }, { "epoch": 0.5634700380306301, "grad_norm": 0.20123572647571564, "learning_rate": 8.424779256557976e-05, "loss": 0.6924, "step": 2741 }, { "epoch": 0.5636756090040086, "grad_norm": 0.20444491505622864, "learning_rate": 8.424290544410654e-05, "loss": 0.6893, "step": 2742 }, { "epoch": 0.5638811799773872, "grad_norm": 0.1976771205663681, "learning_rate": 8.42380163893188e-05, "loss": 0.6709, "step": 2743 }, { "epoch": 0.5640867509507658, "grad_norm": 0.222488135099411, "learning_rate": 8.42331254014574e-05, "loss": 0.5918, "step": 2744 }, { "epoch": 0.5642923219241444, "grad_norm": 0.21417805552482605, "learning_rate": 8.422823248076329e-05, "loss": 0.6833, "step": 2745 }, { "epoch": 0.5644978928975228, "grad_norm": 0.21681103110313416, "learning_rate": 8.42233376274775e-05, "loss": 0.7288, "step": 2746 }, { "epoch": 0.5647034638709014, "grad_norm": 0.20778658986091614, "learning_rate": 8.42184408418412e-05, "loss": 0.6749, "step": 2747 }, { "epoch": 0.56490903484428, "grad_norm": 0.20677468180656433, "learning_rate": 8.421354212409563e-05, "loss": 0.7008, "step": 2748 }, { "epoch": 0.5651146058176586, "grad_norm": 0.15667958557605743, "learning_rate": 8.420864147448213e-05, "loss": 0.5793, "step": 2749 }, { "epoch": 0.5653201767910371, "grad_norm": 0.22153092920780182, "learning_rate": 8.42037388932421e-05, "loss": 0.6865, "step": 2750 }, { "epoch": 0.5655257477644157, "grad_norm": 0.22236353158950806, "learning_rate": 8.419883438061711e-05, "loss": 0.6672, "step": 2751 }, { "epoch": 0.5657313187377943, "grad_norm": 0.2081800103187561, "learning_rate": 8.419392793684878e-05, "loss": 0.7169, "step": 2752 }, { "epoch": 0.5659368897111727, "grad_norm": 0.16220282018184662, "learning_rate": 8.418901956217878e-05, "loss": 0.5878, "step": 2753 }, { "epoch": 0.5661424606845513, "grad_norm": 0.21759817004203796, "learning_rate": 8.418410925684898e-05, "loss": 0.7273, "step": 2754 }, { "epoch": 0.5663480316579299, "grad_norm": 0.22539561986923218, "learning_rate": 8.417919702110125e-05, "loss": 0.7, "step": 2755 }, { "epoch": 0.5665536026313085, "grad_norm": 0.196711003780365, "learning_rate": 8.41742828551776e-05, "loss": 0.7179, "step": 2756 }, { "epoch": 0.566759173604687, "grad_norm": 0.210893914103508, "learning_rate": 8.416936675932015e-05, "loss": 0.708, "step": 2757 }, { "epoch": 0.5669647445780656, "grad_norm": 0.19233620166778564, "learning_rate": 8.416444873377108e-05, "loss": 0.5911, "step": 2758 }, { "epoch": 0.5671703155514441, "grad_norm": 0.21840979158878326, "learning_rate": 8.415952877877266e-05, "loss": 0.6871, "step": 2759 }, { "epoch": 0.5673758865248227, "grad_norm": 0.216123566031456, "learning_rate": 8.41546068945673e-05, "loss": 0.7381, "step": 2760 }, { "epoch": 0.5675814574982012, "grad_norm": 0.14728981256484985, "learning_rate": 8.414968308139747e-05, "loss": 0.5818, "step": 2761 }, { "epoch": 0.5677870284715798, "grad_norm": 0.16224178671836853, "learning_rate": 8.414475733950572e-05, "loss": 0.5819, "step": 2762 }, { "epoch": 0.5679925994449584, "grad_norm": 0.23816072940826416, "learning_rate": 8.413982966913475e-05, "loss": 0.7021, "step": 2763 }, { "epoch": 0.568198170418337, "grad_norm": 0.2145988643169403, "learning_rate": 8.413490007052731e-05, "loss": 0.712, "step": 2764 }, { "epoch": 0.5684037413917155, "grad_norm": 0.1928829550743103, "learning_rate": 8.412996854392625e-05, "loss": 0.6792, "step": 2765 }, { "epoch": 0.568609312365094, "grad_norm": 0.22511503100395203, "learning_rate": 8.412503508957455e-05, "loss": 0.6914, "step": 2766 }, { "epoch": 0.5688148833384726, "grad_norm": 0.23448607325553894, "learning_rate": 8.412009970771524e-05, "loss": 0.7113, "step": 2767 }, { "epoch": 0.5690204543118512, "grad_norm": 0.21442458033561707, "learning_rate": 8.411516239859146e-05, "loss": 0.7, "step": 2768 }, { "epoch": 0.5692260252852297, "grad_norm": 0.18232490122318268, "learning_rate": 8.411022316244645e-05, "loss": 0.5882, "step": 2769 }, { "epoch": 0.5694315962586083, "grad_norm": 0.1396799087524414, "learning_rate": 8.410528199952354e-05, "loss": 0.5754, "step": 2770 }, { "epoch": 0.5696371672319869, "grad_norm": 0.2816780209541321, "learning_rate": 8.410033891006617e-05, "loss": 0.6885, "step": 2771 }, { "epoch": 0.5698427382053654, "grad_norm": 0.26476380228996277, "learning_rate": 8.409539389431785e-05, "loss": 0.6791, "step": 2772 }, { "epoch": 0.5700483091787439, "grad_norm": 0.2113625705242157, "learning_rate": 8.409044695252221e-05, "loss": 0.7115, "step": 2773 }, { "epoch": 0.5702538801521225, "grad_norm": 0.21605044603347778, "learning_rate": 8.408549808492296e-05, "loss": 0.7098, "step": 2774 }, { "epoch": 0.5704594511255011, "grad_norm": 0.23488545417785645, "learning_rate": 8.40805472917639e-05, "loss": 0.6791, "step": 2775 }, { "epoch": 0.5706650220988796, "grad_norm": 0.23377586901187897, "learning_rate": 8.407559457328894e-05, "loss": 0.7159, "step": 2776 }, { "epoch": 0.5708705930722582, "grad_norm": 0.2001940906047821, "learning_rate": 8.407063992974208e-05, "loss": 0.6831, "step": 2777 }, { "epoch": 0.5710761640456368, "grad_norm": 0.20575560629367828, "learning_rate": 8.40656833613674e-05, "loss": 0.6893, "step": 2778 }, { "epoch": 0.5712817350190154, "grad_norm": 0.21755361557006836, "learning_rate": 8.406072486840909e-05, "loss": 0.6912, "step": 2779 }, { "epoch": 0.5714873059923938, "grad_norm": 0.21302054822444916, "learning_rate": 8.405576445111144e-05, "loss": 0.5823, "step": 2780 }, { "epoch": 0.5716928769657724, "grad_norm": 0.2074202299118042, "learning_rate": 8.405080210971882e-05, "loss": 0.6948, "step": 2781 }, { "epoch": 0.571898447939151, "grad_norm": 0.2045622020959854, "learning_rate": 8.40458378444757e-05, "loss": 0.6982, "step": 2782 }, { "epoch": 0.5721040189125296, "grad_norm": 0.20877763628959656, "learning_rate": 8.404087165562664e-05, "loss": 0.696, "step": 2783 }, { "epoch": 0.5723095898859081, "grad_norm": 0.21138116717338562, "learning_rate": 8.403590354341632e-05, "loss": 0.6767, "step": 2784 }, { "epoch": 0.5725151608592867, "grad_norm": 0.20857292413711548, "learning_rate": 8.40309335080895e-05, "loss": 0.6847, "step": 2785 }, { "epoch": 0.5727207318326653, "grad_norm": 0.20251955091953278, "learning_rate": 8.4025961549891e-05, "loss": 0.7044, "step": 2786 }, { "epoch": 0.5729263028060438, "grad_norm": 0.23925918340682983, "learning_rate": 8.40209876690658e-05, "loss": 0.6971, "step": 2787 }, { "epoch": 0.5731318737794223, "grad_norm": 0.19959931075572968, "learning_rate": 8.401601186585888e-05, "loss": 0.5827, "step": 2788 }, { "epoch": 0.5733374447528009, "grad_norm": 0.22731555998325348, "learning_rate": 8.401103414051545e-05, "loss": 0.6834, "step": 2789 }, { "epoch": 0.5735430157261795, "grad_norm": 0.13042806088924408, "learning_rate": 8.400605449328069e-05, "loss": 0.584, "step": 2790 }, { "epoch": 0.573748586699558, "grad_norm": 0.22589558362960815, "learning_rate": 8.400107292439996e-05, "loss": 0.6953, "step": 2791 }, { "epoch": 0.5739541576729366, "grad_norm": 0.2052125185728073, "learning_rate": 8.399608943411864e-05, "loss": 0.6918, "step": 2792 }, { "epoch": 0.5741597286463151, "grad_norm": 0.2042934000492096, "learning_rate": 8.399110402268226e-05, "loss": 0.7068, "step": 2793 }, { "epoch": 0.5743652996196937, "grad_norm": 0.20587709546089172, "learning_rate": 8.398611669033642e-05, "loss": 0.6933, "step": 2794 }, { "epoch": 0.5745708705930722, "grad_norm": 0.1982177048921585, "learning_rate": 8.398112743732685e-05, "loss": 0.6884, "step": 2795 }, { "epoch": 0.5747764415664508, "grad_norm": 0.19220708310604095, "learning_rate": 8.397613626389933e-05, "loss": 0.5803, "step": 2796 }, { "epoch": 0.5749820125398294, "grad_norm": 0.20522017776966095, "learning_rate": 8.397114317029975e-05, "loss": 0.6739, "step": 2797 }, { "epoch": 0.575187583513208, "grad_norm": 0.20296591520309448, "learning_rate": 8.396614815677408e-05, "loss": 0.6968, "step": 2798 }, { "epoch": 0.5753931544865865, "grad_norm": 0.21436072885990143, "learning_rate": 8.396115122356844e-05, "loss": 0.7124, "step": 2799 }, { "epoch": 0.575598725459965, "grad_norm": 0.1649683117866516, "learning_rate": 8.395615237092896e-05, "loss": 0.5981, "step": 2800 }, { "epoch": 0.5758042964333436, "grad_norm": 0.20267538726329803, "learning_rate": 8.395115159910193e-05, "loss": 0.6791, "step": 2801 }, { "epoch": 0.5760098674067222, "grad_norm": 0.2140885293483734, "learning_rate": 8.394614890833374e-05, "loss": 0.7054, "step": 2802 }, { "epoch": 0.5762154383801007, "grad_norm": 0.20777259767055511, "learning_rate": 8.394114429887083e-05, "loss": 0.68, "step": 2803 }, { "epoch": 0.5764210093534793, "grad_norm": 0.2137485295534134, "learning_rate": 8.393613777095974e-05, "loss": 0.7086, "step": 2804 }, { "epoch": 0.5766265803268579, "grad_norm": 0.20304176211357117, "learning_rate": 8.393112932484713e-05, "loss": 0.6617, "step": 2805 }, { "epoch": 0.5768321513002365, "grad_norm": 0.21544432640075684, "learning_rate": 8.392611896077973e-05, "loss": 0.7053, "step": 2806 }, { "epoch": 0.5770377222736149, "grad_norm": 0.21482408046722412, "learning_rate": 8.39211066790044e-05, "loss": 0.6994, "step": 2807 }, { "epoch": 0.5772432932469935, "grad_norm": 0.15521647036075592, "learning_rate": 8.391609247976805e-05, "loss": 0.5946, "step": 2808 }, { "epoch": 0.5774488642203721, "grad_norm": 0.19584627449512482, "learning_rate": 8.391107636331775e-05, "loss": 0.6638, "step": 2809 }, { "epoch": 0.5776544351937506, "grad_norm": 0.2126510590314865, "learning_rate": 8.390605832990055e-05, "loss": 0.7362, "step": 2810 }, { "epoch": 0.5778600061671292, "grad_norm": 0.1384701430797577, "learning_rate": 8.390103837976373e-05, "loss": 0.5919, "step": 2811 }, { "epoch": 0.5780655771405078, "grad_norm": 0.20149080455303192, "learning_rate": 8.389601651315454e-05, "loss": 0.6609, "step": 2812 }, { "epoch": 0.5782711481138864, "grad_norm": 0.13342009484767914, "learning_rate": 8.389099273032045e-05, "loss": 0.5691, "step": 2813 }, { "epoch": 0.5784767190872648, "grad_norm": 0.20240166783332825, "learning_rate": 8.38859670315089e-05, "loss": 0.6667, "step": 2814 }, { "epoch": 0.5786822900606434, "grad_norm": 0.14066733419895172, "learning_rate": 8.388093941696752e-05, "loss": 0.5841, "step": 2815 }, { "epoch": 0.578887861034022, "grad_norm": 0.20561981201171875, "learning_rate": 8.387590988694398e-05, "loss": 0.6808, "step": 2816 }, { "epoch": 0.5790934320074006, "grad_norm": 0.19909094274044037, "learning_rate": 8.387087844168607e-05, "loss": 0.6827, "step": 2817 }, { "epoch": 0.5792990029807791, "grad_norm": 0.19748428463935852, "learning_rate": 8.386584508144166e-05, "loss": 0.6952, "step": 2818 }, { "epoch": 0.5795045739541577, "grad_norm": 0.203225240111351, "learning_rate": 8.386080980645872e-05, "loss": 0.711, "step": 2819 }, { "epoch": 0.5797101449275363, "grad_norm": 0.20350880920886993, "learning_rate": 8.385577261698531e-05, "loss": 0.6672, "step": 2820 }, { "epoch": 0.5799157159009148, "grad_norm": 0.19929729402065277, "learning_rate": 8.385073351326959e-05, "loss": 0.6749, "step": 2821 }, { "epoch": 0.5801212868742933, "grad_norm": 0.20175184309482574, "learning_rate": 8.384569249555983e-05, "loss": 0.6931, "step": 2822 }, { "epoch": 0.5803268578476719, "grad_norm": 0.18173432350158691, "learning_rate": 8.384064956410437e-05, "loss": 0.5901, "step": 2823 }, { "epoch": 0.5805324288210505, "grad_norm": 0.21010646224021912, "learning_rate": 8.383560471915162e-05, "loss": 0.6967, "step": 2824 }, { "epoch": 0.580737999794429, "grad_norm": 0.2225627601146698, "learning_rate": 8.383055796095018e-05, "loss": 0.7137, "step": 2825 }, { "epoch": 0.5809435707678076, "grad_norm": 0.19758129119873047, "learning_rate": 8.382550928974862e-05, "loss": 0.6991, "step": 2826 }, { "epoch": 0.5811491417411861, "grad_norm": 0.19794224202632904, "learning_rate": 8.382045870579569e-05, "loss": 0.6759, "step": 2827 }, { "epoch": 0.5813547127145647, "grad_norm": 0.20339448750019073, "learning_rate": 8.38154062093402e-05, "loss": 0.6621, "step": 2828 }, { "epoch": 0.5815602836879432, "grad_norm": 0.19173693656921387, "learning_rate": 8.381035180063107e-05, "loss": 0.6821, "step": 2829 }, { "epoch": 0.5817658546613218, "grad_norm": 0.1988253891468048, "learning_rate": 8.380529547991732e-05, "loss": 0.6803, "step": 2830 }, { "epoch": 0.5819714256347004, "grad_norm": 0.2126402109861374, "learning_rate": 8.380023724744802e-05, "loss": 0.6765, "step": 2831 }, { "epoch": 0.582176996608079, "grad_norm": 0.20873717963695526, "learning_rate": 8.379517710347238e-05, "loss": 0.6801, "step": 2832 }, { "epoch": 0.5823825675814575, "grad_norm": 0.1995771825313568, "learning_rate": 8.379011504823973e-05, "loss": 0.6837, "step": 2833 }, { "epoch": 0.582588138554836, "grad_norm": 0.9173756241798401, "learning_rate": 8.378505108199937e-05, "loss": 0.7294, "step": 2834 }, { "epoch": 0.5827937095282146, "grad_norm": 0.20103541016578674, "learning_rate": 8.377998520500086e-05, "loss": 0.6703, "step": 2835 }, { "epoch": 0.5829992805015932, "grad_norm": 0.20115043222904205, "learning_rate": 8.377491741749371e-05, "loss": 0.6794, "step": 2836 }, { "epoch": 0.5832048514749717, "grad_norm": 0.2085791677236557, "learning_rate": 8.376984771972763e-05, "loss": 0.6799, "step": 2837 }, { "epoch": 0.5834104224483503, "grad_norm": 0.2213800698518753, "learning_rate": 8.376477611195234e-05, "loss": 0.7313, "step": 2838 }, { "epoch": 0.5836159934217289, "grad_norm": 0.2140512466430664, "learning_rate": 8.375970259441773e-05, "loss": 0.693, "step": 2839 }, { "epoch": 0.5838215643951075, "grad_norm": 0.20790469646453857, "learning_rate": 8.375462716737375e-05, "loss": 0.6993, "step": 2840 }, { "epoch": 0.5840271353684859, "grad_norm": 0.2115468680858612, "learning_rate": 8.374954983107042e-05, "loss": 0.687, "step": 2841 }, { "epoch": 0.5842327063418645, "grad_norm": 0.21003267168998718, "learning_rate": 8.374447058575786e-05, "loss": 0.7148, "step": 2842 }, { "epoch": 0.5844382773152431, "grad_norm": 0.21963387727737427, "learning_rate": 8.373938943168635e-05, "loss": 0.6821, "step": 2843 }, { "epoch": 0.5846438482886216, "grad_norm": 0.20493534207344055, "learning_rate": 8.373430636910619e-05, "loss": 0.6842, "step": 2844 }, { "epoch": 0.5848494192620002, "grad_norm": 0.20353847742080688, "learning_rate": 8.37292213982678e-05, "loss": 0.6853, "step": 2845 }, { "epoch": 0.5850549902353788, "grad_norm": 0.17759917676448822, "learning_rate": 8.372413451942168e-05, "loss": 0.581, "step": 2846 }, { "epoch": 0.5852605612087574, "grad_norm": 0.14481404423713684, "learning_rate": 8.371904573281845e-05, "loss": 0.5929, "step": 2847 }, { "epoch": 0.5854661321821358, "grad_norm": 0.1454802304506302, "learning_rate": 8.371395503870882e-05, "loss": 0.5616, "step": 2848 }, { "epoch": 0.5856717031555144, "grad_norm": 0.24941618740558624, "learning_rate": 8.370886243734358e-05, "loss": 0.6982, "step": 2849 }, { "epoch": 0.585877274128893, "grad_norm": 0.21928314864635468, "learning_rate": 8.370376792897359e-05, "loss": 0.6931, "step": 2850 }, { "epoch": 0.5860828451022716, "grad_norm": 0.20207005739212036, "learning_rate": 8.369867151384987e-05, "loss": 0.6671, "step": 2851 }, { "epoch": 0.5862884160756501, "grad_norm": 0.22684946656227112, "learning_rate": 8.369357319222348e-05, "loss": 0.6684, "step": 2852 }, { "epoch": 0.5864939870490287, "grad_norm": 0.21584348380565643, "learning_rate": 8.368847296434557e-05, "loss": 0.7032, "step": 2853 }, { "epoch": 0.5866995580224073, "grad_norm": 0.209476038813591, "learning_rate": 8.368337083046747e-05, "loss": 0.6804, "step": 2854 }, { "epoch": 0.5869051289957858, "grad_norm": 0.22032958269119263, "learning_rate": 8.367826679084046e-05, "loss": 0.6868, "step": 2855 }, { "epoch": 0.5871106999691643, "grad_norm": 0.21995702385902405, "learning_rate": 8.367316084571603e-05, "loss": 0.6975, "step": 2856 }, { "epoch": 0.5873162709425429, "grad_norm": 0.20626819133758545, "learning_rate": 8.366805299534574e-05, "loss": 0.7272, "step": 2857 }, { "epoch": 0.5875218419159215, "grad_norm": 0.2072131335735321, "learning_rate": 8.36629432399812e-05, "loss": 0.6957, "step": 2858 }, { "epoch": 0.5877274128893001, "grad_norm": 0.21286934614181519, "learning_rate": 8.365783157987416e-05, "loss": 0.7193, "step": 2859 }, { "epoch": 0.5879329838626786, "grad_norm": 0.20594240725040436, "learning_rate": 8.365271801527644e-05, "loss": 0.6996, "step": 2860 }, { "epoch": 0.5881385548360571, "grad_norm": 0.20829501748085022, "learning_rate": 8.364760254643997e-05, "loss": 0.6832, "step": 2861 }, { "epoch": 0.5883441258094357, "grad_norm": 0.2092822641134262, "learning_rate": 8.364248517361676e-05, "loss": 0.7114, "step": 2862 }, { "epoch": 0.5885496967828142, "grad_norm": 0.19926267862319946, "learning_rate": 8.363736589705892e-05, "loss": 0.6744, "step": 2863 }, { "epoch": 0.5887552677561928, "grad_norm": 0.20233862102031708, "learning_rate": 8.363224471701866e-05, "loss": 0.69, "step": 2864 }, { "epoch": 0.5889608387295714, "grad_norm": 0.2081189900636673, "learning_rate": 8.362712163374826e-05, "loss": 0.7025, "step": 2865 }, { "epoch": 0.58916640970295, "grad_norm": 0.19669431447982788, "learning_rate": 8.362199664750012e-05, "loss": 0.6796, "step": 2866 }, { "epoch": 0.5893719806763285, "grad_norm": 0.20693160593509674, "learning_rate": 8.361686975852672e-05, "loss": 0.6996, "step": 2867 }, { "epoch": 0.589577551649707, "grad_norm": 0.20690032839775085, "learning_rate": 8.361174096708066e-05, "loss": 0.6977, "step": 2868 }, { "epoch": 0.5897831226230856, "grad_norm": 0.19090650975704193, "learning_rate": 8.360661027341459e-05, "loss": 0.6905, "step": 2869 }, { "epoch": 0.5899886935964642, "grad_norm": 0.1915200799703598, "learning_rate": 8.360147767778126e-05, "loss": 0.6921, "step": 2870 }, { "epoch": 0.5901942645698427, "grad_norm": 0.20431163907051086, "learning_rate": 8.359634318043356e-05, "loss": 0.6816, "step": 2871 }, { "epoch": 0.5903998355432213, "grad_norm": 0.20922903716564178, "learning_rate": 8.359120678162442e-05, "loss": 0.7141, "step": 2872 }, { "epoch": 0.5906054065165999, "grad_norm": 0.20200544595718384, "learning_rate": 8.358606848160692e-05, "loss": 0.6883, "step": 2873 }, { "epoch": 0.5908109774899785, "grad_norm": 0.22084182500839233, "learning_rate": 8.358092828063416e-05, "loss": 0.5962, "step": 2874 }, { "epoch": 0.5910165484633569, "grad_norm": 0.19920572638511658, "learning_rate": 8.357578617895939e-05, "loss": 0.6921, "step": 2875 }, { "epoch": 0.5912221194367355, "grad_norm": 0.21406704187393188, "learning_rate": 8.357064217683593e-05, "loss": 0.6809, "step": 2876 }, { "epoch": 0.5914276904101141, "grad_norm": 0.20186960697174072, "learning_rate": 8.356549627451723e-05, "loss": 0.7273, "step": 2877 }, { "epoch": 0.5916332613834927, "grad_norm": 0.20613306760787964, "learning_rate": 8.356034847225677e-05, "loss": 0.6998, "step": 2878 }, { "epoch": 0.5918388323568712, "grad_norm": 0.19980058073997498, "learning_rate": 8.355519877030818e-05, "loss": 0.6707, "step": 2879 }, { "epoch": 0.5920444033302498, "grad_norm": 0.17572249472141266, "learning_rate": 8.355004716892514e-05, "loss": 0.5905, "step": 2880 }, { "epoch": 0.5922499743036284, "grad_norm": 0.14615419507026672, "learning_rate": 8.354489366836147e-05, "loss": 0.5936, "step": 2881 }, { "epoch": 0.5924555452770068, "grad_norm": 0.265011191368103, "learning_rate": 8.353973826887105e-05, "loss": 0.7195, "step": 2882 }, { "epoch": 0.5926611162503854, "grad_norm": 0.22780616581439972, "learning_rate": 8.353458097070784e-05, "loss": 0.7003, "step": 2883 }, { "epoch": 0.592866687223764, "grad_norm": 0.2108001857995987, "learning_rate": 8.352942177412594e-05, "loss": 0.6791, "step": 2884 }, { "epoch": 0.5930722581971426, "grad_norm": 0.23062892258167267, "learning_rate": 8.352426067937953e-05, "loss": 0.7012, "step": 2885 }, { "epoch": 0.5932778291705211, "grad_norm": 0.22096315026283264, "learning_rate": 8.351909768672286e-05, "loss": 0.6848, "step": 2886 }, { "epoch": 0.5934834001438997, "grad_norm": 0.19417156279087067, "learning_rate": 8.351393279641026e-05, "loss": 0.6041, "step": 2887 }, { "epoch": 0.5936889711172783, "grad_norm": 0.21793076395988464, "learning_rate": 8.350876600869624e-05, "loss": 0.6832, "step": 2888 }, { "epoch": 0.5938945420906568, "grad_norm": 0.21608784794807434, "learning_rate": 8.350359732383528e-05, "loss": 0.693, "step": 2889 }, { "epoch": 0.5941001130640353, "grad_norm": 0.1427665799856186, "learning_rate": 8.349842674208205e-05, "loss": 0.6014, "step": 2890 }, { "epoch": 0.5943056840374139, "grad_norm": 0.21171724796295166, "learning_rate": 8.349325426369129e-05, "loss": 0.7155, "step": 2891 }, { "epoch": 0.5945112550107925, "grad_norm": 0.20547601580619812, "learning_rate": 8.348807988891778e-05, "loss": 0.6879, "step": 2892 }, { "epoch": 0.5947168259841711, "grad_norm": 0.20329566299915314, "learning_rate": 8.34829036180165e-05, "loss": 0.6956, "step": 2893 }, { "epoch": 0.5949223969575496, "grad_norm": 0.19427530467510223, "learning_rate": 8.347772545124241e-05, "loss": 0.6853, "step": 2894 }, { "epoch": 0.5951279679309281, "grad_norm": 0.19844532012939453, "learning_rate": 8.347254538885063e-05, "loss": 0.6805, "step": 2895 }, { "epoch": 0.5953335389043067, "grad_norm": 0.20042115449905396, "learning_rate": 8.346736343109637e-05, "loss": 0.6648, "step": 2896 }, { "epoch": 0.5955391098776853, "grad_norm": 0.1955205500125885, "learning_rate": 8.34621795782349e-05, "loss": 0.6676, "step": 2897 }, { "epoch": 0.5957446808510638, "grad_norm": 0.19705745577812195, "learning_rate": 8.345699383052162e-05, "loss": 0.6857, "step": 2898 }, { "epoch": 0.5959502518244424, "grad_norm": 0.19771529734134674, "learning_rate": 8.3451806188212e-05, "loss": 0.6992, "step": 2899 }, { "epoch": 0.596155822797821, "grad_norm": 0.1999768763780594, "learning_rate": 8.344661665156161e-05, "loss": 0.7006, "step": 2900 }, { "epoch": 0.5963613937711995, "grad_norm": 0.2035917341709137, "learning_rate": 8.344142522082612e-05, "loss": 0.7032, "step": 2901 }, { "epoch": 0.596566964744578, "grad_norm": 0.20297078788280487, "learning_rate": 8.343623189626129e-05, "loss": 0.681, "step": 2902 }, { "epoch": 0.5967725357179566, "grad_norm": 0.17843900620937347, "learning_rate": 8.343103667812295e-05, "loss": 0.5906, "step": 2903 }, { "epoch": 0.5969781066913352, "grad_norm": 0.2069201022386551, "learning_rate": 8.342583956666706e-05, "loss": 0.7137, "step": 2904 }, { "epoch": 0.5971836776647137, "grad_norm": 0.20919117331504822, "learning_rate": 8.342064056214967e-05, "loss": 0.6923, "step": 2905 }, { "epoch": 0.5973892486380923, "grad_norm": 0.1899642050266266, "learning_rate": 8.34154396648269e-05, "loss": 0.668, "step": 2906 }, { "epoch": 0.5975948196114709, "grad_norm": 0.1988193541765213, "learning_rate": 8.341023687495494e-05, "loss": 0.676, "step": 2907 }, { "epoch": 0.5978003905848495, "grad_norm": 0.21733912825584412, "learning_rate": 8.340503219279017e-05, "loss": 0.6999, "step": 2908 }, { "epoch": 0.5980059615582279, "grad_norm": 0.20647762715816498, "learning_rate": 8.339982561858896e-05, "loss": 0.694, "step": 2909 }, { "epoch": 0.5982115325316065, "grad_norm": 0.19566026329994202, "learning_rate": 8.339461715260781e-05, "loss": 0.6716, "step": 2910 }, { "epoch": 0.5984171035049851, "grad_norm": 0.2015964686870575, "learning_rate": 8.338940679510334e-05, "loss": 0.6869, "step": 2911 }, { "epoch": 0.5986226744783637, "grad_norm": 0.1712951958179474, "learning_rate": 8.338419454633224e-05, "loss": 0.5902, "step": 2912 }, { "epoch": 0.5988282454517422, "grad_norm": 0.13849389553070068, "learning_rate": 8.337898040655126e-05, "loss": 0.5992, "step": 2913 }, { "epoch": 0.5990338164251208, "grad_norm": 0.2373506873846054, "learning_rate": 8.33737643760173e-05, "loss": 0.6881, "step": 2914 }, { "epoch": 0.5992393873984994, "grad_norm": 0.2165384441614151, "learning_rate": 8.336854645498734e-05, "loss": 0.6805, "step": 2915 }, { "epoch": 0.599444958371878, "grad_norm": 0.21156401932239532, "learning_rate": 8.336332664371843e-05, "loss": 0.6781, "step": 2916 }, { "epoch": 0.5996505293452564, "grad_norm": 0.22182904183864594, "learning_rate": 8.335810494246772e-05, "loss": 0.7046, "step": 2917 }, { "epoch": 0.599856100318635, "grad_norm": 0.21610800921916962, "learning_rate": 8.335288135149246e-05, "loss": 0.7223, "step": 2918 }, { "epoch": 0.6000616712920136, "grad_norm": 0.21809829771518707, "learning_rate": 8.334765587105002e-05, "loss": 0.6088, "step": 2919 }, { "epoch": 0.6002672422653921, "grad_norm": 0.22887369990348816, "learning_rate": 8.334242850139779e-05, "loss": 0.6901, "step": 2920 }, { "epoch": 0.6004728132387707, "grad_norm": 0.22057749330997467, "learning_rate": 8.333719924279332e-05, "loss": 0.5969, "step": 2921 }, { "epoch": 0.6006783842121493, "grad_norm": 0.2292318344116211, "learning_rate": 8.333196809549422e-05, "loss": 0.6893, "step": 2922 }, { "epoch": 0.6008839551855278, "grad_norm": 0.15525048971176147, "learning_rate": 8.332673505975825e-05, "loss": 0.5925, "step": 2923 }, { "epoch": 0.6010895261589063, "grad_norm": 0.21504151821136475, "learning_rate": 8.332150013584315e-05, "loss": 0.678, "step": 2924 }, { "epoch": 0.6012950971322849, "grad_norm": 0.21480882167816162, "learning_rate": 8.331626332400689e-05, "loss": 0.6897, "step": 2925 }, { "epoch": 0.6015006681056635, "grad_norm": 0.14146551489830017, "learning_rate": 8.331102462450738e-05, "loss": 0.5684, "step": 2926 }, { "epoch": 0.6017062390790421, "grad_norm": 0.23041875660419464, "learning_rate": 8.330578403760277e-05, "loss": 0.6994, "step": 2927 }, { "epoch": 0.6019118100524206, "grad_norm": 0.20731528103351593, "learning_rate": 8.330054156355124e-05, "loss": 0.6792, "step": 2928 }, { "epoch": 0.6021173810257991, "grad_norm": 0.19797998666763306, "learning_rate": 8.329529720261103e-05, "loss": 0.6951, "step": 2929 }, { "epoch": 0.6023229519991777, "grad_norm": 0.2016698569059372, "learning_rate": 8.32900509550405e-05, "loss": 0.6833, "step": 2930 }, { "epoch": 0.6025285229725563, "grad_norm": 0.20054802298545837, "learning_rate": 8.328480282109816e-05, "loss": 0.6842, "step": 2931 }, { "epoch": 0.6027340939459348, "grad_norm": 0.19949203729629517, "learning_rate": 8.32795528010425e-05, "loss": 0.691, "step": 2932 }, { "epoch": 0.6029396649193134, "grad_norm": 0.17907802760601044, "learning_rate": 8.32743008951322e-05, "loss": 0.5825, "step": 2933 }, { "epoch": 0.603145235892692, "grad_norm": 0.2004586011171341, "learning_rate": 8.326904710362599e-05, "loss": 0.6639, "step": 2934 }, { "epoch": 0.6033508068660706, "grad_norm": 0.21539311110973358, "learning_rate": 8.32637914267827e-05, "loss": 0.6948, "step": 2935 }, { "epoch": 0.603556377839449, "grad_norm": 0.20301540195941925, "learning_rate": 8.325853386486126e-05, "loss": 0.7028, "step": 2936 }, { "epoch": 0.6037619488128276, "grad_norm": 0.19219626486301422, "learning_rate": 8.325327441812067e-05, "loss": 0.6727, "step": 2937 }, { "epoch": 0.6039675197862062, "grad_norm": 0.20149052143096924, "learning_rate": 8.324801308682004e-05, "loss": 0.6887, "step": 2938 }, { "epoch": 0.6041730907595847, "grad_norm": 0.20644250512123108, "learning_rate": 8.324274987121857e-05, "loss": 0.6764, "step": 2939 }, { "epoch": 0.6043786617329633, "grad_norm": 0.20564045011997223, "learning_rate": 8.323748477157557e-05, "loss": 0.6912, "step": 2940 }, { "epoch": 0.6045842327063419, "grad_norm": 0.18564823269844055, "learning_rate": 8.323221778815042e-05, "loss": 0.564, "step": 2941 }, { "epoch": 0.6047898036797205, "grad_norm": 0.2087641954421997, "learning_rate": 8.32269489212026e-05, "loss": 0.6865, "step": 2942 }, { "epoch": 0.6049953746530989, "grad_norm": 0.13221989572048187, "learning_rate": 8.322167817099166e-05, "loss": 0.5906, "step": 2943 }, { "epoch": 0.6052009456264775, "grad_norm": 0.13168349862098694, "learning_rate": 8.32164055377773e-05, "loss": 0.6099, "step": 2944 }, { "epoch": 0.6054065165998561, "grad_norm": 0.21939873695373535, "learning_rate": 8.321113102181925e-05, "loss": 0.6936, "step": 2945 }, { "epoch": 0.6056120875732347, "grad_norm": 0.21064333617687225, "learning_rate": 8.320585462337738e-05, "loss": 0.6805, "step": 2946 }, { "epoch": 0.6058176585466132, "grad_norm": 0.21517851948738098, "learning_rate": 8.320057634271162e-05, "loss": 0.6941, "step": 2947 }, { "epoch": 0.6060232295199918, "grad_norm": 0.19427655637264252, "learning_rate": 8.319529618008203e-05, "loss": 0.6989, "step": 2948 }, { "epoch": 0.6062288004933704, "grad_norm": 0.20321017503738403, "learning_rate": 8.31900141357487e-05, "loss": 0.6775, "step": 2949 }, { "epoch": 0.606434371466749, "grad_norm": 0.2060307115316391, "learning_rate": 8.318473020997188e-05, "loss": 0.712, "step": 2950 }, { "epoch": 0.6066399424401274, "grad_norm": 0.16920985281467438, "learning_rate": 8.317944440301188e-05, "loss": 0.5975, "step": 2951 }, { "epoch": 0.606845513413506, "grad_norm": 0.2233453392982483, "learning_rate": 8.31741567151291e-05, "loss": 0.6985, "step": 2952 }, { "epoch": 0.6070510843868846, "grad_norm": 0.21463671326637268, "learning_rate": 8.316886714658406e-05, "loss": 0.6661, "step": 2953 }, { "epoch": 0.6072566553602631, "grad_norm": 0.1969480812549591, "learning_rate": 8.316357569763732e-05, "loss": 0.7273, "step": 2954 }, { "epoch": 0.6074622263336417, "grad_norm": 0.17153163254261017, "learning_rate": 8.315828236854958e-05, "loss": 0.6041, "step": 2955 }, { "epoch": 0.6076677973070203, "grad_norm": 0.21503044664859772, "learning_rate": 8.315298715958165e-05, "loss": 0.6841, "step": 2956 }, { "epoch": 0.6078733682803988, "grad_norm": 0.2050783485174179, "learning_rate": 8.314769007099433e-05, "loss": 0.6952, "step": 2957 }, { "epoch": 0.6080789392537773, "grad_norm": 0.20447179675102234, "learning_rate": 8.314239110304864e-05, "loss": 0.7027, "step": 2958 }, { "epoch": 0.6082845102271559, "grad_norm": 0.20713284611701965, "learning_rate": 8.313709025600562e-05, "loss": 0.7172, "step": 2959 }, { "epoch": 0.6084900812005345, "grad_norm": 0.20058241486549377, "learning_rate": 8.31317875301264e-05, "loss": 0.6904, "step": 2960 }, { "epoch": 0.6086956521739131, "grad_norm": 0.19999080896377563, "learning_rate": 8.312648292567226e-05, "loss": 0.7054, "step": 2961 }, { "epoch": 0.6089012231472916, "grad_norm": 0.20129017531871796, "learning_rate": 8.31211764429045e-05, "loss": 0.6781, "step": 2962 }, { "epoch": 0.6091067941206701, "grad_norm": 0.2048570066690445, "learning_rate": 8.311586808208453e-05, "loss": 0.6995, "step": 2963 }, { "epoch": 0.6093123650940487, "grad_norm": 0.20518624782562256, "learning_rate": 8.311055784347392e-05, "loss": 0.6856, "step": 2964 }, { "epoch": 0.6095179360674273, "grad_norm": 0.14647917449474335, "learning_rate": 8.310524572733424e-05, "loss": 0.6034, "step": 2965 }, { "epoch": 0.6097235070408058, "grad_norm": 0.2090081423521042, "learning_rate": 8.309993173392722e-05, "loss": 0.6738, "step": 2966 }, { "epoch": 0.6099290780141844, "grad_norm": 0.13404381275177002, "learning_rate": 8.309461586351463e-05, "loss": 0.59, "step": 2967 }, { "epoch": 0.610134648987563, "grad_norm": 0.20760053396224976, "learning_rate": 8.308929811635837e-05, "loss": 0.7076, "step": 2968 }, { "epoch": 0.6103402199609416, "grad_norm": 0.2022329717874527, "learning_rate": 8.308397849272043e-05, "loss": 0.6992, "step": 2969 }, { "epoch": 0.61054579093432, "grad_norm": 0.20392966270446777, "learning_rate": 8.307865699286287e-05, "loss": 0.7017, "step": 2970 }, { "epoch": 0.6107513619076986, "grad_norm": 0.14375483989715576, "learning_rate": 8.307333361704786e-05, "loss": 0.5902, "step": 2971 }, { "epoch": 0.6109569328810772, "grad_norm": 0.20196297764778137, "learning_rate": 8.306800836553766e-05, "loss": 0.686, "step": 2972 }, { "epoch": 0.6111625038544557, "grad_norm": 0.23178908228874207, "learning_rate": 8.306268123859461e-05, "loss": 0.7128, "step": 2973 }, { "epoch": 0.6113680748278343, "grad_norm": 0.14498086273670197, "learning_rate": 8.305735223648117e-05, "loss": 0.5783, "step": 2974 }, { "epoch": 0.6115736458012129, "grad_norm": 0.21291960775852203, "learning_rate": 8.305202135945985e-05, "loss": 0.6836, "step": 2975 }, { "epoch": 0.6117792167745915, "grad_norm": 0.20154601335525513, "learning_rate": 8.30466886077933e-05, "loss": 0.6775, "step": 2976 }, { "epoch": 0.6119847877479699, "grad_norm": 0.1371108442544937, "learning_rate": 8.304135398174423e-05, "loss": 0.6029, "step": 2977 }, { "epoch": 0.6121903587213485, "grad_norm": 0.20939522981643677, "learning_rate": 8.303601748157545e-05, "loss": 0.7016, "step": 2978 }, { "epoch": 0.6123959296947271, "grad_norm": 0.1982061266899109, "learning_rate": 8.303067910754988e-05, "loss": 0.6724, "step": 2979 }, { "epoch": 0.6126015006681057, "grad_norm": 0.19184644520282745, "learning_rate": 8.302533885993051e-05, "loss": 0.6766, "step": 2980 }, { "epoch": 0.6128070716414842, "grad_norm": 0.1973457783460617, "learning_rate": 8.30199967389804e-05, "loss": 0.701, "step": 2981 }, { "epoch": 0.6130126426148628, "grad_norm": 0.23462116718292236, "learning_rate": 8.301465274496278e-05, "loss": 0.7119, "step": 2982 }, { "epoch": 0.6132182135882414, "grad_norm": 0.1940578669309616, "learning_rate": 8.300930687814089e-05, "loss": 0.6935, "step": 2983 }, { "epoch": 0.61342378456162, "grad_norm": 0.20462383329868317, "learning_rate": 8.30039591387781e-05, "loss": 0.7066, "step": 2984 }, { "epoch": 0.6136293555349984, "grad_norm": 0.1943095475435257, "learning_rate": 8.299860952713788e-05, "loss": 0.6764, "step": 2985 }, { "epoch": 0.613834926508377, "grad_norm": 0.18959608674049377, "learning_rate": 8.299325804348377e-05, "loss": 0.6501, "step": 2986 }, { "epoch": 0.6140404974817556, "grad_norm": 0.2010001540184021, "learning_rate": 8.298790468807941e-05, "loss": 0.6819, "step": 2987 }, { "epoch": 0.6142460684551342, "grad_norm": 0.20373772084712982, "learning_rate": 8.298254946118856e-05, "loss": 0.6776, "step": 2988 }, { "epoch": 0.6144516394285127, "grad_norm": 0.19308720529079437, "learning_rate": 8.2977192363075e-05, "loss": 0.6825, "step": 2989 }, { "epoch": 0.6146572104018913, "grad_norm": 0.19244827330112457, "learning_rate": 8.297183339400271e-05, "loss": 0.6819, "step": 2990 }, { "epoch": 0.6148627813752698, "grad_norm": 0.19886994361877441, "learning_rate": 8.296647255423566e-05, "loss": 0.6907, "step": 2991 }, { "epoch": 0.6150683523486483, "grad_norm": 0.194062277674675, "learning_rate": 8.296110984403794e-05, "loss": 0.6725, "step": 2992 }, { "epoch": 0.6152739233220269, "grad_norm": 0.19105246663093567, "learning_rate": 8.295574526367379e-05, "loss": 0.6895, "step": 2993 }, { "epoch": 0.6154794942954055, "grad_norm": 0.20439203083515167, "learning_rate": 8.295037881340746e-05, "loss": 0.6997, "step": 2994 }, { "epoch": 0.6156850652687841, "grad_norm": 0.2035692036151886, "learning_rate": 8.294501049350335e-05, "loss": 0.6797, "step": 2995 }, { "epoch": 0.6158906362421626, "grad_norm": 0.2011076956987381, "learning_rate": 8.293964030422593e-05, "loss": 0.6948, "step": 2996 }, { "epoch": 0.6160962072155411, "grad_norm": 0.1979755461215973, "learning_rate": 8.293426824583977e-05, "loss": 0.6984, "step": 2997 }, { "epoch": 0.6163017781889197, "grad_norm": 0.20361703634262085, "learning_rate": 8.29288943186095e-05, "loss": 0.6804, "step": 2998 }, { "epoch": 0.6165073491622983, "grad_norm": 0.19313938915729523, "learning_rate": 8.29235185227999e-05, "loss": 0.7105, "step": 2999 }, { "epoch": 0.6167129201356768, "grad_norm": 0.19516946375370026, "learning_rate": 8.291814085867579e-05, "loss": 0.7015, "step": 3000 }, { "epoch": 0.6169184911090554, "grad_norm": 0.19444262981414795, "learning_rate": 8.291276132650212e-05, "loss": 0.7028, "step": 3001 }, { "epoch": 0.617124062082434, "grad_norm": 0.19477610290050507, "learning_rate": 8.290737992654389e-05, "loss": 0.683, "step": 3002 }, { "epoch": 0.6173296330558126, "grad_norm": 0.20169807970523834, "learning_rate": 8.290199665906624e-05, "loss": 0.6816, "step": 3003 }, { "epoch": 0.617535204029191, "grad_norm": 0.1933300644159317, "learning_rate": 8.289661152433436e-05, "loss": 0.7073, "step": 3004 }, { "epoch": 0.6177407750025696, "grad_norm": 0.16266535222530365, "learning_rate": 8.289122452261356e-05, "loss": 0.5968, "step": 3005 }, { "epoch": 0.6179463459759482, "grad_norm": 0.19945891201496124, "learning_rate": 8.288583565416924e-05, "loss": 0.6826, "step": 3006 }, { "epoch": 0.6181519169493268, "grad_norm": 0.1400868445634842, "learning_rate": 8.288044491926687e-05, "loss": 0.6002, "step": 3007 }, { "epoch": 0.6183574879227053, "grad_norm": 0.12712964415550232, "learning_rate": 8.287505231817202e-05, "loss": 0.5836, "step": 3008 }, { "epoch": 0.6185630588960839, "grad_norm": 0.20722496509552002, "learning_rate": 8.286965785115038e-05, "loss": 0.6821, "step": 3009 }, { "epoch": 0.6187686298694625, "grad_norm": 0.1368006467819214, "learning_rate": 8.28642615184677e-05, "loss": 0.5909, "step": 3010 }, { "epoch": 0.6189742008428409, "grad_norm": 0.1366155594587326, "learning_rate": 8.285886332038983e-05, "loss": 0.5806, "step": 3011 }, { "epoch": 0.6191797718162195, "grad_norm": 0.20801199972629547, "learning_rate": 8.285346325718272e-05, "loss": 0.7111, "step": 3012 }, { "epoch": 0.6193853427895981, "grad_norm": 0.19898487627506256, "learning_rate": 8.28480613291124e-05, "loss": 0.6832, "step": 3013 }, { "epoch": 0.6195909137629767, "grad_norm": 0.19258826971054077, "learning_rate": 8.284265753644499e-05, "loss": 0.6962, "step": 3014 }, { "epoch": 0.6197964847363552, "grad_norm": 0.18354789912700653, "learning_rate": 8.283725187944674e-05, "loss": 0.6807, "step": 3015 }, { "epoch": 0.6200020557097338, "grad_norm": 0.15917901694774628, "learning_rate": 8.283184435838392e-05, "loss": 0.5927, "step": 3016 }, { "epoch": 0.6202076266831124, "grad_norm": 0.1983378827571869, "learning_rate": 8.282643497352296e-05, "loss": 0.6791, "step": 3017 }, { "epoch": 0.620413197656491, "grad_norm": 0.20160548388957977, "learning_rate": 8.282102372513035e-05, "loss": 0.6951, "step": 3018 }, { "epoch": 0.6206187686298694, "grad_norm": 0.19742833077907562, "learning_rate": 8.281561061347268e-05, "loss": 0.6848, "step": 3019 }, { "epoch": 0.620824339603248, "grad_norm": 0.19700521230697632, "learning_rate": 8.281019563881663e-05, "loss": 0.6975, "step": 3020 }, { "epoch": 0.6210299105766266, "grad_norm": 0.20055337250232697, "learning_rate": 8.280477880142895e-05, "loss": 0.6769, "step": 3021 }, { "epoch": 0.6212354815500052, "grad_norm": 0.23085735738277435, "learning_rate": 8.279936010157653e-05, "loss": 0.67, "step": 3022 }, { "epoch": 0.6214410525233837, "grad_norm": 0.20529572665691376, "learning_rate": 8.279393953952632e-05, "loss": 0.6962, "step": 3023 }, { "epoch": 0.6216466234967623, "grad_norm": 0.19554628431797028, "learning_rate": 8.278851711554532e-05, "loss": 0.6853, "step": 3024 }, { "epoch": 0.6218521944701408, "grad_norm": 0.1940753012895584, "learning_rate": 8.278309282990073e-05, "loss": 0.6549, "step": 3025 }, { "epoch": 0.6220577654435194, "grad_norm": 0.19746670126914978, "learning_rate": 8.277766668285977e-05, "loss": 0.6544, "step": 3026 }, { "epoch": 0.6222633364168979, "grad_norm": 0.19035373628139496, "learning_rate": 8.277223867468971e-05, "loss": 0.6773, "step": 3027 }, { "epoch": 0.6224689073902765, "grad_norm": 0.19404295086860657, "learning_rate": 8.276680880565803e-05, "loss": 0.6931, "step": 3028 }, { "epoch": 0.6226744783636551, "grad_norm": 0.1988229602575302, "learning_rate": 8.276137707603219e-05, "loss": 0.6812, "step": 3029 }, { "epoch": 0.6228800493370336, "grad_norm": 0.19786033034324646, "learning_rate": 8.27559434860798e-05, "loss": 0.6733, "step": 3030 }, { "epoch": 0.6230856203104121, "grad_norm": 0.19254696369171143, "learning_rate": 8.275050803606853e-05, "loss": 0.7066, "step": 3031 }, { "epoch": 0.6232911912837907, "grad_norm": 0.19956709444522858, "learning_rate": 8.274507072626619e-05, "loss": 0.681, "step": 3032 }, { "epoch": 0.6234967622571693, "grad_norm": 0.19668106734752655, "learning_rate": 8.273963155694062e-05, "loss": 0.676, "step": 3033 }, { "epoch": 0.6237023332305478, "grad_norm": 0.21287435293197632, "learning_rate": 8.273419052835981e-05, "loss": 0.704, "step": 3034 }, { "epoch": 0.6239079042039264, "grad_norm": 2.4127197265625, "learning_rate": 8.27287476407918e-05, "loss": 0.7001, "step": 3035 }, { "epoch": 0.624113475177305, "grad_norm": 0.20844995975494385, "learning_rate": 8.272330289450473e-05, "loss": 0.6808, "step": 3036 }, { "epoch": 0.6243190461506836, "grad_norm": 0.19834044575691223, "learning_rate": 8.271785628976686e-05, "loss": 0.5957, "step": 3037 }, { "epoch": 0.624524617124062, "grad_norm": 0.25713658332824707, "learning_rate": 8.271240782684649e-05, "loss": 0.6067, "step": 3038 }, { "epoch": 0.6247301880974406, "grad_norm": 0.755788266658783, "learning_rate": 8.270695750601206e-05, "loss": 0.7165, "step": 3039 }, { "epoch": 0.6249357590708192, "grad_norm": 0.23070074617862701, "learning_rate": 8.270150532753208e-05, "loss": 0.7086, "step": 3040 }, { "epoch": 0.6251413300441978, "grad_norm": 0.20264309644699097, "learning_rate": 8.269605129167514e-05, "loss": 0.5804, "step": 3041 }, { "epoch": 0.6253469010175763, "grad_norm": 0.25147226452827454, "learning_rate": 8.269059539870996e-05, "loss": 0.6841, "step": 3042 }, { "epoch": 0.6255524719909549, "grad_norm": 0.23628079891204834, "learning_rate": 8.268513764890528e-05, "loss": 0.7055, "step": 3043 }, { "epoch": 0.6257580429643335, "grad_norm": 0.2399078607559204, "learning_rate": 8.267967804253003e-05, "loss": 0.7238, "step": 3044 }, { "epoch": 0.625963613937712, "grad_norm": 0.2208731472492218, "learning_rate": 8.267421657985316e-05, "loss": 0.6938, "step": 3045 }, { "epoch": 0.6261691849110905, "grad_norm": 0.21366935968399048, "learning_rate": 8.266875326114372e-05, "loss": 0.5907, "step": 3046 }, { "epoch": 0.6263747558844691, "grad_norm": 0.22604869306087494, "learning_rate": 8.266328808667086e-05, "loss": 0.6977, "step": 3047 }, { "epoch": 0.6265803268578477, "grad_norm": 0.20610669255256653, "learning_rate": 8.265782105670385e-05, "loss": 0.6953, "step": 3048 }, { "epoch": 0.6267858978312262, "grad_norm": 0.2094089388847351, "learning_rate": 8.2652352171512e-05, "loss": 0.7114, "step": 3049 }, { "epoch": 0.6269914688046048, "grad_norm": 0.20464326441287994, "learning_rate": 8.264688143136474e-05, "loss": 0.6828, "step": 3050 }, { "epoch": 0.6271970397779834, "grad_norm": 0.20458531379699707, "learning_rate": 8.26414088365316e-05, "loss": 0.7172, "step": 3051 }, { "epoch": 0.6274026107513619, "grad_norm": 0.20255166292190552, "learning_rate": 8.26359343872822e-05, "loss": 0.7034, "step": 3052 }, { "epoch": 0.6276081817247404, "grad_norm": 0.20339445769786835, "learning_rate": 8.26304580838862e-05, "loss": 0.7053, "step": 3053 }, { "epoch": 0.627813752698119, "grad_norm": 0.20055994391441345, "learning_rate": 8.262497992661342e-05, "loss": 0.6917, "step": 3054 }, { "epoch": 0.6280193236714976, "grad_norm": 0.17087921500205994, "learning_rate": 8.261949991573374e-05, "loss": 0.6037, "step": 3055 }, { "epoch": 0.6282248946448762, "grad_norm": 0.2011025846004486, "learning_rate": 8.261401805151711e-05, "loss": 0.6748, "step": 3056 }, { "epoch": 0.6284304656182547, "grad_norm": 0.21176697313785553, "learning_rate": 8.260853433423366e-05, "loss": 0.6784, "step": 3057 }, { "epoch": 0.6286360365916333, "grad_norm": 0.2133779078722, "learning_rate": 8.260304876415348e-05, "loss": 0.7074, "step": 3058 }, { "epoch": 0.6288416075650118, "grad_norm": 0.21225228905677795, "learning_rate": 8.259756134154685e-05, "loss": 0.7336, "step": 3059 }, { "epoch": 0.6290471785383904, "grad_norm": 0.16129277646541595, "learning_rate": 8.25920720666841e-05, "loss": 0.5877, "step": 3060 }, { "epoch": 0.6292527495117689, "grad_norm": 0.2276839166879654, "learning_rate": 8.258658093983566e-05, "loss": 0.6943, "step": 3061 }, { "epoch": 0.6294583204851475, "grad_norm": 0.20884232223033905, "learning_rate": 8.258108796127206e-05, "loss": 0.6802, "step": 3062 }, { "epoch": 0.6296638914585261, "grad_norm": 0.21469639241695404, "learning_rate": 8.257559313126391e-05, "loss": 0.7264, "step": 3063 }, { "epoch": 0.6298694624319047, "grad_norm": 0.20983977615833282, "learning_rate": 8.257009645008191e-05, "loss": 0.7146, "step": 3064 }, { "epoch": 0.6300750334052831, "grad_norm": 0.20303663611412048, "learning_rate": 8.256459791799687e-05, "loss": 0.6593, "step": 3065 }, { "epoch": 0.6302806043786617, "grad_norm": 0.20967082679271698, "learning_rate": 8.255909753527968e-05, "loss": 0.6983, "step": 3066 }, { "epoch": 0.6304861753520403, "grad_norm": 0.15247072279453278, "learning_rate": 8.255359530220127e-05, "loss": 0.6055, "step": 3067 }, { "epoch": 0.6306917463254188, "grad_norm": 0.2263472080230713, "learning_rate": 8.254809121903276e-05, "loss": 0.6934, "step": 3068 }, { "epoch": 0.6308973172987974, "grad_norm": 0.22391130030155182, "learning_rate": 8.25425852860453e-05, "loss": 0.6984, "step": 3069 }, { "epoch": 0.631102888272176, "grad_norm": 0.19726432859897614, "learning_rate": 8.253707750351013e-05, "loss": 0.6938, "step": 3070 }, { "epoch": 0.6313084592455546, "grad_norm": 0.2162100374698639, "learning_rate": 8.25315678716986e-05, "loss": 0.675, "step": 3071 }, { "epoch": 0.631514030218933, "grad_norm": 0.2201918661594391, "learning_rate": 8.252605639088215e-05, "loss": 0.6931, "step": 3072 }, { "epoch": 0.6317196011923116, "grad_norm": 0.20799918472766876, "learning_rate": 8.25205430613323e-05, "loss": 0.6911, "step": 3073 }, { "epoch": 0.6319251721656902, "grad_norm": 0.19582496583461761, "learning_rate": 8.251502788332066e-05, "loss": 0.6763, "step": 3074 }, { "epoch": 0.6321307431390688, "grad_norm": 0.2054242044687271, "learning_rate": 8.250951085711894e-05, "loss": 0.6907, "step": 3075 }, { "epoch": 0.6323363141124473, "grad_norm": 0.15331074595451355, "learning_rate": 8.250399198299894e-05, "loss": 0.5903, "step": 3076 }, { "epoch": 0.6325418850858259, "grad_norm": 0.22686253488063812, "learning_rate": 8.249847126123253e-05, "loss": 0.6944, "step": 3077 }, { "epoch": 0.6327474560592045, "grad_norm": 0.2104145586490631, "learning_rate": 8.249294869209172e-05, "loss": 0.678, "step": 3078 }, { "epoch": 0.632953027032583, "grad_norm": 0.14177118241786957, "learning_rate": 8.248742427584858e-05, "loss": 0.5831, "step": 3079 }, { "epoch": 0.6331585980059615, "grad_norm": 0.2042471021413803, "learning_rate": 8.248189801277526e-05, "loss": 0.6831, "step": 3080 }, { "epoch": 0.6333641689793401, "grad_norm": 0.13382332026958466, "learning_rate": 8.2476369903144e-05, "loss": 0.5932, "step": 3081 }, { "epoch": 0.6335697399527187, "grad_norm": 0.21314536035060883, "learning_rate": 8.247083994722717e-05, "loss": 0.7024, "step": 3082 }, { "epoch": 0.6337753109260973, "grad_norm": 0.2022118717432022, "learning_rate": 8.24653081452972e-05, "loss": 0.6778, "step": 3083 }, { "epoch": 0.6339808818994758, "grad_norm": 0.1986151486635208, "learning_rate": 8.24597744976266e-05, "loss": 0.6955, "step": 3084 }, { "epoch": 0.6341864528728544, "grad_norm": 0.1944025456905365, "learning_rate": 8.245423900448802e-05, "loss": 0.6761, "step": 3085 }, { "epoch": 0.6343920238462329, "grad_norm": 0.1960417479276657, "learning_rate": 8.244870166615411e-05, "loss": 0.6694, "step": 3086 }, { "epoch": 0.6345975948196114, "grad_norm": 0.19537580013275146, "learning_rate": 8.244316248289771e-05, "loss": 0.7057, "step": 3087 }, { "epoch": 0.63480316579299, "grad_norm": 0.25191953778266907, "learning_rate": 8.243762145499173e-05, "loss": 0.7093, "step": 3088 }, { "epoch": 0.6350087367663686, "grad_norm": 0.21354857087135315, "learning_rate": 8.24320785827091e-05, "loss": 0.6912, "step": 3089 }, { "epoch": 0.6352143077397472, "grad_norm": 0.2095470279455185, "learning_rate": 8.242653386632292e-05, "loss": 0.6966, "step": 3090 }, { "epoch": 0.6354198787131257, "grad_norm": 0.19135965406894684, "learning_rate": 8.242098730610636e-05, "loss": 0.6868, "step": 3091 }, { "epoch": 0.6356254496865043, "grad_norm": 0.19568754732608795, "learning_rate": 8.241543890233263e-05, "loss": 0.6741, "step": 3092 }, { "epoch": 0.6358310206598828, "grad_norm": 0.19776469469070435, "learning_rate": 8.240988865527513e-05, "loss": 0.7092, "step": 3093 }, { "epoch": 0.6360365916332614, "grad_norm": 0.18224585056304932, "learning_rate": 8.240433656520727e-05, "loss": 0.6031, "step": 3094 }, { "epoch": 0.6362421626066399, "grad_norm": 0.203841432929039, "learning_rate": 8.239878263240256e-05, "loss": 0.6995, "step": 3095 }, { "epoch": 0.6364477335800185, "grad_norm": 0.13863101601600647, "learning_rate": 8.239322685713465e-05, "loss": 0.5863, "step": 3096 }, { "epoch": 0.6366533045533971, "grad_norm": 0.21603704988956451, "learning_rate": 8.238766923967722e-05, "loss": 0.7092, "step": 3097 }, { "epoch": 0.6368588755267757, "grad_norm": 0.20999345183372498, "learning_rate": 8.238210978030407e-05, "loss": 0.6738, "step": 3098 }, { "epoch": 0.6370644465001541, "grad_norm": 0.1540490984916687, "learning_rate": 8.23765484792891e-05, "loss": 0.589, "step": 3099 }, { "epoch": 0.6372700174735327, "grad_norm": 0.21293634176254272, "learning_rate": 8.237098533690628e-05, "loss": 0.6747, "step": 3100 }, { "epoch": 0.6374755884469113, "grad_norm": 0.23176319897174835, "learning_rate": 8.236542035342969e-05, "loss": 0.679, "step": 3101 }, { "epoch": 0.6376811594202898, "grad_norm": 0.19695045053958893, "learning_rate": 8.235985352913348e-05, "loss": 0.6856, "step": 3102 }, { "epoch": 0.6378867303936684, "grad_norm": 0.19714051485061646, "learning_rate": 8.235428486429191e-05, "loss": 0.697, "step": 3103 }, { "epoch": 0.638092301367047, "grad_norm": 0.21369072794914246, "learning_rate": 8.23487143591793e-05, "loss": 0.6986, "step": 3104 }, { "epoch": 0.6382978723404256, "grad_norm": 0.19707739353179932, "learning_rate": 8.234314201407012e-05, "loss": 0.7098, "step": 3105 }, { "epoch": 0.638503443313804, "grad_norm": 0.1957058161497116, "learning_rate": 8.233756782923888e-05, "loss": 0.6754, "step": 3106 }, { "epoch": 0.6387090142871826, "grad_norm": 0.19346770644187927, "learning_rate": 8.233199180496019e-05, "loss": 0.6703, "step": 3107 }, { "epoch": 0.6389145852605612, "grad_norm": 0.2065419703722, "learning_rate": 8.232641394150873e-05, "loss": 0.6961, "step": 3108 }, { "epoch": 0.6391201562339398, "grad_norm": 0.20303097367286682, "learning_rate": 8.232083423915932e-05, "loss": 0.6764, "step": 3109 }, { "epoch": 0.6393257272073183, "grad_norm": 0.19711004197597504, "learning_rate": 8.231525269818688e-05, "loss": 0.6965, "step": 3110 }, { "epoch": 0.6395312981806969, "grad_norm": 0.19637802243232727, "learning_rate": 8.230966931886631e-05, "loss": 0.7109, "step": 3111 }, { "epoch": 0.6397368691540755, "grad_norm": 0.20301949977874756, "learning_rate": 8.230408410147274e-05, "loss": 0.6824, "step": 3112 }, { "epoch": 0.639942440127454, "grad_norm": 1.2079687118530273, "learning_rate": 8.229849704628131e-05, "loss": 0.6643, "step": 3113 }, { "epoch": 0.6401480111008325, "grad_norm": 0.17537331581115723, "learning_rate": 8.229290815356723e-05, "loss": 0.5969, "step": 3114 }, { "epoch": 0.6403535820742111, "grad_norm": 0.2206054925918579, "learning_rate": 8.22873174236059e-05, "loss": 0.6856, "step": 3115 }, { "epoch": 0.6405591530475897, "grad_norm": 0.20161283016204834, "learning_rate": 8.228172485667273e-05, "loss": 0.6803, "step": 3116 }, { "epoch": 0.6407647240209683, "grad_norm": 0.5840950012207031, "learning_rate": 8.227613045304321e-05, "loss": 0.688, "step": 3117 }, { "epoch": 0.6409702949943468, "grad_norm": 0.19631561636924744, "learning_rate": 8.227053421299297e-05, "loss": 0.5931, "step": 3118 }, { "epoch": 0.6411758659677254, "grad_norm": 0.23822426795959473, "learning_rate": 8.226493613679772e-05, "loss": 0.5962, "step": 3119 }, { "epoch": 0.6413814369411039, "grad_norm": 0.15889045596122742, "learning_rate": 8.225933622473322e-05, "loss": 0.5809, "step": 3120 }, { "epoch": 0.6415870079144824, "grad_norm": 0.24698416888713837, "learning_rate": 8.22537344770754e-05, "loss": 0.6965, "step": 3121 }, { "epoch": 0.641792578887861, "grad_norm": 0.2314760684967041, "learning_rate": 8.224813089410021e-05, "loss": 0.6989, "step": 3122 }, { "epoch": 0.6419981498612396, "grad_norm": 0.20642580091953278, "learning_rate": 8.22425254760837e-05, "loss": 0.7141, "step": 3123 }, { "epoch": 0.6422037208346182, "grad_norm": 0.209413081407547, "learning_rate": 8.223691822330203e-05, "loss": 0.7117, "step": 3124 }, { "epoch": 0.6424092918079967, "grad_norm": 0.21780717372894287, "learning_rate": 8.223130913603144e-05, "loss": 0.6902, "step": 3125 }, { "epoch": 0.6426148627813753, "grad_norm": 0.21011175215244293, "learning_rate": 8.222569821454826e-05, "loss": 0.6963, "step": 3126 }, { "epoch": 0.6428204337547538, "grad_norm": 0.2518548369407654, "learning_rate": 8.222008545912895e-05, "loss": 0.6005, "step": 3127 }, { "epoch": 0.6430260047281324, "grad_norm": 0.21928563714027405, "learning_rate": 8.221447087004996e-05, "loss": 0.6957, "step": 3128 }, { "epoch": 0.6432315757015109, "grad_norm": 0.21237944066524506, "learning_rate": 8.220885444758796e-05, "loss": 0.6559, "step": 3129 }, { "epoch": 0.6434371466748895, "grad_norm": 0.22411003708839417, "learning_rate": 8.220323619201958e-05, "loss": 0.7081, "step": 3130 }, { "epoch": 0.6436427176482681, "grad_norm": 0.19972927868366241, "learning_rate": 8.219761610362168e-05, "loss": 0.6792, "step": 3131 }, { "epoch": 0.6438482886216467, "grad_norm": 0.24267856776714325, "learning_rate": 8.219199418267107e-05, "loss": 0.7113, "step": 3132 }, { "epoch": 0.6440538595950251, "grad_norm": 0.20243190228939056, "learning_rate": 8.218637042944476e-05, "loss": 0.6826, "step": 3133 }, { "epoch": 0.6442594305684037, "grad_norm": 0.19848772883415222, "learning_rate": 8.218074484421978e-05, "loss": 0.6965, "step": 3134 }, { "epoch": 0.6444650015417823, "grad_norm": 0.20293201506137848, "learning_rate": 8.217511742727327e-05, "loss": 0.6646, "step": 3135 }, { "epoch": 0.6446705725151609, "grad_norm": 0.20322081446647644, "learning_rate": 8.21694881788825e-05, "loss": 0.699, "step": 3136 }, { "epoch": 0.6448761434885394, "grad_norm": 0.20811443030834198, "learning_rate": 8.216385709932476e-05, "loss": 0.6561, "step": 3137 }, { "epoch": 0.645081714461918, "grad_norm": 0.21710549294948578, "learning_rate": 8.21582241888775e-05, "loss": 0.6903, "step": 3138 }, { "epoch": 0.6452872854352966, "grad_norm": 0.2017020285129547, "learning_rate": 8.21525894478182e-05, "loss": 0.6837, "step": 3139 }, { "epoch": 0.645492856408675, "grad_norm": 0.21228978037834167, "learning_rate": 8.214695287642448e-05, "loss": 0.7046, "step": 3140 }, { "epoch": 0.6456984273820536, "grad_norm": 0.19248290359973907, "learning_rate": 8.214131447497401e-05, "loss": 0.6838, "step": 3141 }, { "epoch": 0.6459039983554322, "grad_norm": 0.20567071437835693, "learning_rate": 8.213567424374458e-05, "loss": 0.6728, "step": 3142 }, { "epoch": 0.6461095693288108, "grad_norm": 0.19881267845630646, "learning_rate": 8.213003218301404e-05, "loss": 0.6937, "step": 3143 }, { "epoch": 0.6463151403021893, "grad_norm": 0.20884251594543457, "learning_rate": 8.212438829306037e-05, "loss": 0.6889, "step": 3144 }, { "epoch": 0.6465207112755679, "grad_norm": 0.196677565574646, "learning_rate": 8.21187425741616e-05, "loss": 0.6586, "step": 3145 }, { "epoch": 0.6467262822489465, "grad_norm": 0.19286644458770752, "learning_rate": 8.211309502659588e-05, "loss": 0.6643, "step": 3146 }, { "epoch": 0.646931853222325, "grad_norm": 0.19453571736812592, "learning_rate": 8.210744565064142e-05, "loss": 0.6898, "step": 3147 }, { "epoch": 0.6471374241957035, "grad_norm": 0.22043997049331665, "learning_rate": 8.210179444657658e-05, "loss": 0.5958, "step": 3148 }, { "epoch": 0.6473429951690821, "grad_norm": 0.2146371752023697, "learning_rate": 8.209614141467972e-05, "loss": 0.7184, "step": 3149 }, { "epoch": 0.6475485661424607, "grad_norm": 0.2086339145898819, "learning_rate": 8.209048655522937e-05, "loss": 0.6878, "step": 3150 }, { "epoch": 0.6477541371158393, "grad_norm": 0.19689536094665527, "learning_rate": 8.20848298685041e-05, "loss": 0.6693, "step": 3151 }, { "epoch": 0.6479597080892178, "grad_norm": 0.19254978001117706, "learning_rate": 8.207917135478259e-05, "loss": 0.6931, "step": 3152 }, { "epoch": 0.6481652790625964, "grad_norm": 0.19382552802562714, "learning_rate": 8.207351101434363e-05, "loss": 0.6691, "step": 3153 }, { "epoch": 0.6483708500359749, "grad_norm": 0.20275139808654785, "learning_rate": 8.206784884746607e-05, "loss": 0.7085, "step": 3154 }, { "epoch": 0.6485764210093535, "grad_norm": 0.19114693999290466, "learning_rate": 8.206218485442883e-05, "loss": 0.6732, "step": 3155 }, { "epoch": 0.648781991982732, "grad_norm": 0.19770143926143646, "learning_rate": 8.2056519035511e-05, "loss": 0.6691, "step": 3156 }, { "epoch": 0.6489875629561106, "grad_norm": 0.2007279098033905, "learning_rate": 8.205085139099165e-05, "loss": 0.6647, "step": 3157 }, { "epoch": 0.6491931339294892, "grad_norm": 0.19302336871623993, "learning_rate": 8.204518192115004e-05, "loss": 0.663, "step": 3158 }, { "epoch": 0.6493987049028677, "grad_norm": 0.19728437066078186, "learning_rate": 8.203951062626546e-05, "loss": 0.674, "step": 3159 }, { "epoch": 0.6496042758762463, "grad_norm": 0.20836929976940155, "learning_rate": 8.203383750661731e-05, "loss": 0.6827, "step": 3160 }, { "epoch": 0.6498098468496248, "grad_norm": 0.226349338889122, "learning_rate": 8.202816256248509e-05, "loss": 0.579, "step": 3161 }, { "epoch": 0.6500154178230034, "grad_norm": 0.203635573387146, "learning_rate": 8.202248579414837e-05, "loss": 0.6959, "step": 3162 }, { "epoch": 0.6502209887963819, "grad_norm": 0.14256790280342102, "learning_rate": 8.201680720188682e-05, "loss": 0.589, "step": 3163 }, { "epoch": 0.6504265597697605, "grad_norm": 0.214716836810112, "learning_rate": 8.201112678598018e-05, "loss": 0.6951, "step": 3164 }, { "epoch": 0.6506321307431391, "grad_norm": 0.20737797021865845, "learning_rate": 8.200544454670834e-05, "loss": 0.6921, "step": 3165 }, { "epoch": 0.6508377017165177, "grad_norm": 0.2059832364320755, "learning_rate": 8.199976048435118e-05, "loss": 0.6845, "step": 3166 }, { "epoch": 0.6510432726898961, "grad_norm": 0.20531848073005676, "learning_rate": 8.199407459918877e-05, "loss": 0.696, "step": 3167 }, { "epoch": 0.6512488436632747, "grad_norm": 0.20587943494319916, "learning_rate": 8.19883868915012e-05, "loss": 0.6877, "step": 3168 }, { "epoch": 0.6514544146366533, "grad_norm": 0.19502076506614685, "learning_rate": 8.198269736156872e-05, "loss": 0.6735, "step": 3169 }, { "epoch": 0.6516599856100319, "grad_norm": 0.1964626908302307, "learning_rate": 8.197700600967158e-05, "loss": 0.6702, "step": 3170 }, { "epoch": 0.6518655565834104, "grad_norm": 0.19854065775871277, "learning_rate": 8.19713128360902e-05, "loss": 0.6639, "step": 3171 }, { "epoch": 0.652071127556789, "grad_norm": 0.2041742503643036, "learning_rate": 8.196561784110502e-05, "loss": 0.6813, "step": 3172 }, { "epoch": 0.6522766985301676, "grad_norm": 0.19994084537029266, "learning_rate": 8.195992102499663e-05, "loss": 0.668, "step": 3173 }, { "epoch": 0.6524822695035462, "grad_norm": 0.1984533816576004, "learning_rate": 8.195422238804569e-05, "loss": 0.6839, "step": 3174 }, { "epoch": 0.6526878404769246, "grad_norm": 0.2585853338241577, "learning_rate": 8.194852193053293e-05, "loss": 0.5857, "step": 3175 }, { "epoch": 0.6528934114503032, "grad_norm": 0.21707791090011597, "learning_rate": 8.194281965273919e-05, "loss": 0.7002, "step": 3176 }, { "epoch": 0.6530989824236818, "grad_norm": 0.21522431075572968, "learning_rate": 8.193711555494541e-05, "loss": 0.6681, "step": 3177 }, { "epoch": 0.6533045533970603, "grad_norm": 0.20251545310020447, "learning_rate": 8.193140963743258e-05, "loss": 0.7119, "step": 3178 }, { "epoch": 0.6535101243704389, "grad_norm": 0.20081111788749695, "learning_rate": 8.192570190048181e-05, "loss": 0.7013, "step": 3179 }, { "epoch": 0.6537156953438175, "grad_norm": 0.20084579288959503, "learning_rate": 8.19199923443743e-05, "loss": 0.6996, "step": 3180 }, { "epoch": 0.653921266317196, "grad_norm": 0.2081523984670639, "learning_rate": 8.191428096939134e-05, "loss": 0.6774, "step": 3181 }, { "epoch": 0.6541268372905745, "grad_norm": 0.19181185960769653, "learning_rate": 8.190856777581427e-05, "loss": 0.5909, "step": 3182 }, { "epoch": 0.6543324082639531, "grad_norm": 0.21452546119689941, "learning_rate": 8.190285276392461e-05, "loss": 0.6737, "step": 3183 }, { "epoch": 0.6545379792373317, "grad_norm": 0.20853358507156372, "learning_rate": 8.189713593400385e-05, "loss": 0.6823, "step": 3184 }, { "epoch": 0.6547435502107103, "grad_norm": 0.20873308181762695, "learning_rate": 8.189141728633367e-05, "loss": 0.7007, "step": 3185 }, { "epoch": 0.6549491211840888, "grad_norm": 0.19929181039333344, "learning_rate": 8.188569682119579e-05, "loss": 0.6567, "step": 3186 }, { "epoch": 0.6551546921574674, "grad_norm": 0.19836626946926117, "learning_rate": 8.187997453887202e-05, "loss": 0.6607, "step": 3187 }, { "epoch": 0.6553602631308459, "grad_norm": 0.18740180134773254, "learning_rate": 8.187425043964429e-05, "loss": 0.6858, "step": 3188 }, { "epoch": 0.6555658341042245, "grad_norm": 0.20412470400333405, "learning_rate": 8.18685245237946e-05, "loss": 0.6895, "step": 3189 }, { "epoch": 0.655771405077603, "grad_norm": 0.15742400288581848, "learning_rate": 8.186279679160502e-05, "loss": 0.5842, "step": 3190 }, { "epoch": 0.6559769760509816, "grad_norm": 0.20259132981300354, "learning_rate": 8.185706724335773e-05, "loss": 0.6967, "step": 3191 }, { "epoch": 0.6561825470243602, "grad_norm": 1.9348865747451782, "learning_rate": 8.185133587933502e-05, "loss": 0.7117, "step": 3192 }, { "epoch": 0.6563881179977388, "grad_norm": 0.2033887505531311, "learning_rate": 8.184560269981922e-05, "loss": 0.6728, "step": 3193 }, { "epoch": 0.6565936889711173, "grad_norm": 0.15772481262683868, "learning_rate": 8.183986770509281e-05, "loss": 0.5949, "step": 3194 }, { "epoch": 0.6567992599444958, "grad_norm": 0.21117869019508362, "learning_rate": 8.18341308954383e-05, "loss": 0.7154, "step": 3195 }, { "epoch": 0.6570048309178744, "grad_norm": 0.21583619713783264, "learning_rate": 8.182839227113833e-05, "loss": 0.7056, "step": 3196 }, { "epoch": 0.6572104018912529, "grad_norm": 0.21002855896949768, "learning_rate": 8.18226518324756e-05, "loss": 0.7106, "step": 3197 }, { "epoch": 0.6574159728646315, "grad_norm": 0.20425178110599518, "learning_rate": 8.181690957973292e-05, "loss": 0.6785, "step": 3198 }, { "epoch": 0.6576215438380101, "grad_norm": 0.2083713412284851, "learning_rate": 8.181116551319319e-05, "loss": 0.707, "step": 3199 }, { "epoch": 0.6578271148113887, "grad_norm": 0.1998489499092102, "learning_rate": 8.180541963313939e-05, "loss": 0.6886, "step": 3200 }, { "epoch": 0.6580326857847671, "grad_norm": 0.20870743691921234, "learning_rate": 8.17996719398546e-05, "loss": 0.6931, "step": 3201 }, { "epoch": 0.6582382567581457, "grad_norm": 0.20594879984855652, "learning_rate": 8.179392243362195e-05, "loss": 0.6897, "step": 3202 }, { "epoch": 0.6584438277315243, "grad_norm": 0.19401825964450836, "learning_rate": 8.178817111472474e-05, "loss": 0.6719, "step": 3203 }, { "epoch": 0.6586493987049029, "grad_norm": 0.20549017190933228, "learning_rate": 8.178241798344627e-05, "loss": 0.666, "step": 3204 }, { "epoch": 0.6588549696782814, "grad_norm": 0.1869438886642456, "learning_rate": 8.177666304007e-05, "loss": 0.6728, "step": 3205 }, { "epoch": 0.65906054065166, "grad_norm": 0.19876159727573395, "learning_rate": 8.177090628487943e-05, "loss": 0.6646, "step": 3206 }, { "epoch": 0.6592661116250386, "grad_norm": 0.1998775601387024, "learning_rate": 8.176514771815818e-05, "loss": 0.7035, "step": 3207 }, { "epoch": 0.6594716825984172, "grad_norm": 0.19949300587177277, "learning_rate": 8.175938734018994e-05, "loss": 0.7035, "step": 3208 }, { "epoch": 0.6596772535717956, "grad_norm": 0.1943056583404541, "learning_rate": 8.175362515125849e-05, "loss": 0.702, "step": 3209 }, { "epoch": 0.6598828245451742, "grad_norm": 0.20226384699344635, "learning_rate": 8.174786115164773e-05, "loss": 0.6887, "step": 3210 }, { "epoch": 0.6600883955185528, "grad_norm": 0.19821226596832275, "learning_rate": 8.174209534164161e-05, "loss": 0.7097, "step": 3211 }, { "epoch": 0.6602939664919314, "grad_norm": 0.19110795855522156, "learning_rate": 8.173632772152416e-05, "loss": 0.6737, "step": 3212 }, { "epoch": 0.6604995374653099, "grad_norm": 0.19855926930904388, "learning_rate": 8.173055829157957e-05, "loss": 0.6818, "step": 3213 }, { "epoch": 0.6607051084386885, "grad_norm": 0.19995853304862976, "learning_rate": 8.172478705209204e-05, "loss": 0.6811, "step": 3214 }, { "epoch": 0.660910679412067, "grad_norm": 0.22749421000480652, "learning_rate": 8.171901400334591e-05, "loss": 0.6004, "step": 3215 }, { "epoch": 0.6611162503854455, "grad_norm": 0.2062731236219406, "learning_rate": 8.171323914562559e-05, "loss": 0.7145, "step": 3216 }, { "epoch": 0.6613218213588241, "grad_norm": 0.20264078676700592, "learning_rate": 8.170746247921555e-05, "loss": 0.6664, "step": 3217 }, { "epoch": 0.6615273923322027, "grad_norm": 0.20601505041122437, "learning_rate": 8.170168400440044e-05, "loss": 0.6727, "step": 3218 }, { "epoch": 0.6617329633055813, "grad_norm": 0.22924602031707764, "learning_rate": 8.169590372146487e-05, "loss": 0.6836, "step": 3219 }, { "epoch": 0.6619385342789598, "grad_norm": 0.19378581643104553, "learning_rate": 8.169012163069366e-05, "loss": 0.6851, "step": 3220 }, { "epoch": 0.6621441052523384, "grad_norm": 0.20838582515716553, "learning_rate": 8.168433773237164e-05, "loss": 0.6856, "step": 3221 }, { "epoch": 0.6623496762257169, "grad_norm": 0.21452072262763977, "learning_rate": 8.167855202678377e-05, "loss": 0.7068, "step": 3222 }, { "epoch": 0.6625552471990955, "grad_norm": 0.2000737488269806, "learning_rate": 8.167276451421506e-05, "loss": 0.6874, "step": 3223 }, { "epoch": 0.662760818172474, "grad_norm": 0.23498542606830597, "learning_rate": 8.166697519495066e-05, "loss": 0.5939, "step": 3224 }, { "epoch": 0.6629663891458526, "grad_norm": 0.2128230184316635, "learning_rate": 8.166118406927578e-05, "loss": 0.7094, "step": 3225 }, { "epoch": 0.6631719601192312, "grad_norm": 0.1330750733613968, "learning_rate": 8.16553911374757e-05, "loss": 0.6022, "step": 3226 }, { "epoch": 0.6633775310926098, "grad_norm": 0.21321649849414825, "learning_rate": 8.164959639983583e-05, "loss": 0.6905, "step": 3227 }, { "epoch": 0.6635831020659883, "grad_norm": 0.2014767974615097, "learning_rate": 8.164379985664166e-05, "loss": 0.685, "step": 3228 }, { "epoch": 0.6637886730393668, "grad_norm": 0.17292124032974243, "learning_rate": 8.163800150817872e-05, "loss": 0.5932, "step": 3229 }, { "epoch": 0.6639942440127454, "grad_norm": 0.20624692738056183, "learning_rate": 8.163220135473271e-05, "loss": 0.6831, "step": 3230 }, { "epoch": 0.6641998149861239, "grad_norm": 0.2030026912689209, "learning_rate": 8.162639939658935e-05, "loss": 0.7166, "step": 3231 }, { "epoch": 0.6644053859595025, "grad_norm": 0.19677379727363586, "learning_rate": 8.162059563403448e-05, "loss": 0.6646, "step": 3232 }, { "epoch": 0.6646109569328811, "grad_norm": 0.1929975152015686, "learning_rate": 8.161479006735404e-05, "loss": 0.671, "step": 3233 }, { "epoch": 0.6648165279062597, "grad_norm": 0.196861132979393, "learning_rate": 8.1608982696834e-05, "loss": 0.6899, "step": 3234 }, { "epoch": 0.6650220988796381, "grad_norm": 0.19990988075733185, "learning_rate": 8.160317352276053e-05, "loss": 0.6889, "step": 3235 }, { "epoch": 0.6652276698530167, "grad_norm": 0.1800822615623474, "learning_rate": 8.159736254541976e-05, "loss": 0.6149, "step": 3236 }, { "epoch": 0.6654332408263953, "grad_norm": 0.1930818259716034, "learning_rate": 8.159154976509801e-05, "loss": 0.6756, "step": 3237 }, { "epoch": 0.6656388117997739, "grad_norm": 0.18298830091953278, "learning_rate": 8.158573518208162e-05, "loss": 0.5984, "step": 3238 }, { "epoch": 0.6658443827731524, "grad_norm": 0.19836896657943726, "learning_rate": 8.157991879665706e-05, "loss": 0.6869, "step": 3239 }, { "epoch": 0.666049953746531, "grad_norm": 0.20596401393413544, "learning_rate": 8.157410060911087e-05, "loss": 0.6882, "step": 3240 }, { "epoch": 0.6662555247199096, "grad_norm": 0.1683359146118164, "learning_rate": 8.15682806197297e-05, "loss": 0.5799, "step": 3241 }, { "epoch": 0.6664610956932882, "grad_norm": 0.19776779413223267, "learning_rate": 8.156245882880026e-05, "loss": 0.6528, "step": 3242 }, { "epoch": 0.6666666666666666, "grad_norm": 0.1920391172170639, "learning_rate": 8.155663523660936e-05, "loss": 0.6982, "step": 3243 }, { "epoch": 0.6668722376400452, "grad_norm": 0.1352914422750473, "learning_rate": 8.155080984344391e-05, "loss": 0.5837, "step": 3244 }, { "epoch": 0.6670778086134238, "grad_norm": 0.2184402048587799, "learning_rate": 8.15449826495909e-05, "loss": 0.6784, "step": 3245 }, { "epoch": 0.6672833795868024, "grad_norm": 0.19601434469223022, "learning_rate": 8.15391536553374e-05, "loss": 0.6778, "step": 3246 }, { "epoch": 0.6674889505601809, "grad_norm": 0.19717663526535034, "learning_rate": 8.15333228609706e-05, "loss": 0.7024, "step": 3247 }, { "epoch": 0.6676945215335595, "grad_norm": 0.19221165776252747, "learning_rate": 8.152749026677773e-05, "loss": 0.6951, "step": 3248 }, { "epoch": 0.667900092506938, "grad_norm": 0.15361624956130981, "learning_rate": 8.152165587304613e-05, "loss": 0.5739, "step": 3249 }, { "epoch": 0.6681056634803165, "grad_norm": 0.13391469419002533, "learning_rate": 8.151581968006325e-05, "loss": 0.5979, "step": 3250 }, { "epoch": 0.6683112344536951, "grad_norm": 0.21153193712234497, "learning_rate": 8.150998168811663e-05, "loss": 0.6651, "step": 3251 }, { "epoch": 0.6685168054270737, "grad_norm": 0.13939164578914642, "learning_rate": 8.150414189749385e-05, "loss": 0.5664, "step": 3252 }, { "epoch": 0.6687223764004523, "grad_norm": 0.21254399418830872, "learning_rate": 8.149830030848261e-05, "loss": 0.6856, "step": 3253 }, { "epoch": 0.6689279473738308, "grad_norm": 0.19342190027236938, "learning_rate": 8.14924569213707e-05, "loss": 0.6828, "step": 3254 }, { "epoch": 0.6691335183472094, "grad_norm": 0.19527758657932281, "learning_rate": 8.148661173644602e-05, "loss": 0.7009, "step": 3255 }, { "epoch": 0.6693390893205879, "grad_norm": 0.1978977620601654, "learning_rate": 8.148076475399651e-05, "loss": 0.7137, "step": 3256 }, { "epoch": 0.6695446602939665, "grad_norm": 0.20413827896118164, "learning_rate": 8.147491597431025e-05, "loss": 0.672, "step": 3257 }, { "epoch": 0.669750231267345, "grad_norm": 0.19834209978580475, "learning_rate": 8.146906539767534e-05, "loss": 0.6726, "step": 3258 }, { "epoch": 0.6699558022407236, "grad_norm": 0.1580744832754135, "learning_rate": 8.146321302438004e-05, "loss": 0.5621, "step": 3259 }, { "epoch": 0.6701613732141022, "grad_norm": 0.20448711514472961, "learning_rate": 8.145735885471266e-05, "loss": 0.6633, "step": 3260 }, { "epoch": 0.6703669441874808, "grad_norm": 0.12794892489910126, "learning_rate": 8.145150288896161e-05, "loss": 0.5989, "step": 3261 }, { "epoch": 0.6705725151608593, "grad_norm": 0.20495088398456573, "learning_rate": 8.144564512741539e-05, "loss": 0.6778, "step": 3262 }, { "epoch": 0.6707780861342378, "grad_norm": 0.13609834015369415, "learning_rate": 8.143978557036259e-05, "loss": 0.5879, "step": 3263 }, { "epoch": 0.6709836571076164, "grad_norm": 0.19716021418571472, "learning_rate": 8.143392421809186e-05, "loss": 0.6998, "step": 3264 }, { "epoch": 0.671189228080995, "grad_norm": 0.19806286692619324, "learning_rate": 8.142806107089198e-05, "loss": 0.6884, "step": 3265 }, { "epoch": 0.6713947990543735, "grad_norm": 0.14359678328037262, "learning_rate": 8.14221961290518e-05, "loss": 0.5788, "step": 3266 }, { "epoch": 0.6716003700277521, "grad_norm": 0.19541367888450623, "learning_rate": 8.141632939286026e-05, "loss": 0.704, "step": 3267 }, { "epoch": 0.6718059410011307, "grad_norm": 0.19442065060138702, "learning_rate": 8.141046086260636e-05, "loss": 0.6666, "step": 3268 }, { "epoch": 0.6720115119745091, "grad_norm": 0.1996643990278244, "learning_rate": 8.140459053857924e-05, "loss": 0.6888, "step": 3269 }, { "epoch": 0.6722170829478877, "grad_norm": 0.19437336921691895, "learning_rate": 8.13987184210681e-05, "loss": 0.7176, "step": 3270 }, { "epoch": 0.6724226539212663, "grad_norm": 0.14562220871448517, "learning_rate": 8.139284451036223e-05, "loss": 0.5886, "step": 3271 }, { "epoch": 0.6726282248946449, "grad_norm": 0.2078685313463211, "learning_rate": 8.138696880675102e-05, "loss": 0.6867, "step": 3272 }, { "epoch": 0.6728337958680234, "grad_norm": 0.20113688707351685, "learning_rate": 8.138109131052393e-05, "loss": 0.7112, "step": 3273 }, { "epoch": 0.673039366841402, "grad_norm": 0.19516409933567047, "learning_rate": 8.137521202197052e-05, "loss": 0.6735, "step": 3274 }, { "epoch": 0.6732449378147806, "grad_norm": 0.18511922657489777, "learning_rate": 8.136933094138042e-05, "loss": 0.6696, "step": 3275 }, { "epoch": 0.6734505087881592, "grad_norm": 0.18774795532226562, "learning_rate": 8.136344806904336e-05, "loss": 0.6739, "step": 3276 }, { "epoch": 0.6736560797615376, "grad_norm": 0.19817449152469635, "learning_rate": 8.135756340524919e-05, "loss": 0.6896, "step": 3277 }, { "epoch": 0.6738616507349162, "grad_norm": 0.19579534232616425, "learning_rate": 8.135167695028782e-05, "loss": 0.6669, "step": 3278 }, { "epoch": 0.6740672217082948, "grad_norm": 0.1967802196741104, "learning_rate": 8.13457887044492e-05, "loss": 0.6763, "step": 3279 }, { "epoch": 0.6742727926816734, "grad_norm": 0.1518080234527588, "learning_rate": 8.133989866802349e-05, "loss": 0.5755, "step": 3280 }, { "epoch": 0.6744783636550519, "grad_norm": 0.1956729292869568, "learning_rate": 8.13340068413008e-05, "loss": 0.6695, "step": 3281 }, { "epoch": 0.6746839346284305, "grad_norm": 0.20296379923820496, "learning_rate": 8.132811322457142e-05, "loss": 0.678, "step": 3282 }, { "epoch": 0.674889505601809, "grad_norm": 0.19922013580799103, "learning_rate": 8.132221781812571e-05, "loss": 0.6898, "step": 3283 }, { "epoch": 0.6750950765751876, "grad_norm": 0.1867515742778778, "learning_rate": 8.13163206222541e-05, "loss": 0.6911, "step": 3284 }, { "epoch": 0.6753006475485661, "grad_norm": 0.20013710856437683, "learning_rate": 8.13104216372471e-05, "loss": 0.6878, "step": 3285 }, { "epoch": 0.6755062185219447, "grad_norm": 0.19711051881313324, "learning_rate": 8.130452086339535e-05, "loss": 0.6755, "step": 3286 }, { "epoch": 0.6757117894953233, "grad_norm": 0.22560589015483856, "learning_rate": 8.129861830098953e-05, "loss": 0.6961, "step": 3287 }, { "epoch": 0.6759173604687018, "grad_norm": 0.1926925927400589, "learning_rate": 8.129271395032046e-05, "loss": 0.6887, "step": 3288 }, { "epoch": 0.6761229314420804, "grad_norm": 0.19523480534553528, "learning_rate": 8.1286807811679e-05, "loss": 0.7129, "step": 3289 }, { "epoch": 0.6763285024154589, "grad_norm": 0.19967713952064514, "learning_rate": 8.128089988535613e-05, "loss": 0.6985, "step": 3290 }, { "epoch": 0.6765340733888375, "grad_norm": 0.1905701607465744, "learning_rate": 8.127499017164289e-05, "loss": 0.6839, "step": 3291 }, { "epoch": 0.676739644362216, "grad_norm": 0.1880829632282257, "learning_rate": 8.126907867083043e-05, "loss": 0.6795, "step": 3292 }, { "epoch": 0.6769452153355946, "grad_norm": 0.19849906861782074, "learning_rate": 8.126316538320999e-05, "loss": 0.7022, "step": 3293 }, { "epoch": 0.6771507863089732, "grad_norm": 0.19704832136631012, "learning_rate": 8.125725030907289e-05, "loss": 0.6762, "step": 3294 }, { "epoch": 0.6773563572823518, "grad_norm": 0.20323243737220764, "learning_rate": 8.125133344871052e-05, "loss": 0.7123, "step": 3295 }, { "epoch": 0.6775619282557303, "grad_norm": 0.16344204545021057, "learning_rate": 8.124541480241441e-05, "loss": 0.5788, "step": 3296 }, { "epoch": 0.6777674992291088, "grad_norm": 0.212424173951149, "learning_rate": 8.123949437047611e-05, "loss": 0.6874, "step": 3297 }, { "epoch": 0.6779730702024874, "grad_norm": 0.2008782923221588, "learning_rate": 8.123357215318731e-05, "loss": 0.67, "step": 3298 }, { "epoch": 0.678178641175866, "grad_norm": 0.20118223130702972, "learning_rate": 8.122764815083976e-05, "loss": 0.6802, "step": 3299 }, { "epoch": 0.6783842121492445, "grad_norm": 0.1353181004524231, "learning_rate": 8.122172236372533e-05, "loss": 0.6006, "step": 3300 }, { "epoch": 0.6785897831226231, "grad_norm": 0.19989068806171417, "learning_rate": 8.121579479213591e-05, "loss": 0.6934, "step": 3301 }, { "epoch": 0.6787953540960017, "grad_norm": 0.20248281955718994, "learning_rate": 8.120986543636357e-05, "loss": 0.6721, "step": 3302 }, { "epoch": 0.6790009250693803, "grad_norm": 0.19119137525558472, "learning_rate": 8.12039342967004e-05, "loss": 0.6735, "step": 3303 }, { "epoch": 0.6792064960427587, "grad_norm": 0.19932256639003754, "learning_rate": 8.119800137343861e-05, "loss": 0.6672, "step": 3304 }, { "epoch": 0.6794120670161373, "grad_norm": 0.19938862323760986, "learning_rate": 8.119206666687047e-05, "loss": 0.681, "step": 3305 }, { "epoch": 0.6796176379895159, "grad_norm": 0.20113952457904816, "learning_rate": 8.118613017728839e-05, "loss": 0.6699, "step": 3306 }, { "epoch": 0.6798232089628944, "grad_norm": 0.19112683832645416, "learning_rate": 8.118019190498477e-05, "loss": 0.7142, "step": 3307 }, { "epoch": 0.680028779936273, "grad_norm": 0.19518610835075378, "learning_rate": 8.117425185025225e-05, "loss": 0.6599, "step": 3308 }, { "epoch": 0.6802343509096516, "grad_norm": 0.20748484134674072, "learning_rate": 8.116831001338338e-05, "loss": 0.6737, "step": 3309 }, { "epoch": 0.6804399218830302, "grad_norm": 0.19534945487976074, "learning_rate": 8.116236639467094e-05, "loss": 0.6724, "step": 3310 }, { "epoch": 0.6806454928564086, "grad_norm": 0.1551889032125473, "learning_rate": 8.115642099440773e-05, "loss": 0.5907, "step": 3311 }, { "epoch": 0.6808510638297872, "grad_norm": 0.223983034491539, "learning_rate": 8.115047381288667e-05, "loss": 0.6984, "step": 3312 }, { "epoch": 0.6810566348031658, "grad_norm": 0.2107374668121338, "learning_rate": 8.11445248504007e-05, "loss": 0.6801, "step": 3313 }, { "epoch": 0.6812622057765444, "grad_norm": 0.2035159170627594, "learning_rate": 8.113857410724294e-05, "loss": 0.6509, "step": 3314 }, { "epoch": 0.6814677767499229, "grad_norm": 0.1422436386346817, "learning_rate": 8.113262158370655e-05, "loss": 0.6071, "step": 3315 }, { "epoch": 0.6816733477233015, "grad_norm": 0.20899644494056702, "learning_rate": 8.11266672800848e-05, "loss": 0.6571, "step": 3316 }, { "epoch": 0.68187891869668, "grad_norm": 0.19945669174194336, "learning_rate": 8.112071119667098e-05, "loss": 0.7201, "step": 3317 }, { "epoch": 0.6820844896700586, "grad_norm": 0.21106722950935364, "learning_rate": 8.111475333375854e-05, "loss": 0.6759, "step": 3318 }, { "epoch": 0.6822900606434371, "grad_norm": 0.2076927125453949, "learning_rate": 8.110879369164101e-05, "loss": 0.6832, "step": 3319 }, { "epoch": 0.6824956316168157, "grad_norm": 0.20357108116149902, "learning_rate": 8.1102832270612e-05, "loss": 0.6636, "step": 3320 }, { "epoch": 0.6827012025901943, "grad_norm": 0.1578240841627121, "learning_rate": 8.109686907096517e-05, "loss": 0.6158, "step": 3321 }, { "epoch": 0.6829067735635729, "grad_norm": 0.20219643414020538, "learning_rate": 8.109090409299434e-05, "loss": 0.6839, "step": 3322 }, { "epoch": 0.6831123445369514, "grad_norm": 0.2029838114976883, "learning_rate": 8.108493733699335e-05, "loss": 0.6963, "step": 3323 }, { "epoch": 0.6833179155103299, "grad_norm": 0.19904999434947968, "learning_rate": 8.107896880325615e-05, "loss": 0.6648, "step": 3324 }, { "epoch": 0.6835234864837085, "grad_norm": 0.2000379115343094, "learning_rate": 8.10729984920768e-05, "loss": 0.6706, "step": 3325 }, { "epoch": 0.683729057457087, "grad_norm": 0.19663308560848236, "learning_rate": 8.106702640374939e-05, "loss": 0.6798, "step": 3326 }, { "epoch": 0.6839346284304656, "grad_norm": 0.2028771936893463, "learning_rate": 8.10610525385682e-05, "loss": 0.6919, "step": 3327 }, { "epoch": 0.6841401994038442, "grad_norm": 0.19258631765842438, "learning_rate": 8.105507689682748e-05, "loss": 0.653, "step": 3328 }, { "epoch": 0.6843457703772228, "grad_norm": 0.14250509440898895, "learning_rate": 8.104909947882165e-05, "loss": 0.5786, "step": 3329 }, { "epoch": 0.6845513413506013, "grad_norm": 0.2034870833158493, "learning_rate": 8.104312028484517e-05, "loss": 0.6705, "step": 3330 }, { "epoch": 0.6847569123239798, "grad_norm": 0.19610241055488586, "learning_rate": 8.103713931519263e-05, "loss": 0.7, "step": 3331 }, { "epoch": 0.6849624832973584, "grad_norm": 0.14964817464351654, "learning_rate": 8.103115657015868e-05, "loss": 0.5914, "step": 3332 }, { "epoch": 0.685168054270737, "grad_norm": 0.20991382002830505, "learning_rate": 8.102517205003804e-05, "loss": 0.6841, "step": 3333 }, { "epoch": 0.6853736252441155, "grad_norm": 0.20073123276233673, "learning_rate": 8.101918575512556e-05, "loss": 0.6919, "step": 3334 }, { "epoch": 0.6855791962174941, "grad_norm": 0.21147504448890686, "learning_rate": 8.101319768571616e-05, "loss": 0.6585, "step": 3335 }, { "epoch": 0.6857847671908727, "grad_norm": 0.20476599037647247, "learning_rate": 8.100720784210482e-05, "loss": 0.7009, "step": 3336 }, { "epoch": 0.6859903381642513, "grad_norm": 0.20010556280612946, "learning_rate": 8.100121622458666e-05, "loss": 0.6734, "step": 3337 }, { "epoch": 0.6861959091376297, "grad_norm": 0.1875293105840683, "learning_rate": 8.099522283345683e-05, "loss": 0.6779, "step": 3338 }, { "epoch": 0.6864014801110083, "grad_norm": 0.20071950554847717, "learning_rate": 8.098922766901063e-05, "loss": 0.6709, "step": 3339 }, { "epoch": 0.6866070510843869, "grad_norm": 0.19928574562072754, "learning_rate": 8.098323073154338e-05, "loss": 0.7085, "step": 3340 }, { "epoch": 0.6868126220577655, "grad_norm": 0.19401361048221588, "learning_rate": 8.097723202135054e-05, "loss": 0.6872, "step": 3341 }, { "epoch": 0.687018193031144, "grad_norm": 0.19485783576965332, "learning_rate": 8.097123153872765e-05, "loss": 0.6864, "step": 3342 }, { "epoch": 0.6872237640045226, "grad_norm": 0.1916022002696991, "learning_rate": 8.09652292839703e-05, "loss": 0.7022, "step": 3343 }, { "epoch": 0.6874293349779012, "grad_norm": 0.1911773532629013, "learning_rate": 8.09592252573742e-05, "loss": 0.708, "step": 3344 }, { "epoch": 0.6876349059512796, "grad_norm": 0.19738483428955078, "learning_rate": 8.095321945923515e-05, "loss": 0.7014, "step": 3345 }, { "epoch": 0.6878404769246582, "grad_norm": 0.16668002307415009, "learning_rate": 8.094721188984903e-05, "loss": 0.6045, "step": 3346 }, { "epoch": 0.6880460478980368, "grad_norm": 0.20171229541301727, "learning_rate": 8.094120254951179e-05, "loss": 0.6919, "step": 3347 }, { "epoch": 0.6882516188714154, "grad_norm": 0.19809181988239288, "learning_rate": 8.093519143851949e-05, "loss": 0.6767, "step": 3348 }, { "epoch": 0.6884571898447939, "grad_norm": 0.19745509326457977, "learning_rate": 8.092917855716826e-05, "loss": 0.6738, "step": 3349 }, { "epoch": 0.6886627608181725, "grad_norm": 0.19986550509929657, "learning_rate": 8.092316390575435e-05, "loss": 0.7112, "step": 3350 }, { "epoch": 0.688868331791551, "grad_norm": 0.19324201345443726, "learning_rate": 8.091714748457404e-05, "loss": 0.6906, "step": 3351 }, { "epoch": 0.6890739027649296, "grad_norm": 0.20095904171466827, "learning_rate": 8.091112929392376e-05, "loss": 0.6486, "step": 3352 }, { "epoch": 0.6892794737383081, "grad_norm": 0.1877359300851822, "learning_rate": 8.09051093341e-05, "loss": 0.6844, "step": 3353 }, { "epoch": 0.6894850447116867, "grad_norm": 0.19812311232089996, "learning_rate": 8.08990876053993e-05, "loss": 0.6795, "step": 3354 }, { "epoch": 0.6896906156850653, "grad_norm": 0.19134752452373505, "learning_rate": 8.089306410811836e-05, "loss": 0.703, "step": 3355 }, { "epoch": 0.6898961866584439, "grad_norm": 0.1890835165977478, "learning_rate": 8.088703884255393e-05, "loss": 0.6585, "step": 3356 }, { "epoch": 0.6901017576318224, "grad_norm": 0.18926945328712463, "learning_rate": 8.088101180900282e-05, "loss": 0.6694, "step": 3357 }, { "epoch": 0.6903073286052009, "grad_norm": 0.18181371688842773, "learning_rate": 8.087498300776194e-05, "loss": 0.5831, "step": 3358 }, { "epoch": 0.6905128995785795, "grad_norm": 0.1939140260219574, "learning_rate": 8.086895243912835e-05, "loss": 0.6658, "step": 3359 }, { "epoch": 0.690718470551958, "grad_norm": 0.13031508028507233, "learning_rate": 8.086292010339912e-05, "loss": 0.6073, "step": 3360 }, { "epoch": 0.6909240415253366, "grad_norm": 0.1984340101480484, "learning_rate": 8.085688600087144e-05, "loss": 0.6565, "step": 3361 }, { "epoch": 0.6911296124987152, "grad_norm": 0.20224301517009735, "learning_rate": 8.08508501318426e-05, "loss": 0.7191, "step": 3362 }, { "epoch": 0.6913351834720938, "grad_norm": 0.18884535133838654, "learning_rate": 8.084481249660991e-05, "loss": 0.7012, "step": 3363 }, { "epoch": 0.6915407544454722, "grad_norm": 0.1905461698770523, "learning_rate": 8.083877309547086e-05, "loss": 0.6861, "step": 3364 }, { "epoch": 0.6917463254188508, "grad_norm": 0.19112585484981537, "learning_rate": 8.083273192872297e-05, "loss": 0.6698, "step": 3365 }, { "epoch": 0.6919518963922294, "grad_norm": 0.19276300072669983, "learning_rate": 8.082668899666386e-05, "loss": 0.6939, "step": 3366 }, { "epoch": 0.692157467365608, "grad_norm": 0.1849944144487381, "learning_rate": 8.082064429959123e-05, "loss": 0.6653, "step": 3367 }, { "epoch": 0.6923630383389865, "grad_norm": 0.197621151804924, "learning_rate": 8.081459783780288e-05, "loss": 0.69, "step": 3368 }, { "epoch": 0.6925686093123651, "grad_norm": 0.20411409437656403, "learning_rate": 8.08085496115967e-05, "loss": 0.6928, "step": 3369 }, { "epoch": 0.6927741802857437, "grad_norm": 0.19879065454006195, "learning_rate": 8.080249962127064e-05, "loss": 0.6855, "step": 3370 }, { "epoch": 0.6929797512591223, "grad_norm": 0.19563095271587372, "learning_rate": 8.079644786712277e-05, "loss": 0.6692, "step": 3371 }, { "epoch": 0.6931853222325007, "grad_norm": 0.1997094601392746, "learning_rate": 8.079039434945124e-05, "loss": 0.6851, "step": 3372 }, { "epoch": 0.6933908932058793, "grad_norm": 0.19280613958835602, "learning_rate": 8.078433906855424e-05, "loss": 0.6731, "step": 3373 }, { "epoch": 0.6935964641792579, "grad_norm": 0.18386954069137573, "learning_rate": 8.077828202473013e-05, "loss": 0.6934, "step": 3374 }, { "epoch": 0.6938020351526365, "grad_norm": 0.20323842763900757, "learning_rate": 8.077222321827727e-05, "loss": 0.6856, "step": 3375 }, { "epoch": 0.694007606126015, "grad_norm": 0.1947094351053238, "learning_rate": 8.076616264949418e-05, "loss": 0.6884, "step": 3376 }, { "epoch": 0.6942131770993936, "grad_norm": 0.19289527833461761, "learning_rate": 8.076010031867944e-05, "loss": 0.589, "step": 3377 }, { "epoch": 0.6944187480727722, "grad_norm": 0.19861692190170288, "learning_rate": 8.075403622613168e-05, "loss": 0.7024, "step": 3378 }, { "epoch": 0.6946243190461506, "grad_norm": 0.21449032425880432, "learning_rate": 8.074797037214968e-05, "loss": 0.7021, "step": 3379 }, { "epoch": 0.6948298900195292, "grad_norm": 0.1875978410243988, "learning_rate": 8.074190275703227e-05, "loss": 0.6898, "step": 3380 }, { "epoch": 0.6950354609929078, "grad_norm": 0.15483641624450684, "learning_rate": 8.073583338107837e-05, "loss": 0.5851, "step": 3381 }, { "epoch": 0.6952410319662864, "grad_norm": 0.19564680755138397, "learning_rate": 8.072976224458697e-05, "loss": 0.6792, "step": 3382 }, { "epoch": 0.6954466029396649, "grad_norm": 0.20344282686710358, "learning_rate": 8.072368934785719e-05, "loss": 0.6869, "step": 3383 }, { "epoch": 0.6956521739130435, "grad_norm": 0.19657017290592194, "learning_rate": 8.071761469118822e-05, "loss": 0.6595, "step": 3384 }, { "epoch": 0.695857744886422, "grad_norm": 0.19356437027454376, "learning_rate": 8.071153827487931e-05, "loss": 0.6804, "step": 3385 }, { "epoch": 0.6960633158598006, "grad_norm": 0.19667509198188782, "learning_rate": 8.070546009922981e-05, "loss": 0.7075, "step": 3386 }, { "epoch": 0.6962688868331791, "grad_norm": 0.18919992446899414, "learning_rate": 8.06993801645392e-05, "loss": 0.6778, "step": 3387 }, { "epoch": 0.6964744578065577, "grad_norm": 0.15784306824207306, "learning_rate": 8.0693298471107e-05, "loss": 0.5685, "step": 3388 }, { "epoch": 0.6966800287799363, "grad_norm": 0.20536069571971893, "learning_rate": 8.068721501923279e-05, "loss": 0.6465, "step": 3389 }, { "epoch": 0.6968855997533149, "grad_norm": 0.1936463564634323, "learning_rate": 8.06811298092163e-05, "loss": 0.6918, "step": 3390 }, { "epoch": 0.6970911707266934, "grad_norm": 0.19561581313610077, "learning_rate": 8.067504284135732e-05, "loss": 0.673, "step": 3391 }, { "epoch": 0.6972967417000719, "grad_norm": 0.198947474360466, "learning_rate": 8.066895411595572e-05, "loss": 0.6773, "step": 3392 }, { "epoch": 0.6975023126734505, "grad_norm": 0.19654102623462677, "learning_rate": 8.066286363331147e-05, "loss": 0.6467, "step": 3393 }, { "epoch": 0.6977078836468291, "grad_norm": 0.1938384771347046, "learning_rate": 8.065677139372462e-05, "loss": 0.6993, "step": 3394 }, { "epoch": 0.6979134546202076, "grad_norm": 0.1924823522567749, "learning_rate": 8.06506773974953e-05, "loss": 0.6672, "step": 3395 }, { "epoch": 0.6981190255935862, "grad_norm": 0.19648601114749908, "learning_rate": 8.064458164492372e-05, "loss": 0.6478, "step": 3396 }, { "epoch": 0.6983245965669648, "grad_norm": 0.1876935362815857, "learning_rate": 8.063848413631023e-05, "loss": 0.6704, "step": 3397 }, { "epoch": 0.6985301675403432, "grad_norm": 0.19049161672592163, "learning_rate": 8.06323848719552e-05, "loss": 0.6582, "step": 3398 }, { "epoch": 0.6987357385137218, "grad_norm": 0.19286733865737915, "learning_rate": 8.06262838521591e-05, "loss": 0.7147, "step": 3399 }, { "epoch": 0.6989413094871004, "grad_norm": 0.19397635757923126, "learning_rate": 8.062018107722252e-05, "loss": 0.6801, "step": 3400 }, { "epoch": 0.699146880460479, "grad_norm": 0.20421355962753296, "learning_rate": 8.06140765474461e-05, "loss": 0.6723, "step": 3401 }, { "epoch": 0.6993524514338575, "grad_norm": 0.1797918975353241, "learning_rate": 8.060797026313059e-05, "loss": 0.5854, "step": 3402 }, { "epoch": 0.6995580224072361, "grad_norm": 0.19936294853687286, "learning_rate": 8.060186222457682e-05, "loss": 0.6819, "step": 3403 }, { "epoch": 0.6997635933806147, "grad_norm": 0.19907638430595398, "learning_rate": 8.05957524320857e-05, "loss": 0.6739, "step": 3404 }, { "epoch": 0.6999691643539933, "grad_norm": 0.20160700380802155, "learning_rate": 8.058964088595822e-05, "loss": 0.6694, "step": 3405 }, { "epoch": 0.7001747353273717, "grad_norm": 0.19310222566127777, "learning_rate": 8.05835275864955e-05, "loss": 0.6806, "step": 3406 }, { "epoch": 0.7003803063007503, "grad_norm": 0.1963704526424408, "learning_rate": 8.057741253399866e-05, "loss": 0.6816, "step": 3407 }, { "epoch": 0.7005858772741289, "grad_norm": 0.5723682641983032, "learning_rate": 8.057129572876903e-05, "loss": 0.6971, "step": 3408 }, { "epoch": 0.7007914482475075, "grad_norm": 0.1899087131023407, "learning_rate": 8.05651771711079e-05, "loss": 0.6834, "step": 3409 }, { "epoch": 0.700997019220886, "grad_norm": 0.1957729011774063, "learning_rate": 8.055905686131672e-05, "loss": 0.7188, "step": 3410 }, { "epoch": 0.7012025901942646, "grad_norm": 0.19298696517944336, "learning_rate": 8.055293479969702e-05, "loss": 0.6694, "step": 3411 }, { "epoch": 0.7014081611676432, "grad_norm": 0.1891012340784073, "learning_rate": 8.05468109865504e-05, "loss": 0.6817, "step": 3412 }, { "epoch": 0.7016137321410217, "grad_norm": 0.19800642132759094, "learning_rate": 8.054068542217854e-05, "loss": 0.6592, "step": 3413 }, { "epoch": 0.7018193031144002, "grad_norm": 0.18479777872562408, "learning_rate": 8.053455810688322e-05, "loss": 0.6702, "step": 3414 }, { "epoch": 0.7020248740877788, "grad_norm": 0.20111770927906036, "learning_rate": 8.052842904096631e-05, "loss": 0.7025, "step": 3415 }, { "epoch": 0.7022304450611574, "grad_norm": 0.19288669526576996, "learning_rate": 8.052229822472977e-05, "loss": 0.6858, "step": 3416 }, { "epoch": 0.7024360160345359, "grad_norm": 0.2072620391845703, "learning_rate": 8.051616565847562e-05, "loss": 0.6998, "step": 3417 }, { "epoch": 0.7026415870079145, "grad_norm": 0.1882101595401764, "learning_rate": 8.051003134250601e-05, "loss": 0.6669, "step": 3418 }, { "epoch": 0.702847157981293, "grad_norm": 0.2227669060230255, "learning_rate": 8.050389527712312e-05, "loss": 0.6115, "step": 3419 }, { "epoch": 0.7030527289546716, "grad_norm": 0.1958729773759842, "learning_rate": 8.049775746262924e-05, "loss": 0.7012, "step": 3420 }, { "epoch": 0.7032582999280501, "grad_norm": 0.14937171339988708, "learning_rate": 8.049161789932677e-05, "loss": 0.6124, "step": 3421 }, { "epoch": 0.7034638709014287, "grad_norm": 0.16276027262210846, "learning_rate": 8.048547658751817e-05, "loss": 0.5928, "step": 3422 }, { "epoch": 0.7036694418748073, "grad_norm": 0.15098173916339874, "learning_rate": 8.047933352750601e-05, "loss": 0.6122, "step": 3423 }, { "epoch": 0.7038750128481859, "grad_norm": 0.20423725247383118, "learning_rate": 8.047318871959292e-05, "loss": 0.6988, "step": 3424 }, { "epoch": 0.7040805838215644, "grad_norm": 0.19810713827610016, "learning_rate": 8.046704216408161e-05, "loss": 0.6585, "step": 3425 }, { "epoch": 0.7042861547949429, "grad_norm": 0.21174119412899017, "learning_rate": 8.046089386127491e-05, "loss": 0.5926, "step": 3426 }, { "epoch": 0.7044917257683215, "grad_norm": 0.18921788036823273, "learning_rate": 8.045474381147572e-05, "loss": 0.663, "step": 3427 }, { "epoch": 0.7046972967417001, "grad_norm": 0.21867318451404572, "learning_rate": 8.044859201498701e-05, "loss": 0.6619, "step": 3428 }, { "epoch": 0.7049028677150786, "grad_norm": 0.18937045335769653, "learning_rate": 8.044243847211186e-05, "loss": 0.6972, "step": 3429 }, { "epoch": 0.7051084386884572, "grad_norm": 0.20421583950519562, "learning_rate": 8.043628318315343e-05, "loss": 0.6855, "step": 3430 }, { "epoch": 0.7053140096618358, "grad_norm": 0.20946352183818817, "learning_rate": 8.043012614841493e-05, "loss": 0.5986, "step": 3431 }, { "epoch": 0.7055195806352144, "grad_norm": 0.21439684927463531, "learning_rate": 8.042396736819974e-05, "loss": 0.6642, "step": 3432 }, { "epoch": 0.7057251516085928, "grad_norm": 0.1428326517343521, "learning_rate": 8.041780684281124e-05, "loss": 0.5734, "step": 3433 }, { "epoch": 0.7059307225819714, "grad_norm": 0.21994005143642426, "learning_rate": 8.041164457255295e-05, "loss": 0.6916, "step": 3434 }, { "epoch": 0.70613629355535, "grad_norm": 0.19378912448883057, "learning_rate": 8.040548055772843e-05, "loss": 0.6845, "step": 3435 }, { "epoch": 0.7063418645287285, "grad_norm": 0.14617706835269928, "learning_rate": 8.039931479864138e-05, "loss": 0.5823, "step": 3436 }, { "epoch": 0.7065474355021071, "grad_norm": 0.2063405066728592, "learning_rate": 8.039314729559553e-05, "loss": 0.7163, "step": 3437 }, { "epoch": 0.7067530064754857, "grad_norm": 0.20391802489757538, "learning_rate": 8.038697804889476e-05, "loss": 0.6825, "step": 3438 }, { "epoch": 0.7069585774488643, "grad_norm": 0.1884995549917221, "learning_rate": 8.038080705884297e-05, "loss": 0.7005, "step": 3439 }, { "epoch": 0.7071641484222427, "grad_norm": 0.15203148126602173, "learning_rate": 8.03746343257442e-05, "loss": 0.5766, "step": 3440 }, { "epoch": 0.7073697193956213, "grad_norm": 0.1965416520833969, "learning_rate": 8.036845984990251e-05, "loss": 0.6746, "step": 3441 }, { "epoch": 0.7075752903689999, "grad_norm": 0.19438838958740234, "learning_rate": 8.036228363162214e-05, "loss": 0.68, "step": 3442 }, { "epoch": 0.7077808613423785, "grad_norm": 0.19313882291316986, "learning_rate": 8.035610567120731e-05, "loss": 0.6638, "step": 3443 }, { "epoch": 0.707986432315757, "grad_norm": 0.19299215078353882, "learning_rate": 8.034992596896244e-05, "loss": 0.6862, "step": 3444 }, { "epoch": 0.7081920032891356, "grad_norm": 0.20329324901103973, "learning_rate": 8.034374452519193e-05, "loss": 0.6824, "step": 3445 }, { "epoch": 0.7083975742625142, "grad_norm": 0.18780893087387085, "learning_rate": 8.033756134020032e-05, "loss": 0.662, "step": 3446 }, { "epoch": 0.7086031452358927, "grad_norm": 0.19197134673595428, "learning_rate": 8.033137641429223e-05, "loss": 0.6791, "step": 3447 }, { "epoch": 0.7088087162092712, "grad_norm": 0.19330036640167236, "learning_rate": 8.032518974777236e-05, "loss": 0.6907, "step": 3448 }, { "epoch": 0.7090142871826498, "grad_norm": 0.19305558502674103, "learning_rate": 8.03190013409455e-05, "loss": 0.6755, "step": 3449 }, { "epoch": 0.7092198581560284, "grad_norm": 0.17885883152484894, "learning_rate": 8.031281119411653e-05, "loss": 0.6032, "step": 3450 }, { "epoch": 0.709425429129407, "grad_norm": 0.19554337859153748, "learning_rate": 8.030661930759041e-05, "loss": 0.6943, "step": 3451 }, { "epoch": 0.7096310001027855, "grad_norm": 0.19464746117591858, "learning_rate": 8.030042568167216e-05, "loss": 0.6655, "step": 3452 }, { "epoch": 0.709836571076164, "grad_norm": 0.19761775434017181, "learning_rate": 8.029423031666694e-05, "loss": 0.6915, "step": 3453 }, { "epoch": 0.7100421420495426, "grad_norm": 0.20174358785152435, "learning_rate": 8.028803321287997e-05, "loss": 0.6715, "step": 3454 }, { "epoch": 0.7102477130229211, "grad_norm": 0.19728273153305054, "learning_rate": 8.028183437061653e-05, "loss": 0.7062, "step": 3455 }, { "epoch": 0.7104532839962997, "grad_norm": 0.1927875429391861, "learning_rate": 8.027563379018202e-05, "loss": 0.6685, "step": 3456 }, { "epoch": 0.7106588549696783, "grad_norm": 0.16123135387897491, "learning_rate": 8.02694314718819e-05, "loss": 0.5778, "step": 3457 }, { "epoch": 0.7108644259430569, "grad_norm": 0.1330617517232895, "learning_rate": 8.026322741602176e-05, "loss": 0.5941, "step": 3458 }, { "epoch": 0.7110699969164354, "grad_norm": 0.24413903057575226, "learning_rate": 8.025702162290721e-05, "loss": 0.6845, "step": 3459 }, { "epoch": 0.7112755678898139, "grad_norm": 0.21330687403678894, "learning_rate": 8.0250814092844e-05, "loss": 0.6724, "step": 3460 }, { "epoch": 0.7114811388631925, "grad_norm": 0.21365886926651, "learning_rate": 8.024460482613793e-05, "loss": 0.6668, "step": 3461 }, { "epoch": 0.7116867098365711, "grad_norm": 0.2229931354522705, "learning_rate": 8.023839382309493e-05, "loss": 0.6628, "step": 3462 }, { "epoch": 0.7118922808099496, "grad_norm": 0.21787157654762268, "learning_rate": 8.023218108402096e-05, "loss": 0.6776, "step": 3463 }, { "epoch": 0.7120978517833282, "grad_norm": 0.19112589955329895, "learning_rate": 8.022596660922212e-05, "loss": 0.5856, "step": 3464 }, { "epoch": 0.7123034227567068, "grad_norm": 0.20584847033023834, "learning_rate": 8.021975039900453e-05, "loss": 0.6659, "step": 3465 }, { "epoch": 0.7125089937300854, "grad_norm": 0.13937044143676758, "learning_rate": 8.021353245367445e-05, "loss": 0.581, "step": 3466 }, { "epoch": 0.7127145647034638, "grad_norm": 0.21949850022792816, "learning_rate": 8.020731277353824e-05, "loss": 0.6818, "step": 3467 }, { "epoch": 0.7129201356768424, "grad_norm": 0.19672751426696777, "learning_rate": 8.020109135890227e-05, "loss": 0.6788, "step": 3468 }, { "epoch": 0.713125706650221, "grad_norm": 0.18057693541049957, "learning_rate": 8.019486821007307e-05, "loss": 0.5962, "step": 3469 }, { "epoch": 0.7133312776235996, "grad_norm": 0.20432183146476746, "learning_rate": 8.01886433273572e-05, "loss": 0.6854, "step": 3470 }, { "epoch": 0.7135368485969781, "grad_norm": 0.20442970097064972, "learning_rate": 8.018241671106135e-05, "loss": 0.6755, "step": 3471 }, { "epoch": 0.7137424195703567, "grad_norm": 0.1377362608909607, "learning_rate": 8.017618836149227e-05, "loss": 0.5924, "step": 3472 }, { "epoch": 0.7139479905437353, "grad_norm": 0.20388440787792206, "learning_rate": 8.01699582789568e-05, "loss": 0.6946, "step": 3473 }, { "epoch": 0.7141535615171137, "grad_norm": 0.2007599174976349, "learning_rate": 8.016372646376188e-05, "loss": 0.6916, "step": 3474 }, { "epoch": 0.7143591324904923, "grad_norm": 0.1868349313735962, "learning_rate": 8.015749291621449e-05, "loss": 0.6758, "step": 3475 }, { "epoch": 0.7145647034638709, "grad_norm": 0.20039929449558258, "learning_rate": 8.015125763662177e-05, "loss": 0.6769, "step": 3476 }, { "epoch": 0.7147702744372495, "grad_norm": 0.1937168687582016, "learning_rate": 8.014502062529089e-05, "loss": 0.6572, "step": 3477 }, { "epoch": 0.714975845410628, "grad_norm": 0.16396324336528778, "learning_rate": 8.013878188252908e-05, "loss": 0.5781, "step": 3478 }, { "epoch": 0.7151814163840066, "grad_norm": 0.19520901143550873, "learning_rate": 8.013254140864376e-05, "loss": 0.7001, "step": 3479 }, { "epoch": 0.7153869873573852, "grad_norm": 0.1290317177772522, "learning_rate": 8.012629920394231e-05, "loss": 0.5826, "step": 3480 }, { "epoch": 0.7155925583307637, "grad_norm": 0.20711787045001984, "learning_rate": 8.012005526873228e-05, "loss": 0.7025, "step": 3481 }, { "epoch": 0.7157981293041422, "grad_norm": 0.20414526760578156, "learning_rate": 8.011380960332128e-05, "loss": 0.6697, "step": 3482 }, { "epoch": 0.7160037002775208, "grad_norm": 0.19431988894939423, "learning_rate": 8.010756220801702e-05, "loss": 0.6705, "step": 3483 }, { "epoch": 0.7162092712508994, "grad_norm": 0.1636938601732254, "learning_rate": 8.010131308312725e-05, "loss": 0.5727, "step": 3484 }, { "epoch": 0.716414842224278, "grad_norm": 0.19284431636333466, "learning_rate": 8.009506222895984e-05, "loss": 0.6772, "step": 3485 }, { "epoch": 0.7166204131976565, "grad_norm": 0.19347639381885529, "learning_rate": 8.008880964582275e-05, "loss": 0.6934, "step": 3486 }, { "epoch": 0.716825984171035, "grad_norm": 0.12324893474578857, "learning_rate": 8.008255533402403e-05, "loss": 0.5841, "step": 3487 }, { "epoch": 0.7170315551444136, "grad_norm": 0.12979742884635925, "learning_rate": 8.007629929387176e-05, "loss": 0.5726, "step": 3488 }, { "epoch": 0.7172371261177922, "grad_norm": 0.19342902302742004, "learning_rate": 8.007004152567417e-05, "loss": 0.6887, "step": 3489 }, { "epoch": 0.7174426970911707, "grad_norm": 0.13253627717494965, "learning_rate": 8.006378202973959e-05, "loss": 0.5835, "step": 3490 }, { "epoch": 0.7176482680645493, "grad_norm": 0.2006087452173233, "learning_rate": 8.005752080637632e-05, "loss": 0.6998, "step": 3491 }, { "epoch": 0.7178538390379279, "grad_norm": 0.12888813018798828, "learning_rate": 8.005125785589286e-05, "loss": 0.595, "step": 3492 }, { "epoch": 0.7180594100113064, "grad_norm": 0.1942748874425888, "learning_rate": 8.004499317859776e-05, "loss": 0.683, "step": 3493 }, { "epoch": 0.7182649809846849, "grad_norm": 0.18737460672855377, "learning_rate": 8.003872677479965e-05, "loss": 0.6861, "step": 3494 }, { "epoch": 0.7184705519580635, "grad_norm": 0.24117667973041534, "learning_rate": 8.003245864480724e-05, "loss": 0.6826, "step": 3495 }, { "epoch": 0.7186761229314421, "grad_norm": 0.19393832981586456, "learning_rate": 8.002618878892934e-05, "loss": 0.6682, "step": 3496 }, { "epoch": 0.7188816939048206, "grad_norm": 0.19202245771884918, "learning_rate": 8.001991720747481e-05, "loss": 0.683, "step": 3497 }, { "epoch": 0.7190872648781992, "grad_norm": 0.18830347061157227, "learning_rate": 8.001364390075266e-05, "loss": 0.6762, "step": 3498 }, { "epoch": 0.7192928358515778, "grad_norm": 0.18478117883205414, "learning_rate": 8.000736886907193e-05, "loss": 0.673, "step": 3499 }, { "epoch": 0.7194984068249564, "grad_norm": 0.19119176268577576, "learning_rate": 8.000109211274176e-05, "loss": 0.6683, "step": 3500 }, { "epoch": 0.7197039777983348, "grad_norm": 0.18504808843135834, "learning_rate": 7.999481363207136e-05, "loss": 0.6671, "step": 3501 }, { "epoch": 0.7199095487717134, "grad_norm": 0.18554535508155823, "learning_rate": 7.998853342737007e-05, "loss": 0.6531, "step": 3502 }, { "epoch": 0.720115119745092, "grad_norm": 0.20063155889511108, "learning_rate": 7.998225149894729e-05, "loss": 0.6826, "step": 3503 }, { "epoch": 0.7203206907184706, "grad_norm": 0.18054603040218353, "learning_rate": 7.997596784711245e-05, "loss": 0.6657, "step": 3504 }, { "epoch": 0.7205262616918491, "grad_norm": 0.19543704390525818, "learning_rate": 7.996968247217517e-05, "loss": 0.7077, "step": 3505 }, { "epoch": 0.7207318326652277, "grad_norm": 0.196107417345047, "learning_rate": 7.996339537444508e-05, "loss": 0.6607, "step": 3506 }, { "epoch": 0.7209374036386063, "grad_norm": 0.1699989140033722, "learning_rate": 7.995710655423193e-05, "loss": 0.5965, "step": 3507 }, { "epoch": 0.7211429746119847, "grad_norm": 0.13372716307640076, "learning_rate": 7.995081601184552e-05, "loss": 0.5885, "step": 3508 }, { "epoch": 0.7213485455853633, "grad_norm": 0.2239861637353897, "learning_rate": 7.994452374759577e-05, "loss": 0.6822, "step": 3509 }, { "epoch": 0.7215541165587419, "grad_norm": 0.20403791964054108, "learning_rate": 7.993822976179265e-05, "loss": 0.6794, "step": 3510 }, { "epoch": 0.7217596875321205, "grad_norm": 0.18789462745189667, "learning_rate": 7.993193405474626e-05, "loss": 0.6642, "step": 3511 }, { "epoch": 0.721965258505499, "grad_norm": 0.1892167031764984, "learning_rate": 7.992563662676676e-05, "loss": 0.6768, "step": 3512 }, { "epoch": 0.7221708294788776, "grad_norm": 0.19989047944545746, "learning_rate": 7.991933747816437e-05, "loss": 0.7015, "step": 3513 }, { "epoch": 0.7223764004522562, "grad_norm": 0.19818507134914398, "learning_rate": 7.991303660924944e-05, "loss": 0.6459, "step": 3514 }, { "epoch": 0.7225819714256347, "grad_norm": 0.20084840059280396, "learning_rate": 7.990673402033238e-05, "loss": 0.6967, "step": 3515 }, { "epoch": 0.7227875423990132, "grad_norm": 0.19589127600193024, "learning_rate": 7.990042971172369e-05, "loss": 0.6819, "step": 3516 }, { "epoch": 0.7229931133723918, "grad_norm": 0.2054595798254013, "learning_rate": 7.989412368373395e-05, "loss": 0.5563, "step": 3517 }, { "epoch": 0.7231986843457704, "grad_norm": 0.16840699315071106, "learning_rate": 7.988781593667382e-05, "loss": 0.5998, "step": 3518 }, { "epoch": 0.723404255319149, "grad_norm": 0.20174477994441986, "learning_rate": 7.988150647085408e-05, "loss": 0.6767, "step": 3519 }, { "epoch": 0.7236098262925275, "grad_norm": 0.2114832103252411, "learning_rate": 7.987519528658556e-05, "loss": 0.674, "step": 3520 }, { "epoch": 0.723815397265906, "grad_norm": 0.20603235065937042, "learning_rate": 7.986888238417915e-05, "loss": 0.6922, "step": 3521 }, { "epoch": 0.7240209682392846, "grad_norm": 0.19396202266216278, "learning_rate": 7.98625677639459e-05, "loss": 0.6542, "step": 3522 }, { "epoch": 0.7242265392126632, "grad_norm": 0.19188427925109863, "learning_rate": 7.985625142619688e-05, "loss": 0.6423, "step": 3523 }, { "epoch": 0.7244321101860417, "grad_norm": 0.24525907635688782, "learning_rate": 7.984993337124326e-05, "loss": 0.5969, "step": 3524 }, { "epoch": 0.7246376811594203, "grad_norm": 0.22921410202980042, "learning_rate": 7.984361359939632e-05, "loss": 0.6787, "step": 3525 }, { "epoch": 0.7248432521327989, "grad_norm": 0.23027624189853668, "learning_rate": 7.98372921109674e-05, "loss": 0.6958, "step": 3526 }, { "epoch": 0.7250488231061774, "grad_norm": 0.21798734366893768, "learning_rate": 7.983096890626792e-05, "loss": 0.7058, "step": 3527 }, { "epoch": 0.7252543940795559, "grad_norm": 0.1834592968225479, "learning_rate": 7.98246439856094e-05, "loss": 0.5576, "step": 3528 }, { "epoch": 0.7254599650529345, "grad_norm": 0.20253108441829681, "learning_rate": 7.981831734930344e-05, "loss": 0.6919, "step": 3529 }, { "epoch": 0.7256655360263131, "grad_norm": 0.2038789689540863, "learning_rate": 7.981198899766173e-05, "loss": 0.7226, "step": 3530 }, { "epoch": 0.7258711069996916, "grad_norm": 0.19789783656597137, "learning_rate": 7.980565893099604e-05, "loss": 0.6876, "step": 3531 }, { "epoch": 0.7260766779730702, "grad_norm": 0.14825506508350372, "learning_rate": 7.97993271496182e-05, "loss": 0.5838, "step": 3532 }, { "epoch": 0.7262822489464488, "grad_norm": 0.19643041491508484, "learning_rate": 7.979299365384017e-05, "loss": 0.6868, "step": 3533 }, { "epoch": 0.7264878199198274, "grad_norm": 0.20128373801708221, "learning_rate": 7.978665844397397e-05, "loss": 0.683, "step": 3534 }, { "epoch": 0.7266933908932058, "grad_norm": 0.2025127112865448, "learning_rate": 7.978032152033169e-05, "loss": 0.6801, "step": 3535 }, { "epoch": 0.7268989618665844, "grad_norm": 0.19767989218235016, "learning_rate": 7.977398288322554e-05, "loss": 0.6735, "step": 3536 }, { "epoch": 0.727104532839963, "grad_norm": 0.1649659425020218, "learning_rate": 7.976764253296779e-05, "loss": 0.5818, "step": 3537 }, { "epoch": 0.7273101038133416, "grad_norm": 0.20704413950443268, "learning_rate": 7.976130046987078e-05, "loss": 0.7032, "step": 3538 }, { "epoch": 0.7275156747867201, "grad_norm": 0.20047134160995483, "learning_rate": 7.975495669424698e-05, "loss": 0.6851, "step": 3539 }, { "epoch": 0.7277212457600987, "grad_norm": 0.14262793958187103, "learning_rate": 7.974861120640891e-05, "loss": 0.5911, "step": 3540 }, { "epoch": 0.7279268167334773, "grad_norm": 0.19910430908203125, "learning_rate": 7.974226400666918e-05, "loss": 0.6729, "step": 3541 }, { "epoch": 0.7281323877068558, "grad_norm": 0.1975426971912384, "learning_rate": 7.973591509534048e-05, "loss": 0.6614, "step": 3542 }, { "epoch": 0.7283379586802343, "grad_norm": 0.18741396069526672, "learning_rate": 7.972956447273561e-05, "loss": 0.6808, "step": 3543 }, { "epoch": 0.7285435296536129, "grad_norm": 0.19174180924892426, "learning_rate": 7.972321213916742e-05, "loss": 0.6732, "step": 3544 }, { "epoch": 0.7287491006269915, "grad_norm": 0.18941205739974976, "learning_rate": 7.971685809494886e-05, "loss": 0.6854, "step": 3545 }, { "epoch": 0.72895467160037, "grad_norm": 0.18745878338813782, "learning_rate": 7.971050234039298e-05, "loss": 0.6653, "step": 3546 }, { "epoch": 0.7291602425737486, "grad_norm": 0.2130347341299057, "learning_rate": 7.970414487581287e-05, "loss": 0.6932, "step": 3547 }, { "epoch": 0.7293658135471272, "grad_norm": 0.18765027821063995, "learning_rate": 7.969778570152175e-05, "loss": 0.6639, "step": 3548 }, { "epoch": 0.7295713845205057, "grad_norm": 0.1892290711402893, "learning_rate": 7.969142481783291e-05, "loss": 0.6788, "step": 3549 }, { "epoch": 0.7297769554938842, "grad_norm": 0.19938233494758606, "learning_rate": 7.968506222505972e-05, "loss": 0.6736, "step": 3550 }, { "epoch": 0.7299825264672628, "grad_norm": 0.19479283690452576, "learning_rate": 7.967869792351563e-05, "loss": 0.671, "step": 3551 }, { "epoch": 0.7301880974406414, "grad_norm": 0.18895529210567474, "learning_rate": 7.967233191351418e-05, "loss": 0.6559, "step": 3552 }, { "epoch": 0.73039366841402, "grad_norm": 0.19964531064033508, "learning_rate": 7.966596419536899e-05, "loss": 0.6772, "step": 3553 }, { "epoch": 0.7305992393873985, "grad_norm": 0.1866195648908615, "learning_rate": 7.965959476939377e-05, "loss": 0.642, "step": 3554 }, { "epoch": 0.730804810360777, "grad_norm": 0.15533728897571564, "learning_rate": 7.965322363590232e-05, "loss": 0.5754, "step": 3555 }, { "epoch": 0.7310103813341556, "grad_norm": 0.19216640293598175, "learning_rate": 7.964685079520851e-05, "loss": 0.6827, "step": 3556 }, { "epoch": 0.7312159523075342, "grad_norm": 0.1994984894990921, "learning_rate": 7.96404762476263e-05, "loss": 0.6814, "step": 3557 }, { "epoch": 0.7314215232809127, "grad_norm": 0.34993866086006165, "learning_rate": 7.963409999346974e-05, "loss": 0.7039, "step": 3558 }, { "epoch": 0.7316270942542913, "grad_norm": 0.13572952151298523, "learning_rate": 7.962772203305295e-05, "loss": 0.5847, "step": 3559 }, { "epoch": 0.7318326652276699, "grad_norm": 0.21044890582561493, "learning_rate": 7.962134236669015e-05, "loss": 0.6852, "step": 3560 }, { "epoch": 0.7320382362010485, "grad_norm": 0.13309255242347717, "learning_rate": 7.961496099469562e-05, "loss": 0.5953, "step": 3561 }, { "epoch": 0.7322438071744269, "grad_norm": 0.19451969861984253, "learning_rate": 7.960857791738376e-05, "loss": 0.6785, "step": 3562 }, { "epoch": 0.7324493781478055, "grad_norm": 0.12751372158527374, "learning_rate": 7.960219313506901e-05, "loss": 0.6012, "step": 3563 }, { "epoch": 0.7326549491211841, "grad_norm": 0.19144867360591888, "learning_rate": 7.959580664806594e-05, "loss": 0.6883, "step": 3564 }, { "epoch": 0.7328605200945626, "grad_norm": 0.18746548891067505, "learning_rate": 7.958941845668921e-05, "loss": 0.6731, "step": 3565 }, { "epoch": 0.7330660910679412, "grad_norm": 0.7065462470054626, "learning_rate": 7.958302856125347e-05, "loss": 0.595, "step": 3566 }, { "epoch": 0.7332716620413198, "grad_norm": 0.1951018124818802, "learning_rate": 7.957663696207355e-05, "loss": 0.6601, "step": 3567 }, { "epoch": 0.7334772330146984, "grad_norm": 0.13065175712108612, "learning_rate": 7.957024365946436e-05, "loss": 0.5551, "step": 3568 }, { "epoch": 0.7336828039880768, "grad_norm": 0.21272675693035126, "learning_rate": 7.956384865374082e-05, "loss": 0.6846, "step": 3569 }, { "epoch": 0.7338883749614554, "grad_norm": 0.19540101289749146, "learning_rate": 7.955745194521802e-05, "loss": 0.6747, "step": 3570 }, { "epoch": 0.734093945934834, "grad_norm": 0.19584521651268005, "learning_rate": 7.95510535342111e-05, "loss": 0.6877, "step": 3571 }, { "epoch": 0.7342995169082126, "grad_norm": 0.19038638472557068, "learning_rate": 7.954465342103525e-05, "loss": 0.6776, "step": 3572 }, { "epoch": 0.7345050878815911, "grad_norm": 0.1913788616657257, "learning_rate": 7.953825160600579e-05, "loss": 0.6754, "step": 3573 }, { "epoch": 0.7347106588549697, "grad_norm": 0.19518351554870605, "learning_rate": 7.953184808943808e-05, "loss": 0.675, "step": 3574 }, { "epoch": 0.7349162298283483, "grad_norm": 0.19314491748809814, "learning_rate": 7.952544287164763e-05, "loss": 0.6771, "step": 3575 }, { "epoch": 0.7351218008017268, "grad_norm": 0.2056049257516861, "learning_rate": 7.951903595295e-05, "loss": 0.6825, "step": 3576 }, { "epoch": 0.7353273717751053, "grad_norm": 0.19159257411956787, "learning_rate": 7.95126273336608e-05, "loss": 0.6783, "step": 3577 }, { "epoch": 0.7355329427484839, "grad_norm": 0.1686679869890213, "learning_rate": 7.950621701409577e-05, "loss": 0.581, "step": 3578 }, { "epoch": 0.7357385137218625, "grad_norm": 0.14951810240745544, "learning_rate": 7.94998049945707e-05, "loss": 0.5694, "step": 3579 }, { "epoch": 0.7359440846952411, "grad_norm": 0.2037050724029541, "learning_rate": 7.949339127540149e-05, "loss": 0.6722, "step": 3580 }, { "epoch": 0.7361496556686196, "grad_norm": 0.15541227161884308, "learning_rate": 7.948697585690412e-05, "loss": 0.6053, "step": 3581 }, { "epoch": 0.7363552266419982, "grad_norm": 0.20057538151741028, "learning_rate": 7.948055873939463e-05, "loss": 0.6745, "step": 3582 }, { "epoch": 0.7365607976153767, "grad_norm": 0.19490864872932434, "learning_rate": 7.947413992318918e-05, "loss": 0.6963, "step": 3583 }, { "epoch": 0.7367663685887552, "grad_norm": 0.19570674002170563, "learning_rate": 7.946771940860398e-05, "loss": 0.6913, "step": 3584 }, { "epoch": 0.7369719395621338, "grad_norm": 0.18625394999980927, "learning_rate": 7.946129719595535e-05, "loss": 0.6699, "step": 3585 }, { "epoch": 0.7371775105355124, "grad_norm": 0.6736593246459961, "learning_rate": 7.945487328555969e-05, "loss": 0.5934, "step": 3586 }, { "epoch": 0.737383081508891, "grad_norm": 0.1934710294008255, "learning_rate": 7.944844767773344e-05, "loss": 0.672, "step": 3587 }, { "epoch": 0.7375886524822695, "grad_norm": 0.20478187501430511, "learning_rate": 7.944202037279322e-05, "loss": 0.6703, "step": 3588 }, { "epoch": 0.737794223455648, "grad_norm": 0.1952143758535385, "learning_rate": 7.94355913710556e-05, "loss": 0.665, "step": 3589 }, { "epoch": 0.7379997944290266, "grad_norm": 0.2044733166694641, "learning_rate": 7.942916067283737e-05, "loss": 0.6705, "step": 3590 }, { "epoch": 0.7382053654024052, "grad_norm": 0.1511656492948532, "learning_rate": 7.942272827845531e-05, "loss": 0.5709, "step": 3591 }, { "epoch": 0.7384109363757837, "grad_norm": 0.20712168514728546, "learning_rate": 7.941629418822631e-05, "loss": 0.6822, "step": 3592 }, { "epoch": 0.7386165073491623, "grad_norm": 0.18875378370285034, "learning_rate": 7.940985840246738e-05, "loss": 0.6657, "step": 3593 }, { "epoch": 0.7388220783225409, "grad_norm": 0.20335470139980316, "learning_rate": 7.940342092149552e-05, "loss": 0.6803, "step": 3594 }, { "epoch": 0.7390276492959195, "grad_norm": 0.19990339875221252, "learning_rate": 7.939698174562795e-05, "loss": 0.6633, "step": 3595 }, { "epoch": 0.7392332202692979, "grad_norm": 0.19923284649848938, "learning_rate": 7.939054087518184e-05, "loss": 0.6894, "step": 3596 }, { "epoch": 0.7394387912426765, "grad_norm": 0.20602424442768097, "learning_rate": 7.938409831047452e-05, "loss": 0.7057, "step": 3597 }, { "epoch": 0.7396443622160551, "grad_norm": 0.19284965097904205, "learning_rate": 7.93776540518234e-05, "loss": 0.6619, "step": 3598 }, { "epoch": 0.7398499331894337, "grad_norm": 0.18483732640743256, "learning_rate": 7.937120809954593e-05, "loss": 0.664, "step": 3599 }, { "epoch": 0.7400555041628122, "grad_norm": 0.19070151448249817, "learning_rate": 7.93647604539597e-05, "loss": 0.6934, "step": 3600 }, { "epoch": 0.7402610751361908, "grad_norm": 0.1932380348443985, "learning_rate": 7.935831111538234e-05, "loss": 0.6692, "step": 3601 }, { "epoch": 0.7404666461095694, "grad_norm": 0.1923176348209381, "learning_rate": 7.935186008413158e-05, "loss": 0.6813, "step": 3602 }, { "epoch": 0.7406722170829478, "grad_norm": 0.19491972029209137, "learning_rate": 7.934540736052524e-05, "loss": 0.6571, "step": 3603 }, { "epoch": 0.7408777880563264, "grad_norm": 0.19038790464401245, "learning_rate": 7.93389529448812e-05, "loss": 0.6627, "step": 3604 }, { "epoch": 0.741083359029705, "grad_norm": 0.1902906596660614, "learning_rate": 7.933249683751745e-05, "loss": 0.6792, "step": 3605 }, { "epoch": 0.7412889300030836, "grad_norm": 0.18056754767894745, "learning_rate": 7.932603903875205e-05, "loss": 0.6706, "step": 3606 }, { "epoch": 0.7414945009764621, "grad_norm": 0.19401055574417114, "learning_rate": 7.931957954890316e-05, "loss": 0.6997, "step": 3607 }, { "epoch": 0.7417000719498407, "grad_norm": 0.19308343529701233, "learning_rate": 7.931311836828898e-05, "loss": 0.6804, "step": 3608 }, { "epoch": 0.7419056429232193, "grad_norm": 0.20034140348434448, "learning_rate": 7.930665549722784e-05, "loss": 0.6672, "step": 3609 }, { "epoch": 0.7421112138965978, "grad_norm": 0.1429484337568283, "learning_rate": 7.930019093603813e-05, "loss": 0.5769, "step": 3610 }, { "epoch": 0.7423167848699763, "grad_norm": 0.19549964368343353, "learning_rate": 7.929372468503834e-05, "loss": 0.68, "step": 3611 }, { "epoch": 0.7425223558433549, "grad_norm": 0.1939014494419098, "learning_rate": 7.928725674454702e-05, "loss": 0.6436, "step": 3612 }, { "epoch": 0.7427279268167335, "grad_norm": 0.1987033188343048, "learning_rate": 7.928078711488281e-05, "loss": 0.6975, "step": 3613 }, { "epoch": 0.7429334977901121, "grad_norm": 0.19069653749465942, "learning_rate": 7.927431579636445e-05, "loss": 0.6744, "step": 3614 }, { "epoch": 0.7431390687634906, "grad_norm": 0.14583733677864075, "learning_rate": 7.926784278931075e-05, "loss": 0.587, "step": 3615 }, { "epoch": 0.7433446397368692, "grad_norm": 0.19307653605937958, "learning_rate": 7.926136809404063e-05, "loss": 0.6458, "step": 3616 }, { "epoch": 0.7435502107102477, "grad_norm": 0.19686581194400787, "learning_rate": 7.9254891710873e-05, "loss": 0.6936, "step": 3617 }, { "epoch": 0.7437557816836263, "grad_norm": 0.19272616505622864, "learning_rate": 7.924841364012698e-05, "loss": 0.6931, "step": 3618 }, { "epoch": 0.7439613526570048, "grad_norm": 0.1832963228225708, "learning_rate": 7.92419338821217e-05, "loss": 0.6543, "step": 3619 }, { "epoch": 0.7441669236303834, "grad_norm": 0.1948852688074112, "learning_rate": 7.923545243717638e-05, "loss": 0.6934, "step": 3620 }, { "epoch": 0.744372494603762, "grad_norm": 0.19358238577842712, "learning_rate": 7.922896930561034e-05, "loss": 0.6901, "step": 3621 }, { "epoch": 0.7445780655771405, "grad_norm": 0.18982093036174774, "learning_rate": 7.922248448774296e-05, "loss": 0.6832, "step": 3622 }, { "epoch": 0.744783636550519, "grad_norm": 0.19411057233810425, "learning_rate": 7.921599798389372e-05, "loss": 0.6899, "step": 3623 }, { "epoch": 0.7449892075238976, "grad_norm": 0.1885984092950821, "learning_rate": 7.92095097943822e-05, "loss": 0.6699, "step": 3624 }, { "epoch": 0.7451947784972762, "grad_norm": 0.19820182025432587, "learning_rate": 7.920301991952802e-05, "loss": 0.6872, "step": 3625 }, { "epoch": 0.7454003494706547, "grad_norm": 0.18656107783317566, "learning_rate": 7.91965283596509e-05, "loss": 0.6982, "step": 3626 }, { "epoch": 0.7456059204440333, "grad_norm": 0.14508990943431854, "learning_rate": 7.919003511507069e-05, "loss": 0.5908, "step": 3627 }, { "epoch": 0.7458114914174119, "grad_norm": 0.2058647722005844, "learning_rate": 7.918354018610723e-05, "loss": 0.6962, "step": 3628 }, { "epoch": 0.7460170623907905, "grad_norm": 0.20024776458740234, "learning_rate": 7.917704357308052e-05, "loss": 0.6748, "step": 3629 }, { "epoch": 0.7462226333641689, "grad_norm": 0.18803846836090088, "learning_rate": 7.917054527631062e-05, "loss": 0.6878, "step": 3630 }, { "epoch": 0.7464282043375475, "grad_norm": 0.18676309287548065, "learning_rate": 7.916404529611768e-05, "loss": 0.6497, "step": 3631 }, { "epoch": 0.7466337753109261, "grad_norm": 0.18984469771385193, "learning_rate": 7.915754363282189e-05, "loss": 0.667, "step": 3632 }, { "epoch": 0.7468393462843047, "grad_norm": 0.1905134618282318, "learning_rate": 7.915104028674359e-05, "loss": 0.7037, "step": 3633 }, { "epoch": 0.7470449172576832, "grad_norm": 0.19282597303390503, "learning_rate": 7.914453525820314e-05, "loss": 0.6825, "step": 3634 }, { "epoch": 0.7472504882310618, "grad_norm": 0.191225066781044, "learning_rate": 7.913802854752105e-05, "loss": 0.6693, "step": 3635 }, { "epoch": 0.7474560592044404, "grad_norm": 0.19597823917865753, "learning_rate": 7.913152015501785e-05, "loss": 0.6854, "step": 3636 }, { "epoch": 0.7476616301778188, "grad_norm": 0.19076837599277496, "learning_rate": 7.912501008101417e-05, "loss": 0.6669, "step": 3637 }, { "epoch": 0.7478672011511974, "grad_norm": 0.15839332342147827, "learning_rate": 7.911849832583075e-05, "loss": 0.5823, "step": 3638 }, { "epoch": 0.748072772124576, "grad_norm": 0.19790640473365784, "learning_rate": 7.91119848897884e-05, "loss": 0.6758, "step": 3639 }, { "epoch": 0.7482783430979546, "grad_norm": 0.20291505753993988, "learning_rate": 7.910546977320799e-05, "loss": 0.6858, "step": 3640 }, { "epoch": 0.7484839140713331, "grad_norm": 0.19537273049354553, "learning_rate": 7.909895297641047e-05, "loss": 0.6818, "step": 3641 }, { "epoch": 0.7486894850447117, "grad_norm": 0.14734981954097748, "learning_rate": 7.909243449971693e-05, "loss": 0.5743, "step": 3642 }, { "epoch": 0.7488950560180903, "grad_norm": 0.15119509398937225, "learning_rate": 7.90859143434485e-05, "loss": 0.5797, "step": 3643 }, { "epoch": 0.7491006269914688, "grad_norm": 0.23732592165470123, "learning_rate": 7.907939250792638e-05, "loss": 0.6841, "step": 3644 }, { "epoch": 0.7493061979648473, "grad_norm": 0.2022113800048828, "learning_rate": 7.907286899347187e-05, "loss": 0.707, "step": 3645 }, { "epoch": 0.7495117689382259, "grad_norm": 0.19698172807693481, "learning_rate": 7.906634380040636e-05, "loss": 0.6966, "step": 3646 }, { "epoch": 0.7497173399116045, "grad_norm": 0.21839676797389984, "learning_rate": 7.905981692905133e-05, "loss": 0.6853, "step": 3647 }, { "epoch": 0.7499229108849831, "grad_norm": 0.20229050517082214, "learning_rate": 7.90532883797283e-05, "loss": 0.659, "step": 3648 }, { "epoch": 0.7501284818583616, "grad_norm": 0.18536463379859924, "learning_rate": 7.904675815275894e-05, "loss": 0.6534, "step": 3649 }, { "epoch": 0.7503340528317402, "grad_norm": 0.20928248763084412, "learning_rate": 7.904022624846491e-05, "loss": 0.6913, "step": 3650 }, { "epoch": 0.7505396238051187, "grad_norm": 0.20999811589717865, "learning_rate": 7.903369266716806e-05, "loss": 0.654, "step": 3651 }, { "epoch": 0.7507451947784973, "grad_norm": 0.19690896570682526, "learning_rate": 7.902715740919023e-05, "loss": 0.5836, "step": 3652 }, { "epoch": 0.7509507657518758, "grad_norm": 0.1489873230457306, "learning_rate": 7.902062047485341e-05, "loss": 0.5822, "step": 3653 }, { "epoch": 0.7511563367252544, "grad_norm": 0.2375965416431427, "learning_rate": 7.901408186447962e-05, "loss": 0.6857, "step": 3654 }, { "epoch": 0.751361907698633, "grad_norm": 0.2292969673871994, "learning_rate": 7.9007541578391e-05, "loss": 0.6998, "step": 3655 }, { "epoch": 0.7515674786720115, "grad_norm": 0.1982121616601944, "learning_rate": 7.900099961690976e-05, "loss": 0.6853, "step": 3656 }, { "epoch": 0.75177304964539, "grad_norm": 0.21135136485099792, "learning_rate": 7.899445598035819e-05, "loss": 0.6663, "step": 3657 }, { "epoch": 0.7519786206187686, "grad_norm": 0.2433331459760666, "learning_rate": 7.898791066905866e-05, "loss": 0.603, "step": 3658 }, { "epoch": 0.7521841915921472, "grad_norm": 0.19841930270195007, "learning_rate": 7.898136368333363e-05, "loss": 0.6507, "step": 3659 }, { "epoch": 0.7523897625655257, "grad_norm": 0.20042434334754944, "learning_rate": 7.897481502350565e-05, "loss": 0.6522, "step": 3660 }, { "epoch": 0.7525953335389043, "grad_norm": 0.2082412987947464, "learning_rate": 7.896826468989731e-05, "loss": 0.682, "step": 3661 }, { "epoch": 0.7528009045122829, "grad_norm": 0.2017931491136551, "learning_rate": 7.896171268283136e-05, "loss": 0.6729, "step": 3662 }, { "epoch": 0.7530064754856615, "grad_norm": 0.1931910514831543, "learning_rate": 7.895515900263055e-05, "loss": 0.6525, "step": 3663 }, { "epoch": 0.7532120464590399, "grad_norm": 0.21447621285915375, "learning_rate": 7.894860364961778e-05, "loss": 0.689, "step": 3664 }, { "epoch": 0.7534176174324185, "grad_norm": 0.20270651578903198, "learning_rate": 7.894204662411595e-05, "loss": 0.6926, "step": 3665 }, { "epoch": 0.7536231884057971, "grad_norm": 0.1878805160522461, "learning_rate": 7.893548792644815e-05, "loss": 0.6721, "step": 3666 }, { "epoch": 0.7538287593791757, "grad_norm": 0.19181132316589355, "learning_rate": 7.892892755693747e-05, "loss": 0.6734, "step": 3667 }, { "epoch": 0.7540343303525542, "grad_norm": 0.19380466639995575, "learning_rate": 7.892236551590712e-05, "loss": 0.6621, "step": 3668 }, { "epoch": 0.7542399013259328, "grad_norm": 0.20492911338806152, "learning_rate": 7.891580180368036e-05, "loss": 0.6827, "step": 3669 }, { "epoch": 0.7544454722993114, "grad_norm": 0.18449199199676514, "learning_rate": 7.890923642058058e-05, "loss": 0.6666, "step": 3670 }, { "epoch": 0.75465104327269, "grad_norm": 0.18999159336090088, "learning_rate": 7.890266936693121e-05, "loss": 0.6498, "step": 3671 }, { "epoch": 0.7548566142460684, "grad_norm": 0.19277434051036835, "learning_rate": 7.889610064305578e-05, "loss": 0.6759, "step": 3672 }, { "epoch": 0.755062185219447, "grad_norm": 0.1884971410036087, "learning_rate": 7.888953024927789e-05, "loss": 0.6745, "step": 3673 }, { "epoch": 0.7552677561928256, "grad_norm": 0.19598397612571716, "learning_rate": 7.888295818592125e-05, "loss": 0.6803, "step": 3674 }, { "epoch": 0.7554733271662041, "grad_norm": 0.19982978701591492, "learning_rate": 7.887638445330962e-05, "loss": 0.6736, "step": 3675 }, { "epoch": 0.7556788981395827, "grad_norm": 0.19140852987766266, "learning_rate": 7.886980905176689e-05, "loss": 0.6659, "step": 3676 }, { "epoch": 0.7558844691129613, "grad_norm": 0.18775241076946259, "learning_rate": 7.886323198161695e-05, "loss": 0.67, "step": 3677 }, { "epoch": 0.7560900400863398, "grad_norm": 0.1859831064939499, "learning_rate": 7.885665324318386e-05, "loss": 0.6554, "step": 3678 }, { "epoch": 0.7562956110597183, "grad_norm": 0.19015206396579742, "learning_rate": 7.885007283679173e-05, "loss": 0.7039, "step": 3679 }, { "epoch": 0.7565011820330969, "grad_norm": 0.19563472270965576, "learning_rate": 7.884349076276469e-05, "loss": 0.6769, "step": 3680 }, { "epoch": 0.7567067530064755, "grad_norm": 0.2165932059288025, "learning_rate": 7.883690702142706e-05, "loss": 0.5897, "step": 3681 }, { "epoch": 0.7569123239798541, "grad_norm": 0.19110572338104248, "learning_rate": 7.883032161310318e-05, "loss": 0.6666, "step": 3682 }, { "epoch": 0.7571178949532326, "grad_norm": 0.2043447345495224, "learning_rate": 7.882373453811745e-05, "loss": 0.6633, "step": 3683 }, { "epoch": 0.7573234659266112, "grad_norm": 0.19598691165447235, "learning_rate": 7.881714579679444e-05, "loss": 0.6601, "step": 3684 }, { "epoch": 0.7575290368999897, "grad_norm": 0.16248776018619537, "learning_rate": 7.88105553894587e-05, "loss": 0.585, "step": 3685 }, { "epoch": 0.7577346078733683, "grad_norm": 0.1903761625289917, "learning_rate": 7.880396331643496e-05, "loss": 0.6702, "step": 3686 }, { "epoch": 0.7579401788467468, "grad_norm": 0.19729363918304443, "learning_rate": 7.87973695780479e-05, "loss": 0.6762, "step": 3687 }, { "epoch": 0.7581457498201254, "grad_norm": 0.20168879628181458, "learning_rate": 7.879077417462244e-05, "loss": 0.7108, "step": 3688 }, { "epoch": 0.758351320793504, "grad_norm": 0.18572981655597687, "learning_rate": 7.878417710648346e-05, "loss": 0.6516, "step": 3689 }, { "epoch": 0.7585568917668826, "grad_norm": 0.18781378865242004, "learning_rate": 7.8777578373956e-05, "loss": 0.6767, "step": 3690 }, { "epoch": 0.758762462740261, "grad_norm": 0.1998245269060135, "learning_rate": 7.877097797736511e-05, "loss": 0.6723, "step": 3691 }, { "epoch": 0.7589680337136396, "grad_norm": 0.22822120785713196, "learning_rate": 7.876437591703598e-05, "loss": 0.668, "step": 3692 }, { "epoch": 0.7591736046870182, "grad_norm": 0.19273287057876587, "learning_rate": 7.875777219329386e-05, "loss": 0.6699, "step": 3693 }, { "epoch": 0.7593791756603967, "grad_norm": 0.2089652717113495, "learning_rate": 7.875116680646411e-05, "loss": 0.6664, "step": 3694 }, { "epoch": 0.7595847466337753, "grad_norm": 0.1920463740825653, "learning_rate": 7.87445597568721e-05, "loss": 0.6731, "step": 3695 }, { "epoch": 0.7597903176071539, "grad_norm": 0.19104163348674774, "learning_rate": 7.873795104484337e-05, "loss": 0.6813, "step": 3696 }, { "epoch": 0.7599958885805325, "grad_norm": 0.15439750254154205, "learning_rate": 7.873134067070347e-05, "loss": 0.56, "step": 3697 }, { "epoch": 0.7602014595539109, "grad_norm": 0.19592773914337158, "learning_rate": 7.872472863477808e-05, "loss": 0.6858, "step": 3698 }, { "epoch": 0.7604070305272895, "grad_norm": 0.19534648954868317, "learning_rate": 7.871811493739294e-05, "loss": 0.681, "step": 3699 }, { "epoch": 0.7606126015006681, "grad_norm": 0.13310682773590088, "learning_rate": 7.871149957887387e-05, "loss": 0.5885, "step": 3700 }, { "epoch": 0.7608181724740467, "grad_norm": 0.19378095865249634, "learning_rate": 7.870488255954679e-05, "loss": 0.667, "step": 3701 }, { "epoch": 0.7610237434474252, "grad_norm": 0.19437304139137268, "learning_rate": 7.869826387973768e-05, "loss": 0.6729, "step": 3702 }, { "epoch": 0.7612293144208038, "grad_norm": 0.19552649557590485, "learning_rate": 7.869164353977261e-05, "loss": 0.668, "step": 3703 }, { "epoch": 0.7614348853941824, "grad_norm": 0.15091755986213684, "learning_rate": 7.868502153997774e-05, "loss": 0.5726, "step": 3704 }, { "epoch": 0.761640456367561, "grad_norm": 0.2120988517999649, "learning_rate": 7.867839788067931e-05, "loss": 0.69, "step": 3705 }, { "epoch": 0.7618460273409394, "grad_norm": 0.1858333796262741, "learning_rate": 7.867177256220362e-05, "loss": 0.677, "step": 3706 }, { "epoch": 0.762051598314318, "grad_norm": 0.1518946886062622, "learning_rate": 7.866514558487709e-05, "loss": 0.5866, "step": 3707 }, { "epoch": 0.7622571692876966, "grad_norm": 0.20156964659690857, "learning_rate": 7.865851694902617e-05, "loss": 0.6694, "step": 3708 }, { "epoch": 0.7624627402610752, "grad_norm": 0.19284150004386902, "learning_rate": 7.865188665497744e-05, "loss": 0.6577, "step": 3709 }, { "epoch": 0.7626683112344537, "grad_norm": 0.13599884510040283, "learning_rate": 7.864525470305756e-05, "loss": 0.5647, "step": 3710 }, { "epoch": 0.7628738822078323, "grad_norm": 0.20330367982387543, "learning_rate": 7.863862109359322e-05, "loss": 0.6663, "step": 3711 }, { "epoch": 0.7630794531812108, "grad_norm": 0.1969096064567566, "learning_rate": 7.863198582691125e-05, "loss": 0.6966, "step": 3712 }, { "epoch": 0.7632850241545893, "grad_norm": 0.20115163922309875, "learning_rate": 7.862534890333854e-05, "loss": 0.7011, "step": 3713 }, { "epoch": 0.7634905951279679, "grad_norm": 0.20134492218494415, "learning_rate": 7.861871032320206e-05, "loss": 0.6588, "step": 3714 }, { "epoch": 0.7636961661013465, "grad_norm": 0.18914572894573212, "learning_rate": 7.861207008682884e-05, "loss": 0.6581, "step": 3715 }, { "epoch": 0.7639017370747251, "grad_norm": 0.430144339799881, "learning_rate": 7.860542819454603e-05, "loss": 0.6026, "step": 3716 }, { "epoch": 0.7641073080481036, "grad_norm": 0.18655115365982056, "learning_rate": 7.859878464668086e-05, "loss": 0.6869, "step": 3717 }, { "epoch": 0.7643128790214821, "grad_norm": 0.19397111237049103, "learning_rate": 7.85921394435606e-05, "loss": 0.6888, "step": 3718 }, { "epoch": 0.7645184499948607, "grad_norm": 0.18396249413490295, "learning_rate": 7.858549258551263e-05, "loss": 0.6527, "step": 3719 }, { "epoch": 0.7647240209682393, "grad_norm": 0.17971353232860565, "learning_rate": 7.857884407286442e-05, "loss": 0.6879, "step": 3720 }, { "epoch": 0.7649295919416178, "grad_norm": 0.1879139393568039, "learning_rate": 7.857219390594353e-05, "loss": 0.6821, "step": 3721 }, { "epoch": 0.7651351629149964, "grad_norm": 0.1858903020620346, "learning_rate": 7.856554208507755e-05, "loss": 0.6818, "step": 3722 }, { "epoch": 0.765340733888375, "grad_norm": 0.1843085139989853, "learning_rate": 7.85588886105942e-05, "loss": 0.6661, "step": 3723 }, { "epoch": 0.7655463048617536, "grad_norm": 0.18377020955085754, "learning_rate": 7.855223348282126e-05, "loss": 0.6742, "step": 3724 }, { "epoch": 0.765751875835132, "grad_norm": 0.1833381950855255, "learning_rate": 7.854557670208659e-05, "loss": 0.6676, "step": 3725 }, { "epoch": 0.7659574468085106, "grad_norm": 0.19020181894302368, "learning_rate": 7.853891826871816e-05, "loss": 0.6742, "step": 3726 }, { "epoch": 0.7661630177818892, "grad_norm": 0.18213771283626556, "learning_rate": 7.853225818304398e-05, "loss": 0.5946, "step": 3727 }, { "epoch": 0.7663685887552678, "grad_norm": 0.20896635949611664, "learning_rate": 7.852559644539216e-05, "loss": 0.6719, "step": 3728 }, { "epoch": 0.7665741597286463, "grad_norm": 0.19129472970962524, "learning_rate": 7.851893305609091e-05, "loss": 0.6838, "step": 3729 }, { "epoch": 0.7667797307020249, "grad_norm": 0.18608838319778442, "learning_rate": 7.85122680154685e-05, "loss": 0.6702, "step": 3730 }, { "epoch": 0.7669853016754035, "grad_norm": 0.13603243231773376, "learning_rate": 7.85056013238533e-05, "loss": 0.5653, "step": 3731 }, { "epoch": 0.7671908726487819, "grad_norm": 0.1969052106142044, "learning_rate": 7.849893298157369e-05, "loss": 0.6705, "step": 3732 }, { "epoch": 0.7673964436221605, "grad_norm": 0.19232457876205444, "learning_rate": 7.849226298895824e-05, "loss": 0.6542, "step": 3733 }, { "epoch": 0.7676020145955391, "grad_norm": 0.18796077370643616, "learning_rate": 7.848559134633555e-05, "loss": 0.6682, "step": 3734 }, { "epoch": 0.7678075855689177, "grad_norm": 0.19674451649188995, "learning_rate": 7.847891805403426e-05, "loss": 0.6574, "step": 3735 }, { "epoch": 0.7680131565422962, "grad_norm": 0.19735072553157806, "learning_rate": 7.847224311238316e-05, "loss": 0.6637, "step": 3736 }, { "epoch": 0.7682187275156748, "grad_norm": 0.22023150324821472, "learning_rate": 7.846556652171112e-05, "loss": 0.6634, "step": 3737 }, { "epoch": 0.7684242984890534, "grad_norm": 0.18101370334625244, "learning_rate": 7.845888828234701e-05, "loss": 0.6424, "step": 3738 }, { "epoch": 0.768629869462432, "grad_norm": 0.18563824892044067, "learning_rate": 7.845220839461987e-05, "loss": 0.6618, "step": 3739 }, { "epoch": 0.7688354404358104, "grad_norm": 0.18954195082187653, "learning_rate": 7.844552685885877e-05, "loss": 0.6885, "step": 3740 }, { "epoch": 0.769041011409189, "grad_norm": 0.14499548077583313, "learning_rate": 7.843884367539289e-05, "loss": 0.6127, "step": 3741 }, { "epoch": 0.7692465823825676, "grad_norm": 0.20436535775661469, "learning_rate": 7.843215884455147e-05, "loss": 0.6805, "step": 3742 }, { "epoch": 0.7694521533559462, "grad_norm": 0.20969851315021515, "learning_rate": 7.842547236666386e-05, "loss": 0.6548, "step": 3743 }, { "epoch": 0.7696577243293247, "grad_norm": 0.19497977197170258, "learning_rate": 7.841878424205944e-05, "loss": 0.7104, "step": 3744 }, { "epoch": 0.7698632953027033, "grad_norm": 0.1905307173728943, "learning_rate": 7.841209447106772e-05, "loss": 0.6676, "step": 3745 }, { "epoch": 0.7700688662760818, "grad_norm": 0.1859470009803772, "learning_rate": 7.840540305401828e-05, "loss": 0.6712, "step": 3746 }, { "epoch": 0.7702744372494604, "grad_norm": 0.19429220259189606, "learning_rate": 7.839870999124077e-05, "loss": 0.6763, "step": 3747 }, { "epoch": 0.7704800082228389, "grad_norm": 0.188473641872406, "learning_rate": 7.839201528306492e-05, "loss": 0.6856, "step": 3748 }, { "epoch": 0.7706855791962175, "grad_norm": 0.19540703296661377, "learning_rate": 7.838531892982057e-05, "loss": 0.6616, "step": 3749 }, { "epoch": 0.7708911501695961, "grad_norm": 0.1938808113336563, "learning_rate": 7.837862093183758e-05, "loss": 0.6553, "step": 3750 }, { "epoch": 0.7710967211429746, "grad_norm": 0.1836869865655899, "learning_rate": 7.837192128944594e-05, "loss": 0.6768, "step": 3751 }, { "epoch": 0.7713022921163531, "grad_norm": 0.1519763171672821, "learning_rate": 7.836522000297572e-05, "loss": 0.6059, "step": 3752 }, { "epoch": 0.7715078630897317, "grad_norm": 0.19223132729530334, "learning_rate": 7.835851707275707e-05, "loss": 0.7093, "step": 3753 }, { "epoch": 0.7717134340631103, "grad_norm": 0.19785994291305542, "learning_rate": 7.83518124991202e-05, "loss": 0.6557, "step": 3754 }, { "epoch": 0.7719190050364888, "grad_norm": 0.18960314989089966, "learning_rate": 7.834510628239541e-05, "loss": 0.6495, "step": 3755 }, { "epoch": 0.7721245760098674, "grad_norm": 0.1869727522134781, "learning_rate": 7.833839842291309e-05, "loss": 0.6561, "step": 3756 }, { "epoch": 0.772330146983246, "grad_norm": 0.19522154331207275, "learning_rate": 7.83316889210037e-05, "loss": 0.6781, "step": 3757 }, { "epoch": 0.7725357179566246, "grad_norm": 0.19209223985671997, "learning_rate": 7.832497777699779e-05, "loss": 0.6598, "step": 3758 }, { "epoch": 0.772741288930003, "grad_norm": 0.19709967076778412, "learning_rate": 7.831826499122599e-05, "loss": 0.6977, "step": 3759 }, { "epoch": 0.7729468599033816, "grad_norm": 0.19524455070495605, "learning_rate": 7.8311550564019e-05, "loss": 0.6701, "step": 3760 }, { "epoch": 0.7731524308767602, "grad_norm": 0.19056567549705505, "learning_rate": 7.830483449570762e-05, "loss": 0.652, "step": 3761 }, { "epoch": 0.7733580018501388, "grad_norm": 0.2009115368127823, "learning_rate": 7.829811678662269e-05, "loss": 0.6796, "step": 3762 }, { "epoch": 0.7735635728235173, "grad_norm": 0.1854369342327118, "learning_rate": 7.829139743709518e-05, "loss": 0.6959, "step": 3763 }, { "epoch": 0.7737691437968959, "grad_norm": 0.19334383308887482, "learning_rate": 7.828467644745614e-05, "loss": 0.6803, "step": 3764 }, { "epoch": 0.7739747147702745, "grad_norm": 0.1896241158246994, "learning_rate": 7.827795381803666e-05, "loss": 0.6589, "step": 3765 }, { "epoch": 0.774180285743653, "grad_norm": 0.19462954998016357, "learning_rate": 7.827122954916793e-05, "loss": 0.6884, "step": 3766 }, { "epoch": 0.7743858567170315, "grad_norm": 0.15615877509117126, "learning_rate": 7.826450364118124e-05, "loss": 0.5868, "step": 3767 }, { "epoch": 0.7745914276904101, "grad_norm": 0.21053725481033325, "learning_rate": 7.825777609440793e-05, "loss": 0.6619, "step": 3768 }, { "epoch": 0.7747969986637887, "grad_norm": 0.1837691068649292, "learning_rate": 7.825104690917943e-05, "loss": 0.68, "step": 3769 }, { "epoch": 0.7750025696371672, "grad_norm": 0.18419477343559265, "learning_rate": 7.824431608582728e-05, "loss": 0.6629, "step": 3770 }, { "epoch": 0.7752081406105458, "grad_norm": 0.19641302525997162, "learning_rate": 7.823758362468305e-05, "loss": 0.6919, "step": 3771 }, { "epoch": 0.7754137115839244, "grad_norm": 0.14012254774570465, "learning_rate": 7.823084952607842e-05, "loss": 0.5845, "step": 3772 }, { "epoch": 0.775619282557303, "grad_norm": 0.13224144279956818, "learning_rate": 7.822411379034516e-05, "loss": 0.5851, "step": 3773 }, { "epoch": 0.7758248535306814, "grad_norm": 0.20598402619361877, "learning_rate": 7.82173764178151e-05, "loss": 0.6987, "step": 3774 }, { "epoch": 0.77603042450406, "grad_norm": 0.19516415894031525, "learning_rate": 7.821063740882017e-05, "loss": 0.681, "step": 3775 }, { "epoch": 0.7762359954774386, "grad_norm": 0.192254900932312, "learning_rate": 7.820389676369237e-05, "loss": 0.6647, "step": 3776 }, { "epoch": 0.7764415664508172, "grad_norm": 0.21489369869232178, "learning_rate": 7.819715448276374e-05, "loss": 0.6804, "step": 3777 }, { "epoch": 0.7766471374241957, "grad_norm": 0.18683873116970062, "learning_rate": 7.81904105663665e-05, "loss": 0.6766, "step": 3778 }, { "epoch": 0.7768527083975743, "grad_norm": 0.19451092183589935, "learning_rate": 7.818366501483285e-05, "loss": 0.6689, "step": 3779 }, { "epoch": 0.7770582793709528, "grad_norm": 0.16607536375522614, "learning_rate": 7.817691782849512e-05, "loss": 0.6039, "step": 3780 }, { "epoch": 0.7772638503443314, "grad_norm": 0.20235170423984528, "learning_rate": 7.817016900768573e-05, "loss": 0.6846, "step": 3781 }, { "epoch": 0.7774694213177099, "grad_norm": 0.1997910737991333, "learning_rate": 7.816341855273715e-05, "loss": 0.665, "step": 3782 }, { "epoch": 0.7776749922910885, "grad_norm": 0.19691520929336548, "learning_rate": 7.815666646398193e-05, "loss": 0.6791, "step": 3783 }, { "epoch": 0.7778805632644671, "grad_norm": 0.14885997772216797, "learning_rate": 7.814991274175273e-05, "loss": 0.6101, "step": 3784 }, { "epoch": 0.7780861342378456, "grad_norm": 0.19798895716667175, "learning_rate": 7.814315738638227e-05, "loss": 0.6652, "step": 3785 }, { "epoch": 0.7782917052112241, "grad_norm": 0.13677549362182617, "learning_rate": 7.813640039820337e-05, "loss": 0.583, "step": 3786 }, { "epoch": 0.7784972761846027, "grad_norm": 0.19505973160266876, "learning_rate": 7.81296417775489e-05, "loss": 0.7306, "step": 3787 }, { "epoch": 0.7787028471579813, "grad_norm": 0.18989427387714386, "learning_rate": 7.812288152475182e-05, "loss": 0.6883, "step": 3788 }, { "epoch": 0.7789084181313598, "grad_norm": 0.18871872127056122, "learning_rate": 7.811611964014518e-05, "loss": 0.6781, "step": 3789 }, { "epoch": 0.7791139891047384, "grad_norm": 0.19525344669818878, "learning_rate": 7.81093561240621e-05, "loss": 0.657, "step": 3790 }, { "epoch": 0.779319560078117, "grad_norm": 0.1633206307888031, "learning_rate": 7.810259097683582e-05, "loss": 0.5749, "step": 3791 }, { "epoch": 0.7795251310514956, "grad_norm": 0.19155313074588776, "learning_rate": 7.80958241987996e-05, "loss": 0.6782, "step": 3792 }, { "epoch": 0.779730702024874, "grad_norm": 0.18953226506710052, "learning_rate": 7.80890557902868e-05, "loss": 0.668, "step": 3793 }, { "epoch": 0.7799362729982526, "grad_norm": 0.19336241483688354, "learning_rate": 7.808228575163088e-05, "loss": 0.6523, "step": 3794 }, { "epoch": 0.7801418439716312, "grad_norm": 0.18969465792179108, "learning_rate": 7.807551408316537e-05, "loss": 0.6893, "step": 3795 }, { "epoch": 0.7803474149450098, "grad_norm": 0.19042238593101501, "learning_rate": 7.806874078522388e-05, "loss": 0.64, "step": 3796 }, { "epoch": 0.7805529859183883, "grad_norm": 0.1883266568183899, "learning_rate": 7.80619658581401e-05, "loss": 0.6471, "step": 3797 }, { "epoch": 0.7807585568917669, "grad_norm": 0.1871403008699417, "learning_rate": 7.805518930224777e-05, "loss": 0.6642, "step": 3798 }, { "epoch": 0.7809641278651455, "grad_norm": 0.1827799677848816, "learning_rate": 7.804841111788078e-05, "loss": 0.677, "step": 3799 }, { "epoch": 0.781169698838524, "grad_norm": 0.18511800467967987, "learning_rate": 7.804163130537304e-05, "loss": 0.6586, "step": 3800 }, { "epoch": 0.7813752698119025, "grad_norm": 0.1907230168581009, "learning_rate": 7.803484986505855e-05, "loss": 0.6573, "step": 3801 }, { "epoch": 0.7815808407852811, "grad_norm": 0.18352137506008148, "learning_rate": 7.802806679727144e-05, "loss": 0.6952, "step": 3802 }, { "epoch": 0.7817864117586597, "grad_norm": 0.18589456379413605, "learning_rate": 7.802128210234583e-05, "loss": 0.6877, "step": 3803 }, { "epoch": 0.7819919827320382, "grad_norm": 0.19165122509002686, "learning_rate": 7.8014495780616e-05, "loss": 0.6721, "step": 3804 }, { "epoch": 0.7821975537054168, "grad_norm": 0.18092942237854004, "learning_rate": 7.800770783241627e-05, "loss": 0.6472, "step": 3805 }, { "epoch": 0.7824031246787954, "grad_norm": 0.1938347965478897, "learning_rate": 7.800091825808104e-05, "loss": 0.6875, "step": 3806 }, { "epoch": 0.782608695652174, "grad_norm": 0.18910136818885803, "learning_rate": 7.799412705794484e-05, "loss": 0.6634, "step": 3807 }, { "epoch": 0.7828142666255524, "grad_norm": 0.18492446839809418, "learning_rate": 7.798733423234219e-05, "loss": 0.6772, "step": 3808 }, { "epoch": 0.783019837598931, "grad_norm": 0.18603304028511047, "learning_rate": 7.798053978160777e-05, "loss": 0.6888, "step": 3809 }, { "epoch": 0.7832254085723096, "grad_norm": 0.1817874163389206, "learning_rate": 7.797374370607632e-05, "loss": 0.6675, "step": 3810 }, { "epoch": 0.7834309795456882, "grad_norm": 0.1888546198606491, "learning_rate": 7.796694600608261e-05, "loss": 0.6472, "step": 3811 }, { "epoch": 0.7836365505190667, "grad_norm": 0.18347470462322235, "learning_rate": 7.796014668196159e-05, "loss": 0.6368, "step": 3812 }, { "epoch": 0.7838421214924453, "grad_norm": 0.18692941963672638, "learning_rate": 7.795334573404817e-05, "loss": 0.6637, "step": 3813 }, { "epoch": 0.7840476924658238, "grad_norm": 0.18573735654354095, "learning_rate": 7.794654316267745e-05, "loss": 0.6716, "step": 3814 }, { "epoch": 0.7842532634392024, "grad_norm": 0.1885242462158203, "learning_rate": 7.793973896818452e-05, "loss": 0.6957, "step": 3815 }, { "epoch": 0.7844588344125809, "grad_norm": 0.19421452283859253, "learning_rate": 7.793293315090462e-05, "loss": 0.6977, "step": 3816 }, { "epoch": 0.7846644053859595, "grad_norm": 0.18501219153404236, "learning_rate": 7.792612571117304e-05, "loss": 0.676, "step": 3817 }, { "epoch": 0.7848699763593381, "grad_norm": 0.18256261944770813, "learning_rate": 7.791931664932514e-05, "loss": 0.6637, "step": 3818 }, { "epoch": 0.7850755473327167, "grad_norm": 0.16926661133766174, "learning_rate": 7.791250596569636e-05, "loss": 0.5883, "step": 3819 }, { "epoch": 0.7852811183060951, "grad_norm": 0.19965988397598267, "learning_rate": 7.790569366062226e-05, "loss": 0.6873, "step": 3820 }, { "epoch": 0.7854866892794737, "grad_norm": 0.19432468712329865, "learning_rate": 7.789887973443842e-05, "loss": 0.6727, "step": 3821 }, { "epoch": 0.7856922602528523, "grad_norm": 1.5224770307540894, "learning_rate": 7.789206418748055e-05, "loss": 0.6645, "step": 3822 }, { "epoch": 0.7858978312262308, "grad_norm": 0.25981712341308594, "learning_rate": 7.788524702008442e-05, "loss": 0.6693, "step": 3823 }, { "epoch": 0.7861034021996094, "grad_norm": 0.17504632472991943, "learning_rate": 7.787842823258587e-05, "loss": 0.6081, "step": 3824 }, { "epoch": 0.786308973172988, "grad_norm": 0.20936280488967896, "learning_rate": 7.787160782532084e-05, "loss": 0.6833, "step": 3825 }, { "epoch": 0.7865145441463666, "grad_norm": 0.2347778081893921, "learning_rate": 7.786478579862532e-05, "loss": 0.6824, "step": 3826 }, { "epoch": 0.786720115119745, "grad_norm": 0.19294393062591553, "learning_rate": 7.785796215283543e-05, "loss": 0.6811, "step": 3827 }, { "epoch": 0.7869256860931236, "grad_norm": 0.253738671541214, "learning_rate": 7.785113688828731e-05, "loss": 0.6015, "step": 3828 }, { "epoch": 0.7871312570665022, "grad_norm": 0.22543035447597504, "learning_rate": 7.784431000531722e-05, "loss": 0.6593, "step": 3829 }, { "epoch": 0.7873368280398808, "grad_norm": 0.19480814039707184, "learning_rate": 7.78374815042615e-05, "loss": 0.6131, "step": 3830 }, { "epoch": 0.7875423990132593, "grad_norm": 0.2131412923336029, "learning_rate": 7.783065138545655e-05, "loss": 0.6982, "step": 3831 }, { "epoch": 0.7877479699866379, "grad_norm": 0.20891313254833221, "learning_rate": 7.782381964923885e-05, "loss": 0.6981, "step": 3832 }, { "epoch": 0.7879535409600165, "grad_norm": 0.15176214277744293, "learning_rate": 7.781698629594498e-05, "loss": 0.5964, "step": 3833 }, { "epoch": 0.788159111933395, "grad_norm": 0.19954368472099304, "learning_rate": 7.781015132591156e-05, "loss": 0.681, "step": 3834 }, { "epoch": 0.7883646829067735, "grad_norm": 0.19388937950134277, "learning_rate": 7.780331473947537e-05, "loss": 0.6776, "step": 3835 }, { "epoch": 0.7885702538801521, "grad_norm": 0.19515137374401093, "learning_rate": 7.779647653697317e-05, "loss": 0.7054, "step": 3836 }, { "epoch": 0.7887758248535307, "grad_norm": 0.15485966205596924, "learning_rate": 7.778963671874186e-05, "loss": 0.5838, "step": 3837 }, { "epoch": 0.7889813958269093, "grad_norm": 0.2033955603837967, "learning_rate": 7.778279528511841e-05, "loss": 0.6831, "step": 3838 }, { "epoch": 0.7891869668002878, "grad_norm": 0.14127175509929657, "learning_rate": 7.777595223643985e-05, "loss": 0.5782, "step": 3839 }, { "epoch": 0.7893925377736664, "grad_norm": 0.19278831779956818, "learning_rate": 7.776910757304333e-05, "loss": 0.6604, "step": 3840 }, { "epoch": 0.789598108747045, "grad_norm": 0.19700968265533447, "learning_rate": 7.776226129526606e-05, "loss": 0.6487, "step": 3841 }, { "epoch": 0.7898036797204234, "grad_norm": 0.20007772743701935, "learning_rate": 7.775541340344528e-05, "loss": 0.7053, "step": 3842 }, { "epoch": 0.790009250693802, "grad_norm": 0.1945502907037735, "learning_rate": 7.774856389791838e-05, "loss": 0.6633, "step": 3843 }, { "epoch": 0.7902148216671806, "grad_norm": 0.18347761034965515, "learning_rate": 7.774171277902282e-05, "loss": 0.6509, "step": 3844 }, { "epoch": 0.7904203926405592, "grad_norm": 0.1927865594625473, "learning_rate": 7.773486004709608e-05, "loss": 0.6873, "step": 3845 }, { "epoch": 0.7906259636139377, "grad_norm": 0.1933821141719818, "learning_rate": 7.772800570247582e-05, "loss": 0.6784, "step": 3846 }, { "epoch": 0.7908315345873163, "grad_norm": 0.1437695473432541, "learning_rate": 7.772114974549966e-05, "loss": 0.5979, "step": 3847 }, { "epoch": 0.7910371055606948, "grad_norm": 0.20420506596565247, "learning_rate": 7.77142921765054e-05, "loss": 0.7083, "step": 3848 }, { "epoch": 0.7912426765340734, "grad_norm": 0.13508614897727966, "learning_rate": 7.770743299583089e-05, "loss": 0.5824, "step": 3849 }, { "epoch": 0.7914482475074519, "grad_norm": 0.1953742653131485, "learning_rate": 7.770057220381401e-05, "loss": 0.6655, "step": 3850 }, { "epoch": 0.7916538184808305, "grad_norm": 0.192901611328125, "learning_rate": 7.769370980079277e-05, "loss": 0.6922, "step": 3851 }, { "epoch": 0.7918593894542091, "grad_norm": 0.19612765312194824, "learning_rate": 7.768684578710528e-05, "loss": 0.6687, "step": 3852 }, { "epoch": 0.7920649604275877, "grad_norm": 0.19205497205257416, "learning_rate": 7.767998016308968e-05, "loss": 0.6837, "step": 3853 }, { "epoch": 0.7922705314009661, "grad_norm": 0.15582695603370667, "learning_rate": 7.767311292908419e-05, "loss": 0.5945, "step": 3854 }, { "epoch": 0.7924761023743447, "grad_norm": 0.18942193686962128, "learning_rate": 7.766624408542713e-05, "loss": 0.652, "step": 3855 }, { "epoch": 0.7926816733477233, "grad_norm": 0.19103151559829712, "learning_rate": 7.765937363245692e-05, "loss": 0.6518, "step": 3856 }, { "epoch": 0.7928872443211019, "grad_norm": 0.18634134531021118, "learning_rate": 7.765250157051202e-05, "loss": 0.6556, "step": 3857 }, { "epoch": 0.7930928152944804, "grad_norm": 0.1883394718170166, "learning_rate": 7.764562789993099e-05, "loss": 0.6736, "step": 3858 }, { "epoch": 0.793298386267859, "grad_norm": 0.18593887984752655, "learning_rate": 7.763875262105245e-05, "loss": 0.652, "step": 3859 }, { "epoch": 0.7935039572412376, "grad_norm": 0.2020663321018219, "learning_rate": 7.763187573421511e-05, "loss": 0.6447, "step": 3860 }, { "epoch": 0.793709528214616, "grad_norm": 0.18651576340198517, "learning_rate": 7.76249972397578e-05, "loss": 0.6746, "step": 3861 }, { "epoch": 0.7939150991879946, "grad_norm": 0.19070084393024445, "learning_rate": 7.761811713801935e-05, "loss": 0.6866, "step": 3862 }, { "epoch": 0.7941206701613732, "grad_norm": 0.18511120975017548, "learning_rate": 7.761123542933872e-05, "loss": 0.6491, "step": 3863 }, { "epoch": 0.7943262411347518, "grad_norm": 0.18863095343112946, "learning_rate": 7.760435211405495e-05, "loss": 0.672, "step": 3864 }, { "epoch": 0.7945318121081303, "grad_norm": 0.19631804525852203, "learning_rate": 7.759746719250714e-05, "loss": 0.6509, "step": 3865 }, { "epoch": 0.7947373830815089, "grad_norm": 0.17893162369728088, "learning_rate": 7.75905806650345e-05, "loss": 0.6707, "step": 3866 }, { "epoch": 0.7949429540548875, "grad_norm": 0.18233318626880646, "learning_rate": 7.758369253197626e-05, "loss": 0.657, "step": 3867 }, { "epoch": 0.795148525028266, "grad_norm": 0.19054913520812988, "learning_rate": 7.757680279367178e-05, "loss": 0.6796, "step": 3868 }, { "epoch": 0.7953540960016445, "grad_norm": 0.20700985193252563, "learning_rate": 7.75699114504605e-05, "loss": 0.6672, "step": 3869 }, { "epoch": 0.7955596669750231, "grad_norm": 0.1838599294424057, "learning_rate": 7.756301850268193e-05, "loss": 0.6721, "step": 3870 }, { "epoch": 0.7957652379484017, "grad_norm": 0.1944621503353119, "learning_rate": 7.755612395067562e-05, "loss": 0.6751, "step": 3871 }, { "epoch": 0.7959708089217803, "grad_norm": 0.18728716671466827, "learning_rate": 7.754922779478125e-05, "loss": 0.6765, "step": 3872 }, { "epoch": 0.7961763798951588, "grad_norm": 0.18458257615566254, "learning_rate": 7.754233003533856e-05, "loss": 0.6609, "step": 3873 }, { "epoch": 0.7963819508685374, "grad_norm": 0.18987616896629333, "learning_rate": 7.753543067268737e-05, "loss": 0.647, "step": 3874 }, { "epoch": 0.796587521841916, "grad_norm": 0.19032716751098633, "learning_rate": 7.752852970716761e-05, "loss": 0.6514, "step": 3875 }, { "epoch": 0.7967930928152945, "grad_norm": 0.18918365240097046, "learning_rate": 7.752162713911918e-05, "loss": 0.6705, "step": 3876 }, { "epoch": 0.796998663788673, "grad_norm": 0.18836969137191772, "learning_rate": 7.751472296888222e-05, "loss": 0.6651, "step": 3877 }, { "epoch": 0.7972042347620516, "grad_norm": 0.18875330686569214, "learning_rate": 7.750781719679683e-05, "loss": 0.6864, "step": 3878 }, { "epoch": 0.7974098057354302, "grad_norm": 0.18728755414485931, "learning_rate": 7.750090982320321e-05, "loss": 0.6629, "step": 3879 }, { "epoch": 0.7976153767088087, "grad_norm": 0.1937887966632843, "learning_rate": 7.749400084844169e-05, "loss": 0.6673, "step": 3880 }, { "epoch": 0.7978209476821873, "grad_norm": 0.16451017558574677, "learning_rate": 7.748709027285261e-05, "loss": 0.5989, "step": 3881 }, { "epoch": 0.7980265186555658, "grad_norm": 0.1364785134792328, "learning_rate": 7.748017809677646e-05, "loss": 0.5949, "step": 3882 }, { "epoch": 0.7982320896289444, "grad_norm": 0.14087210595607758, "learning_rate": 7.747326432055372e-05, "loss": 0.5753, "step": 3883 }, { "epoch": 0.7984376606023229, "grad_norm": 0.20993009209632874, "learning_rate": 7.746634894452504e-05, "loss": 0.7021, "step": 3884 }, { "epoch": 0.7986432315757015, "grad_norm": 0.1940746009349823, "learning_rate": 7.74594319690311e-05, "loss": 0.6743, "step": 3885 }, { "epoch": 0.7988488025490801, "grad_norm": 0.1924261897802353, "learning_rate": 7.745251339441265e-05, "loss": 0.6795, "step": 3886 }, { "epoch": 0.7990543735224587, "grad_norm": 0.1905447542667389, "learning_rate": 7.744559322101056e-05, "loss": 0.6862, "step": 3887 }, { "epoch": 0.7992599444958371, "grad_norm": 0.18997174501419067, "learning_rate": 7.743867144916573e-05, "loss": 0.5848, "step": 3888 }, { "epoch": 0.7994655154692157, "grad_norm": 0.1488848179578781, "learning_rate": 7.743174807921919e-05, "loss": 0.5842, "step": 3889 }, { "epoch": 0.7996710864425943, "grad_norm": 0.14569362998008728, "learning_rate": 7.7424823111512e-05, "loss": 0.5866, "step": 3890 }, { "epoch": 0.7998766574159729, "grad_norm": 0.22627940773963928, "learning_rate": 7.741789654638532e-05, "loss": 0.6954, "step": 3891 }, { "epoch": 0.8000822283893514, "grad_norm": 0.18143914639949799, "learning_rate": 7.74109683841804e-05, "loss": 0.5874, "step": 3892 }, { "epoch": 0.80028779936273, "grad_norm": 0.1479119211435318, "learning_rate": 7.740403862523857e-05, "loss": 0.5729, "step": 3893 }, { "epoch": 0.8004933703361086, "grad_norm": 0.20130044221878052, "learning_rate": 7.73971072699012e-05, "loss": 0.6855, "step": 3894 }, { "epoch": 0.8006989413094872, "grad_norm": 0.19785720109939575, "learning_rate": 7.739017431850978e-05, "loss": 0.687, "step": 3895 }, { "epoch": 0.8009045122828656, "grad_norm": 0.20219095051288605, "learning_rate": 7.738323977140587e-05, "loss": 0.585, "step": 3896 }, { "epoch": 0.8011100832562442, "grad_norm": 0.1963326632976532, "learning_rate": 7.737630362893109e-05, "loss": 0.6628, "step": 3897 }, { "epoch": 0.8013156542296228, "grad_norm": 0.18930426239967346, "learning_rate": 7.736936589142717e-05, "loss": 0.6674, "step": 3898 }, { "epoch": 0.8015212252030013, "grad_norm": 0.18726347386837006, "learning_rate": 7.736242655923587e-05, "loss": 0.6837, "step": 3899 }, { "epoch": 0.8017267961763799, "grad_norm": 0.19241462647914886, "learning_rate": 7.735548563269907e-05, "loss": 0.6677, "step": 3900 }, { "epoch": 0.8019323671497585, "grad_norm": 0.1922820508480072, "learning_rate": 7.734854311215874e-05, "loss": 0.6865, "step": 3901 }, { "epoch": 0.802137938123137, "grad_norm": 0.19233377277851105, "learning_rate": 7.734159899795688e-05, "loss": 0.6813, "step": 3902 }, { "epoch": 0.8023435090965155, "grad_norm": 0.18713760375976562, "learning_rate": 7.73346532904356e-05, "loss": 0.6537, "step": 3903 }, { "epoch": 0.8025490800698941, "grad_norm": 0.19880633056163788, "learning_rate": 7.732770598993708e-05, "loss": 0.6728, "step": 3904 }, { "epoch": 0.8027546510432727, "grad_norm": 0.19050458073616028, "learning_rate": 7.73207570968036e-05, "loss": 0.6749, "step": 3905 }, { "epoch": 0.8029602220166513, "grad_norm": 0.1801813244819641, "learning_rate": 7.731380661137747e-05, "loss": 0.5939, "step": 3906 }, { "epoch": 0.8031657929900298, "grad_norm": 0.19383971393108368, "learning_rate": 7.730685453400113e-05, "loss": 0.6826, "step": 3907 }, { "epoch": 0.8033713639634084, "grad_norm": 0.20955929160118103, "learning_rate": 7.729990086501707e-05, "loss": 0.6954, "step": 3908 }, { "epoch": 0.803576934936787, "grad_norm": 0.19068995118141174, "learning_rate": 7.729294560476786e-05, "loss": 0.6686, "step": 3909 }, { "epoch": 0.8037825059101655, "grad_norm": 0.19245314598083496, "learning_rate": 7.728598875359615e-05, "loss": 0.6619, "step": 3910 }, { "epoch": 0.803988076883544, "grad_norm": 0.1979014128446579, "learning_rate": 7.727903031184469e-05, "loss": 0.6614, "step": 3911 }, { "epoch": 0.8041936478569226, "grad_norm": 0.1900876760482788, "learning_rate": 7.727207027985626e-05, "loss": 0.6486, "step": 3912 }, { "epoch": 0.8043992188303012, "grad_norm": 0.17994777858257294, "learning_rate": 7.726510865797379e-05, "loss": 0.6729, "step": 3913 }, { "epoch": 0.8046047898036797, "grad_norm": 0.18554867804050446, "learning_rate": 7.725814544654021e-05, "loss": 0.6541, "step": 3914 }, { "epoch": 0.8048103607770583, "grad_norm": 0.24200813472270966, "learning_rate": 7.725118064589859e-05, "loss": 0.6514, "step": 3915 }, { "epoch": 0.8050159317504368, "grad_norm": 0.18101008236408234, "learning_rate": 7.724421425639201e-05, "loss": 0.6382, "step": 3916 }, { "epoch": 0.8052215027238154, "grad_norm": 0.18432863056659698, "learning_rate": 7.723724627836374e-05, "loss": 0.64, "step": 3917 }, { "epoch": 0.8054270736971939, "grad_norm": 0.19102488458156586, "learning_rate": 7.7230276712157e-05, "loss": 0.7106, "step": 3918 }, { "epoch": 0.8056326446705725, "grad_norm": 0.16466036438941956, "learning_rate": 7.722330555811519e-05, "loss": 0.5831, "step": 3919 }, { "epoch": 0.8058382156439511, "grad_norm": 0.19325773417949677, "learning_rate": 7.721633281658171e-05, "loss": 0.6855, "step": 3920 }, { "epoch": 0.8060437866173297, "grad_norm": 0.1921764314174652, "learning_rate": 7.720935848790009e-05, "loss": 0.6858, "step": 3921 }, { "epoch": 0.8062493575907081, "grad_norm": 0.1909746527671814, "learning_rate": 7.720238257241394e-05, "loss": 0.6825, "step": 3922 }, { "epoch": 0.8064549285640867, "grad_norm": 0.18359649181365967, "learning_rate": 7.71954050704669e-05, "loss": 0.6807, "step": 3923 }, { "epoch": 0.8066604995374653, "grad_norm": 0.1895141303539276, "learning_rate": 7.718842598240273e-05, "loss": 0.7047, "step": 3924 }, { "epoch": 0.8068660705108439, "grad_norm": 0.18683840334415436, "learning_rate": 7.718144530856527e-05, "loss": 0.6704, "step": 3925 }, { "epoch": 0.8070716414842224, "grad_norm": 0.19502970576286316, "learning_rate": 7.717446304929841e-05, "loss": 0.6785, "step": 3926 }, { "epoch": 0.807277212457601, "grad_norm": 0.1623646318912506, "learning_rate": 7.716747920494615e-05, "loss": 0.5998, "step": 3927 }, { "epoch": 0.8074827834309796, "grad_norm": 0.13050900399684906, "learning_rate": 7.716049377585252e-05, "loss": 0.5749, "step": 3928 }, { "epoch": 0.8076883544043582, "grad_norm": 0.2015300691127777, "learning_rate": 7.715350676236169e-05, "loss": 0.6902, "step": 3929 }, { "epoch": 0.8078939253777366, "grad_norm": 0.19763372838497162, "learning_rate": 7.714651816481788e-05, "loss": 0.6666, "step": 3930 }, { "epoch": 0.8080994963511152, "grad_norm": 0.19438831508159637, "learning_rate": 7.713952798356535e-05, "loss": 0.6901, "step": 3931 }, { "epoch": 0.8083050673244938, "grad_norm": 0.1897808313369751, "learning_rate": 7.71325362189485e-05, "loss": 0.6652, "step": 3932 }, { "epoch": 0.8085106382978723, "grad_norm": 0.2024880349636078, "learning_rate": 7.712554287131179e-05, "loss": 0.6983, "step": 3933 }, { "epoch": 0.8087162092712509, "grad_norm": 0.21040861308574677, "learning_rate": 7.711854794099973e-05, "loss": 0.6676, "step": 3934 }, { "epoch": 0.8089217802446295, "grad_norm": 0.19779765605926514, "learning_rate": 7.711155142835693e-05, "loss": 0.6699, "step": 3935 }, { "epoch": 0.809127351218008, "grad_norm": 0.18733692169189453, "learning_rate": 7.710455333372809e-05, "loss": 0.6876, "step": 3936 }, { "epoch": 0.8093329221913865, "grad_norm": 0.18417513370513916, "learning_rate": 7.709755365745796e-05, "loss": 0.6592, "step": 3937 }, { "epoch": 0.8095384931647651, "grad_norm": 0.19497236609458923, "learning_rate": 7.709055239989138e-05, "loss": 0.6704, "step": 3938 }, { "epoch": 0.8097440641381437, "grad_norm": 0.19937434792518616, "learning_rate": 7.708354956137329e-05, "loss": 0.6672, "step": 3939 }, { "epoch": 0.8099496351115223, "grad_norm": 0.18484531342983246, "learning_rate": 7.707654514224865e-05, "loss": 0.639, "step": 3940 }, { "epoch": 0.8101552060849008, "grad_norm": 0.21879440546035767, "learning_rate": 7.706953914286256e-05, "loss": 0.5811, "step": 3941 }, { "epoch": 0.8103607770582794, "grad_norm": 0.19117337465286255, "learning_rate": 7.706253156356018e-05, "loss": 0.6602, "step": 3942 }, { "epoch": 0.810566348031658, "grad_norm": 0.20928023755550385, "learning_rate": 7.705552240468672e-05, "loss": 0.6755, "step": 3943 }, { "epoch": 0.8107719190050365, "grad_norm": 0.1899488866329193, "learning_rate": 7.70485116665875e-05, "loss": 0.6596, "step": 3944 }, { "epoch": 0.810977489978415, "grad_norm": 0.1829700917005539, "learning_rate": 7.70414993496079e-05, "loss": 0.6536, "step": 3945 }, { "epoch": 0.8111830609517936, "grad_norm": 0.2187718152999878, "learning_rate": 7.70344854540934e-05, "loss": 0.6712, "step": 3946 }, { "epoch": 0.8113886319251722, "grad_norm": 0.1931912750005722, "learning_rate": 7.702746998038952e-05, "loss": 0.6848, "step": 3947 }, { "epoch": 0.8115942028985508, "grad_norm": 0.1904575526714325, "learning_rate": 7.70204529288419e-05, "loss": 0.6688, "step": 3948 }, { "epoch": 0.8117997738719293, "grad_norm": 0.18743041157722473, "learning_rate": 7.701343429979622e-05, "loss": 0.6804, "step": 3949 }, { "epoch": 0.8120053448453078, "grad_norm": 0.1948167085647583, "learning_rate": 7.700641409359827e-05, "loss": 0.6985, "step": 3950 }, { "epoch": 0.8122109158186864, "grad_norm": 0.19588027894496918, "learning_rate": 7.69993923105939e-05, "loss": 0.6802, "step": 3951 }, { "epoch": 0.8124164867920649, "grad_norm": 0.18361736834049225, "learning_rate": 7.699236895112903e-05, "loss": 0.5713, "step": 3952 }, { "epoch": 0.8126220577654435, "grad_norm": 0.1924244612455368, "learning_rate": 7.698534401554966e-05, "loss": 0.6732, "step": 3953 }, { "epoch": 0.8128276287388221, "grad_norm": 0.19700728356838226, "learning_rate": 7.697831750420189e-05, "loss": 0.6635, "step": 3954 }, { "epoch": 0.8130331997122007, "grad_norm": 0.20763562619686127, "learning_rate": 7.69712894174319e-05, "loss": 0.6926, "step": 3955 }, { "epoch": 0.8132387706855791, "grad_norm": 0.19522826373577118, "learning_rate": 7.69642597555859e-05, "loss": 0.6651, "step": 3956 }, { "epoch": 0.8134443416589577, "grad_norm": 0.18719004094600677, "learning_rate": 7.695722851901024e-05, "loss": 0.6871, "step": 3957 }, { "epoch": 0.8136499126323363, "grad_norm": 0.18853691220283508, "learning_rate": 7.695019570805129e-05, "loss": 0.6951, "step": 3958 }, { "epoch": 0.8138554836057149, "grad_norm": 0.191143199801445, "learning_rate": 7.694316132305553e-05, "loss": 0.6819, "step": 3959 }, { "epoch": 0.8140610545790934, "grad_norm": 0.20034968852996826, "learning_rate": 7.69361253643695e-05, "loss": 0.6813, "step": 3960 }, { "epoch": 0.814266625552472, "grad_norm": 0.1926213502883911, "learning_rate": 7.692908783233987e-05, "loss": 0.6766, "step": 3961 }, { "epoch": 0.8144721965258506, "grad_norm": 0.17970655858516693, "learning_rate": 7.692204872731329e-05, "loss": 0.6708, "step": 3962 }, { "epoch": 0.8146777674992292, "grad_norm": 0.18484726548194885, "learning_rate": 7.691500804963659e-05, "loss": 0.6606, "step": 3963 }, { "epoch": 0.8148833384726076, "grad_norm": 0.19342055916786194, "learning_rate": 7.690796579965661e-05, "loss": 0.6878, "step": 3964 }, { "epoch": 0.8150889094459862, "grad_norm": 0.17727455496788025, "learning_rate": 7.69009219777203e-05, "loss": 0.5893, "step": 3965 }, { "epoch": 0.8152944804193648, "grad_norm": 0.14557015895843506, "learning_rate": 7.689387658417466e-05, "loss": 0.5706, "step": 3966 }, { "epoch": 0.8155000513927434, "grad_norm": 0.20403575897216797, "learning_rate": 7.688682961936678e-05, "loss": 0.6717, "step": 3967 }, { "epoch": 0.8157056223661219, "grad_norm": 0.1949741244316101, "learning_rate": 7.687978108364386e-05, "loss": 0.6679, "step": 3968 }, { "epoch": 0.8159111933395005, "grad_norm": 0.18995149433612823, "learning_rate": 7.687273097735314e-05, "loss": 0.6625, "step": 3969 }, { "epoch": 0.816116764312879, "grad_norm": 0.1978754699230194, "learning_rate": 7.686567930084193e-05, "loss": 0.6665, "step": 3970 }, { "epoch": 0.8163223352862575, "grad_norm": 0.20074686408042908, "learning_rate": 7.685862605445763e-05, "loss": 0.585, "step": 3971 }, { "epoch": 0.8165279062596361, "grad_norm": 0.2053072452545166, "learning_rate": 7.685157123854774e-05, "loss": 0.6753, "step": 3972 }, { "epoch": 0.8167334772330147, "grad_norm": 0.19377997517585754, "learning_rate": 7.68445148534598e-05, "loss": 0.7029, "step": 3973 }, { "epoch": 0.8169390482063933, "grad_norm": 0.19419549405574799, "learning_rate": 7.683745689954146e-05, "loss": 0.6722, "step": 3974 }, { "epoch": 0.8171446191797718, "grad_norm": 0.1902785748243332, "learning_rate": 7.683039737714042e-05, "loss": 0.6982, "step": 3975 }, { "epoch": 0.8173501901531504, "grad_norm": 0.19267836213111877, "learning_rate": 7.68233362866045e-05, "loss": 0.6485, "step": 3976 }, { "epoch": 0.817555761126529, "grad_norm": 0.1380038857460022, "learning_rate": 7.681627362828152e-05, "loss": 0.583, "step": 3977 }, { "epoch": 0.8177613320999075, "grad_norm": 0.20162338018417358, "learning_rate": 7.680920940251947e-05, "loss": 0.662, "step": 3978 }, { "epoch": 0.817966903073286, "grad_norm": 0.12970632314682007, "learning_rate": 7.680214360966631e-05, "loss": 0.5716, "step": 3979 }, { "epoch": 0.8181724740466646, "grad_norm": 0.20082327723503113, "learning_rate": 7.679507625007021e-05, "loss": 0.681, "step": 3980 }, { "epoch": 0.8183780450200432, "grad_norm": 0.18788529932498932, "learning_rate": 7.67880073240793e-05, "loss": 0.6779, "step": 3981 }, { "epoch": 0.8185836159934218, "grad_norm": 0.1803288459777832, "learning_rate": 7.678093683204185e-05, "loss": 0.6553, "step": 3982 }, { "epoch": 0.8187891869668003, "grad_norm": 0.17987079918384552, "learning_rate": 7.677386477430619e-05, "loss": 0.6784, "step": 3983 }, { "epoch": 0.8189947579401788, "grad_norm": 0.14350593090057373, "learning_rate": 7.676679115122071e-05, "loss": 0.5904, "step": 3984 }, { "epoch": 0.8192003289135574, "grad_norm": 0.18889760971069336, "learning_rate": 7.675971596313391e-05, "loss": 0.6551, "step": 3985 }, { "epoch": 0.819405899886936, "grad_norm": 0.1940951943397522, "learning_rate": 7.675263921039436e-05, "loss": 0.6905, "step": 3986 }, { "epoch": 0.8196114708603145, "grad_norm": 0.18888835608959198, "learning_rate": 7.674556089335068e-05, "loss": 0.6613, "step": 3987 }, { "epoch": 0.8198170418336931, "grad_norm": 0.18659929931163788, "learning_rate": 7.673848101235161e-05, "loss": 0.6346, "step": 3988 }, { "epoch": 0.8200226128070717, "grad_norm": 0.19220280647277832, "learning_rate": 7.67313995677459e-05, "loss": 0.6835, "step": 3989 }, { "epoch": 0.8202281837804501, "grad_norm": 0.18803051114082336, "learning_rate": 7.672431655988245e-05, "loss": 0.6733, "step": 3990 }, { "epoch": 0.8204337547538287, "grad_norm": 0.15034914016723633, "learning_rate": 7.671723198911022e-05, "loss": 0.5774, "step": 3991 }, { "epoch": 0.8206393257272073, "grad_norm": 0.19378551840782166, "learning_rate": 7.671014585577821e-05, "loss": 0.6688, "step": 3992 }, { "epoch": 0.8208448967005859, "grad_norm": 0.22061464190483093, "learning_rate": 7.670305816023551e-05, "loss": 0.6763, "step": 3993 }, { "epoch": 0.8210504676739644, "grad_norm": 0.18267303705215454, "learning_rate": 7.669596890283132e-05, "loss": 0.6657, "step": 3994 }, { "epoch": 0.821256038647343, "grad_norm": 0.1902119219303131, "learning_rate": 7.66888780839149e-05, "loss": 0.6827, "step": 3995 }, { "epoch": 0.8214616096207216, "grad_norm": 0.1934443563222885, "learning_rate": 7.668178570383558e-05, "loss": 0.6979, "step": 3996 }, { "epoch": 0.8216671805941002, "grad_norm": 0.19263286888599396, "learning_rate": 7.667469176294272e-05, "loss": 0.6665, "step": 3997 }, { "epoch": 0.8218727515674786, "grad_norm": 0.13605189323425293, "learning_rate": 7.666759626158587e-05, "loss": 0.5615, "step": 3998 }, { "epoch": 0.8220783225408572, "grad_norm": 0.19073757529258728, "learning_rate": 7.666049920011457e-05, "loss": 0.6676, "step": 3999 }, { "epoch": 0.8222838935142358, "grad_norm": 0.193292036652565, "learning_rate": 7.665340057887844e-05, "loss": 0.6751, "step": 4000 }, { "epoch": 0.8224894644876144, "grad_norm": 0.18150904774665833, "learning_rate": 7.664630039822722e-05, "loss": 0.6678, "step": 4001 }, { "epoch": 0.8226950354609929, "grad_norm": 0.19092898070812225, "learning_rate": 7.663919865851071e-05, "loss": 0.6643, "step": 4002 }, { "epoch": 0.8229006064343715, "grad_norm": 0.1463061273097992, "learning_rate": 7.663209536007873e-05, "loss": 0.6015, "step": 4003 }, { "epoch": 0.82310617740775, "grad_norm": 0.13264085352420807, "learning_rate": 7.662499050328129e-05, "loss": 0.5761, "step": 4004 }, { "epoch": 0.8233117483811286, "grad_norm": 0.19010482728481293, "learning_rate": 7.661788408846837e-05, "loss": 0.6417, "step": 4005 }, { "epoch": 0.8235173193545071, "grad_norm": 0.1999100148677826, "learning_rate": 7.661077611599007e-05, "loss": 0.6863, "step": 4006 }, { "epoch": 0.8237228903278857, "grad_norm": 0.19514624774456024, "learning_rate": 7.660366658619658e-05, "loss": 0.6738, "step": 4007 }, { "epoch": 0.8239284613012643, "grad_norm": 0.18463024497032166, "learning_rate": 7.659655549943817e-05, "loss": 0.6723, "step": 4008 }, { "epoch": 0.8241340322746428, "grad_norm": 0.19612738490104675, "learning_rate": 7.658944285606515e-05, "loss": 0.6856, "step": 4009 }, { "epoch": 0.8243396032480214, "grad_norm": 0.18983608484268188, "learning_rate": 7.658232865642793e-05, "loss": 0.6705, "step": 4010 }, { "epoch": 0.8245451742214, "grad_norm": 0.18740776181221008, "learning_rate": 7.657521290087699e-05, "loss": 0.6769, "step": 4011 }, { "epoch": 0.8247507451947785, "grad_norm": 0.1823440045118332, "learning_rate": 7.656809558976289e-05, "loss": 0.663, "step": 4012 }, { "epoch": 0.824956316168157, "grad_norm": 0.18513023853302002, "learning_rate": 7.656097672343626e-05, "loss": 0.6657, "step": 4013 }, { "epoch": 0.8251618871415356, "grad_norm": 0.1865355670452118, "learning_rate": 7.655385630224783e-05, "loss": 0.649, "step": 4014 }, { "epoch": 0.8253674581149142, "grad_norm": 0.18735235929489136, "learning_rate": 7.654673432654839e-05, "loss": 0.6717, "step": 4015 }, { "epoch": 0.8255730290882928, "grad_norm": 0.25272443890571594, "learning_rate": 7.65396107966888e-05, "loss": 0.5985, "step": 4016 }, { "epoch": 0.8257786000616713, "grad_norm": 0.19560717046260834, "learning_rate": 7.653248571301998e-05, "loss": 0.6861, "step": 4017 }, { "epoch": 0.8259841710350498, "grad_norm": 0.2014644891023636, "learning_rate": 7.652535907589299e-05, "loss": 0.6849, "step": 4018 }, { "epoch": 0.8261897420084284, "grad_norm": 0.15079200267791748, "learning_rate": 7.65182308856589e-05, "loss": 0.5943, "step": 4019 }, { "epoch": 0.826395312981807, "grad_norm": 0.19071127474308014, "learning_rate": 7.651110114266889e-05, "loss": 0.672, "step": 4020 }, { "epoch": 0.8266008839551855, "grad_norm": 0.1912720799446106, "learning_rate": 7.650396984727422e-05, "loss": 0.672, "step": 4021 }, { "epoch": 0.8268064549285641, "grad_norm": 0.1873595118522644, "learning_rate": 7.64968369998262e-05, "loss": 0.6576, "step": 4022 }, { "epoch": 0.8270120259019427, "grad_norm": 0.19510895013809204, "learning_rate": 7.648970260067623e-05, "loss": 0.6711, "step": 4023 }, { "epoch": 0.8272175968753213, "grad_norm": 0.1938508152961731, "learning_rate": 7.64825666501758e-05, "loss": 0.6629, "step": 4024 }, { "epoch": 0.8274231678486997, "grad_norm": 0.1958763152360916, "learning_rate": 7.647542914867646e-05, "loss": 0.6749, "step": 4025 }, { "epoch": 0.8276287388220783, "grad_norm": 0.18302227556705475, "learning_rate": 7.646829009652985e-05, "loss": 0.6462, "step": 4026 }, { "epoch": 0.8278343097954569, "grad_norm": 0.15973201394081116, "learning_rate": 7.646114949408764e-05, "loss": 0.5734, "step": 4027 }, { "epoch": 0.8280398807688354, "grad_norm": 0.18773558735847473, "learning_rate": 7.645400734170168e-05, "loss": 0.6912, "step": 4028 }, { "epoch": 0.828245451742214, "grad_norm": 0.12838105857372284, "learning_rate": 7.644686363972378e-05, "loss": 0.5789, "step": 4029 }, { "epoch": 0.8284510227155926, "grad_norm": 0.19766302406787872, "learning_rate": 7.643971838850589e-05, "loss": 0.6654, "step": 4030 }, { "epoch": 0.8286565936889712, "grad_norm": 0.1896764189004898, "learning_rate": 7.643257158840001e-05, "loss": 0.7013, "step": 4031 }, { "epoch": 0.8288621646623496, "grad_norm": 0.14424748718738556, "learning_rate": 7.642542323975826e-05, "loss": 0.5759, "step": 4032 }, { "epoch": 0.8290677356357282, "grad_norm": 0.192418172955513, "learning_rate": 7.641827334293279e-05, "loss": 0.697, "step": 4033 }, { "epoch": 0.8292733066091068, "grad_norm": 0.19316205382347107, "learning_rate": 7.641112189827583e-05, "loss": 0.6466, "step": 4034 }, { "epoch": 0.8294788775824854, "grad_norm": 0.17913931608200073, "learning_rate": 7.640396890613972e-05, "loss": 0.6539, "step": 4035 }, { "epoch": 0.8296844485558639, "grad_norm": 0.1839427500963211, "learning_rate": 7.639681436687685e-05, "loss": 0.678, "step": 4036 }, { "epoch": 0.8298900195292425, "grad_norm": 0.18442392349243164, "learning_rate": 7.638965828083966e-05, "loss": 0.6628, "step": 4037 }, { "epoch": 0.830095590502621, "grad_norm": 0.1920039802789688, "learning_rate": 7.638250064838073e-05, "loss": 0.6813, "step": 4038 }, { "epoch": 0.8303011614759996, "grad_norm": 0.14554156363010406, "learning_rate": 7.637534146985269e-05, "loss": 0.5533, "step": 4039 }, { "epoch": 0.8305067324493781, "grad_norm": 0.13095219433307648, "learning_rate": 7.63681807456082e-05, "loss": 0.5738, "step": 4040 }, { "epoch": 0.8307123034227567, "grad_norm": 0.2078784555196762, "learning_rate": 7.636101847600008e-05, "loss": 0.6674, "step": 4041 }, { "epoch": 0.8309178743961353, "grad_norm": 0.21770761907100677, "learning_rate": 7.635385466138116e-05, "loss": 0.6671, "step": 4042 }, { "epoch": 0.8311234453695138, "grad_norm": 0.18896861374378204, "learning_rate": 7.634668930210436e-05, "loss": 0.6855, "step": 4043 }, { "epoch": 0.8313290163428924, "grad_norm": 0.14647965133190155, "learning_rate": 7.633952239852269e-05, "loss": 0.598, "step": 4044 }, { "epoch": 0.831534587316271, "grad_norm": 0.19375310838222504, "learning_rate": 7.633235395098923e-05, "loss": 0.6639, "step": 4045 }, { "epoch": 0.8317401582896495, "grad_norm": 0.19974082708358765, "learning_rate": 7.632518395985715e-05, "loss": 0.6907, "step": 4046 }, { "epoch": 0.831945729263028, "grad_norm": 0.19184468686580658, "learning_rate": 7.631801242547967e-05, "loss": 0.6713, "step": 4047 }, { "epoch": 0.8321513002364066, "grad_norm": 0.13093294203281403, "learning_rate": 7.631083934821008e-05, "loss": 0.5689, "step": 4048 }, { "epoch": 0.8323568712097852, "grad_norm": 0.19299007952213287, "learning_rate": 7.63036647284018e-05, "loss": 0.6664, "step": 4049 }, { "epoch": 0.8325624421831638, "grad_norm": 0.19684211909770966, "learning_rate": 7.629648856640827e-05, "loss": 0.6594, "step": 4050 }, { "epoch": 0.8327680131565423, "grad_norm": 0.1866525262594223, "learning_rate": 7.6289310862583e-05, "loss": 0.6664, "step": 4051 }, { "epoch": 0.8329735841299208, "grad_norm": 0.1905846893787384, "learning_rate": 7.628213161727966e-05, "loss": 0.6458, "step": 4052 }, { "epoch": 0.8331791551032994, "grad_norm": 0.19215607643127441, "learning_rate": 7.62749508308519e-05, "loss": 0.6508, "step": 4053 }, { "epoch": 0.833384726076678, "grad_norm": 0.18882425129413605, "learning_rate": 7.62677685036535e-05, "loss": 0.6679, "step": 4054 }, { "epoch": 0.8335902970500565, "grad_norm": 0.1906069815158844, "learning_rate": 7.626058463603828e-05, "loss": 0.6619, "step": 4055 }, { "epoch": 0.8337958680234351, "grad_norm": 0.18673735857009888, "learning_rate": 7.625339922836016e-05, "loss": 0.6658, "step": 4056 }, { "epoch": 0.8340014389968137, "grad_norm": 0.19083453714847565, "learning_rate": 7.624621228097316e-05, "loss": 0.6631, "step": 4057 }, { "epoch": 0.8342070099701923, "grad_norm": 0.18321901559829712, "learning_rate": 7.62390237942313e-05, "loss": 0.6579, "step": 4058 }, { "epoch": 0.8344125809435707, "grad_norm": 0.14776909351348877, "learning_rate": 7.623183376848878e-05, "loss": 0.5934, "step": 4059 }, { "epoch": 0.8346181519169493, "grad_norm": 0.20167462527751923, "learning_rate": 7.622464220409975e-05, "loss": 0.6709, "step": 4060 }, { "epoch": 0.8348237228903279, "grad_norm": 0.19711320102214813, "learning_rate": 7.621744910141858e-05, "loss": 0.6672, "step": 4061 }, { "epoch": 0.8350292938637064, "grad_norm": 0.18972383439540863, "learning_rate": 7.621025446079956e-05, "loss": 0.6677, "step": 4062 }, { "epoch": 0.835234864837085, "grad_norm": 0.19243162870407104, "learning_rate": 7.620305828259722e-05, "loss": 0.6874, "step": 4063 }, { "epoch": 0.8354404358104636, "grad_norm": 0.18802182376384735, "learning_rate": 7.619586056716601e-05, "loss": 0.6656, "step": 4064 }, { "epoch": 0.8356460067838422, "grad_norm": 0.14523807168006897, "learning_rate": 7.618866131486058e-05, "loss": 0.6011, "step": 4065 }, { "epoch": 0.8358515777572206, "grad_norm": 0.18922917544841766, "learning_rate": 7.618146052603557e-05, "loss": 0.6577, "step": 4066 }, { "epoch": 0.8360571487305992, "grad_norm": 0.19187946617603302, "learning_rate": 7.617425820104574e-05, "loss": 0.6774, "step": 4067 }, { "epoch": 0.8362627197039778, "grad_norm": 0.1862529069185257, "learning_rate": 7.616705434024593e-05, "loss": 0.6503, "step": 4068 }, { "epoch": 0.8364682906773564, "grad_norm": 0.19143825769424438, "learning_rate": 7.615984894399102e-05, "loss": 0.6803, "step": 4069 }, { "epoch": 0.8366738616507349, "grad_norm": 0.18703386187553406, "learning_rate": 7.615264201263599e-05, "loss": 0.6779, "step": 4070 }, { "epoch": 0.8368794326241135, "grad_norm": 0.18577006459236145, "learning_rate": 7.61454335465359e-05, "loss": 0.6671, "step": 4071 }, { "epoch": 0.837085003597492, "grad_norm": 0.18921016156673431, "learning_rate": 7.613822354604587e-05, "loss": 0.6955, "step": 4072 }, { "epoch": 0.8372905745708706, "grad_norm": 0.1349778026342392, "learning_rate": 7.613101201152111e-05, "loss": 0.568, "step": 4073 }, { "epoch": 0.8374961455442491, "grad_norm": 0.1813334822654724, "learning_rate": 7.612379894331689e-05, "loss": 0.6512, "step": 4074 }, { "epoch": 0.8377017165176277, "grad_norm": 0.1277725249528885, "learning_rate": 7.611658434178857e-05, "loss": 0.5773, "step": 4075 }, { "epoch": 0.8379072874910063, "grad_norm": 0.1959075778722763, "learning_rate": 7.610936820729157e-05, "loss": 0.6923, "step": 4076 }, { "epoch": 0.8381128584643849, "grad_norm": 0.19275759160518646, "learning_rate": 7.610215054018142e-05, "loss": 0.6868, "step": 4077 }, { "epoch": 0.8383184294377634, "grad_norm": 0.19022993743419647, "learning_rate": 7.609493134081367e-05, "loss": 0.636, "step": 4078 }, { "epoch": 0.838524000411142, "grad_norm": 0.1396605670452118, "learning_rate": 7.608771060954399e-05, "loss": 0.5913, "step": 4079 }, { "epoch": 0.8387295713845205, "grad_norm": 0.126824289560318, "learning_rate": 7.608048834672812e-05, "loss": 0.5857, "step": 4080 }, { "epoch": 0.838935142357899, "grad_norm": 0.20024533569812775, "learning_rate": 7.607326455272187e-05, "loss": 0.6722, "step": 4081 }, { "epoch": 0.8391407133312776, "grad_norm": 0.19841928780078888, "learning_rate": 7.606603922788108e-05, "loss": 0.6507, "step": 4082 }, { "epoch": 0.8393462843046562, "grad_norm": 0.17838910222053528, "learning_rate": 7.605881237256175e-05, "loss": 0.6203, "step": 4083 }, { "epoch": 0.8395518552780348, "grad_norm": 0.1466301828622818, "learning_rate": 7.605158398711991e-05, "loss": 0.5627, "step": 4084 }, { "epoch": 0.8397574262514133, "grad_norm": 0.1911042481660843, "learning_rate": 7.604435407191167e-05, "loss": 0.656, "step": 4085 }, { "epoch": 0.8399629972247918, "grad_norm": 0.1837422102689743, "learning_rate": 7.60371226272932e-05, "loss": 0.653, "step": 4086 }, { "epoch": 0.8401685681981704, "grad_norm": 0.1889040619134903, "learning_rate": 7.602988965362075e-05, "loss": 0.6757, "step": 4087 }, { "epoch": 0.840374139171549, "grad_norm": 0.18443772196769714, "learning_rate": 7.602265515125069e-05, "loss": 0.6627, "step": 4088 }, { "epoch": 0.8405797101449275, "grad_norm": 0.19531475007534027, "learning_rate": 7.601541912053939e-05, "loss": 0.6678, "step": 4089 }, { "epoch": 0.8407852811183061, "grad_norm": 0.18012624979019165, "learning_rate": 7.600818156184338e-05, "loss": 0.6605, "step": 4090 }, { "epoch": 0.8409908520916847, "grad_norm": 0.16611045598983765, "learning_rate": 7.600094247551918e-05, "loss": 0.606, "step": 4091 }, { "epoch": 0.8411964230650633, "grad_norm": 0.1904737800359726, "learning_rate": 7.599370186192345e-05, "loss": 0.6825, "step": 4092 }, { "epoch": 0.8414019940384417, "grad_norm": 0.1872866153717041, "learning_rate": 7.598645972141288e-05, "loss": 0.6555, "step": 4093 }, { "epoch": 0.8416075650118203, "grad_norm": 0.1912485808134079, "learning_rate": 7.59792160543443e-05, "loss": 0.667, "step": 4094 }, { "epoch": 0.8418131359851989, "grad_norm": 0.18316781520843506, "learning_rate": 7.597197086107451e-05, "loss": 0.6583, "step": 4095 }, { "epoch": 0.8420187069585775, "grad_norm": 0.18488352000713348, "learning_rate": 7.596472414196049e-05, "loss": 0.6619, "step": 4096 }, { "epoch": 0.842224277931956, "grad_norm": 0.16305844485759735, "learning_rate": 7.595747589735923e-05, "loss": 0.5869, "step": 4097 }, { "epoch": 0.8424298489053346, "grad_norm": 0.19764935970306396, "learning_rate": 7.595022612762786e-05, "loss": 0.6704, "step": 4098 }, { "epoch": 0.8426354198787132, "grad_norm": 0.2008553147315979, "learning_rate": 7.594297483312348e-05, "loss": 0.6928, "step": 4099 }, { "epoch": 0.8428409908520916, "grad_norm": 0.19005800783634186, "learning_rate": 7.593572201420336e-05, "loss": 0.68, "step": 4100 }, { "epoch": 0.8430465618254702, "grad_norm": 0.18260590732097626, "learning_rate": 7.592846767122481e-05, "loss": 0.6452, "step": 4101 }, { "epoch": 0.8432521327988488, "grad_norm": 0.24055607616901398, "learning_rate": 7.592121180454522e-05, "loss": 0.6555, "step": 4102 }, { "epoch": 0.8434577037722274, "grad_norm": 0.18779988586902618, "learning_rate": 7.591395441452205e-05, "loss": 0.6558, "step": 4103 }, { "epoch": 0.8436632747456059, "grad_norm": 0.19184498488903046, "learning_rate": 7.590669550151284e-05, "loss": 0.6737, "step": 4104 }, { "epoch": 0.8438688457189845, "grad_norm": 0.17881546914577484, "learning_rate": 7.58994350658752e-05, "loss": 0.6482, "step": 4105 }, { "epoch": 0.844074416692363, "grad_norm": 0.19403071701526642, "learning_rate": 7.589217310796682e-05, "loss": 0.6316, "step": 4106 }, { "epoch": 0.8442799876657416, "grad_norm": 0.18991516530513763, "learning_rate": 7.588490962814544e-05, "loss": 0.6286, "step": 4107 }, { "epoch": 0.8444855586391201, "grad_norm": 0.19792747497558594, "learning_rate": 7.587764462676895e-05, "loss": 0.6514, "step": 4108 }, { "epoch": 0.8446911296124987, "grad_norm": 0.18424390256404877, "learning_rate": 7.587037810419521e-05, "loss": 0.6726, "step": 4109 }, { "epoch": 0.8448967005858773, "grad_norm": 0.16541998088359833, "learning_rate": 7.586311006078223e-05, "loss": 0.5817, "step": 4110 }, { "epoch": 0.8451022715592559, "grad_norm": 0.19858099520206451, "learning_rate": 7.585584049688807e-05, "loss": 0.6799, "step": 4111 }, { "epoch": 0.8453078425326344, "grad_norm": 0.19580329954624176, "learning_rate": 7.58485694128709e-05, "loss": 0.6626, "step": 4112 }, { "epoch": 0.845513413506013, "grad_norm": 0.18652157485485077, "learning_rate": 7.584129680908886e-05, "loss": 0.6406, "step": 4113 }, { "epoch": 0.8457189844793915, "grad_norm": 0.1859186291694641, "learning_rate": 7.58340226859003e-05, "loss": 0.6477, "step": 4114 }, { "epoch": 0.8459245554527701, "grad_norm": 0.1960713267326355, "learning_rate": 7.582674704366354e-05, "loss": 0.6685, "step": 4115 }, { "epoch": 0.8461301264261486, "grad_norm": 0.19311878085136414, "learning_rate": 7.581946988273706e-05, "loss": 0.6976, "step": 4116 }, { "epoch": 0.8463356973995272, "grad_norm": 0.18788793683052063, "learning_rate": 7.581219120347933e-05, "loss": 0.6545, "step": 4117 }, { "epoch": 0.8465412683729058, "grad_norm": 0.1906074583530426, "learning_rate": 7.580491100624896e-05, "loss": 0.6772, "step": 4118 }, { "epoch": 0.8467468393462843, "grad_norm": 0.18752005696296692, "learning_rate": 7.579762929140462e-05, "loss": 0.672, "step": 4119 }, { "epoch": 0.8469524103196628, "grad_norm": 0.1863172948360443, "learning_rate": 7.579034605930502e-05, "loss": 0.6502, "step": 4120 }, { "epoch": 0.8471579812930414, "grad_norm": 0.18836906552314758, "learning_rate": 7.578306131030898e-05, "loss": 0.6438, "step": 4121 }, { "epoch": 0.84736355226642, "grad_norm": 0.1857694834470749, "learning_rate": 7.577577504477541e-05, "loss": 0.6595, "step": 4122 }, { "epoch": 0.8475691232397985, "grad_norm": 0.18018977344036102, "learning_rate": 7.576848726306323e-05, "loss": 0.6315, "step": 4123 }, { "epoch": 0.8477746942131771, "grad_norm": 0.18060006201267242, "learning_rate": 7.57611979655315e-05, "loss": 0.6764, "step": 4124 }, { "epoch": 0.8479802651865557, "grad_norm": 0.18697619438171387, "learning_rate": 7.575390715253932e-05, "loss": 0.6397, "step": 4125 }, { "epoch": 0.8481858361599343, "grad_norm": 0.19681645929813385, "learning_rate": 7.574661482444589e-05, "loss": 0.663, "step": 4126 }, { "epoch": 0.8483914071333127, "grad_norm": 0.18985417485237122, "learning_rate": 7.573932098161043e-05, "loss": 0.6413, "step": 4127 }, { "epoch": 0.8485969781066913, "grad_norm": 0.183248370885849, "learning_rate": 7.573202562439232e-05, "loss": 0.6521, "step": 4128 }, { "epoch": 0.8488025490800699, "grad_norm": 0.17444172501564026, "learning_rate": 7.572472875315095e-05, "loss": 0.5904, "step": 4129 }, { "epoch": 0.8490081200534485, "grad_norm": 0.21605822443962097, "learning_rate": 7.57174303682458e-05, "loss": 0.6615, "step": 4130 }, { "epoch": 0.849213691026827, "grad_norm": 0.20160672068595886, "learning_rate": 7.571013047003643e-05, "loss": 0.7124, "step": 4131 }, { "epoch": 0.8494192620002056, "grad_norm": 0.18523965775966644, "learning_rate": 7.570282905888246e-05, "loss": 0.6608, "step": 4132 }, { "epoch": 0.8496248329735842, "grad_norm": 0.19887828826904297, "learning_rate": 7.569552613514362e-05, "loss": 0.6699, "step": 4133 }, { "epoch": 0.8498304039469627, "grad_norm": 0.19583609700202942, "learning_rate": 7.568822169917967e-05, "loss": 0.6682, "step": 4134 }, { "epoch": 0.8500359749203412, "grad_norm": 0.19429847598075867, "learning_rate": 7.568091575135048e-05, "loss": 0.6828, "step": 4135 }, { "epoch": 0.8502415458937198, "grad_norm": 0.1865924745798111, "learning_rate": 7.567360829201597e-05, "loss": 0.674, "step": 4136 }, { "epoch": 0.8504471168670984, "grad_norm": 0.17295409739017487, "learning_rate": 7.566629932153615e-05, "loss": 0.5802, "step": 4137 }, { "epoch": 0.8506526878404769, "grad_norm": 0.1509198248386383, "learning_rate": 7.565898884027107e-05, "loss": 0.5835, "step": 4138 }, { "epoch": 0.8508582588138555, "grad_norm": 0.2158360481262207, "learning_rate": 7.565167684858095e-05, "loss": 0.6711, "step": 4139 }, { "epoch": 0.851063829787234, "grad_norm": 0.17296075820922852, "learning_rate": 7.564436334682594e-05, "loss": 0.6029, "step": 4140 }, { "epoch": 0.8512694007606126, "grad_norm": 0.21175174415111542, "learning_rate": 7.56370483353664e-05, "loss": 0.7072, "step": 4141 }, { "epoch": 0.8514749717339911, "grad_norm": 0.1445254236459732, "learning_rate": 7.562973181456269e-05, "loss": 0.5766, "step": 4142 }, { "epoch": 0.8516805427073697, "grad_norm": 0.19627566635608673, "learning_rate": 7.562241378477526e-05, "loss": 0.6652, "step": 4143 }, { "epoch": 0.8518861136807483, "grad_norm": 0.22292684018611908, "learning_rate": 7.561509424636462e-05, "loss": 0.7013, "step": 4144 }, { "epoch": 0.8520916846541269, "grad_norm": 0.1842968612909317, "learning_rate": 7.560777319969138e-05, "loss": 0.6621, "step": 4145 }, { "epoch": 0.8522972556275054, "grad_norm": 0.19120851159095764, "learning_rate": 7.560045064511622e-05, "loss": 0.6508, "step": 4146 }, { "epoch": 0.852502826600884, "grad_norm": 0.21807745099067688, "learning_rate": 7.559312658299988e-05, "loss": 0.6831, "step": 4147 }, { "epoch": 0.8527083975742625, "grad_norm": 0.19106024503707886, "learning_rate": 7.558580101370318e-05, "loss": 0.6636, "step": 4148 }, { "epoch": 0.8529139685476411, "grad_norm": 0.1850479245185852, "learning_rate": 7.557847393758702e-05, "loss": 0.589, "step": 4149 }, { "epoch": 0.8531195395210196, "grad_norm": 0.1937406063079834, "learning_rate": 7.55711453550124e-05, "loss": 0.6401, "step": 4150 }, { "epoch": 0.8533251104943982, "grad_norm": 0.12518863379955292, "learning_rate": 7.556381526634031e-05, "loss": 0.5776, "step": 4151 }, { "epoch": 0.8535306814677768, "grad_norm": 0.6598914861679077, "learning_rate": 7.555648367193191e-05, "loss": 0.6637, "step": 4152 }, { "epoch": 0.8537362524411554, "grad_norm": 0.19615043699741364, "learning_rate": 7.554915057214837e-05, "loss": 0.6883, "step": 4153 }, { "epoch": 0.8539418234145338, "grad_norm": 0.18384511768817902, "learning_rate": 7.554181596735097e-05, "loss": 0.6749, "step": 4154 }, { "epoch": 0.8541473943879124, "grad_norm": 0.198414608836174, "learning_rate": 7.553447985790105e-05, "loss": 0.6878, "step": 4155 }, { "epoch": 0.854352965361291, "grad_norm": 0.19876956939697266, "learning_rate": 7.552714224416002e-05, "loss": 0.6398, "step": 4156 }, { "epoch": 0.8545585363346695, "grad_norm": 0.18689413368701935, "learning_rate": 7.551980312648939e-05, "loss": 0.6765, "step": 4157 }, { "epoch": 0.8547641073080481, "grad_norm": 0.1880849003791809, "learning_rate": 7.55124625052507e-05, "loss": 0.6596, "step": 4158 }, { "epoch": 0.8549696782814267, "grad_norm": 0.18960778415203094, "learning_rate": 7.550512038080559e-05, "loss": 0.6677, "step": 4159 }, { "epoch": 0.8551752492548053, "grad_norm": 0.20969745516777039, "learning_rate": 7.549777675351581e-05, "loss": 0.5811, "step": 4160 }, { "epoch": 0.8553808202281837, "grad_norm": 0.1950722187757492, "learning_rate": 7.549043162374308e-05, "loss": 0.6807, "step": 4161 }, { "epoch": 0.8555863912015623, "grad_norm": 0.20414437353610992, "learning_rate": 7.54830849918493e-05, "loss": 0.6937, "step": 4162 }, { "epoch": 0.8557919621749409, "grad_norm": 0.3100520670413971, "learning_rate": 7.547573685819643e-05, "loss": 0.5698, "step": 4163 }, { "epoch": 0.8559975331483195, "grad_norm": 0.186519056558609, "learning_rate": 7.546838722314641e-05, "loss": 0.6604, "step": 4164 }, { "epoch": 0.856203104121698, "grad_norm": 0.19283641874790192, "learning_rate": 7.546103608706137e-05, "loss": 0.6484, "step": 4165 }, { "epoch": 0.8564086750950766, "grad_norm": 0.1958523392677307, "learning_rate": 7.545368345030348e-05, "loss": 0.6814, "step": 4166 }, { "epoch": 0.8566142460684552, "grad_norm": 0.19231447577476501, "learning_rate": 7.544632931323492e-05, "loss": 0.6768, "step": 4167 }, { "epoch": 0.8568198170418337, "grad_norm": 0.18475113809108734, "learning_rate": 7.543897367621804e-05, "loss": 0.6781, "step": 4168 }, { "epoch": 0.8570253880152122, "grad_norm": 0.1537688672542572, "learning_rate": 7.543161653961518e-05, "loss": 0.6122, "step": 4169 }, { "epoch": 0.8572309589885908, "grad_norm": 0.20179788768291473, "learning_rate": 7.542425790378882e-05, "loss": 0.6563, "step": 4170 }, { "epoch": 0.8574365299619694, "grad_norm": 0.1862722784280777, "learning_rate": 7.541689776910149e-05, "loss": 0.6752, "step": 4171 }, { "epoch": 0.857642100935348, "grad_norm": 0.18401017785072327, "learning_rate": 7.540953613591576e-05, "loss": 0.6828, "step": 4172 }, { "epoch": 0.8578476719087265, "grad_norm": 0.18829752504825592, "learning_rate": 7.540217300459431e-05, "loss": 0.6479, "step": 4173 }, { "epoch": 0.858053242882105, "grad_norm": 0.19413256645202637, "learning_rate": 7.539480837549991e-05, "loss": 0.6429, "step": 4174 }, { "epoch": 0.8582588138554836, "grad_norm": 0.19081558287143707, "learning_rate": 7.538744224899536e-05, "loss": 0.647, "step": 4175 }, { "epoch": 0.8584643848288621, "grad_norm": 0.15339916944503784, "learning_rate": 7.538007462544356e-05, "loss": 0.5791, "step": 4176 }, { "epoch": 0.8586699558022407, "grad_norm": 0.12977366149425507, "learning_rate": 7.537270550520749e-05, "loss": 0.6098, "step": 4177 }, { "epoch": 0.8588755267756193, "grad_norm": 0.21286390721797943, "learning_rate": 7.536533488865016e-05, "loss": 0.6783, "step": 4178 }, { "epoch": 0.8590810977489979, "grad_norm": 0.14268797636032104, "learning_rate": 7.535796277613473e-05, "loss": 0.5743, "step": 4179 }, { "epoch": 0.8592866687223764, "grad_norm": 0.19620656967163086, "learning_rate": 7.535058916802435e-05, "loss": 0.6796, "step": 4180 }, { "epoch": 0.859492239695755, "grad_norm": 0.18335068225860596, "learning_rate": 7.534321406468231e-05, "loss": 0.6621, "step": 4181 }, { "epoch": 0.8596978106691335, "grad_norm": 0.19787956774234772, "learning_rate": 7.533583746647194e-05, "loss": 0.6775, "step": 4182 }, { "epoch": 0.8599033816425121, "grad_norm": 0.19326303899288177, "learning_rate": 7.532845937375664e-05, "loss": 0.6674, "step": 4183 }, { "epoch": 0.8601089526158906, "grad_norm": 0.1872076541185379, "learning_rate": 7.532107978689988e-05, "loss": 0.6777, "step": 4184 }, { "epoch": 0.8603145235892692, "grad_norm": 0.18660016357898712, "learning_rate": 7.531369870626528e-05, "loss": 0.6712, "step": 4185 }, { "epoch": 0.8605200945626478, "grad_norm": 0.19512499868869781, "learning_rate": 7.53063161322164e-05, "loss": 0.6848, "step": 4186 }, { "epoch": 0.8607256655360264, "grad_norm": 0.19282682240009308, "learning_rate": 7.5298932065117e-05, "loss": 0.6611, "step": 4187 }, { "epoch": 0.8609312365094048, "grad_norm": 0.2191070318222046, "learning_rate": 7.529154650533081e-05, "loss": 0.6792, "step": 4188 }, { "epoch": 0.8611368074827834, "grad_norm": 0.1931408941745758, "learning_rate": 7.528415945322172e-05, "loss": 0.6362, "step": 4189 }, { "epoch": 0.861342378456162, "grad_norm": 0.18459977209568024, "learning_rate": 7.527677090915364e-05, "loss": 0.5784, "step": 4190 }, { "epoch": 0.8615479494295405, "grad_norm": 0.19997800886631012, "learning_rate": 7.526938087349057e-05, "loss": 0.677, "step": 4191 }, { "epoch": 0.8617535204029191, "grad_norm": 0.19136178493499756, "learning_rate": 7.52619893465966e-05, "loss": 0.6854, "step": 4192 }, { "epoch": 0.8619590913762977, "grad_norm": 0.18970435857772827, "learning_rate": 7.525459632883582e-05, "loss": 0.674, "step": 4193 }, { "epoch": 0.8621646623496763, "grad_norm": 0.21736173331737518, "learning_rate": 7.524720182057252e-05, "loss": 0.6546, "step": 4194 }, { "epoch": 0.8623702333230547, "grad_norm": 0.1582231968641281, "learning_rate": 7.523980582217096e-05, "loss": 0.5956, "step": 4195 }, { "epoch": 0.8625758042964333, "grad_norm": 0.19707003235816956, "learning_rate": 7.52324083339955e-05, "loss": 0.6682, "step": 4196 }, { "epoch": 0.8627813752698119, "grad_norm": 0.19862191379070282, "learning_rate": 7.522500935641058e-05, "loss": 0.6435, "step": 4197 }, { "epoch": 0.8629869462431905, "grad_norm": 0.1881260871887207, "learning_rate": 7.521760888978073e-05, "loss": 0.6581, "step": 4198 }, { "epoch": 0.863192517216569, "grad_norm": 0.1898849755525589, "learning_rate": 7.521020693447052e-05, "loss": 0.6645, "step": 4199 }, { "epoch": 0.8633980881899476, "grad_norm": 0.1787111759185791, "learning_rate": 7.520280349084462e-05, "loss": 0.6113, "step": 4200 }, { "epoch": 0.8636036591633262, "grad_norm": 0.19326132535934448, "learning_rate": 7.519539855926777e-05, "loss": 0.6772, "step": 4201 }, { "epoch": 0.8638092301367047, "grad_norm": 0.18564841151237488, "learning_rate": 7.518799214010474e-05, "loss": 0.6657, "step": 4202 }, { "epoch": 0.8640148011100832, "grad_norm": 0.2385823279619217, "learning_rate": 7.518058423372045e-05, "loss": 0.5945, "step": 4203 }, { "epoch": 0.8642203720834618, "grad_norm": 0.199651300907135, "learning_rate": 7.517317484047984e-05, "loss": 0.664, "step": 4204 }, { "epoch": 0.8644259430568404, "grad_norm": 0.194375678896904, "learning_rate": 7.516576396074794e-05, "loss": 0.6745, "step": 4205 }, { "epoch": 0.864631514030219, "grad_norm": 0.18686725199222565, "learning_rate": 7.515835159488984e-05, "loss": 0.6897, "step": 4206 }, { "epoch": 0.8648370850035975, "grad_norm": 0.18740524351596832, "learning_rate": 7.515093774327071e-05, "loss": 0.6931, "step": 4207 }, { "epoch": 0.865042655976976, "grad_norm": 0.1922253668308258, "learning_rate": 7.514352240625581e-05, "loss": 0.6467, "step": 4208 }, { "epoch": 0.8652482269503546, "grad_norm": 0.19109128415584564, "learning_rate": 7.513610558421045e-05, "loss": 0.6697, "step": 4209 }, { "epoch": 0.8654537979237331, "grad_norm": 0.18134894967079163, "learning_rate": 7.512868727750002e-05, "loss": 0.6566, "step": 4210 }, { "epoch": 0.8656593688971117, "grad_norm": 0.1900303065776825, "learning_rate": 7.512126748648999e-05, "loss": 0.6987, "step": 4211 }, { "epoch": 0.8658649398704903, "grad_norm": 0.19076496362686157, "learning_rate": 7.51138462115459e-05, "loss": 0.6514, "step": 4212 }, { "epoch": 0.8660705108438689, "grad_norm": 0.18519791960716248, "learning_rate": 7.510642345303338e-05, "loss": 0.6964, "step": 4213 }, { "epoch": 0.8662760818172474, "grad_norm": 0.13831019401550293, "learning_rate": 7.509899921131805e-05, "loss": 0.5829, "step": 4214 }, { "epoch": 0.866481652790626, "grad_norm": 0.20118573307991028, "learning_rate": 7.509157348676574e-05, "loss": 0.6699, "step": 4215 }, { "epoch": 0.8666872237640045, "grad_norm": 0.18774531781673431, "learning_rate": 7.508414627974225e-05, "loss": 0.6612, "step": 4216 }, { "epoch": 0.8668927947373831, "grad_norm": 0.17688573896884918, "learning_rate": 7.507671759061346e-05, "loss": 0.6519, "step": 4217 }, { "epoch": 0.8670983657107616, "grad_norm": 0.18357358872890472, "learning_rate": 7.50692874197454e-05, "loss": 0.6792, "step": 4218 }, { "epoch": 0.8673039366841402, "grad_norm": 0.19416451454162598, "learning_rate": 7.506185576750409e-05, "loss": 0.6708, "step": 4219 }, { "epoch": 0.8675095076575188, "grad_norm": 0.18293076753616333, "learning_rate": 7.505442263425565e-05, "loss": 0.6843, "step": 4220 }, { "epoch": 0.8677150786308974, "grad_norm": 0.18310247361660004, "learning_rate": 7.504698802036629e-05, "loss": 0.6409, "step": 4221 }, { "epoch": 0.8679206496042758, "grad_norm": 0.18264919519424438, "learning_rate": 7.503955192620225e-05, "loss": 0.6709, "step": 4222 }, { "epoch": 0.8681262205776544, "grad_norm": 0.19960664212703705, "learning_rate": 7.50321143521299e-05, "loss": 0.6537, "step": 4223 }, { "epoch": 0.868331791551033, "grad_norm": 0.19281069934368134, "learning_rate": 7.502467529851565e-05, "loss": 0.6657, "step": 4224 }, { "epoch": 0.8685373625244116, "grad_norm": 0.19561025500297546, "learning_rate": 7.501723476572599e-05, "loss": 0.6867, "step": 4225 }, { "epoch": 0.8687429334977901, "grad_norm": 0.17898957431316376, "learning_rate": 7.500979275412747e-05, "loss": 0.6587, "step": 4226 }, { "epoch": 0.8689485044711687, "grad_norm": 0.19035303592681885, "learning_rate": 7.500234926408671e-05, "loss": 0.6719, "step": 4227 }, { "epoch": 0.8691540754445473, "grad_norm": 0.1813403069972992, "learning_rate": 7.499490429597044e-05, "loss": 0.6734, "step": 4228 }, { "epoch": 0.8693596464179257, "grad_norm": 0.18334521353244781, "learning_rate": 7.498745785014543e-05, "loss": 0.6559, "step": 4229 }, { "epoch": 0.8695652173913043, "grad_norm": 0.17807736992835999, "learning_rate": 7.498000992697854e-05, "loss": 0.6318, "step": 4230 }, { "epoch": 0.8697707883646829, "grad_norm": 0.18650507926940918, "learning_rate": 7.497256052683668e-05, "loss": 0.666, "step": 4231 }, { "epoch": 0.8699763593380615, "grad_norm": 0.18326011300086975, "learning_rate": 7.496510965008686e-05, "loss": 0.6587, "step": 4232 }, { "epoch": 0.87018193031144, "grad_norm": 0.1905418336391449, "learning_rate": 7.495765729709615e-05, "loss": 0.6544, "step": 4233 }, { "epoch": 0.8703875012848186, "grad_norm": 0.187713161110878, "learning_rate": 7.495020346823168e-05, "loss": 0.6711, "step": 4234 }, { "epoch": 0.8705930722581972, "grad_norm": 0.1464671492576599, "learning_rate": 7.494274816386066e-05, "loss": 0.5836, "step": 4235 }, { "epoch": 0.8707986432315757, "grad_norm": 0.5995880961418152, "learning_rate": 7.49352913843504e-05, "loss": 0.6806, "step": 4236 }, { "epoch": 0.8710042142049542, "grad_norm": 0.18340499699115753, "learning_rate": 7.492783313006827e-05, "loss": 0.676, "step": 4237 }, { "epoch": 0.8712097851783328, "grad_norm": 0.191572368144989, "learning_rate": 7.492037340138165e-05, "loss": 0.6651, "step": 4238 }, { "epoch": 0.8714153561517114, "grad_norm": 0.13379883766174316, "learning_rate": 7.49129121986581e-05, "loss": 0.5498, "step": 4239 }, { "epoch": 0.87162092712509, "grad_norm": 0.19760626554489136, "learning_rate": 7.490544952226517e-05, "loss": 0.6624, "step": 4240 }, { "epoch": 0.8718264980984685, "grad_norm": 0.19867949187755585, "learning_rate": 7.489798537257052e-05, "loss": 0.6542, "step": 4241 }, { "epoch": 0.872032069071847, "grad_norm": 0.13943122327327728, "learning_rate": 7.489051974994188e-05, "loss": 0.5833, "step": 4242 }, { "epoch": 0.8722376400452256, "grad_norm": 0.20543548464775085, "learning_rate": 7.488305265474704e-05, "loss": 0.6621, "step": 4243 }, { "epoch": 0.8724432110186042, "grad_norm": 0.19805829226970673, "learning_rate": 7.487558408735387e-05, "loss": 0.6489, "step": 4244 }, { "epoch": 0.8726487819919827, "grad_norm": 0.1895926296710968, "learning_rate": 7.486811404813032e-05, "loss": 0.688, "step": 4245 }, { "epoch": 0.8728543529653613, "grad_norm": 0.13180671632289886, "learning_rate": 7.486064253744436e-05, "loss": 0.587, "step": 4246 }, { "epoch": 0.8730599239387399, "grad_norm": 0.20886261761188507, "learning_rate": 7.485316955566414e-05, "loss": 0.6347, "step": 4247 }, { "epoch": 0.8732654949121184, "grad_norm": 0.20359115302562714, "learning_rate": 7.484569510315778e-05, "loss": 0.6872, "step": 4248 }, { "epoch": 0.873471065885497, "grad_norm": 0.184517964720726, "learning_rate": 7.483821918029351e-05, "loss": 0.6556, "step": 4249 }, { "epoch": 0.8736766368588755, "grad_norm": 0.1971379816532135, "learning_rate": 7.483074178743966e-05, "loss": 0.6817, "step": 4250 }, { "epoch": 0.8738822078322541, "grad_norm": 0.19668948650360107, "learning_rate": 7.482326292496458e-05, "loss": 0.6625, "step": 4251 }, { "epoch": 0.8740877788056326, "grad_norm": 0.1894627958536148, "learning_rate": 7.481578259323674e-05, "loss": 0.6445, "step": 4252 }, { "epoch": 0.8742933497790112, "grad_norm": 0.1403988003730774, "learning_rate": 7.480830079262465e-05, "loss": 0.5633, "step": 4253 }, { "epoch": 0.8744989207523898, "grad_norm": 0.12436271458864212, "learning_rate": 7.48008175234969e-05, "loss": 0.5708, "step": 4254 }, { "epoch": 0.8747044917257684, "grad_norm": 0.7834026217460632, "learning_rate": 7.479333278622216e-05, "loss": 0.6563, "step": 4255 }, { "epoch": 0.8749100626991468, "grad_norm": 0.1350373923778534, "learning_rate": 7.478584658116915e-05, "loss": 0.5961, "step": 4256 }, { "epoch": 0.8751156336725254, "grad_norm": 0.1937408745288849, "learning_rate": 7.477835890870672e-05, "loss": 0.6703, "step": 4257 }, { "epoch": 0.875321204645904, "grad_norm": 0.13636933267116547, "learning_rate": 7.477086976920373e-05, "loss": 0.5909, "step": 4258 }, { "epoch": 0.8755267756192826, "grad_norm": 0.21809430420398712, "learning_rate": 7.476337916302911e-05, "loss": 0.6848, "step": 4259 }, { "epoch": 0.8757323465926611, "grad_norm": 0.16706953942775726, "learning_rate": 7.475588709055195e-05, "loss": 0.5596, "step": 4260 }, { "epoch": 0.8759379175660397, "grad_norm": 0.19577208161354065, "learning_rate": 7.47483935521413e-05, "loss": 0.6608, "step": 4261 }, { "epoch": 0.8761434885394183, "grad_norm": 0.194346085190773, "learning_rate": 7.474089854816633e-05, "loss": 0.6508, "step": 4262 }, { "epoch": 0.8763490595127968, "grad_norm": 0.20509304106235504, "learning_rate": 7.47334020789963e-05, "loss": 0.6794, "step": 4263 }, { "epoch": 0.8765546304861753, "grad_norm": 0.20143075287342072, "learning_rate": 7.472590414500053e-05, "loss": 0.691, "step": 4264 }, { "epoch": 0.8767602014595539, "grad_norm": 0.2505229711532593, "learning_rate": 7.471840474654838e-05, "loss": 0.652, "step": 4265 }, { "epoch": 0.8769657724329325, "grad_norm": 0.18424780666828156, "learning_rate": 7.471090388400936e-05, "loss": 0.6396, "step": 4266 }, { "epoch": 0.877171343406311, "grad_norm": 0.18971550464630127, "learning_rate": 7.470340155775296e-05, "loss": 0.6445, "step": 4267 }, { "epoch": 0.8773769143796896, "grad_norm": 0.19411668181419373, "learning_rate": 7.46958977681488e-05, "loss": 0.6377, "step": 4268 }, { "epoch": 0.8775824853530682, "grad_norm": 0.1822851151227951, "learning_rate": 7.468839251556656e-05, "loss": 0.6684, "step": 4269 }, { "epoch": 0.8777880563264467, "grad_norm": 0.17239375412464142, "learning_rate": 7.468088580037598e-05, "loss": 0.5929, "step": 4270 }, { "epoch": 0.8779936272998252, "grad_norm": 0.19313600659370422, "learning_rate": 7.467337762294689e-05, "loss": 0.659, "step": 4271 }, { "epoch": 0.8781991982732038, "grad_norm": 0.18807615339756012, "learning_rate": 7.466586798364918e-05, "loss": 0.6608, "step": 4272 }, { "epoch": 0.8784047692465824, "grad_norm": 0.1784089207649231, "learning_rate": 7.46583568828528e-05, "loss": 0.6781, "step": 4273 }, { "epoch": 0.878610340219961, "grad_norm": 0.21919219195842743, "learning_rate": 7.46508443209278e-05, "loss": 0.6469, "step": 4274 }, { "epoch": 0.8788159111933395, "grad_norm": 0.20207509398460388, "learning_rate": 7.464333029824429e-05, "loss": 0.6928, "step": 4275 }, { "epoch": 0.879021482166718, "grad_norm": 0.18525582551956177, "learning_rate": 7.463581481517245e-05, "loss": 0.6391, "step": 4276 }, { "epoch": 0.8792270531400966, "grad_norm": 0.1859021782875061, "learning_rate": 7.462829787208254e-05, "loss": 0.6515, "step": 4277 }, { "epoch": 0.8794326241134752, "grad_norm": 0.1962486058473587, "learning_rate": 7.462077946934488e-05, "loss": 0.6575, "step": 4278 }, { "epoch": 0.8796381950868537, "grad_norm": 0.1927611082792282, "learning_rate": 7.461325960732984e-05, "loss": 0.6696, "step": 4279 }, { "epoch": 0.8798437660602323, "grad_norm": 0.1841474175453186, "learning_rate": 7.460573828640791e-05, "loss": 0.6796, "step": 4280 }, { "epoch": 0.8800493370336109, "grad_norm": 0.17558540403842926, "learning_rate": 7.459821550694965e-05, "loss": 0.6047, "step": 4281 }, { "epoch": 0.8802549080069895, "grad_norm": 0.19254080951213837, "learning_rate": 7.459069126932565e-05, "loss": 0.6795, "step": 4282 }, { "epoch": 0.8804604789803679, "grad_norm": 0.21128569543361664, "learning_rate": 7.45831655739066e-05, "loss": 0.6753, "step": 4283 }, { "epoch": 0.8806660499537465, "grad_norm": 0.18865573406219482, "learning_rate": 7.457563842106324e-05, "loss": 0.6917, "step": 4284 }, { "epoch": 0.8808716209271251, "grad_norm": 0.14653199911117554, "learning_rate": 7.456810981116643e-05, "loss": 0.5964, "step": 4285 }, { "epoch": 0.8810771919005036, "grad_norm": 0.19860735535621643, "learning_rate": 7.456057974458704e-05, "loss": 0.6534, "step": 4286 }, { "epoch": 0.8812827628738822, "grad_norm": 0.1889762133359909, "learning_rate": 7.455304822169606e-05, "loss": 0.6638, "step": 4287 }, { "epoch": 0.8814883338472608, "grad_norm": 0.19104063510894775, "learning_rate": 7.454551524286451e-05, "loss": 0.6779, "step": 4288 }, { "epoch": 0.8816939048206394, "grad_norm": 0.14345437288284302, "learning_rate": 7.453798080846353e-05, "loss": 0.5678, "step": 4289 }, { "epoch": 0.8818994757940178, "grad_norm": 0.1917407065629959, "learning_rate": 7.453044491886429e-05, "loss": 0.6866, "step": 4290 }, { "epoch": 0.8821050467673964, "grad_norm": 0.18986758589744568, "learning_rate": 7.452290757443806e-05, "loss": 0.6745, "step": 4291 }, { "epoch": 0.882310617740775, "grad_norm": 0.18070244789123535, "learning_rate": 7.451536877555617e-05, "loss": 0.6416, "step": 4292 }, { "epoch": 0.8825161887141536, "grad_norm": 0.1812668889760971, "learning_rate": 7.450782852259e-05, "loss": 0.6547, "step": 4293 }, { "epoch": 0.8827217596875321, "grad_norm": 0.19002775847911835, "learning_rate": 7.450028681591104e-05, "loss": 0.6392, "step": 4294 }, { "epoch": 0.8829273306609107, "grad_norm": 0.18384869396686554, "learning_rate": 7.449274365589083e-05, "loss": 0.6789, "step": 4295 }, { "epoch": 0.8831329016342893, "grad_norm": 0.13192251324653625, "learning_rate": 7.4485199042901e-05, "loss": 0.5635, "step": 4296 }, { "epoch": 0.8833384726076678, "grad_norm": 0.20571434497833252, "learning_rate": 7.447765297731322e-05, "loss": 0.7032, "step": 4297 }, { "epoch": 0.8835440435810463, "grad_norm": 0.1892521232366562, "learning_rate": 7.447010545949926e-05, "loss": 0.6616, "step": 4298 }, { "epoch": 0.8837496145544249, "grad_norm": 0.1817133128643036, "learning_rate": 7.446255648983095e-05, "loss": 0.68, "step": 4299 }, { "epoch": 0.8839551855278035, "grad_norm": 0.18332892656326294, "learning_rate": 7.445500606868016e-05, "loss": 0.6436, "step": 4300 }, { "epoch": 0.8841607565011821, "grad_norm": 0.18680675327777863, "learning_rate": 7.444745419641893e-05, "loss": 0.6678, "step": 4301 }, { "epoch": 0.8843663274745606, "grad_norm": 0.18525037169456482, "learning_rate": 7.443990087341926e-05, "loss": 0.6411, "step": 4302 }, { "epoch": 0.8845718984479392, "grad_norm": 0.18258033692836761, "learning_rate": 7.443234610005327e-05, "loss": 0.6625, "step": 4303 }, { "epoch": 0.8847774694213177, "grad_norm": 0.1923125982284546, "learning_rate": 7.442478987669315e-05, "loss": 0.646, "step": 4304 }, { "epoch": 0.8849830403946962, "grad_norm": 0.18216663599014282, "learning_rate": 7.441723220371118e-05, "loss": 0.6628, "step": 4305 }, { "epoch": 0.8851886113680748, "grad_norm": 0.15292415022850037, "learning_rate": 7.440967308147966e-05, "loss": 0.5989, "step": 4306 }, { "epoch": 0.8853941823414534, "grad_norm": 0.187953382730484, "learning_rate": 7.440211251037101e-05, "loss": 0.6624, "step": 4307 }, { "epoch": 0.885599753314832, "grad_norm": 0.1256251335144043, "learning_rate": 7.439455049075771e-05, "loss": 0.5845, "step": 4308 }, { "epoch": 0.8858053242882105, "grad_norm": 0.19565753638744354, "learning_rate": 7.438698702301229e-05, "loss": 0.674, "step": 4309 }, { "epoch": 0.886010895261589, "grad_norm": 0.1811288446187973, "learning_rate": 7.437942210750737e-05, "loss": 0.6772, "step": 4310 }, { "epoch": 0.8862164662349676, "grad_norm": 0.18292637169361115, "learning_rate": 7.437185574461564e-05, "loss": 0.6611, "step": 4311 }, { "epoch": 0.8864220372083462, "grad_norm": 0.18883992731571198, "learning_rate": 7.436428793470987e-05, "loss": 0.6885, "step": 4312 }, { "epoch": 0.8866276081817247, "grad_norm": 0.17563700675964355, "learning_rate": 7.435671867816288e-05, "loss": 0.6364, "step": 4313 }, { "epoch": 0.8868331791551033, "grad_norm": 0.1886730045080185, "learning_rate": 7.434914797534758e-05, "loss": 0.6734, "step": 4314 }, { "epoch": 0.8870387501284819, "grad_norm": 0.18746259808540344, "learning_rate": 7.434157582663691e-05, "loss": 0.6793, "step": 4315 }, { "epoch": 0.8872443211018605, "grad_norm": 0.16091619431972504, "learning_rate": 7.433400223240397e-05, "loss": 0.6101, "step": 4316 }, { "epoch": 0.8874498920752389, "grad_norm": 0.1879081130027771, "learning_rate": 7.432642719302184e-05, "loss": 0.6706, "step": 4317 }, { "epoch": 0.8876554630486175, "grad_norm": 0.1933298110961914, "learning_rate": 7.431885070886372e-05, "loss": 0.6647, "step": 4318 }, { "epoch": 0.8878610340219961, "grad_norm": 0.12698352336883545, "learning_rate": 7.431127278030285e-05, "loss": 0.5725, "step": 4319 }, { "epoch": 0.8880666049953746, "grad_norm": 0.18227995932102203, "learning_rate": 7.430369340771258e-05, "loss": 0.6751, "step": 4320 }, { "epoch": 0.8882721759687532, "grad_norm": 0.12696510553359985, "learning_rate": 7.429611259146628e-05, "loss": 0.5934, "step": 4321 }, { "epoch": 0.8884777469421318, "grad_norm": 0.12385066598653793, "learning_rate": 7.428853033193745e-05, "loss": 0.5753, "step": 4322 }, { "epoch": 0.8886833179155104, "grad_norm": 0.189598947763443, "learning_rate": 7.428094662949964e-05, "loss": 0.6631, "step": 4323 }, { "epoch": 0.8888888888888888, "grad_norm": 0.19134309887886047, "learning_rate": 7.427336148452645e-05, "loss": 0.6627, "step": 4324 }, { "epoch": 0.8890944598622674, "grad_norm": 0.1795106679201126, "learning_rate": 7.426577489739155e-05, "loss": 0.6591, "step": 4325 }, { "epoch": 0.889300030835646, "grad_norm": 0.18666116893291473, "learning_rate": 7.425818686846872e-05, "loss": 0.6704, "step": 4326 }, { "epoch": 0.8895056018090246, "grad_norm": 0.18348322808742523, "learning_rate": 7.425059739813177e-05, "loss": 0.6872, "step": 4327 }, { "epoch": 0.8897111727824031, "grad_norm": 0.18486203253269196, "learning_rate": 7.424300648675459e-05, "loss": 0.683, "step": 4328 }, { "epoch": 0.8899167437557817, "grad_norm": 0.19054512679576874, "learning_rate": 7.423541413471117e-05, "loss": 0.6541, "step": 4329 }, { "epoch": 0.8901223147291603, "grad_norm": 0.18132087588310242, "learning_rate": 7.422782034237554e-05, "loss": 0.6879, "step": 4330 }, { "epoch": 0.8903278857025388, "grad_norm": 0.17876796424388885, "learning_rate": 7.422022511012182e-05, "loss": 0.6338, "step": 4331 }, { "epoch": 0.8905334566759173, "grad_norm": 0.18260298669338226, "learning_rate": 7.421262843832417e-05, "loss": 0.6436, "step": 4332 }, { "epoch": 0.8907390276492959, "grad_norm": 0.19324032962322235, "learning_rate": 7.420503032735688e-05, "loss": 0.6672, "step": 4333 }, { "epoch": 0.8909445986226745, "grad_norm": 0.1886059194803238, "learning_rate": 7.419743077759423e-05, "loss": 0.6803, "step": 4334 }, { "epoch": 0.8911501695960531, "grad_norm": 0.18304765224456787, "learning_rate": 7.418982978941065e-05, "loss": 0.682, "step": 4335 }, { "epoch": 0.8913557405694316, "grad_norm": 0.17993968725204468, "learning_rate": 7.418222736318057e-05, "loss": 0.5898, "step": 4336 }, { "epoch": 0.8915613115428102, "grad_norm": 0.21449178457260132, "learning_rate": 7.417462349927855e-05, "loss": 0.6657, "step": 4337 }, { "epoch": 0.8917668825161887, "grad_norm": 0.1957646608352661, "learning_rate": 7.41670181980792e-05, "loss": 0.6752, "step": 4338 }, { "epoch": 0.8919724534895672, "grad_norm": 0.1868593990802765, "learning_rate": 7.415941145995719e-05, "loss": 0.7023, "step": 4339 }, { "epoch": 0.8921780244629458, "grad_norm": 0.17802853882312775, "learning_rate": 7.415180328528726e-05, "loss": 0.6407, "step": 4340 }, { "epoch": 0.8923835954363244, "grad_norm": 0.1869519203901291, "learning_rate": 7.414419367444425e-05, "loss": 0.6797, "step": 4341 }, { "epoch": 0.892589166409703, "grad_norm": 0.1857430785894394, "learning_rate": 7.413658262780301e-05, "loss": 0.6507, "step": 4342 }, { "epoch": 0.8927947373830815, "grad_norm": 0.18577779829502106, "learning_rate": 7.412897014573856e-05, "loss": 0.6426, "step": 4343 }, { "epoch": 0.89300030835646, "grad_norm": 0.18945308029651642, "learning_rate": 7.412135622862588e-05, "loss": 0.6654, "step": 4344 }, { "epoch": 0.8932058793298386, "grad_norm": 0.19126087427139282, "learning_rate": 7.41137408768401e-05, "loss": 0.6727, "step": 4345 }, { "epoch": 0.8934114503032172, "grad_norm": 0.18092653155326843, "learning_rate": 7.410612409075639e-05, "loss": 0.6423, "step": 4346 }, { "epoch": 0.8936170212765957, "grad_norm": 0.18795473873615265, "learning_rate": 7.409850587074997e-05, "loss": 0.671, "step": 4347 }, { "epoch": 0.8938225922499743, "grad_norm": 0.18189306557178497, "learning_rate": 7.409088621719618e-05, "loss": 0.6605, "step": 4348 }, { "epoch": 0.8940281632233529, "grad_norm": 0.532122015953064, "learning_rate": 7.40832651304704e-05, "loss": 0.7076, "step": 4349 }, { "epoch": 0.8942337341967315, "grad_norm": 0.18601365387439728, "learning_rate": 7.407564261094808e-05, "loss": 0.6822, "step": 4350 }, { "epoch": 0.8944393051701099, "grad_norm": 0.17646148800849915, "learning_rate": 7.406801865900474e-05, "loss": 0.5773, "step": 4351 }, { "epoch": 0.8946448761434885, "grad_norm": 0.19108296930789948, "learning_rate": 7.406039327501599e-05, "loss": 0.6699, "step": 4352 }, { "epoch": 0.8948504471168671, "grad_norm": 0.13074934482574463, "learning_rate": 7.40527664593575e-05, "loss": 0.5759, "step": 4353 }, { "epoch": 0.8950560180902457, "grad_norm": 0.2011304795742035, "learning_rate": 7.4045138212405e-05, "loss": 0.6647, "step": 4354 }, { "epoch": 0.8952615890636242, "grad_norm": 0.20265452563762665, "learning_rate": 7.403750853453428e-05, "loss": 0.6872, "step": 4355 }, { "epoch": 0.8954671600370028, "grad_norm": 0.14710208773612976, "learning_rate": 7.402987742612124e-05, "loss": 0.5707, "step": 4356 }, { "epoch": 0.8956727310103814, "grad_norm": 0.1870591640472412, "learning_rate": 7.402224488754184e-05, "loss": 0.6863, "step": 4357 }, { "epoch": 0.8958783019837598, "grad_norm": 0.18606425821781158, "learning_rate": 7.401461091917206e-05, "loss": 0.6825, "step": 4358 }, { "epoch": 0.8960838729571384, "grad_norm": 0.18178561329841614, "learning_rate": 7.400697552138803e-05, "loss": 0.6685, "step": 4359 }, { "epoch": 0.896289443930517, "grad_norm": 0.1832335740327835, "learning_rate": 7.399933869456589e-05, "loss": 0.6756, "step": 4360 }, { "epoch": 0.8964950149038956, "grad_norm": 0.18786631524562836, "learning_rate": 7.399170043908187e-05, "loss": 0.6464, "step": 4361 }, { "epoch": 0.8967005858772741, "grad_norm": 0.18036015331745148, "learning_rate": 7.398406075531228e-05, "loss": 0.6493, "step": 4362 }, { "epoch": 0.8969061568506527, "grad_norm": 0.19510389864444733, "learning_rate": 7.39764196436335e-05, "loss": 0.6499, "step": 4363 }, { "epoch": 0.8971117278240313, "grad_norm": 0.18855442106723785, "learning_rate": 7.396877710442194e-05, "loss": 0.6618, "step": 4364 }, { "epoch": 0.8973172987974098, "grad_norm": 0.1755952090024948, "learning_rate": 7.396113313805416e-05, "loss": 0.5859, "step": 4365 }, { "epoch": 0.8975228697707883, "grad_norm": 0.19632591307163239, "learning_rate": 7.395348774490668e-05, "loss": 0.6806, "step": 4366 }, { "epoch": 0.8977284407441669, "grad_norm": 0.1848839372396469, "learning_rate": 7.394584092535622e-05, "loss": 0.6589, "step": 4367 }, { "epoch": 0.8979340117175455, "grad_norm": 0.1884489208459854, "learning_rate": 7.393819267977945e-05, "loss": 0.6858, "step": 4368 }, { "epoch": 0.8981395826909241, "grad_norm": 0.1883459985256195, "learning_rate": 7.393054300855318e-05, "loss": 0.6714, "step": 4369 }, { "epoch": 0.8983451536643026, "grad_norm": 0.18213316798210144, "learning_rate": 7.392289191205428e-05, "loss": 0.6601, "step": 4370 }, { "epoch": 0.8985507246376812, "grad_norm": 0.18287204205989838, "learning_rate": 7.391523939065969e-05, "loss": 0.6714, "step": 4371 }, { "epoch": 0.8987562956110597, "grad_norm": 0.18707792460918427, "learning_rate": 7.390758544474639e-05, "loss": 0.6407, "step": 4372 }, { "epoch": 0.8989618665844383, "grad_norm": 0.18532080948352814, "learning_rate": 7.389993007469148e-05, "loss": 0.6813, "step": 4373 }, { "epoch": 0.8991674375578168, "grad_norm": 0.17980536818504333, "learning_rate": 7.38922732808721e-05, "loss": 0.6335, "step": 4374 }, { "epoch": 0.8993730085311954, "grad_norm": 0.18949337303638458, "learning_rate": 7.388461506366544e-05, "loss": 0.6959, "step": 4375 }, { "epoch": 0.899578579504574, "grad_norm": 0.18386761844158173, "learning_rate": 7.387695542344881e-05, "loss": 0.6337, "step": 4376 }, { "epoch": 0.8997841504779525, "grad_norm": 0.18090958893299103, "learning_rate": 7.386929436059956e-05, "loss": 0.6445, "step": 4377 }, { "epoch": 0.899989721451331, "grad_norm": 0.18790413439273834, "learning_rate": 7.386163187549511e-05, "loss": 0.6622, "step": 4378 }, { "epoch": 0.9001952924247096, "grad_norm": 0.18693870306015015, "learning_rate": 7.385396796851296e-05, "loss": 0.6711, "step": 4379 }, { "epoch": 0.9004008633980882, "grad_norm": 0.18476144969463348, "learning_rate": 7.384630264003067e-05, "loss": 0.6642, "step": 4380 }, { "epoch": 0.9006064343714667, "grad_norm": 0.18623842298984528, "learning_rate": 7.383863589042587e-05, "loss": 0.6242, "step": 4381 }, { "epoch": 0.9008120053448453, "grad_norm": 0.14655017852783203, "learning_rate": 7.383096772007628e-05, "loss": 0.5558, "step": 4382 }, { "epoch": 0.9010175763182239, "grad_norm": 0.18449489772319794, "learning_rate": 7.382329812935963e-05, "loss": 0.6603, "step": 4383 }, { "epoch": 0.9012231472916025, "grad_norm": 0.1364215761423111, "learning_rate": 7.381562711865385e-05, "loss": 0.5671, "step": 4384 }, { "epoch": 0.9014287182649809, "grad_norm": 0.19321440160274506, "learning_rate": 7.380795468833679e-05, "loss": 0.6826, "step": 4385 }, { "epoch": 0.9016342892383595, "grad_norm": 0.18807579576969147, "learning_rate": 7.380028083878644e-05, "loss": 0.6982, "step": 4386 }, { "epoch": 0.9018398602117381, "grad_norm": 0.18062882125377655, "learning_rate": 7.379260557038088e-05, "loss": 0.6676, "step": 4387 }, { "epoch": 0.9020454311851167, "grad_norm": 0.14082865417003632, "learning_rate": 7.37849288834982e-05, "loss": 0.6026, "step": 4388 }, { "epoch": 0.9022510021584952, "grad_norm": 0.1912989616394043, "learning_rate": 7.377725077851663e-05, "loss": 0.6711, "step": 4389 }, { "epoch": 0.9024565731318738, "grad_norm": 0.12428473681211472, "learning_rate": 7.376957125581441e-05, "loss": 0.5805, "step": 4390 }, { "epoch": 0.9026621441052524, "grad_norm": 0.1931021362543106, "learning_rate": 7.376189031576991e-05, "loss": 0.6652, "step": 4391 }, { "epoch": 0.902867715078631, "grad_norm": 0.1896105408668518, "learning_rate": 7.375420795876148e-05, "loss": 0.6592, "step": 4392 }, { "epoch": 0.9030732860520094, "grad_norm": 0.18101376295089722, "learning_rate": 7.374652418516761e-05, "loss": 0.6803, "step": 4393 }, { "epoch": 0.903278857025388, "grad_norm": 0.18925289809703827, "learning_rate": 7.373883899536688e-05, "loss": 0.6599, "step": 4394 }, { "epoch": 0.9034844279987666, "grad_norm": 0.18771770596504211, "learning_rate": 7.373115238973786e-05, "loss": 0.6866, "step": 4395 }, { "epoch": 0.9036899989721451, "grad_norm": 0.18801310658454895, "learning_rate": 7.372346436865927e-05, "loss": 0.6602, "step": 4396 }, { "epoch": 0.9038955699455237, "grad_norm": 0.1810484528541565, "learning_rate": 7.371577493250983e-05, "loss": 0.6377, "step": 4397 }, { "epoch": 0.9041011409189023, "grad_norm": 0.18624310195446014, "learning_rate": 7.370808408166838e-05, "loss": 0.6655, "step": 4398 }, { "epoch": 0.9043067118922808, "grad_norm": 0.1851394772529602, "learning_rate": 7.37003918165138e-05, "loss": 0.6622, "step": 4399 }, { "epoch": 0.9045122828656593, "grad_norm": 0.20104296505451202, "learning_rate": 7.369269813742507e-05, "loss": 0.6727, "step": 4400 }, { "epoch": 0.9047178538390379, "grad_norm": 0.15082360804080963, "learning_rate": 7.368500304478121e-05, "loss": 0.5995, "step": 4401 }, { "epoch": 0.9049234248124165, "grad_norm": 0.13055920600891113, "learning_rate": 7.367730653896132e-05, "loss": 0.5763, "step": 4402 }, { "epoch": 0.9051289957857951, "grad_norm": 0.1956562101840973, "learning_rate": 7.366960862034458e-05, "loss": 0.6743, "step": 4403 }, { "epoch": 0.9053345667591736, "grad_norm": 0.1876806765794754, "learning_rate": 7.366190928931021e-05, "loss": 0.6862, "step": 4404 }, { "epoch": 0.9055401377325522, "grad_norm": 0.1496850550174713, "learning_rate": 7.365420854623755e-05, "loss": 0.5858, "step": 4405 }, { "epoch": 0.9057457087059307, "grad_norm": 0.14271092414855957, "learning_rate": 7.364650639150596e-05, "loss": 0.6152, "step": 4406 }, { "epoch": 0.9059512796793093, "grad_norm": 0.20220176875591278, "learning_rate": 7.36388028254949e-05, "loss": 0.6771, "step": 4407 }, { "epoch": 0.9061568506526878, "grad_norm": 0.13460181653499603, "learning_rate": 7.363109784858388e-05, "loss": 0.5904, "step": 4408 }, { "epoch": 0.9063624216260664, "grad_norm": 0.13429884612560272, "learning_rate": 7.362339146115248e-05, "loss": 0.5729, "step": 4409 }, { "epoch": 0.906567992599445, "grad_norm": 0.18408654630184174, "learning_rate": 7.361568366358038e-05, "loss": 0.6534, "step": 4410 }, { "epoch": 0.9067735635728236, "grad_norm": 0.20259039103984833, "learning_rate": 7.360797445624729e-05, "loss": 0.6585, "step": 4411 }, { "epoch": 0.906979134546202, "grad_norm": 0.18721166253089905, "learning_rate": 7.360026383953301e-05, "loss": 0.6825, "step": 4412 }, { "epoch": 0.9071847055195806, "grad_norm": 0.18604475259780884, "learning_rate": 7.359255181381741e-05, "loss": 0.6372, "step": 4413 }, { "epoch": 0.9073902764929592, "grad_norm": 0.1993558555841446, "learning_rate": 7.358483837948043e-05, "loss": 0.653, "step": 4414 }, { "epoch": 0.9075958474663377, "grad_norm": 0.18966707587242126, "learning_rate": 7.357712353690205e-05, "loss": 0.6598, "step": 4415 }, { "epoch": 0.9078014184397163, "grad_norm": 0.18439289927482605, "learning_rate": 7.35694072864624e-05, "loss": 0.6923, "step": 4416 }, { "epoch": 0.9080069894130949, "grad_norm": 0.1838844269514084, "learning_rate": 7.356168962854155e-05, "loss": 0.6617, "step": 4417 }, { "epoch": 0.9082125603864735, "grad_norm": 0.18378853797912598, "learning_rate": 7.355397056351975e-05, "loss": 0.6939, "step": 4418 }, { "epoch": 0.9084181313598519, "grad_norm": 0.1807030588388443, "learning_rate": 7.354625009177729e-05, "loss": 0.6425, "step": 4419 }, { "epoch": 0.9086237023332305, "grad_norm": 0.18497875332832336, "learning_rate": 7.353852821369452e-05, "loss": 0.682, "step": 4420 }, { "epoch": 0.9088292733066091, "grad_norm": 0.18819686770439148, "learning_rate": 7.353080492965184e-05, "loss": 0.6772, "step": 4421 }, { "epoch": 0.9090348442799877, "grad_norm": 0.19138747453689575, "learning_rate": 7.352308024002977e-05, "loss": 0.5944, "step": 4422 }, { "epoch": 0.9092404152533662, "grad_norm": 0.1951584368944168, "learning_rate": 7.351535414520884e-05, "loss": 0.6523, "step": 4423 }, { "epoch": 0.9094459862267448, "grad_norm": 0.19077381491661072, "learning_rate": 7.350762664556969e-05, "loss": 0.6364, "step": 4424 }, { "epoch": 0.9096515572001234, "grad_norm": 0.18306457996368408, "learning_rate": 7.349989774149302e-05, "loss": 0.6616, "step": 4425 }, { "epoch": 0.909857128173502, "grad_norm": 0.18449269235134125, "learning_rate": 7.349216743335961e-05, "loss": 0.6431, "step": 4426 }, { "epoch": 0.9100626991468804, "grad_norm": 0.18515408039093018, "learning_rate": 7.348443572155027e-05, "loss": 0.634, "step": 4427 }, { "epoch": 0.910268270120259, "grad_norm": 0.16887053847312927, "learning_rate": 7.347670260644592e-05, "loss": 0.5846, "step": 4428 }, { "epoch": 0.9104738410936376, "grad_norm": 0.19056186079978943, "learning_rate": 7.346896808842753e-05, "loss": 0.6496, "step": 4429 }, { "epoch": 0.9106794120670162, "grad_norm": 0.19287076592445374, "learning_rate": 7.346123216787616e-05, "loss": 0.6689, "step": 4430 }, { "epoch": 0.9108849830403947, "grad_norm": 0.18666431307792664, "learning_rate": 7.34534948451729e-05, "loss": 0.6639, "step": 4431 }, { "epoch": 0.9110905540137733, "grad_norm": 0.1829727292060852, "learning_rate": 7.344575612069893e-05, "loss": 0.6354, "step": 4432 }, { "epoch": 0.9112961249871518, "grad_norm": 0.19133751094341278, "learning_rate": 7.343801599483554e-05, "loss": 0.6718, "step": 4433 }, { "epoch": 0.9115016959605303, "grad_norm": 0.19330398738384247, "learning_rate": 7.3430274467964e-05, "loss": 0.6725, "step": 4434 }, { "epoch": 0.9117072669339089, "grad_norm": 0.18251781165599823, "learning_rate": 7.342253154046571e-05, "loss": 0.6553, "step": 4435 }, { "epoch": 0.9119128379072875, "grad_norm": 0.1795288473367691, "learning_rate": 7.341478721272215e-05, "loss": 0.6338, "step": 4436 }, { "epoch": 0.9121184088806661, "grad_norm": 0.19127197563648224, "learning_rate": 7.340704148511483e-05, "loss": 0.6715, "step": 4437 }, { "epoch": 0.9123239798540446, "grad_norm": 0.1778208166360855, "learning_rate": 7.339929435802536e-05, "loss": 0.6374, "step": 4438 }, { "epoch": 0.9125295508274232, "grad_norm": 0.18795600533485413, "learning_rate": 7.339154583183538e-05, "loss": 0.6714, "step": 4439 }, { "epoch": 0.9127351218008017, "grad_norm": 0.1836930364370346, "learning_rate": 7.338379590692665e-05, "loss": 0.6638, "step": 4440 }, { "epoch": 0.9129406927741803, "grad_norm": 0.1537027806043625, "learning_rate": 7.337604458368095e-05, "loss": 0.5768, "step": 4441 }, { "epoch": 0.9131462637475588, "grad_norm": 0.1962456852197647, "learning_rate": 7.336829186248018e-05, "loss": 0.679, "step": 4442 }, { "epoch": 0.9133518347209374, "grad_norm": 0.185762420296669, "learning_rate": 7.336053774370626e-05, "loss": 0.6342, "step": 4443 }, { "epoch": 0.913557405694316, "grad_norm": 0.18311749398708344, "learning_rate": 7.33527822277412e-05, "loss": 0.6502, "step": 4444 }, { "epoch": 0.9137629766676946, "grad_norm": 0.17960374057292938, "learning_rate": 7.334502531496707e-05, "loss": 0.6496, "step": 4445 }, { "epoch": 0.913968547641073, "grad_norm": 0.17319880425930023, "learning_rate": 7.333726700576603e-05, "loss": 0.6354, "step": 4446 }, { "epoch": 0.9141741186144516, "grad_norm": 0.19266557693481445, "learning_rate": 7.332950730052029e-05, "loss": 0.66, "step": 4447 }, { "epoch": 0.9143796895878302, "grad_norm": 0.18681232631206512, "learning_rate": 7.332174619961215e-05, "loss": 0.6807, "step": 4448 }, { "epoch": 0.9145852605612088, "grad_norm": 0.14952509105205536, "learning_rate": 7.331398370342393e-05, "loss": 0.5697, "step": 4449 }, { "epoch": 0.9147908315345873, "grad_norm": 0.18955743312835693, "learning_rate": 7.33062198123381e-05, "loss": 0.6537, "step": 4450 }, { "epoch": 0.9149964025079659, "grad_norm": 0.18778184056282043, "learning_rate": 7.32984545267371e-05, "loss": 0.6738, "step": 4451 }, { "epoch": 0.9152019734813445, "grad_norm": 0.12955501675605774, "learning_rate": 7.329068784700352e-05, "loss": 0.5692, "step": 4452 }, { "epoch": 0.9154075444547229, "grad_norm": 0.12300048768520355, "learning_rate": 7.328291977351998e-05, "loss": 0.5731, "step": 4453 }, { "epoch": 0.9156131154281015, "grad_norm": 0.1227407306432724, "learning_rate": 7.327515030666918e-05, "loss": 0.5563, "step": 4454 }, { "epoch": 0.9158186864014801, "grad_norm": 0.1943395435810089, "learning_rate": 7.326737944683387e-05, "loss": 0.6464, "step": 4455 }, { "epoch": 0.9160242573748587, "grad_norm": 0.1225886195898056, "learning_rate": 7.32596071943969e-05, "loss": 0.5903, "step": 4456 }, { "epoch": 0.9162298283482372, "grad_norm": 0.19221986830234528, "learning_rate": 7.325183354974119e-05, "loss": 0.6723, "step": 4457 }, { "epoch": 0.9164353993216158, "grad_norm": 0.12794283032417297, "learning_rate": 7.324405851324967e-05, "loss": 0.5684, "step": 4458 }, { "epoch": 0.9166409702949944, "grad_norm": 0.18591567873954773, "learning_rate": 7.32362820853054e-05, "loss": 0.6615, "step": 4459 }, { "epoch": 0.916846541268373, "grad_norm": 0.1794724315404892, "learning_rate": 7.32285042662915e-05, "loss": 0.66, "step": 4460 }, { "epoch": 0.9170521122417514, "grad_norm": 0.17996345460414886, "learning_rate": 7.322072505659111e-05, "loss": 0.6703, "step": 4461 }, { "epoch": 0.91725768321513, "grad_norm": 0.1860412210226059, "learning_rate": 7.321294445658754e-05, "loss": 0.6633, "step": 4462 }, { "epoch": 0.9174632541885086, "grad_norm": 0.18460632860660553, "learning_rate": 7.320516246666401e-05, "loss": 0.6719, "step": 4463 }, { "epoch": 0.9176688251618872, "grad_norm": 0.179931178689003, "learning_rate": 7.3197379087204e-05, "loss": 0.6874, "step": 4464 }, { "epoch": 0.9178743961352657, "grad_norm": 0.17799624800682068, "learning_rate": 7.31895943185909e-05, "loss": 0.6829, "step": 4465 }, { "epoch": 0.9180799671086443, "grad_norm": 0.17857834696769714, "learning_rate": 7.318180816120825e-05, "loss": 0.6732, "step": 4466 }, { "epoch": 0.9182855380820228, "grad_norm": 0.18361206352710724, "learning_rate": 7.317402061543963e-05, "loss": 0.6628, "step": 4467 }, { "epoch": 0.9184911090554013, "grad_norm": 0.18038511276245117, "learning_rate": 7.316623168166869e-05, "loss": 0.65, "step": 4468 }, { "epoch": 0.9186966800287799, "grad_norm": 0.18144308030605316, "learning_rate": 7.315844136027917e-05, "loss": 0.6874, "step": 4469 }, { "epoch": 0.9189022510021585, "grad_norm": 0.18093526363372803, "learning_rate": 7.315064965165486e-05, "loss": 0.6514, "step": 4470 }, { "epoch": 0.9191078219755371, "grad_norm": 0.15748950839042664, "learning_rate": 7.314285655617962e-05, "loss": 0.5854, "step": 4471 }, { "epoch": 0.9193133929489156, "grad_norm": 0.18608981370925903, "learning_rate": 7.313506207423738e-05, "loss": 0.6583, "step": 4472 }, { "epoch": 0.9195189639222942, "grad_norm": 0.13079427182674408, "learning_rate": 7.312726620621211e-05, "loss": 0.5866, "step": 4473 }, { "epoch": 0.9197245348956727, "grad_norm": 0.1949155479669571, "learning_rate": 7.311946895248793e-05, "loss": 0.6501, "step": 4474 }, { "epoch": 0.9199301058690513, "grad_norm": 0.19749537110328674, "learning_rate": 7.311167031344894e-05, "loss": 0.6782, "step": 4475 }, { "epoch": 0.9201356768424298, "grad_norm": 0.18449652194976807, "learning_rate": 7.310387028947934e-05, "loss": 0.6683, "step": 4476 }, { "epoch": 0.9203412478158084, "grad_norm": 0.18260683119297028, "learning_rate": 7.309606888096341e-05, "loss": 0.6541, "step": 4477 }, { "epoch": 0.920546818789187, "grad_norm": 0.16052718460559845, "learning_rate": 7.308826608828548e-05, "loss": 0.5706, "step": 4478 }, { "epoch": 0.9207523897625656, "grad_norm": 0.1838260293006897, "learning_rate": 7.308046191182998e-05, "loss": 0.6577, "step": 4479 }, { "epoch": 0.920957960735944, "grad_norm": 0.12468434125185013, "learning_rate": 7.307265635198135e-05, "loss": 0.5989, "step": 4480 }, { "epoch": 0.9211635317093226, "grad_norm": 0.12467863410711288, "learning_rate": 7.306484940912416e-05, "loss": 0.5734, "step": 4481 }, { "epoch": 0.9213691026827012, "grad_norm": 0.2048860639333725, "learning_rate": 7.305704108364301e-05, "loss": 0.6777, "step": 4482 }, { "epoch": 0.9215746736560798, "grad_norm": 0.12955395877361298, "learning_rate": 7.304923137592258e-05, "loss": 0.5742, "step": 4483 }, { "epoch": 0.9217802446294583, "grad_norm": 0.18409843742847443, "learning_rate": 7.304142028634764e-05, "loss": 0.6323, "step": 4484 }, { "epoch": 0.9219858156028369, "grad_norm": 0.1324310153722763, "learning_rate": 7.303360781530299e-05, "loss": 0.5826, "step": 4485 }, { "epoch": 0.9221913865762155, "grad_norm": 0.18649353086948395, "learning_rate": 7.30257939631735e-05, "loss": 0.6743, "step": 4486 }, { "epoch": 0.9223969575495939, "grad_norm": 0.19196631014347076, "learning_rate": 7.301797873034412e-05, "loss": 0.6578, "step": 4487 }, { "epoch": 0.9226025285229725, "grad_norm": 0.13730254769325256, "learning_rate": 7.301016211719992e-05, "loss": 0.5787, "step": 4488 }, { "epoch": 0.9228080994963511, "grad_norm": 0.17747841775417328, "learning_rate": 7.300234412412593e-05, "loss": 0.6616, "step": 4489 }, { "epoch": 0.9230136704697297, "grad_norm": 0.1930990219116211, "learning_rate": 7.299452475150732e-05, "loss": 0.6509, "step": 4490 }, { "epoch": 0.9232192414431082, "grad_norm": 0.18891580402851105, "learning_rate": 7.298670399972933e-05, "loss": 0.6808, "step": 4491 }, { "epoch": 0.9234248124164868, "grad_norm": 0.1751311719417572, "learning_rate": 7.297888186917724e-05, "loss": 0.649, "step": 4492 }, { "epoch": 0.9236303833898654, "grad_norm": 0.37240174412727356, "learning_rate": 7.297105836023642e-05, "loss": 0.6677, "step": 4493 }, { "epoch": 0.923835954363244, "grad_norm": 0.1758231371641159, "learning_rate": 7.296323347329228e-05, "loss": 0.6484, "step": 4494 }, { "epoch": 0.9240415253366224, "grad_norm": 0.18870992958545685, "learning_rate": 7.295540720873034e-05, "loss": 0.6792, "step": 4495 }, { "epoch": 0.924247096310001, "grad_norm": 0.17921528220176697, "learning_rate": 7.294757956693616e-05, "loss": 0.6595, "step": 4496 }, { "epoch": 0.9244526672833796, "grad_norm": 0.18485888838768005, "learning_rate": 7.293975054829534e-05, "loss": 0.6875, "step": 4497 }, { "epoch": 0.9246582382567582, "grad_norm": 0.1897556483745575, "learning_rate": 7.293192015319359e-05, "loss": 0.6486, "step": 4498 }, { "epoch": 0.9248638092301367, "grad_norm": 0.18818815052509308, "learning_rate": 7.29240883820167e-05, "loss": 0.6567, "step": 4499 }, { "epoch": 0.9250693802035153, "grad_norm": 0.1831316202878952, "learning_rate": 7.291625523515051e-05, "loss": 0.6784, "step": 4500 }, { "epoch": 0.9252749511768938, "grad_norm": 0.18603403866291046, "learning_rate": 7.290842071298088e-05, "loss": 0.6519, "step": 4501 }, { "epoch": 0.9254805221502724, "grad_norm": 0.18271493911743164, "learning_rate": 7.290058481589381e-05, "loss": 0.6522, "step": 4502 }, { "epoch": 0.9256860931236509, "grad_norm": 0.1795085072517395, "learning_rate": 7.289274754427536e-05, "loss": 0.6418, "step": 4503 }, { "epoch": 0.9258916640970295, "grad_norm": 0.18269407749176025, "learning_rate": 7.288490889851158e-05, "loss": 0.6724, "step": 4504 }, { "epoch": 0.9260972350704081, "grad_norm": 0.18335239589214325, "learning_rate": 7.287706887898867e-05, "loss": 0.6758, "step": 4505 }, { "epoch": 0.9263028060437866, "grad_norm": 0.17889541387557983, "learning_rate": 7.28692274860929e-05, "loss": 0.6228, "step": 4506 }, { "epoch": 0.9265083770171652, "grad_norm": 0.1919565200805664, "learning_rate": 7.286138472021053e-05, "loss": 0.6629, "step": 4507 }, { "epoch": 0.9267139479905437, "grad_norm": 0.162271648645401, "learning_rate": 7.285354058172796e-05, "loss": 0.5823, "step": 4508 }, { "epoch": 0.9269195189639223, "grad_norm": 0.18953320384025574, "learning_rate": 7.284569507103164e-05, "loss": 0.687, "step": 4509 }, { "epoch": 0.9271250899373008, "grad_norm": 0.18748800456523895, "learning_rate": 7.283784818850807e-05, "loss": 0.6741, "step": 4510 }, { "epoch": 0.9273306609106794, "grad_norm": 0.20176881551742554, "learning_rate": 7.282999993454383e-05, "loss": 0.6658, "step": 4511 }, { "epoch": 0.927536231884058, "grad_norm": 0.19243447482585907, "learning_rate": 7.282215030952558e-05, "loss": 0.6633, "step": 4512 }, { "epoch": 0.9277418028574366, "grad_norm": 0.18939968943595886, "learning_rate": 7.281429931384001e-05, "loss": 0.6858, "step": 4513 }, { "epoch": 0.927947373830815, "grad_norm": 0.1850508600473404, "learning_rate": 7.280644694787393e-05, "loss": 0.6459, "step": 4514 }, { "epoch": 0.9281529448041936, "grad_norm": 0.18251655995845795, "learning_rate": 7.279859321201418e-05, "loss": 0.6619, "step": 4515 }, { "epoch": 0.9283585157775722, "grad_norm": 0.18301190435886383, "learning_rate": 7.279073810664767e-05, "loss": 0.6507, "step": 4516 }, { "epoch": 0.9285640867509508, "grad_norm": 0.15676529705524445, "learning_rate": 7.278288163216138e-05, "loss": 0.5846, "step": 4517 }, { "epoch": 0.9287696577243293, "grad_norm": 0.18805831670761108, "learning_rate": 7.277502378894237e-05, "loss": 0.6531, "step": 4518 }, { "epoch": 0.9289752286977079, "grad_norm": 0.1892201006412506, "learning_rate": 7.276716457737776e-05, "loss": 0.659, "step": 4519 }, { "epoch": 0.9291807996710865, "grad_norm": 0.13629117608070374, "learning_rate": 7.275930399785473e-05, "loss": 0.569, "step": 4520 }, { "epoch": 0.929386370644465, "grad_norm": 0.20433087646961212, "learning_rate": 7.275144205076053e-05, "loss": 0.6686, "step": 4521 }, { "epoch": 0.9295919416178435, "grad_norm": 0.1851327121257782, "learning_rate": 7.274357873648252e-05, "loss": 0.6472, "step": 4522 }, { "epoch": 0.9297975125912221, "grad_norm": 0.19075118005275726, "learning_rate": 7.273571405540802e-05, "loss": 0.6702, "step": 4523 }, { "epoch": 0.9300030835646007, "grad_norm": 0.1823331117630005, "learning_rate": 7.272784800792457e-05, "loss": 0.6637, "step": 4524 }, { "epoch": 0.9302086545379792, "grad_norm": 0.18252402544021606, "learning_rate": 7.271998059441962e-05, "loss": 0.6553, "step": 4525 }, { "epoch": 0.9304142255113578, "grad_norm": 0.18108795583248138, "learning_rate": 7.27121118152808e-05, "loss": 0.6487, "step": 4526 }, { "epoch": 0.9306197964847364, "grad_norm": 0.18125282227993011, "learning_rate": 7.270424167089574e-05, "loss": 0.6674, "step": 4527 }, { "epoch": 0.930825367458115, "grad_norm": 0.17340825498104095, "learning_rate": 7.269637016165218e-05, "loss": 0.6521, "step": 4528 }, { "epoch": 0.9310309384314934, "grad_norm": 0.17793837189674377, "learning_rate": 7.268849728793794e-05, "loss": 0.6443, "step": 4529 }, { "epoch": 0.931236509404872, "grad_norm": 0.1863885074853897, "learning_rate": 7.268062305014085e-05, "loss": 0.6374, "step": 4530 }, { "epoch": 0.9314420803782506, "grad_norm": 0.1790206879377365, "learning_rate": 7.267274744864883e-05, "loss": 0.6463, "step": 4531 }, { "epoch": 0.9316476513516292, "grad_norm": 0.19194868206977844, "learning_rate": 7.266487048384987e-05, "loss": 0.6575, "step": 4532 }, { "epoch": 0.9318532223250077, "grad_norm": 0.17925460636615753, "learning_rate": 7.265699215613208e-05, "loss": 0.655, "step": 4533 }, { "epoch": 0.9320587932983863, "grad_norm": 0.18405981361865997, "learning_rate": 7.264911246588353e-05, "loss": 0.6661, "step": 4534 }, { "epoch": 0.9322643642717648, "grad_norm": 0.15504823625087738, "learning_rate": 7.264123141349245e-05, "loss": 0.5726, "step": 4535 }, { "epoch": 0.9324699352451434, "grad_norm": 0.1932215392589569, "learning_rate": 7.26333489993471e-05, "loss": 0.659, "step": 4536 }, { "epoch": 0.9326755062185219, "grad_norm": 0.182255357503891, "learning_rate": 7.262546522383579e-05, "loss": 0.6792, "step": 4537 }, { "epoch": 0.9328810771919005, "grad_norm": 0.1837291121482849, "learning_rate": 7.261758008734693e-05, "loss": 0.6816, "step": 4538 }, { "epoch": 0.9330866481652791, "grad_norm": 0.1409105658531189, "learning_rate": 7.2609693590269e-05, "loss": 0.5832, "step": 4539 }, { "epoch": 0.9332922191386577, "grad_norm": 0.19682304561138153, "learning_rate": 7.260180573299049e-05, "loss": 0.6693, "step": 4540 }, { "epoch": 0.9334977901120362, "grad_norm": 0.1264413744211197, "learning_rate": 7.259391651590005e-05, "loss": 0.5933, "step": 4541 }, { "epoch": 0.9337033610854147, "grad_norm": 0.1842966377735138, "learning_rate": 7.258602593938629e-05, "loss": 0.6619, "step": 4542 }, { "epoch": 0.9339089320587933, "grad_norm": 0.18830621242523193, "learning_rate": 7.257813400383798e-05, "loss": 0.6614, "step": 4543 }, { "epoch": 0.9341145030321718, "grad_norm": 0.17995837330818176, "learning_rate": 7.257024070964391e-05, "loss": 0.6535, "step": 4544 }, { "epoch": 0.9343200740055504, "grad_norm": 0.1838386356830597, "learning_rate": 7.256234605719294e-05, "loss": 0.6598, "step": 4545 }, { "epoch": 0.934525644978929, "grad_norm": 0.18245835602283478, "learning_rate": 7.2554450046874e-05, "loss": 0.6377, "step": 4546 }, { "epoch": 0.9347312159523076, "grad_norm": 0.18414458632469177, "learning_rate": 7.254655267907611e-05, "loss": 0.6616, "step": 4547 }, { "epoch": 0.934936786925686, "grad_norm": 0.14779187738895416, "learning_rate": 7.253865395418832e-05, "loss": 0.574, "step": 4548 }, { "epoch": 0.9351423578990646, "grad_norm": 0.13919095695018768, "learning_rate": 7.253075387259975e-05, "loss": 0.5738, "step": 4549 }, { "epoch": 0.9353479288724432, "grad_norm": 0.20152714848518372, "learning_rate": 7.252285243469962e-05, "loss": 0.656, "step": 4550 }, { "epoch": 0.9355534998458218, "grad_norm": 0.20961932837963104, "learning_rate": 7.251494964087721e-05, "loss": 0.6724, "step": 4551 }, { "epoch": 0.9357590708192003, "grad_norm": 0.1847916692495346, "learning_rate": 7.25070454915218e-05, "loss": 0.6601, "step": 4552 }, { "epoch": 0.9359646417925789, "grad_norm": 0.1776532083749771, "learning_rate": 7.249913998702287e-05, "loss": 0.645, "step": 4553 }, { "epoch": 0.9361702127659575, "grad_norm": 0.18173301219940186, "learning_rate": 7.249123312776982e-05, "loss": 0.6983, "step": 4554 }, { "epoch": 0.936375783739336, "grad_norm": 0.18323563039302826, "learning_rate": 7.24833249141522e-05, "loss": 0.6603, "step": 4555 }, { "epoch": 0.9365813547127145, "grad_norm": 0.18385376036167145, "learning_rate": 7.247541534655962e-05, "loss": 0.6551, "step": 4556 }, { "epoch": 0.9367869256860931, "grad_norm": 0.18663281202316284, "learning_rate": 7.246750442538176e-05, "loss": 0.6562, "step": 4557 }, { "epoch": 0.9369924966594717, "grad_norm": 0.18781627714633942, "learning_rate": 7.245959215100834e-05, "loss": 0.6772, "step": 4558 }, { "epoch": 0.9371980676328503, "grad_norm": 0.18314847350120544, "learning_rate": 7.245167852382915e-05, "loss": 0.6523, "step": 4559 }, { "epoch": 0.9374036386062288, "grad_norm": 0.18462207913398743, "learning_rate": 7.244376354423408e-05, "loss": 0.6716, "step": 4560 }, { "epoch": 0.9376092095796074, "grad_norm": 0.18789240717887878, "learning_rate": 7.243584721261302e-05, "loss": 0.6672, "step": 4561 }, { "epoch": 0.937814780552986, "grad_norm": 0.18339639902114868, "learning_rate": 7.242792952935604e-05, "loss": 0.6526, "step": 4562 }, { "epoch": 0.9380203515263644, "grad_norm": 0.18866628408432007, "learning_rate": 7.242001049485314e-05, "loss": 0.6739, "step": 4563 }, { "epoch": 0.938225922499743, "grad_norm": 0.18578583002090454, "learning_rate": 7.241209010949452e-05, "loss": 0.6485, "step": 4564 }, { "epoch": 0.9384314934731216, "grad_norm": 0.18551675975322723, "learning_rate": 7.240416837367032e-05, "loss": 0.6537, "step": 4565 }, { "epoch": 0.9386370644465002, "grad_norm": 0.1823461502790451, "learning_rate": 7.239624528777082e-05, "loss": 0.6626, "step": 4566 }, { "epoch": 0.9388426354198787, "grad_norm": 0.18426772952079773, "learning_rate": 7.23883208521864e-05, "loss": 0.6314, "step": 4567 }, { "epoch": 0.9390482063932573, "grad_norm": 0.19278199970722198, "learning_rate": 7.23803950673074e-05, "loss": 0.6813, "step": 4568 }, { "epoch": 0.9392537773666358, "grad_norm": 0.17879322171211243, "learning_rate": 7.23724679335243e-05, "loss": 0.6412, "step": 4569 }, { "epoch": 0.9394593483400144, "grad_norm": 0.18191079795360565, "learning_rate": 7.236453945122767e-05, "loss": 0.6825, "step": 4570 }, { "epoch": 0.9396649193133929, "grad_norm": 0.19142381846904755, "learning_rate": 7.235660962080805e-05, "loss": 0.6717, "step": 4571 }, { "epoch": 0.9398704902867715, "grad_norm": 0.18709653615951538, "learning_rate": 7.234867844265617e-05, "loss": 0.6483, "step": 4572 }, { "epoch": 0.9400760612601501, "grad_norm": 0.18491537868976593, "learning_rate": 7.234074591716271e-05, "loss": 0.6614, "step": 4573 }, { "epoch": 0.9402816322335287, "grad_norm": 0.2287359982728958, "learning_rate": 7.233281204471851e-05, "loss": 0.5824, "step": 4574 }, { "epoch": 0.9404872032069072, "grad_norm": 0.1951487511396408, "learning_rate": 7.232487682571439e-05, "loss": 0.6553, "step": 4575 }, { "epoch": 0.9406927741802857, "grad_norm": 0.19920621812343597, "learning_rate": 7.231694026054133e-05, "loss": 0.6497, "step": 4576 }, { "epoch": 0.9408983451536643, "grad_norm": 0.15709790587425232, "learning_rate": 7.230900234959028e-05, "loss": 0.5685, "step": 4577 }, { "epoch": 0.9411039161270429, "grad_norm": 0.19238202273845673, "learning_rate": 7.230106309325234e-05, "loss": 0.6771, "step": 4578 }, { "epoch": 0.9413094871004214, "grad_norm": 0.1886894553899765, "learning_rate": 7.229312249191862e-05, "loss": 0.6278, "step": 4579 }, { "epoch": 0.9415150580738, "grad_norm": 0.19003844261169434, "learning_rate": 7.228518054598032e-05, "loss": 0.6583, "step": 4580 }, { "epoch": 0.9417206290471786, "grad_norm": 0.15929581224918365, "learning_rate": 7.227723725582871e-05, "loss": 0.5738, "step": 4581 }, { "epoch": 0.941926200020557, "grad_norm": 0.19181537628173828, "learning_rate": 7.226929262185511e-05, "loss": 0.6692, "step": 4582 }, { "epoch": 0.9421317709939356, "grad_norm": 0.19717134535312653, "learning_rate": 7.226134664445093e-05, "loss": 0.665, "step": 4583 }, { "epoch": 0.9423373419673142, "grad_norm": 0.17591415345668793, "learning_rate": 7.22533993240076e-05, "loss": 0.6358, "step": 4584 }, { "epoch": 0.9425429129406928, "grad_norm": 0.18756897747516632, "learning_rate": 7.224545066091669e-05, "loss": 0.6755, "step": 4585 }, { "epoch": 0.9427484839140713, "grad_norm": 0.18418292701244354, "learning_rate": 7.223750065556977e-05, "loss": 0.6498, "step": 4586 }, { "epoch": 0.9429540548874499, "grad_norm": 0.14689591526985168, "learning_rate": 7.222954930835849e-05, "loss": 0.5795, "step": 4587 }, { "epoch": 0.9431596258608285, "grad_norm": 0.18386992812156677, "learning_rate": 7.222159661967459e-05, "loss": 0.6699, "step": 4588 }, { "epoch": 0.943365196834207, "grad_norm": 0.1894700974225998, "learning_rate": 7.221364258990985e-05, "loss": 0.6571, "step": 4589 }, { "epoch": 0.9435707678075855, "grad_norm": 0.17809130251407623, "learning_rate": 7.220568721945614e-05, "loss": 0.6409, "step": 4590 }, { "epoch": 0.9437763387809641, "grad_norm": 0.18572141230106354, "learning_rate": 7.219773050870537e-05, "loss": 0.6774, "step": 4591 }, { "epoch": 0.9439819097543427, "grad_norm": 0.1781856119632721, "learning_rate": 7.218977245804955e-05, "loss": 0.6939, "step": 4592 }, { "epoch": 0.9441874807277213, "grad_norm": 0.1840573400259018, "learning_rate": 7.218181306788074e-05, "loss": 0.6654, "step": 4593 }, { "epoch": 0.9443930517010998, "grad_norm": 0.1829008311033249, "learning_rate": 7.217385233859102e-05, "loss": 0.6673, "step": 4594 }, { "epoch": 0.9445986226744784, "grad_norm": 0.18518169224262238, "learning_rate": 7.216589027057262e-05, "loss": 0.6902, "step": 4595 }, { "epoch": 0.944804193647857, "grad_norm": 0.18205900490283966, "learning_rate": 7.215792686421779e-05, "loss": 0.6773, "step": 4596 }, { "epoch": 0.9450097646212354, "grad_norm": 0.14691917598247528, "learning_rate": 7.214996211991883e-05, "loss": 0.5941, "step": 4597 }, { "epoch": 0.945215335594614, "grad_norm": 0.18329864740371704, "learning_rate": 7.214199603806812e-05, "loss": 0.6699, "step": 4598 }, { "epoch": 0.9454209065679926, "grad_norm": 0.19199949502944946, "learning_rate": 7.213402861905814e-05, "loss": 0.6787, "step": 4599 }, { "epoch": 0.9456264775413712, "grad_norm": 0.15589165687561035, "learning_rate": 7.21260598632814e-05, "loss": 0.592, "step": 4600 }, { "epoch": 0.9458320485147497, "grad_norm": 0.12858957052230835, "learning_rate": 7.211808977113046e-05, "loss": 0.5699, "step": 4601 }, { "epoch": 0.9460376194881283, "grad_norm": 0.19628196954727173, "learning_rate": 7.2110118342998e-05, "loss": 0.6516, "step": 4602 }, { "epoch": 0.9462431904615068, "grad_norm": 0.1841566562652588, "learning_rate": 7.210214557927672e-05, "loss": 0.6473, "step": 4603 }, { "epoch": 0.9464487614348854, "grad_norm": 0.17483435571193695, "learning_rate": 7.20941714803594e-05, "loss": 0.6634, "step": 4604 }, { "epoch": 0.9466543324082639, "grad_norm": 0.18066710233688354, "learning_rate": 7.20861960466389e-05, "loss": 0.6443, "step": 4605 }, { "epoch": 0.9468599033816425, "grad_norm": 0.18473312258720398, "learning_rate": 7.207821927850811e-05, "loss": 0.6632, "step": 4606 }, { "epoch": 0.9470654743550211, "grad_norm": 0.18283243477344513, "learning_rate": 7.207024117636002e-05, "loss": 0.6703, "step": 4607 }, { "epoch": 0.9472710453283997, "grad_norm": 0.16648972034454346, "learning_rate": 7.206226174058766e-05, "loss": 0.5717, "step": 4608 }, { "epoch": 0.9474766163017782, "grad_norm": 0.19748379290103912, "learning_rate": 7.205428097158419e-05, "loss": 0.6838, "step": 4609 }, { "epoch": 0.9476821872751567, "grad_norm": 0.19934770464897156, "learning_rate": 7.204629886974271e-05, "loss": 0.6729, "step": 4610 }, { "epoch": 0.9478877582485353, "grad_norm": 0.19454436004161835, "learning_rate": 7.203831543545651e-05, "loss": 0.6693, "step": 4611 }, { "epoch": 0.9480933292219139, "grad_norm": 0.18130190670490265, "learning_rate": 7.203033066911889e-05, "loss": 0.6533, "step": 4612 }, { "epoch": 0.9482989001952924, "grad_norm": 0.17981968820095062, "learning_rate": 7.202234457112322e-05, "loss": 0.6574, "step": 4613 }, { "epoch": 0.948504471168671, "grad_norm": 0.1938825398683548, "learning_rate": 7.201435714186294e-05, "loss": 0.6517, "step": 4614 }, { "epoch": 0.9487100421420496, "grad_norm": 0.18407849967479706, "learning_rate": 7.200636838173153e-05, "loss": 0.6561, "step": 4615 }, { "epoch": 0.948915613115428, "grad_norm": 0.186232790350914, "learning_rate": 7.199837829112259e-05, "loss": 0.6417, "step": 4616 }, { "epoch": 0.9491211840888066, "grad_norm": 0.14791084825992584, "learning_rate": 7.199038687042973e-05, "loss": 0.591, "step": 4617 }, { "epoch": 0.9493267550621852, "grad_norm": 0.1812361627817154, "learning_rate": 7.198239412004667e-05, "loss": 0.6669, "step": 4618 }, { "epoch": 0.9495323260355638, "grad_norm": 0.18593737483024597, "learning_rate": 7.197440004036716e-05, "loss": 0.6999, "step": 4619 }, { "epoch": 0.9497378970089423, "grad_norm": 0.17995983362197876, "learning_rate": 7.196640463178506e-05, "loss": 0.6708, "step": 4620 }, { "epoch": 0.9499434679823209, "grad_norm": 0.19636765122413635, "learning_rate": 7.195840789469422e-05, "loss": 0.6667, "step": 4621 }, { "epoch": 0.9501490389556995, "grad_norm": 0.18465958535671234, "learning_rate": 7.195040982948865e-05, "loss": 0.6646, "step": 4622 }, { "epoch": 0.950354609929078, "grad_norm": 0.1823161542415619, "learning_rate": 7.194241043656234e-05, "loss": 0.636, "step": 4623 }, { "epoch": 0.9505601809024565, "grad_norm": 0.19029709696769714, "learning_rate": 7.19344097163094e-05, "loss": 0.6731, "step": 4624 }, { "epoch": 0.9507657518758351, "grad_norm": 0.19934682548046112, "learning_rate": 7.192640766912397e-05, "loss": 0.6384, "step": 4625 }, { "epoch": 0.9509713228492137, "grad_norm": 0.14476045966148376, "learning_rate": 7.19184042954003e-05, "loss": 0.5821, "step": 4626 }, { "epoch": 0.9511768938225923, "grad_norm": 0.1817658394575119, "learning_rate": 7.191039959553266e-05, "loss": 0.6815, "step": 4627 }, { "epoch": 0.9513824647959708, "grad_norm": 0.1834515631198883, "learning_rate": 7.190239356991542e-05, "loss": 0.6393, "step": 4628 }, { "epoch": 0.9515880357693494, "grad_norm": 0.18767185509204865, "learning_rate": 7.189438621894298e-05, "loss": 0.6657, "step": 4629 }, { "epoch": 0.951793606742728, "grad_norm": 0.18882089853286743, "learning_rate": 7.188637754300984e-05, "loss": 0.6531, "step": 4630 }, { "epoch": 0.9519991777161065, "grad_norm": 0.18064305186271667, "learning_rate": 7.187836754251055e-05, "loss": 0.6739, "step": 4631 }, { "epoch": 0.952204748689485, "grad_norm": 0.18906618654727936, "learning_rate": 7.187035621783972e-05, "loss": 0.68, "step": 4632 }, { "epoch": 0.9524103196628636, "grad_norm": 0.1903999000787735, "learning_rate": 7.186234356939204e-05, "loss": 0.6503, "step": 4633 }, { "epoch": 0.9526158906362422, "grad_norm": 0.184392049908638, "learning_rate": 7.185432959756222e-05, "loss": 0.6723, "step": 4634 }, { "epoch": 0.9528214616096207, "grad_norm": 0.19594880938529968, "learning_rate": 7.184631430274512e-05, "loss": 0.6487, "step": 4635 }, { "epoch": 0.9530270325829993, "grad_norm": 0.1459794044494629, "learning_rate": 7.183829768533558e-05, "loss": 0.5766, "step": 4636 }, { "epoch": 0.9532326035563778, "grad_norm": 0.19931526482105255, "learning_rate": 7.183027974572856e-05, "loss": 0.6702, "step": 4637 }, { "epoch": 0.9534381745297564, "grad_norm": 0.18936146795749664, "learning_rate": 7.182226048431907e-05, "loss": 0.6409, "step": 4638 }, { "epoch": 0.9536437455031349, "grad_norm": 0.12762728333473206, "learning_rate": 7.181423990150215e-05, "loss": 0.5624, "step": 4639 }, { "epoch": 0.9538493164765135, "grad_norm": 0.1938938945531845, "learning_rate": 7.180621799767298e-05, "loss": 0.6835, "step": 4640 }, { "epoch": 0.9540548874498921, "grad_norm": 0.1908787190914154, "learning_rate": 7.179819477322673e-05, "loss": 0.679, "step": 4641 }, { "epoch": 0.9542604584232707, "grad_norm": 0.17859888076782227, "learning_rate": 7.179017022855868e-05, "loss": 0.6604, "step": 4642 }, { "epoch": 0.9544660293966492, "grad_norm": 0.14399871230125427, "learning_rate": 7.178214436406416e-05, "loss": 0.5768, "step": 4643 }, { "epoch": 0.9546716003700277, "grad_norm": 0.19949081540107727, "learning_rate": 7.177411718013858e-05, "loss": 0.6536, "step": 4644 }, { "epoch": 0.9548771713434063, "grad_norm": 0.12567096948623657, "learning_rate": 7.176608867717738e-05, "loss": 0.579, "step": 4645 }, { "epoch": 0.9550827423167849, "grad_norm": 0.1978704035282135, "learning_rate": 7.175805885557608e-05, "loss": 0.654, "step": 4646 }, { "epoch": 0.9552883132901634, "grad_norm": 0.1830187439918518, "learning_rate": 7.175002771573031e-05, "loss": 0.665, "step": 4647 }, { "epoch": 0.955493884263542, "grad_norm": 0.14475895464420319, "learning_rate": 7.17419952580357e-05, "loss": 0.5824, "step": 4648 }, { "epoch": 0.9556994552369206, "grad_norm": 0.2026558667421341, "learning_rate": 7.173396148288796e-05, "loss": 0.6604, "step": 4649 }, { "epoch": 0.9559050262102992, "grad_norm": 0.18734343349933624, "learning_rate": 7.172592639068291e-05, "loss": 0.6658, "step": 4650 }, { "epoch": 0.9561105971836776, "grad_norm": 0.18206274509429932, "learning_rate": 7.171788998181637e-05, "loss": 0.6371, "step": 4651 }, { "epoch": 0.9563161681570562, "grad_norm": 0.18837840855121613, "learning_rate": 7.170985225668428e-05, "loss": 0.6306, "step": 4652 }, { "epoch": 0.9565217391304348, "grad_norm": 0.19700245559215546, "learning_rate": 7.17018132156826e-05, "loss": 0.6645, "step": 4653 }, { "epoch": 0.9567273101038133, "grad_norm": 0.18174946308135986, "learning_rate": 7.169377285920738e-05, "loss": 0.6657, "step": 4654 }, { "epoch": 0.9569328810771919, "grad_norm": 0.1869078427553177, "learning_rate": 7.168573118765476e-05, "loss": 0.6752, "step": 4655 }, { "epoch": 0.9571384520505705, "grad_norm": 0.19436730444431305, "learning_rate": 7.167768820142088e-05, "loss": 0.6694, "step": 4656 }, { "epoch": 0.957344023023949, "grad_norm": 0.18894408643245697, "learning_rate": 7.166964390090199e-05, "loss": 0.6644, "step": 4657 }, { "epoch": 0.9575495939973275, "grad_norm": 0.18464897572994232, "learning_rate": 7.16615982864944e-05, "loss": 0.6457, "step": 4658 }, { "epoch": 0.9577551649707061, "grad_norm": 0.1893334686756134, "learning_rate": 7.16535513585945e-05, "loss": 0.6692, "step": 4659 }, { "epoch": 0.9579607359440847, "grad_norm": 0.151536226272583, "learning_rate": 7.164550311759869e-05, "loss": 0.5774, "step": 4660 }, { "epoch": 0.9581663069174633, "grad_norm": 0.20720963180065155, "learning_rate": 7.163745356390347e-05, "loss": 0.6608, "step": 4661 }, { "epoch": 0.9583718778908418, "grad_norm": 0.18656425178050995, "learning_rate": 7.162940269790543e-05, "loss": 0.6502, "step": 4662 }, { "epoch": 0.9585774488642204, "grad_norm": 0.18301479518413544, "learning_rate": 7.162135052000116e-05, "loss": 0.6854, "step": 4663 }, { "epoch": 0.958783019837599, "grad_norm": 0.14167705178260803, "learning_rate": 7.161329703058742e-05, "loss": 0.5932, "step": 4664 }, { "epoch": 0.9589885908109775, "grad_norm": 0.13432294130325317, "learning_rate": 7.16052422300609e-05, "loss": 0.5758, "step": 4665 }, { "epoch": 0.959194161784356, "grad_norm": 0.2055593878030777, "learning_rate": 7.159718611881845e-05, "loss": 0.6646, "step": 4666 }, { "epoch": 0.9593997327577346, "grad_norm": 0.19777736067771912, "learning_rate": 7.158912869725695e-05, "loss": 0.6821, "step": 4667 }, { "epoch": 0.9596053037311132, "grad_norm": 0.18612885475158691, "learning_rate": 7.158106996577336e-05, "loss": 0.6758, "step": 4668 }, { "epoch": 0.9598108747044918, "grad_norm": 0.1979762464761734, "learning_rate": 7.15730099247647e-05, "loss": 0.6779, "step": 4669 }, { "epoch": 0.9600164456778703, "grad_norm": 0.1957666128873825, "learning_rate": 7.156494857462803e-05, "loss": 0.657, "step": 4670 }, { "epoch": 0.9602220166512488, "grad_norm": 0.16183792054653168, "learning_rate": 7.155688591576051e-05, "loss": 0.5905, "step": 4671 }, { "epoch": 0.9604275876246274, "grad_norm": 0.181317538022995, "learning_rate": 7.154882194855936e-05, "loss": 0.633, "step": 4672 }, { "epoch": 0.9606331585980059, "grad_norm": 0.1878432035446167, "learning_rate": 7.154075667342183e-05, "loss": 0.6703, "step": 4673 }, { "epoch": 0.9608387295713845, "grad_norm": 0.19090843200683594, "learning_rate": 7.153269009074528e-05, "loss": 0.6737, "step": 4674 }, { "epoch": 0.9610443005447631, "grad_norm": 0.18672534823417664, "learning_rate": 7.15246222009271e-05, "loss": 0.6585, "step": 4675 }, { "epoch": 0.9612498715181417, "grad_norm": 0.18867382407188416, "learning_rate": 7.151655300436475e-05, "loss": 0.6403, "step": 4676 }, { "epoch": 0.9614554424915202, "grad_norm": 0.18556974828243256, "learning_rate": 7.150848250145578e-05, "loss": 0.6543, "step": 4677 }, { "epoch": 0.9616610134648987, "grad_norm": 0.18414060771465302, "learning_rate": 7.150041069259777e-05, "loss": 0.6671, "step": 4678 }, { "epoch": 0.9618665844382773, "grad_norm": 0.14137166738510132, "learning_rate": 7.14923375781884e-05, "loss": 0.5742, "step": 4679 }, { "epoch": 0.9620721554116559, "grad_norm": 0.19371961057186127, "learning_rate": 7.148426315862537e-05, "loss": 0.6423, "step": 4680 }, { "epoch": 0.9622777263850344, "grad_norm": 0.1935972273349762, "learning_rate": 7.147618743430648e-05, "loss": 0.6896, "step": 4681 }, { "epoch": 0.962483297358413, "grad_norm": 0.19424404203891754, "learning_rate": 7.14681104056296e-05, "loss": 0.6634, "step": 4682 }, { "epoch": 0.9626888683317916, "grad_norm": 0.18401269614696503, "learning_rate": 7.146003207299263e-05, "loss": 0.6301, "step": 4683 }, { "epoch": 0.9628944393051702, "grad_norm": 0.18967941403388977, "learning_rate": 7.145195243679354e-05, "loss": 0.6583, "step": 4684 }, { "epoch": 0.9631000102785486, "grad_norm": 0.18623065948486328, "learning_rate": 7.14438714974304e-05, "loss": 0.6411, "step": 4685 }, { "epoch": 0.9633055812519272, "grad_norm": 0.18971066176891327, "learning_rate": 7.14357892553013e-05, "loss": 0.6237, "step": 4686 }, { "epoch": 0.9635111522253058, "grad_norm": 0.13411302864551544, "learning_rate": 7.142770571080443e-05, "loss": 0.5835, "step": 4687 }, { "epoch": 0.9637167231986844, "grad_norm": 0.19216755032539368, "learning_rate": 7.141962086433802e-05, "loss": 0.6423, "step": 4688 }, { "epoch": 0.9639222941720629, "grad_norm": 0.18625061213970184, "learning_rate": 7.141153471630038e-05, "loss": 0.6641, "step": 4689 }, { "epoch": 0.9641278651454415, "grad_norm": 0.19605115056037903, "learning_rate": 7.140344726708988e-05, "loss": 0.6713, "step": 4690 }, { "epoch": 0.96433343611882, "grad_norm": 0.18507269024848938, "learning_rate": 7.139535851710492e-05, "loss": 0.6626, "step": 4691 }, { "epoch": 0.9645390070921985, "grad_norm": 0.1852700561285019, "learning_rate": 7.138726846674403e-05, "loss": 0.6751, "step": 4692 }, { "epoch": 0.9647445780655771, "grad_norm": 0.17961280047893524, "learning_rate": 7.137917711640575e-05, "loss": 0.6648, "step": 4693 }, { "epoch": 0.9649501490389557, "grad_norm": 0.20353473722934723, "learning_rate": 7.137108446648873e-05, "loss": 0.6485, "step": 4694 }, { "epoch": 0.9651557200123343, "grad_norm": 0.18588435649871826, "learning_rate": 7.136299051739162e-05, "loss": 0.6377, "step": 4695 }, { "epoch": 0.9653612909857128, "grad_norm": 0.14005832374095917, "learning_rate": 7.135489526951318e-05, "loss": 0.5717, "step": 4696 }, { "epoch": 0.9655668619590914, "grad_norm": 0.18969693779945374, "learning_rate": 7.134679872325224e-05, "loss": 0.6724, "step": 4697 }, { "epoch": 0.96577243293247, "grad_norm": 0.12744298577308655, "learning_rate": 7.133870087900768e-05, "loss": 0.5729, "step": 4698 }, { "epoch": 0.9659780039058485, "grad_norm": 0.18571147322654724, "learning_rate": 7.133060173717842e-05, "loss": 0.6547, "step": 4699 }, { "epoch": 0.966183574879227, "grad_norm": 0.19381268322467804, "learning_rate": 7.13225012981635e-05, "loss": 0.672, "step": 4700 }, { "epoch": 0.9663891458526056, "grad_norm": 0.18004442751407623, "learning_rate": 7.131439956236194e-05, "loss": 0.6923, "step": 4701 }, { "epoch": 0.9665947168259842, "grad_norm": 0.18902912735939026, "learning_rate": 7.130629653017293e-05, "loss": 0.6709, "step": 4702 }, { "epoch": 0.9668002877993628, "grad_norm": 0.1331816166639328, "learning_rate": 7.129819220199566e-05, "loss": 0.5755, "step": 4703 }, { "epoch": 0.9670058587727413, "grad_norm": 0.1306556910276413, "learning_rate": 7.129008657822936e-05, "loss": 0.5504, "step": 4704 }, { "epoch": 0.9672114297461198, "grad_norm": 0.12396678328514099, "learning_rate": 7.128197965927337e-05, "loss": 0.5786, "step": 4705 }, { "epoch": 0.9674170007194984, "grad_norm": 0.2061123251914978, "learning_rate": 7.127387144552709e-05, "loss": 0.6777, "step": 4706 }, { "epoch": 0.967622571692877, "grad_norm": 0.1253053843975067, "learning_rate": 7.126576193738997e-05, "loss": 0.5862, "step": 4707 }, { "epoch": 0.9678281426662555, "grad_norm": 0.1296505630016327, "learning_rate": 7.125765113526151e-05, "loss": 0.5758, "step": 4708 }, { "epoch": 0.9680337136396341, "grad_norm": 0.1793881356716156, "learning_rate": 7.124953903954132e-05, "loss": 0.6242, "step": 4709 }, { "epoch": 0.9682392846130127, "grad_norm": 0.13087224960327148, "learning_rate": 7.124142565062903e-05, "loss": 0.5745, "step": 4710 }, { "epoch": 0.9684448555863912, "grad_norm": 0.12415716052055359, "learning_rate": 7.123331096892434e-05, "loss": 0.5658, "step": 4711 }, { "epoch": 0.9686504265597697, "grad_norm": 0.18544363975524902, "learning_rate": 7.122519499482706e-05, "loss": 0.6601, "step": 4712 }, { "epoch": 0.9688559975331483, "grad_norm": 0.17584609985351562, "learning_rate": 7.121707772873699e-05, "loss": 0.6448, "step": 4713 }, { "epoch": 0.9690615685065269, "grad_norm": 0.18083997070789337, "learning_rate": 7.120895917105402e-05, "loss": 0.6701, "step": 4714 }, { "epoch": 0.9692671394799054, "grad_norm": 0.17472274601459503, "learning_rate": 7.120083932217815e-05, "loss": 0.6957, "step": 4715 }, { "epoch": 0.969472710453284, "grad_norm": 0.15184062719345093, "learning_rate": 7.119271818250936e-05, "loss": 0.5817, "step": 4716 }, { "epoch": 0.9696782814266626, "grad_norm": 0.18855705857276917, "learning_rate": 7.11845957524478e-05, "loss": 0.6749, "step": 4717 }, { "epoch": 0.9698838524000412, "grad_norm": 0.18199525773525238, "learning_rate": 7.117647203239358e-05, "loss": 0.6665, "step": 4718 }, { "epoch": 0.9700894233734196, "grad_norm": 0.1904478669166565, "learning_rate": 7.116834702274693e-05, "loss": 0.6339, "step": 4719 }, { "epoch": 0.9702949943467982, "grad_norm": 0.18044617772102356, "learning_rate": 7.116022072390815e-05, "loss": 0.6575, "step": 4720 }, { "epoch": 0.9705005653201768, "grad_norm": 0.17925746738910675, "learning_rate": 7.115209313627755e-05, "loss": 0.6639, "step": 4721 }, { "epoch": 0.9707061362935554, "grad_norm": 0.18334949016571045, "learning_rate": 7.114396426025557e-05, "loss": 0.6716, "step": 4722 }, { "epoch": 0.9709117072669339, "grad_norm": 0.17840418219566345, "learning_rate": 7.113583409624265e-05, "loss": 0.6672, "step": 4723 }, { "epoch": 0.9711172782403125, "grad_norm": 0.14346054196357727, "learning_rate": 7.112770264463936e-05, "loss": 0.6005, "step": 4724 }, { "epoch": 0.971322849213691, "grad_norm": 0.20740103721618652, "learning_rate": 7.111956990584626e-05, "loss": 0.6906, "step": 4725 }, { "epoch": 0.9715284201870696, "grad_norm": 0.1770005226135254, "learning_rate": 7.111143588026406e-05, "loss": 0.6421, "step": 4726 }, { "epoch": 0.9717339911604481, "grad_norm": 0.18892593681812286, "learning_rate": 7.110330056829344e-05, "loss": 0.6357, "step": 4727 }, { "epoch": 0.9719395621338267, "grad_norm": 0.18768368661403656, "learning_rate": 7.109516397033522e-05, "loss": 0.6538, "step": 4728 }, { "epoch": 0.9721451331072053, "grad_norm": 0.1885116994380951, "learning_rate": 7.108702608679022e-05, "loss": 0.6792, "step": 4729 }, { "epoch": 0.9723507040805838, "grad_norm": 0.19139648973941803, "learning_rate": 7.10788869180594e-05, "loss": 0.6307, "step": 4730 }, { "epoch": 0.9725562750539624, "grad_norm": 0.18306319415569305, "learning_rate": 7.107074646454368e-05, "loss": 0.6564, "step": 4731 }, { "epoch": 0.972761846027341, "grad_norm": 0.18829376995563507, "learning_rate": 7.106260472664417e-05, "loss": 0.6439, "step": 4732 }, { "epoch": 0.9729674170007195, "grad_norm": 0.18569877743721008, "learning_rate": 7.105446170476193e-05, "loss": 0.6301, "step": 4733 }, { "epoch": 0.973172987974098, "grad_norm": 0.17895673215389252, "learning_rate": 7.104631739929814e-05, "loss": 0.6752, "step": 4734 }, { "epoch": 0.9733785589474766, "grad_norm": 0.14281636476516724, "learning_rate": 7.103817181065402e-05, "loss": 0.5585, "step": 4735 }, { "epoch": 0.9735841299208552, "grad_norm": 0.18822631239891052, "learning_rate": 7.103002493923089e-05, "loss": 0.6773, "step": 4736 }, { "epoch": 0.9737897008942338, "grad_norm": 0.19597260653972626, "learning_rate": 7.102187678543009e-05, "loss": 0.6525, "step": 4737 }, { "epoch": 0.9739952718676123, "grad_norm": 0.18155749142169952, "learning_rate": 7.101372734965306e-05, "loss": 0.6369, "step": 4738 }, { "epoch": 0.9742008428409908, "grad_norm": 0.1390804648399353, "learning_rate": 7.100557663230125e-05, "loss": 0.5831, "step": 4739 }, { "epoch": 0.9744064138143694, "grad_norm": 0.19495947659015656, "learning_rate": 7.099742463377626e-05, "loss": 0.6545, "step": 4740 }, { "epoch": 0.974611984787748, "grad_norm": 0.12120307236909866, "learning_rate": 7.098927135447965e-05, "loss": 0.5725, "step": 4741 }, { "epoch": 0.9748175557611265, "grad_norm": 0.18559974431991577, "learning_rate": 7.09811167948131e-05, "loss": 0.6441, "step": 4742 }, { "epoch": 0.9750231267345051, "grad_norm": 0.18373870849609375, "learning_rate": 7.097296095517838e-05, "loss": 0.6765, "step": 4743 }, { "epoch": 0.9752286977078837, "grad_norm": 0.13721033930778503, "learning_rate": 7.096480383597725e-05, "loss": 0.5717, "step": 4744 }, { "epoch": 0.9754342686812622, "grad_norm": 0.25049659609794617, "learning_rate": 7.095664543761162e-05, "loss": 0.653, "step": 4745 }, { "epoch": 0.9756398396546407, "grad_norm": 0.17938856780529022, "learning_rate": 7.094848576048339e-05, "loss": 0.6455, "step": 4746 }, { "epoch": 0.9758454106280193, "grad_norm": 0.18572570383548737, "learning_rate": 7.094032480499454e-05, "loss": 0.634, "step": 4747 }, { "epoch": 0.9760509816013979, "grad_norm": 0.18931305408477783, "learning_rate": 7.093216257154713e-05, "loss": 0.6397, "step": 4748 }, { "epoch": 0.9762565525747764, "grad_norm": 0.178866446018219, "learning_rate": 7.092399906054328e-05, "loss": 0.6501, "step": 4749 }, { "epoch": 0.976462123548155, "grad_norm": 0.13158805668354034, "learning_rate": 7.091583427238515e-05, "loss": 0.5743, "step": 4750 }, { "epoch": 0.9766676945215336, "grad_norm": 0.18385621905326843, "learning_rate": 7.090766820747502e-05, "loss": 0.6433, "step": 4751 }, { "epoch": 0.9768732654949122, "grad_norm": 0.18304161727428436, "learning_rate": 7.089950086621515e-05, "loss": 0.6304, "step": 4752 }, { "epoch": 0.9770788364682906, "grad_norm": 0.14201928675174713, "learning_rate": 7.089133224900794e-05, "loss": 0.5821, "step": 4753 }, { "epoch": 0.9772844074416692, "grad_norm": 0.18627387285232544, "learning_rate": 7.08831623562558e-05, "loss": 0.6627, "step": 4754 }, { "epoch": 0.9774899784150478, "grad_norm": 0.18866147100925446, "learning_rate": 7.087499118836123e-05, "loss": 0.6627, "step": 4755 }, { "epoch": 0.9776955493884264, "grad_norm": 0.1349857598543167, "learning_rate": 7.086681874572677e-05, "loss": 0.5733, "step": 4756 }, { "epoch": 0.9779011203618049, "grad_norm": 0.18248964846134186, "learning_rate": 7.085864502875506e-05, "loss": 0.6549, "step": 4757 }, { "epoch": 0.9781066913351835, "grad_norm": 0.188977912068367, "learning_rate": 7.085047003784879e-05, "loss": 0.6531, "step": 4758 }, { "epoch": 0.978312262308562, "grad_norm": 0.1410411149263382, "learning_rate": 7.084229377341068e-05, "loss": 0.5773, "step": 4759 }, { "epoch": 0.9785178332819406, "grad_norm": 0.18209992349147797, "learning_rate": 7.083411623584352e-05, "loss": 0.6653, "step": 4760 }, { "epoch": 0.9787234042553191, "grad_norm": 0.18571458756923676, "learning_rate": 7.082593742555023e-05, "loss": 0.6621, "step": 4761 }, { "epoch": 0.9789289752286977, "grad_norm": 0.18278199434280396, "learning_rate": 7.08177573429337e-05, "loss": 0.6688, "step": 4762 }, { "epoch": 0.9791345462020763, "grad_norm": 0.17872901260852814, "learning_rate": 7.080957598839693e-05, "loss": 0.6442, "step": 4763 }, { "epoch": 0.9793401171754548, "grad_norm": 0.17393019795417786, "learning_rate": 7.080139336234299e-05, "loss": 0.6474, "step": 4764 }, { "epoch": 0.9795456881488334, "grad_norm": 0.18381252884864807, "learning_rate": 7.0793209465175e-05, "loss": 0.6469, "step": 4765 }, { "epoch": 0.979751259122212, "grad_norm": 0.18060103058815002, "learning_rate": 7.078502429729614e-05, "loss": 0.6635, "step": 4766 }, { "epoch": 0.9799568300955905, "grad_norm": 0.18748174607753754, "learning_rate": 7.077683785910964e-05, "loss": 0.6695, "step": 4767 }, { "epoch": 0.980162401068969, "grad_norm": 0.18352623283863068, "learning_rate": 7.076865015101882e-05, "loss": 0.6475, "step": 4768 }, { "epoch": 0.9803679720423476, "grad_norm": 0.14373265206813812, "learning_rate": 7.076046117342705e-05, "loss": 0.5666, "step": 4769 }, { "epoch": 0.9805735430157262, "grad_norm": 0.18376867473125458, "learning_rate": 7.075227092673777e-05, "loss": 0.6542, "step": 4770 }, { "epoch": 0.9807791139891048, "grad_norm": 0.1273968666791916, "learning_rate": 7.074407941135447e-05, "loss": 0.5939, "step": 4771 }, { "epoch": 0.9809846849624833, "grad_norm": 0.19144566357135773, "learning_rate": 7.073588662768069e-05, "loss": 0.655, "step": 4772 }, { "epoch": 0.9811902559358618, "grad_norm": 0.18799123167991638, "learning_rate": 7.072769257612007e-05, "loss": 0.6726, "step": 4773 }, { "epoch": 0.9813958269092404, "grad_norm": 0.19798782467842102, "learning_rate": 7.071949725707628e-05, "loss": 0.6438, "step": 4774 }, { "epoch": 0.981601397882619, "grad_norm": 0.18581277132034302, "learning_rate": 7.07113006709531e-05, "loss": 0.6562, "step": 4775 }, { "epoch": 0.9818069688559975, "grad_norm": 0.1861695498228073, "learning_rate": 7.070310281815429e-05, "loss": 0.6693, "step": 4776 }, { "epoch": 0.9820125398293761, "grad_norm": 0.15388214588165283, "learning_rate": 7.069490369908374e-05, "loss": 0.5852, "step": 4777 }, { "epoch": 0.9822181108027547, "grad_norm": 0.19053393602371216, "learning_rate": 7.068670331414539e-05, "loss": 0.6512, "step": 4778 }, { "epoch": 0.9824236817761333, "grad_norm": 0.19945350289344788, "learning_rate": 7.067850166374322e-05, "loss": 0.6898, "step": 4779 }, { "epoch": 0.9826292527495117, "grad_norm": 0.12717384099960327, "learning_rate": 7.067029874828131e-05, "loss": 0.5656, "step": 4780 }, { "epoch": 0.9828348237228903, "grad_norm": 0.18999631702899933, "learning_rate": 7.066209456816373e-05, "loss": 0.6775, "step": 4781 }, { "epoch": 0.9830403946962689, "grad_norm": 0.1788676530122757, "learning_rate": 7.065388912379472e-05, "loss": 0.6573, "step": 4782 }, { "epoch": 0.9832459656696474, "grad_norm": 0.1846441626548767, "learning_rate": 7.06456824155785e-05, "loss": 0.6285, "step": 4783 }, { "epoch": 0.983451536643026, "grad_norm": 0.18069574236869812, "learning_rate": 7.063747444391937e-05, "loss": 0.6477, "step": 4784 }, { "epoch": 0.9836571076164046, "grad_norm": 0.18519815802574158, "learning_rate": 7.062926520922171e-05, "loss": 0.6372, "step": 4785 }, { "epoch": 0.9838626785897832, "grad_norm": 0.1890534907579422, "learning_rate": 7.062105471188993e-05, "loss": 0.6727, "step": 4786 }, { "epoch": 0.9840682495631616, "grad_norm": 0.14361847937107086, "learning_rate": 7.061284295232854e-05, "loss": 0.5637, "step": 4787 }, { "epoch": 0.9842738205365402, "grad_norm": 0.20357996225357056, "learning_rate": 7.060462993094209e-05, "loss": 0.643, "step": 4788 }, { "epoch": 0.9844793915099188, "grad_norm": 0.12664301693439484, "learning_rate": 7.059641564813521e-05, "loss": 0.5653, "step": 4789 }, { "epoch": 0.9846849624832974, "grad_norm": 0.11784827709197998, "learning_rate": 7.058820010431256e-05, "loss": 0.5801, "step": 4790 }, { "epoch": 0.9848905334566759, "grad_norm": 0.19485826790332794, "learning_rate": 7.057998329987889e-05, "loss": 0.6846, "step": 4791 }, { "epoch": 0.9850961044300545, "grad_norm": 0.19157935678958893, "learning_rate": 7.057176523523901e-05, "loss": 0.6641, "step": 4792 }, { "epoch": 0.985301675403433, "grad_norm": 0.17738407850265503, "learning_rate": 7.056354591079778e-05, "loss": 0.646, "step": 4793 }, { "epoch": 0.9855072463768116, "grad_norm": 0.18637309968471527, "learning_rate": 7.055532532696012e-05, "loss": 0.6406, "step": 4794 }, { "epoch": 0.9857128173501901, "grad_norm": 0.18436288833618164, "learning_rate": 7.054710348413103e-05, "loss": 0.6349, "step": 4795 }, { "epoch": 0.9859183883235687, "grad_norm": 0.1875494122505188, "learning_rate": 7.053888038271555e-05, "loss": 0.6585, "step": 4796 }, { "epoch": 0.9861239592969473, "grad_norm": 0.18584869801998138, "learning_rate": 7.053065602311882e-05, "loss": 0.6729, "step": 4797 }, { "epoch": 0.9863295302703259, "grad_norm": 0.1785837858915329, "learning_rate": 7.052243040574597e-05, "loss": 0.6571, "step": 4798 }, { "epoch": 0.9865351012437044, "grad_norm": 0.18055270612239838, "learning_rate": 7.051420353100228e-05, "loss": 0.6732, "step": 4799 }, { "epoch": 0.986740672217083, "grad_norm": 0.17629997432231903, "learning_rate": 7.050597539929304e-05, "loss": 0.6463, "step": 4800 }, { "epoch": 0.9869462431904615, "grad_norm": 0.17916452884674072, "learning_rate": 7.049774601102361e-05, "loss": 0.664, "step": 4801 }, { "epoch": 0.98715181416384, "grad_norm": 0.17072445154190063, "learning_rate": 7.04895153665994e-05, "loss": 0.6522, "step": 4802 }, { "epoch": 0.9873573851372186, "grad_norm": 0.18947453796863556, "learning_rate": 7.048128346642591e-05, "loss": 0.6475, "step": 4803 }, { "epoch": 0.9875629561105972, "grad_norm": 0.18113267421722412, "learning_rate": 7.047305031090869e-05, "loss": 0.6505, "step": 4804 }, { "epoch": 0.9877685270839758, "grad_norm": 0.18333598971366882, "learning_rate": 7.046481590045331e-05, "loss": 0.6697, "step": 4805 }, { "epoch": 0.9879740980573543, "grad_norm": 0.17988578975200653, "learning_rate": 7.045658023546551e-05, "loss": 0.6637, "step": 4806 }, { "epoch": 0.9881796690307328, "grad_norm": 0.18621614575386047, "learning_rate": 7.044834331635098e-05, "loss": 0.6762, "step": 4807 }, { "epoch": 0.9883852400041114, "grad_norm": 0.17191414535045624, "learning_rate": 7.04401051435155e-05, "loss": 0.6615, "step": 4808 }, { "epoch": 0.98859081097749, "grad_norm": 0.1875917762517929, "learning_rate": 7.043186571736496e-05, "loss": 0.6757, "step": 4809 }, { "epoch": 0.9887963819508685, "grad_norm": 0.18261222541332245, "learning_rate": 7.042362503830527e-05, "loss": 0.635, "step": 4810 }, { "epoch": 0.9890019529242471, "grad_norm": 0.18588493764400482, "learning_rate": 7.04153831067424e-05, "loss": 0.6719, "step": 4811 }, { "epoch": 0.9892075238976257, "grad_norm": 0.1783093363046646, "learning_rate": 7.040713992308239e-05, "loss": 0.6538, "step": 4812 }, { "epoch": 0.9894130948710043, "grad_norm": 0.18314149975776672, "learning_rate": 7.039889548773136e-05, "loss": 0.6912, "step": 4813 }, { "epoch": 0.9896186658443827, "grad_norm": 0.18791064620018005, "learning_rate": 7.039064980109544e-05, "loss": 0.6627, "step": 4814 }, { "epoch": 0.9898242368177613, "grad_norm": 0.18856315314769745, "learning_rate": 7.038240286358089e-05, "loss": 0.5847, "step": 4815 }, { "epoch": 0.9900298077911399, "grad_norm": 0.19757792353630066, "learning_rate": 7.0374154675594e-05, "loss": 0.6815, "step": 4816 }, { "epoch": 0.9902353787645185, "grad_norm": 0.18688839673995972, "learning_rate": 7.036590523754109e-05, "loss": 0.6686, "step": 4817 }, { "epoch": 0.990440949737897, "grad_norm": 0.1844862699508667, "learning_rate": 7.035765454982861e-05, "loss": 0.6518, "step": 4818 }, { "epoch": 0.9906465207112756, "grad_norm": 0.18571245670318604, "learning_rate": 7.0349402612863e-05, "loss": 0.642, "step": 4819 }, { "epoch": 0.9908520916846542, "grad_norm": 0.1918804794549942, "learning_rate": 7.034114942705081e-05, "loss": 0.6427, "step": 4820 }, { "epoch": 0.9910576626580326, "grad_norm": 0.19276823103427887, "learning_rate": 7.033289499279863e-05, "loss": 0.6943, "step": 4821 }, { "epoch": 0.9912632336314112, "grad_norm": 0.18441876769065857, "learning_rate": 7.032463931051311e-05, "loss": 0.6596, "step": 4822 }, { "epoch": 0.9914688046047898, "grad_norm": 0.1893150806427002, "learning_rate": 7.031638238060099e-05, "loss": 0.6599, "step": 4823 }, { "epoch": 0.9916743755781684, "grad_norm": 0.18663519620895386, "learning_rate": 7.030812420346902e-05, "loss": 0.6508, "step": 4824 }, { "epoch": 0.9918799465515469, "grad_norm": 0.18189288675785065, "learning_rate": 7.029986477952409e-05, "loss": 0.5656, "step": 4825 }, { "epoch": 0.9920855175249255, "grad_norm": 0.1928027868270874, "learning_rate": 7.029160410917305e-05, "loss": 0.6758, "step": 4826 }, { "epoch": 0.992291088498304, "grad_norm": 0.19040422141551971, "learning_rate": 7.028334219282291e-05, "loss": 0.6546, "step": 4827 }, { "epoch": 0.9924966594716826, "grad_norm": 0.15369752049446106, "learning_rate": 7.027507903088066e-05, "loss": 0.5874, "step": 4828 }, { "epoch": 0.9927022304450611, "grad_norm": 0.13231946527957916, "learning_rate": 7.026681462375339e-05, "loss": 0.5761, "step": 4829 }, { "epoch": 0.9929078014184397, "grad_norm": 0.1998869776725769, "learning_rate": 7.025854897184828e-05, "loss": 0.6637, "step": 4830 }, { "epoch": 0.9931133723918183, "grad_norm": 0.18532314896583557, "learning_rate": 7.025028207557251e-05, "loss": 0.6492, "step": 4831 }, { "epoch": 0.9933189433651969, "grad_norm": 0.1902119517326355, "learning_rate": 7.024201393533337e-05, "loss": 0.6405, "step": 4832 }, { "epoch": 0.9935245143385754, "grad_norm": 0.17781443893909454, "learning_rate": 7.023374455153817e-05, "loss": 0.6644, "step": 4833 }, { "epoch": 0.993730085311954, "grad_norm": 0.1855769008398056, "learning_rate": 7.022547392459434e-05, "loss": 0.6642, "step": 4834 }, { "epoch": 0.9939356562853325, "grad_norm": 0.18379683792591095, "learning_rate": 7.02172020549093e-05, "loss": 0.6377, "step": 4835 }, { "epoch": 0.9941412272587111, "grad_norm": 0.17909419536590576, "learning_rate": 7.020892894289058e-05, "loss": 0.6393, "step": 4836 }, { "epoch": 0.9943467982320896, "grad_norm": 0.17869077622890472, "learning_rate": 7.020065458894575e-05, "loss": 0.6718, "step": 4837 }, { "epoch": 0.9945523692054682, "grad_norm": 0.18221206963062286, "learning_rate": 7.019237899348247e-05, "loss": 0.6812, "step": 4838 }, { "epoch": 0.9947579401788468, "grad_norm": 0.1849188208580017, "learning_rate": 7.018410215690841e-05, "loss": 0.586, "step": 4839 }, { "epoch": 0.9949635111522253, "grad_norm": 0.2003324180841446, "learning_rate": 7.017582407963136e-05, "loss": 0.6561, "step": 4840 }, { "epoch": 0.9951690821256038, "grad_norm": 0.19074849784374237, "learning_rate": 7.016754476205913e-05, "loss": 0.6509, "step": 4841 }, { "epoch": 0.9953746530989824, "grad_norm": 0.14551801979541779, "learning_rate": 7.01592642045996e-05, "loss": 0.5873, "step": 4842 }, { "epoch": 0.995580224072361, "grad_norm": 0.1880098134279251, "learning_rate": 7.01509824076607e-05, "loss": 0.6588, "step": 4843 }, { "epoch": 0.9957857950457395, "grad_norm": 0.18471471965312958, "learning_rate": 7.014269937165048e-05, "loss": 0.6426, "step": 4844 }, { "epoch": 0.9959913660191181, "grad_norm": 1.0276774168014526, "learning_rate": 7.013441509697696e-05, "loss": 0.6583, "step": 4845 }, { "epoch": 0.9961969369924967, "grad_norm": 0.13645489513874054, "learning_rate": 7.01261295840483e-05, "loss": 0.5675, "step": 4846 }, { "epoch": 0.9964025079658753, "grad_norm": 0.12980090081691742, "learning_rate": 7.011784283327266e-05, "loss": 0.5932, "step": 4847 }, { "epoch": 0.9966080789392537, "grad_norm": 0.21611304581165314, "learning_rate": 7.010955484505831e-05, "loss": 0.6966, "step": 4848 }, { "epoch": 0.9968136499126323, "grad_norm": 0.21331064403057098, "learning_rate": 7.010126561981356e-05, "loss": 0.6875, "step": 4849 }, { "epoch": 0.9970192208860109, "grad_norm": 0.19772310554981232, "learning_rate": 7.009297515794678e-05, "loss": 0.6665, "step": 4850 }, { "epoch": 0.9972247918593895, "grad_norm": 0.20528611540794373, "learning_rate": 7.008468345986637e-05, "loss": 0.6648, "step": 4851 }, { "epoch": 0.997430362832768, "grad_norm": 0.2074098140001297, "learning_rate": 7.007639052598088e-05, "loss": 0.6817, "step": 4852 }, { "epoch": 0.9976359338061466, "grad_norm": 0.19538044929504395, "learning_rate": 7.006809635669882e-05, "loss": 0.6414, "step": 4853 }, { "epoch": 0.9978415047795252, "grad_norm": 0.189046248793602, "learning_rate": 7.005980095242883e-05, "loss": 0.5861, "step": 4854 }, { "epoch": 0.9980470757529037, "grad_norm": 0.14991891384124756, "learning_rate": 7.005150431357957e-05, "loss": 0.5907, "step": 4855 }, { "epoch": 0.9982526467262822, "grad_norm": 0.24315035343170166, "learning_rate": 7.004320644055979e-05, "loss": 0.6664, "step": 4856 }, { "epoch": 0.9984582176996608, "grad_norm": 0.183399498462677, "learning_rate": 7.003490733377827e-05, "loss": 0.5846, "step": 4857 }, { "epoch": 0.9986637886730394, "grad_norm": 0.20931483805179596, "learning_rate": 7.002660699364389e-05, "loss": 0.6624, "step": 4858 }, { "epoch": 0.9988693596464179, "grad_norm": 0.19488368928432465, "learning_rate": 7.001830542056555e-05, "loss": 0.6757, "step": 4859 }, { "epoch": 0.9990749306197965, "grad_norm": 0.16465352475643158, "learning_rate": 7.001000261495223e-05, "loss": 0.5584, "step": 4860 }, { "epoch": 0.999280501593175, "grad_norm": 0.17341670393943787, "learning_rate": 7.0001698577213e-05, "loss": 0.5857, "step": 4861 }, { "epoch": 0.9994860725665536, "grad_norm": 0.21987827122211456, "learning_rate": 6.99933933077569e-05, "loss": 0.6423, "step": 4862 }, { "epoch": 0.9996916435399321, "grad_norm": 0.21325050294399261, "learning_rate": 6.998508680699317e-05, "loss": 0.6558, "step": 4863 }, { "epoch": 0.9998972145133107, "grad_norm": 0.1891472041606903, "learning_rate": 6.997677907533099e-05, "loss": 0.6461, "step": 4864 }, { "epoch": 1.0001027854866893, "grad_norm": 0.20316524803638458, "learning_rate": 6.996847011317963e-05, "loss": 0.5995, "step": 4865 }, { "epoch": 1.0003083564600679, "grad_norm": 0.271843820810318, "learning_rate": 6.996015992094846e-05, "loss": 0.5709, "step": 4866 }, { "epoch": 1.0005139274334465, "grad_norm": 0.22854308784008026, "learning_rate": 6.995184849904686e-05, "loss": 0.5628, "step": 4867 }, { "epoch": 1.000719498406825, "grad_norm": 0.20615056157112122, "learning_rate": 6.994353584788431e-05, "loss": 0.5559, "step": 4868 }, { "epoch": 1.0009250693802034, "grad_norm": 0.24276702105998993, "learning_rate": 6.993522196787035e-05, "loss": 0.5764, "step": 4869 }, { "epoch": 1.001130640353582, "grad_norm": 0.28377482295036316, "learning_rate": 6.992690685941454e-05, "loss": 0.5666, "step": 4870 }, { "epoch": 1.0013362113269606, "grad_norm": 0.2509450912475586, "learning_rate": 6.991859052292654e-05, "loss": 0.5716, "step": 4871 }, { "epoch": 1.0015417823003392, "grad_norm": 0.20262686908245087, "learning_rate": 6.991027295881606e-05, "loss": 0.5314, "step": 4872 }, { "epoch": 1.0017473532737178, "grad_norm": 0.1771395355463028, "learning_rate": 6.990195416749287e-05, "loss": 0.5826, "step": 4873 }, { "epoch": 1.0019529242470964, "grad_norm": 0.22669513523578644, "learning_rate": 6.989363414936676e-05, "loss": 0.5785, "step": 4874 }, { "epoch": 1.002158495220475, "grad_norm": 0.18329079449176788, "learning_rate": 6.988531290484768e-05, "loss": 0.5626, "step": 4875 }, { "epoch": 1.0023640661938533, "grad_norm": 0.17352893948554993, "learning_rate": 6.987699043434552e-05, "loss": 0.5549, "step": 4876 }, { "epoch": 1.002569637167232, "grad_norm": 0.2029443383216858, "learning_rate": 6.986866673827032e-05, "loss": 0.5679, "step": 4877 }, { "epoch": 1.0027752081406105, "grad_norm": 0.21238186955451965, "learning_rate": 6.986034181703216e-05, "loss": 0.579, "step": 4878 }, { "epoch": 1.002980779113989, "grad_norm": 0.20517666637897491, "learning_rate": 6.985201567104115e-05, "loss": 0.5578, "step": 4879 }, { "epoch": 1.0031863500873677, "grad_norm": 0.221823588013649, "learning_rate": 6.984368830070747e-05, "loss": 0.5559, "step": 4880 }, { "epoch": 1.0033919210607463, "grad_norm": 0.21827368438243866, "learning_rate": 6.98353597064414e-05, "loss": 0.5811, "step": 4881 }, { "epoch": 1.0035974920341248, "grad_norm": 0.20785865187644958, "learning_rate": 6.982702988865326e-05, "loss": 0.5226, "step": 4882 }, { "epoch": 1.0038030630075034, "grad_norm": 0.18137192726135254, "learning_rate": 6.981869884775336e-05, "loss": 0.5244, "step": 4883 }, { "epoch": 1.0040086339808818, "grad_norm": 0.18444296717643738, "learning_rate": 6.981036658415218e-05, "loss": 0.5603, "step": 4884 }, { "epoch": 1.0042142049542604, "grad_norm": 0.22535867989063263, "learning_rate": 6.980203309826021e-05, "loss": 0.5684, "step": 4885 }, { "epoch": 1.004419775927639, "grad_norm": 0.21289990842342377, "learning_rate": 6.979369839048799e-05, "loss": 0.5214, "step": 4886 }, { "epoch": 1.0046253469010176, "grad_norm": 0.16380147635936737, "learning_rate": 6.978536246124615e-05, "loss": 0.5145, "step": 4887 }, { "epoch": 1.0048309178743962, "grad_norm": 0.170881450176239, "learning_rate": 6.977702531094534e-05, "loss": 0.5329, "step": 4888 }, { "epoch": 1.0050364888477747, "grad_norm": 0.17499133944511414, "learning_rate": 6.976868693999629e-05, "loss": 0.5228, "step": 4889 }, { "epoch": 1.0052420598211533, "grad_norm": 0.20944778621196747, "learning_rate": 6.976034734880981e-05, "loss": 0.5465, "step": 4890 }, { "epoch": 1.005447630794532, "grad_norm": 0.20664618909358978, "learning_rate": 6.975200653779674e-05, "loss": 0.5645, "step": 4891 }, { "epoch": 1.0056532017679103, "grad_norm": 0.20452165603637695, "learning_rate": 6.974366450736801e-05, "loss": 0.536, "step": 4892 }, { "epoch": 1.0058587727412889, "grad_norm": 0.20522767305374146, "learning_rate": 6.973532125793457e-05, "loss": 0.5524, "step": 4893 }, { "epoch": 1.0060643437146675, "grad_norm": 0.20214490592479706, "learning_rate": 6.972697678990747e-05, "loss": 0.5829, "step": 4894 }, { "epoch": 1.006269914688046, "grad_norm": 0.19097676873207092, "learning_rate": 6.971863110369778e-05, "loss": 0.5589, "step": 4895 }, { "epoch": 1.0064754856614246, "grad_norm": 0.19728168845176697, "learning_rate": 6.97102841997167e-05, "loss": 0.5546, "step": 4896 }, { "epoch": 1.0066810566348032, "grad_norm": 0.1733403503894806, "learning_rate": 6.97019360783754e-05, "loss": 0.5264, "step": 4897 }, { "epoch": 1.0068866276081818, "grad_norm": 0.17879877984523773, "learning_rate": 6.969358674008516e-05, "loss": 0.5623, "step": 4898 }, { "epoch": 1.0070921985815602, "grad_norm": 0.19583040475845337, "learning_rate": 6.968523618525733e-05, "loss": 0.5773, "step": 4899 }, { "epoch": 1.0072977695549388, "grad_norm": 0.193648099899292, "learning_rate": 6.967688441430328e-05, "loss": 0.576, "step": 4900 }, { "epoch": 1.0075033405283174, "grad_norm": 0.1968041956424713, "learning_rate": 6.966853142763448e-05, "loss": 0.5513, "step": 4901 }, { "epoch": 1.007708911501696, "grad_norm": 0.196999654173851, "learning_rate": 6.966017722566246e-05, "loss": 0.5576, "step": 4902 }, { "epoch": 1.0079144824750745, "grad_norm": 0.19729197025299072, "learning_rate": 6.965182180879873e-05, "loss": 0.5689, "step": 4903 }, { "epoch": 1.0081200534484531, "grad_norm": 0.20436379313468933, "learning_rate": 6.964346517745498e-05, "loss": 0.5766, "step": 4904 }, { "epoch": 1.0083256244218317, "grad_norm": 0.19463692605495453, "learning_rate": 6.963510733204288e-05, "loss": 0.5477, "step": 4905 }, { "epoch": 1.0085311953952103, "grad_norm": 0.19440148770809174, "learning_rate": 6.962674827297418e-05, "loss": 0.5578, "step": 4906 }, { "epoch": 1.0087367663685887, "grad_norm": 0.19789615273475647, "learning_rate": 6.961838800066072e-05, "loss": 0.5559, "step": 4907 }, { "epoch": 1.0089423373419673, "grad_norm": 0.19245871901512146, "learning_rate": 6.961002651551432e-05, "loss": 0.5484, "step": 4908 }, { "epoch": 1.0091479083153458, "grad_norm": 0.19907300174236298, "learning_rate": 6.960166381794697e-05, "loss": 0.5343, "step": 4909 }, { "epoch": 1.0093534792887244, "grad_norm": 0.19037161767482758, "learning_rate": 6.959329990837061e-05, "loss": 0.5389, "step": 4910 }, { "epoch": 1.009559050262103, "grad_norm": 0.18873098492622375, "learning_rate": 6.958493478719733e-05, "loss": 0.5582, "step": 4911 }, { "epoch": 1.0097646212354816, "grad_norm": 0.19992291927337646, "learning_rate": 6.95765684548392e-05, "loss": 0.5746, "step": 4912 }, { "epoch": 1.0099701922088602, "grad_norm": 0.2006637305021286, "learning_rate": 6.956820091170844e-05, "loss": 0.5731, "step": 4913 }, { "epoch": 1.0101757631822386, "grad_norm": 0.1814371794462204, "learning_rate": 6.955983215821724e-05, "loss": 0.5409, "step": 4914 }, { "epoch": 1.0103813341556172, "grad_norm": 0.16987626254558563, "learning_rate": 6.955146219477788e-05, "loss": 0.5778, "step": 4915 }, { "epoch": 1.0105869051289957, "grad_norm": 0.19764257967472076, "learning_rate": 6.954309102180276e-05, "loss": 0.5729, "step": 4916 }, { "epoch": 1.0107924761023743, "grad_norm": 0.19731703400611877, "learning_rate": 6.953471863970424e-05, "loss": 0.5507, "step": 4917 }, { "epoch": 1.010998047075753, "grad_norm": 0.18993428349494934, "learning_rate": 6.952634504889484e-05, "loss": 0.5448, "step": 4918 }, { "epoch": 1.0112036180491315, "grad_norm": 0.1911395788192749, "learning_rate": 6.951797024978703e-05, "loss": 0.5319, "step": 4919 }, { "epoch": 1.01140918902251, "grad_norm": 0.2100227028131485, "learning_rate": 6.950959424279342e-05, "loss": 0.5865, "step": 4920 }, { "epoch": 1.0116147599958887, "grad_norm": 0.1891854703426361, "learning_rate": 6.950121702832666e-05, "loss": 0.5353, "step": 4921 }, { "epoch": 1.011820330969267, "grad_norm": 0.16457346081733704, "learning_rate": 6.949283860679946e-05, "loss": 0.519, "step": 4922 }, { "epoch": 1.0120259019426456, "grad_norm": 0.13976171612739563, "learning_rate": 6.948445897862458e-05, "loss": 0.5277, "step": 4923 }, { "epoch": 1.0122314729160242, "grad_norm": 0.17471906542778015, "learning_rate": 6.947607814421486e-05, "loss": 0.5693, "step": 4924 }, { "epoch": 1.0124370438894028, "grad_norm": 0.22914856672286987, "learning_rate": 6.946769610398316e-05, "loss": 0.5756, "step": 4925 }, { "epoch": 1.0126426148627814, "grad_norm": 0.20704926550388336, "learning_rate": 6.945931285834242e-05, "loss": 0.5726, "step": 4926 }, { "epoch": 1.01284818583616, "grad_norm": 0.19101639091968536, "learning_rate": 6.945092840770567e-05, "loss": 0.556, "step": 4927 }, { "epoch": 1.0130537568095386, "grad_norm": 0.2030901461839676, "learning_rate": 6.944254275248597e-05, "loss": 0.5723, "step": 4928 }, { "epoch": 1.013259327782917, "grad_norm": 0.24590568244457245, "learning_rate": 6.943415589309642e-05, "loss": 0.551, "step": 4929 }, { "epoch": 1.0134648987562955, "grad_norm": 0.1951897293329239, "learning_rate": 6.942576782995022e-05, "loss": 0.5712, "step": 4930 }, { "epoch": 1.0136704697296741, "grad_norm": 0.19376187026500702, "learning_rate": 6.94173785634606e-05, "loss": 0.5565, "step": 4931 }, { "epoch": 1.0138760407030527, "grad_norm": 0.20647601783275604, "learning_rate": 6.940898809404086e-05, "loss": 0.5822, "step": 4932 }, { "epoch": 1.0140816116764313, "grad_norm": 0.1940944641828537, "learning_rate": 6.940059642210438e-05, "loss": 0.5529, "step": 4933 }, { "epoch": 1.0142871826498099, "grad_norm": 0.18651318550109863, "learning_rate": 6.939220354806455e-05, "loss": 0.519, "step": 4934 }, { "epoch": 1.0144927536231885, "grad_norm": 0.17096173763275146, "learning_rate": 6.938380947233487e-05, "loss": 0.5716, "step": 4935 }, { "epoch": 1.014698324596567, "grad_norm": 0.20484822988510132, "learning_rate": 6.937541419532885e-05, "loss": 0.569, "step": 4936 }, { "epoch": 1.0149038955699454, "grad_norm": 2.4529898166656494, "learning_rate": 6.936701771746012e-05, "loss": 0.5871, "step": 4937 }, { "epoch": 1.015109466543324, "grad_norm": 0.1707240641117096, "learning_rate": 6.935862003914231e-05, "loss": 0.5322, "step": 4938 }, { "epoch": 1.0153150375167026, "grad_norm": 0.20285925269126892, "learning_rate": 6.935022116078915e-05, "loss": 0.5767, "step": 4939 }, { "epoch": 1.0155206084900812, "grad_norm": 0.23825408518314362, "learning_rate": 6.93418210828144e-05, "loss": 0.5537, "step": 4940 }, { "epoch": 1.0157261794634598, "grad_norm": 0.25726380944252014, "learning_rate": 6.93334198056319e-05, "loss": 0.5728, "step": 4941 }, { "epoch": 1.0159317504368384, "grad_norm": 0.2844366431236267, "learning_rate": 6.932501732965554e-05, "loss": 0.5752, "step": 4942 }, { "epoch": 1.016137321410217, "grad_norm": 0.24454839527606964, "learning_rate": 6.931661365529926e-05, "loss": 0.5687, "step": 4943 }, { "epoch": 1.0163428923835955, "grad_norm": 0.2527025043964386, "learning_rate": 6.930820878297711e-05, "loss": 0.5439, "step": 4944 }, { "epoch": 1.016548463356974, "grad_norm": 0.5170005559921265, "learning_rate": 6.92998027131031e-05, "loss": 0.5863, "step": 4945 }, { "epoch": 1.0167540343303525, "grad_norm": 0.2004466950893402, "learning_rate": 6.92913954460914e-05, "loss": 0.542, "step": 4946 }, { "epoch": 1.016959605303731, "grad_norm": 0.2018880397081375, "learning_rate": 6.928298698235619e-05, "loss": 0.5909, "step": 4947 }, { "epoch": 1.0171651762771097, "grad_norm": 0.21628795564174652, "learning_rate": 6.927457732231169e-05, "loss": 0.5622, "step": 4948 }, { "epoch": 1.0173707472504883, "grad_norm": 0.21719391644001007, "learning_rate": 6.926616646637225e-05, "loss": 0.5624, "step": 4949 }, { "epoch": 1.0175763182238668, "grad_norm": 0.20705457031726837, "learning_rate": 6.92577544149522e-05, "loss": 0.56, "step": 4950 }, { "epoch": 1.0177818891972454, "grad_norm": 0.1947423666715622, "learning_rate": 6.924934116846596e-05, "loss": 0.5193, "step": 4951 }, { "epoch": 1.0179874601706238, "grad_norm": 0.1868080198764801, "learning_rate": 6.924092672732802e-05, "loss": 0.5699, "step": 4952 }, { "epoch": 1.0181930311440024, "grad_norm": 0.2158852070569992, "learning_rate": 6.923251109195293e-05, "loss": 0.5611, "step": 4953 }, { "epoch": 1.018398602117381, "grad_norm": 0.17527857422828674, "learning_rate": 6.922409426275528e-05, "loss": 0.5361, "step": 4954 }, { "epoch": 1.0186041730907596, "grad_norm": 0.16154874861240387, "learning_rate": 6.921567624014973e-05, "loss": 0.5337, "step": 4955 }, { "epoch": 1.0188097440641382, "grad_norm": 0.18655456602573395, "learning_rate": 6.920725702455099e-05, "loss": 0.5684, "step": 4956 }, { "epoch": 1.0190153150375167, "grad_norm": 0.22478148341178894, "learning_rate": 6.919883661637383e-05, "loss": 0.5722, "step": 4957 }, { "epoch": 1.0192208860108953, "grad_norm": 0.20847651362419128, "learning_rate": 6.919041501603313e-05, "loss": 0.5891, "step": 4958 }, { "epoch": 1.019426456984274, "grad_norm": 0.17430467903614044, "learning_rate": 6.918199222394373e-05, "loss": 0.5449, "step": 4959 }, { "epoch": 1.0196320279576523, "grad_norm": 0.17865034937858582, "learning_rate": 6.917356824052059e-05, "loss": 0.54, "step": 4960 }, { "epoch": 1.0198375989310309, "grad_norm": 0.19386602938175201, "learning_rate": 6.916514306617874e-05, "loss": 0.5582, "step": 4961 }, { "epoch": 1.0200431699044095, "grad_norm": 0.1756899505853653, "learning_rate": 6.915671670133324e-05, "loss": 0.521, "step": 4962 }, { "epoch": 1.020248740877788, "grad_norm": 0.16583296656608582, "learning_rate": 6.914828914639922e-05, "loss": 0.5647, "step": 4963 }, { "epoch": 1.0204543118511666, "grad_norm": 0.19850464165210724, "learning_rate": 6.913986040179185e-05, "loss": 0.5415, "step": 4964 }, { "epoch": 1.0206598828245452, "grad_norm": 0.2507860064506531, "learning_rate": 6.913143046792639e-05, "loss": 0.5441, "step": 4965 }, { "epoch": 1.0208654537979238, "grad_norm": 0.19658030569553375, "learning_rate": 6.912299934521814e-05, "loss": 0.5782, "step": 4966 }, { "epoch": 1.0210710247713024, "grad_norm": 0.19466283917427063, "learning_rate": 6.911456703408246e-05, "loss": 0.5552, "step": 4967 }, { "epoch": 1.0212765957446808, "grad_norm": 0.16281276941299438, "learning_rate": 6.910613353493479e-05, "loss": 0.5291, "step": 4968 }, { "epoch": 1.0214821667180594, "grad_norm": 0.1634058803319931, "learning_rate": 6.909769884819057e-05, "loss": 0.5497, "step": 4969 }, { "epoch": 1.021687737691438, "grad_norm": 0.1930556446313858, "learning_rate": 6.908926297426537e-05, "loss": 0.5608, "step": 4970 }, { "epoch": 1.0218933086648165, "grad_norm": 0.19795656204223633, "learning_rate": 6.908082591357478e-05, "loss": 0.5729, "step": 4971 }, { "epoch": 1.0220988796381951, "grad_norm": 0.19776557385921478, "learning_rate": 6.907238766653445e-05, "loss": 0.5634, "step": 4972 }, { "epoch": 1.0223044506115737, "grad_norm": 0.19151826202869415, "learning_rate": 6.90639482335601e-05, "loss": 0.571, "step": 4973 }, { "epoch": 1.0225100215849523, "grad_norm": 0.18954800069332123, "learning_rate": 6.905550761506747e-05, "loss": 0.5519, "step": 4974 }, { "epoch": 1.0227155925583307, "grad_norm": 0.19335106015205383, "learning_rate": 6.904706581147243e-05, "loss": 0.5452, "step": 4975 }, { "epoch": 1.0229211635317093, "grad_norm": 0.20168174803256989, "learning_rate": 6.903862282319087e-05, "loss": 0.5838, "step": 4976 }, { "epoch": 1.0231267345050878, "grad_norm": 0.20087262988090515, "learning_rate": 6.90301786506387e-05, "loss": 0.5656, "step": 4977 }, { "epoch": 1.0233323054784664, "grad_norm": 0.1917273849248886, "learning_rate": 6.902173329423195e-05, "loss": 0.5679, "step": 4978 }, { "epoch": 1.023537876451845, "grad_norm": 0.1951013058423996, "learning_rate": 6.901328675438669e-05, "loss": 0.5635, "step": 4979 }, { "epoch": 1.0237434474252236, "grad_norm": 0.20168475806713104, "learning_rate": 6.9004839031519e-05, "loss": 0.5826, "step": 4980 }, { "epoch": 1.0239490183986022, "grad_norm": 0.19177857041358948, "learning_rate": 6.899639012604512e-05, "loss": 0.5675, "step": 4981 }, { "epoch": 1.0241545893719808, "grad_norm": 0.15916599333286285, "learning_rate": 6.898794003838124e-05, "loss": 0.5457, "step": 4982 }, { "epoch": 1.0243601603453591, "grad_norm": 0.1605004519224167, "learning_rate": 6.897948876894369e-05, "loss": 0.5663, "step": 4983 }, { "epoch": 1.0245657313187377, "grad_norm": 0.19485965371131897, "learning_rate": 6.897103631814878e-05, "loss": 0.5683, "step": 4984 }, { "epoch": 1.0247713022921163, "grad_norm": 0.1926756501197815, "learning_rate": 6.896258268641298e-05, "loss": 0.5525, "step": 4985 }, { "epoch": 1.024976873265495, "grad_norm": 0.19675122201442719, "learning_rate": 6.895412787415272e-05, "loss": 0.5811, "step": 4986 }, { "epoch": 1.0251824442388735, "grad_norm": 0.19753362238407135, "learning_rate": 6.894567188178454e-05, "loss": 0.582, "step": 4987 }, { "epoch": 1.025388015212252, "grad_norm": 0.195309117436409, "learning_rate": 6.893721470972502e-05, "loss": 0.5446, "step": 4988 }, { "epoch": 1.0255935861856307, "grad_norm": 0.19395774602890015, "learning_rate": 6.892875635839081e-05, "loss": 0.574, "step": 4989 }, { "epoch": 1.025799157159009, "grad_norm": 0.1611412912607193, "learning_rate": 6.892029682819864e-05, "loss": 0.5342, "step": 4990 }, { "epoch": 1.0260047281323876, "grad_norm": 0.1334841102361679, "learning_rate": 6.891183611956523e-05, "loss": 0.5458, "step": 4991 }, { "epoch": 1.0262102991057662, "grad_norm": 0.18414229154586792, "learning_rate": 6.890337423290743e-05, "loss": 0.5658, "step": 4992 }, { "epoch": 1.0264158700791448, "grad_norm": 0.20537151396274567, "learning_rate": 6.88949111686421e-05, "loss": 0.6111, "step": 4993 }, { "epoch": 1.0266214410525234, "grad_norm": 0.18854451179504395, "learning_rate": 6.88864469271862e-05, "loss": 0.5655, "step": 4994 }, { "epoch": 1.026827012025902, "grad_norm": 0.19057627022266388, "learning_rate": 6.887798150895667e-05, "loss": 0.5645, "step": 4995 }, { "epoch": 1.0270325829992806, "grad_norm": 0.22320972383022308, "learning_rate": 6.886951491437062e-05, "loss": 0.5688, "step": 4996 }, { "epoch": 1.0272381539726592, "grad_norm": 0.2112189084291458, "learning_rate": 6.886104714384512e-05, "loss": 0.5599, "step": 4997 }, { "epoch": 1.0274437249460375, "grad_norm": 0.1889009028673172, "learning_rate": 6.885257819779736e-05, "loss": 0.5472, "step": 4998 }, { "epoch": 1.0276492959194161, "grad_norm": 0.18562033772468567, "learning_rate": 6.884410807664456e-05, "loss": 0.5478, "step": 4999 }, { "epoch": 1.0278548668927947, "grad_norm": 0.1892947107553482, "learning_rate": 6.8835636780804e-05, "loss": 0.5561, "step": 5000 }, { "epoch": 1.0280604378661733, "grad_norm": 0.19414404034614563, "learning_rate": 6.882716431069303e-05, "loss": 0.5769, "step": 5001 }, { "epoch": 1.0282660088395519, "grad_norm": 0.194126158952713, "learning_rate": 6.881869066672904e-05, "loss": 0.5609, "step": 5002 }, { "epoch": 1.0284715798129305, "grad_norm": 0.1930353194475174, "learning_rate": 6.881021584932949e-05, "loss": 0.57, "step": 5003 }, { "epoch": 1.028677150786309, "grad_norm": 0.18623441457748413, "learning_rate": 6.88017398589119e-05, "loss": 0.5429, "step": 5004 }, { "epoch": 1.0288827217596874, "grad_norm": 0.1921243667602539, "learning_rate": 6.879326269589382e-05, "loss": 0.5579, "step": 5005 }, { "epoch": 1.029088292733066, "grad_norm": 0.18247570097446442, "learning_rate": 6.87847843606929e-05, "loss": 0.5402, "step": 5006 }, { "epoch": 1.0292938637064446, "grad_norm": 0.17088961601257324, "learning_rate": 6.877630485372684e-05, "loss": 0.5483, "step": 5007 }, { "epoch": 1.0294994346798232, "grad_norm": 0.20109815895557404, "learning_rate": 6.876782417541334e-05, "loss": 0.5541, "step": 5008 }, { "epoch": 1.0297050056532018, "grad_norm": 0.19609639048576355, "learning_rate": 6.875934232617027e-05, "loss": 0.5629, "step": 5009 }, { "epoch": 1.0299105766265804, "grad_norm": 0.19312147796154022, "learning_rate": 6.875085930641543e-05, "loss": 0.5603, "step": 5010 }, { "epoch": 1.030116147599959, "grad_norm": 0.1975242644548416, "learning_rate": 6.874237511656677e-05, "loss": 0.5763, "step": 5011 }, { "epoch": 1.0303217185733375, "grad_norm": 0.1999368965625763, "learning_rate": 6.873388975704225e-05, "loss": 0.5884, "step": 5012 }, { "epoch": 1.030527289546716, "grad_norm": 0.16335703432559967, "learning_rate": 6.872540322825994e-05, "loss": 0.5181, "step": 5013 }, { "epoch": 1.0307328605200945, "grad_norm": 0.17105185985565186, "learning_rate": 6.871691553063788e-05, "loss": 0.566, "step": 5014 }, { "epoch": 1.030938431493473, "grad_norm": 0.21174640953540802, "learning_rate": 6.870842666459425e-05, "loss": 0.5851, "step": 5015 }, { "epoch": 1.0311440024668517, "grad_norm": 0.19627945125102997, "learning_rate": 6.869993663054725e-05, "loss": 0.5655, "step": 5016 }, { "epoch": 1.0313495734402303, "grad_norm": 0.18857216835021973, "learning_rate": 6.869144542891517e-05, "loss": 0.5448, "step": 5017 }, { "epoch": 1.0315551444136088, "grad_norm": 0.16696830093860626, "learning_rate": 6.868295306011628e-05, "loss": 0.5241, "step": 5018 }, { "epoch": 1.0317607153869874, "grad_norm": 0.16772493720054626, "learning_rate": 6.867445952456899e-05, "loss": 0.5759, "step": 5019 }, { "epoch": 1.0319662863603658, "grad_norm": 0.19662131369113922, "learning_rate": 6.866596482269175e-05, "loss": 0.5647, "step": 5020 }, { "epoch": 1.0321718573337444, "grad_norm": 0.20151005685329437, "learning_rate": 6.8657468954903e-05, "loss": 0.5577, "step": 5021 }, { "epoch": 1.032377428307123, "grad_norm": 0.16064363718032837, "learning_rate": 6.864897192162136e-05, "loss": 0.5182, "step": 5022 }, { "epoch": 1.0325829992805016, "grad_norm": 0.16647809743881226, "learning_rate": 6.864047372326539e-05, "loss": 0.5572, "step": 5023 }, { "epoch": 1.0327885702538802, "grad_norm": 0.18600209057331085, "learning_rate": 6.86319743602538e-05, "loss": 0.5426, "step": 5024 }, { "epoch": 1.0329941412272587, "grad_norm": 0.15970858931541443, "learning_rate": 6.862347383300529e-05, "loss": 0.5119, "step": 5025 }, { "epoch": 1.0331997122006373, "grad_norm": 0.16188785433769226, "learning_rate": 6.861497214193861e-05, "loss": 0.5732, "step": 5026 }, { "epoch": 1.033405283174016, "grad_norm": 0.19583800435066223, "learning_rate": 6.860646928747265e-05, "loss": 0.5387, "step": 5027 }, { "epoch": 1.0336108541473943, "grad_norm": 0.19050170481204987, "learning_rate": 6.859796527002627e-05, "loss": 0.5715, "step": 5028 }, { "epoch": 1.0338164251207729, "grad_norm": 0.19282078742980957, "learning_rate": 6.858946009001844e-05, "loss": 0.5717, "step": 5029 }, { "epoch": 1.0340219960941515, "grad_norm": 0.19418777525424957, "learning_rate": 6.858095374786818e-05, "loss": 0.558, "step": 5030 }, { "epoch": 1.03422756706753, "grad_norm": 0.2037775069475174, "learning_rate": 6.857244624399455e-05, "loss": 0.5487, "step": 5031 }, { "epoch": 1.0344331380409086, "grad_norm": 0.20054981112480164, "learning_rate": 6.856393757881665e-05, "loss": 0.5565, "step": 5032 }, { "epoch": 1.0346387090142872, "grad_norm": 0.2035524547100067, "learning_rate": 6.855542775275369e-05, "loss": 0.5952, "step": 5033 }, { "epoch": 1.0348442799876658, "grad_norm": 0.1957496851682663, "learning_rate": 6.854691676622492e-05, "loss": 0.5563, "step": 5034 }, { "epoch": 1.0350498509610444, "grad_norm": 0.21326994895935059, "learning_rate": 6.853840461964961e-05, "loss": 0.5745, "step": 5035 }, { "epoch": 1.0352554219344228, "grad_norm": 0.16329696774482727, "learning_rate": 6.852989131344712e-05, "loss": 0.5331, "step": 5036 }, { "epoch": 1.0354609929078014, "grad_norm": 0.16014549136161804, "learning_rate": 6.852137684803686e-05, "loss": 0.5432, "step": 5037 }, { "epoch": 1.03566656388118, "grad_norm": 0.164669468998909, "learning_rate": 6.851286122383831e-05, "loss": 0.5325, "step": 5038 }, { "epoch": 1.0358721348545585, "grad_norm": 0.16093246638774872, "learning_rate": 6.850434444127098e-05, "loss": 0.5639, "step": 5039 }, { "epoch": 1.0360777058279371, "grad_norm": 0.20291577279567719, "learning_rate": 6.849582650075445e-05, "loss": 0.5414, "step": 5040 }, { "epoch": 1.0362832768013157, "grad_norm": 0.16935724020004272, "learning_rate": 6.848730740270839e-05, "loss": 0.5082, "step": 5041 }, { "epoch": 1.0364888477746943, "grad_norm": 0.1641445755958557, "learning_rate": 6.847878714755244e-05, "loss": 0.5472, "step": 5042 }, { "epoch": 1.0366944187480727, "grad_norm": 0.196893572807312, "learning_rate": 6.847026573570642e-05, "loss": 0.5856, "step": 5043 }, { "epoch": 1.0368999897214513, "grad_norm": 0.19081740081310272, "learning_rate": 6.846174316759007e-05, "loss": 0.5622, "step": 5044 }, { "epoch": 1.0371055606948298, "grad_norm": 0.16471846401691437, "learning_rate": 6.845321944362332e-05, "loss": 0.5485, "step": 5045 }, { "epoch": 1.0373111316682084, "grad_norm": 0.16739195585250854, "learning_rate": 6.844469456422606e-05, "loss": 0.5717, "step": 5046 }, { "epoch": 1.037516702641587, "grad_norm": 0.19602900743484497, "learning_rate": 6.843616852981831e-05, "loss": 0.563, "step": 5047 }, { "epoch": 1.0377222736149656, "grad_norm": 0.19288770854473114, "learning_rate": 6.842764134082004e-05, "loss": 0.5641, "step": 5048 }, { "epoch": 1.0379278445883442, "grad_norm": 0.18607531487941742, "learning_rate": 6.841911299765141e-05, "loss": 0.5437, "step": 5049 }, { "epoch": 1.0381334155617228, "grad_norm": 0.20571644604206085, "learning_rate": 6.84105835007325e-05, "loss": 0.5444, "step": 5050 }, { "epoch": 1.0383389865351011, "grad_norm": 0.2017316222190857, "learning_rate": 6.840205285048359e-05, "loss": 0.5615, "step": 5051 }, { "epoch": 1.0385445575084797, "grad_norm": 0.19289067387580872, "learning_rate": 6.839352104732492e-05, "loss": 0.5715, "step": 5052 }, { "epoch": 1.0387501284818583, "grad_norm": 0.19483251869678497, "learning_rate": 6.838498809167681e-05, "loss": 0.5936, "step": 5053 }, { "epoch": 1.038955699455237, "grad_norm": 0.17118024826049805, "learning_rate": 6.837645398395962e-05, "loss": 0.5091, "step": 5054 }, { "epoch": 1.0391612704286155, "grad_norm": 0.15377415716648102, "learning_rate": 6.836791872459382e-05, "loss": 0.5493, "step": 5055 }, { "epoch": 1.039366841401994, "grad_norm": 0.16883303225040436, "learning_rate": 6.835938231399989e-05, "loss": 0.5309, "step": 5056 }, { "epoch": 1.0395724123753727, "grad_norm": 0.18094538152217865, "learning_rate": 6.835084475259835e-05, "loss": 0.5579, "step": 5057 }, { "epoch": 1.0397779833487513, "grad_norm": 0.1928682029247284, "learning_rate": 6.834230604080986e-05, "loss": 0.5608, "step": 5058 }, { "epoch": 1.0399835543221296, "grad_norm": 0.16030603647232056, "learning_rate": 6.833376617905504e-05, "loss": 0.5228, "step": 5059 }, { "epoch": 1.0401891252955082, "grad_norm": 0.16263644397258759, "learning_rate": 6.832522516775462e-05, "loss": 0.5724, "step": 5060 }, { "epoch": 1.0403946962688868, "grad_norm": 0.2015358805656433, "learning_rate": 6.831668300732938e-05, "loss": 0.5652, "step": 5061 }, { "epoch": 1.0406002672422654, "grad_norm": 0.19693398475646973, "learning_rate": 6.830813969820015e-05, "loss": 0.5457, "step": 5062 }, { "epoch": 1.040805838215644, "grad_norm": 0.19118033349514008, "learning_rate": 6.829959524078782e-05, "loss": 0.5615, "step": 5063 }, { "epoch": 1.0410114091890226, "grad_norm": 0.2018975466489792, "learning_rate": 6.829104963551332e-05, "loss": 0.5883, "step": 5064 }, { "epoch": 1.0412169801624012, "grad_norm": 0.1714175045490265, "learning_rate": 6.828250288279768e-05, "loss": 0.514, "step": 5065 }, { "epoch": 1.0414225511357795, "grad_norm": 0.1588568538427353, "learning_rate": 6.827395498306195e-05, "loss": 0.5603, "step": 5066 }, { "epoch": 1.0416281221091581, "grad_norm": 0.18845230340957642, "learning_rate": 6.826540593672724e-05, "loss": 0.5685, "step": 5067 }, { "epoch": 1.0418336930825367, "grad_norm": 0.19369468092918396, "learning_rate": 6.825685574421471e-05, "loss": 0.5599, "step": 5068 }, { "epoch": 1.0420392640559153, "grad_norm": 0.19052891433238983, "learning_rate": 6.824830440594561e-05, "loss": 0.5593, "step": 5069 }, { "epoch": 1.0422448350292939, "grad_norm": 0.19876545667648315, "learning_rate": 6.823975192234123e-05, "loss": 0.5911, "step": 5070 }, { "epoch": 1.0424504060026725, "grad_norm": 0.19648174941539764, "learning_rate": 6.823119829382285e-05, "loss": 0.544, "step": 5071 }, { "epoch": 1.042655976976051, "grad_norm": 0.20026130974292755, "learning_rate": 6.822264352081194e-05, "loss": 0.5574, "step": 5072 }, { "epoch": 1.0428615479494296, "grad_norm": 0.18708030879497528, "learning_rate": 6.821408760372994e-05, "loss": 0.5367, "step": 5073 }, { "epoch": 1.043067118922808, "grad_norm": 0.18605582416057587, "learning_rate": 6.820553054299832e-05, "loss": 0.5383, "step": 5074 }, { "epoch": 1.0432726898961866, "grad_norm": 0.19726519286632538, "learning_rate": 6.81969723390387e-05, "loss": 0.5922, "step": 5075 }, { "epoch": 1.0434782608695652, "grad_norm": 0.1689324975013733, "learning_rate": 6.818841299227264e-05, "loss": 0.5238, "step": 5076 }, { "epoch": 1.0436838318429438, "grad_norm": 0.16380931437015533, "learning_rate": 6.817985250312187e-05, "loss": 0.5622, "step": 5077 }, { "epoch": 1.0438894028163224, "grad_norm": 0.18795007467269897, "learning_rate": 6.817129087200812e-05, "loss": 0.5529, "step": 5078 }, { "epoch": 1.044094973789701, "grad_norm": 0.21080972254276276, "learning_rate": 6.816272809935315e-05, "loss": 0.5553, "step": 5079 }, { "epoch": 1.0443005447630795, "grad_norm": 0.19788028299808502, "learning_rate": 6.815416418557885e-05, "loss": 0.5868, "step": 5080 }, { "epoch": 1.044506115736458, "grad_norm": 0.17419970035552979, "learning_rate": 6.81455991311071e-05, "loss": 0.5412, "step": 5081 }, { "epoch": 1.0447116867098365, "grad_norm": 0.1657952070236206, "learning_rate": 6.813703293635986e-05, "loss": 0.557, "step": 5082 }, { "epoch": 1.044917257683215, "grad_norm": 0.19872242212295532, "learning_rate": 6.812846560175916e-05, "loss": 0.5702, "step": 5083 }, { "epoch": 1.0451228286565937, "grad_norm": 0.18654018640518188, "learning_rate": 6.811989712772704e-05, "loss": 0.5414, "step": 5084 }, { "epoch": 1.0453283996299723, "grad_norm": 0.1918267160654068, "learning_rate": 6.811132751468566e-05, "loss": 0.5687, "step": 5085 }, { "epoch": 1.0455339706033508, "grad_norm": 0.1659933179616928, "learning_rate": 6.810275676305719e-05, "loss": 0.5324, "step": 5086 }, { "epoch": 1.0457395415767294, "grad_norm": 0.13235360383987427, "learning_rate": 6.809418487326388e-05, "loss": 0.5161, "step": 5087 }, { "epoch": 1.045945112550108, "grad_norm": 0.17006467282772064, "learning_rate": 6.808561184572802e-05, "loss": 0.5641, "step": 5088 }, { "epoch": 1.0461506835234864, "grad_norm": 0.20476646721363068, "learning_rate": 6.807703768087196e-05, "loss": 0.5604, "step": 5089 }, { "epoch": 1.046356254496865, "grad_norm": 0.2010469287633896, "learning_rate": 6.806846237911815e-05, "loss": 0.559, "step": 5090 }, { "epoch": 1.0465618254702436, "grad_norm": 0.1989106982946396, "learning_rate": 6.805988594088898e-05, "loss": 0.5642, "step": 5091 }, { "epoch": 1.0467673964436222, "grad_norm": 0.17077521979808807, "learning_rate": 6.805130836660703e-05, "loss": 0.537, "step": 5092 }, { "epoch": 1.0469729674170007, "grad_norm": 0.16533872485160828, "learning_rate": 6.804272965669486e-05, "loss": 0.5552, "step": 5093 }, { "epoch": 1.0471785383903793, "grad_norm": 0.19124990701675415, "learning_rate": 6.80341498115751e-05, "loss": 0.5574, "step": 5094 }, { "epoch": 1.047384109363758, "grad_norm": 0.19139717519283295, "learning_rate": 6.802556883167043e-05, "loss": 0.5446, "step": 5095 }, { "epoch": 1.0475896803371363, "grad_norm": 0.19126202166080475, "learning_rate": 6.801698671740362e-05, "loss": 0.5634, "step": 5096 }, { "epoch": 1.0477952513105149, "grad_norm": 0.19115038216114044, "learning_rate": 6.800840346919744e-05, "loss": 0.5393, "step": 5097 }, { "epoch": 1.0480008222838935, "grad_norm": 0.1927635818719864, "learning_rate": 6.799981908747476e-05, "loss": 0.5527, "step": 5098 }, { "epoch": 1.048206393257272, "grad_norm": 0.20182408392429352, "learning_rate": 6.799123357265852e-05, "loss": 0.5691, "step": 5099 }, { "epoch": 1.0484119642306506, "grad_norm": 0.1980399638414383, "learning_rate": 6.798264692517165e-05, "loss": 0.5593, "step": 5100 }, { "epoch": 1.0486175352040292, "grad_norm": 0.19788923859596252, "learning_rate": 6.797405914543717e-05, "loss": 0.571, "step": 5101 }, { "epoch": 1.0488231061774078, "grad_norm": 0.18928498029708862, "learning_rate": 6.79654702338782e-05, "loss": 0.5616, "step": 5102 }, { "epoch": 1.0490286771507864, "grad_norm": 0.18653394281864166, "learning_rate": 6.795688019091784e-05, "loss": 0.5553, "step": 5103 }, { "epoch": 1.0492342481241648, "grad_norm": 0.17353960871696472, "learning_rate": 6.79482890169793e-05, "loss": 0.5348, "step": 5104 }, { "epoch": 1.0494398190975434, "grad_norm": 0.1659521758556366, "learning_rate": 6.79396967124858e-05, "loss": 0.5799, "step": 5105 }, { "epoch": 1.049645390070922, "grad_norm": 0.16796258091926575, "learning_rate": 6.79311032778607e-05, "loss": 0.5178, "step": 5106 }, { "epoch": 1.0498509610443005, "grad_norm": 0.16122405230998993, "learning_rate": 6.79225087135273e-05, "loss": 0.555, "step": 5107 }, { "epoch": 1.0500565320176791, "grad_norm": 0.20025917887687683, "learning_rate": 6.791391301990902e-05, "loss": 0.5649, "step": 5108 }, { "epoch": 1.0502621029910577, "grad_norm": 0.19537703692913055, "learning_rate": 6.790531619742936e-05, "loss": 0.5517, "step": 5109 }, { "epoch": 1.0504676739644363, "grad_norm": 0.18383800983428955, "learning_rate": 6.789671824651183e-05, "loss": 0.5673, "step": 5110 }, { "epoch": 1.0506732449378149, "grad_norm": 0.19807079434394836, "learning_rate": 6.788811916758002e-05, "loss": 0.5811, "step": 5111 }, { "epoch": 1.0508788159111933, "grad_norm": 0.19630371034145355, "learning_rate": 6.787951896105754e-05, "loss": 0.5306, "step": 5112 }, { "epoch": 1.0510843868845718, "grad_norm": 0.18975067138671875, "learning_rate": 6.78709176273681e-05, "loss": 0.569, "step": 5113 }, { "epoch": 1.0512899578579504, "grad_norm": 0.20035415887832642, "learning_rate": 6.786231516693547e-05, "loss": 0.5387, "step": 5114 }, { "epoch": 1.051495528831329, "grad_norm": 0.1959179788827896, "learning_rate": 6.785371158018341e-05, "loss": 0.5653, "step": 5115 }, { "epoch": 1.0517010998047076, "grad_norm": 0.16765445470809937, "learning_rate": 6.78451068675358e-05, "loss": 0.5406, "step": 5116 }, { "epoch": 1.0519066707780862, "grad_norm": 0.15998391807079315, "learning_rate": 6.783650102941656e-05, "loss": 0.5506, "step": 5117 }, { "epoch": 1.0521122417514648, "grad_norm": 0.19628630578517914, "learning_rate": 6.782789406624964e-05, "loss": 0.5581, "step": 5118 }, { "epoch": 1.0523178127248431, "grad_norm": 0.20828314125537872, "learning_rate": 6.781928597845909e-05, "loss": 0.549, "step": 5119 }, { "epoch": 1.0525233836982217, "grad_norm": 0.1985846757888794, "learning_rate": 6.781067676646896e-05, "loss": 0.5625, "step": 5120 }, { "epoch": 1.0527289546716003, "grad_norm": 0.21041736006736755, "learning_rate": 6.780206643070343e-05, "loss": 0.5387, "step": 5121 }, { "epoch": 1.052934525644979, "grad_norm": 0.20188267529010773, "learning_rate": 6.779345497158664e-05, "loss": 0.5511, "step": 5122 }, { "epoch": 1.0531400966183575, "grad_norm": 0.19628292322158813, "learning_rate": 6.778484238954287e-05, "loss": 0.5509, "step": 5123 }, { "epoch": 1.053345667591736, "grad_norm": 0.19556038081645966, "learning_rate": 6.77762286849964e-05, "loss": 0.5563, "step": 5124 }, { "epoch": 1.0535512385651147, "grad_norm": 0.18803851306438446, "learning_rate": 6.776761385837161e-05, "loss": 0.5833, "step": 5125 }, { "epoch": 1.0537568095384933, "grad_norm": 0.20179903507232666, "learning_rate": 6.77589979100929e-05, "loss": 0.5745, "step": 5126 }, { "epoch": 1.0539623805118716, "grad_norm": 0.19985097646713257, "learning_rate": 6.775038084058473e-05, "loss": 0.5741, "step": 5127 }, { "epoch": 1.0541679514852502, "grad_norm": 0.19471241533756256, "learning_rate": 6.774176265027164e-05, "loss": 0.569, "step": 5128 }, { "epoch": 1.0543735224586288, "grad_norm": 0.18633931875228882, "learning_rate": 6.77331433395782e-05, "loss": 0.5274, "step": 5129 }, { "epoch": 1.0545790934320074, "grad_norm": 0.18471461534500122, "learning_rate": 6.772452290892902e-05, "loss": 0.5643, "step": 5130 }, { "epoch": 1.054784664405386, "grad_norm": 0.16938886046409607, "learning_rate": 6.771590135874883e-05, "loss": 0.5321, "step": 5131 }, { "epoch": 1.0549902353787646, "grad_norm": 0.16139011085033417, "learning_rate": 6.770727868946237e-05, "loss": 0.5531, "step": 5132 }, { "epoch": 1.0551958063521432, "grad_norm": 0.1963978409767151, "learning_rate": 6.769865490149439e-05, "loss": 0.5727, "step": 5133 }, { "epoch": 1.0554013773255218, "grad_norm": 0.16599130630493164, "learning_rate": 6.76900299952698e-05, "loss": 0.5472, "step": 5134 }, { "epoch": 1.0556069482989001, "grad_norm": 0.16204878687858582, "learning_rate": 6.768140397121347e-05, "loss": 0.5799, "step": 5135 }, { "epoch": 1.0558125192722787, "grad_norm": 0.19888906180858612, "learning_rate": 6.767277682975037e-05, "loss": 0.571, "step": 5136 }, { "epoch": 1.0560180902456573, "grad_norm": 0.19019466638565063, "learning_rate": 6.766414857130556e-05, "loss": 0.5547, "step": 5137 }, { "epoch": 1.0562236612190359, "grad_norm": 0.19364401698112488, "learning_rate": 6.765551919630407e-05, "loss": 0.5889, "step": 5138 }, { "epoch": 1.0564292321924145, "grad_norm": 0.19653116166591644, "learning_rate": 6.764688870517104e-05, "loss": 0.5778, "step": 5139 }, { "epoch": 1.056634803165793, "grad_norm": 0.20161549746990204, "learning_rate": 6.763825709833164e-05, "loss": 0.5708, "step": 5140 }, { "epoch": 1.0568403741391716, "grad_norm": 0.19760295748710632, "learning_rate": 6.762962437621112e-05, "loss": 0.5555, "step": 5141 }, { "epoch": 1.05704594511255, "grad_norm": 0.1907760202884674, "learning_rate": 6.76209905392348e-05, "loss": 0.5496, "step": 5142 }, { "epoch": 1.0572515160859286, "grad_norm": 0.19071267545223236, "learning_rate": 6.7612355587828e-05, "loss": 0.5601, "step": 5143 }, { "epoch": 1.0574570870593072, "grad_norm": 0.1920759677886963, "learning_rate": 6.760371952241613e-05, "loss": 0.5783, "step": 5144 }, { "epoch": 1.0576626580326858, "grad_norm": 0.17088396847248077, "learning_rate": 6.759508234342465e-05, "loss": 0.5436, "step": 5145 }, { "epoch": 1.0578682290060644, "grad_norm": 0.13676489889621735, "learning_rate": 6.758644405127908e-05, "loss": 0.5261, "step": 5146 }, { "epoch": 1.058073799979443, "grad_norm": 0.16138045489788055, "learning_rate": 6.757780464640496e-05, "loss": 0.5751, "step": 5147 }, { "epoch": 1.0582793709528215, "grad_norm": 0.191030353307724, "learning_rate": 6.756916412922794e-05, "loss": 0.5597, "step": 5148 }, { "epoch": 1.0584849419262001, "grad_norm": 0.19613182544708252, "learning_rate": 6.75605225001737e-05, "loss": 0.5644, "step": 5149 }, { "epoch": 1.0586905128995785, "grad_norm": 0.1948171854019165, "learning_rate": 6.755187975966795e-05, "loss": 0.5637, "step": 5150 }, { "epoch": 1.058896083872957, "grad_norm": 0.1817820966243744, "learning_rate": 6.754323590813649e-05, "loss": 0.5389, "step": 5151 }, { "epoch": 1.0591016548463357, "grad_norm": 0.1902126520872116, "learning_rate": 6.753459094600518e-05, "loss": 0.5745, "step": 5152 }, { "epoch": 1.0593072258197143, "grad_norm": 0.19361747801303864, "learning_rate": 6.752594487369989e-05, "loss": 0.5834, "step": 5153 }, { "epoch": 1.0595127967930928, "grad_norm": 0.176842600107193, "learning_rate": 6.751729769164659e-05, "loss": 0.5306, "step": 5154 }, { "epoch": 1.0597183677664714, "grad_norm": 0.16082750260829926, "learning_rate": 6.750864940027127e-05, "loss": 0.5461, "step": 5155 }, { "epoch": 1.05992393873985, "grad_norm": 0.17407187819480896, "learning_rate": 6.75e-05, "loss": 0.5479, "step": 5156 }, { "epoch": 1.0601295097132284, "grad_norm": 0.2048111855983734, "learning_rate": 6.74913494912589e-05, "loss": 0.5636, "step": 5157 }, { "epoch": 1.060335080686607, "grad_norm": 0.19643527269363403, "learning_rate": 6.748269787447414e-05, "loss": 0.577, "step": 5158 }, { "epoch": 1.0605406516599856, "grad_norm": 0.19927412271499634, "learning_rate": 6.747404515007194e-05, "loss": 0.5753, "step": 5159 }, { "epoch": 1.0607462226333642, "grad_norm": 0.20352114737033844, "learning_rate": 6.746539131847856e-05, "loss": 0.5699, "step": 5160 }, { "epoch": 1.0609517936067427, "grad_norm": 0.19683125615119934, "learning_rate": 6.745673638012037e-05, "loss": 0.5847, "step": 5161 }, { "epoch": 1.0611573645801213, "grad_norm": 0.19472289085388184, "learning_rate": 6.744808033542373e-05, "loss": 0.5613, "step": 5162 }, { "epoch": 1.0613629355535, "grad_norm": 0.19928975403308868, "learning_rate": 6.74394231848151e-05, "loss": 0.5638, "step": 5163 }, { "epoch": 1.0615685065268785, "grad_norm": 0.2239234298467636, "learning_rate": 6.743076492872096e-05, "loss": 0.5674, "step": 5164 }, { "epoch": 1.0617740775002569, "grad_norm": 0.18867623805999756, "learning_rate": 6.742210556756789e-05, "loss": 0.5242, "step": 5165 }, { "epoch": 1.0619796484736355, "grad_norm": 0.17035862803459167, "learning_rate": 6.741344510178247e-05, "loss": 0.5613, "step": 5166 }, { "epoch": 1.062185219447014, "grad_norm": 0.20985183119773865, "learning_rate": 6.740478353179138e-05, "loss": 0.5737, "step": 5167 }, { "epoch": 1.0623907904203926, "grad_norm": 0.21184340119361877, "learning_rate": 6.739612085802131e-05, "loss": 0.5656, "step": 5168 }, { "epoch": 1.0625963613937712, "grad_norm": 0.19316667318344116, "learning_rate": 6.738745708089905e-05, "loss": 0.5726, "step": 5169 }, { "epoch": 1.0628019323671498, "grad_norm": 0.19877000153064728, "learning_rate": 6.737879220085143e-05, "loss": 0.5813, "step": 5170 }, { "epoch": 1.0630075033405284, "grad_norm": 0.20379842817783356, "learning_rate": 6.73701262183053e-05, "loss": 0.5675, "step": 5171 }, { "epoch": 1.0632130743139068, "grad_norm": 0.2046133428812027, "learning_rate": 6.736145913368762e-05, "loss": 0.5525, "step": 5172 }, { "epoch": 1.0634186452872854, "grad_norm": 0.19095589220523834, "learning_rate": 6.735279094742535e-05, "loss": 0.549, "step": 5173 }, { "epoch": 1.063624216260664, "grad_norm": 0.20165902376174927, "learning_rate": 6.734412165994556e-05, "loss": 0.5807, "step": 5174 }, { "epoch": 1.0638297872340425, "grad_norm": 0.19797958433628082, "learning_rate": 6.733545127167533e-05, "loss": 0.5532, "step": 5175 }, { "epoch": 1.0640353582074211, "grad_norm": 0.1996331512928009, "learning_rate": 6.732677978304182e-05, "loss": 0.5686, "step": 5176 }, { "epoch": 1.0642409291807997, "grad_norm": 0.19206491112709045, "learning_rate": 6.731810719447222e-05, "loss": 0.5458, "step": 5177 }, { "epoch": 1.0644465001541783, "grad_norm": 0.2006731927394867, "learning_rate": 6.730943350639379e-05, "loss": 0.5576, "step": 5178 }, { "epoch": 1.0646520711275569, "grad_norm": 0.20075003802776337, "learning_rate": 6.730075871923384e-05, "loss": 0.615, "step": 5179 }, { "epoch": 1.0648576421009353, "grad_norm": 0.1874108761548996, "learning_rate": 6.729208283341975e-05, "loss": 0.5487, "step": 5180 }, { "epoch": 1.0650632130743138, "grad_norm": 0.19633813202381134, "learning_rate": 6.728340584937892e-05, "loss": 0.57, "step": 5181 }, { "epoch": 1.0652687840476924, "grad_norm": 0.18520689010620117, "learning_rate": 6.727472776753885e-05, "loss": 0.5608, "step": 5182 }, { "epoch": 1.065474355021071, "grad_norm": 0.19115997850894928, "learning_rate": 6.726604858832704e-05, "loss": 0.5627, "step": 5183 }, { "epoch": 1.0656799259944496, "grad_norm": 0.19133557379245758, "learning_rate": 6.725736831217111e-05, "loss": 0.5502, "step": 5184 }, { "epoch": 1.0658854969678282, "grad_norm": 0.19521182775497437, "learning_rate": 6.724868693949864e-05, "loss": 0.5613, "step": 5185 }, { "epoch": 1.0660910679412068, "grad_norm": 0.19791938364505768, "learning_rate": 6.724000447073739e-05, "loss": 0.5791, "step": 5186 }, { "epoch": 1.0662966389145851, "grad_norm": 0.20099619030952454, "learning_rate": 6.723132090631505e-05, "loss": 0.5536, "step": 5187 }, { "epoch": 1.0665022098879637, "grad_norm": 0.19620858132839203, "learning_rate": 6.722263624665944e-05, "loss": 0.564, "step": 5188 }, { "epoch": 1.0667077808613423, "grad_norm": 0.19696368277072906, "learning_rate": 6.721395049219841e-05, "loss": 0.5791, "step": 5189 }, { "epoch": 1.066913351834721, "grad_norm": 0.1899513453245163, "learning_rate": 6.720526364335987e-05, "loss": 0.5405, "step": 5190 }, { "epoch": 1.0671189228080995, "grad_norm": 0.1824042797088623, "learning_rate": 6.719657570057178e-05, "loss": 0.5631, "step": 5191 }, { "epoch": 1.067324493781478, "grad_norm": 0.18800349533557892, "learning_rate": 6.718788666426216e-05, "loss": 0.5563, "step": 5192 }, { "epoch": 1.0675300647548567, "grad_norm": 0.18425163626670837, "learning_rate": 6.717919653485905e-05, "loss": 0.5475, "step": 5193 }, { "epoch": 1.0677356357282353, "grad_norm": 0.1711423397064209, "learning_rate": 6.71705053127906e-05, "loss": 0.5814, "step": 5194 }, { "epoch": 1.0679412067016136, "grad_norm": 0.2124950885772705, "learning_rate": 6.716181299848497e-05, "loss": 0.5802, "step": 5195 }, { "epoch": 1.0681467776749922, "grad_norm": 0.16117288172245026, "learning_rate": 6.715311959237042e-05, "loss": 0.5269, "step": 5196 }, { "epoch": 1.0683523486483708, "grad_norm": 0.16499152779579163, "learning_rate": 6.714442509487519e-05, "loss": 0.5588, "step": 5197 }, { "epoch": 1.0685579196217494, "grad_norm": 0.20156873762607574, "learning_rate": 6.713572950642765e-05, "loss": 0.575, "step": 5198 }, { "epoch": 1.068763490595128, "grad_norm": 0.19442300498485565, "learning_rate": 6.712703282745618e-05, "loss": 0.5727, "step": 5199 }, { "epoch": 1.0689690615685066, "grad_norm": 0.19860216975212097, "learning_rate": 6.711833505838921e-05, "loss": 0.5325, "step": 5200 }, { "epoch": 1.0691746325418852, "grad_norm": 0.1945996880531311, "learning_rate": 6.710963619965526e-05, "loss": 0.569, "step": 5201 }, { "epoch": 1.0693802035152635, "grad_norm": 0.20251908898353577, "learning_rate": 6.710093625168289e-05, "loss": 0.5884, "step": 5202 }, { "epoch": 1.0695857744886421, "grad_norm": 0.20491336286067963, "learning_rate": 6.709223521490067e-05, "loss": 0.5788, "step": 5203 }, { "epoch": 1.0697913454620207, "grad_norm": 0.19236387312412262, "learning_rate": 6.708353308973728e-05, "loss": 0.5606, "step": 5204 }, { "epoch": 1.0699969164353993, "grad_norm": 0.16769090294837952, "learning_rate": 6.707482987662144e-05, "loss": 0.5143, "step": 5205 }, { "epoch": 1.0702024874087779, "grad_norm": 0.1578342467546463, "learning_rate": 6.70661255759819e-05, "loss": 0.556, "step": 5206 }, { "epoch": 1.0704080583821565, "grad_norm": 0.19746308028697968, "learning_rate": 6.705742018824751e-05, "loss": 0.5619, "step": 5207 }, { "epoch": 1.070613629355535, "grad_norm": 0.1942613422870636, "learning_rate": 6.704871371384711e-05, "loss": 0.5738, "step": 5208 }, { "epoch": 1.0708192003289136, "grad_norm": 0.1871325820684433, "learning_rate": 6.704000615320964e-05, "loss": 0.578, "step": 5209 }, { "epoch": 1.0710247713022922, "grad_norm": 0.19533534348011017, "learning_rate": 6.703129750676409e-05, "loss": 0.574, "step": 5210 }, { "epoch": 1.0712303422756706, "grad_norm": 0.19535306096076965, "learning_rate": 6.702258777493947e-05, "loss": 0.5756, "step": 5211 }, { "epoch": 1.0714359132490492, "grad_norm": 0.18510495126247406, "learning_rate": 6.70138769581649e-05, "loss": 0.5639, "step": 5212 }, { "epoch": 1.0716414842224278, "grad_norm": 0.18865178525447845, "learning_rate": 6.70051650568695e-05, "loss": 0.5588, "step": 5213 }, { "epoch": 1.0718470551958064, "grad_norm": 0.19118379056453705, "learning_rate": 6.699645207148247e-05, "loss": 0.5695, "step": 5214 }, { "epoch": 1.072052626169185, "grad_norm": 0.19162502884864807, "learning_rate": 6.698773800243305e-05, "loss": 0.5647, "step": 5215 }, { "epoch": 1.0722581971425635, "grad_norm": 0.18810777366161346, "learning_rate": 6.697902285015056e-05, "loss": 0.5608, "step": 5216 }, { "epoch": 1.0724637681159421, "grad_norm": 0.16454185545444489, "learning_rate": 6.697030661506433e-05, "loss": 0.5383, "step": 5217 }, { "epoch": 1.0726693390893205, "grad_norm": 0.15946544706821442, "learning_rate": 6.69615892976038e-05, "loss": 0.551, "step": 5218 }, { "epoch": 1.072874910062699, "grad_norm": 0.20395736396312714, "learning_rate": 6.695287089819838e-05, "loss": 0.5643, "step": 5219 }, { "epoch": 1.0730804810360777, "grad_norm": 0.19649049639701843, "learning_rate": 6.694415141727766e-05, "loss": 0.5594, "step": 5220 }, { "epoch": 1.0732860520094563, "grad_norm": 0.18935894966125488, "learning_rate": 6.693543085527115e-05, "loss": 0.5627, "step": 5221 }, { "epoch": 1.0734916229828348, "grad_norm": 0.21237939596176147, "learning_rate": 6.69267092126085e-05, "loss": 0.5703, "step": 5222 }, { "epoch": 1.0736971939562134, "grad_norm": 0.19148610532283783, "learning_rate": 6.691798648971935e-05, "loss": 0.5535, "step": 5223 }, { "epoch": 1.073902764929592, "grad_norm": 2.203571319580078, "learning_rate": 6.690926268703345e-05, "loss": 0.6328, "step": 5224 }, { "epoch": 1.0741083359029706, "grad_norm": 0.20368382334709167, "learning_rate": 6.69005378049806e-05, "loss": 0.559, "step": 5225 }, { "epoch": 1.074313906876349, "grad_norm": 0.22243089973926544, "learning_rate": 6.68918118439906e-05, "loss": 0.5621, "step": 5226 }, { "epoch": 1.0745194778497276, "grad_norm": 0.25355663895606995, "learning_rate": 6.688308480449335e-05, "loss": 0.5876, "step": 5227 }, { "epoch": 1.0747250488231062, "grad_norm": 0.2832355201244354, "learning_rate": 6.68743566869188e-05, "loss": 0.5542, "step": 5228 }, { "epoch": 1.0749306197964847, "grad_norm": 0.1938430219888687, "learning_rate": 6.686562749169694e-05, "loss": 0.5282, "step": 5229 }, { "epoch": 1.0751361907698633, "grad_norm": 0.263157457113266, "learning_rate": 6.685689721925782e-05, "loss": 0.5783, "step": 5230 }, { "epoch": 1.075341761743242, "grad_norm": 0.2489389330148697, "learning_rate": 6.684816587003152e-05, "loss": 0.5584, "step": 5231 }, { "epoch": 1.0755473327166205, "grad_norm": 0.18948674201965332, "learning_rate": 6.683943344444821e-05, "loss": 0.5549, "step": 5232 }, { "epoch": 1.0757529036899989, "grad_norm": 0.18527735769748688, "learning_rate": 6.683069994293808e-05, "loss": 0.5533, "step": 5233 }, { "epoch": 1.0759584746633775, "grad_norm": 0.23963753879070282, "learning_rate": 6.682196536593142e-05, "loss": 0.5675, "step": 5234 }, { "epoch": 1.076164045636756, "grad_norm": 0.23618869483470917, "learning_rate": 6.681322971385852e-05, "loss": 0.581, "step": 5235 }, { "epoch": 1.0763696166101346, "grad_norm": 0.18081532418727875, "learning_rate": 6.680449298714974e-05, "loss": 0.5488, "step": 5236 }, { "epoch": 1.0765751875835132, "grad_norm": 0.17601439356803894, "learning_rate": 6.679575518623549e-05, "loss": 0.5718, "step": 5237 }, { "epoch": 1.0767807585568918, "grad_norm": 0.22532643377780914, "learning_rate": 6.678701631154627e-05, "loss": 0.5777, "step": 5238 }, { "epoch": 1.0769863295302704, "grad_norm": 0.21322833001613617, "learning_rate": 6.677827636351259e-05, "loss": 0.5803, "step": 5239 }, { "epoch": 1.077191900503649, "grad_norm": 0.19407659769058228, "learning_rate": 6.676953534256501e-05, "loss": 0.525, "step": 5240 }, { "epoch": 1.0773974714770274, "grad_norm": 0.32453683018684387, "learning_rate": 6.676079324913419e-05, "loss": 0.5812, "step": 5241 }, { "epoch": 1.077603042450406, "grad_norm": 0.2009628862142563, "learning_rate": 6.675205008365081e-05, "loss": 0.557, "step": 5242 }, { "epoch": 1.0778086134237845, "grad_norm": 0.17055638134479523, "learning_rate": 6.674330584654557e-05, "loss": 0.5503, "step": 5243 }, { "epoch": 1.0780141843971631, "grad_norm": 0.21184168756008148, "learning_rate": 6.673456053824928e-05, "loss": 0.5644, "step": 5244 }, { "epoch": 1.0782197553705417, "grad_norm": 0.20383425056934357, "learning_rate": 6.672581415919279e-05, "loss": 0.586, "step": 5245 }, { "epoch": 1.0784253263439203, "grad_norm": 0.20638933777809143, "learning_rate": 6.671706670980697e-05, "loss": 0.5577, "step": 5246 }, { "epoch": 1.0786308973172989, "grad_norm": 0.2084139883518219, "learning_rate": 6.670831819052278e-05, "loss": 0.5818, "step": 5247 }, { "epoch": 1.0788364682906773, "grad_norm": 0.1990043967962265, "learning_rate": 6.669956860177122e-05, "loss": 0.5694, "step": 5248 }, { "epoch": 1.0790420392640558, "grad_norm": 0.22096286714076996, "learning_rate": 6.669081794398334e-05, "loss": 0.5815, "step": 5249 }, { "epoch": 1.0792476102374344, "grad_norm": 0.19977039098739624, "learning_rate": 6.668206621759023e-05, "loss": 0.552, "step": 5250 }, { "epoch": 1.079453181210813, "grad_norm": 0.16734324395656586, "learning_rate": 6.667331342302308e-05, "loss": 0.5378, "step": 5251 }, { "epoch": 1.0796587521841916, "grad_norm": 0.1833125352859497, "learning_rate": 6.666455956071307e-05, "loss": 0.5605, "step": 5252 }, { "epoch": 1.0798643231575702, "grad_norm": 0.2064771205186844, "learning_rate": 6.665580463109147e-05, "loss": 0.5548, "step": 5253 }, { "epoch": 1.0800698941309488, "grad_norm": 0.19935967028141022, "learning_rate": 6.664704863458959e-05, "loss": 0.5673, "step": 5254 }, { "epoch": 1.0802754651043274, "grad_norm": 0.20248223841190338, "learning_rate": 6.66382915716388e-05, "loss": 0.5799, "step": 5255 }, { "epoch": 1.0804810360777057, "grad_norm": 0.19460590183734894, "learning_rate": 6.662953344267054e-05, "loss": 0.5549, "step": 5256 }, { "epoch": 1.0806866070510843, "grad_norm": 0.19697195291519165, "learning_rate": 6.662077424811624e-05, "loss": 0.5809, "step": 5257 }, { "epoch": 1.080892178024463, "grad_norm": 0.19642481207847595, "learning_rate": 6.661201398840747e-05, "loss": 0.5708, "step": 5258 }, { "epoch": 1.0810977489978415, "grad_norm": 0.1706124097108841, "learning_rate": 6.660325266397576e-05, "loss": 0.5569, "step": 5259 }, { "epoch": 1.08130331997122, "grad_norm": 0.16144689917564392, "learning_rate": 6.659449027525279e-05, "loss": 0.5646, "step": 5260 }, { "epoch": 1.0815088909445987, "grad_norm": 0.17022046446800232, "learning_rate": 6.658572682267019e-05, "loss": 0.5469, "step": 5261 }, { "epoch": 1.0817144619179773, "grad_norm": 0.16290414333343506, "learning_rate": 6.657696230665974e-05, "loss": 0.5779, "step": 5262 }, { "epoch": 1.0819200328913556, "grad_norm": 0.20431680977344513, "learning_rate": 6.656819672765321e-05, "loss": 0.5886, "step": 5263 }, { "epoch": 1.0821256038647342, "grad_norm": 0.19603441655635834, "learning_rate": 6.655943008608243e-05, "loss": 0.5559, "step": 5264 }, { "epoch": 1.0823311748381128, "grad_norm": 0.19341200590133667, "learning_rate": 6.65506623823793e-05, "loss": 0.5685, "step": 5265 }, { "epoch": 1.0825367458114914, "grad_norm": 0.17218126356601715, "learning_rate": 6.654189361697576e-05, "loss": 0.5443, "step": 5266 }, { "epoch": 1.08274231678487, "grad_norm": 0.13615413010120392, "learning_rate": 6.653312379030381e-05, "loss": 0.5515, "step": 5267 }, { "epoch": 1.0829478877582486, "grad_norm": 0.1653267741203308, "learning_rate": 6.652435290279549e-05, "loss": 0.5599, "step": 5268 }, { "epoch": 1.0831534587316272, "grad_norm": 0.1650351732969284, "learning_rate": 6.651558095488292e-05, "loss": 0.5362, "step": 5269 }, { "epoch": 1.0833590297050057, "grad_norm": 0.16134947538375854, "learning_rate": 6.650680794699823e-05, "loss": 0.559, "step": 5270 }, { "epoch": 1.0835646006783841, "grad_norm": 0.1984068751335144, "learning_rate": 6.649803387957362e-05, "loss": 0.5631, "step": 5271 }, { "epoch": 1.0837701716517627, "grad_norm": 0.1926686316728592, "learning_rate": 6.648925875304139e-05, "loss": 0.5864, "step": 5272 }, { "epoch": 1.0839757426251413, "grad_norm": 0.1906096339225769, "learning_rate": 6.648048256783382e-05, "loss": 0.557, "step": 5273 }, { "epoch": 1.0841813135985199, "grad_norm": 0.1856287568807602, "learning_rate": 6.647170532438327e-05, "loss": 0.5717, "step": 5274 }, { "epoch": 1.0843868845718985, "grad_norm": 0.17093409597873688, "learning_rate": 6.646292702312214e-05, "loss": 0.5314, "step": 5275 }, { "epoch": 1.084592455545277, "grad_norm": 0.17193061113357544, "learning_rate": 6.645414766448293e-05, "loss": 0.5795, "step": 5276 }, { "epoch": 1.0847980265186556, "grad_norm": 0.1909974366426468, "learning_rate": 6.644536724889814e-05, "loss": 0.5638, "step": 5277 }, { "epoch": 1.085003597492034, "grad_norm": 0.16503417491912842, "learning_rate": 6.643658577680033e-05, "loss": 0.5364, "step": 5278 }, { "epoch": 1.0852091684654126, "grad_norm": 0.12726576626300812, "learning_rate": 6.642780324862215e-05, "loss": 0.5181, "step": 5279 }, { "epoch": 1.0854147394387912, "grad_norm": 0.15936200320720673, "learning_rate": 6.641901966479623e-05, "loss": 0.5601, "step": 5280 }, { "epoch": 1.0856203104121698, "grad_norm": 0.1608133465051651, "learning_rate": 6.641023502575535e-05, "loss": 0.5125, "step": 5281 }, { "epoch": 1.0858258813855484, "grad_norm": 0.15383280813694, "learning_rate": 6.640144933193223e-05, "loss": 0.5724, "step": 5282 }, { "epoch": 1.086031452358927, "grad_norm": 0.15517185628414154, "learning_rate": 6.639266258375977e-05, "loss": 0.5275, "step": 5283 }, { "epoch": 1.0862370233323055, "grad_norm": 0.16167797148227692, "learning_rate": 6.63838747816708e-05, "loss": 0.5644, "step": 5284 }, { "epoch": 1.0864425943056841, "grad_norm": 0.1940879076719284, "learning_rate": 6.637508592609827e-05, "loss": 0.5677, "step": 5285 }, { "epoch": 1.0866481652790625, "grad_norm": 0.18758495151996613, "learning_rate": 6.636629601747515e-05, "loss": 0.5874, "step": 5286 }, { "epoch": 1.086853736252441, "grad_norm": 0.1906895488500595, "learning_rate": 6.635750505623451e-05, "loss": 0.5747, "step": 5287 }, { "epoch": 1.0870593072258197, "grad_norm": 0.18820390105247498, "learning_rate": 6.63487130428094e-05, "loss": 0.5581, "step": 5288 }, { "epoch": 1.0872648781991983, "grad_norm": 0.19802720844745636, "learning_rate": 6.633991997763299e-05, "loss": 0.5604, "step": 5289 }, { "epoch": 1.0874704491725768, "grad_norm": 0.16742005944252014, "learning_rate": 6.633112586113847e-05, "loss": 0.5223, "step": 5290 }, { "epoch": 1.0876760201459554, "grad_norm": 0.14373008906841278, "learning_rate": 6.632233069375907e-05, "loss": 0.5368, "step": 5291 }, { "epoch": 1.087881591119334, "grad_norm": 0.1611548662185669, "learning_rate": 6.63135344759281e-05, "loss": 0.5546, "step": 5292 }, { "epoch": 1.0880871620927126, "grad_norm": 0.19752389192581177, "learning_rate": 6.630473720807892e-05, "loss": 0.5597, "step": 5293 }, { "epoch": 1.088292733066091, "grad_norm": 0.19738554954528809, "learning_rate": 6.62959388906449e-05, "loss": 0.5787, "step": 5294 }, { "epoch": 1.0884983040394696, "grad_norm": 0.1929868459701538, "learning_rate": 6.628713952405951e-05, "loss": 0.5614, "step": 5295 }, { "epoch": 1.0887038750128482, "grad_norm": 0.2048940360546112, "learning_rate": 6.627833910875626e-05, "loss": 0.5715, "step": 5296 }, { "epoch": 1.0889094459862267, "grad_norm": 0.19857628643512726, "learning_rate": 6.62695376451687e-05, "loss": 0.5694, "step": 5297 }, { "epoch": 1.0891150169596053, "grad_norm": 0.19346579909324646, "learning_rate": 6.626073513373043e-05, "loss": 0.5612, "step": 5298 }, { "epoch": 1.089320587932984, "grad_norm": 0.1919691264629364, "learning_rate": 6.62519315748751e-05, "loss": 0.5584, "step": 5299 }, { "epoch": 1.0895261589063625, "grad_norm": 0.1884642243385315, "learning_rate": 6.624312696903644e-05, "loss": 0.5576, "step": 5300 }, { "epoch": 1.089731729879741, "grad_norm": 0.1730055809020996, "learning_rate": 6.623432131664822e-05, "loss": 0.5565, "step": 5301 }, { "epoch": 1.0899373008531195, "grad_norm": 0.19262228906154633, "learning_rate": 6.62255146181442e-05, "loss": 0.5645, "step": 5302 }, { "epoch": 1.090142871826498, "grad_norm": 0.19675207138061523, "learning_rate": 6.62167068739583e-05, "loss": 0.5845, "step": 5303 }, { "epoch": 1.0903484427998766, "grad_norm": 0.1958772838115692, "learning_rate": 6.620789808452443e-05, "loss": 0.5653, "step": 5304 }, { "epoch": 1.0905540137732552, "grad_norm": 0.18935401737689972, "learning_rate": 6.619908825027655e-05, "loss": 0.5523, "step": 5305 }, { "epoch": 1.0907595847466338, "grad_norm": 0.19371245801448822, "learning_rate": 6.619027737164865e-05, "loss": 0.551, "step": 5306 }, { "epoch": 1.0909651557200124, "grad_norm": 0.19392549991607666, "learning_rate": 6.618146544907485e-05, "loss": 0.5731, "step": 5307 }, { "epoch": 1.091170726693391, "grad_norm": 0.19857439398765564, "learning_rate": 6.617265248298926e-05, "loss": 0.5364, "step": 5308 }, { "epoch": 1.0913762976667694, "grad_norm": 0.2129819244146347, "learning_rate": 6.616383847382601e-05, "loss": 0.5635, "step": 5309 }, { "epoch": 1.091581868640148, "grad_norm": 0.18669261038303375, "learning_rate": 6.615502342201938e-05, "loss": 0.5533, "step": 5310 }, { "epoch": 1.0917874396135265, "grad_norm": 0.20277494192123413, "learning_rate": 6.614620732800363e-05, "loss": 0.5874, "step": 5311 }, { "epoch": 1.0919930105869051, "grad_norm": 0.19775375723838806, "learning_rate": 6.613739019221306e-05, "loss": 0.5737, "step": 5312 }, { "epoch": 1.0921985815602837, "grad_norm": 0.19743028283119202, "learning_rate": 6.612857201508208e-05, "loss": 0.5853, "step": 5313 }, { "epoch": 1.0924041525336623, "grad_norm": 0.18763835728168488, "learning_rate": 6.611975279704511e-05, "loss": 0.5728, "step": 5314 }, { "epoch": 1.0926097235070409, "grad_norm": 0.19164253771305084, "learning_rate": 6.611093253853664e-05, "loss": 0.5734, "step": 5315 }, { "epoch": 1.0928152944804195, "grad_norm": 0.19013293087482452, "learning_rate": 6.610211123999119e-05, "loss": 0.5647, "step": 5316 }, { "epoch": 1.0930208654537978, "grad_norm": 0.19846196472644806, "learning_rate": 6.609328890184334e-05, "loss": 0.5613, "step": 5317 }, { "epoch": 1.0932264364271764, "grad_norm": 0.18824782967567444, "learning_rate": 6.608446552452777e-05, "loss": 0.5496, "step": 5318 }, { "epoch": 1.093432007400555, "grad_norm": 0.19030706584453583, "learning_rate": 6.60756411084791e-05, "loss": 0.5488, "step": 5319 }, { "epoch": 1.0936375783739336, "grad_norm": 0.16634370386600494, "learning_rate": 6.606681565413211e-05, "loss": 0.5618, "step": 5320 }, { "epoch": 1.0938431493473122, "grad_norm": 0.20281003415584564, "learning_rate": 6.605798916192157e-05, "loss": 0.5718, "step": 5321 }, { "epoch": 1.0940487203206908, "grad_norm": 0.16052670776844025, "learning_rate": 6.604916163228235e-05, "loss": 0.5356, "step": 5322 }, { "epoch": 1.0942542912940694, "grad_norm": 0.19108809530735016, "learning_rate": 6.60403330656493e-05, "loss": 0.5525, "step": 5323 }, { "epoch": 1.0944598622674477, "grad_norm": 0.20535770058631897, "learning_rate": 6.603150346245738e-05, "loss": 0.5542, "step": 5324 }, { "epoch": 1.0946654332408263, "grad_norm": 0.20727907121181488, "learning_rate": 6.60226728231416e-05, "loss": 0.583, "step": 5325 }, { "epoch": 1.094871004214205, "grad_norm": 0.19222858548164368, "learning_rate": 6.601384114813699e-05, "loss": 0.5626, "step": 5326 }, { "epoch": 1.0950765751875835, "grad_norm": 0.19487687945365906, "learning_rate": 6.600500843787864e-05, "loss": 0.5649, "step": 5327 }, { "epoch": 1.095282146160962, "grad_norm": 0.1915174126625061, "learning_rate": 6.599617469280171e-05, "loss": 0.5561, "step": 5328 }, { "epoch": 1.0954877171343407, "grad_norm": 0.17732886970043182, "learning_rate": 6.598733991334137e-05, "loss": 0.5102, "step": 5329 }, { "epoch": 1.0956932881077193, "grad_norm": 0.17406459152698517, "learning_rate": 6.59785040999329e-05, "loss": 0.5818, "step": 5330 }, { "epoch": 1.0958988590810979, "grad_norm": 0.19681067764759064, "learning_rate": 6.596966725301158e-05, "loss": 0.5573, "step": 5331 }, { "epoch": 1.0961044300544762, "grad_norm": 0.2060333490371704, "learning_rate": 6.596082937301277e-05, "loss": 0.5757, "step": 5332 }, { "epoch": 1.0963100010278548, "grad_norm": 0.1740088164806366, "learning_rate": 6.595199046037187e-05, "loss": 0.5204, "step": 5333 }, { "epoch": 1.0965155720012334, "grad_norm": 0.15425589680671692, "learning_rate": 6.594315051552434e-05, "loss": 0.549, "step": 5334 }, { "epoch": 1.096721142974612, "grad_norm": 0.20004071295261383, "learning_rate": 6.593430953890564e-05, "loss": 0.5745, "step": 5335 }, { "epoch": 1.0969267139479906, "grad_norm": 0.21765153110027313, "learning_rate": 6.592546753095138e-05, "loss": 0.5779, "step": 5336 }, { "epoch": 1.0971322849213692, "grad_norm": 0.2069845348596573, "learning_rate": 6.591662449209714e-05, "loss": 0.5681, "step": 5337 }, { "epoch": 1.0973378558947477, "grad_norm": 0.16083793342113495, "learning_rate": 6.590778042277856e-05, "loss": 0.522, "step": 5338 }, { "epoch": 1.0975434268681261, "grad_norm": 0.13454684615135193, "learning_rate": 6.589893532343137e-05, "loss": 0.5234, "step": 5339 }, { "epoch": 1.0977489978415047, "grad_norm": 0.16017797589302063, "learning_rate": 6.589008919449132e-05, "loss": 0.5456, "step": 5340 }, { "epoch": 1.0979545688148833, "grad_norm": 0.19706310331821442, "learning_rate": 6.588124203639421e-05, "loss": 0.5598, "step": 5341 }, { "epoch": 1.0981601397882619, "grad_norm": 0.19830232858657837, "learning_rate": 6.587239384957593e-05, "loss": 0.5587, "step": 5342 }, { "epoch": 1.0983657107616405, "grad_norm": 0.19337981939315796, "learning_rate": 6.586354463447233e-05, "loss": 0.5635, "step": 5343 }, { "epoch": 1.098571281735019, "grad_norm": 0.19587767124176025, "learning_rate": 6.585469439151942e-05, "loss": 0.5626, "step": 5344 }, { "epoch": 1.0987768527083976, "grad_norm": 0.20316268503665924, "learning_rate": 6.584584312115318e-05, "loss": 0.5743, "step": 5345 }, { "epoch": 1.0989824236817762, "grad_norm": 0.19595171511173248, "learning_rate": 6.583699082380969e-05, "loss": 0.5579, "step": 5346 }, { "epoch": 1.0991879946551546, "grad_norm": 0.18075229227542877, "learning_rate": 6.582813749992504e-05, "loss": 0.5336, "step": 5347 }, { "epoch": 1.0993935656285332, "grad_norm": 0.1714819073677063, "learning_rate": 6.581928314993542e-05, "loss": 0.5727, "step": 5348 }, { "epoch": 1.0995991366019118, "grad_norm": 0.2072882503271103, "learning_rate": 6.581042777427703e-05, "loss": 0.5859, "step": 5349 }, { "epoch": 1.0998047075752904, "grad_norm": 0.19539666175842285, "learning_rate": 6.580157137338613e-05, "loss": 0.5764, "step": 5350 }, { "epoch": 1.100010278548669, "grad_norm": 0.20228314399719238, "learning_rate": 6.579271394769901e-05, "loss": 0.5831, "step": 5351 }, { "epoch": 1.1002158495220475, "grad_norm": 0.16698522865772247, "learning_rate": 6.578385549765209e-05, "loss": 0.5292, "step": 5352 }, { "epoch": 1.1004214204954261, "grad_norm": 0.15886962413787842, "learning_rate": 6.577499602368176e-05, "loss": 0.5586, "step": 5353 }, { "epoch": 1.1006269914688045, "grad_norm": 0.16973358392715454, "learning_rate": 6.576613552622443e-05, "loss": 0.5373, "step": 5354 }, { "epoch": 1.100832562442183, "grad_norm": 0.16206781566143036, "learning_rate": 6.575727400571672e-05, "loss": 0.5407, "step": 5355 }, { "epoch": 1.1010381334155617, "grad_norm": 0.19393891096115112, "learning_rate": 6.57484114625951e-05, "loss": 0.5683, "step": 5356 }, { "epoch": 1.1012437043889403, "grad_norm": 0.19983628392219543, "learning_rate": 6.573954789729625e-05, "loss": 0.5771, "step": 5357 }, { "epoch": 1.1014492753623188, "grad_norm": 0.19126926362514496, "learning_rate": 6.573068331025679e-05, "loss": 0.559, "step": 5358 }, { "epoch": 1.1016548463356974, "grad_norm": 0.19822482764720917, "learning_rate": 6.572181770191347e-05, "loss": 0.5587, "step": 5359 }, { "epoch": 1.101860417309076, "grad_norm": 0.19558537006378174, "learning_rate": 6.571295107270304e-05, "loss": 0.562, "step": 5360 }, { "epoch": 1.1020659882824546, "grad_norm": 0.1923174262046814, "learning_rate": 6.570408342306233e-05, "loss": 0.545, "step": 5361 }, { "epoch": 1.102271559255833, "grad_norm": 0.19644415378570557, "learning_rate": 6.569521475342819e-05, "loss": 0.5765, "step": 5362 }, { "epoch": 1.1024771302292116, "grad_norm": 0.16399532556533813, "learning_rate": 6.568634506423757e-05, "loss": 0.5231, "step": 5363 }, { "epoch": 1.1026827012025902, "grad_norm": 0.13997915387153625, "learning_rate": 6.567747435592738e-05, "loss": 0.5166, "step": 5364 }, { "epoch": 1.1028882721759687, "grad_norm": 0.16803216934204102, "learning_rate": 6.56686026289347e-05, "loss": 0.5639, "step": 5365 }, { "epoch": 1.1030938431493473, "grad_norm": 0.16553597152233124, "learning_rate": 6.565972988369658e-05, "loss": 0.5339, "step": 5366 }, { "epoch": 1.103299414122726, "grad_norm": 0.16284188628196716, "learning_rate": 6.565085612065012e-05, "loss": 0.5559, "step": 5367 }, { "epoch": 1.1035049850961045, "grad_norm": 0.20028123259544373, "learning_rate": 6.56419813402325e-05, "loss": 0.5803, "step": 5368 }, { "epoch": 1.1037105560694829, "grad_norm": 0.1928570717573166, "learning_rate": 6.563310554288094e-05, "loss": 0.5508, "step": 5369 }, { "epoch": 1.1039161270428615, "grad_norm": 0.1684267520904541, "learning_rate": 6.562422872903271e-05, "loss": 0.5431, "step": 5370 }, { "epoch": 1.10412169801624, "grad_norm": 0.13885952532291412, "learning_rate": 6.561535089912512e-05, "loss": 0.5324, "step": 5371 }, { "epoch": 1.1043272689896186, "grad_norm": 0.4177161455154419, "learning_rate": 6.560647205359556e-05, "loss": 0.5474, "step": 5372 }, { "epoch": 1.1045328399629972, "grad_norm": 0.201374813914299, "learning_rate": 6.559759219288145e-05, "loss": 0.5477, "step": 5373 }, { "epoch": 1.1047384109363758, "grad_norm": 0.19698698818683624, "learning_rate": 6.558871131742022e-05, "loss": 0.5579, "step": 5374 }, { "epoch": 1.1049439819097544, "grad_norm": 0.19668418169021606, "learning_rate": 6.557982942764941e-05, "loss": 0.592, "step": 5375 }, { "epoch": 1.105149552883133, "grad_norm": 0.18167072534561157, "learning_rate": 6.557094652400662e-05, "loss": 0.5506, "step": 5376 }, { "epoch": 1.1053551238565114, "grad_norm": 0.15802860260009766, "learning_rate": 6.556206260692943e-05, "loss": 0.5303, "step": 5377 }, { "epoch": 1.10556069482989, "grad_norm": 0.1602732092142105, "learning_rate": 6.55531776768555e-05, "loss": 0.5617, "step": 5378 }, { "epoch": 1.1057662658032685, "grad_norm": 0.20059829950332642, "learning_rate": 6.55442917342226e-05, "loss": 0.5598, "step": 5379 }, { "epoch": 1.1059718367766471, "grad_norm": 0.19668720662593842, "learning_rate": 6.553540477946846e-05, "loss": 0.5747, "step": 5380 }, { "epoch": 1.1061774077500257, "grad_norm": 0.19892635941505432, "learning_rate": 6.552651681303091e-05, "loss": 0.5767, "step": 5381 }, { "epoch": 1.1063829787234043, "grad_norm": 0.2011987864971161, "learning_rate": 6.551762783534783e-05, "loss": 0.5782, "step": 5382 }, { "epoch": 1.1065885496967829, "grad_norm": 0.18638330698013306, "learning_rate": 6.550873784685711e-05, "loss": 0.5516, "step": 5383 }, { "epoch": 1.1067941206701615, "grad_norm": 0.1961633563041687, "learning_rate": 6.549984684799675e-05, "loss": 0.5462, "step": 5384 }, { "epoch": 1.1069996916435398, "grad_norm": 0.18275189399719238, "learning_rate": 6.549095483920473e-05, "loss": 0.5296, "step": 5385 }, { "epoch": 1.1072052626169184, "grad_norm": 0.16657038033008575, "learning_rate": 6.548206182091915e-05, "loss": 0.5184, "step": 5386 }, { "epoch": 1.107410833590297, "grad_norm": 0.16570012271404266, "learning_rate": 6.547316779357812e-05, "loss": 0.5533, "step": 5387 }, { "epoch": 1.1076164045636756, "grad_norm": 0.21582432091236115, "learning_rate": 6.546427275761979e-05, "loss": 0.5526, "step": 5388 }, { "epoch": 1.1078219755370542, "grad_norm": 0.19760467112064362, "learning_rate": 6.54553767134824e-05, "loss": 0.5558, "step": 5389 }, { "epoch": 1.1080275465104328, "grad_norm": 0.19710463285446167, "learning_rate": 6.544647966160421e-05, "loss": 0.5413, "step": 5390 }, { "epoch": 1.1082331174838114, "grad_norm": 0.195608451962471, "learning_rate": 6.543758160242353e-05, "loss": 0.581, "step": 5391 }, { "epoch": 1.10843868845719, "grad_norm": 0.1914118230342865, "learning_rate": 6.542868253637873e-05, "loss": 0.5282, "step": 5392 }, { "epoch": 1.1086442594305683, "grad_norm": 0.16971172392368317, "learning_rate": 6.541978246390823e-05, "loss": 0.5427, "step": 5393 }, { "epoch": 1.108849830403947, "grad_norm": 0.19938012957572937, "learning_rate": 6.541088138545049e-05, "loss": 0.5378, "step": 5394 }, { "epoch": 1.1090554013773255, "grad_norm": 0.2031160593032837, "learning_rate": 6.540197930144403e-05, "loss": 0.5679, "step": 5395 }, { "epoch": 1.109260972350704, "grad_norm": 0.19984202086925507, "learning_rate": 6.53930762123274e-05, "loss": 0.5565, "step": 5396 }, { "epoch": 1.1094665433240827, "grad_norm": 0.17272289097309113, "learning_rate": 6.538417211853923e-05, "loss": 0.5411, "step": 5397 }, { "epoch": 1.1096721142974613, "grad_norm": 0.17256368696689606, "learning_rate": 6.537526702051815e-05, "loss": 0.5649, "step": 5398 }, { "epoch": 1.1098776852708399, "grad_norm": 0.1994207799434662, "learning_rate": 6.536636091870292e-05, "loss": 0.5794, "step": 5399 }, { "epoch": 1.1100832562442182, "grad_norm": 0.18973985314369202, "learning_rate": 6.535745381353226e-05, "loss": 0.5726, "step": 5400 }, { "epoch": 1.1102888272175968, "grad_norm": 0.18999481201171875, "learning_rate": 6.534854570544502e-05, "loss": 0.582, "step": 5401 }, { "epoch": 1.1104943981909754, "grad_norm": 0.19933466613292694, "learning_rate": 6.533963659488005e-05, "loss": 0.5795, "step": 5402 }, { "epoch": 1.110699969164354, "grad_norm": 0.16730111837387085, "learning_rate": 6.533072648227623e-05, "loss": 0.5339, "step": 5403 }, { "epoch": 1.1109055401377326, "grad_norm": 0.13518232107162476, "learning_rate": 6.532181536807256e-05, "loss": 0.5358, "step": 5404 }, { "epoch": 1.1111111111111112, "grad_norm": 0.13433937728405, "learning_rate": 6.531290325270802e-05, "loss": 0.5316, "step": 5405 }, { "epoch": 1.1113166820844897, "grad_norm": 0.16764889657497406, "learning_rate": 6.530399013662168e-05, "loss": 0.5494, "step": 5406 }, { "epoch": 1.1115222530578683, "grad_norm": 0.20979219675064087, "learning_rate": 6.529507602025265e-05, "loss": 0.5727, "step": 5407 }, { "epoch": 1.1117278240312467, "grad_norm": 0.196084126830101, "learning_rate": 6.528616090404008e-05, "loss": 0.5542, "step": 5408 }, { "epoch": 1.1119333950046253, "grad_norm": 0.19469597935676575, "learning_rate": 6.527724478842318e-05, "loss": 0.5717, "step": 5409 }, { "epoch": 1.1121389659780039, "grad_norm": 0.19987237453460693, "learning_rate": 6.526832767384121e-05, "loss": 0.5782, "step": 5410 }, { "epoch": 1.1123445369513825, "grad_norm": 0.20070701837539673, "learning_rate": 6.525940956073347e-05, "loss": 0.5578, "step": 5411 }, { "epoch": 1.112550107924761, "grad_norm": 0.20500093698501587, "learning_rate": 6.52504904495393e-05, "loss": 0.566, "step": 5412 }, { "epoch": 1.1127556788981396, "grad_norm": 0.17960810661315918, "learning_rate": 6.524157034069813e-05, "loss": 0.5331, "step": 5413 }, { "epoch": 1.1129612498715182, "grad_norm": 0.17575471103191376, "learning_rate": 6.523264923464939e-05, "loss": 0.5575, "step": 5414 }, { "epoch": 1.1131668208448966, "grad_norm": 0.2222844958305359, "learning_rate": 6.522372713183259e-05, "loss": 0.5928, "step": 5415 }, { "epoch": 1.1133723918182752, "grad_norm": 0.19698132574558258, "learning_rate": 6.521480403268727e-05, "loss": 0.5484, "step": 5416 }, { "epoch": 1.1135779627916538, "grad_norm": 0.16682282090187073, "learning_rate": 6.520587993765305e-05, "loss": 0.5474, "step": 5417 }, { "epoch": 1.1137835337650324, "grad_norm": 0.5412604808807373, "learning_rate": 6.519695484716958e-05, "loss": 0.5692, "step": 5418 }, { "epoch": 1.113989104738411, "grad_norm": 0.1983635425567627, "learning_rate": 6.518802876167654e-05, "loss": 0.5231, "step": 5419 }, { "epoch": 1.1141946757117895, "grad_norm": 0.1765107810497284, "learning_rate": 6.517910168161367e-05, "loss": 0.5307, "step": 5420 }, { "epoch": 1.1144002466851681, "grad_norm": 0.17099499702453613, "learning_rate": 6.517017360742077e-05, "loss": 0.5787, "step": 5421 }, { "epoch": 1.1146058176585467, "grad_norm": 0.174418643116951, "learning_rate": 6.51612445395377e-05, "loss": 0.524, "step": 5422 }, { "epoch": 1.114811388631925, "grad_norm": 0.1620262712240219, "learning_rate": 6.515231447840435e-05, "loss": 0.5454, "step": 5423 }, { "epoch": 1.1150169596053037, "grad_norm": 0.20404332876205444, "learning_rate": 6.514338342446066e-05, "loss": 0.5735, "step": 5424 }, { "epoch": 1.1152225305786823, "grad_norm": 0.19146005809307098, "learning_rate": 6.513445137814661e-05, "loss": 0.5627, "step": 5425 }, { "epoch": 1.1154281015520608, "grad_norm": 0.1799180954694748, "learning_rate": 6.512551833990226e-05, "loss": 0.5394, "step": 5426 }, { "epoch": 1.1156336725254394, "grad_norm": 0.14841869473457336, "learning_rate": 6.511658431016768e-05, "loss": 0.5174, "step": 5427 }, { "epoch": 1.115839243498818, "grad_norm": 0.1631341278553009, "learning_rate": 6.510764928938301e-05, "loss": 0.5401, "step": 5428 }, { "epoch": 1.1160448144721966, "grad_norm": 0.202928826212883, "learning_rate": 6.509871327798846e-05, "loss": 0.5576, "step": 5429 }, { "epoch": 1.116250385445575, "grad_norm": 0.20302633941173553, "learning_rate": 6.508977627642423e-05, "loss": 0.5684, "step": 5430 }, { "epoch": 1.1164559564189536, "grad_norm": 0.2004452496767044, "learning_rate": 6.508083828513062e-05, "loss": 0.5695, "step": 5431 }, { "epoch": 1.1166615273923322, "grad_norm": 0.21618925034999847, "learning_rate": 6.507189930454797e-05, "loss": 0.5447, "step": 5432 }, { "epoch": 1.1168670983657107, "grad_norm": 0.41041454672813416, "learning_rate": 6.506295933511667e-05, "loss": 0.5416, "step": 5433 }, { "epoch": 1.1170726693390893, "grad_norm": 0.17429187893867493, "learning_rate": 6.505401837727712e-05, "loss": 0.5784, "step": 5434 }, { "epoch": 1.117278240312468, "grad_norm": 0.20878252387046814, "learning_rate": 6.504507643146983e-05, "loss": 0.5594, "step": 5435 }, { "epoch": 1.1174838112858465, "grad_norm": 0.21358852088451385, "learning_rate": 6.503613349813532e-05, "loss": 0.5902, "step": 5436 }, { "epoch": 1.117689382259225, "grad_norm": 0.1675240397453308, "learning_rate": 6.502718957771415e-05, "loss": 0.5253, "step": 5437 }, { "epoch": 1.1178949532326035, "grad_norm": 0.1364658623933792, "learning_rate": 6.501824467064695e-05, "loss": 0.5097, "step": 5438 }, { "epoch": 1.118100524205982, "grad_norm": 0.170567587018013, "learning_rate": 6.500929877737442e-05, "loss": 0.5704, "step": 5439 }, { "epoch": 1.1183060951793606, "grad_norm": 0.2064054310321808, "learning_rate": 6.500035189833725e-05, "loss": 0.5945, "step": 5440 }, { "epoch": 1.1185116661527392, "grad_norm": 0.19481217861175537, "learning_rate": 6.499140403397623e-05, "loss": 0.5454, "step": 5441 }, { "epoch": 1.1187172371261178, "grad_norm": 0.19907177984714508, "learning_rate": 6.498245518473216e-05, "loss": 0.5479, "step": 5442 }, { "epoch": 1.1189228080994964, "grad_norm": 0.20047436654567719, "learning_rate": 6.497350535104592e-05, "loss": 0.5321, "step": 5443 }, { "epoch": 1.119128379072875, "grad_norm": 0.19042466580867767, "learning_rate": 6.496455453335842e-05, "loss": 0.5252, "step": 5444 }, { "epoch": 1.1193339500462534, "grad_norm": 0.17058107256889343, "learning_rate": 6.495560273211066e-05, "loss": 0.5588, "step": 5445 }, { "epoch": 1.119539521019632, "grad_norm": 0.21633589267730713, "learning_rate": 6.494664994774363e-05, "loss": 0.5613, "step": 5446 }, { "epoch": 1.1197450919930105, "grad_norm": 0.18286935985088348, "learning_rate": 6.493769618069835e-05, "loss": 0.5415, "step": 5447 }, { "epoch": 1.1199506629663891, "grad_norm": 0.17194852232933044, "learning_rate": 6.492874143141599e-05, "loss": 0.5713, "step": 5448 }, { "epoch": 1.1201562339397677, "grad_norm": 0.1954166442155838, "learning_rate": 6.49197857003377e-05, "loss": 0.5635, "step": 5449 }, { "epoch": 1.1203618049131463, "grad_norm": 0.22501884400844574, "learning_rate": 6.491082898790465e-05, "loss": 0.5615, "step": 5450 }, { "epoch": 1.1205673758865249, "grad_norm": 0.19493956863880157, "learning_rate": 6.490187129455813e-05, "loss": 0.5409, "step": 5451 }, { "epoch": 1.1207729468599035, "grad_norm": 0.19634434580802917, "learning_rate": 6.489291262073942e-05, "loss": 0.5698, "step": 5452 }, { "epoch": 1.1209785178332818, "grad_norm": 0.22804930806159973, "learning_rate": 6.48839529668899e-05, "loss": 0.5685, "step": 5453 }, { "epoch": 1.1211840888066604, "grad_norm": 0.18841257691383362, "learning_rate": 6.487499233345094e-05, "loss": 0.5362, "step": 5454 }, { "epoch": 1.121389659780039, "grad_norm": 0.16956526041030884, "learning_rate": 6.4866030720864e-05, "loss": 0.5396, "step": 5455 }, { "epoch": 1.1215952307534176, "grad_norm": 0.1671314686536789, "learning_rate": 6.48570681295706e-05, "loss": 0.5741, "step": 5456 }, { "epoch": 1.1218008017267962, "grad_norm": 0.16687725484371185, "learning_rate": 6.484810456001226e-05, "loss": 0.5651, "step": 5457 }, { "epoch": 1.1220063727001748, "grad_norm": 0.16390031576156616, "learning_rate": 6.483914001263058e-05, "loss": 0.5638, "step": 5458 }, { "epoch": 1.1222119436735534, "grad_norm": 0.16974018514156342, "learning_rate": 6.483017448786719e-05, "loss": 0.5198, "step": 5459 }, { "epoch": 1.1224175146469317, "grad_norm": 0.17362362146377563, "learning_rate": 6.48212079861638e-05, "loss": 0.5437, "step": 5460 }, { "epoch": 1.1226230856203103, "grad_norm": 0.19400741159915924, "learning_rate": 6.481224050796213e-05, "loss": 0.5481, "step": 5461 }, { "epoch": 1.122828656593689, "grad_norm": 0.1908549964427948, "learning_rate": 6.480327205370397e-05, "loss": 0.5593, "step": 5462 }, { "epoch": 1.1230342275670675, "grad_norm": 0.1656675636768341, "learning_rate": 6.479430262383116e-05, "loss": 0.5369, "step": 5463 }, { "epoch": 1.123239798540446, "grad_norm": 0.16304363310337067, "learning_rate": 6.478533221878556e-05, "loss": 0.5697, "step": 5464 }, { "epoch": 1.1234453695138247, "grad_norm": 0.19559811055660248, "learning_rate": 6.477636083900914e-05, "loss": 0.5856, "step": 5465 }, { "epoch": 1.1236509404872033, "grad_norm": 0.17000918090343475, "learning_rate": 6.476738848494385e-05, "loss": 0.5545, "step": 5466 }, { "epoch": 1.1238565114605819, "grad_norm": 0.1553100198507309, "learning_rate": 6.475841515703172e-05, "loss": 0.5531, "step": 5467 }, { "epoch": 1.1240620824339604, "grad_norm": 0.1977023035287857, "learning_rate": 6.474944085571482e-05, "loss": 0.5735, "step": 5468 }, { "epoch": 1.1242676534073388, "grad_norm": 0.1898386925458908, "learning_rate": 6.47404655814353e-05, "loss": 0.5487, "step": 5469 }, { "epoch": 1.1244732243807174, "grad_norm": 0.18860745429992676, "learning_rate": 6.473148933463529e-05, "loss": 0.5634, "step": 5470 }, { "epoch": 1.124678795354096, "grad_norm": 0.1715293824672699, "learning_rate": 6.472251211575704e-05, "loss": 0.546, "step": 5471 }, { "epoch": 1.1248843663274746, "grad_norm": 0.13662804663181305, "learning_rate": 6.471353392524277e-05, "loss": 0.5186, "step": 5472 }, { "epoch": 1.1250899373008532, "grad_norm": 0.16437150537967682, "learning_rate": 6.470455476353486e-05, "loss": 0.5628, "step": 5473 }, { "epoch": 1.1252955082742317, "grad_norm": 0.20408563315868378, "learning_rate": 6.469557463107562e-05, "loss": 0.5723, "step": 5474 }, { "epoch": 1.1255010792476101, "grad_norm": 0.19299282133579254, "learning_rate": 6.468659352830746e-05, "loss": 0.5923, "step": 5475 }, { "epoch": 1.1257066502209887, "grad_norm": 0.18226416409015656, "learning_rate": 6.467761145567286e-05, "loss": 0.5624, "step": 5476 }, { "epoch": 1.1259122211943673, "grad_norm": 0.18840613961219788, "learning_rate": 6.466862841361432e-05, "loss": 0.5697, "step": 5477 }, { "epoch": 1.1261177921677459, "grad_norm": 0.1877157837152481, "learning_rate": 6.465964440257438e-05, "loss": 0.5625, "step": 5478 }, { "epoch": 1.1263233631411245, "grad_norm": 0.25759997963905334, "learning_rate": 6.465065942299567e-05, "loss": 0.5425, "step": 5479 }, { "epoch": 1.126528934114503, "grad_norm": 0.19235903024673462, "learning_rate": 6.46416734753208e-05, "loss": 0.5562, "step": 5480 }, { "epoch": 1.1267345050878816, "grad_norm": 0.20213893055915833, "learning_rate": 6.46326865599925e-05, "loss": 0.5467, "step": 5481 }, { "epoch": 1.1269400760612602, "grad_norm": 0.19602036476135254, "learning_rate": 6.462369867745348e-05, "loss": 0.5814, "step": 5482 }, { "epoch": 1.1271456470346388, "grad_norm": 0.19586962461471558, "learning_rate": 6.461470982814657e-05, "loss": 0.5604, "step": 5483 }, { "epoch": 1.1273512180080172, "grad_norm": 0.18865470588207245, "learning_rate": 6.460572001251456e-05, "loss": 0.5345, "step": 5484 }, { "epoch": 1.1275567889813958, "grad_norm": 0.19333775341510773, "learning_rate": 6.459672923100036e-05, "loss": 0.572, "step": 5485 }, { "epoch": 1.1277623599547744, "grad_norm": 0.1982879638671875, "learning_rate": 6.458773748404693e-05, "loss": 0.593, "step": 5486 }, { "epoch": 1.127967930928153, "grad_norm": 0.19559934735298157, "learning_rate": 6.457874477209722e-05, "loss": 0.5625, "step": 5487 }, { "epoch": 1.1281735019015315, "grad_norm": 0.190285325050354, "learning_rate": 6.456975109559425e-05, "loss": 0.5579, "step": 5488 }, { "epoch": 1.1283790728749101, "grad_norm": 0.1935376673936844, "learning_rate": 6.456075645498113e-05, "loss": 0.5611, "step": 5489 }, { "epoch": 1.1285846438482887, "grad_norm": 0.1908402442932129, "learning_rate": 6.455176085070095e-05, "loss": 0.5556, "step": 5490 }, { "epoch": 1.128790214821667, "grad_norm": 0.1894407868385315, "learning_rate": 6.45427642831969e-05, "loss": 0.5655, "step": 5491 }, { "epoch": 1.1289957857950457, "grad_norm": 0.16991651058197021, "learning_rate": 6.453376675291221e-05, "loss": 0.5269, "step": 5492 }, { "epoch": 1.1292013567684243, "grad_norm": 0.14893554151058197, "learning_rate": 6.452476826029012e-05, "loss": 0.5192, "step": 5493 }, { "epoch": 1.1294069277418028, "grad_norm": 0.15781262516975403, "learning_rate": 6.451576880577397e-05, "loss": 0.5827, "step": 5494 }, { "epoch": 1.1296124987151814, "grad_norm": 0.16692712903022766, "learning_rate": 6.45067683898071e-05, "loss": 0.5338, "step": 5495 }, { "epoch": 1.12981806968856, "grad_norm": 0.18039274215698242, "learning_rate": 6.449776701283292e-05, "loss": 0.5598, "step": 5496 }, { "epoch": 1.1300236406619386, "grad_norm": 0.20324920117855072, "learning_rate": 6.448876467529488e-05, "loss": 0.5711, "step": 5497 }, { "epoch": 1.1302292116353172, "grad_norm": 0.19356949627399445, "learning_rate": 6.447976137763652e-05, "loss": 0.5498, "step": 5498 }, { "epoch": 1.1304347826086956, "grad_norm": 0.19591811299324036, "learning_rate": 6.447075712030135e-05, "loss": 0.5585, "step": 5499 }, { "epoch": 1.1306403535820742, "grad_norm": 0.18893134593963623, "learning_rate": 6.4461751903733e-05, "loss": 0.5425, "step": 5500 }, { "epoch": 1.1308459245554527, "grad_norm": 0.1979568600654602, "learning_rate": 6.445274572837509e-05, "loss": 0.5395, "step": 5501 }, { "epoch": 1.1310514955288313, "grad_norm": 0.20097365975379944, "learning_rate": 6.444373859467131e-05, "loss": 0.5571, "step": 5502 }, { "epoch": 1.13125706650221, "grad_norm": 0.1974884420633316, "learning_rate": 6.443473050306541e-05, "loss": 0.5778, "step": 5503 }, { "epoch": 1.1314626374755885, "grad_norm": 0.19491606950759888, "learning_rate": 6.442572145400119e-05, "loss": 0.5408, "step": 5504 }, { "epoch": 1.131668208448967, "grad_norm": 0.2038283497095108, "learning_rate": 6.441671144792245e-05, "loss": 0.5597, "step": 5505 }, { "epoch": 1.1318737794223455, "grad_norm": 0.2011345475912094, "learning_rate": 6.440770048527311e-05, "loss": 0.5645, "step": 5506 }, { "epoch": 1.132079350395724, "grad_norm": 0.20046375691890717, "learning_rate": 6.439868856649706e-05, "loss": 0.565, "step": 5507 }, { "epoch": 1.1322849213691026, "grad_norm": 0.19624361395835876, "learning_rate": 6.438967569203831e-05, "loss": 0.5556, "step": 5508 }, { "epoch": 1.1324904923424812, "grad_norm": 0.19601401686668396, "learning_rate": 6.438066186234086e-05, "loss": 0.5608, "step": 5509 }, { "epoch": 1.1326960633158598, "grad_norm": 0.19871017336845398, "learning_rate": 6.437164707784877e-05, "loss": 0.5616, "step": 5510 }, { "epoch": 1.1329016342892384, "grad_norm": 0.19127802550792694, "learning_rate": 6.43626313390062e-05, "loss": 0.5778, "step": 5511 }, { "epoch": 1.133107205262617, "grad_norm": 0.19276481866836548, "learning_rate": 6.435361464625726e-05, "loss": 0.5488, "step": 5512 }, { "epoch": 1.1333127762359956, "grad_norm": 0.19331035017967224, "learning_rate": 6.434459700004619e-05, "loss": 0.5149, "step": 5513 }, { "epoch": 1.133518347209374, "grad_norm": 0.19106508791446686, "learning_rate": 6.433557840081726e-05, "loss": 0.5277, "step": 5514 }, { "epoch": 1.1337239181827525, "grad_norm": 0.1550726294517517, "learning_rate": 6.432655884901473e-05, "loss": 0.5596, "step": 5515 }, { "epoch": 1.1339294891561311, "grad_norm": 0.20075179636478424, "learning_rate": 6.431753834508299e-05, "loss": 0.5461, "step": 5516 }, { "epoch": 1.1341350601295097, "grad_norm": 0.20653320848941803, "learning_rate": 6.430851688946643e-05, "loss": 0.6038, "step": 5517 }, { "epoch": 1.1343406311028883, "grad_norm": 0.19482316076755524, "learning_rate": 6.42994944826095e-05, "loss": 0.5716, "step": 5518 }, { "epoch": 1.1345462020762669, "grad_norm": 0.18027710914611816, "learning_rate": 6.429047112495667e-05, "loss": 0.5531, "step": 5519 }, { "epoch": 1.1347517730496455, "grad_norm": 0.15849310159683228, "learning_rate": 6.428144681695247e-05, "loss": 0.5674, "step": 5520 }, { "epoch": 1.1349573440230238, "grad_norm": 0.19099898636341095, "learning_rate": 6.427242155904154e-05, "loss": 0.5405, "step": 5521 }, { "epoch": 1.1351629149964024, "grad_norm": 0.2118232399225235, "learning_rate": 6.426339535166847e-05, "loss": 0.5569, "step": 5522 }, { "epoch": 1.135368485969781, "grad_norm": 0.19552506506443024, "learning_rate": 6.425436819527792e-05, "loss": 0.5575, "step": 5523 }, { "epoch": 1.1355740569431596, "grad_norm": 0.19680903851985931, "learning_rate": 6.424534009031468e-05, "loss": 0.5644, "step": 5524 }, { "epoch": 1.1357796279165382, "grad_norm": 0.19150924682617188, "learning_rate": 6.423631103722348e-05, "loss": 0.5453, "step": 5525 }, { "epoch": 1.1359851988899168, "grad_norm": 0.19185394048690796, "learning_rate": 6.422728103644915e-05, "loss": 0.5408, "step": 5526 }, { "epoch": 1.1361907698632954, "grad_norm": 0.19700084626674652, "learning_rate": 6.421825008843652e-05, "loss": 0.5664, "step": 5527 }, { "epoch": 1.136396340836674, "grad_norm": 0.19622080028057098, "learning_rate": 6.420921819363057e-05, "loss": 0.5848, "step": 5528 }, { "epoch": 1.1366019118100525, "grad_norm": 0.19052082300186157, "learning_rate": 6.420018535247621e-05, "loss": 0.5607, "step": 5529 }, { "epoch": 1.136807482783431, "grad_norm": 0.18648898601531982, "learning_rate": 6.419115156541846e-05, "loss": 0.5627, "step": 5530 }, { "epoch": 1.1370130537568095, "grad_norm": 0.20063170790672302, "learning_rate": 6.418211683290235e-05, "loss": 0.5857, "step": 5531 }, { "epoch": 1.137218624730188, "grad_norm": 0.18962214887142181, "learning_rate": 6.417308115537303e-05, "loss": 0.5854, "step": 5532 }, { "epoch": 1.1374241957035667, "grad_norm": 0.20246468484401703, "learning_rate": 6.41640445332756e-05, "loss": 0.5883, "step": 5533 }, { "epoch": 1.1376297666769453, "grad_norm": 0.18931740522384644, "learning_rate": 6.415500696705528e-05, "loss": 0.5262, "step": 5534 }, { "epoch": 1.1378353376503239, "grad_norm": 0.19331716001033783, "learning_rate": 6.41459684571573e-05, "loss": 0.5534, "step": 5535 }, { "epoch": 1.1380409086237022, "grad_norm": 0.19788740575313568, "learning_rate": 6.413692900402693e-05, "loss": 0.5702, "step": 5536 }, { "epoch": 1.1382464795970808, "grad_norm": 0.19547824561595917, "learning_rate": 6.41278886081095e-05, "loss": 0.5647, "step": 5537 }, { "epoch": 1.1384520505704594, "grad_norm": 0.1888136863708496, "learning_rate": 6.411884726985043e-05, "loss": 0.5445, "step": 5538 }, { "epoch": 1.138657621543838, "grad_norm": 0.19497732818126678, "learning_rate": 6.410980498969512e-05, "loss": 0.5777, "step": 5539 }, { "epoch": 1.1388631925172166, "grad_norm": 0.18465173244476318, "learning_rate": 6.410076176808901e-05, "loss": 0.5299, "step": 5540 }, { "epoch": 1.1390687634905952, "grad_norm": 0.1675313413143158, "learning_rate": 6.409171760547765e-05, "loss": 0.5722, "step": 5541 }, { "epoch": 1.1392743344639737, "grad_norm": 0.2085336148738861, "learning_rate": 6.408267250230661e-05, "loss": 0.5745, "step": 5542 }, { "epoch": 1.1394799054373523, "grad_norm": 0.19899022579193115, "learning_rate": 6.407362645902148e-05, "loss": 0.5709, "step": 5543 }, { "epoch": 1.139685476410731, "grad_norm": 0.1954008936882019, "learning_rate": 6.406457947606792e-05, "loss": 0.5704, "step": 5544 }, { "epoch": 1.1398910473841093, "grad_norm": 0.17613859474658966, "learning_rate": 6.405553155389165e-05, "loss": 0.5395, "step": 5545 }, { "epoch": 1.1400966183574879, "grad_norm": 0.1824086457490921, "learning_rate": 6.40464826929384e-05, "loss": 0.5558, "step": 5546 }, { "epoch": 1.1403021893308665, "grad_norm": 0.20690536499023438, "learning_rate": 6.403743289365398e-05, "loss": 0.5626, "step": 5547 }, { "epoch": 1.140507760304245, "grad_norm": 0.20793819427490234, "learning_rate": 6.40283821564842e-05, "loss": 0.5819, "step": 5548 }, { "epoch": 1.1407133312776236, "grad_norm": 0.19500964879989624, "learning_rate": 6.401933048187499e-05, "loss": 0.5696, "step": 5549 }, { "epoch": 1.1409189022510022, "grad_norm": 0.19967152178287506, "learning_rate": 6.401027787027225e-05, "loss": 0.5567, "step": 5550 }, { "epoch": 1.1411244732243806, "grad_norm": 0.19124870002269745, "learning_rate": 6.400122432212198e-05, "loss": 0.5276, "step": 5551 }, { "epoch": 1.1413300441977592, "grad_norm": 0.1926882117986679, "learning_rate": 6.399216983787019e-05, "loss": 0.5785, "step": 5552 }, { "epoch": 1.1415356151711378, "grad_norm": 0.1873985081911087, "learning_rate": 6.398311441796297e-05, "loss": 0.5496, "step": 5553 }, { "epoch": 1.1417411861445164, "grad_norm": 0.164115771651268, "learning_rate": 6.397405806284642e-05, "loss": 0.5343, "step": 5554 }, { "epoch": 1.141946757117895, "grad_norm": 0.16665267944335938, "learning_rate": 6.396500077296673e-05, "loss": 0.5769, "step": 5555 }, { "epoch": 1.1421523280912735, "grad_norm": 0.1954329013824463, "learning_rate": 6.395594254877009e-05, "loss": 0.5652, "step": 5556 }, { "epoch": 1.1423578990646521, "grad_norm": 0.19422973692417145, "learning_rate": 6.394688339070277e-05, "loss": 0.5596, "step": 5557 }, { "epoch": 1.1425634700380307, "grad_norm": 0.19732142984867096, "learning_rate": 6.393782329921104e-05, "loss": 0.5887, "step": 5558 }, { "epoch": 1.1427690410114093, "grad_norm": 0.195445254445076, "learning_rate": 6.392876227474128e-05, "loss": 0.5737, "step": 5559 }, { "epoch": 1.1429746119847877, "grad_norm": 0.18976660072803497, "learning_rate": 6.391970031773988e-05, "loss": 0.5693, "step": 5560 }, { "epoch": 1.1431801829581663, "grad_norm": 0.18721553683280945, "learning_rate": 6.391063742865327e-05, "loss": 0.5393, "step": 5561 }, { "epoch": 1.1433857539315448, "grad_norm": 0.19081273674964905, "learning_rate": 6.390157360792794e-05, "loss": 0.5565, "step": 5562 }, { "epoch": 1.1435913249049234, "grad_norm": 0.19391131401062012, "learning_rate": 6.389250885601043e-05, "loss": 0.5571, "step": 5563 }, { "epoch": 1.143796895878302, "grad_norm": 0.18970650434494019, "learning_rate": 6.388344317334732e-05, "loss": 0.571, "step": 5564 }, { "epoch": 1.1440024668516806, "grad_norm": 0.1937200129032135, "learning_rate": 6.38743765603852e-05, "loss": 0.5619, "step": 5565 }, { "epoch": 1.1442080378250592, "grad_norm": 0.189813494682312, "learning_rate": 6.386530901757078e-05, "loss": 0.562, "step": 5566 }, { "epoch": 1.1444136087984376, "grad_norm": 0.19848157465457916, "learning_rate": 6.385624054535078e-05, "loss": 0.5776, "step": 5567 }, { "epoch": 1.1446191797718162, "grad_norm": 0.19412924349308014, "learning_rate": 6.384717114417191e-05, "loss": 0.5637, "step": 5568 }, { "epoch": 1.1448247507451947, "grad_norm": 0.20294548571109772, "learning_rate": 6.383810081448103e-05, "loss": 0.5626, "step": 5569 }, { "epoch": 1.1450303217185733, "grad_norm": 0.17422319948673248, "learning_rate": 6.382902955672496e-05, "loss": 0.5506, "step": 5570 }, { "epoch": 1.145235892691952, "grad_norm": 0.15921704471111298, "learning_rate": 6.381995737135062e-05, "loss": 0.5882, "step": 5571 }, { "epoch": 1.1454414636653305, "grad_norm": 0.20002704858779907, "learning_rate": 6.381088425880495e-05, "loss": 0.5677, "step": 5572 }, { "epoch": 1.145647034638709, "grad_norm": 0.1957893818616867, "learning_rate": 6.38018102195349e-05, "loss": 0.5629, "step": 5573 }, { "epoch": 1.1458526056120877, "grad_norm": 0.19180312752723694, "learning_rate": 6.379273525398758e-05, "loss": 0.5645, "step": 5574 }, { "epoch": 1.146058176585466, "grad_norm": 0.18908941745758057, "learning_rate": 6.378365936261e-05, "loss": 0.558, "step": 5575 }, { "epoch": 1.1462637475588446, "grad_norm": 0.19693338871002197, "learning_rate": 6.377458254584934e-05, "loss": 0.5741, "step": 5576 }, { "epoch": 1.1464693185322232, "grad_norm": 0.190039724111557, "learning_rate": 6.376550480415275e-05, "loss": 0.5431, "step": 5577 }, { "epoch": 1.1466748895056018, "grad_norm": 0.1961604356765747, "learning_rate": 6.375642613796745e-05, "loss": 0.563, "step": 5578 }, { "epoch": 1.1468804604789804, "grad_norm": 0.19689500331878662, "learning_rate": 6.374734654774068e-05, "loss": 0.5579, "step": 5579 }, { "epoch": 1.147086031452359, "grad_norm": 0.1831909865140915, "learning_rate": 6.373826603391979e-05, "loss": 0.5688, "step": 5580 }, { "epoch": 1.1472916024257376, "grad_norm": 0.18834874033927917, "learning_rate": 6.372918459695212e-05, "loss": 0.55, "step": 5581 }, { "epoch": 1.147497173399116, "grad_norm": 0.19605682790279388, "learning_rate": 6.372010223728504e-05, "loss": 0.5774, "step": 5582 }, { "epoch": 1.1477027443724945, "grad_norm": 0.19253866374492645, "learning_rate": 6.371101895536605e-05, "loss": 0.589, "step": 5583 }, { "epoch": 1.1479083153458731, "grad_norm": 0.18777357041835785, "learning_rate": 6.370193475164258e-05, "loss": 0.5665, "step": 5584 }, { "epoch": 1.1481138863192517, "grad_norm": 0.1673029363155365, "learning_rate": 6.36928496265622e-05, "loss": 0.5298, "step": 5585 }, { "epoch": 1.1483194572926303, "grad_norm": 0.15895609557628632, "learning_rate": 6.36837635805725e-05, "loss": 0.5323, "step": 5586 }, { "epoch": 1.1485250282660089, "grad_norm": 0.19794027507305145, "learning_rate": 6.367467661412111e-05, "loss": 0.5677, "step": 5587 }, { "epoch": 1.1487305992393875, "grad_norm": 0.20095770061016083, "learning_rate": 6.366558872765569e-05, "loss": 0.5562, "step": 5588 }, { "epoch": 1.148936170212766, "grad_norm": 0.16546010971069336, "learning_rate": 6.365649992162393e-05, "loss": 0.5212, "step": 5589 }, { "epoch": 1.1491417411861444, "grad_norm": 0.16688905656337738, "learning_rate": 6.364741019647363e-05, "loss": 0.5421, "step": 5590 }, { "epoch": 1.149347312159523, "grad_norm": 0.12764035165309906, "learning_rate": 6.36383195526526e-05, "loss": 0.5154, "step": 5591 }, { "epoch": 1.1495528831329016, "grad_norm": 0.16854409873485565, "learning_rate": 6.362922799060866e-05, "loss": 0.5689, "step": 5592 }, { "epoch": 1.1497584541062802, "grad_norm": 0.19557413458824158, "learning_rate": 6.362013551078974e-05, "loss": 0.5581, "step": 5593 }, { "epoch": 1.1499640250796588, "grad_norm": 0.19200196862220764, "learning_rate": 6.361104211364377e-05, "loss": 0.5744, "step": 5594 }, { "epoch": 1.1501695960530374, "grad_norm": 0.19255445897579193, "learning_rate": 6.360194779961875e-05, "loss": 0.5677, "step": 5595 }, { "epoch": 1.150375167026416, "grad_norm": 0.1860598772764206, "learning_rate": 6.359285256916269e-05, "loss": 0.5239, "step": 5596 }, { "epoch": 1.1505807379997943, "grad_norm": 0.18977835774421692, "learning_rate": 6.358375642272371e-05, "loss": 0.5502, "step": 5597 }, { "epoch": 1.150786308973173, "grad_norm": 0.18825951218605042, "learning_rate": 6.35746593607499e-05, "loss": 0.5701, "step": 5598 }, { "epoch": 1.1509918799465515, "grad_norm": 0.1946858912706375, "learning_rate": 6.356556138368945e-05, "loss": 0.5735, "step": 5599 }, { "epoch": 1.15119745091993, "grad_norm": 0.1934114545583725, "learning_rate": 6.355646249199055e-05, "loss": 0.57, "step": 5600 }, { "epoch": 1.1514030218933087, "grad_norm": 0.19345784187316895, "learning_rate": 6.354736268610148e-05, "loss": 0.568, "step": 5601 }, { "epoch": 1.1516085928666873, "grad_norm": 0.1907486766576767, "learning_rate": 6.353826196647056e-05, "loss": 0.5609, "step": 5602 }, { "epoch": 1.1518141638400659, "grad_norm": 0.19529633224010468, "learning_rate": 6.35291603335461e-05, "loss": 0.5531, "step": 5603 }, { "epoch": 1.1520197348134444, "grad_norm": 0.19347496330738068, "learning_rate": 6.352005778777652e-05, "loss": 0.5748, "step": 5604 }, { "epoch": 1.1522253057868228, "grad_norm": 0.1948879212141037, "learning_rate": 6.351095432961024e-05, "loss": 0.565, "step": 5605 }, { "epoch": 1.1524308767602014, "grad_norm": 0.19510291516780853, "learning_rate": 6.350184995949578e-05, "loss": 0.5492, "step": 5606 }, { "epoch": 1.15263644773358, "grad_norm": 0.198397696018219, "learning_rate": 6.349274467788165e-05, "loss": 0.5506, "step": 5607 }, { "epoch": 1.1528420187069586, "grad_norm": 0.1937544345855713, "learning_rate": 6.348363848521643e-05, "loss": 0.556, "step": 5608 }, { "epoch": 1.1530475896803372, "grad_norm": 0.1949324756860733, "learning_rate": 6.347453138194872e-05, "loss": 0.5608, "step": 5609 }, { "epoch": 1.1532531606537157, "grad_norm": 0.18160304427146912, "learning_rate": 6.34654233685272e-05, "loss": 0.5396, "step": 5610 }, { "epoch": 1.1534587316270943, "grad_norm": 0.1651293933391571, "learning_rate": 6.345631444540058e-05, "loss": 0.5618, "step": 5611 }, { "epoch": 1.1536643026004727, "grad_norm": 0.19430503249168396, "learning_rate": 6.344720461301761e-05, "loss": 0.5766, "step": 5612 }, { "epoch": 1.1538698735738513, "grad_norm": 0.19232423603534698, "learning_rate": 6.34380938718271e-05, "loss": 0.5543, "step": 5613 }, { "epoch": 1.1540754445472299, "grad_norm": 0.19700485467910767, "learning_rate": 6.342898222227788e-05, "loss": 0.6007, "step": 5614 }, { "epoch": 1.1542810155206085, "grad_norm": 0.18897385895252228, "learning_rate": 6.341986966481883e-05, "loss": 0.5658, "step": 5615 }, { "epoch": 1.154486586493987, "grad_norm": 0.18891417980194092, "learning_rate": 6.341075619989891e-05, "loss": 0.5725, "step": 5616 }, { "epoch": 1.1546921574673656, "grad_norm": 0.17012788355350494, "learning_rate": 6.340164182796707e-05, "loss": 0.5365, "step": 5617 }, { "epoch": 1.1548977284407442, "grad_norm": 0.16269567608833313, "learning_rate": 6.339252654947236e-05, "loss": 0.5708, "step": 5618 }, { "epoch": 1.1551032994141228, "grad_norm": 0.19354234635829926, "learning_rate": 6.338341036486385e-05, "loss": 0.5645, "step": 5619 }, { "epoch": 1.1553088703875014, "grad_norm": 0.19386227428913116, "learning_rate": 6.33742932745906e-05, "loss": 0.5772, "step": 5620 }, { "epoch": 1.1555144413608798, "grad_norm": 0.17871583998203278, "learning_rate": 6.336517527910182e-05, "loss": 0.5568, "step": 5621 }, { "epoch": 1.1557200123342584, "grad_norm": 0.18921589851379395, "learning_rate": 6.335605637884668e-05, "loss": 0.5555, "step": 5622 }, { "epoch": 1.155925583307637, "grad_norm": 0.19476552307605743, "learning_rate": 6.334693657427446e-05, "loss": 0.5581, "step": 5623 }, { "epoch": 1.1561311542810155, "grad_norm": 0.18380312621593475, "learning_rate": 6.333781586583441e-05, "loss": 0.5322, "step": 5624 }, { "epoch": 1.1563367252543941, "grad_norm": 0.18677309155464172, "learning_rate": 6.332869425397588e-05, "loss": 0.5712, "step": 5625 }, { "epoch": 1.1565422962277727, "grad_norm": 0.19158649444580078, "learning_rate": 6.331957173914826e-05, "loss": 0.5846, "step": 5626 }, { "epoch": 1.156747867201151, "grad_norm": 0.19586919248104095, "learning_rate": 6.331044832180098e-05, "loss": 0.5589, "step": 5627 }, { "epoch": 1.1569534381745297, "grad_norm": 0.15967777371406555, "learning_rate": 6.330132400238347e-05, "loss": 0.5268, "step": 5628 }, { "epoch": 1.1571590091479083, "grad_norm": 0.1551171988248825, "learning_rate": 6.329219878134528e-05, "loss": 0.5509, "step": 5629 }, { "epoch": 1.1573645801212868, "grad_norm": 0.18467473983764648, "learning_rate": 6.328307265913595e-05, "loss": 0.5574, "step": 5630 }, { "epoch": 1.1575701510946654, "grad_norm": 0.19859431684017181, "learning_rate": 6.327394563620509e-05, "loss": 0.5613, "step": 5631 }, { "epoch": 1.157775722068044, "grad_norm": 0.19411081075668335, "learning_rate": 6.326481771300234e-05, "loss": 0.5589, "step": 5632 }, { "epoch": 1.1579812930414226, "grad_norm": 0.18985684216022491, "learning_rate": 6.325568888997739e-05, "loss": 0.5673, "step": 5633 }, { "epoch": 1.1581868640148012, "grad_norm": 0.1923382729291916, "learning_rate": 6.324655916757997e-05, "loss": 0.558, "step": 5634 }, { "epoch": 1.1583924349881798, "grad_norm": 0.20484760403633118, "learning_rate": 6.323742854625986e-05, "loss": 0.5561, "step": 5635 }, { "epoch": 1.1585980059615582, "grad_norm": 0.15869790315628052, "learning_rate": 6.32282970264669e-05, "loss": 0.5412, "step": 5636 }, { "epoch": 1.1588035769349367, "grad_norm": 0.16667144000530243, "learning_rate": 6.321916460865092e-05, "loss": 0.5605, "step": 5637 }, { "epoch": 1.1590091479083153, "grad_norm": 0.1636246144771576, "learning_rate": 6.321003129326187e-05, "loss": 0.5297, "step": 5638 }, { "epoch": 1.159214718881694, "grad_norm": 0.1557888388633728, "learning_rate": 6.320089708074971e-05, "loss": 0.5433, "step": 5639 }, { "epoch": 1.1594202898550725, "grad_norm": 0.18941344320774078, "learning_rate": 6.31917619715644e-05, "loss": 0.552, "step": 5640 }, { "epoch": 1.159625860828451, "grad_norm": 0.18825402855873108, "learning_rate": 6.318262596615602e-05, "loss": 0.5447, "step": 5641 }, { "epoch": 1.1598314318018295, "grad_norm": 0.16153112053871155, "learning_rate": 6.317348906497463e-05, "loss": 0.5363, "step": 5642 }, { "epoch": 1.160037002775208, "grad_norm": 0.15847079455852509, "learning_rate": 6.31643512684704e-05, "loss": 0.5523, "step": 5643 }, { "epoch": 1.1602425737485866, "grad_norm": 0.19507279992103577, "learning_rate": 6.315521257709345e-05, "loss": 0.5575, "step": 5644 }, { "epoch": 1.1604481447219652, "grad_norm": 0.19227170944213867, "learning_rate": 6.314607299129406e-05, "loss": 0.5725, "step": 5645 }, { "epoch": 1.1606537156953438, "grad_norm": 0.18888604640960693, "learning_rate": 6.313693251152247e-05, "loss": 0.5532, "step": 5646 }, { "epoch": 1.1608592866687224, "grad_norm": 0.20485495030879974, "learning_rate": 6.312779113822896e-05, "loss": 0.5469, "step": 5647 }, { "epoch": 1.161064857642101, "grad_norm": 0.1900404691696167, "learning_rate": 6.311864887186393e-05, "loss": 0.5593, "step": 5648 }, { "epoch": 1.1612704286154796, "grad_norm": 0.1928151249885559, "learning_rate": 6.310950571287774e-05, "loss": 0.553, "step": 5649 }, { "epoch": 1.1614759995888582, "grad_norm": 0.2048550844192505, "learning_rate": 6.310036166172086e-05, "loss": 0.5602, "step": 5650 }, { "epoch": 1.1616815705622365, "grad_norm": 0.16492126882076263, "learning_rate": 6.309121671884375e-05, "loss": 0.5306, "step": 5651 }, { "epoch": 1.1618871415356151, "grad_norm": 0.1620352864265442, "learning_rate": 6.308207088469697e-05, "loss": 0.5384, "step": 5652 }, { "epoch": 1.1620927125089937, "grad_norm": 0.19016869366168976, "learning_rate": 6.307292415973108e-05, "loss": 0.5666, "step": 5653 }, { "epoch": 1.1622982834823723, "grad_norm": 0.18808448314666748, "learning_rate": 6.306377654439666e-05, "loss": 0.5522, "step": 5654 }, { "epoch": 1.1625038544557509, "grad_norm": 0.1816331297159195, "learning_rate": 6.305462803914441e-05, "loss": 0.543, "step": 5655 }, { "epoch": 1.1627094254291295, "grad_norm": 0.18618269264698029, "learning_rate": 6.304547864442503e-05, "loss": 0.5674, "step": 5656 }, { "epoch": 1.162914996402508, "grad_norm": 0.1989988088607788, "learning_rate": 6.303632836068925e-05, "loss": 0.5658, "step": 5657 }, { "epoch": 1.1631205673758864, "grad_norm": 0.1896226555109024, "learning_rate": 6.302717718838788e-05, "loss": 0.572, "step": 5658 }, { "epoch": 1.163326138349265, "grad_norm": 0.16433072090148926, "learning_rate": 6.301802512797176e-05, "loss": 0.542, "step": 5659 }, { "epoch": 1.1635317093226436, "grad_norm": 0.1611773520708084, "learning_rate": 6.300887217989174e-05, "loss": 0.5528, "step": 5660 }, { "epoch": 1.1637372802960222, "grad_norm": 0.1935834139585495, "learning_rate": 6.299971834459877e-05, "loss": 0.5699, "step": 5661 }, { "epoch": 1.1639428512694008, "grad_norm": 0.19088830053806305, "learning_rate": 6.29905636225438e-05, "loss": 0.5742, "step": 5662 }, { "epoch": 1.1641484222427794, "grad_norm": 0.1988966315984726, "learning_rate": 6.298140801417786e-05, "loss": 0.566, "step": 5663 }, { "epoch": 1.164353993216158, "grad_norm": 0.2001844048500061, "learning_rate": 6.297225151995198e-05, "loss": 0.5765, "step": 5664 }, { "epoch": 1.1645595641895365, "grad_norm": 0.16796830296516418, "learning_rate": 6.296309414031727e-05, "loss": 0.5534, "step": 5665 }, { "epoch": 1.164765135162915, "grad_norm": 0.15863637626171112, "learning_rate": 6.295393587572489e-05, "loss": 0.576, "step": 5666 }, { "epoch": 1.1649707061362935, "grad_norm": 0.19147972762584686, "learning_rate": 6.2944776726626e-05, "loss": 0.5694, "step": 5667 }, { "epoch": 1.165176277109672, "grad_norm": 0.18630050122737885, "learning_rate": 6.293561669347181e-05, "loss": 0.561, "step": 5668 }, { "epoch": 1.1653818480830507, "grad_norm": 0.1899455487728119, "learning_rate": 6.292645577671364e-05, "loss": 0.5807, "step": 5669 }, { "epoch": 1.1655874190564293, "grad_norm": 0.19663108885288239, "learning_rate": 6.291729397680277e-05, "loss": 0.5594, "step": 5670 }, { "epoch": 1.1657929900298079, "grad_norm": 0.18838699162006378, "learning_rate": 6.290813129419058e-05, "loss": 0.5572, "step": 5671 }, { "epoch": 1.1659985610031864, "grad_norm": 0.19074362516403198, "learning_rate": 6.289896772932845e-05, "loss": 0.5593, "step": 5672 }, { "epoch": 1.1662041319765648, "grad_norm": 0.1634715497493744, "learning_rate": 6.288980328266785e-05, "loss": 0.5333, "step": 5673 }, { "epoch": 1.1664097029499434, "grad_norm": 0.13483376801013947, "learning_rate": 6.288063795466027e-05, "loss": 0.5092, "step": 5674 }, { "epoch": 1.166615273923322, "grad_norm": 0.18257947266101837, "learning_rate": 6.28714717457572e-05, "loss": 0.5607, "step": 5675 }, { "epoch": 1.1668208448967006, "grad_norm": 0.19993911683559418, "learning_rate": 6.286230465641028e-05, "loss": 0.5628, "step": 5676 }, { "epoch": 1.1670264158700792, "grad_norm": 0.1948871612548828, "learning_rate": 6.28531366870711e-05, "loss": 0.5566, "step": 5677 }, { "epoch": 1.1672319868434577, "grad_norm": 0.1864452362060547, "learning_rate": 6.28439678381913e-05, "loss": 0.5496, "step": 5678 }, { "epoch": 1.1674375578168363, "grad_norm": 0.17033499479293823, "learning_rate": 6.28347981102226e-05, "loss": 0.5291, "step": 5679 }, { "epoch": 1.167643128790215, "grad_norm": 0.16329137980937958, "learning_rate": 6.282562750361679e-05, "loss": 0.5538, "step": 5680 }, { "epoch": 1.1678486997635933, "grad_norm": 0.20135296881198883, "learning_rate": 6.281645601882561e-05, "loss": 0.5409, "step": 5681 }, { "epoch": 1.1680542707369719, "grad_norm": 0.16525396704673767, "learning_rate": 6.28072836563009e-05, "loss": 0.5034, "step": 5682 }, { "epoch": 1.1682598417103505, "grad_norm": 0.16303305327892303, "learning_rate": 6.279811041649457e-05, "loss": 0.5464, "step": 5683 }, { "epoch": 1.168465412683729, "grad_norm": 0.20432288944721222, "learning_rate": 6.278893629985854e-05, "loss": 0.5617, "step": 5684 }, { "epoch": 1.1686709836571076, "grad_norm": 0.19627077877521515, "learning_rate": 6.277976130684476e-05, "loss": 0.5516, "step": 5685 }, { "epoch": 1.1688765546304862, "grad_norm": 0.19442994892597198, "learning_rate": 6.277058543790522e-05, "loss": 0.5859, "step": 5686 }, { "epoch": 1.1690821256038648, "grad_norm": 0.1668756902217865, "learning_rate": 6.276140869349202e-05, "loss": 0.5412, "step": 5687 }, { "epoch": 1.1692876965772432, "grad_norm": 0.16319718956947327, "learning_rate": 6.275223107405723e-05, "loss": 0.5365, "step": 5688 }, { "epoch": 1.1694932675506218, "grad_norm": 0.20029065012931824, "learning_rate": 6.274305258005296e-05, "loss": 0.5555, "step": 5689 }, { "epoch": 1.1696988385240004, "grad_norm": 0.16278813779354095, "learning_rate": 6.273387321193146e-05, "loss": 0.5314, "step": 5690 }, { "epoch": 1.169904409497379, "grad_norm": 0.16741250455379486, "learning_rate": 6.272469297014488e-05, "loss": 0.5435, "step": 5691 }, { "epoch": 1.1701099804707575, "grad_norm": 0.2003338634967804, "learning_rate": 6.271551185514553e-05, "loss": 0.5842, "step": 5692 }, { "epoch": 1.1703155514441361, "grad_norm": 0.17789803445339203, "learning_rate": 6.270632986738573e-05, "loss": 0.5276, "step": 5693 }, { "epoch": 1.1705211224175147, "grad_norm": 0.16743101179599762, "learning_rate": 6.269714700731782e-05, "loss": 0.5777, "step": 5694 }, { "epoch": 1.1707266933908933, "grad_norm": 0.19358138740062714, "learning_rate": 6.268796327539417e-05, "loss": 0.5585, "step": 5695 }, { "epoch": 1.1709322643642717, "grad_norm": 0.16014361381530762, "learning_rate": 6.267877867206724e-05, "loss": 0.506, "step": 5696 }, { "epoch": 1.1711378353376503, "grad_norm": 0.15720070898532867, "learning_rate": 6.266959319778953e-05, "loss": 0.5688, "step": 5697 }, { "epoch": 1.1713434063110288, "grad_norm": 0.1944281905889511, "learning_rate": 6.266040685301356e-05, "loss": 0.5611, "step": 5698 }, { "epoch": 1.1715489772844074, "grad_norm": 0.19197237491607666, "learning_rate": 6.265121963819189e-05, "loss": 0.5491, "step": 5699 }, { "epoch": 1.171754548257786, "grad_norm": 0.1880941390991211, "learning_rate": 6.26420315537771e-05, "loss": 0.5478, "step": 5700 }, { "epoch": 1.1719601192311646, "grad_norm": 0.18762564659118652, "learning_rate": 6.26328426002219e-05, "loss": 0.5592, "step": 5701 }, { "epoch": 1.1721656902045432, "grad_norm": 0.19078297913074493, "learning_rate": 6.262365277797894e-05, "loss": 0.5801, "step": 5702 }, { "epoch": 1.1723712611779216, "grad_norm": 0.15825822949409485, "learning_rate": 6.2614462087501e-05, "loss": 0.5238, "step": 5703 }, { "epoch": 1.1725768321513002, "grad_norm": 0.16313259303569794, "learning_rate": 6.260527052924083e-05, "loss": 0.5675, "step": 5704 }, { "epoch": 1.1727824031246787, "grad_norm": 0.20915348827838898, "learning_rate": 6.259607810365128e-05, "loss": 0.5871, "step": 5705 }, { "epoch": 1.1729879740980573, "grad_norm": 0.1840449571609497, "learning_rate": 6.258688481118519e-05, "loss": 0.5617, "step": 5706 }, { "epoch": 1.173193545071436, "grad_norm": 0.19125378131866455, "learning_rate": 6.257769065229551e-05, "loss": 0.5525, "step": 5707 }, { "epoch": 1.1733991160448145, "grad_norm": 0.16844969987869263, "learning_rate": 6.256849562743514e-05, "loss": 0.5422, "step": 5708 }, { "epoch": 1.173604687018193, "grad_norm": 0.17428073287010193, "learning_rate": 6.255929973705714e-05, "loss": 0.5564, "step": 5709 }, { "epoch": 1.1738102579915717, "grad_norm": 0.1962093710899353, "learning_rate": 6.255010298161448e-05, "loss": 0.5671, "step": 5710 }, { "epoch": 1.1740158289649503, "grad_norm": 0.19688303768634796, "learning_rate": 6.254090536156028e-05, "loss": 0.5736, "step": 5711 }, { "epoch": 1.1742213999383286, "grad_norm": 0.19924046099185944, "learning_rate": 6.253170687734769e-05, "loss": 0.5536, "step": 5712 }, { "epoch": 1.1744269709117072, "grad_norm": 0.21053309738636017, "learning_rate": 6.252250752942981e-05, "loss": 0.5725, "step": 5713 }, { "epoch": 1.1746325418850858, "grad_norm": 0.15548844635486603, "learning_rate": 6.251330731825989e-05, "loss": 0.5061, "step": 5714 }, { "epoch": 1.1748381128584644, "grad_norm": 0.16448529064655304, "learning_rate": 6.250410624429118e-05, "loss": 0.5618, "step": 5715 }, { "epoch": 1.175043683831843, "grad_norm": 0.19345583021640778, "learning_rate": 6.249490430797699e-05, "loss": 0.548, "step": 5716 }, { "epoch": 1.1752492548052216, "grad_norm": 0.19691455364227295, "learning_rate": 6.248570150977061e-05, "loss": 0.5466, "step": 5717 }, { "epoch": 1.1754548257786, "grad_norm": 0.19735218584537506, "learning_rate": 6.247649785012545e-05, "loss": 0.5595, "step": 5718 }, { "epoch": 1.1756603967519785, "grad_norm": 0.19617964327335358, "learning_rate": 6.246729332949493e-05, "loss": 0.5774, "step": 5719 }, { "epoch": 1.1758659677253571, "grad_norm": 0.19635650515556335, "learning_rate": 6.24580879483325e-05, "loss": 0.5542, "step": 5720 }, { "epoch": 1.1760715386987357, "grad_norm": 0.19671329855918884, "learning_rate": 6.244888170709169e-05, "loss": 0.5775, "step": 5721 }, { "epoch": 1.1762771096721143, "grad_norm": 0.20057837665081024, "learning_rate": 6.243967460622603e-05, "loss": 0.5706, "step": 5722 }, { "epoch": 1.1764826806454929, "grad_norm": 0.1965552419424057, "learning_rate": 6.243046664618911e-05, "loss": 0.5698, "step": 5723 }, { "epoch": 1.1766882516188715, "grad_norm": 0.19308249652385712, "learning_rate": 6.242125782743456e-05, "loss": 0.5642, "step": 5724 }, { "epoch": 1.17689382259225, "grad_norm": 0.19306235015392303, "learning_rate": 6.241204815041608e-05, "loss": 0.576, "step": 5725 }, { "epoch": 1.1770993935656286, "grad_norm": 0.18735530972480774, "learning_rate": 6.240283761558737e-05, "loss": 0.5678, "step": 5726 }, { "epoch": 1.177304964539007, "grad_norm": 0.1929217427968979, "learning_rate": 6.239362622340218e-05, "loss": 0.5542, "step": 5727 }, { "epoch": 1.1775105355123856, "grad_norm": 0.19190043210983276, "learning_rate": 6.238441397431433e-05, "loss": 0.5836, "step": 5728 }, { "epoch": 1.1777161064857642, "grad_norm": 0.1934564858675003, "learning_rate": 6.237520086877766e-05, "loss": 0.5532, "step": 5729 }, { "epoch": 1.1779216774591428, "grad_norm": 0.16846685111522675, "learning_rate": 6.236598690724606e-05, "loss": 0.5279, "step": 5730 }, { "epoch": 1.1781272484325214, "grad_norm": 0.1717388778924942, "learning_rate": 6.235677209017345e-05, "loss": 0.5595, "step": 5731 }, { "epoch": 1.1783328194059, "grad_norm": 0.18958315253257751, "learning_rate": 6.234755641801379e-05, "loss": 0.5657, "step": 5732 }, { "epoch": 1.1785383903792783, "grad_norm": 0.19686202704906464, "learning_rate": 6.233833989122112e-05, "loss": 0.5983, "step": 5733 }, { "epoch": 1.178743961352657, "grad_norm": 0.1927022784948349, "learning_rate": 6.232912251024948e-05, "loss": 0.5968, "step": 5734 }, { "epoch": 1.1789495323260355, "grad_norm": 0.19848833978176117, "learning_rate": 6.231990427555297e-05, "loss": 0.5491, "step": 5735 }, { "epoch": 1.179155103299414, "grad_norm": 0.189555823802948, "learning_rate": 6.231068518758572e-05, "loss": 0.5525, "step": 5736 }, { "epoch": 1.1793606742727927, "grad_norm": 0.19321559369564056, "learning_rate": 6.230146524680194e-05, "loss": 0.5792, "step": 5737 }, { "epoch": 1.1795662452461713, "grad_norm": 0.19412335753440857, "learning_rate": 6.229224445365582e-05, "loss": 0.5731, "step": 5738 }, { "epoch": 1.1797718162195499, "grad_norm": 0.20160719752311707, "learning_rate": 6.228302280860166e-05, "loss": 0.5931, "step": 5739 }, { "epoch": 1.1799773871929284, "grad_norm": 0.19900692999362946, "learning_rate": 6.227380031209373e-05, "loss": 0.5437, "step": 5740 }, { "epoch": 1.180182958166307, "grad_norm": 0.19047874212265015, "learning_rate": 6.226457696458639e-05, "loss": 0.5529, "step": 5741 }, { "epoch": 1.1803885291396854, "grad_norm": 0.19529984891414642, "learning_rate": 6.225535276653405e-05, "loss": 0.5672, "step": 5742 }, { "epoch": 1.180594100113064, "grad_norm": 0.19696053862571716, "learning_rate": 6.224612771839113e-05, "loss": 0.572, "step": 5743 }, { "epoch": 1.1807996710864426, "grad_norm": 0.19073131680488586, "learning_rate": 6.22369018206121e-05, "loss": 0.5524, "step": 5744 }, { "epoch": 1.1810052420598212, "grad_norm": 0.18917502462863922, "learning_rate": 6.222767507365148e-05, "loss": 0.5542, "step": 5745 }, { "epoch": 1.1812108130331997, "grad_norm": 0.19207759201526642, "learning_rate": 6.221844747796384e-05, "loss": 0.5594, "step": 5746 }, { "epoch": 1.1814163840065783, "grad_norm": 0.1916734278202057, "learning_rate": 6.220921903400376e-05, "loss": 0.554, "step": 5747 }, { "epoch": 1.181621954979957, "grad_norm": 0.1720525622367859, "learning_rate": 6.21999897422259e-05, "loss": 0.517, "step": 5748 }, { "epoch": 1.1818275259533353, "grad_norm": 0.1582804173231125, "learning_rate": 6.219075960308494e-05, "loss": 0.5714, "step": 5749 }, { "epoch": 1.1820330969267139, "grad_norm": 0.20018833875656128, "learning_rate": 6.218152861703561e-05, "loss": 0.5783, "step": 5750 }, { "epoch": 1.1822386679000925, "grad_norm": 0.16681919991970062, "learning_rate": 6.217229678453265e-05, "loss": 0.5182, "step": 5751 }, { "epoch": 1.182444238873471, "grad_norm": 0.1674472838640213, "learning_rate": 6.21630641060309e-05, "loss": 0.5756, "step": 5752 }, { "epoch": 1.1826498098468496, "grad_norm": 0.19080859422683716, "learning_rate": 6.215383058198521e-05, "loss": 0.5616, "step": 5753 }, { "epoch": 1.1828553808202282, "grad_norm": 0.18792377412319183, "learning_rate": 6.214459621285047e-05, "loss": 0.5482, "step": 5754 }, { "epoch": 1.1830609517936068, "grad_norm": 0.1907912641763687, "learning_rate": 6.21353609990816e-05, "loss": 0.5613, "step": 5755 }, { "epoch": 1.1832665227669854, "grad_norm": 0.1828346997499466, "learning_rate": 6.212612494113358e-05, "loss": 0.5496, "step": 5756 }, { "epoch": 1.1834720937403638, "grad_norm": 0.19093002378940582, "learning_rate": 6.211688803946142e-05, "loss": 0.5769, "step": 5757 }, { "epoch": 1.1836776647137424, "grad_norm": 0.1904676854610443, "learning_rate": 6.21076502945202e-05, "loss": 0.5385, "step": 5758 }, { "epoch": 1.183883235687121, "grad_norm": 0.1881975680589676, "learning_rate": 6.209841170676502e-05, "loss": 0.5633, "step": 5759 }, { "epoch": 1.1840888066604995, "grad_norm": 0.20327463746070862, "learning_rate": 6.208917227665102e-05, "loss": 0.5714, "step": 5760 }, { "epoch": 1.1842943776338781, "grad_norm": 0.18997357785701752, "learning_rate": 6.207993200463335e-05, "loss": 0.551, "step": 5761 }, { "epoch": 1.1844999486072567, "grad_norm": 0.1653435230255127, "learning_rate": 6.207069089116728e-05, "loss": 0.5465, "step": 5762 }, { "epoch": 1.1847055195806353, "grad_norm": 0.1645163893699646, "learning_rate": 6.206144893670805e-05, "loss": 0.5411, "step": 5763 }, { "epoch": 1.1849110905540137, "grad_norm": 0.18971189856529236, "learning_rate": 6.205220614171098e-05, "loss": 0.5724, "step": 5764 }, { "epoch": 1.1851166615273923, "grad_norm": 0.19266551733016968, "learning_rate": 6.204296250663142e-05, "loss": 0.544, "step": 5765 }, { "epoch": 1.1853222325007708, "grad_norm": 0.1676861196756363, "learning_rate": 6.203371803192475e-05, "loss": 0.5232, "step": 5766 }, { "epoch": 1.1855278034741494, "grad_norm": 0.16158527135849, "learning_rate": 6.20244727180464e-05, "loss": 0.5324, "step": 5767 }, { "epoch": 1.185733374447528, "grad_norm": 0.16184964776039124, "learning_rate": 6.201522656545186e-05, "loss": 0.5454, "step": 5768 }, { "epoch": 1.1859389454209066, "grad_norm": 0.16072934865951538, "learning_rate": 6.200597957459664e-05, "loss": 0.5676, "step": 5769 }, { "epoch": 1.1861445163942852, "grad_norm": 0.19808636605739594, "learning_rate": 6.199673174593629e-05, "loss": 0.5426, "step": 5770 }, { "epoch": 1.1863500873676638, "grad_norm": 0.19355566799640656, "learning_rate": 6.19874830799264e-05, "loss": 0.5601, "step": 5771 }, { "epoch": 1.1865556583410422, "grad_norm": 0.1977650374174118, "learning_rate": 6.197823357702263e-05, "loss": 0.5749, "step": 5772 }, { "epoch": 1.1867612293144207, "grad_norm": 0.17442461848258972, "learning_rate": 6.196898323768065e-05, "loss": 0.5253, "step": 5773 }, { "epoch": 1.1869668002877993, "grad_norm": 0.15890754759311676, "learning_rate": 6.195973206235616e-05, "loss": 0.5509, "step": 5774 }, { "epoch": 1.187172371261178, "grad_norm": 0.18826748430728912, "learning_rate": 6.195048005150496e-05, "loss": 0.54, "step": 5775 }, { "epoch": 1.1873779422345565, "grad_norm": 0.18961307406425476, "learning_rate": 6.194122720558282e-05, "loss": 0.5505, "step": 5776 }, { "epoch": 1.187583513207935, "grad_norm": 0.19002290070056915, "learning_rate": 6.193197352504561e-05, "loss": 0.5637, "step": 5777 }, { "epoch": 1.1877890841813137, "grad_norm": 0.1975557655096054, "learning_rate": 6.19227190103492e-05, "loss": 0.5667, "step": 5778 }, { "epoch": 1.187994655154692, "grad_norm": 0.20086504518985748, "learning_rate": 6.191346366194952e-05, "loss": 0.5792, "step": 5779 }, { "epoch": 1.1882002261280706, "grad_norm": 0.19469043612480164, "learning_rate": 6.190420748030253e-05, "loss": 0.562, "step": 5780 }, { "epoch": 1.1884057971014492, "grad_norm": 0.19469872117042542, "learning_rate": 6.189495046586427e-05, "loss": 0.5725, "step": 5781 }, { "epoch": 1.1886113680748278, "grad_norm": 0.1903071254491806, "learning_rate": 6.188569261909076e-05, "loss": 0.5604, "step": 5782 }, { "epoch": 1.1888169390482064, "grad_norm": 0.18922393023967743, "learning_rate": 6.187643394043808e-05, "loss": 0.5336, "step": 5783 }, { "epoch": 1.189022510021585, "grad_norm": 0.19879461824893951, "learning_rate": 6.186717443036239e-05, "loss": 0.5699, "step": 5784 }, { "epoch": 1.1892280809949636, "grad_norm": 0.19611231982707977, "learning_rate": 6.185791408931986e-05, "loss": 0.533, "step": 5785 }, { "epoch": 1.1894336519683422, "grad_norm": 0.17245331406593323, "learning_rate": 6.18486529177667e-05, "loss": 0.5268, "step": 5786 }, { "epoch": 1.1896392229417208, "grad_norm": 0.15049666166305542, "learning_rate": 6.183939091615915e-05, "loss": 0.5324, "step": 5787 }, { "epoch": 1.1898447939150991, "grad_norm": 0.1296570748090744, "learning_rate": 6.183012808495353e-05, "loss": 0.5245, "step": 5788 }, { "epoch": 1.1900503648884777, "grad_norm": 0.1654006838798523, "learning_rate": 6.182086442460614e-05, "loss": 0.5405, "step": 5789 }, { "epoch": 1.1902559358618563, "grad_norm": 0.20028263330459595, "learning_rate": 6.181159993557338e-05, "loss": 0.5792, "step": 5790 }, { "epoch": 1.1904615068352349, "grad_norm": 0.19533969461917877, "learning_rate": 6.18023346183117e-05, "loss": 0.5698, "step": 5791 }, { "epoch": 1.1906670778086135, "grad_norm": 0.16536763310432434, "learning_rate": 6.17930684732775e-05, "loss": 0.5253, "step": 5792 }, { "epoch": 1.190872648781992, "grad_norm": 0.16189715266227722, "learning_rate": 6.178380150092732e-05, "loss": 0.5759, "step": 5793 }, { "epoch": 1.1910782197553704, "grad_norm": 0.1967983990907669, "learning_rate": 6.177453370171768e-05, "loss": 0.5721, "step": 5794 }, { "epoch": 1.191283790728749, "grad_norm": 0.1946103274822235, "learning_rate": 6.176526507610518e-05, "loss": 0.5587, "step": 5795 }, { "epoch": 1.1914893617021276, "grad_norm": 0.20200130343437195, "learning_rate": 6.175599562454641e-05, "loss": 0.571, "step": 5796 }, { "epoch": 1.1916949326755062, "grad_norm": 0.19911526143550873, "learning_rate": 6.174672534749808e-05, "loss": 0.5615, "step": 5797 }, { "epoch": 1.1919005036488848, "grad_norm": 0.19905459880828857, "learning_rate": 6.173745424541684e-05, "loss": 0.5793, "step": 5798 }, { "epoch": 1.1921060746222634, "grad_norm": 0.1912047415971756, "learning_rate": 6.172818231875947e-05, "loss": 0.5543, "step": 5799 }, { "epoch": 1.192311645595642, "grad_norm": 0.16958840191364288, "learning_rate": 6.171890956798275e-05, "loss": 0.5339, "step": 5800 }, { "epoch": 1.1925172165690205, "grad_norm": 0.1356760561466217, "learning_rate": 6.170963599354349e-05, "loss": 0.5175, "step": 5801 }, { "epoch": 1.1927227875423991, "grad_norm": 0.1700810045003891, "learning_rate": 6.170036159589856e-05, "loss": 0.554, "step": 5802 }, { "epoch": 1.1929283585157775, "grad_norm": 0.17295996844768524, "learning_rate": 6.169108637550488e-05, "loss": 0.5169, "step": 5803 }, { "epoch": 1.193133929489156, "grad_norm": 0.1662554293870926, "learning_rate": 6.16818103328194e-05, "loss": 0.5882, "step": 5804 }, { "epoch": 1.1933395004625347, "grad_norm": 0.1974506676197052, "learning_rate": 6.167253346829909e-05, "loss": 0.5556, "step": 5805 }, { "epoch": 1.1935450714359133, "grad_norm": 0.19866618514060974, "learning_rate": 6.166325578240098e-05, "loss": 0.5748, "step": 5806 }, { "epoch": 1.1937506424092919, "grad_norm": 0.19283287227153778, "learning_rate": 6.165397727558214e-05, "loss": 0.5611, "step": 5807 }, { "epoch": 1.1939562133826704, "grad_norm": 0.19626696407794952, "learning_rate": 6.164469794829967e-05, "loss": 0.5579, "step": 5808 }, { "epoch": 1.1941617843560488, "grad_norm": 0.19367843866348267, "learning_rate": 6.163541780101075e-05, "loss": 0.5642, "step": 5809 }, { "epoch": 1.1943673553294274, "grad_norm": 0.19207385182380676, "learning_rate": 6.162613683417253e-05, "loss": 0.5586, "step": 5810 }, { "epoch": 1.194572926302806, "grad_norm": 0.19212685525417328, "learning_rate": 6.161685504824227e-05, "loss": 0.5427, "step": 5811 }, { "epoch": 1.1947784972761846, "grad_norm": 0.1972237080335617, "learning_rate": 6.160757244367723e-05, "loss": 0.5595, "step": 5812 }, { "epoch": 1.1949840682495632, "grad_norm": 0.2040352076292038, "learning_rate": 6.159828902093471e-05, "loss": 0.5384, "step": 5813 }, { "epoch": 1.1951896392229417, "grad_norm": 0.1992282271385193, "learning_rate": 6.158900478047206e-05, "loss": 0.5757, "step": 5814 }, { "epoch": 1.1953952101963203, "grad_norm": 0.18852105736732483, "learning_rate": 6.15797197227467e-05, "loss": 0.5714, "step": 5815 }, { "epoch": 1.195600781169699, "grad_norm": 0.18910926580429077, "learning_rate": 6.157043384821604e-05, "loss": 0.5506, "step": 5816 }, { "epoch": 1.1958063521430775, "grad_norm": 0.19245147705078125, "learning_rate": 6.156114715733756e-05, "loss": 0.5513, "step": 5817 }, { "epoch": 1.1960119231164559, "grad_norm": 0.19064119458198547, "learning_rate": 6.155185965056875e-05, "loss": 0.5643, "step": 5818 }, { "epoch": 1.1962174940898345, "grad_norm": 0.2007809430360794, "learning_rate": 6.15425713283672e-05, "loss": 0.5773, "step": 5819 }, { "epoch": 1.196423065063213, "grad_norm": 0.1933142989873886, "learning_rate": 6.153328219119048e-05, "loss": 0.5504, "step": 5820 }, { "epoch": 1.1966286360365916, "grad_norm": 0.16889862716197968, "learning_rate": 6.152399223949619e-05, "loss": 0.5338, "step": 5821 }, { "epoch": 1.1968342070099702, "grad_norm": 0.16849687695503235, "learning_rate": 6.151470147374206e-05, "loss": 0.5679, "step": 5822 }, { "epoch": 1.1970397779833488, "grad_norm": 0.19202522933483124, "learning_rate": 6.150540989438577e-05, "loss": 0.5656, "step": 5823 }, { "epoch": 1.1972453489567274, "grad_norm": 0.19393931329250336, "learning_rate": 6.149611750188508e-05, "loss": 0.5745, "step": 5824 }, { "epoch": 1.1974509199301058, "grad_norm": 0.15858381986618042, "learning_rate": 6.14868242966978e-05, "loss": 0.5202, "step": 5825 }, { "epoch": 1.1976564909034844, "grad_norm": 0.15841448307037354, "learning_rate": 6.147753027928173e-05, "loss": 0.5518, "step": 5826 }, { "epoch": 1.197862061876863, "grad_norm": 0.18990083038806915, "learning_rate": 6.146823545009475e-05, "loss": 0.5576, "step": 5827 }, { "epoch": 1.1980676328502415, "grad_norm": 0.1819765716791153, "learning_rate": 6.14589398095948e-05, "loss": 0.5608, "step": 5828 }, { "epoch": 1.1982732038236201, "grad_norm": 0.1861831545829773, "learning_rate": 6.144964335823981e-05, "loss": 0.5659, "step": 5829 }, { "epoch": 1.1984787747969987, "grad_norm": 0.18785440921783447, "learning_rate": 6.14403460964878e-05, "loss": 0.5752, "step": 5830 }, { "epoch": 1.1986843457703773, "grad_norm": 0.1981627196073532, "learning_rate": 6.143104802479673e-05, "loss": 0.578, "step": 5831 }, { "epoch": 1.198889916743756, "grad_norm": 0.19505171477794647, "learning_rate": 6.142174914362476e-05, "loss": 0.542, "step": 5832 }, { "epoch": 1.1990954877171343, "grad_norm": 0.1755106896162033, "learning_rate": 6.141244945342995e-05, "loss": 0.53, "step": 5833 }, { "epoch": 1.1993010586905128, "grad_norm": 0.1715668886899948, "learning_rate": 6.140314895467045e-05, "loss": 0.5479, "step": 5834 }, { "epoch": 1.1995066296638914, "grad_norm": 0.19255517423152924, "learning_rate": 6.13938476478045e-05, "loss": 0.5572, "step": 5835 }, { "epoch": 1.19971220063727, "grad_norm": 0.1867235153913498, "learning_rate": 6.13845455332903e-05, "loss": 0.5865, "step": 5836 }, { "epoch": 1.1999177716106486, "grad_norm": 0.18764084577560425, "learning_rate": 6.137524261158612e-05, "loss": 0.5437, "step": 5837 }, { "epoch": 1.2001233425840272, "grad_norm": 0.20819789171218872, "learning_rate": 6.136593888315025e-05, "loss": 0.5891, "step": 5838 }, { "epoch": 1.2003289135574058, "grad_norm": 0.1949729472398758, "learning_rate": 6.13566343484411e-05, "loss": 0.5662, "step": 5839 }, { "epoch": 1.2005344845307842, "grad_norm": 0.18804004788398743, "learning_rate": 6.1347329007917e-05, "loss": 0.5601, "step": 5840 }, { "epoch": 1.2007400555041627, "grad_norm": 0.18714557588100433, "learning_rate": 6.133802286203642e-05, "loss": 0.5637, "step": 5841 }, { "epoch": 1.2009456264775413, "grad_norm": 0.19639329612255096, "learning_rate": 6.132871591125781e-05, "loss": 0.5698, "step": 5842 }, { "epoch": 1.20115119745092, "grad_norm": 0.20430424809455872, "learning_rate": 6.131940815603969e-05, "loss": 0.5739, "step": 5843 }, { "epoch": 1.2013567684242985, "grad_norm": 0.19093136489391327, "learning_rate": 6.13100995968406e-05, "loss": 0.5455, "step": 5844 }, { "epoch": 1.201562339397677, "grad_norm": 0.1929858773946762, "learning_rate": 6.130079023411915e-05, "loss": 0.5741, "step": 5845 }, { "epoch": 1.2017679103710557, "grad_norm": 0.19032742083072662, "learning_rate": 6.129148006833394e-05, "loss": 0.5586, "step": 5846 }, { "epoch": 1.2019734813444343, "grad_norm": 0.19212977588176727, "learning_rate": 6.128216909994367e-05, "loss": 0.5655, "step": 5847 }, { "epoch": 1.2021790523178126, "grad_norm": 0.19061528146266937, "learning_rate": 6.127285732940702e-05, "loss": 0.5499, "step": 5848 }, { "epoch": 1.2023846232911912, "grad_norm": 0.19122721254825592, "learning_rate": 6.126354475718275e-05, "loss": 0.5456, "step": 5849 }, { "epoch": 1.2025901942645698, "grad_norm": 0.17146308720111847, "learning_rate": 6.125423138372965e-05, "loss": 0.5346, "step": 5850 }, { "epoch": 1.2027957652379484, "grad_norm": 0.1573454737663269, "learning_rate": 6.124491720950655e-05, "loss": 0.5312, "step": 5851 }, { "epoch": 1.203001336211327, "grad_norm": 0.16374094784259796, "learning_rate": 6.123560223497228e-05, "loss": 0.5587, "step": 5852 }, { "epoch": 1.2032069071847056, "grad_norm": 0.18009409308433533, "learning_rate": 6.12262864605858e-05, "loss": 0.5452, "step": 5853 }, { "epoch": 1.2034124781580842, "grad_norm": 0.17497576773166656, "learning_rate": 6.1216969886806e-05, "loss": 0.5535, "step": 5854 }, { "epoch": 1.2036180491314625, "grad_norm": 0.2043164074420929, "learning_rate": 6.120765251409191e-05, "loss": 0.591, "step": 5855 }, { "epoch": 1.2038236201048411, "grad_norm": 0.1914680004119873, "learning_rate": 6.119833434290255e-05, "loss": 0.5526, "step": 5856 }, { "epoch": 1.2040291910782197, "grad_norm": 0.1849730759859085, "learning_rate": 6.118901537369694e-05, "loss": 0.5739, "step": 5857 }, { "epoch": 1.2042347620515983, "grad_norm": 0.1906820684671402, "learning_rate": 6.117969560693423e-05, "loss": 0.5544, "step": 5858 }, { "epoch": 1.2044403330249769, "grad_norm": 0.19102442264556885, "learning_rate": 6.117037504307351e-05, "loss": 0.5478, "step": 5859 }, { "epoch": 1.2046459039983555, "grad_norm": 0.1686401218175888, "learning_rate": 6.116105368257403e-05, "loss": 0.5448, "step": 5860 }, { "epoch": 1.204851474971734, "grad_norm": 0.13795730471611023, "learning_rate": 6.115173152589495e-05, "loss": 0.5262, "step": 5861 }, { "epoch": 1.2050570459451126, "grad_norm": 0.164164200425148, "learning_rate": 6.114240857349556e-05, "loss": 0.5684, "step": 5862 }, { "epoch": 1.205262616918491, "grad_norm": 0.19996531307697296, "learning_rate": 6.113308482583514e-05, "loss": 0.5608, "step": 5863 }, { "epoch": 1.2054681878918696, "grad_norm": 0.19715693593025208, "learning_rate": 6.112376028337305e-05, "loss": 0.566, "step": 5864 }, { "epoch": 1.2056737588652482, "grad_norm": 0.1752108633518219, "learning_rate": 6.111443494656864e-05, "loss": 0.5366, "step": 5865 }, { "epoch": 1.2058793298386268, "grad_norm": 0.16722378134727478, "learning_rate": 6.110510881588135e-05, "loss": 0.5602, "step": 5866 }, { "epoch": 1.2060849008120054, "grad_norm": 0.18732362985610962, "learning_rate": 6.10957818917706e-05, "loss": 0.5498, "step": 5867 }, { "epoch": 1.206290471785384, "grad_norm": 0.1660609394311905, "learning_rate": 6.108645417469593e-05, "loss": 0.5257, "step": 5868 }, { "epoch": 1.2064960427587625, "grad_norm": 0.1357351690530777, "learning_rate": 6.107712566511685e-05, "loss": 0.5126, "step": 5869 }, { "epoch": 1.206701613732141, "grad_norm": 0.1652655005455017, "learning_rate": 6.106779636349292e-05, "loss": 0.5602, "step": 5870 }, { "epoch": 1.2069071847055195, "grad_norm": 0.20981089770793915, "learning_rate": 6.105846627028379e-05, "loss": 0.5616, "step": 5871 }, { "epoch": 1.207112755678898, "grad_norm": 0.19564464688301086, "learning_rate": 6.104913538594905e-05, "loss": 0.5609, "step": 5872 }, { "epoch": 1.2073183266522767, "grad_norm": 0.19752687215805054, "learning_rate": 6.103980371094844e-05, "loss": 0.5766, "step": 5873 }, { "epoch": 1.2075238976256553, "grad_norm": 0.20465241372585297, "learning_rate": 6.103047124574167e-05, "loss": 0.5877, "step": 5874 }, { "epoch": 1.2077294685990339, "grad_norm": 0.19926784932613373, "learning_rate": 6.102113799078851e-05, "loss": 0.5558, "step": 5875 }, { "epoch": 1.2079350395724124, "grad_norm": 0.1923745572566986, "learning_rate": 6.1011803946548774e-05, "loss": 0.5595, "step": 5876 }, { "epoch": 1.208140610545791, "grad_norm": 0.16840709745883942, "learning_rate": 6.100246911348227e-05, "loss": 0.5261, "step": 5877 }, { "epoch": 1.2083461815191696, "grad_norm": 0.16660816967487335, "learning_rate": 6.099313349204893e-05, "loss": 0.5633, "step": 5878 }, { "epoch": 1.208551752492548, "grad_norm": 0.1967456340789795, "learning_rate": 6.098379708270863e-05, "loss": 0.5616, "step": 5879 }, { "epoch": 1.2087573234659266, "grad_norm": 0.19242748618125916, "learning_rate": 6.097445988592138e-05, "loss": 0.5474, "step": 5880 }, { "epoch": 1.2089628944393052, "grad_norm": 0.2012694627046585, "learning_rate": 6.096512190214715e-05, "loss": 0.5508, "step": 5881 }, { "epoch": 1.2091684654126837, "grad_norm": 0.1632763296365738, "learning_rate": 6.0955783131845994e-05, "loss": 0.5535, "step": 5882 }, { "epoch": 1.2093740363860623, "grad_norm": 0.16215071082115173, "learning_rate": 6.094644357547796e-05, "loss": 0.5579, "step": 5883 }, { "epoch": 1.209579607359441, "grad_norm": 0.19483166933059692, "learning_rate": 6.09371032335032e-05, "loss": 0.5576, "step": 5884 }, { "epoch": 1.2097851783328193, "grad_norm": 0.18877603113651276, "learning_rate": 6.092776210638185e-05, "loss": 0.5426, "step": 5885 }, { "epoch": 1.2099907493061979, "grad_norm": 0.1930856853723526, "learning_rate": 6.0918420194574104e-05, "loss": 0.5597, "step": 5886 }, { "epoch": 1.2101963202795765, "grad_norm": 0.1913139820098877, "learning_rate": 6.0909077498540194e-05, "loss": 0.5747, "step": 5887 }, { "epoch": 1.210401891252955, "grad_norm": 0.16376695036888123, "learning_rate": 6.0899734018740396e-05, "loss": 0.502, "step": 5888 }, { "epoch": 1.2106074622263336, "grad_norm": 0.15658964216709137, "learning_rate": 6.0890389755635035e-05, "loss": 0.5453, "step": 5889 }, { "epoch": 1.2108130331997122, "grad_norm": 0.1946595311164856, "learning_rate": 6.088104470968441e-05, "loss": 0.5533, "step": 5890 }, { "epoch": 1.2110186041730908, "grad_norm": 0.19284933805465698, "learning_rate": 6.0871698881348966e-05, "loss": 0.5385, "step": 5891 }, { "epoch": 1.2112241751464694, "grad_norm": 0.19203589856624603, "learning_rate": 6.0862352271089104e-05, "loss": 0.5533, "step": 5892 }, { "epoch": 1.211429746119848, "grad_norm": 0.19579070806503296, "learning_rate": 6.0853004879365265e-05, "loss": 0.5648, "step": 5893 }, { "epoch": 1.2116353170932264, "grad_norm": 0.19746367633342743, "learning_rate": 6.084365670663799e-05, "loss": 0.5473, "step": 5894 }, { "epoch": 1.211840888066605, "grad_norm": 0.199397012591362, "learning_rate": 6.08343077533678e-05, "loss": 0.5522, "step": 5895 }, { "epoch": 1.2120464590399835, "grad_norm": 0.16631294786930084, "learning_rate": 6.082495802001527e-05, "loss": 0.5414, "step": 5896 }, { "epoch": 1.2122520300133621, "grad_norm": 0.15855452418327332, "learning_rate": 6.0815607507041024e-05, "loss": 0.5403, "step": 5897 }, { "epoch": 1.2124576009867407, "grad_norm": 0.196935772895813, "learning_rate": 6.08062562149057e-05, "loss": 0.5665, "step": 5898 }, { "epoch": 1.2126631719601193, "grad_norm": 0.19539684057235718, "learning_rate": 6.079690414407004e-05, "loss": 0.5524, "step": 5899 }, { "epoch": 1.2128687429334977, "grad_norm": 0.19079557061195374, "learning_rate": 6.078755129499475e-05, "loss": 0.5628, "step": 5900 }, { "epoch": 1.2130743139068763, "grad_norm": 0.19366958737373352, "learning_rate": 6.077819766814058e-05, "loss": 0.5889, "step": 5901 }, { "epoch": 1.2132798848802548, "grad_norm": 0.19458188116550446, "learning_rate": 6.076884326396837e-05, "loss": 0.571, "step": 5902 }, { "epoch": 1.2134854558536334, "grad_norm": 0.16850589215755463, "learning_rate": 6.075948808293894e-05, "loss": 0.5335, "step": 5903 }, { "epoch": 1.213691026827012, "grad_norm": 0.16787506639957428, "learning_rate": 6.075013212551321e-05, "loss": 0.5353, "step": 5904 }, { "epoch": 1.2138965978003906, "grad_norm": 0.1945338398218155, "learning_rate": 6.074077539215208e-05, "loss": 0.5491, "step": 5905 }, { "epoch": 1.2141021687737692, "grad_norm": 0.19000251591205597, "learning_rate": 6.0731417883316524e-05, "loss": 0.5523, "step": 5906 }, { "epoch": 1.2143077397471478, "grad_norm": 0.18971100449562073, "learning_rate": 6.0722059599467525e-05, "loss": 0.5531, "step": 5907 }, { "epoch": 1.2145133107205264, "grad_norm": 0.16435407102108002, "learning_rate": 6.071270054106613e-05, "loss": 0.5286, "step": 5908 }, { "epoch": 1.2147188816939047, "grad_norm": 0.17342285811901093, "learning_rate": 6.070334070857343e-05, "loss": 0.5616, "step": 5909 }, { "epoch": 1.2149244526672833, "grad_norm": 0.19488383829593658, "learning_rate": 6.069398010245053e-05, "loss": 0.5584, "step": 5910 }, { "epoch": 1.215130023640662, "grad_norm": 0.1964189112186432, "learning_rate": 6.068461872315858e-05, "loss": 0.5744, "step": 5911 }, { "epoch": 1.2153355946140405, "grad_norm": 0.19528479874134064, "learning_rate": 6.067525657115879e-05, "loss": 0.557, "step": 5912 }, { "epoch": 1.215541165587419, "grad_norm": 0.19183097779750824, "learning_rate": 6.066589364691237e-05, "loss": 0.5591, "step": 5913 }, { "epoch": 1.2157467365607977, "grad_norm": 0.19744020700454712, "learning_rate": 6.065652995088058e-05, "loss": 0.5627, "step": 5914 }, { "epoch": 1.2159523075341763, "grad_norm": 0.18547560274600983, "learning_rate": 6.064716548352475e-05, "loss": 0.5539, "step": 5915 }, { "epoch": 1.2161578785075546, "grad_norm": 0.19087590277194977, "learning_rate": 6.063780024530621e-05, "loss": 0.5627, "step": 5916 }, { "epoch": 1.2163634494809332, "grad_norm": 0.19286733865737915, "learning_rate": 6.0628434236686325e-05, "loss": 0.5523, "step": 5917 }, { "epoch": 1.2165690204543118, "grad_norm": 0.1942092925310135, "learning_rate": 6.061906745812655e-05, "loss": 0.574, "step": 5918 }, { "epoch": 1.2167745914276904, "grad_norm": 0.19682841002941132, "learning_rate": 6.060969991008832e-05, "loss": 0.5768, "step": 5919 }, { "epoch": 1.216980162401069, "grad_norm": 0.194288969039917, "learning_rate": 6.060033159303314e-05, "loss": 0.5704, "step": 5920 }, { "epoch": 1.2171857333744476, "grad_norm": 0.20371194183826447, "learning_rate": 6.059096250742252e-05, "loss": 0.5677, "step": 5921 }, { "epoch": 1.2173913043478262, "grad_norm": 0.20336924493312836, "learning_rate": 6.058159265371807e-05, "loss": 0.5228, "step": 5922 }, { "epoch": 1.2175968753212048, "grad_norm": 0.1702810525894165, "learning_rate": 6.0572222032381374e-05, "loss": 0.5534, "step": 5923 }, { "epoch": 1.2178024462945831, "grad_norm": 0.13445743918418884, "learning_rate": 6.056285064387407e-05, "loss": 0.5294, "step": 5924 }, { "epoch": 1.2180080172679617, "grad_norm": 0.12932245433330536, "learning_rate": 6.055347848865787e-05, "loss": 0.5243, "step": 5925 }, { "epoch": 1.2182135882413403, "grad_norm": 0.16721323132514954, "learning_rate": 6.054410556719448e-05, "loss": 0.5473, "step": 5926 }, { "epoch": 1.2184191592147189, "grad_norm": 0.2189573496580124, "learning_rate": 6.053473187994566e-05, "loss": 0.566, "step": 5927 }, { "epoch": 1.2186247301880975, "grad_norm": 0.19731007516384125, "learning_rate": 6.052535742737321e-05, "loss": 0.533, "step": 5928 }, { "epoch": 1.218830301161476, "grad_norm": 0.19551746547222137, "learning_rate": 6.051598220993896e-05, "loss": 0.5785, "step": 5929 }, { "epoch": 1.2190358721348546, "grad_norm": 0.2288779616355896, "learning_rate": 6.0506606228104784e-05, "loss": 0.5354, "step": 5930 }, { "epoch": 1.219241443108233, "grad_norm": 0.17528457939624786, "learning_rate": 6.0497229482332605e-05, "loss": 0.5383, "step": 5931 }, { "epoch": 1.2194470140816116, "grad_norm": 0.17240411043167114, "learning_rate": 6.0487851973084365e-05, "loss": 0.5693, "step": 5932 }, { "epoch": 1.2196525850549902, "grad_norm": 0.199370875954628, "learning_rate": 6.047847370082204e-05, "loss": 0.548, "step": 5933 }, { "epoch": 1.2198581560283688, "grad_norm": 0.20105613768100739, "learning_rate": 6.046909466600768e-05, "loss": 0.5604, "step": 5934 }, { "epoch": 1.2200637270017474, "grad_norm": 0.16920122504234314, "learning_rate": 6.0459714869103304e-05, "loss": 0.5377, "step": 5935 }, { "epoch": 1.220269297975126, "grad_norm": 0.17022979259490967, "learning_rate": 6.0450334310571046e-05, "loss": 0.556, "step": 5936 }, { "epoch": 1.2204748689485045, "grad_norm": 0.22041717171669006, "learning_rate": 6.044095299087304e-05, "loss": 0.5874, "step": 5937 }, { "epoch": 1.2206804399218831, "grad_norm": 0.20872265100479126, "learning_rate": 6.0431570910471436e-05, "loss": 0.5687, "step": 5938 }, { "epoch": 1.2208860108952615, "grad_norm": 0.18911628425121307, "learning_rate": 6.042218806982847e-05, "loss": 0.5712, "step": 5939 }, { "epoch": 1.22109158186864, "grad_norm": 0.19167855381965637, "learning_rate": 6.0412804469406384e-05, "loss": 0.5601, "step": 5940 }, { "epoch": 1.2212971528420187, "grad_norm": 0.19254928827285767, "learning_rate": 6.040342010966745e-05, "loss": 0.5746, "step": 5941 }, { "epoch": 1.2215027238153973, "grad_norm": 0.19120313227176666, "learning_rate": 6.0394034991073994e-05, "loss": 0.5502, "step": 5942 }, { "epoch": 1.2217082947887759, "grad_norm": 0.1880388706922531, "learning_rate": 6.038464911408841e-05, "loss": 0.5629, "step": 5943 }, { "epoch": 1.2219138657621544, "grad_norm": 0.19094626605510712, "learning_rate": 6.0375262479173064e-05, "loss": 0.5742, "step": 5944 }, { "epoch": 1.222119436735533, "grad_norm": 0.19934087991714478, "learning_rate": 6.0365875086790386e-05, "loss": 0.6047, "step": 5945 }, { "epoch": 1.2223250077089114, "grad_norm": 0.16785962879657745, "learning_rate": 6.035648693740287e-05, "loss": 0.5404, "step": 5946 }, { "epoch": 1.22253057868229, "grad_norm": 0.160533607006073, "learning_rate": 6.0347098031473025e-05, "loss": 0.5391, "step": 5947 }, { "epoch": 1.2227361496556686, "grad_norm": 0.201270192861557, "learning_rate": 6.033770836946339e-05, "loss": 0.5811, "step": 5948 }, { "epoch": 1.2229417206290472, "grad_norm": 0.1920137256383896, "learning_rate": 6.0328317951836554e-05, "loss": 0.5595, "step": 5949 }, { "epoch": 1.2231472916024257, "grad_norm": 0.19600927829742432, "learning_rate": 6.031892677905513e-05, "loss": 0.5679, "step": 5950 }, { "epoch": 1.2233528625758043, "grad_norm": 0.19393356144428253, "learning_rate": 6.030953485158178e-05, "loss": 0.5586, "step": 5951 }, { "epoch": 1.223558433549183, "grad_norm": 0.19558121263980865, "learning_rate": 6.030014216987922e-05, "loss": 0.5584, "step": 5952 }, { "epoch": 1.2237640045225615, "grad_norm": 0.1591499000787735, "learning_rate": 6.029074873441015e-05, "loss": 0.512, "step": 5953 }, { "epoch": 1.2239695754959399, "grad_norm": 0.1601012945175171, "learning_rate": 6.028135454563737e-05, "loss": 0.5482, "step": 5954 }, { "epoch": 1.2241751464693185, "grad_norm": 0.1917879432439804, "learning_rate": 6.027195960402367e-05, "loss": 0.5619, "step": 5955 }, { "epoch": 1.224380717442697, "grad_norm": 0.16363351047039032, "learning_rate": 6.026256391003192e-05, "loss": 0.5272, "step": 5956 }, { "epoch": 1.2245862884160756, "grad_norm": 0.1613667905330658, "learning_rate": 6.0253167464124965e-05, "loss": 0.5448, "step": 5957 }, { "epoch": 1.2247918593894542, "grad_norm": 0.19327108561992645, "learning_rate": 6.0243770266765754e-05, "loss": 0.5631, "step": 5958 }, { "epoch": 1.2249974303628328, "grad_norm": 0.20113897323608398, "learning_rate": 6.023437231841721e-05, "loss": 0.5433, "step": 5959 }, { "epoch": 1.2252030013362114, "grad_norm": 0.19953328371047974, "learning_rate": 6.022497361954237e-05, "loss": 0.5555, "step": 5960 }, { "epoch": 1.2254085723095898, "grad_norm": 0.16104325652122498, "learning_rate": 6.021557417060423e-05, "loss": 0.5269, "step": 5961 }, { "epoch": 1.2256141432829684, "grad_norm": 0.16105084121227264, "learning_rate": 6.0206173972065865e-05, "loss": 0.5649, "step": 5962 }, { "epoch": 1.225819714256347, "grad_norm": 0.1889335662126541, "learning_rate": 6.0196773024390374e-05, "loss": 0.5536, "step": 5963 }, { "epoch": 1.2260252852297255, "grad_norm": 0.19481204450130463, "learning_rate": 6.018737132804093e-05, "loss": 0.5673, "step": 5964 }, { "epoch": 1.2262308562031041, "grad_norm": 0.16492706537246704, "learning_rate": 6.017796888348068e-05, "loss": 0.548, "step": 5965 }, { "epoch": 1.2264364271764827, "grad_norm": 0.1624189019203186, "learning_rate": 6.016856569117283e-05, "loss": 0.5659, "step": 5966 }, { "epoch": 1.2266419981498613, "grad_norm": 0.19174005091190338, "learning_rate": 6.015916175158066e-05, "loss": 0.5483, "step": 5967 }, { "epoch": 1.22684756912324, "grad_norm": 0.19172148406505585, "learning_rate": 6.014975706516744e-05, "loss": 0.5629, "step": 5968 }, { "epoch": 1.2270531400966185, "grad_norm": 0.20126576721668243, "learning_rate": 6.014035163239649e-05, "loss": 0.5609, "step": 5969 }, { "epoch": 1.2272587110699968, "grad_norm": 0.19356362521648407, "learning_rate": 6.0130945453731196e-05, "loss": 0.557, "step": 5970 }, { "epoch": 1.2274642820433754, "grad_norm": 0.19379346072673798, "learning_rate": 6.012153852963494e-05, "loss": 0.5644, "step": 5971 }, { "epoch": 1.227669853016754, "grad_norm": 0.18843898177146912, "learning_rate": 6.011213086057114e-05, "loss": 0.5655, "step": 5972 }, { "epoch": 1.2278754239901326, "grad_norm": 0.1895827353000641, "learning_rate": 6.010272244700331e-05, "loss": 0.5324, "step": 5973 }, { "epoch": 1.2280809949635112, "grad_norm": 0.19657573103904724, "learning_rate": 6.009331328939492e-05, "loss": 0.5604, "step": 5974 }, { "epoch": 1.2282865659368898, "grad_norm": 0.1885729730129242, "learning_rate": 6.0083903388209536e-05, "loss": 0.5601, "step": 5975 }, { "epoch": 1.2284921369102682, "grad_norm": 0.16260753571987152, "learning_rate": 6.007449274391073e-05, "loss": 0.5245, "step": 5976 }, { "epoch": 1.2286977078836467, "grad_norm": 0.13464370369911194, "learning_rate": 6.0065081356962124e-05, "loss": 0.5164, "step": 5977 }, { "epoch": 1.2289032788570253, "grad_norm": 0.17227724194526672, "learning_rate": 6.0055669227827384e-05, "loss": 0.5848, "step": 5978 }, { "epoch": 1.229108849830404, "grad_norm": 0.19165630638599396, "learning_rate": 6.0046256356970185e-05, "loss": 0.5713, "step": 5979 }, { "epoch": 1.2293144208037825, "grad_norm": 0.191480353474617, "learning_rate": 6.003684274485426e-05, "loss": 0.5564, "step": 5980 }, { "epoch": 1.229519991777161, "grad_norm": 0.19356124103069305, "learning_rate": 6.002742839194338e-05, "loss": 0.5711, "step": 5981 }, { "epoch": 1.2297255627505397, "grad_norm": 0.18836161494255066, "learning_rate": 6.001801329870134e-05, "loss": 0.551, "step": 5982 }, { "epoch": 1.2299311337239183, "grad_norm": 0.18804924190044403, "learning_rate": 6.0008597465591966e-05, "loss": 0.5641, "step": 5983 }, { "epoch": 1.2301367046972969, "grad_norm": 0.20674586296081543, "learning_rate": 5.999918089307915e-05, "loss": 0.5664, "step": 5984 }, { "epoch": 1.2303422756706752, "grad_norm": 0.1936078518629074, "learning_rate": 5.9989763581626806e-05, "loss": 0.552, "step": 5985 }, { "epoch": 1.2305478466440538, "grad_norm": 0.19843873381614685, "learning_rate": 5.998034553169886e-05, "loss": 0.562, "step": 5986 }, { "epoch": 1.2307534176174324, "grad_norm": 0.18645739555358887, "learning_rate": 5.997092674375932e-05, "loss": 0.5424, "step": 5987 }, { "epoch": 1.230958988590811, "grad_norm": 0.18855836987495422, "learning_rate": 5.9961507218272196e-05, "loss": 0.5706, "step": 5988 }, { "epoch": 1.2311645595641896, "grad_norm": 0.18944047391414642, "learning_rate": 5.9952086955701535e-05, "loss": 0.5564, "step": 5989 }, { "epoch": 1.2313701305375682, "grad_norm": 0.1880870759487152, "learning_rate": 5.994266595651143e-05, "loss": 0.5662, "step": 5990 }, { "epoch": 1.2315757015109465, "grad_norm": 0.19140774011611938, "learning_rate": 5.993324422116602e-05, "loss": 0.5469, "step": 5991 }, { "epoch": 1.2317812724843251, "grad_norm": 0.1923801451921463, "learning_rate": 5.9923821750129466e-05, "loss": 0.5715, "step": 5992 }, { "epoch": 1.2319868434577037, "grad_norm": 0.18575525283813477, "learning_rate": 5.991439854386597e-05, "loss": 0.5325, "step": 5993 }, { "epoch": 1.2321924144310823, "grad_norm": 0.19030645489692688, "learning_rate": 5.9904974602839764e-05, "loss": 0.5366, "step": 5994 }, { "epoch": 1.2323979854044609, "grad_norm": 0.19156965613365173, "learning_rate": 5.9895549927515114e-05, "loss": 0.5741, "step": 5995 }, { "epoch": 1.2326035563778395, "grad_norm": 0.1905066967010498, "learning_rate": 5.988612451835636e-05, "loss": 0.5452, "step": 5996 }, { "epoch": 1.232809127351218, "grad_norm": 0.18837079405784607, "learning_rate": 5.987669837582782e-05, "loss": 0.5644, "step": 5997 }, { "epoch": 1.2330146983245966, "grad_norm": 0.1969577670097351, "learning_rate": 5.9867271500393884e-05, "loss": 0.5653, "step": 5998 }, { "epoch": 1.2332202692979752, "grad_norm": 0.1714939922094345, "learning_rate": 5.9857843892518975e-05, "loss": 0.5255, "step": 5999 }, { "epoch": 1.2334258402713536, "grad_norm": 0.16838547587394714, "learning_rate": 5.984841555266753e-05, "loss": 0.5574, "step": 6000 }, { "epoch": 1.2336314112447322, "grad_norm": 0.18724249303340912, "learning_rate": 5.983898648130407e-05, "loss": 0.5286, "step": 6001 }, { "epoch": 1.2338369822181108, "grad_norm": 0.1969245970249176, "learning_rate": 5.98295566788931e-05, "loss": 0.5673, "step": 6002 }, { "epoch": 1.2340425531914894, "grad_norm": 0.1898987591266632, "learning_rate": 5.982012614589917e-05, "loss": 0.5545, "step": 6003 }, { "epoch": 1.234248124164868, "grad_norm": 0.1573200672864914, "learning_rate": 5.9810694882786916e-05, "loss": 0.5205, "step": 6004 }, { "epoch": 1.2344536951382465, "grad_norm": 0.1741228699684143, "learning_rate": 5.9801262890020935e-05, "loss": 0.567, "step": 6005 }, { "epoch": 1.2346592661116251, "grad_norm": 0.19393646717071533, "learning_rate": 5.9791830168065914e-05, "loss": 0.5476, "step": 6006 }, { "epoch": 1.2348648370850035, "grad_norm": 0.19462937116622925, "learning_rate": 5.978239671738655e-05, "loss": 0.5361, "step": 6007 }, { "epoch": 1.235070408058382, "grad_norm": 0.18887047469615936, "learning_rate": 5.9772962538447604e-05, "loss": 0.5682, "step": 6008 }, { "epoch": 1.2352759790317607, "grad_norm": 0.19533561170101166, "learning_rate": 5.976352763171385e-05, "loss": 0.5776, "step": 6009 }, { "epoch": 1.2354815500051393, "grad_norm": 0.2016497403383255, "learning_rate": 5.975409199765008e-05, "loss": 0.5768, "step": 6010 }, { "epoch": 1.2356871209785179, "grad_norm": 0.19525597989559174, "learning_rate": 5.9744655636721166e-05, "loss": 0.5774, "step": 6011 }, { "epoch": 1.2358926919518964, "grad_norm": 0.19392353296279907, "learning_rate": 5.973521854939198e-05, "loss": 0.5451, "step": 6012 }, { "epoch": 1.236098262925275, "grad_norm": 0.1947338730096817, "learning_rate": 5.9725780736127456e-05, "loss": 0.5697, "step": 6013 }, { "epoch": 1.2363038338986536, "grad_norm": 0.20187315344810486, "learning_rate": 5.971634219739253e-05, "loss": 0.5441, "step": 6014 }, { "epoch": 1.236509404872032, "grad_norm": 0.1915546953678131, "learning_rate": 5.970690293365222e-05, "loss": 0.5692, "step": 6015 }, { "epoch": 1.2367149758454106, "grad_norm": 0.18739596009254456, "learning_rate": 5.969746294537153e-05, "loss": 0.5582, "step": 6016 }, { "epoch": 1.2369205468187892, "grad_norm": 0.18742164969444275, "learning_rate": 5.968802223301554e-05, "loss": 0.5538, "step": 6017 }, { "epoch": 1.2371261177921677, "grad_norm": 0.18883053958415985, "learning_rate": 5.967858079704935e-05, "loss": 0.5569, "step": 6018 }, { "epoch": 1.2373316887655463, "grad_norm": 0.1861804723739624, "learning_rate": 5.966913863793809e-05, "loss": 0.5506, "step": 6019 }, { "epoch": 1.237537259738925, "grad_norm": 0.1672678142786026, "learning_rate": 5.965969575614694e-05, "loss": 0.5207, "step": 6020 }, { "epoch": 1.2377428307123035, "grad_norm": 0.1628050059080124, "learning_rate": 5.965025215214109e-05, "loss": 0.564, "step": 6021 }, { "epoch": 1.2379484016856819, "grad_norm": 0.16974832117557526, "learning_rate": 5.964080782638579e-05, "loss": 0.5396, "step": 6022 }, { "epoch": 1.2381539726590605, "grad_norm": 0.1564965546131134, "learning_rate": 5.963136277934634e-05, "loss": 0.5456, "step": 6023 }, { "epoch": 1.238359543632439, "grad_norm": 0.19115638732910156, "learning_rate": 5.962191701148801e-05, "loss": 0.5821, "step": 6024 }, { "epoch": 1.2385651146058176, "grad_norm": 0.1846878081560135, "learning_rate": 5.9612470523276176e-05, "loss": 0.5708, "step": 6025 }, { "epoch": 1.2387706855791962, "grad_norm": 0.1887466162443161, "learning_rate": 5.9603023315176224e-05, "loss": 0.5633, "step": 6026 }, { "epoch": 1.2389762565525748, "grad_norm": 0.1877734214067459, "learning_rate": 5.959357538765356e-05, "loss": 0.5343, "step": 6027 }, { "epoch": 1.2391818275259534, "grad_norm": 0.1928664743900299, "learning_rate": 5.958412674117365e-05, "loss": 0.553, "step": 6028 }, { "epoch": 1.239387398499332, "grad_norm": 0.19139814376831055, "learning_rate": 5.957467737620199e-05, "loss": 0.5586, "step": 6029 }, { "epoch": 1.2395929694727104, "grad_norm": 0.18959654867649078, "learning_rate": 5.9565227293204084e-05, "loss": 0.5756, "step": 6030 }, { "epoch": 1.239798540446089, "grad_norm": 0.17210416495800018, "learning_rate": 5.9555776492645513e-05, "loss": 0.5649, "step": 6031 }, { "epoch": 1.2400041114194675, "grad_norm": 0.160491481423378, "learning_rate": 5.954632497499187e-05, "loss": 0.5464, "step": 6032 }, { "epoch": 1.2402096823928461, "grad_norm": 0.19676798582077026, "learning_rate": 5.9536872740708777e-05, "loss": 0.5877, "step": 6033 }, { "epoch": 1.2404152533662247, "grad_norm": 0.20140545070171356, "learning_rate": 5.952741979026192e-05, "loss": 0.5762, "step": 6034 }, { "epoch": 1.2406208243396033, "grad_norm": 0.19546420872211456, "learning_rate": 5.951796612411698e-05, "loss": 0.5576, "step": 6035 }, { "epoch": 1.240826395312982, "grad_norm": 0.16486842930316925, "learning_rate": 5.9508511742739716e-05, "loss": 0.5115, "step": 6036 }, { "epoch": 1.2410319662863603, "grad_norm": 0.13164182007312775, "learning_rate": 5.94990566465959e-05, "loss": 0.5294, "step": 6037 }, { "epoch": 1.2412375372597388, "grad_norm": 0.15759903192520142, "learning_rate": 5.9489600836151305e-05, "loss": 0.5432, "step": 6038 }, { "epoch": 1.2414431082331174, "grad_norm": 0.2032260000705719, "learning_rate": 5.948014431187181e-05, "loss": 0.5613, "step": 6039 }, { "epoch": 1.241648679206496, "grad_norm": 0.19559217989444733, "learning_rate": 5.947068707422329e-05, "loss": 0.5402, "step": 6040 }, { "epoch": 1.2418542501798746, "grad_norm": 0.19073714315891266, "learning_rate": 5.9461229123671654e-05, "loss": 0.534, "step": 6041 }, { "epoch": 1.2420598211532532, "grad_norm": 0.1976533830165863, "learning_rate": 5.9451770460682846e-05, "loss": 0.5591, "step": 6042 }, { "epoch": 1.2422653921266318, "grad_norm": 0.2046486884355545, "learning_rate": 5.944231108572287e-05, "loss": 0.5668, "step": 6043 }, { "epoch": 1.2424709631000104, "grad_norm": 0.19867998361587524, "learning_rate": 5.9432850999257705e-05, "loss": 0.5453, "step": 6044 }, { "epoch": 1.242676534073389, "grad_norm": 0.18936549127101898, "learning_rate": 5.9423390201753446e-05, "loss": 0.5649, "step": 6045 }, { "epoch": 1.2428821050467673, "grad_norm": 0.19626031816005707, "learning_rate": 5.941392869367616e-05, "loss": 0.5673, "step": 6046 }, { "epoch": 1.243087676020146, "grad_norm": 0.19594736397266388, "learning_rate": 5.9404466475492e-05, "loss": 0.5673, "step": 6047 }, { "epoch": 1.2432932469935245, "grad_norm": 0.19246500730514526, "learning_rate": 5.939500354766707e-05, "loss": 0.5708, "step": 6048 }, { "epoch": 1.243498817966903, "grad_norm": 0.18370835483074188, "learning_rate": 5.9385539910667615e-05, "loss": 0.5339, "step": 6049 }, { "epoch": 1.2437043889402817, "grad_norm": 0.1910664439201355, "learning_rate": 5.9376075564959836e-05, "loss": 0.5801, "step": 6050 }, { "epoch": 1.2439099599136603, "grad_norm": 0.19655410945415497, "learning_rate": 5.936661051101002e-05, "loss": 0.5389, "step": 6051 }, { "epoch": 1.2441155308870386, "grad_norm": 0.23548901081085205, "learning_rate": 5.9357144749284446e-05, "loss": 0.5509, "step": 6052 }, { "epoch": 1.2443211018604172, "grad_norm": 0.1724226176738739, "learning_rate": 5.934767828024946e-05, "loss": 0.5405, "step": 6053 }, { "epoch": 1.2445266728337958, "grad_norm": 0.16652943193912506, "learning_rate": 5.9338211104371424e-05, "loss": 0.5401, "step": 6054 }, { "epoch": 1.2447322438071744, "grad_norm": 0.20364424586296082, "learning_rate": 5.932874322211674e-05, "loss": 0.5624, "step": 6055 }, { "epoch": 1.244937814780553, "grad_norm": 0.1893276572227478, "learning_rate": 5.931927463395186e-05, "loss": 0.541, "step": 6056 }, { "epoch": 1.2451433857539316, "grad_norm": 0.1932743936777115, "learning_rate": 5.930980534034323e-05, "loss": 0.5789, "step": 6057 }, { "epoch": 1.2453489567273102, "grad_norm": 0.192164346575737, "learning_rate": 5.930033534175739e-05, "loss": 0.5711, "step": 6058 }, { "epoch": 1.2455545277006888, "grad_norm": 0.18755845725536346, "learning_rate": 5.9290864638660864e-05, "loss": 0.5503, "step": 6059 }, { "epoch": 1.2457600986740673, "grad_norm": 0.19044922292232513, "learning_rate": 5.928139323152022e-05, "loss": 0.5441, "step": 6060 }, { "epoch": 1.2459656696474457, "grad_norm": 0.16590002179145813, "learning_rate": 5.9271921120802106e-05, "loss": 0.5255, "step": 6061 }, { "epoch": 1.2461712406208243, "grad_norm": 0.16867230832576752, "learning_rate": 5.926244830697312e-05, "loss": 0.5825, "step": 6062 }, { "epoch": 1.2463768115942029, "grad_norm": 0.20571991801261902, "learning_rate": 5.925297479049999e-05, "loss": 0.552, "step": 6063 }, { "epoch": 1.2465823825675815, "grad_norm": 0.20340660214424133, "learning_rate": 5.92435005718494e-05, "loss": 0.5572, "step": 6064 }, { "epoch": 1.24678795354096, "grad_norm": 0.19198235869407654, "learning_rate": 5.923402565148811e-05, "loss": 0.5569, "step": 6065 }, { "epoch": 1.2469935245143386, "grad_norm": 0.1904488056898117, "learning_rate": 5.92245500298829e-05, "loss": 0.5641, "step": 6066 }, { "epoch": 1.247199095487717, "grad_norm": 0.1928306370973587, "learning_rate": 5.921507370750061e-05, "loss": 0.5613, "step": 6067 }, { "epoch": 1.2474046664610956, "grad_norm": 0.18856725096702576, "learning_rate": 5.920559668480808e-05, "loss": 0.5478, "step": 6068 }, { "epoch": 1.2476102374344742, "grad_norm": 0.19025270640850067, "learning_rate": 5.919611896227218e-05, "loss": 0.553, "step": 6069 }, { "epoch": 1.2478158084078528, "grad_norm": 0.18751074373722076, "learning_rate": 5.918664054035987e-05, "loss": 0.5571, "step": 6070 }, { "epoch": 1.2480213793812314, "grad_norm": 0.18929120898246765, "learning_rate": 5.917716141953807e-05, "loss": 0.5674, "step": 6071 }, { "epoch": 1.24822695035461, "grad_norm": 0.19729354977607727, "learning_rate": 5.916768160027381e-05, "loss": 0.5493, "step": 6072 }, { "epoch": 1.2484325213279885, "grad_norm": 0.1939440220594406, "learning_rate": 5.9158201083034086e-05, "loss": 0.5617, "step": 6073 }, { "epoch": 1.2486380923013671, "grad_norm": 0.19020439684391022, "learning_rate": 5.914871986828596e-05, "loss": 0.551, "step": 6074 }, { "epoch": 1.2488436632747457, "grad_norm": 0.19423425197601318, "learning_rate": 5.913923795649656e-05, "loss": 0.5513, "step": 6075 }, { "epoch": 1.249049234248124, "grad_norm": 0.1902787834405899, "learning_rate": 5.912975534813298e-05, "loss": 0.5467, "step": 6076 }, { "epoch": 1.2492548052215027, "grad_norm": 0.16620683670043945, "learning_rate": 5.91202720436624e-05, "loss": 0.5262, "step": 6077 }, { "epoch": 1.2494603761948813, "grad_norm": 0.15968933701515198, "learning_rate": 5.911078804355202e-05, "loss": 0.5616, "step": 6078 }, { "epoch": 1.2496659471682598, "grad_norm": 0.19238422811031342, "learning_rate": 5.910130334826906e-05, "loss": 0.5515, "step": 6079 }, { "epoch": 1.2498715181416384, "grad_norm": 0.19091928005218506, "learning_rate": 5.9091817958280786e-05, "loss": 0.5648, "step": 6080 }, { "epoch": 1.250077089115017, "grad_norm": 0.19049179553985596, "learning_rate": 5.908233187405452e-05, "loss": 0.55, "step": 6081 }, { "epoch": 1.2502826600883954, "grad_norm": 0.19400426745414734, "learning_rate": 5.907284509605757e-05, "loss": 0.5554, "step": 6082 }, { "epoch": 1.250488231061774, "grad_norm": 0.19264687597751617, "learning_rate": 5.9063357624757316e-05, "loss": 0.5693, "step": 6083 }, { "epoch": 1.2506938020351526, "grad_norm": 0.1882631927728653, "learning_rate": 5.905386946062118e-05, "loss": 0.5509, "step": 6084 }, { "epoch": 1.2508993730085312, "grad_norm": 0.1930553913116455, "learning_rate": 5.9044380604116575e-05, "loss": 0.5667, "step": 6085 }, { "epoch": 1.2511049439819097, "grad_norm": 0.19695702195167542, "learning_rate": 5.9034891055710985e-05, "loss": 0.5592, "step": 6086 }, { "epoch": 1.2513105149552883, "grad_norm": 0.19834263622760773, "learning_rate": 5.90254008158719e-05, "loss": 0.5621, "step": 6087 }, { "epoch": 1.251516085928667, "grad_norm": 0.19930176436901093, "learning_rate": 5.9015909885066885e-05, "loss": 0.5845, "step": 6088 }, { "epoch": 1.2517216569020455, "grad_norm": 0.1929783821105957, "learning_rate": 5.90064182637635e-05, "loss": 0.5658, "step": 6089 }, { "epoch": 1.251927227875424, "grad_norm": 0.2053227424621582, "learning_rate": 5.899692595242934e-05, "loss": 0.559, "step": 6090 }, { "epoch": 1.2521327988488027, "grad_norm": 0.1878289431333542, "learning_rate": 5.898743295153208e-05, "loss": 0.5331, "step": 6091 }, { "epoch": 1.252338369822181, "grad_norm": 0.1905200332403183, "learning_rate": 5.897793926153935e-05, "loss": 0.5687, "step": 6092 }, { "epoch": 1.2525439407955596, "grad_norm": 0.16592474281787872, "learning_rate": 5.89684448829189e-05, "loss": 0.509, "step": 6093 }, { "epoch": 1.2527495117689382, "grad_norm": 0.15698356926441193, "learning_rate": 5.895894981613845e-05, "loss": 0.558, "step": 6094 }, { "epoch": 1.2529550827423168, "grad_norm": 0.19929586350917816, "learning_rate": 5.89494540616658e-05, "loss": 0.5595, "step": 6095 }, { "epoch": 1.2531606537156954, "grad_norm": 0.19312036037445068, "learning_rate": 5.893995761996875e-05, "loss": 0.5577, "step": 6096 }, { "epoch": 1.2533662246890738, "grad_norm": 0.19632984697818756, "learning_rate": 5.8930460491515125e-05, "loss": 0.5715, "step": 6097 }, { "epoch": 1.2535717956624524, "grad_norm": 0.1999562531709671, "learning_rate": 5.8920962676772836e-05, "loss": 0.5578, "step": 6098 }, { "epoch": 1.253777366635831, "grad_norm": 0.1987222284078598, "learning_rate": 5.891146417620978e-05, "loss": 0.5777, "step": 6099 }, { "epoch": 1.2539829376092095, "grad_norm": 0.17240692675113678, "learning_rate": 5.8901964990293894e-05, "loss": 0.546, "step": 6100 }, { "epoch": 1.2541885085825881, "grad_norm": 0.1715145856142044, "learning_rate": 5.8892465119493184e-05, "loss": 0.5658, "step": 6101 }, { "epoch": 1.2543940795559667, "grad_norm": 0.18989497423171997, "learning_rate": 5.888296456427565e-05, "loss": 0.5718, "step": 6102 }, { "epoch": 1.2545996505293453, "grad_norm": 0.1893077790737152, "learning_rate": 5.887346332510934e-05, "loss": 0.572, "step": 6103 }, { "epoch": 1.2548052215027239, "grad_norm": 0.16260646283626556, "learning_rate": 5.886396140246233e-05, "loss": 0.5399, "step": 6104 }, { "epoch": 1.2550107924761025, "grad_norm": 1.3922818899154663, "learning_rate": 5.8854458796802744e-05, "loss": 0.5587, "step": 6105 }, { "epoch": 1.255216363449481, "grad_norm": 0.16991350054740906, "learning_rate": 5.8844955508598745e-05, "loss": 0.5286, "step": 6106 }, { "epoch": 1.2554219344228594, "grad_norm": 0.21412529051303864, "learning_rate": 5.8835451538318476e-05, "loss": 0.5637, "step": 6107 }, { "epoch": 1.255627505396238, "grad_norm": 0.40624189376831055, "learning_rate": 5.882594688643019e-05, "loss": 0.5364, "step": 6108 }, { "epoch": 1.2558330763696166, "grad_norm": 0.2089642882347107, "learning_rate": 5.881644155340213e-05, "loss": 0.5669, "step": 6109 }, { "epoch": 1.2560386473429952, "grad_norm": 0.4316593110561371, "learning_rate": 5.880693553970256e-05, "loss": 0.564, "step": 6110 }, { "epoch": 1.2562442183163738, "grad_norm": 0.21521629393100739, "learning_rate": 5.879742884579981e-05, "loss": 0.5774, "step": 6111 }, { "epoch": 1.2564497892897522, "grad_norm": 0.2025582194328308, "learning_rate": 5.878792147216223e-05, "loss": 0.5487, "step": 6112 }, { "epoch": 1.2566553602631307, "grad_norm": 0.21197755634784698, "learning_rate": 5.8778413419258204e-05, "loss": 0.5674, "step": 6113 }, { "epoch": 1.2568609312365093, "grad_norm": 0.21161524951457977, "learning_rate": 5.876890468755614e-05, "loss": 0.5915, "step": 6114 }, { "epoch": 1.257066502209888, "grad_norm": 0.20301292836666107, "learning_rate": 5.875939527752451e-05, "loss": 0.5569, "step": 6115 }, { "epoch": 1.2572720731832665, "grad_norm": 0.20232078433036804, "learning_rate": 5.874988518963178e-05, "loss": 0.5686, "step": 6116 }, { "epoch": 1.257477644156645, "grad_norm": 0.19668982923030853, "learning_rate": 5.8740374424346484e-05, "loss": 0.5472, "step": 6117 }, { "epoch": 1.2576832151300237, "grad_norm": 0.19299955666065216, "learning_rate": 5.8730862982137155e-05, "loss": 0.554, "step": 6118 }, { "epoch": 1.2578887861034023, "grad_norm": 0.16891315579414368, "learning_rate": 5.872135086347238e-05, "loss": 0.549, "step": 6119 }, { "epoch": 1.2580943570767809, "grad_norm": 0.19991520047187805, "learning_rate": 5.87118380688208e-05, "loss": 0.5791, "step": 6120 }, { "epoch": 1.2582999280501594, "grad_norm": 0.19644920527935028, "learning_rate": 5.870232459865102e-05, "loss": 0.5416, "step": 6121 }, { "epoch": 1.2585054990235378, "grad_norm": 0.19781053066253662, "learning_rate": 5.869281045343177e-05, "loss": 0.5701, "step": 6122 }, { "epoch": 1.2587110699969164, "grad_norm": 0.1692863404750824, "learning_rate": 5.868329563363175e-05, "loss": 0.5307, "step": 6123 }, { "epoch": 1.258916640970295, "grad_norm": 0.16794486343860626, "learning_rate": 5.8673780139719697e-05, "loss": 0.572, "step": 6124 }, { "epoch": 1.2591222119436736, "grad_norm": 0.16393691301345825, "learning_rate": 5.866426397216442e-05, "loss": 0.5017, "step": 6125 }, { "epoch": 1.2593277829170522, "grad_norm": 0.20335790514945984, "learning_rate": 5.8654747131434714e-05, "loss": 0.5663, "step": 6126 }, { "epoch": 1.2595333538904308, "grad_norm": 0.20092669129371643, "learning_rate": 5.864522961799944e-05, "loss": 0.5714, "step": 6127 }, { "epoch": 1.2597389248638091, "grad_norm": 0.16403307020664215, "learning_rate": 5.863571143232748e-05, "loss": 0.5319, "step": 6128 }, { "epoch": 1.2599444958371877, "grad_norm": 0.1622430980205536, "learning_rate": 5.8626192574887756e-05, "loss": 0.5429, "step": 6129 }, { "epoch": 1.2601500668105663, "grad_norm": 0.19496072828769684, "learning_rate": 5.861667304614922e-05, "loss": 0.5497, "step": 6130 }, { "epoch": 1.2603556377839449, "grad_norm": 0.18575909733772278, "learning_rate": 5.860715284658084e-05, "loss": 0.5494, "step": 6131 }, { "epoch": 1.2605612087573235, "grad_norm": 0.19597534835338593, "learning_rate": 5.8597631976651635e-05, "loss": 0.5602, "step": 6132 }, { "epoch": 1.260766779730702, "grad_norm": 0.1906193345785141, "learning_rate": 5.858811043683066e-05, "loss": 0.5495, "step": 6133 }, { "epoch": 1.2609723507040806, "grad_norm": 0.16364972293376923, "learning_rate": 5.8578588227586995e-05, "loss": 0.5283, "step": 6134 }, { "epoch": 1.2611779216774592, "grad_norm": 0.15908394753932953, "learning_rate": 5.8569065349389746e-05, "loss": 0.5484, "step": 6135 }, { "epoch": 1.2613834926508378, "grad_norm": 0.18748100101947784, "learning_rate": 5.855954180270808e-05, "loss": 0.5653, "step": 6136 }, { "epoch": 1.2615890636242162, "grad_norm": 0.19369830191135406, "learning_rate": 5.855001758801116e-05, "loss": 0.5627, "step": 6137 }, { "epoch": 1.2617946345975948, "grad_norm": 0.19096927344799042, "learning_rate": 5.8540492705768205e-05, "loss": 0.5464, "step": 6138 }, { "epoch": 1.2620002055709734, "grad_norm": 0.19514234364032745, "learning_rate": 5.853096715644847e-05, "loss": 0.569, "step": 6139 }, { "epoch": 1.262205776544352, "grad_norm": 0.19120776653289795, "learning_rate": 5.852144094052123e-05, "loss": 0.5634, "step": 6140 }, { "epoch": 1.2624113475177305, "grad_norm": 0.19928298890590668, "learning_rate": 5.851191405845579e-05, "loss": 0.5745, "step": 6141 }, { "epoch": 1.2626169184911091, "grad_norm": 0.1887395977973938, "learning_rate": 5.850238651072149e-05, "loss": 0.56, "step": 6142 }, { "epoch": 1.2628224894644875, "grad_norm": 0.19872866570949554, "learning_rate": 5.849285829778772e-05, "loss": 0.5627, "step": 6143 }, { "epoch": 1.263028060437866, "grad_norm": 0.16826018691062927, "learning_rate": 5.8483329420123906e-05, "loss": 0.5414, "step": 6144 }, { "epoch": 1.2632336314112447, "grad_norm": 0.16626615822315216, "learning_rate": 5.847379987819944e-05, "loss": 0.5532, "step": 6145 }, { "epoch": 1.2634392023846233, "grad_norm": 0.1921907663345337, "learning_rate": 5.8464269672483855e-05, "loss": 0.5543, "step": 6146 }, { "epoch": 1.2636447733580018, "grad_norm": 0.191694438457489, "learning_rate": 5.8454738803446616e-05, "loss": 0.5442, "step": 6147 }, { "epoch": 1.2638503443313804, "grad_norm": 0.19045263528823853, "learning_rate": 5.8445207271557306e-05, "loss": 0.5794, "step": 6148 }, { "epoch": 1.264055915304759, "grad_norm": 0.19358719885349274, "learning_rate": 5.843567507728545e-05, "loss": 0.5692, "step": 6149 }, { "epoch": 1.2642614862781376, "grad_norm": 0.19511562585830688, "learning_rate": 5.8426142221100706e-05, "loss": 0.5648, "step": 6150 }, { "epoch": 1.2644670572515162, "grad_norm": 0.1978984773159027, "learning_rate": 5.841660870347268e-05, "loss": 0.5792, "step": 6151 }, { "epoch": 1.2646726282248946, "grad_norm": 0.189521923661232, "learning_rate": 5.840707452487104e-05, "loss": 0.5421, "step": 6152 }, { "epoch": 1.2648781991982732, "grad_norm": 0.1647057980298996, "learning_rate": 5.8397539685765516e-05, "loss": 0.5296, "step": 6153 }, { "epoch": 1.2650837701716517, "grad_norm": 0.15688472986221313, "learning_rate": 5.8388004186625836e-05, "loss": 0.5423, "step": 6154 }, { "epoch": 1.2652893411450303, "grad_norm": 0.19488799571990967, "learning_rate": 5.8378468027921766e-05, "loss": 0.5396, "step": 6155 }, { "epoch": 1.265494912118409, "grad_norm": 0.19577009975910187, "learning_rate": 5.8368931210123085e-05, "loss": 0.5487, "step": 6156 }, { "epoch": 1.2657004830917875, "grad_norm": 0.19283023476600647, "learning_rate": 5.835939373369966e-05, "loss": 0.5554, "step": 6157 }, { "epoch": 1.2659060540651659, "grad_norm": 0.19187267124652863, "learning_rate": 5.834985559912136e-05, "loss": 0.5572, "step": 6158 }, { "epoch": 1.2661116250385445, "grad_norm": 0.19688525795936584, "learning_rate": 5.834031680685805e-05, "loss": 0.5667, "step": 6159 }, { "epoch": 1.266317196011923, "grad_norm": 0.17647728323936462, "learning_rate": 5.83307773573797e-05, "loss": 0.546, "step": 6160 }, { "epoch": 1.2665227669853016, "grad_norm": 0.16302068531513214, "learning_rate": 5.8321237251156254e-05, "loss": 0.5648, "step": 6161 }, { "epoch": 1.2667283379586802, "grad_norm": 0.1963539719581604, "learning_rate": 5.8311696488657714e-05, "loss": 0.5584, "step": 6162 }, { "epoch": 1.2669339089320588, "grad_norm": 0.19600288569927216, "learning_rate": 5.8302155070354105e-05, "loss": 0.5657, "step": 6163 }, { "epoch": 1.2671394799054374, "grad_norm": 0.17675581574440002, "learning_rate": 5.829261299671549e-05, "loss": 0.5394, "step": 6164 }, { "epoch": 1.267345050878816, "grad_norm": 0.16274531185626984, "learning_rate": 5.828307026821196e-05, "loss": 0.5493, "step": 6165 }, { "epoch": 1.2675506218521946, "grad_norm": 0.18789401650428772, "learning_rate": 5.827352688531365e-05, "loss": 0.5438, "step": 6166 }, { "epoch": 1.267756192825573, "grad_norm": 0.19160960614681244, "learning_rate": 5.82639828484907e-05, "loss": 0.558, "step": 6167 }, { "epoch": 1.2679617637989515, "grad_norm": 0.1683780699968338, "learning_rate": 5.8254438158213306e-05, "loss": 0.5021, "step": 6168 }, { "epoch": 1.2681673347723301, "grad_norm": 0.14388030767440796, "learning_rate": 5.824489281495171e-05, "loss": 0.5228, "step": 6169 }, { "epoch": 1.2683729057457087, "grad_norm": 0.1721310168504715, "learning_rate": 5.8235346819176135e-05, "loss": 0.5546, "step": 6170 }, { "epoch": 1.2685784767190873, "grad_norm": 0.19721747934818268, "learning_rate": 5.822580017135691e-05, "loss": 0.5533, "step": 6171 }, { "epoch": 1.2687840476924659, "grad_norm": 0.18930335342884064, "learning_rate": 5.8216252871964314e-05, "loss": 0.5671, "step": 6172 }, { "epoch": 1.2689896186658443, "grad_norm": 0.1941603124141693, "learning_rate": 5.8206704921468695e-05, "loss": 0.5594, "step": 6173 }, { "epoch": 1.2691951896392228, "grad_norm": 0.20115360617637634, "learning_rate": 5.819715632034048e-05, "loss": 0.5645, "step": 6174 }, { "epoch": 1.2694007606126014, "grad_norm": 0.19006428122520447, "learning_rate": 5.818760706905004e-05, "loss": 0.5384, "step": 6175 }, { "epoch": 1.26960633158598, "grad_norm": 0.18901333212852478, "learning_rate": 5.8178057168067844e-05, "loss": 0.5551, "step": 6176 }, { "epoch": 1.2698119025593586, "grad_norm": 0.1722274273633957, "learning_rate": 5.816850661786436e-05, "loss": 0.529, "step": 6177 }, { "epoch": 1.2700174735327372, "grad_norm": 0.16205133497714996, "learning_rate": 5.815895541891012e-05, "loss": 0.5608, "step": 6178 }, { "epoch": 1.2702230445061158, "grad_norm": 0.20700521767139435, "learning_rate": 5.814940357167563e-05, "loss": 0.5537, "step": 6179 }, { "epoch": 1.2704286154794944, "grad_norm": 0.19888941943645477, "learning_rate": 5.8139851076631486e-05, "loss": 0.5919, "step": 6180 }, { "epoch": 1.270634186452873, "grad_norm": 0.18785306811332703, "learning_rate": 5.813029793424831e-05, "loss": 0.5355, "step": 6181 }, { "epoch": 1.2708397574262515, "grad_norm": 0.1864861100912094, "learning_rate": 5.812074414499673e-05, "loss": 0.5585, "step": 6182 }, { "epoch": 1.27104532839963, "grad_norm": 0.16200599074363708, "learning_rate": 5.81111897093474e-05, "loss": 0.5484, "step": 6183 }, { "epoch": 1.2712508993730085, "grad_norm": 0.15543238818645477, "learning_rate": 5.8101634627771034e-05, "loss": 0.5398, "step": 6184 }, { "epoch": 1.271456470346387, "grad_norm": 0.1934465765953064, "learning_rate": 5.809207890073837e-05, "loss": 0.5703, "step": 6185 }, { "epoch": 1.2716620413197657, "grad_norm": 0.17177589237689972, "learning_rate": 5.808252252872018e-05, "loss": 0.535, "step": 6186 }, { "epoch": 1.2718676122931443, "grad_norm": 0.1565936654806137, "learning_rate": 5.807296551218723e-05, "loss": 0.5704, "step": 6187 }, { "epoch": 1.2720731832665226, "grad_norm": 0.1956259161233902, "learning_rate": 5.80634078516104e-05, "loss": 0.5477, "step": 6188 }, { "epoch": 1.2722787542399012, "grad_norm": 0.19236725568771362, "learning_rate": 5.80538495474605e-05, "loss": 0.5691, "step": 6189 }, { "epoch": 1.2724843252132798, "grad_norm": 0.16895383596420288, "learning_rate": 5.804429060020845e-05, "loss": 0.5185, "step": 6190 }, { "epoch": 1.2726898961866584, "grad_norm": 0.15849240124225616, "learning_rate": 5.8034731010325176e-05, "loss": 0.5699, "step": 6191 }, { "epoch": 1.272895467160037, "grad_norm": 0.1865822672843933, "learning_rate": 5.802517077828163e-05, "loss": 0.5255, "step": 6192 }, { "epoch": 1.2731010381334156, "grad_norm": 0.16672882437705994, "learning_rate": 5.80156099045488e-05, "loss": 0.5399, "step": 6193 }, { "epoch": 1.2733066091067942, "grad_norm": 0.1562536656856537, "learning_rate": 5.8006048389597694e-05, "loss": 0.55, "step": 6194 }, { "epoch": 1.2735121800801728, "grad_norm": 0.19599376618862152, "learning_rate": 5.7996486233899395e-05, "loss": 0.5545, "step": 6195 }, { "epoch": 1.2737177510535513, "grad_norm": 0.1640097200870514, "learning_rate": 5.798692343792495e-05, "loss": 0.5277, "step": 6196 }, { "epoch": 1.27392332202693, "grad_norm": 0.17527011036872864, "learning_rate": 5.797736000214549e-05, "loss": 0.5735, "step": 6197 }, { "epoch": 1.2741288930003083, "grad_norm": 0.19275882840156555, "learning_rate": 5.7967795927032164e-05, "loss": 0.5686, "step": 6198 }, { "epoch": 1.2743344639736869, "grad_norm": 0.19368760287761688, "learning_rate": 5.7958231213056144e-05, "loss": 0.5665, "step": 6199 }, { "epoch": 1.2745400349470655, "grad_norm": 0.1672065258026123, "learning_rate": 5.794866586068862e-05, "loss": 0.5532, "step": 6200 }, { "epoch": 1.274745605920444, "grad_norm": 0.1615796685218811, "learning_rate": 5.7939099870400865e-05, "loss": 0.5549, "step": 6201 }, { "epoch": 1.2749511768938226, "grad_norm": 0.18721790611743927, "learning_rate": 5.7929533242664137e-05, "loss": 0.5476, "step": 6202 }, { "epoch": 1.275156747867201, "grad_norm": 0.19924210011959076, "learning_rate": 5.791996597794975e-05, "loss": 0.5929, "step": 6203 }, { "epoch": 1.2753623188405796, "grad_norm": 0.16278637945652008, "learning_rate": 5.791039807672901e-05, "loss": 0.545, "step": 6204 }, { "epoch": 1.2755678898139582, "grad_norm": 0.12655942142009735, "learning_rate": 5.7900829539473304e-05, "loss": 0.5253, "step": 6205 }, { "epoch": 1.2757734607873368, "grad_norm": 0.16198953986167908, "learning_rate": 5.789126036665403e-05, "loss": 0.5607, "step": 6206 }, { "epoch": 1.2759790317607154, "grad_norm": 0.1700884997844696, "learning_rate": 5.7881690558742605e-05, "loss": 0.5321, "step": 6207 }, { "epoch": 1.276184602734094, "grad_norm": 0.15518617630004883, "learning_rate": 5.7872120116210494e-05, "loss": 0.5518, "step": 6208 }, { "epoch": 1.2763901737074725, "grad_norm": 0.18900856375694275, "learning_rate": 5.7862549039529196e-05, "loss": 0.5467, "step": 6209 }, { "epoch": 1.2765957446808511, "grad_norm": 0.2112400233745575, "learning_rate": 5.785297732917023e-05, "loss": 0.5821, "step": 6210 }, { "epoch": 1.2768013156542297, "grad_norm": 0.19592179358005524, "learning_rate": 5.784340498560513e-05, "loss": 0.5889, "step": 6211 }, { "epoch": 1.2770068866276083, "grad_norm": 0.1897910088300705, "learning_rate": 5.783383200930551e-05, "loss": 0.5657, "step": 6212 }, { "epoch": 1.2772124576009867, "grad_norm": 0.1914108544588089, "learning_rate": 5.782425840074297e-05, "loss": 0.5578, "step": 6213 }, { "epoch": 1.2774180285743653, "grad_norm": 0.19016936421394348, "learning_rate": 5.781468416038914e-05, "loss": 0.5599, "step": 6214 }, { "epoch": 1.2776235995477438, "grad_norm": 0.18804775178432465, "learning_rate": 5.780510928871574e-05, "loss": 0.5671, "step": 6215 }, { "epoch": 1.2778291705211224, "grad_norm": 0.18596555292606354, "learning_rate": 5.779553378619445e-05, "loss": 0.5355, "step": 6216 }, { "epoch": 1.278034741494501, "grad_norm": 0.19289173185825348, "learning_rate": 5.778595765329702e-05, "loss": 0.583, "step": 6217 }, { "epoch": 1.2782403124678796, "grad_norm": 0.18467681109905243, "learning_rate": 5.7776380890495214e-05, "loss": 0.561, "step": 6218 }, { "epoch": 1.278445883441258, "grad_norm": 0.19433990120887756, "learning_rate": 5.776680349826083e-05, "loss": 0.5548, "step": 6219 }, { "epoch": 1.2786514544146366, "grad_norm": 0.1940041035413742, "learning_rate": 5.7757225477065725e-05, "loss": 0.5654, "step": 6220 }, { "epoch": 1.2788570253880152, "grad_norm": 0.1894046515226364, "learning_rate": 5.774764682738174e-05, "loss": 0.5628, "step": 6221 }, { "epoch": 1.2790625963613937, "grad_norm": 0.20354604721069336, "learning_rate": 5.7738067549680776e-05, "loss": 0.569, "step": 6222 }, { "epoch": 1.2792681673347723, "grad_norm": 0.18965789675712585, "learning_rate": 5.7728487644434754e-05, "loss": 0.5458, "step": 6223 }, { "epoch": 1.279473738308151, "grad_norm": 0.18858371675014496, "learning_rate": 5.771890711211566e-05, "loss": 0.5415, "step": 6224 }, { "epoch": 1.2796793092815295, "grad_norm": 0.19351953268051147, "learning_rate": 5.7709325953195444e-05, "loss": 0.5504, "step": 6225 }, { "epoch": 1.279884880254908, "grad_norm": 0.18949908018112183, "learning_rate": 5.769974416814615e-05, "loss": 0.541, "step": 6226 }, { "epoch": 1.2800904512282867, "grad_norm": 0.19526349008083344, "learning_rate": 5.769016175743982e-05, "loss": 0.5634, "step": 6227 }, { "epoch": 1.280296022201665, "grad_norm": 0.17583510279655457, "learning_rate": 5.7680578721548524e-05, "loss": 0.5462, "step": 6228 }, { "epoch": 1.2805015931750436, "grad_norm": 0.1601148396730423, "learning_rate": 5.767099506094438e-05, "loss": 0.5474, "step": 6229 }, { "epoch": 1.2807071641484222, "grad_norm": 0.19925040006637573, "learning_rate": 5.766141077609955e-05, "loss": 0.5884, "step": 6230 }, { "epoch": 1.2809127351218008, "grad_norm": 0.20039363205432892, "learning_rate": 5.765182586748619e-05, "loss": 0.5624, "step": 6231 }, { "epoch": 1.2811183060951794, "grad_norm": 0.19234807789325714, "learning_rate": 5.764224033557649e-05, "loss": 0.5994, "step": 6232 }, { "epoch": 1.281323877068558, "grad_norm": 0.19299016892910004, "learning_rate": 5.76326541808427e-05, "loss": 0.5786, "step": 6233 }, { "epoch": 1.2815294480419364, "grad_norm": 0.2128915637731552, "learning_rate": 5.762306740375709e-05, "loss": 0.5763, "step": 6234 }, { "epoch": 1.281735019015315, "grad_norm": 0.19753651320934296, "learning_rate": 5.761348000479194e-05, "loss": 0.5565, "step": 6235 }, { "epoch": 1.2819405899886935, "grad_norm": 0.19530276954174042, "learning_rate": 5.76038919844196e-05, "loss": 0.567, "step": 6236 }, { "epoch": 1.2821461609620721, "grad_norm": 0.1876569539308548, "learning_rate": 5.7594303343112406e-05, "loss": 0.5358, "step": 6237 }, { "epoch": 1.2823517319354507, "grad_norm": 0.19202187657356262, "learning_rate": 5.758471408134276e-05, "loss": 0.5589, "step": 6238 }, { "epoch": 1.2825573029088293, "grad_norm": 0.2080259472131729, "learning_rate": 5.757512419958305e-05, "loss": 0.5767, "step": 6239 }, { "epoch": 1.2827628738822079, "grad_norm": 0.2008046805858612, "learning_rate": 5.756553369830577e-05, "loss": 0.5486, "step": 6240 }, { "epoch": 1.2829684448555865, "grad_norm": 0.18698541820049286, "learning_rate": 5.7555942577983364e-05, "loss": 0.5471, "step": 6241 }, { "epoch": 1.283174015828965, "grad_norm": 0.19184443354606628, "learning_rate": 5.754635083908835e-05, "loss": 0.5703, "step": 6242 }, { "epoch": 1.2833795868023434, "grad_norm": 0.18551193177700043, "learning_rate": 5.753675848209329e-05, "loss": 0.5353, "step": 6243 }, { "epoch": 1.283585157775722, "grad_norm": 0.17165902256965637, "learning_rate": 5.7527165507470705e-05, "loss": 0.5094, "step": 6244 }, { "epoch": 1.2837907287491006, "grad_norm": 0.16080299019813538, "learning_rate": 5.7517571915693255e-05, "loss": 0.5797, "step": 6245 }, { "epoch": 1.2839962997224792, "grad_norm": 0.16521471738815308, "learning_rate": 5.750797770723353e-05, "loss": 0.5199, "step": 6246 }, { "epoch": 1.2842018706958578, "grad_norm": 0.12971197068691254, "learning_rate": 5.749838288256421e-05, "loss": 0.5376, "step": 6247 }, { "epoch": 1.2844074416692364, "grad_norm": 0.13733793795108795, "learning_rate": 5.748878744215799e-05, "loss": 0.5266, "step": 6248 }, { "epoch": 1.2846130126426147, "grad_norm": 0.1690482795238495, "learning_rate": 5.747919138648757e-05, "loss": 0.5737, "step": 6249 }, { "epoch": 1.2848185836159933, "grad_norm": 0.19658613204956055, "learning_rate": 5.746959471602572e-05, "loss": 0.5531, "step": 6250 }, { "epoch": 1.285024154589372, "grad_norm": 0.1984742283821106, "learning_rate": 5.7459997431245236e-05, "loss": 0.5877, "step": 6251 }, { "epoch": 1.2852297255627505, "grad_norm": 0.1888909637928009, "learning_rate": 5.74503995326189e-05, "loss": 0.5391, "step": 6252 }, { "epoch": 1.285435296536129, "grad_norm": 0.19062168896198273, "learning_rate": 5.744080102061958e-05, "loss": 0.5662, "step": 6253 }, { "epoch": 1.2856408675095077, "grad_norm": 0.1896916627883911, "learning_rate": 5.7431201895720146e-05, "loss": 0.5658, "step": 6254 }, { "epoch": 1.2858464384828863, "grad_norm": 0.19082388281822205, "learning_rate": 5.742160215839349e-05, "loss": 0.5624, "step": 6255 }, { "epoch": 1.2860520094562649, "grad_norm": 0.1924538016319275, "learning_rate": 5.741200180911255e-05, "loss": 0.5813, "step": 6256 }, { "epoch": 1.2862575804296434, "grad_norm": 0.18487077951431274, "learning_rate": 5.740240084835031e-05, "loss": 0.5528, "step": 6257 }, { "epoch": 1.286463151403022, "grad_norm": 0.18869616091251373, "learning_rate": 5.7392799276579745e-05, "loss": 0.5472, "step": 6258 }, { "epoch": 1.2866687223764004, "grad_norm": 0.19108757376670837, "learning_rate": 5.738319709427386e-05, "loss": 0.5516, "step": 6259 }, { "epoch": 1.286874293349779, "grad_norm": 0.18827085196971893, "learning_rate": 5.7373594301905764e-05, "loss": 0.519, "step": 6260 }, { "epoch": 1.2870798643231576, "grad_norm": 0.17874634265899658, "learning_rate": 5.736399089994849e-05, "loss": 0.5608, "step": 6261 }, { "epoch": 1.2872854352965362, "grad_norm": 0.19754135608673096, "learning_rate": 5.73543868888752e-05, "loss": 0.5846, "step": 6262 }, { "epoch": 1.2874910062699148, "grad_norm": 0.16421428322792053, "learning_rate": 5.734478226915899e-05, "loss": 0.5233, "step": 6263 }, { "epoch": 1.2876965772432931, "grad_norm": 0.16342876851558685, "learning_rate": 5.733517704127306e-05, "loss": 0.5307, "step": 6264 }, { "epoch": 1.2879021482166717, "grad_norm": 0.19278982281684875, "learning_rate": 5.732557120569061e-05, "loss": 0.5424, "step": 6265 }, { "epoch": 1.2881077191900503, "grad_norm": 0.18997056782245636, "learning_rate": 5.731596476288488e-05, "loss": 0.5628, "step": 6266 }, { "epoch": 1.2883132901634289, "grad_norm": 0.19608962535858154, "learning_rate": 5.730635771332912e-05, "loss": 0.546, "step": 6267 }, { "epoch": 1.2885188611368075, "grad_norm": 0.18659254908561707, "learning_rate": 5.729675005749666e-05, "loss": 0.5634, "step": 6268 }, { "epoch": 1.288724432110186, "grad_norm": 0.1904764473438263, "learning_rate": 5.7287141795860774e-05, "loss": 0.5523, "step": 6269 }, { "epoch": 1.2889300030835646, "grad_norm": 0.685501754283905, "learning_rate": 5.727753292889485e-05, "loss": 0.5588, "step": 6270 }, { "epoch": 1.2891355740569432, "grad_norm": 0.19180195033550262, "learning_rate": 5.726792345707227e-05, "loss": 0.552, "step": 6271 }, { "epoch": 1.2893411450303218, "grad_norm": 0.18611235916614532, "learning_rate": 5.7258313380866436e-05, "loss": 0.5342, "step": 6272 }, { "epoch": 1.2895467160037004, "grad_norm": 0.1877206414937973, "learning_rate": 5.7248702700750796e-05, "loss": 0.5512, "step": 6273 }, { "epoch": 1.2897522869770788, "grad_norm": 0.19219855964183807, "learning_rate": 5.723909141719883e-05, "loss": 0.5525, "step": 6274 }, { "epoch": 1.2899578579504574, "grad_norm": 0.1869809925556183, "learning_rate": 5.722947953068403e-05, "loss": 0.541, "step": 6275 }, { "epoch": 1.290163428923836, "grad_norm": 0.19108881056308746, "learning_rate": 5.721986704167994e-05, "loss": 0.5669, "step": 6276 }, { "epoch": 1.2903689998972145, "grad_norm": 0.1971481740474701, "learning_rate": 5.72102539506601e-05, "loss": 0.5596, "step": 6277 }, { "epoch": 1.2905745708705931, "grad_norm": 0.24877598881721497, "learning_rate": 5.7200640258098134e-05, "loss": 0.5511, "step": 6278 }, { "epoch": 1.2907801418439715, "grad_norm": 0.16880907118320465, "learning_rate": 5.719102596446765e-05, "loss": 0.5211, "step": 6279 }, { "epoch": 1.29098571281735, "grad_norm": 0.16007640957832336, "learning_rate": 5.718141107024229e-05, "loss": 0.5402, "step": 6280 }, { "epoch": 1.2911912837907287, "grad_norm": 0.1952618956565857, "learning_rate": 5.717179557589574e-05, "loss": 0.5729, "step": 6281 }, { "epoch": 1.2913968547641073, "grad_norm": 0.16671602427959442, "learning_rate": 5.7162179481901725e-05, "loss": 0.5312, "step": 6282 }, { "epoch": 1.2916024257374858, "grad_norm": 0.15948770940303802, "learning_rate": 5.7152562788733975e-05, "loss": 0.5243, "step": 6283 }, { "epoch": 1.2918079967108644, "grad_norm": 0.1951056569814682, "learning_rate": 5.7142945496866235e-05, "loss": 0.5665, "step": 6284 }, { "epoch": 1.292013567684243, "grad_norm": 0.1952039748430252, "learning_rate": 5.713332760677234e-05, "loss": 0.5717, "step": 6285 }, { "epoch": 1.2922191386576216, "grad_norm": 0.1987905502319336, "learning_rate": 5.7123709118926104e-05, "loss": 0.567, "step": 6286 }, { "epoch": 1.2924247096310002, "grad_norm": 0.19743449985980988, "learning_rate": 5.711409003380138e-05, "loss": 0.5466, "step": 6287 }, { "epoch": 1.2926302806043788, "grad_norm": 0.19229763746261597, "learning_rate": 5.710447035187206e-05, "loss": 0.5583, "step": 6288 }, { "epoch": 1.2928358515777572, "grad_norm": 0.18883401155471802, "learning_rate": 5.709485007361208e-05, "loss": 0.54, "step": 6289 }, { "epoch": 1.2930414225511357, "grad_norm": 0.19647282361984253, "learning_rate": 5.708522919949536e-05, "loss": 0.583, "step": 6290 }, { "epoch": 1.2932469935245143, "grad_norm": 0.18365654349327087, "learning_rate": 5.707560772999587e-05, "loss": 0.5476, "step": 6291 }, { "epoch": 1.293452564497893, "grad_norm": 0.19475975632667542, "learning_rate": 5.7065985665587646e-05, "loss": 0.5476, "step": 6292 }, { "epoch": 1.2936581354712715, "grad_norm": 0.18907500803470612, "learning_rate": 5.70563630067447e-05, "loss": 0.5483, "step": 6293 }, { "epoch": 1.29386370644465, "grad_norm": 0.189442440867424, "learning_rate": 5.704673975394109e-05, "loss": 0.5387, "step": 6294 }, { "epoch": 1.2940692774180285, "grad_norm": 0.19112446904182434, "learning_rate": 5.703711590765093e-05, "loss": 0.5714, "step": 6295 }, { "epoch": 1.294274848391407, "grad_norm": 0.19194044172763824, "learning_rate": 5.7027491468348326e-05, "loss": 0.5521, "step": 6296 }, { "epoch": 1.2944804193647856, "grad_norm": 0.18977665901184082, "learning_rate": 5.7017866436507434e-05, "loss": 0.5738, "step": 6297 }, { "epoch": 1.2946859903381642, "grad_norm": 0.19306746125221252, "learning_rate": 5.700824081260243e-05, "loss": 0.5636, "step": 6298 }, { "epoch": 1.2948915613115428, "grad_norm": 0.19150002300739288, "learning_rate": 5.699861459710753e-05, "loss": 0.5506, "step": 6299 }, { "epoch": 1.2950971322849214, "grad_norm": 0.211594358086586, "learning_rate": 5.698898779049697e-05, "loss": 0.5631, "step": 6300 }, { "epoch": 1.2953027032583, "grad_norm": 0.19325849413871765, "learning_rate": 5.697936039324502e-05, "loss": 0.5571, "step": 6301 }, { "epoch": 1.2955082742316786, "grad_norm": 0.1876952350139618, "learning_rate": 5.696973240582597e-05, "loss": 0.5579, "step": 6302 }, { "epoch": 1.2957138452050572, "grad_norm": 0.16953028738498688, "learning_rate": 5.6960103828714164e-05, "loss": 0.5279, "step": 6303 }, { "epoch": 1.2959194161784355, "grad_norm": 0.16833354532718658, "learning_rate": 5.695047466238393e-05, "loss": 0.5394, "step": 6304 }, { "epoch": 1.2961249871518141, "grad_norm": 0.16338950395584106, "learning_rate": 5.694084490730967e-05, "loss": 0.5196, "step": 6305 }, { "epoch": 1.2963305581251927, "grad_norm": 0.16173096001148224, "learning_rate": 5.6931214563965805e-05, "loss": 0.5538, "step": 6306 }, { "epoch": 1.2965361290985713, "grad_norm": 0.19378416240215302, "learning_rate": 5.692158363282675e-05, "loss": 0.5448, "step": 6307 }, { "epoch": 1.2967417000719499, "grad_norm": 0.18964388966560364, "learning_rate": 5.691195211436699e-05, "loss": 0.5423, "step": 6308 }, { "epoch": 1.2969472710453285, "grad_norm": 0.18687476217746735, "learning_rate": 5.690232000906103e-05, "loss": 0.5643, "step": 6309 }, { "epoch": 1.2971528420187068, "grad_norm": 0.1913549154996872, "learning_rate": 5.689268731738339e-05, "loss": 0.554, "step": 6310 }, { "epoch": 1.2973584129920854, "grad_norm": 0.19576480984687805, "learning_rate": 5.688305403980863e-05, "loss": 0.5846, "step": 6311 }, { "epoch": 1.297563983965464, "grad_norm": 0.2015174776315689, "learning_rate": 5.687342017681135e-05, "loss": 0.5571, "step": 6312 }, { "epoch": 1.2977695549388426, "grad_norm": 0.1950497329235077, "learning_rate": 5.6863785728866154e-05, "loss": 0.5471, "step": 6313 }, { "epoch": 1.2979751259122212, "grad_norm": 0.19457519054412842, "learning_rate": 5.6854150696447686e-05, "loss": 0.5689, "step": 6314 }, { "epoch": 1.2981806968855998, "grad_norm": 0.18924319744110107, "learning_rate": 5.684451508003061e-05, "loss": 0.5632, "step": 6315 }, { "epoch": 1.2983862678589784, "grad_norm": 0.20829612016677856, "learning_rate": 5.6834878880089635e-05, "loss": 0.554, "step": 6316 }, { "epoch": 1.298591838832357, "grad_norm": 0.19046112895011902, "learning_rate": 5.6825242097099514e-05, "loss": 0.5508, "step": 6317 }, { "epoch": 1.2987974098057355, "grad_norm": 0.19234079122543335, "learning_rate": 5.681560473153495e-05, "loss": 0.5417, "step": 6318 }, { "epoch": 1.299002980779114, "grad_norm": 0.19579647481441498, "learning_rate": 5.68059667838708e-05, "loss": 0.5449, "step": 6319 }, { "epoch": 1.2992085517524925, "grad_norm": 0.19146116077899933, "learning_rate": 5.679632825458184e-05, "loss": 0.5603, "step": 6320 }, { "epoch": 1.299414122725871, "grad_norm": 0.19622944295406342, "learning_rate": 5.6786689144142917e-05, "loss": 0.5568, "step": 6321 }, { "epoch": 1.2996196936992497, "grad_norm": 0.19650766253471375, "learning_rate": 5.6777049453028914e-05, "loss": 0.5603, "step": 6322 }, { "epoch": 1.2998252646726283, "grad_norm": 0.20279136300086975, "learning_rate": 5.676740918171472e-05, "loss": 0.5455, "step": 6323 }, { "epoch": 1.3000308356460069, "grad_norm": 0.1786477267742157, "learning_rate": 5.67577683306753e-05, "loss": 0.5148, "step": 6324 }, { "epoch": 1.3002364066193852, "grad_norm": 0.15858376026153564, "learning_rate": 5.674812690038557e-05, "loss": 0.5217, "step": 6325 }, { "epoch": 1.3004419775927638, "grad_norm": 0.16333921253681183, "learning_rate": 5.673848489132054e-05, "loss": 0.5504, "step": 6326 }, { "epoch": 1.3006475485661424, "grad_norm": 0.20864447951316833, "learning_rate": 5.672884230395524e-05, "loss": 0.5664, "step": 6327 }, { "epoch": 1.300853119539521, "grad_norm": 0.20059353113174438, "learning_rate": 5.6719199138764686e-05, "loss": 0.575, "step": 6328 }, { "epoch": 1.3010586905128996, "grad_norm": 0.1858949512243271, "learning_rate": 5.670955539622396e-05, "loss": 0.535, "step": 6329 }, { "epoch": 1.3012642614862782, "grad_norm": 0.1687631458044052, "learning_rate": 5.669991107680818e-05, "loss": 0.54, "step": 6330 }, { "epoch": 1.3014698324596568, "grad_norm": 0.16431094706058502, "learning_rate": 5.6690266180992464e-05, "loss": 0.5506, "step": 6331 }, { "epoch": 1.3016754034330353, "grad_norm": 0.21161231398582458, "learning_rate": 5.668062070925197e-05, "loss": 0.5579, "step": 6332 }, { "epoch": 1.301880974406414, "grad_norm": 0.20481392741203308, "learning_rate": 5.66709746620619e-05, "loss": 0.5693, "step": 6333 }, { "epoch": 1.3020865453797923, "grad_norm": 0.2095717191696167, "learning_rate": 5.6661328039897456e-05, "loss": 0.5543, "step": 6334 }, { "epoch": 1.3022921163531709, "grad_norm": 0.17169706523418427, "learning_rate": 5.665168084323387e-05, "loss": 0.513, "step": 6335 }, { "epoch": 1.3024976873265495, "grad_norm": 0.184236079454422, "learning_rate": 5.664203307254644e-05, "loss": 0.5606, "step": 6336 }, { "epoch": 1.302703258299928, "grad_norm": 0.210636168718338, "learning_rate": 5.6632384728310464e-05, "loss": 0.5587, "step": 6337 }, { "epoch": 1.3029088292733066, "grad_norm": 0.20916485786437988, "learning_rate": 5.6622735811001255e-05, "loss": 0.5563, "step": 6338 }, { "epoch": 1.3031144002466852, "grad_norm": 0.19716860353946686, "learning_rate": 5.6613086321094175e-05, "loss": 0.5461, "step": 6339 }, { "epoch": 1.3033199712200636, "grad_norm": 0.20383410155773163, "learning_rate": 5.660343625906461e-05, "loss": 0.5711, "step": 6340 }, { "epoch": 1.3035255421934422, "grad_norm": 0.19553574919700623, "learning_rate": 5.6593785625387965e-05, "loss": 0.5719, "step": 6341 }, { "epoch": 1.3037311131668208, "grad_norm": 0.20345737040042877, "learning_rate": 5.65841344205397e-05, "loss": 0.5902, "step": 6342 }, { "epoch": 1.3039366841401994, "grad_norm": 0.1968560367822647, "learning_rate": 5.657448264499528e-05, "loss": 0.5552, "step": 6343 }, { "epoch": 1.304142255113578, "grad_norm": 0.19714896380901337, "learning_rate": 5.6564830299230204e-05, "loss": 0.5477, "step": 6344 }, { "epoch": 1.3043478260869565, "grad_norm": 0.2418747991323471, "learning_rate": 5.6555177383719986e-05, "loss": 0.5675, "step": 6345 }, { "epoch": 1.3045533970603351, "grad_norm": 0.16260170936584473, "learning_rate": 5.654552389894019e-05, "loss": 0.5324, "step": 6346 }, { "epoch": 1.3047589680337137, "grad_norm": 0.15336725115776062, "learning_rate": 5.653586984536639e-05, "loss": 0.5376, "step": 6347 }, { "epoch": 1.3049645390070923, "grad_norm": 0.13179324567317963, "learning_rate": 5.652621522347421e-05, "loss": 0.5133, "step": 6348 }, { "epoch": 1.305170109980471, "grad_norm": 0.16065613925457, "learning_rate": 5.651656003373927e-05, "loss": 0.5376, "step": 6349 }, { "epoch": 1.3053756809538493, "grad_norm": 0.20791570842266083, "learning_rate": 5.650690427663725e-05, "loss": 0.5707, "step": 6350 }, { "epoch": 1.3055812519272278, "grad_norm": 0.19432078301906586, "learning_rate": 5.649724795264384e-05, "loss": 0.5642, "step": 6351 }, { "epoch": 1.3057868229006064, "grad_norm": 0.19507555663585663, "learning_rate": 5.6487591062234756e-05, "loss": 0.5484, "step": 6352 }, { "epoch": 1.305992393873985, "grad_norm": 0.18937799334526062, "learning_rate": 5.647793360588575e-05, "loss": 0.5504, "step": 6353 }, { "epoch": 1.3061979648473636, "grad_norm": 0.18545973300933838, "learning_rate": 5.646827558407261e-05, "loss": 0.5353, "step": 6354 }, { "epoch": 1.306403535820742, "grad_norm": 0.210302472114563, "learning_rate": 5.645861699727114e-05, "loss": 0.5373, "step": 6355 }, { "epoch": 1.3066091067941206, "grad_norm": 0.20394356548786163, "learning_rate": 5.644895784595715e-05, "loss": 0.5707, "step": 6356 }, { "epoch": 1.3068146777674992, "grad_norm": 0.20221911370754242, "learning_rate": 5.6439298130606546e-05, "loss": 0.5635, "step": 6357 }, { "epoch": 1.3070202487408777, "grad_norm": 0.20493952929973602, "learning_rate": 5.642963785169518e-05, "loss": 0.5635, "step": 6358 }, { "epoch": 1.3072258197142563, "grad_norm": 0.2118876874446869, "learning_rate": 5.641997700969898e-05, "loss": 0.5578, "step": 6359 }, { "epoch": 1.307431390687635, "grad_norm": 0.1980256587266922, "learning_rate": 5.6410315605093875e-05, "loss": 0.5551, "step": 6360 }, { "epoch": 1.3076369616610135, "grad_norm": 0.20084832608699799, "learning_rate": 5.640065363835586e-05, "loss": 0.569, "step": 6361 }, { "epoch": 1.307842532634392, "grad_norm": 0.1686294972896576, "learning_rate": 5.639099110996092e-05, "loss": 0.5371, "step": 6362 }, { "epoch": 1.3080481036077707, "grad_norm": 0.15857572853565216, "learning_rate": 5.63813280203851e-05, "loss": 0.5402, "step": 6363 }, { "epoch": 1.3082536745811493, "grad_norm": 0.15745136141777039, "learning_rate": 5.6371664370104435e-05, "loss": 0.5196, "step": 6364 }, { "epoch": 1.3084592455545276, "grad_norm": 0.15688499808311462, "learning_rate": 5.6362000159595034e-05, "loss": 0.5361, "step": 6365 }, { "epoch": 1.3086648165279062, "grad_norm": 0.18788595497608185, "learning_rate": 5.635233538933298e-05, "loss": 0.551, "step": 6366 }, { "epoch": 1.3088703875012848, "grad_norm": 0.19345730543136597, "learning_rate": 5.634267005979442e-05, "loss": 0.5762, "step": 6367 }, { "epoch": 1.3090759584746634, "grad_norm": 0.1903630942106247, "learning_rate": 5.633300417145553e-05, "loss": 0.5489, "step": 6368 }, { "epoch": 1.309281529448042, "grad_norm": 0.19679617881774902, "learning_rate": 5.632333772479249e-05, "loss": 0.5641, "step": 6369 }, { "epoch": 1.3094871004214204, "grad_norm": 0.19722123444080353, "learning_rate": 5.631367072028152e-05, "loss": 0.5428, "step": 6370 }, { "epoch": 1.309692671394799, "grad_norm": 0.19673387706279755, "learning_rate": 5.630400315839888e-05, "loss": 0.5763, "step": 6371 }, { "epoch": 1.3098982423681775, "grad_norm": 0.19249959290027618, "learning_rate": 5.629433503962084e-05, "loss": 0.5687, "step": 6372 }, { "epoch": 1.3101038133415561, "grad_norm": 0.18873926997184753, "learning_rate": 5.6284666364423695e-05, "loss": 0.557, "step": 6373 }, { "epoch": 1.3103093843149347, "grad_norm": 0.2006826251745224, "learning_rate": 5.627499713328378e-05, "loss": 0.549, "step": 6374 }, { "epoch": 1.3105149552883133, "grad_norm": 0.15970605611801147, "learning_rate": 5.6265327346677465e-05, "loss": 0.5264, "step": 6375 }, { "epoch": 1.3107205262616919, "grad_norm": 0.16438056528568268, "learning_rate": 5.6255657005081134e-05, "loss": 0.5647, "step": 6376 }, { "epoch": 1.3109260972350705, "grad_norm": 0.19391551613807678, "learning_rate": 5.624598610897117e-05, "loss": 0.5691, "step": 6377 }, { "epoch": 1.311131668208449, "grad_norm": 0.19656315445899963, "learning_rate": 5.623631465882405e-05, "loss": 0.5626, "step": 6378 }, { "epoch": 1.3113372391818277, "grad_norm": 0.18690890073776245, "learning_rate": 5.622664265511623e-05, "loss": 0.5395, "step": 6379 }, { "epoch": 1.311542810155206, "grad_norm": 0.19605736434459686, "learning_rate": 5.621697009832418e-05, "loss": 0.5796, "step": 6380 }, { "epoch": 1.3117483811285846, "grad_norm": 0.19763530790805817, "learning_rate": 5.620729698892445e-05, "loss": 0.5447, "step": 6381 }, { "epoch": 1.3119539521019632, "grad_norm": 0.18934392929077148, "learning_rate": 5.6197623327393584e-05, "loss": 0.575, "step": 6382 }, { "epoch": 1.3121595230753418, "grad_norm": 0.19040028750896454, "learning_rate": 5.6187949114208155e-05, "loss": 0.5448, "step": 6383 }, { "epoch": 1.3123650940487204, "grad_norm": 0.20778769254684448, "learning_rate": 5.6178274349844766e-05, "loss": 0.5336, "step": 6384 }, { "epoch": 1.312570665022099, "grad_norm": 0.18825723230838776, "learning_rate": 5.6168599034780034e-05, "loss": 0.5409, "step": 6385 }, { "epoch": 1.3127762359954773, "grad_norm": 0.1885683834552765, "learning_rate": 5.615892316949064e-05, "loss": 0.5617, "step": 6386 }, { "epoch": 1.312981806968856, "grad_norm": 0.16970692574977875, "learning_rate": 5.614924675445325e-05, "loss": 0.5322, "step": 6387 }, { "epoch": 1.3131873779422345, "grad_norm": 0.1596226543188095, "learning_rate": 5.613956979014459e-05, "loss": 0.5696, "step": 6388 }, { "epoch": 1.313392948915613, "grad_norm": 0.18783892691135406, "learning_rate": 5.61298922770414e-05, "loss": 0.5507, "step": 6389 }, { "epoch": 1.3135985198889917, "grad_norm": 0.2017127424478531, "learning_rate": 5.612021421562043e-05, "loss": 0.5858, "step": 6390 }, { "epoch": 1.3138040908623703, "grad_norm": 0.1910979151725769, "learning_rate": 5.611053560635848e-05, "loss": 0.5607, "step": 6391 }, { "epoch": 1.3140096618357489, "grad_norm": 0.2119234949350357, "learning_rate": 5.6100856449732384e-05, "loss": 0.5665, "step": 6392 }, { "epoch": 1.3142152328091274, "grad_norm": 0.19099730253219604, "learning_rate": 5.609117674621896e-05, "loss": 0.5601, "step": 6393 }, { "epoch": 1.314420803782506, "grad_norm": 0.18972419202327728, "learning_rate": 5.60814964962951e-05, "loss": 0.5419, "step": 6394 }, { "epoch": 1.3146263747558844, "grad_norm": 0.15883517265319824, "learning_rate": 5.6071815700437716e-05, "loss": 0.5145, "step": 6395 }, { "epoch": 1.314831945729263, "grad_norm": 0.1622246950864792, "learning_rate": 5.606213435912371e-05, "loss": 0.5542, "step": 6396 }, { "epoch": 1.3150375167026416, "grad_norm": 0.20873090624809265, "learning_rate": 5.605245247283005e-05, "loss": 0.5812, "step": 6397 }, { "epoch": 1.3152430876760202, "grad_norm": 0.1877153068780899, "learning_rate": 5.604277004203371e-05, "loss": 0.5479, "step": 6398 }, { "epoch": 1.3154486586493987, "grad_norm": 0.19027303159236908, "learning_rate": 5.6033087067211714e-05, "loss": 0.5552, "step": 6399 }, { "epoch": 1.3156542296227773, "grad_norm": 0.19082914292812347, "learning_rate": 5.602340354884108e-05, "loss": 0.5544, "step": 6400 }, { "epoch": 1.3158598005961557, "grad_norm": 0.1900823563337326, "learning_rate": 5.601371948739888e-05, "loss": 0.5564, "step": 6401 }, { "epoch": 1.3160653715695343, "grad_norm": 0.1659982055425644, "learning_rate": 5.60040348833622e-05, "loss": 0.5338, "step": 6402 }, { "epoch": 1.3162709425429129, "grad_norm": 0.16377677023410797, "learning_rate": 5.599434973720815e-05, "loss": 0.5685, "step": 6403 }, { "epoch": 1.3164765135162915, "grad_norm": 0.1914215385913849, "learning_rate": 5.5984664049413884e-05, "loss": 0.5734, "step": 6404 }, { "epoch": 1.31668208448967, "grad_norm": 0.19817842543125153, "learning_rate": 5.5974977820456546e-05, "loss": 0.5658, "step": 6405 }, { "epoch": 1.3168876554630486, "grad_norm": 0.1932641863822937, "learning_rate": 5.596529105081336e-05, "loss": 0.5597, "step": 6406 }, { "epoch": 1.3170932264364272, "grad_norm": 0.18866626918315887, "learning_rate": 5.595560374096154e-05, "loss": 0.5736, "step": 6407 }, { "epoch": 1.3172987974098058, "grad_norm": 0.1907801777124405, "learning_rate": 5.594591589137831e-05, "loss": 0.575, "step": 6408 }, { "epoch": 1.3175043683831844, "grad_norm": 0.18488825857639313, "learning_rate": 5.5936227502540984e-05, "loss": 0.5658, "step": 6409 }, { "epoch": 1.3177099393565628, "grad_norm": 0.18911798298358917, "learning_rate": 5.592653857492684e-05, "loss": 0.5505, "step": 6410 }, { "epoch": 1.3179155103299414, "grad_norm": 0.161835715174675, "learning_rate": 5.59168491090132e-05, "loss": 0.5313, "step": 6411 }, { "epoch": 1.31812108130332, "grad_norm": 0.15991567075252533, "learning_rate": 5.590715910527745e-05, "loss": 0.5707, "step": 6412 }, { "epoch": 1.3183266522766985, "grad_norm": 0.1980849802494049, "learning_rate": 5.589746856419694e-05, "loss": 0.5339, "step": 6413 }, { "epoch": 1.3185322232500771, "grad_norm": 0.1609208732843399, "learning_rate": 5.58877774862491e-05, "loss": 0.5264, "step": 6414 }, { "epoch": 1.3187377942234557, "grad_norm": 0.16349831223487854, "learning_rate": 5.587808587191134e-05, "loss": 0.5642, "step": 6415 }, { "epoch": 1.318943365196834, "grad_norm": 0.1919315755367279, "learning_rate": 5.586839372166113e-05, "loss": 0.57, "step": 6416 }, { "epoch": 1.3191489361702127, "grad_norm": 0.19255201518535614, "learning_rate": 5.585870103597596e-05, "loss": 0.5692, "step": 6417 }, { "epoch": 1.3193545071435913, "grad_norm": 0.1922633796930313, "learning_rate": 5.584900781533334e-05, "loss": 0.5675, "step": 6418 }, { "epoch": 1.3195600781169698, "grad_norm": 0.19982829689979553, "learning_rate": 5.5839314060210826e-05, "loss": 0.5711, "step": 6419 }, { "epoch": 1.3197656490903484, "grad_norm": 0.19519644975662231, "learning_rate": 5.582961977108598e-05, "loss": 0.5645, "step": 6420 }, { "epoch": 1.319971220063727, "grad_norm": 0.19568218290805817, "learning_rate": 5.5819924948436374e-05, "loss": 0.5638, "step": 6421 }, { "epoch": 1.3201767910371056, "grad_norm": 0.1948254555463791, "learning_rate": 5.581022959273963e-05, "loss": 0.5511, "step": 6422 }, { "epoch": 1.3203823620104842, "grad_norm": 0.19327300786972046, "learning_rate": 5.580053370447341e-05, "loss": 0.523, "step": 6423 }, { "epoch": 1.3205879329838628, "grad_norm": 0.19158729910850525, "learning_rate": 5.5790837284115365e-05, "loss": 0.5628, "step": 6424 }, { "epoch": 1.3207935039572412, "grad_norm": 0.2012944519519806, "learning_rate": 5.578114033214322e-05, "loss": 0.5486, "step": 6425 }, { "epoch": 1.3209990749306197, "grad_norm": 0.19401337206363678, "learning_rate": 5.577144284903466e-05, "loss": 0.569, "step": 6426 }, { "epoch": 1.3212046459039983, "grad_norm": 0.19512306153774261, "learning_rate": 5.576174483526748e-05, "loss": 0.5581, "step": 6427 }, { "epoch": 1.321410216877377, "grad_norm": 0.18876834213733673, "learning_rate": 5.5752046291319415e-05, "loss": 0.5591, "step": 6428 }, { "epoch": 1.3216157878507555, "grad_norm": 0.19513283669948578, "learning_rate": 5.574234721766829e-05, "loss": 0.57, "step": 6429 }, { "epoch": 1.321821358824134, "grad_norm": 0.16624127328395844, "learning_rate": 5.5732647614791933e-05, "loss": 0.5405, "step": 6430 }, { "epoch": 1.3220269297975125, "grad_norm": 0.16485817730426788, "learning_rate": 5.572294748316818e-05, "loss": 0.54, "step": 6431 }, { "epoch": 1.322232500770891, "grad_norm": 0.16315220296382904, "learning_rate": 5.571324682327493e-05, "loss": 0.5326, "step": 6432 }, { "epoch": 1.3224380717442696, "grad_norm": 0.17077341675758362, "learning_rate": 5.570354563559009e-05, "loss": 0.5464, "step": 6433 }, { "epoch": 1.3226436427176482, "grad_norm": 0.19310691952705383, "learning_rate": 5.569384392059158e-05, "loss": 0.5544, "step": 6434 }, { "epoch": 1.3228492136910268, "grad_norm": 0.19178032875061035, "learning_rate": 5.568414167875736e-05, "loss": 0.5595, "step": 6435 }, { "epoch": 1.3230547846644054, "grad_norm": 0.19363771378993988, "learning_rate": 5.567443891056542e-05, "loss": 0.5565, "step": 6436 }, { "epoch": 1.323260355637784, "grad_norm": 0.16950379312038422, "learning_rate": 5.566473561649376e-05, "loss": 0.5465, "step": 6437 }, { "epoch": 1.3234659266111626, "grad_norm": 0.15700620412826538, "learning_rate": 5.565503179702043e-05, "loss": 0.5377, "step": 6438 }, { "epoch": 1.3236714975845412, "grad_norm": 0.16397301852703094, "learning_rate": 5.564532745262348e-05, "loss": 0.536, "step": 6439 }, { "epoch": 1.3238770685579198, "grad_norm": 0.20148152112960815, "learning_rate": 5.5635622583781e-05, "loss": 0.5598, "step": 6440 }, { "epoch": 1.3240826395312981, "grad_norm": 0.16813023388385773, "learning_rate": 5.562591719097112e-05, "loss": 0.5117, "step": 6441 }, { "epoch": 1.3242882105046767, "grad_norm": 0.15760543942451477, "learning_rate": 5.5616211274671956e-05, "loss": 0.5487, "step": 6442 }, { "epoch": 1.3244937814780553, "grad_norm": 0.18859198689460754, "learning_rate": 5.5606504835361675e-05, "loss": 0.5293, "step": 6443 }, { "epoch": 1.3246993524514339, "grad_norm": 0.19250252842903137, "learning_rate": 5.559679787351849e-05, "loss": 0.5722, "step": 6444 }, { "epoch": 1.3249049234248125, "grad_norm": 0.1938043236732483, "learning_rate": 5.558709038962061e-05, "loss": 0.553, "step": 6445 }, { "epoch": 1.3251104943981908, "grad_norm": 0.19342714548110962, "learning_rate": 5.557738238414624e-05, "loss": 0.5467, "step": 6446 }, { "epoch": 1.3253160653715694, "grad_norm": 0.20176750421524048, "learning_rate": 5.556767385757371e-05, "loss": 0.5503, "step": 6447 }, { "epoch": 1.325521636344948, "grad_norm": 0.19387808442115784, "learning_rate": 5.555796481038127e-05, "loss": 0.5651, "step": 6448 }, { "epoch": 1.3257272073183266, "grad_norm": 0.17772021889686584, "learning_rate": 5.5548255243047236e-05, "loss": 0.506, "step": 6449 }, { "epoch": 1.3259327782917052, "grad_norm": 0.1652149111032486, "learning_rate": 5.553854515604998e-05, "loss": 0.5591, "step": 6450 }, { "epoch": 1.3261383492650838, "grad_norm": 0.19004401564598083, "learning_rate": 5.552883454986786e-05, "loss": 0.5616, "step": 6451 }, { "epoch": 1.3263439202384624, "grad_norm": 0.1958709955215454, "learning_rate": 5.551912342497929e-05, "loss": 0.5523, "step": 6452 }, { "epoch": 1.326549491211841, "grad_norm": 0.18773847818374634, "learning_rate": 5.550941178186265e-05, "loss": 0.5625, "step": 6453 }, { "epoch": 1.3267550621852195, "grad_norm": 0.16042830049991608, "learning_rate": 5.549969962099643e-05, "loss": 0.5096, "step": 6454 }, { "epoch": 1.3269606331585981, "grad_norm": 0.1585341989994049, "learning_rate": 5.548998694285908e-05, "loss": 0.5587, "step": 6455 }, { "epoch": 1.3271662041319765, "grad_norm": 0.18803685903549194, "learning_rate": 5.54802737479291e-05, "loss": 0.5649, "step": 6456 }, { "epoch": 1.327371775105355, "grad_norm": 0.1625043749809265, "learning_rate": 5.5470560036685025e-05, "loss": 0.5228, "step": 6457 }, { "epoch": 1.3275773460787337, "grad_norm": 0.1575174331665039, "learning_rate": 5.54608458096054e-05, "loss": 0.5426, "step": 6458 }, { "epoch": 1.3277829170521123, "grad_norm": 0.19953930377960205, "learning_rate": 5.545113106716877e-05, "loss": 0.5559, "step": 6459 }, { "epoch": 1.3279884880254909, "grad_norm": 0.2004413902759552, "learning_rate": 5.5441415809853786e-05, "loss": 0.5624, "step": 6460 }, { "epoch": 1.3281940589988694, "grad_norm": 0.18838083744049072, "learning_rate": 5.543170003813903e-05, "loss": 0.5626, "step": 6461 }, { "epoch": 1.3283996299722478, "grad_norm": 0.1713562160730362, "learning_rate": 5.542198375250319e-05, "loss": 0.5454, "step": 6462 }, { "epoch": 1.3286052009456264, "grad_norm": 0.13531114161014557, "learning_rate": 5.5412266953424905e-05, "loss": 0.5289, "step": 6463 }, { "epoch": 1.328810771919005, "grad_norm": 0.16264608502388, "learning_rate": 5.540254964138291e-05, "loss": 0.5403, "step": 6464 }, { "epoch": 1.3290163428923836, "grad_norm": 0.16079317033290863, "learning_rate": 5.5392831816855915e-05, "loss": 0.5081, "step": 6465 }, { "epoch": 1.3292219138657622, "grad_norm": 0.15615412592887878, "learning_rate": 5.538311348032266e-05, "loss": 0.558, "step": 6466 }, { "epoch": 1.3294274848391407, "grad_norm": 0.18808799982070923, "learning_rate": 5.5373394632261934e-05, "loss": 0.5462, "step": 6467 }, { "epoch": 1.3296330558125193, "grad_norm": 0.1914406418800354, "learning_rate": 5.536367527315255e-05, "loss": 0.5668, "step": 6468 }, { "epoch": 1.329838626785898, "grad_norm": 0.27818214893341064, "learning_rate": 5.5353955403473325e-05, "loss": 0.5524, "step": 6469 }, { "epoch": 1.3300441977592765, "grad_norm": 0.19103524088859558, "learning_rate": 5.53442350237031e-05, "loss": 0.577, "step": 6470 }, { "epoch": 1.3302497687326549, "grad_norm": 0.17256119847297668, "learning_rate": 5.533451413432077e-05, "loss": 0.5307, "step": 6471 }, { "epoch": 1.3304553397060335, "grad_norm": 0.1665564626455307, "learning_rate": 5.532479273580523e-05, "loss": 0.5791, "step": 6472 }, { "epoch": 1.330660910679412, "grad_norm": 0.16080975532531738, "learning_rate": 5.531507082863542e-05, "loss": 0.5073, "step": 6473 }, { "epoch": 1.3308664816527906, "grad_norm": 0.16216245293617249, "learning_rate": 5.5305348413290264e-05, "loss": 0.5609, "step": 6474 }, { "epoch": 1.3310720526261692, "grad_norm": 0.16360749304294586, "learning_rate": 5.529562549024878e-05, "loss": 0.5257, "step": 6475 }, { "epoch": 1.3312776235995478, "grad_norm": 0.1617291420698166, "learning_rate": 5.528590205998994e-05, "loss": 0.5577, "step": 6476 }, { "epoch": 1.3314831945729262, "grad_norm": 0.1931338757276535, "learning_rate": 5.527617812299278e-05, "loss": 0.5589, "step": 6477 }, { "epoch": 1.3316887655463048, "grad_norm": 0.18447865545749664, "learning_rate": 5.526645367973636e-05, "loss": 0.5692, "step": 6478 }, { "epoch": 1.3318943365196834, "grad_norm": 0.16455183923244476, "learning_rate": 5.525672873069975e-05, "loss": 0.5236, "step": 6479 }, { "epoch": 1.332099907493062, "grad_norm": 0.15722709894180298, "learning_rate": 5.524700327636206e-05, "loss": 0.5514, "step": 6480 }, { "epoch": 1.3323054784664405, "grad_norm": 0.18713107705116272, "learning_rate": 5.5237277317202405e-05, "loss": 0.5401, "step": 6481 }, { "epoch": 1.3325110494398191, "grad_norm": 0.19015434384346008, "learning_rate": 5.522755085369994e-05, "loss": 0.5464, "step": 6482 }, { "epoch": 1.3327166204131977, "grad_norm": 0.18974623084068298, "learning_rate": 5.5217823886333854e-05, "loss": 0.5409, "step": 6483 }, { "epoch": 1.3329221913865763, "grad_norm": 0.19141395390033722, "learning_rate": 5.520809641558334e-05, "loss": 0.5512, "step": 6484 }, { "epoch": 1.333127762359955, "grad_norm": 0.19724808633327484, "learning_rate": 5.519836844192763e-05, "loss": 0.5687, "step": 6485 }, { "epoch": 1.3333333333333333, "grad_norm": 0.18789160251617432, "learning_rate": 5.518863996584599e-05, "loss": 0.5373, "step": 6486 }, { "epoch": 1.3335389043067118, "grad_norm": 0.198290154337883, "learning_rate": 5.517891098781766e-05, "loss": 0.5726, "step": 6487 }, { "epoch": 1.3337444752800904, "grad_norm": 0.19129502773284912, "learning_rate": 5.516918150832197e-05, "loss": 0.547, "step": 6488 }, { "epoch": 1.333950046253469, "grad_norm": 0.16152769327163696, "learning_rate": 5.515945152783824e-05, "loss": 0.5004, "step": 6489 }, { "epoch": 1.3341556172268476, "grad_norm": 0.1580476313829422, "learning_rate": 5.5149721046845824e-05, "loss": 0.5455, "step": 6490 }, { "epoch": 1.3343611882002262, "grad_norm": 0.190731480717659, "learning_rate": 5.513999006582407e-05, "loss": 0.566, "step": 6491 }, { "epoch": 1.3345667591736046, "grad_norm": 0.1941419392824173, "learning_rate": 5.513025858525242e-05, "loss": 0.5748, "step": 6492 }, { "epoch": 1.3347723301469832, "grad_norm": 0.20120371878147125, "learning_rate": 5.512052660561026e-05, "loss": 0.5662, "step": 6493 }, { "epoch": 1.3349779011203617, "grad_norm": 0.2006073296070099, "learning_rate": 5.511079412737706e-05, "loss": 0.5741, "step": 6494 }, { "epoch": 1.3351834720937403, "grad_norm": 0.18886934220790863, "learning_rate": 5.510106115103231e-05, "loss": 0.5534, "step": 6495 }, { "epoch": 1.335389043067119, "grad_norm": 0.18579721450805664, "learning_rate": 5.5091327677055484e-05, "loss": 0.5403, "step": 6496 }, { "epoch": 1.3355946140404975, "grad_norm": 0.1928054839372635, "learning_rate": 5.50815937059261e-05, "loss": 0.5666, "step": 6497 }, { "epoch": 1.335800185013876, "grad_norm": 0.161499485373497, "learning_rate": 5.5071859238123714e-05, "loss": 0.5366, "step": 6498 }, { "epoch": 1.3360057559872547, "grad_norm": 0.1295616626739502, "learning_rate": 5.506212427412791e-05, "loss": 0.4916, "step": 6499 }, { "epoch": 1.3362113269606333, "grad_norm": 0.15952670574188232, "learning_rate": 5.505238881441827e-05, "loss": 0.5444, "step": 6500 }, { "epoch": 1.3364168979340116, "grad_norm": 0.202559694647789, "learning_rate": 5.5042652859474414e-05, "loss": 0.5592, "step": 6501 }, { "epoch": 1.3366224689073902, "grad_norm": 0.16196085512638092, "learning_rate": 5.5032916409776003e-05, "loss": 0.5164, "step": 6502 }, { "epoch": 1.3368280398807688, "grad_norm": 0.1672007143497467, "learning_rate": 5.502317946580268e-05, "loss": 0.5319, "step": 6503 }, { "epoch": 1.3370336108541474, "grad_norm": 0.16251109540462494, "learning_rate": 5.501344202803415e-05, "loss": 0.5215, "step": 6504 }, { "epoch": 1.337239181827526, "grad_norm": 0.12841519713401794, "learning_rate": 5.500370409695014e-05, "loss": 0.5087, "step": 6505 }, { "epoch": 1.3374447528009046, "grad_norm": 0.16203691065311432, "learning_rate": 5.499396567303039e-05, "loss": 0.5683, "step": 6506 }, { "epoch": 1.337650323774283, "grad_norm": 0.18712860345840454, "learning_rate": 5.4984226756754664e-05, "loss": 0.5488, "step": 6507 }, { "epoch": 1.3378558947476615, "grad_norm": 0.19168932735919952, "learning_rate": 5.497448734860274e-05, "loss": 0.5639, "step": 6508 }, { "epoch": 1.3380614657210401, "grad_norm": 0.18323485553264618, "learning_rate": 5.4964747449054464e-05, "loss": 0.5504, "step": 6509 }, { "epoch": 1.3382670366944187, "grad_norm": 0.16930492222309113, "learning_rate": 5.4955007058589646e-05, "loss": 0.5296, "step": 6510 }, { "epoch": 1.3384726076677973, "grad_norm": 0.16478413343429565, "learning_rate": 5.494526617768816e-05, "loss": 0.557, "step": 6511 }, { "epoch": 1.3386781786411759, "grad_norm": 0.1620486080646515, "learning_rate": 5.4935524806829885e-05, "loss": 0.5328, "step": 6512 }, { "epoch": 1.3388837496145545, "grad_norm": 0.15588897466659546, "learning_rate": 5.4925782946494754e-05, "loss": 0.5307, "step": 6513 }, { "epoch": 1.339089320587933, "grad_norm": 0.16102923452854156, "learning_rate": 5.4916040597162677e-05, "loss": 0.5318, "step": 6514 }, { "epoch": 1.3392948915613117, "grad_norm": 0.13110311329364777, "learning_rate": 5.490629775931364e-05, "loss": 0.515, "step": 6515 }, { "epoch": 1.3395004625346902, "grad_norm": 0.1619655340909958, "learning_rate": 5.4896554433427606e-05, "loss": 0.5477, "step": 6516 }, { "epoch": 1.3397060335080686, "grad_norm": 0.20572912693023682, "learning_rate": 5.48868106199846e-05, "loss": 0.5571, "step": 6517 }, { "epoch": 1.3399116044814472, "grad_norm": 0.156040221452713, "learning_rate": 5.487706631946464e-05, "loss": 0.5231, "step": 6518 }, { "epoch": 1.3401171754548258, "grad_norm": 0.16056253015995026, "learning_rate": 5.486732153234778e-05, "loss": 0.5529, "step": 6519 }, { "epoch": 1.3403227464282044, "grad_norm": 0.19152522087097168, "learning_rate": 5.485757625911413e-05, "loss": 0.545, "step": 6520 }, { "epoch": 1.340528317401583, "grad_norm": 0.185153067111969, "learning_rate": 5.484783050024376e-05, "loss": 0.5545, "step": 6521 }, { "epoch": 1.3407338883749613, "grad_norm": 0.18557578325271606, "learning_rate": 5.4838084256216796e-05, "loss": 0.5631, "step": 6522 }, { "epoch": 1.34093945934834, "grad_norm": 0.1944609433412552, "learning_rate": 5.482833752751343e-05, "loss": 0.5673, "step": 6523 }, { "epoch": 1.3411450303217185, "grad_norm": 0.1916920244693756, "learning_rate": 5.4818590314613796e-05, "loss": 0.5406, "step": 6524 }, { "epoch": 1.341350601295097, "grad_norm": 0.199026957154274, "learning_rate": 5.48088426179981e-05, "loss": 0.5614, "step": 6525 }, { "epoch": 1.3415561722684757, "grad_norm": 0.19180314242839813, "learning_rate": 5.479909443814658e-05, "loss": 0.5676, "step": 6526 }, { "epoch": 1.3417617432418543, "grad_norm": 0.18850663304328918, "learning_rate": 5.478934577553949e-05, "loss": 0.5644, "step": 6527 }, { "epoch": 1.3419673142152329, "grad_norm": 0.19104434549808502, "learning_rate": 5.477959663065709e-05, "loss": 0.5517, "step": 6528 }, { "epoch": 1.3421728851886114, "grad_norm": 0.16571475565433502, "learning_rate": 5.476984700397966e-05, "loss": 0.5328, "step": 6529 }, { "epoch": 1.34237845616199, "grad_norm": 0.1614765077829361, "learning_rate": 5.4760096895987535e-05, "loss": 0.5574, "step": 6530 }, { "epoch": 1.3425840271353686, "grad_norm": 0.18632696568965912, "learning_rate": 5.4750346307161064e-05, "loss": 0.5605, "step": 6531 }, { "epoch": 1.342789598108747, "grad_norm": 0.1589028239250183, "learning_rate": 5.474059523798059e-05, "loss": 0.5214, "step": 6532 }, { "epoch": 1.3429951690821256, "grad_norm": 0.16524967551231384, "learning_rate": 5.473084368892653e-05, "loss": 0.565, "step": 6533 }, { "epoch": 1.3432007400555042, "grad_norm": 0.1631617695093155, "learning_rate": 5.4721091660479276e-05, "loss": 0.5324, "step": 6534 }, { "epoch": 1.3434063110288827, "grad_norm": 0.1608559638261795, "learning_rate": 5.471133915311927e-05, "loss": 0.5469, "step": 6535 }, { "epoch": 1.3436118820022613, "grad_norm": 0.1971094310283661, "learning_rate": 5.470158616732698e-05, "loss": 0.5692, "step": 6536 }, { "epoch": 1.3438174529756397, "grad_norm": 0.19706624746322632, "learning_rate": 5.469183270358288e-05, "loss": 0.5694, "step": 6537 }, { "epoch": 1.3440230239490183, "grad_norm": 0.18402022123336792, "learning_rate": 5.468207876236748e-05, "loss": 0.5478, "step": 6538 }, { "epoch": 1.3442285949223969, "grad_norm": 0.16580908000469208, "learning_rate": 5.467232434416132e-05, "loss": 0.5444, "step": 6539 }, { "epoch": 1.3444341658957755, "grad_norm": 0.1564161777496338, "learning_rate": 5.466256944944494e-05, "loss": 0.5379, "step": 6540 }, { "epoch": 1.344639736869154, "grad_norm": 0.19156378507614136, "learning_rate": 5.465281407869894e-05, "loss": 0.5479, "step": 6541 }, { "epoch": 1.3448453078425326, "grad_norm": 0.18408456444740295, "learning_rate": 5.46430582324039e-05, "loss": 0.5402, "step": 6542 }, { "epoch": 1.3450508788159112, "grad_norm": 0.18590892851352692, "learning_rate": 5.463330191104045e-05, "loss": 0.5345, "step": 6543 }, { "epoch": 1.3452564497892898, "grad_norm": 0.2050226926803589, "learning_rate": 5.4623545115089246e-05, "loss": 0.5731, "step": 6544 }, { "epoch": 1.3454620207626684, "grad_norm": 0.19850295782089233, "learning_rate": 5.461378784503095e-05, "loss": 0.5583, "step": 6545 }, { "epoch": 1.345667591736047, "grad_norm": 0.16567668318748474, "learning_rate": 5.4604030101346255e-05, "loss": 0.531, "step": 6546 }, { "epoch": 1.3458731627094254, "grad_norm": 0.15176017582416534, "learning_rate": 5.4594271884515884e-05, "loss": 0.5567, "step": 6547 }, { "epoch": 1.346078733682804, "grad_norm": 0.19408267736434937, "learning_rate": 5.45845131950206e-05, "loss": 0.5601, "step": 6548 }, { "epoch": 1.3462843046561825, "grad_norm": 0.18972966074943542, "learning_rate": 5.457475403334114e-05, "loss": 0.574, "step": 6549 }, { "epoch": 1.3464898756295611, "grad_norm": 0.19591477513313293, "learning_rate": 5.456499439995829e-05, "loss": 0.5559, "step": 6550 }, { "epoch": 1.3466954466029397, "grad_norm": 0.18834471702575684, "learning_rate": 5.455523429535289e-05, "loss": 0.5537, "step": 6551 }, { "epoch": 1.3469010175763183, "grad_norm": 0.1918981820344925, "learning_rate": 5.454547372000575e-05, "loss": 0.5594, "step": 6552 }, { "epoch": 1.3471065885496967, "grad_norm": 0.16592934727668762, "learning_rate": 5.453571267439773e-05, "loss": 0.509, "step": 6553 }, { "epoch": 1.3473121595230753, "grad_norm": 0.16087022423744202, "learning_rate": 5.4525951159009726e-05, "loss": 0.5429, "step": 6554 }, { "epoch": 1.3475177304964538, "grad_norm": 0.19623617827892303, "learning_rate": 5.4516189174322635e-05, "loss": 0.5526, "step": 6555 }, { "epoch": 1.3477233014698324, "grad_norm": 0.19142059981822968, "learning_rate": 5.450642672081737e-05, "loss": 0.5726, "step": 6556 }, { "epoch": 1.347928872443211, "grad_norm": 0.1905898004770279, "learning_rate": 5.44966637989749e-05, "loss": 0.556, "step": 6557 }, { "epoch": 1.3481344434165896, "grad_norm": 0.19187632203102112, "learning_rate": 5.448690040927618e-05, "loss": 0.5517, "step": 6558 }, { "epoch": 1.3483400143899682, "grad_norm": 0.19854268431663513, "learning_rate": 5.447713655220224e-05, "loss": 0.5642, "step": 6559 }, { "epoch": 1.3485455853633468, "grad_norm": 0.18761958181858063, "learning_rate": 5.446737222823405e-05, "loss": 0.5595, "step": 6560 }, { "epoch": 1.3487511563367254, "grad_norm": 0.19532154500484467, "learning_rate": 5.445760743785271e-05, "loss": 0.5764, "step": 6561 }, { "epoch": 1.3489567273101037, "grad_norm": 0.20847441256046295, "learning_rate": 5.444784218153924e-05, "loss": 0.5326, "step": 6562 }, { "epoch": 1.3491622982834823, "grad_norm": 0.2053038477897644, "learning_rate": 5.4438076459774746e-05, "loss": 0.54, "step": 6563 }, { "epoch": 1.349367869256861, "grad_norm": 0.1965019851922989, "learning_rate": 5.4428310273040335e-05, "loss": 0.5454, "step": 6564 }, { "epoch": 1.3495734402302395, "grad_norm": 0.19706155359745026, "learning_rate": 5.4418543621817165e-05, "loss": 0.5847, "step": 6565 }, { "epoch": 1.349779011203618, "grad_norm": 0.18815022706985474, "learning_rate": 5.440877650658636e-05, "loss": 0.5541, "step": 6566 }, { "epoch": 1.3499845821769967, "grad_norm": 0.16428446769714355, "learning_rate": 5.43990089278291e-05, "loss": 0.5459, "step": 6567 }, { "epoch": 1.350190153150375, "grad_norm": 0.16542398929595947, "learning_rate": 5.438924088602662e-05, "loss": 0.5646, "step": 6568 }, { "epoch": 1.3503957241237536, "grad_norm": 0.15714940428733826, "learning_rate": 5.437947238166012e-05, "loss": 0.5173, "step": 6569 }, { "epoch": 1.3506012950971322, "grad_norm": 0.15711595118045807, "learning_rate": 5.436970341521084e-05, "loss": 0.5552, "step": 6570 }, { "epoch": 1.3508068660705108, "grad_norm": 0.1985914558172226, "learning_rate": 5.4359933987160086e-05, "loss": 0.5668, "step": 6571 }, { "epoch": 1.3510124370438894, "grad_norm": 0.19462761282920837, "learning_rate": 5.435016409798913e-05, "loss": 0.5585, "step": 6572 }, { "epoch": 1.351218008017268, "grad_norm": 0.19194667041301727, "learning_rate": 5.434039374817929e-05, "loss": 0.5631, "step": 6573 }, { "epoch": 1.3514235789906466, "grad_norm": 0.19980405271053314, "learning_rate": 5.43306229382119e-05, "loss": 0.5535, "step": 6574 }, { "epoch": 1.3516291499640252, "grad_norm": 0.193598210811615, "learning_rate": 5.432085166856834e-05, "loss": 0.5606, "step": 6575 }, { "epoch": 1.3518347209374038, "grad_norm": 0.16227704286575317, "learning_rate": 5.431107993972999e-05, "loss": 0.5169, "step": 6576 }, { "epoch": 1.3520402919107821, "grad_norm": 0.16246087849140167, "learning_rate": 5.430130775217823e-05, "loss": 0.5548, "step": 6577 }, { "epoch": 1.3522458628841607, "grad_norm": 0.16693639755249023, "learning_rate": 5.4291535106394524e-05, "loss": 0.5287, "step": 6578 }, { "epoch": 1.3524514338575393, "grad_norm": 0.16185717284679413, "learning_rate": 5.4281762002860304e-05, "loss": 0.5556, "step": 6579 }, { "epoch": 1.3526570048309179, "grad_norm": 0.19650043547153473, "learning_rate": 5.427198844205706e-05, "loss": 0.5632, "step": 6580 }, { "epoch": 1.3528625758042965, "grad_norm": 0.16057594120502472, "learning_rate": 5.426221442446627e-05, "loss": 0.5163, "step": 6581 }, { "epoch": 1.353068146777675, "grad_norm": 0.15515869855880737, "learning_rate": 5.425243995056949e-05, "loss": 0.5588, "step": 6582 }, { "epoch": 1.3532737177510534, "grad_norm": 0.19516292214393616, "learning_rate": 5.4242665020848224e-05, "loss": 0.5814, "step": 6583 }, { "epoch": 1.353479288724432, "grad_norm": 0.1625499576330185, "learning_rate": 5.423288963578405e-05, "loss": 0.5264, "step": 6584 }, { "epoch": 1.3536848596978106, "grad_norm": 0.16830846667289734, "learning_rate": 5.422311379585857e-05, "loss": 0.5258, "step": 6585 }, { "epoch": 1.3538904306711892, "grad_norm": 0.19009056687355042, "learning_rate": 5.4213337501553374e-05, "loss": 0.5549, "step": 6586 }, { "epoch": 1.3540960016445678, "grad_norm": 0.18671362102031708, "learning_rate": 5.4203560753350115e-05, "loss": 0.5482, "step": 6587 }, { "epoch": 1.3543015726179464, "grad_norm": 0.1931658238172531, "learning_rate": 5.419378355173042e-05, "loss": 0.5665, "step": 6588 }, { "epoch": 1.354507143591325, "grad_norm": 0.1925138682126999, "learning_rate": 5.4184005897175985e-05, "loss": 0.5649, "step": 6589 }, { "epoch": 1.3547127145647035, "grad_norm": 0.1919427365064621, "learning_rate": 5.41742277901685e-05, "loss": 0.5425, "step": 6590 }, { "epoch": 1.3549182855380821, "grad_norm": 0.19209784269332886, "learning_rate": 5.416444923118968e-05, "loss": 0.5561, "step": 6591 }, { "epoch": 1.3551238565114605, "grad_norm": 0.17238673567771912, "learning_rate": 5.415467022072131e-05, "loss": 0.5302, "step": 6592 }, { "epoch": 1.355329427484839, "grad_norm": 0.1562458574771881, "learning_rate": 5.414489075924512e-05, "loss": 0.5435, "step": 6593 }, { "epoch": 1.3555349984582177, "grad_norm": 0.19020064175128937, "learning_rate": 5.41351108472429e-05, "loss": 0.5327, "step": 6594 }, { "epoch": 1.3557405694315963, "grad_norm": 0.20159995555877686, "learning_rate": 5.412533048519646e-05, "loss": 0.5489, "step": 6595 }, { "epoch": 1.3559461404049749, "grad_norm": 0.19280879199504852, "learning_rate": 5.411554967358765e-05, "loss": 0.542, "step": 6596 }, { "epoch": 1.3561517113783534, "grad_norm": 0.18953213095664978, "learning_rate": 5.410576841289831e-05, "loss": 0.5464, "step": 6597 }, { "epoch": 1.3563572823517318, "grad_norm": 0.18897344172000885, "learning_rate": 5.409598670361032e-05, "loss": 0.5427, "step": 6598 }, { "epoch": 1.3565628533251104, "grad_norm": 0.20002910494804382, "learning_rate": 5.408620454620558e-05, "loss": 0.5554, "step": 6599 }, { "epoch": 1.356768424298489, "grad_norm": 0.18375547230243683, "learning_rate": 5.4076421941166016e-05, "loss": 0.555, "step": 6600 }, { "epoch": 1.3569739952718676, "grad_norm": 0.17289654910564423, "learning_rate": 5.406663888897355e-05, "loss": 0.5342, "step": 6601 }, { "epoch": 1.3571795662452462, "grad_norm": 0.16519290208816528, "learning_rate": 5.405685539011017e-05, "loss": 0.5506, "step": 6602 }, { "epoch": 1.3573851372186247, "grad_norm": 0.19404758512973785, "learning_rate": 5.404707144505786e-05, "loss": 0.5703, "step": 6603 }, { "epoch": 1.3575907081920033, "grad_norm": 0.1909807026386261, "learning_rate": 5.403728705429864e-05, "loss": 0.5762, "step": 6604 }, { "epoch": 1.357796279165382, "grad_norm": 0.19107364118099213, "learning_rate": 5.4027502218314505e-05, "loss": 0.5411, "step": 6605 }, { "epoch": 1.3580018501387605, "grad_norm": 0.18892939388751984, "learning_rate": 5.401771693758754e-05, "loss": 0.5456, "step": 6606 }, { "epoch": 1.358207421112139, "grad_norm": 0.19617542624473572, "learning_rate": 5.400793121259981e-05, "loss": 0.5759, "step": 6607 }, { "epoch": 1.3584129920855175, "grad_norm": 0.19577234983444214, "learning_rate": 5.39981450438334e-05, "loss": 0.5668, "step": 6608 }, { "epoch": 1.358618563058896, "grad_norm": 0.21422545611858368, "learning_rate": 5.3988358431770455e-05, "loss": 0.5677, "step": 6609 }, { "epoch": 1.3588241340322746, "grad_norm": 0.16092784702777863, "learning_rate": 5.397857137689311e-05, "loss": 0.5076, "step": 6610 }, { "epoch": 1.3590297050056532, "grad_norm": 0.15695548057556152, "learning_rate": 5.39687838796835e-05, "loss": 0.5357, "step": 6611 }, { "epoch": 1.3592352759790318, "grad_norm": 0.20313376188278198, "learning_rate": 5.395899594062383e-05, "loss": 0.5823, "step": 6612 }, { "epoch": 1.3594408469524102, "grad_norm": 0.19227701425552368, "learning_rate": 5.3949207560196306e-05, "loss": 0.5674, "step": 6613 }, { "epoch": 1.3596464179257888, "grad_norm": 0.190741628408432, "learning_rate": 5.393941873888316e-05, "loss": 0.548, "step": 6614 }, { "epoch": 1.3598519888991674, "grad_norm": 0.19307512044906616, "learning_rate": 5.3929629477166624e-05, "loss": 0.5449, "step": 6615 }, { "epoch": 1.360057559872546, "grad_norm": 0.19279111921787262, "learning_rate": 5.3919839775529e-05, "loss": 0.5505, "step": 6616 }, { "epoch": 1.3602631308459245, "grad_norm": 0.1940283179283142, "learning_rate": 5.391004963445255e-05, "loss": 0.5564, "step": 6617 }, { "epoch": 1.3604687018193031, "grad_norm": 0.22000883519649506, "learning_rate": 5.39002590544196e-05, "loss": 0.5643, "step": 6618 }, { "epoch": 1.3606742727926817, "grad_norm": 0.1951514333486557, "learning_rate": 5.3890468035912484e-05, "loss": 0.5502, "step": 6619 }, { "epoch": 1.3608798437660603, "grad_norm": 0.19694966077804565, "learning_rate": 5.388067657941357e-05, "loss": 0.5609, "step": 6620 }, { "epoch": 1.361085414739439, "grad_norm": 0.165736585855484, "learning_rate": 5.387088468540522e-05, "loss": 0.5275, "step": 6621 }, { "epoch": 1.3612909857128175, "grad_norm": 0.1606799215078354, "learning_rate": 5.3861092354369843e-05, "loss": 0.5503, "step": 6622 }, { "epoch": 1.3614965566861958, "grad_norm": 0.1982721984386444, "learning_rate": 5.385129958678986e-05, "loss": 0.5561, "step": 6623 }, { "epoch": 1.3617021276595744, "grad_norm": 0.20562221109867096, "learning_rate": 5.384150638314773e-05, "loss": 0.5675, "step": 6624 }, { "epoch": 1.361907698632953, "grad_norm": 0.19149377942085266, "learning_rate": 5.3831712743925905e-05, "loss": 0.5675, "step": 6625 }, { "epoch": 1.3621132696063316, "grad_norm": 0.19633962213993073, "learning_rate": 5.382191866960686e-05, "loss": 0.5566, "step": 6626 }, { "epoch": 1.3623188405797102, "grad_norm": 0.19432850182056427, "learning_rate": 5.381212416067313e-05, "loss": 0.5525, "step": 6627 }, { "epoch": 1.3625244115530886, "grad_norm": 0.18926875293254852, "learning_rate": 5.380232921760723e-05, "loss": 0.5573, "step": 6628 }, { "epoch": 1.3627299825264672, "grad_norm": 0.16620329022407532, "learning_rate": 5.379253384089169e-05, "loss": 0.5206, "step": 6629 }, { "epoch": 1.3629355534998457, "grad_norm": 0.1583135575056076, "learning_rate": 5.378273803100913e-05, "loss": 0.5458, "step": 6630 }, { "epoch": 1.3631411244732243, "grad_norm": 0.19092857837677002, "learning_rate": 5.3772941788442106e-05, "loss": 0.5782, "step": 6631 }, { "epoch": 1.363346695446603, "grad_norm": 0.19434650242328644, "learning_rate": 5.3763145113673234e-05, "loss": 0.5743, "step": 6632 }, { "epoch": 1.3635522664199815, "grad_norm": 0.19643783569335938, "learning_rate": 5.375334800718518e-05, "loss": 0.5689, "step": 6633 }, { "epoch": 1.36375783739336, "grad_norm": 0.16674213111400604, "learning_rate": 5.374355046946057e-05, "loss": 0.5268, "step": 6634 }, { "epoch": 1.3639634083667387, "grad_norm": 0.16963227093219757, "learning_rate": 5.3733752500982095e-05, "loss": 0.5625, "step": 6635 }, { "epoch": 1.3641689793401173, "grad_norm": 0.18819878995418549, "learning_rate": 5.372395410223246e-05, "loss": 0.5633, "step": 6636 }, { "epoch": 1.3643745503134959, "grad_norm": 0.19265903532505035, "learning_rate": 5.371415527369439e-05, "loss": 0.5459, "step": 6637 }, { "epoch": 1.3645801212868742, "grad_norm": 0.19311292469501495, "learning_rate": 5.370435601585061e-05, "loss": 0.5648, "step": 6638 }, { "epoch": 1.3647856922602528, "grad_norm": 0.19344937801361084, "learning_rate": 5.3694556329183904e-05, "loss": 0.5701, "step": 6639 }, { "epoch": 1.3649912632336314, "grad_norm": 0.24478478729724884, "learning_rate": 5.368475621417703e-05, "loss": 0.5532, "step": 6640 }, { "epoch": 1.36519683420701, "grad_norm": 0.19150310754776, "learning_rate": 5.367495567131282e-05, "loss": 0.5471, "step": 6641 }, { "epoch": 1.3654024051803886, "grad_norm": 0.19197209179401398, "learning_rate": 5.3665154701074097e-05, "loss": 0.5406, "step": 6642 }, { "epoch": 1.3656079761537672, "grad_norm": 0.19130434095859528, "learning_rate": 5.365535330394368e-05, "loss": 0.5363, "step": 6643 }, { "epoch": 1.3658135471271455, "grad_norm": 0.19257521629333496, "learning_rate": 5.3645551480404487e-05, "loss": 0.5547, "step": 6644 }, { "epoch": 1.3660191181005241, "grad_norm": 0.18824981153011322, "learning_rate": 5.363574923093936e-05, "loss": 0.5723, "step": 6645 }, { "epoch": 1.3662246890739027, "grad_norm": 0.19089485704898834, "learning_rate": 5.362594655603123e-05, "loss": 0.536, "step": 6646 }, { "epoch": 1.3664302600472813, "grad_norm": 0.1918558031320572, "learning_rate": 5.3616143456163055e-05, "loss": 0.5404, "step": 6647 }, { "epoch": 1.3666358310206599, "grad_norm": 0.199978306889534, "learning_rate": 5.3606339931817756e-05, "loss": 0.5633, "step": 6648 }, { "epoch": 1.3668414019940385, "grad_norm": 0.1935882270336151, "learning_rate": 5.35965359834783e-05, "loss": 0.5777, "step": 6649 }, { "epoch": 1.367046972967417, "grad_norm": 0.19100281596183777, "learning_rate": 5.358673161162771e-05, "loss": 0.547, "step": 6650 }, { "epoch": 1.3672525439407957, "grad_norm": 0.19073952734470367, "learning_rate": 5.357692681674898e-05, "loss": 0.5613, "step": 6651 }, { "epoch": 1.3674581149141742, "grad_norm": 0.16322961449623108, "learning_rate": 5.356712159932516e-05, "loss": 0.5327, "step": 6652 }, { "epoch": 1.3676636858875526, "grad_norm": 0.1632666438817978, "learning_rate": 5.35573159598393e-05, "loss": 0.5418, "step": 6653 }, { "epoch": 1.3678692568609312, "grad_norm": 0.1909777820110321, "learning_rate": 5.3547509898774476e-05, "loss": 0.5595, "step": 6654 }, { "epoch": 1.3680748278343098, "grad_norm": 0.19034305214881897, "learning_rate": 5.353770341661378e-05, "loss": 0.5576, "step": 6655 }, { "epoch": 1.3682803988076884, "grad_norm": 0.19562803208827972, "learning_rate": 5.352789651384036e-05, "loss": 0.5549, "step": 6656 }, { "epoch": 1.368485969781067, "grad_norm": 0.2044394165277481, "learning_rate": 5.351808919093733e-05, "loss": 0.5686, "step": 6657 }, { "epoch": 1.3686915407544455, "grad_norm": 0.19082361459732056, "learning_rate": 5.350828144838786e-05, "loss": 0.5626, "step": 6658 }, { "epoch": 1.368897111727824, "grad_norm": 0.21942925453186035, "learning_rate": 5.349847328667514e-05, "loss": 0.583, "step": 6659 }, { "epoch": 1.3691026827012025, "grad_norm": 0.19300974905490875, "learning_rate": 5.348866470628235e-05, "loss": 0.5538, "step": 6660 }, { "epoch": 1.369308253674581, "grad_norm": 0.1846531480550766, "learning_rate": 5.347885570769273e-05, "loss": 0.5331, "step": 6661 }, { "epoch": 1.3695138246479597, "grad_norm": 0.19142849743366241, "learning_rate": 5.346904629138953e-05, "loss": 0.5606, "step": 6662 }, { "epoch": 1.3697193956213383, "grad_norm": 0.19237980246543884, "learning_rate": 5.3459236457856e-05, "loss": 0.5426, "step": 6663 }, { "epoch": 1.3699249665947169, "grad_norm": 0.20076246559619904, "learning_rate": 5.344942620757541e-05, "loss": 0.5676, "step": 6664 }, { "epoch": 1.3701305375680954, "grad_norm": 0.193067267537117, "learning_rate": 5.34396155410311e-05, "loss": 0.557, "step": 6665 }, { "epoch": 1.370336108541474, "grad_norm": 0.19357764720916748, "learning_rate": 5.342980445870637e-05, "loss": 0.5676, "step": 6666 }, { "epoch": 1.3705416795148526, "grad_norm": 0.16621150076389313, "learning_rate": 5.341999296108457e-05, "loss": 0.526, "step": 6667 }, { "epoch": 1.370747250488231, "grad_norm": 0.13069656491279602, "learning_rate": 5.341018104864909e-05, "loss": 0.5275, "step": 6668 }, { "epoch": 1.3709528214616096, "grad_norm": 0.16230368614196777, "learning_rate": 5.3400368721883284e-05, "loss": 0.5518, "step": 6669 }, { "epoch": 1.3711583924349882, "grad_norm": 0.2009955644607544, "learning_rate": 5.339055598127059e-05, "loss": 0.5503, "step": 6670 }, { "epoch": 1.3713639634083667, "grad_norm": 0.18965907394886017, "learning_rate": 5.33807428272944e-05, "loss": 0.566, "step": 6671 }, { "epoch": 1.3715695343817453, "grad_norm": 0.1938343495130539, "learning_rate": 5.3370929260438196e-05, "loss": 0.5559, "step": 6672 }, { "epoch": 1.371775105355124, "grad_norm": 0.17998439073562622, "learning_rate": 5.336111528118543e-05, "loss": 0.515, "step": 6673 }, { "epoch": 1.3719806763285023, "grad_norm": 0.16804425418376923, "learning_rate": 5.335130089001958e-05, "loss": 0.5192, "step": 6674 }, { "epoch": 1.3721862473018809, "grad_norm": 0.16128107905387878, "learning_rate": 5.3341486087424194e-05, "loss": 0.5566, "step": 6675 }, { "epoch": 1.3723918182752595, "grad_norm": 0.1895219087600708, "learning_rate": 5.333167087388276e-05, "loss": 0.5678, "step": 6676 }, { "epoch": 1.372597389248638, "grad_norm": 0.18738722801208496, "learning_rate": 5.3321855249878845e-05, "loss": 0.5647, "step": 6677 }, { "epoch": 1.3728029602220166, "grad_norm": 0.19784080982208252, "learning_rate": 5.331203921589602e-05, "loss": 0.5661, "step": 6678 }, { "epoch": 1.3730085311953952, "grad_norm": 0.17455421388149261, "learning_rate": 5.3302222772417875e-05, "loss": 0.5411, "step": 6679 }, { "epoch": 1.3732141021687738, "grad_norm": 0.13297952711582184, "learning_rate": 5.329240591992803e-05, "loss": 0.532, "step": 6680 }, { "epoch": 1.3734196731421524, "grad_norm": 0.16244389116764069, "learning_rate": 5.328258865891008e-05, "loss": 0.5617, "step": 6681 }, { "epoch": 1.373625244115531, "grad_norm": 0.19854487478733063, "learning_rate": 5.3272770989847724e-05, "loss": 0.5724, "step": 6682 }, { "epoch": 1.3738308150889094, "grad_norm": 0.19856125116348267, "learning_rate": 5.32629529132246e-05, "loss": 0.588, "step": 6683 }, { "epoch": 1.374036386062288, "grad_norm": 0.19242699444293976, "learning_rate": 5.32531344295244e-05, "loss": 0.5468, "step": 6684 }, { "epoch": 1.3742419570356665, "grad_norm": 0.19373014569282532, "learning_rate": 5.3243315539230844e-05, "loss": 0.5487, "step": 6685 }, { "epoch": 1.3744475280090451, "grad_norm": 0.19233091175556183, "learning_rate": 5.323349624282766e-05, "loss": 0.5524, "step": 6686 }, { "epoch": 1.3746530989824237, "grad_norm": 0.1918216347694397, "learning_rate": 5.32236765407986e-05, "loss": 0.5538, "step": 6687 }, { "epoch": 1.3748586699558023, "grad_norm": 0.1914103925228119, "learning_rate": 5.3213856433627426e-05, "loss": 0.5608, "step": 6688 }, { "epoch": 1.3750642409291807, "grad_norm": 0.19780538976192474, "learning_rate": 5.320403592179795e-05, "loss": 0.5701, "step": 6689 }, { "epoch": 1.3752698119025593, "grad_norm": 0.19317637383937836, "learning_rate": 5.3194215005793964e-05, "loss": 0.551, "step": 6690 }, { "epoch": 1.3754753828759378, "grad_norm": 0.17101670801639557, "learning_rate": 5.31843936860993e-05, "loss": 0.5369, "step": 6691 }, { "epoch": 1.3756809538493164, "grad_norm": 0.1648482233285904, "learning_rate": 5.317457196319782e-05, "loss": 0.5706, "step": 6692 }, { "epoch": 1.375886524822695, "grad_norm": 0.1978417932987213, "learning_rate": 5.3164749837573395e-05, "loss": 0.5429, "step": 6693 }, { "epoch": 1.3760920957960736, "grad_norm": 0.19628840684890747, "learning_rate": 5.31549273097099e-05, "loss": 0.5649, "step": 6694 }, { "epoch": 1.3762976667694522, "grad_norm": 0.1944446712732315, "learning_rate": 5.314510438009125e-05, "loss": 0.548, "step": 6695 }, { "epoch": 1.3765032377428308, "grad_norm": 0.19895857572555542, "learning_rate": 5.313528104920138e-05, "loss": 0.5428, "step": 6696 }, { "epoch": 1.3767088087162094, "grad_norm": 0.18742914497852325, "learning_rate": 5.312545731752423e-05, "loss": 0.5525, "step": 6697 }, { "epoch": 1.376914379689588, "grad_norm": 0.1647169291973114, "learning_rate": 5.311563318554379e-05, "loss": 0.5259, "step": 6698 }, { "epoch": 1.3771199506629663, "grad_norm": 0.1640775054693222, "learning_rate": 5.310580865374401e-05, "loss": 0.5602, "step": 6699 }, { "epoch": 1.377325521636345, "grad_norm": 0.19247397780418396, "learning_rate": 5.309598372260895e-05, "loss": 0.5539, "step": 6700 }, { "epoch": 1.3775310926097235, "grad_norm": 0.17393262684345245, "learning_rate": 5.3086158392622606e-05, "loss": 0.5212, "step": 6701 }, { "epoch": 1.377736663583102, "grad_norm": 0.17243215441703796, "learning_rate": 5.307633266426903e-05, "loss": 0.5667, "step": 6702 }, { "epoch": 1.3779422345564807, "grad_norm": 0.19524256885051727, "learning_rate": 5.3066506538032286e-05, "loss": 0.5447, "step": 6703 }, { "epoch": 1.378147805529859, "grad_norm": 0.19185814261436462, "learning_rate": 5.305668001439647e-05, "loss": 0.5564, "step": 6704 }, { "epoch": 1.3783533765032376, "grad_norm": 0.19080397486686707, "learning_rate": 5.3046853093845694e-05, "loss": 0.5545, "step": 6705 }, { "epoch": 1.3785589474766162, "grad_norm": 0.20013724267482758, "learning_rate": 5.303702577686408e-05, "loss": 0.5444, "step": 6706 }, { "epoch": 1.3787645184499948, "grad_norm": 0.19205878674983978, "learning_rate": 5.302719806393576e-05, "loss": 0.5582, "step": 6707 }, { "epoch": 1.3789700894233734, "grad_norm": 0.16551436483860016, "learning_rate": 5.3017369955544915e-05, "loss": 0.5166, "step": 6708 }, { "epoch": 1.379175660396752, "grad_norm": 0.15659868717193604, "learning_rate": 5.300754145217573e-05, "loss": 0.5345, "step": 6709 }, { "epoch": 1.3793812313701306, "grad_norm": 0.19091999530792236, "learning_rate": 5.299771255431239e-05, "loss": 0.5393, "step": 6710 }, { "epoch": 1.3795868023435092, "grad_norm": 0.19453977048397064, "learning_rate": 5.298788326243915e-05, "loss": 0.5471, "step": 6711 }, { "epoch": 1.3797923733168878, "grad_norm": 0.18982084095478058, "learning_rate": 5.2978053577040225e-05, "loss": 0.5482, "step": 6712 }, { "epoch": 1.3799979442902663, "grad_norm": 0.20918771624565125, "learning_rate": 5.2968223498599895e-05, "loss": 0.5698, "step": 6713 }, { "epoch": 1.3802035152636447, "grad_norm": 0.20116795599460602, "learning_rate": 5.2958393027602444e-05, "loss": 0.5605, "step": 6714 }, { "epoch": 1.3804090862370233, "grad_norm": 0.18591387569904327, "learning_rate": 5.294856216453216e-05, "loss": 0.5381, "step": 6715 }, { "epoch": 1.3806146572104019, "grad_norm": 0.19346030056476593, "learning_rate": 5.293873090987336e-05, "loss": 0.565, "step": 6716 }, { "epoch": 1.3808202281837805, "grad_norm": 0.18695658445358276, "learning_rate": 5.292889926411041e-05, "loss": 0.5261, "step": 6717 }, { "epoch": 1.381025799157159, "grad_norm": 0.16254091262817383, "learning_rate": 5.291906722772765e-05, "loss": 0.5208, "step": 6718 }, { "epoch": 1.3812313701305377, "grad_norm": 0.15224479138851166, "learning_rate": 5.2909234801209445e-05, "loss": 0.5667, "step": 6719 }, { "epoch": 1.381436941103916, "grad_norm": 0.16312278807163239, "learning_rate": 5.2899401985040215e-05, "loss": 0.5439, "step": 6720 }, { "epoch": 1.3816425120772946, "grad_norm": 0.15921905636787415, "learning_rate": 5.288956877970438e-05, "loss": 0.5442, "step": 6721 }, { "epoch": 1.3818480830506732, "grad_norm": 0.20192372798919678, "learning_rate": 5.287973518568635e-05, "loss": 0.5779, "step": 6722 }, { "epoch": 1.3820536540240518, "grad_norm": 0.16968026757240295, "learning_rate": 5.286990120347061e-05, "loss": 0.5533, "step": 6723 }, { "epoch": 1.3822592249974304, "grad_norm": 0.16610193252563477, "learning_rate": 5.2860066833541636e-05, "loss": 0.5593, "step": 6724 }, { "epoch": 1.382464795970809, "grad_norm": 0.19108933210372925, "learning_rate": 5.285023207638389e-05, "loss": 0.564, "step": 6725 }, { "epoch": 1.3826703669441875, "grad_norm": 0.18995323777198792, "learning_rate": 5.28403969324819e-05, "loss": 0.5523, "step": 6726 }, { "epoch": 1.3828759379175661, "grad_norm": 0.18891942501068115, "learning_rate": 5.2830561402320215e-05, "loss": 0.5453, "step": 6727 }, { "epoch": 1.3830815088909447, "grad_norm": 0.19134697318077087, "learning_rate": 5.2820725486383356e-05, "loss": 0.565, "step": 6728 }, { "epoch": 1.383287079864323, "grad_norm": 0.16310301423072815, "learning_rate": 5.28108891851559e-05, "loss": 0.549, "step": 6729 }, { "epoch": 1.3834926508377017, "grad_norm": 0.13107767701148987, "learning_rate": 5.280105249912246e-05, "loss": 0.5449, "step": 6730 }, { "epoch": 1.3836982218110803, "grad_norm": 0.12225886434316635, "learning_rate": 5.279121542876761e-05, "loss": 0.5211, "step": 6731 }, { "epoch": 1.3839037927844589, "grad_norm": 0.16120769083499908, "learning_rate": 5.2781377974576e-05, "loss": 0.536, "step": 6732 }, { "epoch": 1.3841093637578374, "grad_norm": 0.20347453653812408, "learning_rate": 5.2771540137032256e-05, "loss": 0.5692, "step": 6733 }, { "epoch": 1.384314934731216, "grad_norm": 0.19555138051509857, "learning_rate": 5.2761701916621064e-05, "loss": 0.5155, "step": 6734 }, { "epoch": 1.3845205057045944, "grad_norm": 0.2035539448261261, "learning_rate": 5.27518633138271e-05, "loss": 0.5697, "step": 6735 }, { "epoch": 1.384726076677973, "grad_norm": 0.18798959255218506, "learning_rate": 5.274202432913505e-05, "loss": 0.553, "step": 6736 }, { "epoch": 1.3849316476513516, "grad_norm": 0.1946985423564911, "learning_rate": 5.2732184963029663e-05, "loss": 0.5551, "step": 6737 }, { "epoch": 1.3851372186247302, "grad_norm": 0.17025156319141388, "learning_rate": 5.272234521599565e-05, "loss": 0.5342, "step": 6738 }, { "epoch": 1.3853427895981087, "grad_norm": 0.16380397975444794, "learning_rate": 5.27125050885178e-05, "loss": 0.5592, "step": 6739 }, { "epoch": 1.3855483605714873, "grad_norm": 0.19385696947574615, "learning_rate": 5.2702664581080845e-05, "loss": 0.5499, "step": 6740 }, { "epoch": 1.385753931544866, "grad_norm": 0.19014237821102142, "learning_rate": 5.2692823694169624e-05, "loss": 0.5322, "step": 6741 }, { "epoch": 1.3859595025182445, "grad_norm": 0.16555199027061462, "learning_rate": 5.2682982428268926e-05, "loss": 0.5253, "step": 6742 }, { "epoch": 1.386165073491623, "grad_norm": 0.1773664802312851, "learning_rate": 5.26731407838636e-05, "loss": 0.5609, "step": 6743 }, { "epoch": 1.3863706444650015, "grad_norm": 0.20064838230609894, "learning_rate": 5.26632987614385e-05, "loss": 0.5682, "step": 6744 }, { "epoch": 1.38657621543838, "grad_norm": 0.16893361508846283, "learning_rate": 5.2653456361478486e-05, "loss": 0.5296, "step": 6745 }, { "epoch": 1.3867817864117586, "grad_norm": 0.12028443813323975, "learning_rate": 5.264361358446845e-05, "loss": 0.5109, "step": 6746 }, { "epoch": 1.3869873573851372, "grad_norm": 0.16433177888393402, "learning_rate": 5.263377043089329e-05, "loss": 0.5492, "step": 6747 }, { "epoch": 1.3871929283585158, "grad_norm": 0.20058415830135345, "learning_rate": 5.262392690123795e-05, "loss": 0.5544, "step": 6748 }, { "epoch": 1.3873984993318944, "grad_norm": 0.1890854686498642, "learning_rate": 5.261408299598737e-05, "loss": 0.5518, "step": 6749 }, { "epoch": 1.3876040703052728, "grad_norm": 0.1862923949956894, "learning_rate": 5.260423871562648e-05, "loss": 0.5598, "step": 6750 }, { "epoch": 1.3878096412786514, "grad_norm": 0.18234452605247498, "learning_rate": 5.2594394060640325e-05, "loss": 0.5486, "step": 6751 }, { "epoch": 1.38801521225203, "grad_norm": 0.16496604681015015, "learning_rate": 5.258454903151385e-05, "loss": 0.4984, "step": 6752 }, { "epoch": 1.3882207832254085, "grad_norm": 0.1623886525630951, "learning_rate": 5.2574703628732104e-05, "loss": 0.5521, "step": 6753 }, { "epoch": 1.3884263541987871, "grad_norm": 0.20108892023563385, "learning_rate": 5.25648578527801e-05, "loss": 0.571, "step": 6754 }, { "epoch": 1.3886319251721657, "grad_norm": 0.18858185410499573, "learning_rate": 5.2555011704142925e-05, "loss": 0.5343, "step": 6755 }, { "epoch": 1.3888374961455443, "grad_norm": 0.18392902612686157, "learning_rate": 5.2545165183305625e-05, "loss": 0.5448, "step": 6756 }, { "epoch": 1.389043067118923, "grad_norm": 0.19124126434326172, "learning_rate": 5.253531829075331e-05, "loss": 0.5493, "step": 6757 }, { "epoch": 1.3892486380923015, "grad_norm": 0.19267001748085022, "learning_rate": 5.252547102697108e-05, "loss": 0.5504, "step": 6758 }, { "epoch": 1.3894542090656798, "grad_norm": 0.19391465187072754, "learning_rate": 5.251562339244407e-05, "loss": 0.5503, "step": 6759 }, { "epoch": 1.3896597800390584, "grad_norm": 0.16429035365581512, "learning_rate": 5.250577538765741e-05, "loss": 0.5135, "step": 6760 }, { "epoch": 1.389865351012437, "grad_norm": 0.15530334413051605, "learning_rate": 5.249592701309629e-05, "loss": 0.5197, "step": 6761 }, { "epoch": 1.3900709219858156, "grad_norm": 0.19579361379146576, "learning_rate": 5.248607826924589e-05, "loss": 0.5486, "step": 6762 }, { "epoch": 1.3902764929591942, "grad_norm": 0.1991192102432251, "learning_rate": 5.2476229156591384e-05, "loss": 0.5713, "step": 6763 }, { "epoch": 1.3904820639325728, "grad_norm": 0.19221562147140503, "learning_rate": 5.246637967561802e-05, "loss": 0.5394, "step": 6764 }, { "epoch": 1.3906876349059512, "grad_norm": 0.18756262958049774, "learning_rate": 5.245652982681102e-05, "loss": 0.5317, "step": 6765 }, { "epoch": 1.3908932058793297, "grad_norm": 0.16349650919437408, "learning_rate": 5.244667961065567e-05, "loss": 0.5351, "step": 6766 }, { "epoch": 1.3910987768527083, "grad_norm": 0.17436912655830383, "learning_rate": 5.24368290276372e-05, "loss": 0.5597, "step": 6767 }, { "epoch": 1.391304347826087, "grad_norm": 0.19610293209552765, "learning_rate": 5.242697807824093e-05, "loss": 0.5688, "step": 6768 }, { "epoch": 1.3915099187994655, "grad_norm": 0.19287322461605072, "learning_rate": 5.241712676295217e-05, "loss": 0.5456, "step": 6769 }, { "epoch": 1.391715489772844, "grad_norm": 0.1898210346698761, "learning_rate": 5.240727508225623e-05, "loss": 0.5595, "step": 6770 }, { "epoch": 1.3919210607462227, "grad_norm": 0.1842799186706543, "learning_rate": 5.239742303663847e-05, "loss": 0.5492, "step": 6771 }, { "epoch": 1.3921266317196013, "grad_norm": 0.18624331057071686, "learning_rate": 5.238757062658426e-05, "loss": 0.5388, "step": 6772 }, { "epoch": 1.3923322026929799, "grad_norm": 0.16960440576076508, "learning_rate": 5.237771785257897e-05, "loss": 0.5353, "step": 6773 }, { "epoch": 1.3925377736663584, "grad_norm": 0.13957920670509338, "learning_rate": 5.2367864715108005e-05, "loss": 0.5144, "step": 6774 }, { "epoch": 1.3927433446397368, "grad_norm": 0.1618185192346573, "learning_rate": 5.235801121465677e-05, "loss": 0.5447, "step": 6775 }, { "epoch": 1.3929489156131154, "grad_norm": 0.19508126378059387, "learning_rate": 5.234815735171073e-05, "loss": 0.5684, "step": 6776 }, { "epoch": 1.393154486586494, "grad_norm": 0.1584571748971939, "learning_rate": 5.233830312675533e-05, "loss": 0.4997, "step": 6777 }, { "epoch": 1.3933600575598726, "grad_norm": 0.15756317973136902, "learning_rate": 5.232844854027601e-05, "loss": 0.5506, "step": 6778 }, { "epoch": 1.3935656285332512, "grad_norm": 0.2031278908252716, "learning_rate": 5.231859359275831e-05, "loss": 0.5452, "step": 6779 }, { "epoch": 1.3937711995066295, "grad_norm": 0.1902448683977127, "learning_rate": 5.230873828468769e-05, "loss": 0.5624, "step": 6780 }, { "epoch": 1.3939767704800081, "grad_norm": 0.19727613031864166, "learning_rate": 5.22988826165497e-05, "loss": 0.5699, "step": 6781 }, { "epoch": 1.3941823414533867, "grad_norm": 0.18354666233062744, "learning_rate": 5.228902658882989e-05, "loss": 0.5463, "step": 6782 }, { "epoch": 1.3943879124267653, "grad_norm": 0.16275332868099213, "learning_rate": 5.22791702020138e-05, "loss": 0.5167, "step": 6783 }, { "epoch": 1.3945934834001439, "grad_norm": 0.15852688252925873, "learning_rate": 5.226931345658701e-05, "loss": 0.5342, "step": 6784 }, { "epoch": 1.3947990543735225, "grad_norm": 0.16829104721546173, "learning_rate": 5.2259456353035136e-05, "loss": 0.5206, "step": 6785 }, { "epoch": 1.395004625346901, "grad_norm": 0.15513145923614502, "learning_rate": 5.2249598891843765e-05, "loss": 0.5584, "step": 6786 }, { "epoch": 1.3952101963202796, "grad_norm": 0.1593499630689621, "learning_rate": 5.223974107349855e-05, "loss": 0.5256, "step": 6787 }, { "epoch": 1.3954157672936582, "grad_norm": 0.16022507846355438, "learning_rate": 5.222988289848512e-05, "loss": 0.5377, "step": 6788 }, { "epoch": 1.3956213382670368, "grad_norm": 0.19908879697322845, "learning_rate": 5.222002436728917e-05, "loss": 0.5567, "step": 6789 }, { "epoch": 1.3958269092404152, "grad_norm": 0.1942145675420761, "learning_rate": 5.2210165480396364e-05, "loss": 0.5503, "step": 6790 }, { "epoch": 1.3960324802137938, "grad_norm": 0.20177899301052094, "learning_rate": 5.2200306238292396e-05, "loss": 0.5572, "step": 6791 }, { "epoch": 1.3962380511871724, "grad_norm": 0.20615504682064056, "learning_rate": 5.219044664146299e-05, "loss": 0.5572, "step": 6792 }, { "epoch": 1.396443622160551, "grad_norm": 0.16137507557868958, "learning_rate": 5.21805866903939e-05, "loss": 0.5327, "step": 6793 }, { "epoch": 1.3966491931339295, "grad_norm": 0.13222044706344604, "learning_rate": 5.217072638557086e-05, "loss": 0.5397, "step": 6794 }, { "epoch": 1.396854764107308, "grad_norm": 0.15501753985881805, "learning_rate": 5.216086572747963e-05, "loss": 0.5588, "step": 6795 }, { "epoch": 1.3970603350806865, "grad_norm": 0.16480109095573425, "learning_rate": 5.2151004716606035e-05, "loss": 0.4947, "step": 6796 }, { "epoch": 1.397265906054065, "grad_norm": 0.1597471535205841, "learning_rate": 5.214114335343585e-05, "loss": 0.5504, "step": 6797 }, { "epoch": 1.3974714770274437, "grad_norm": 0.18874730169773102, "learning_rate": 5.2131281638454914e-05, "loss": 0.5601, "step": 6798 }, { "epoch": 1.3976770480008223, "grad_norm": 0.19088098406791687, "learning_rate": 5.212141957214907e-05, "loss": 0.557, "step": 6799 }, { "epoch": 1.3978826189742009, "grad_norm": 0.19219143688678741, "learning_rate": 5.2111557155004156e-05, "loss": 0.5574, "step": 6800 }, { "epoch": 1.3980881899475794, "grad_norm": 0.19509856402873993, "learning_rate": 5.2101694387506074e-05, "loss": 0.5609, "step": 6801 }, { "epoch": 1.398293760920958, "grad_norm": 0.19519266486167908, "learning_rate": 5.2091831270140694e-05, "loss": 0.5598, "step": 6802 }, { "epoch": 1.3984993318943366, "grad_norm": 0.16416554152965546, "learning_rate": 5.208196780339394e-05, "loss": 0.5073, "step": 6803 }, { "epoch": 1.3987049028677152, "grad_norm": 0.16652482748031616, "learning_rate": 5.207210398775174e-05, "loss": 0.5577, "step": 6804 }, { "epoch": 1.3989104738410936, "grad_norm": 0.1610838919878006, "learning_rate": 5.206223982370001e-05, "loss": 0.5424, "step": 6805 }, { "epoch": 1.3991160448144722, "grad_norm": 0.12500424683094025, "learning_rate": 5.2052375311724755e-05, "loss": 0.5185, "step": 6806 }, { "epoch": 1.3993216157878507, "grad_norm": 0.16289743781089783, "learning_rate": 5.204251045231191e-05, "loss": 0.548, "step": 6807 }, { "epoch": 1.3995271867612293, "grad_norm": 0.1971302479505539, "learning_rate": 5.203264524594751e-05, "loss": 0.5481, "step": 6808 }, { "epoch": 1.399732757734608, "grad_norm": 0.1616830974817276, "learning_rate": 5.2022779693117535e-05, "loss": 0.5206, "step": 6809 }, { "epoch": 1.3999383287079865, "grad_norm": 0.13564690947532654, "learning_rate": 5.201291379430804e-05, "loss": 0.5078, "step": 6810 }, { "epoch": 1.4001438996813649, "grad_norm": 0.20377317070960999, "learning_rate": 5.200304755000506e-05, "loss": 0.5494, "step": 6811 }, { "epoch": 1.4003494706547435, "grad_norm": 0.20373232662677765, "learning_rate": 5.199318096069465e-05, "loss": 0.5652, "step": 6812 }, { "epoch": 1.400555041628122, "grad_norm": 0.19755113124847412, "learning_rate": 5.198331402686291e-05, "loss": 0.5687, "step": 6813 }, { "epoch": 1.4007606126015006, "grad_norm": 0.18689025938510895, "learning_rate": 5.197344674899593e-05, "loss": 0.5576, "step": 6814 }, { "epoch": 1.4009661835748792, "grad_norm": 0.1978052258491516, "learning_rate": 5.196357912757982e-05, "loss": 0.5807, "step": 6815 }, { "epoch": 1.4011717545482578, "grad_norm": 0.16826669871807098, "learning_rate": 5.19537111631007e-05, "loss": 0.4959, "step": 6816 }, { "epoch": 1.4013773255216364, "grad_norm": 0.16866251826286316, "learning_rate": 5.1943842856044745e-05, "loss": 0.5509, "step": 6817 }, { "epoch": 1.401582896495015, "grad_norm": 0.16553765535354614, "learning_rate": 5.19339742068981e-05, "loss": 0.5225, "step": 6818 }, { "epoch": 1.4017884674683936, "grad_norm": 0.15738850831985474, "learning_rate": 5.192410521614695e-05, "loss": 0.5439, "step": 6819 }, { "epoch": 1.401994038441772, "grad_norm": 0.1941434144973755, "learning_rate": 5.1914235884277515e-05, "loss": 0.5431, "step": 6820 }, { "epoch": 1.4021996094151505, "grad_norm": 0.19510993361473083, "learning_rate": 5.1904366211775995e-05, "loss": 0.5699, "step": 6821 }, { "epoch": 1.4024051803885291, "grad_norm": 0.21199296414852142, "learning_rate": 5.189449619912862e-05, "loss": 0.5497, "step": 6822 }, { "epoch": 1.4026107513619077, "grad_norm": 0.20840586721897125, "learning_rate": 5.188462584682163e-05, "loss": 0.5692, "step": 6823 }, { "epoch": 1.4028163223352863, "grad_norm": 0.18796321749687195, "learning_rate": 5.187475515534132e-05, "loss": 0.5497, "step": 6824 }, { "epoch": 1.403021893308665, "grad_norm": 0.18638098239898682, "learning_rate": 5.186488412517396e-05, "loss": 0.556, "step": 6825 }, { "epoch": 1.4032274642820433, "grad_norm": 0.18943150341510773, "learning_rate": 5.185501275680582e-05, "loss": 0.5451, "step": 6826 }, { "epoch": 1.4034330352554218, "grad_norm": 0.19243142008781433, "learning_rate": 5.184514105072326e-05, "loss": 0.5348, "step": 6827 }, { "epoch": 1.4036386062288004, "grad_norm": 0.19465966522693634, "learning_rate": 5.1835269007412585e-05, "loss": 0.5711, "step": 6828 }, { "epoch": 1.403844177202179, "grad_norm": 0.1992519199848175, "learning_rate": 5.1825396627360166e-05, "loss": 0.5768, "step": 6829 }, { "epoch": 1.4040497481755576, "grad_norm": 0.16625314950942993, "learning_rate": 5.181552391105235e-05, "loss": 0.5431, "step": 6830 }, { "epoch": 1.4042553191489362, "grad_norm": 0.16418209671974182, "learning_rate": 5.180565085897552e-05, "loss": 0.5585, "step": 6831 }, { "epoch": 1.4044608901223148, "grad_norm": 0.19852881133556366, "learning_rate": 5.17957774716161e-05, "loss": 0.5298, "step": 6832 }, { "epoch": 1.4046664610956934, "grad_norm": 0.18581949174404144, "learning_rate": 5.178590374946047e-05, "loss": 0.5466, "step": 6833 }, { "epoch": 1.404872032069072, "grad_norm": 0.19243168830871582, "learning_rate": 5.177602969299509e-05, "loss": 0.552, "step": 6834 }, { "epoch": 1.4050776030424503, "grad_norm": 0.20078270137310028, "learning_rate": 5.1766155302706397e-05, "loss": 0.56, "step": 6835 }, { "epoch": 1.405283174015829, "grad_norm": 0.18953198194503784, "learning_rate": 5.175628057908085e-05, "loss": 0.5404, "step": 6836 }, { "epoch": 1.4054887449892075, "grad_norm": 0.19314275681972504, "learning_rate": 5.174640552260494e-05, "loss": 0.553, "step": 6837 }, { "epoch": 1.405694315962586, "grad_norm": 0.19777776300907135, "learning_rate": 5.1736530133765175e-05, "loss": 0.5539, "step": 6838 }, { "epoch": 1.4058998869359647, "grad_norm": 0.18886315822601318, "learning_rate": 5.1726654413048036e-05, "loss": 0.5508, "step": 6839 }, { "epoch": 1.4061054579093433, "grad_norm": 0.16566768288612366, "learning_rate": 5.171677836094008e-05, "loss": 0.5384, "step": 6840 }, { "epoch": 1.4063110288827216, "grad_norm": 0.12670090794563293, "learning_rate": 5.170690197792785e-05, "loss": 0.5064, "step": 6841 }, { "epoch": 1.4065165998561002, "grad_norm": 0.16452710330486298, "learning_rate": 5.1697025264497915e-05, "loss": 0.5549, "step": 6842 }, { "epoch": 1.4067221708294788, "grad_norm": 0.23035211861133575, "learning_rate": 5.168714822113684e-05, "loss": 0.533, "step": 6843 }, { "epoch": 1.4069277418028574, "grad_norm": 0.1920643001794815, "learning_rate": 5.167727084833123e-05, "loss": 0.5667, "step": 6844 }, { "epoch": 1.407133312776236, "grad_norm": 0.1763206124305725, "learning_rate": 5.1667393146567695e-05, "loss": 0.5285, "step": 6845 }, { "epoch": 1.4073388837496146, "grad_norm": 0.17114083468914032, "learning_rate": 5.1657515116332866e-05, "loss": 0.5385, "step": 6846 }, { "epoch": 1.4075444547229932, "grad_norm": 0.19775407016277313, "learning_rate": 5.164763675811338e-05, "loss": 0.569, "step": 6847 }, { "epoch": 1.4077500256963718, "grad_norm": 0.18887090682983398, "learning_rate": 5.163775807239591e-05, "loss": 0.5487, "step": 6848 }, { "epoch": 1.4079555966697503, "grad_norm": 0.1911323517560959, "learning_rate": 5.162787905966711e-05, "loss": 0.5632, "step": 6849 }, { "epoch": 1.4081611676431287, "grad_norm": 0.19571152329444885, "learning_rate": 5.16179997204137e-05, "loss": 0.5655, "step": 6850 }, { "epoch": 1.4083667386165073, "grad_norm": 0.1829329878091812, "learning_rate": 5.160812005512236e-05, "loss": 0.5319, "step": 6851 }, { "epoch": 1.4085723095898859, "grad_norm": 0.19352376461029053, "learning_rate": 5.1598240064279846e-05, "loss": 0.5616, "step": 6852 }, { "epoch": 1.4087778805632645, "grad_norm": 0.19807998836040497, "learning_rate": 5.158835974837289e-05, "loss": 0.5414, "step": 6853 }, { "epoch": 1.408983451536643, "grad_norm": 0.1893458068370819, "learning_rate": 5.157847910788822e-05, "loss": 0.5426, "step": 6854 }, { "epoch": 1.4091890225100216, "grad_norm": 0.1907995045185089, "learning_rate": 5.1568598143312656e-05, "loss": 0.5472, "step": 6855 }, { "epoch": 1.4093945934834, "grad_norm": 0.17473357915878296, "learning_rate": 5.1558716855132956e-05, "loss": 0.4997, "step": 6856 }, { "epoch": 1.4096001644567786, "grad_norm": 0.16449564695358276, "learning_rate": 5.154883524383592e-05, "loss": 0.5579, "step": 6857 }, { "epoch": 1.4098057354301572, "grad_norm": 0.1907692849636078, "learning_rate": 5.153895330990839e-05, "loss": 0.5778, "step": 6858 }, { "epoch": 1.4100113064035358, "grad_norm": 0.18911254405975342, "learning_rate": 5.1529071053837206e-05, "loss": 0.532, "step": 6859 }, { "epoch": 1.4102168773769144, "grad_norm": 0.19013933837413788, "learning_rate": 5.151918847610918e-05, "loss": 0.5414, "step": 6860 }, { "epoch": 1.410422448350293, "grad_norm": 0.1888997107744217, "learning_rate": 5.150930557721122e-05, "loss": 0.5472, "step": 6861 }, { "epoch": 1.4106280193236715, "grad_norm": 0.18794280290603638, "learning_rate": 5.14994223576302e-05, "loss": 0.5541, "step": 6862 }, { "epoch": 1.4108335902970501, "grad_norm": 0.19255901873111725, "learning_rate": 5.1489538817853034e-05, "loss": 0.5695, "step": 6863 }, { "epoch": 1.4110391612704287, "grad_norm": 0.18833082914352417, "learning_rate": 5.1479654958366594e-05, "loss": 0.5571, "step": 6864 }, { "epoch": 1.4112447322438073, "grad_norm": 0.1937963217496872, "learning_rate": 5.1469770779657864e-05, "loss": 0.5531, "step": 6865 }, { "epoch": 1.4114503032171857, "grad_norm": 0.16009144484996796, "learning_rate": 5.145988628221376e-05, "loss": 0.5195, "step": 6866 }, { "epoch": 1.4116558741905643, "grad_norm": 0.15770770609378815, "learning_rate": 5.145000146652126e-05, "loss": 0.5767, "step": 6867 }, { "epoch": 1.4118614451639429, "grad_norm": 0.18932950496673584, "learning_rate": 5.1440116333067313e-05, "loss": 0.5413, "step": 6868 }, { "epoch": 1.4120670161373214, "grad_norm": 0.2200823277235031, "learning_rate": 5.143023088233895e-05, "loss": 0.5721, "step": 6869 }, { "epoch": 1.4122725871107, "grad_norm": 0.19378498196601868, "learning_rate": 5.142034511482317e-05, "loss": 0.5732, "step": 6870 }, { "epoch": 1.4124781580840784, "grad_norm": 0.20359185338020325, "learning_rate": 5.141045903100698e-05, "loss": 0.5555, "step": 6871 }, { "epoch": 1.412683729057457, "grad_norm": 0.18266808986663818, "learning_rate": 5.140057263137744e-05, "loss": 0.5287, "step": 6872 }, { "epoch": 1.4128893000308356, "grad_norm": 0.191037118434906, "learning_rate": 5.139068591642161e-05, "loss": 0.5536, "step": 6873 }, { "epoch": 1.4130948710042142, "grad_norm": 0.19039712846279144, "learning_rate": 5.138079888662654e-05, "loss": 0.5692, "step": 6874 }, { "epoch": 1.4133004419775927, "grad_norm": 0.1601129174232483, "learning_rate": 5.1370911542479354e-05, "loss": 0.5244, "step": 6875 }, { "epoch": 1.4135060129509713, "grad_norm": 0.1585390418767929, "learning_rate": 5.1361023884467136e-05, "loss": 0.5695, "step": 6876 }, { "epoch": 1.41371158392435, "grad_norm": 0.2022130936384201, "learning_rate": 5.135113591307699e-05, "loss": 0.5696, "step": 6877 }, { "epoch": 1.4139171548977285, "grad_norm": 0.1920463740825653, "learning_rate": 5.134124762879606e-05, "loss": 0.5397, "step": 6878 }, { "epoch": 1.414122725871107, "grad_norm": 0.1937701404094696, "learning_rate": 5.13313590321115e-05, "loss": 0.5513, "step": 6879 }, { "epoch": 1.4143282968444857, "grad_norm": 0.16302789747714996, "learning_rate": 5.1321470123510486e-05, "loss": 0.524, "step": 6880 }, { "epoch": 1.414533867817864, "grad_norm": 0.1612044721841812, "learning_rate": 5.131158090348017e-05, "loss": 0.5558, "step": 6881 }, { "epoch": 1.4147394387912426, "grad_norm": 0.18755872547626495, "learning_rate": 5.130169137250777e-05, "loss": 0.5448, "step": 6882 }, { "epoch": 1.4149450097646212, "grad_norm": 0.16323046386241913, "learning_rate": 5.1291801531080475e-05, "loss": 0.5202, "step": 6883 }, { "epoch": 1.4151505807379998, "grad_norm": 0.15463986992835999, "learning_rate": 5.128191137968555e-05, "loss": 0.5395, "step": 6884 }, { "epoch": 1.4153561517113784, "grad_norm": 0.1867363005876541, "learning_rate": 5.12720209188102e-05, "loss": 0.5608, "step": 6885 }, { "epoch": 1.4155617226847568, "grad_norm": 0.18984296917915344, "learning_rate": 5.1262130148941705e-05, "loss": 0.5527, "step": 6886 }, { "epoch": 1.4157672936581354, "grad_norm": 0.18599240481853485, "learning_rate": 5.1252239070567315e-05, "loss": 0.538, "step": 6887 }, { "epoch": 1.415972864631514, "grad_norm": 0.19605940580368042, "learning_rate": 5.1242347684174327e-05, "loss": 0.5715, "step": 6888 }, { "epoch": 1.4161784356048925, "grad_norm": 0.19661271572113037, "learning_rate": 5.1232455990250055e-05, "loss": 0.5538, "step": 6889 }, { "epoch": 1.4163840065782711, "grad_norm": 0.1689828336238861, "learning_rate": 5.12225639892818e-05, "loss": 0.5337, "step": 6890 }, { "epoch": 1.4165895775516497, "grad_norm": 0.16040822863578796, "learning_rate": 5.1212671681756916e-05, "loss": 0.5651, "step": 6891 }, { "epoch": 1.4167951485250283, "grad_norm": 0.16304267942905426, "learning_rate": 5.120277906816272e-05, "loss": 0.5215, "step": 6892 }, { "epoch": 1.417000719498407, "grad_norm": 0.1574201136827469, "learning_rate": 5.119288614898659e-05, "loss": 0.5349, "step": 6893 }, { "epoch": 1.4172062904717855, "grad_norm": 0.20037010312080383, "learning_rate": 5.118299292471591e-05, "loss": 0.5484, "step": 6894 }, { "epoch": 1.417411861445164, "grad_norm": 0.16355712711811066, "learning_rate": 5.117309939583806e-05, "loss": 0.517, "step": 6895 }, { "epoch": 1.4176174324185424, "grad_norm": 0.15935970842838287, "learning_rate": 5.116320556284047e-05, "loss": 0.5531, "step": 6896 }, { "epoch": 1.417823003391921, "grad_norm": 0.20276428759098053, "learning_rate": 5.115331142621055e-05, "loss": 0.5586, "step": 6897 }, { "epoch": 1.4180285743652996, "grad_norm": 0.1946752518415451, "learning_rate": 5.114341698643573e-05, "loss": 0.5415, "step": 6898 }, { "epoch": 1.4182341453386782, "grad_norm": 0.1875738501548767, "learning_rate": 5.113352224400347e-05, "loss": 0.5354, "step": 6899 }, { "epoch": 1.4184397163120568, "grad_norm": 0.1904314160346985, "learning_rate": 5.112362719940123e-05, "loss": 0.5619, "step": 6900 }, { "epoch": 1.4186452872854354, "grad_norm": 0.20147216320037842, "learning_rate": 5.111373185311651e-05, "loss": 0.5728, "step": 6901 }, { "epoch": 1.4188508582588137, "grad_norm": 0.19195587933063507, "learning_rate": 5.110383620563679e-05, "loss": 0.5806, "step": 6902 }, { "epoch": 1.4190564292321923, "grad_norm": 0.16246861219406128, "learning_rate": 5.109394025744959e-05, "loss": 0.5218, "step": 6903 }, { "epoch": 1.419262000205571, "grad_norm": 0.16603510081768036, "learning_rate": 5.108404400904243e-05, "loss": 0.5348, "step": 6904 }, { "epoch": 1.4194675711789495, "grad_norm": 0.1957361102104187, "learning_rate": 5.1074147460902876e-05, "loss": 0.5661, "step": 6905 }, { "epoch": 1.419673142152328, "grad_norm": 0.1889890879392624, "learning_rate": 5.106425061351845e-05, "loss": 0.5672, "step": 6906 }, { "epoch": 1.4198787131257067, "grad_norm": 0.19111685454845428, "learning_rate": 5.1054353467376756e-05, "loss": 0.5739, "step": 6907 }, { "epoch": 1.4200842840990853, "grad_norm": 0.17033053934574127, "learning_rate": 5.104445602296536e-05, "loss": 0.5152, "step": 6908 }, { "epoch": 1.4202898550724639, "grad_norm": 0.1564977467060089, "learning_rate": 5.103455828077186e-05, "loss": 0.5598, "step": 6909 }, { "epoch": 1.4204954260458424, "grad_norm": 0.19049371778964996, "learning_rate": 5.1024660241283884e-05, "loss": 0.5463, "step": 6910 }, { "epoch": 1.4207009970192208, "grad_norm": 0.19642889499664307, "learning_rate": 5.101476190498906e-05, "loss": 0.578, "step": 6911 }, { "epoch": 1.4209065679925994, "grad_norm": 0.19157302379608154, "learning_rate": 5.1004863272375034e-05, "loss": 0.5386, "step": 6912 }, { "epoch": 1.421112138965978, "grad_norm": 0.19283618032932281, "learning_rate": 5.0994964343929445e-05, "loss": 0.5429, "step": 6913 }, { "epoch": 1.4213177099393566, "grad_norm": 0.19500254094600677, "learning_rate": 5.0985065120139994e-05, "loss": 0.54, "step": 6914 }, { "epoch": 1.4215232809127352, "grad_norm": 0.18495769798755646, "learning_rate": 5.097516560149434e-05, "loss": 0.5359, "step": 6915 }, { "epoch": 1.4217288518861138, "grad_norm": 0.18928299844264984, "learning_rate": 5.0965265788480225e-05, "loss": 0.5567, "step": 6916 }, { "epoch": 1.4219344228594921, "grad_norm": 0.18935348093509674, "learning_rate": 5.095536568158535e-05, "loss": 0.5359, "step": 6917 }, { "epoch": 1.4221399938328707, "grad_norm": 0.1989513635635376, "learning_rate": 5.094546528129743e-05, "loss": 0.5603, "step": 6918 }, { "epoch": 1.4223455648062493, "grad_norm": 0.16001847386360168, "learning_rate": 5.093556458810423e-05, "loss": 0.5223, "step": 6919 }, { "epoch": 1.4225511357796279, "grad_norm": 0.15646837651729584, "learning_rate": 5.0925663602493503e-05, "loss": 0.5285, "step": 6920 }, { "epoch": 1.4227567067530065, "grad_norm": 0.20338685810565948, "learning_rate": 5.091576232495304e-05, "loss": 0.574, "step": 6921 }, { "epoch": 1.422962277726385, "grad_norm": 0.1922929286956787, "learning_rate": 5.090586075597061e-05, "loss": 0.5376, "step": 6922 }, { "epoch": 1.4231678486997636, "grad_norm": 0.24350236356258392, "learning_rate": 5.089595889603401e-05, "loss": 0.5544, "step": 6923 }, { "epoch": 1.4233734196731422, "grad_norm": 0.1872577667236328, "learning_rate": 5.088605674563109e-05, "loss": 0.5748, "step": 6924 }, { "epoch": 1.4235789906465208, "grad_norm": 0.18415029346942902, "learning_rate": 5.0876154305249654e-05, "loss": 0.5457, "step": 6925 }, { "epoch": 1.4237845616198992, "grad_norm": 0.1886397749185562, "learning_rate": 5.086625157537757e-05, "loss": 0.5477, "step": 6926 }, { "epoch": 1.4239901325932778, "grad_norm": 0.19316554069519043, "learning_rate": 5.085634855650268e-05, "loss": 0.5608, "step": 6927 }, { "epoch": 1.4241957035666564, "grad_norm": 0.1911771446466446, "learning_rate": 5.084644524911288e-05, "loss": 0.5427, "step": 6928 }, { "epoch": 1.424401274540035, "grad_norm": 0.19828177988529205, "learning_rate": 5.083654165369604e-05, "loss": 0.5518, "step": 6929 }, { "epoch": 1.4246068455134135, "grad_norm": 0.16796253621578217, "learning_rate": 5.082663777074008e-05, "loss": 0.5173, "step": 6930 }, { "epoch": 1.4248124164867921, "grad_norm": 0.16129761934280396, "learning_rate": 5.0816733600732905e-05, "loss": 0.562, "step": 6931 }, { "epoch": 1.4250179874601705, "grad_norm": 0.19917796552181244, "learning_rate": 5.0806829144162455e-05, "loss": 0.5394, "step": 6932 }, { "epoch": 1.425223558433549, "grad_norm": 0.19599252939224243, "learning_rate": 5.079692440151668e-05, "loss": 0.5829, "step": 6933 }, { "epoch": 1.4254291294069277, "grad_norm": 0.1711527705192566, "learning_rate": 5.078701937328352e-05, "loss": 0.5075, "step": 6934 }, { "epoch": 1.4256347003803063, "grad_norm": 0.12597279250621796, "learning_rate": 5.077711405995098e-05, "loss": 0.497, "step": 6935 }, { "epoch": 1.4258402713536849, "grad_norm": 0.15089215338230133, "learning_rate": 5.076720846200702e-05, "loss": 0.5364, "step": 6936 }, { "epoch": 1.4260458423270634, "grad_norm": 0.19826306402683258, "learning_rate": 5.0757302579939656e-05, "loss": 0.5371, "step": 6937 }, { "epoch": 1.426251413300442, "grad_norm": 0.1632860153913498, "learning_rate": 5.0747396414236906e-05, "loss": 0.5114, "step": 6938 }, { "epoch": 1.4264569842738206, "grad_norm": 0.15971128642559052, "learning_rate": 5.07374899653868e-05, "loss": 0.5575, "step": 6939 }, { "epoch": 1.4266625552471992, "grad_norm": 0.18618735671043396, "learning_rate": 5.0727583233877376e-05, "loss": 0.557, "step": 6940 }, { "epoch": 1.4268681262205778, "grad_norm": 0.19377268850803375, "learning_rate": 5.07176762201967e-05, "loss": 0.5608, "step": 6941 }, { "epoch": 1.4270736971939562, "grad_norm": 0.18944592773914337, "learning_rate": 5.0707768924832844e-05, "loss": 0.5356, "step": 6942 }, { "epoch": 1.4272792681673347, "grad_norm": 0.1696036458015442, "learning_rate": 5.06978613482739e-05, "loss": 0.529, "step": 6943 }, { "epoch": 1.4274848391407133, "grad_norm": 0.1654544472694397, "learning_rate": 5.068795349100794e-05, "loss": 0.57, "step": 6944 }, { "epoch": 1.427690410114092, "grad_norm": 0.19743849337100983, "learning_rate": 5.067804535352311e-05, "loss": 0.558, "step": 6945 }, { "epoch": 1.4278959810874705, "grad_norm": 0.188226580619812, "learning_rate": 5.066813693630752e-05, "loss": 0.5425, "step": 6946 }, { "epoch": 1.4281015520608489, "grad_norm": 0.1916334182024002, "learning_rate": 5.065822823984931e-05, "loss": 0.582, "step": 6947 }, { "epoch": 1.4283071230342275, "grad_norm": 0.1938442885875702, "learning_rate": 5.064831926463664e-05, "loss": 0.5607, "step": 6948 }, { "epoch": 1.428512694007606, "grad_norm": 0.19236359000205994, "learning_rate": 5.0638410011157694e-05, "loss": 0.5811, "step": 6949 }, { "epoch": 1.4287182649809846, "grad_norm": 0.19282235205173492, "learning_rate": 5.0628500479900636e-05, "loss": 0.5456, "step": 6950 }, { "epoch": 1.4289238359543632, "grad_norm": 0.19609522819519043, "learning_rate": 5.0618590671353655e-05, "loss": 0.5484, "step": 6951 }, { "epoch": 1.4291294069277418, "grad_norm": 0.19038927555084229, "learning_rate": 5.060868058600499e-05, "loss": 0.538, "step": 6952 }, { "epoch": 1.4293349779011204, "grad_norm": 0.15865328907966614, "learning_rate": 5.0598770224342834e-05, "loss": 0.5187, "step": 6953 }, { "epoch": 1.429540548874499, "grad_norm": 0.1643393188714981, "learning_rate": 5.0588859586855435e-05, "loss": 0.561, "step": 6954 }, { "epoch": 1.4297461198478776, "grad_norm": 0.18920312821865082, "learning_rate": 5.057894867403106e-05, "loss": 0.5582, "step": 6955 }, { "epoch": 1.4299516908212562, "grad_norm": 0.20650269091129303, "learning_rate": 5.0569037486357954e-05, "loss": 0.5485, "step": 6956 }, { "epoch": 1.4301572617946345, "grad_norm": 0.19086134433746338, "learning_rate": 5.0559126024324394e-05, "loss": 0.5668, "step": 6957 }, { "epoch": 1.4303628327680131, "grad_norm": 0.18574881553649902, "learning_rate": 5.0549214288418695e-05, "loss": 0.5305, "step": 6958 }, { "epoch": 1.4305684037413917, "grad_norm": 0.16486965119838715, "learning_rate": 5.053930227912913e-05, "loss": 0.5394, "step": 6959 }, { "epoch": 1.4307739747147703, "grad_norm": 0.1669962853193283, "learning_rate": 5.052938999694403e-05, "loss": 0.5604, "step": 6960 }, { "epoch": 1.430979545688149, "grad_norm": 0.16902011632919312, "learning_rate": 5.0519477442351735e-05, "loss": 0.5269, "step": 6961 }, { "epoch": 1.4311851166615273, "grad_norm": 0.1662750244140625, "learning_rate": 5.0509564615840586e-05, "loss": 0.5506, "step": 6962 }, { "epoch": 1.4313906876349058, "grad_norm": 0.19221939146518707, "learning_rate": 5.049965151789895e-05, "loss": 0.5682, "step": 6963 }, { "epoch": 1.4315962586082844, "grad_norm": 0.18976832926273346, "learning_rate": 5.048973814901516e-05, "loss": 0.5402, "step": 6964 }, { "epoch": 1.431801829581663, "grad_norm": 0.18504224717617035, "learning_rate": 5.047982450967766e-05, "loss": 0.536, "step": 6965 }, { "epoch": 1.4320074005550416, "grad_norm": 0.18513992428779602, "learning_rate": 5.0469910600374815e-05, "loss": 0.5433, "step": 6966 }, { "epoch": 1.4322129715284202, "grad_norm": 0.1597176045179367, "learning_rate": 5.045999642159503e-05, "loss": 0.5006, "step": 6967 }, { "epoch": 1.4324185425017988, "grad_norm": 0.13741186261177063, "learning_rate": 5.045008197382674e-05, "loss": 0.5147, "step": 6968 }, { "epoch": 1.4326241134751774, "grad_norm": 0.16074904799461365, "learning_rate": 5.044016725755838e-05, "loss": 0.5536, "step": 6969 }, { "epoch": 1.432829684448556, "grad_norm": 0.21094325184822083, "learning_rate": 5.043025227327842e-05, "loss": 0.5529, "step": 6970 }, { "epoch": 1.4330352554219346, "grad_norm": 0.19735904037952423, "learning_rate": 5.0420337021475304e-05, "loss": 0.5282, "step": 6971 }, { "epoch": 1.433240826395313, "grad_norm": 0.1973976045846939, "learning_rate": 5.041042150263753e-05, "loss": 0.5593, "step": 6972 }, { "epoch": 1.4334463973686915, "grad_norm": 0.19355326890945435, "learning_rate": 5.0400505717253575e-05, "loss": 0.5692, "step": 6973 }, { "epoch": 1.43365196834207, "grad_norm": 0.19223208725452423, "learning_rate": 5.0390589665811944e-05, "loss": 0.5534, "step": 6974 }, { "epoch": 1.4338575393154487, "grad_norm": 0.1662292182445526, "learning_rate": 5.038067334880113e-05, "loss": 0.5175, "step": 6975 }, { "epoch": 1.4340631102888273, "grad_norm": 0.15810272097587585, "learning_rate": 5.0370756766709716e-05, "loss": 0.5404, "step": 6976 }, { "epoch": 1.4342686812622059, "grad_norm": 0.19795885682106018, "learning_rate": 5.0360839920026215e-05, "loss": 0.5718, "step": 6977 }, { "epoch": 1.4344742522355842, "grad_norm": 0.19126173853874207, "learning_rate": 5.0350922809239184e-05, "loss": 0.5549, "step": 6978 }, { "epoch": 1.4346798232089628, "grad_norm": 0.20567071437835693, "learning_rate": 5.03410054348372e-05, "loss": 0.5577, "step": 6979 }, { "epoch": 1.4348853941823414, "grad_norm": 0.1884375363588333, "learning_rate": 5.033108779730883e-05, "loss": 0.5491, "step": 6980 }, { "epoch": 1.43509096515572, "grad_norm": 0.16468265652656555, "learning_rate": 5.0321169897142695e-05, "loss": 0.5049, "step": 6981 }, { "epoch": 1.4352965361290986, "grad_norm": 0.16884614527225494, "learning_rate": 5.031125173482738e-05, "loss": 0.5472, "step": 6982 }, { "epoch": 1.4355021071024772, "grad_norm": 0.2028854638338089, "learning_rate": 5.0301333310851526e-05, "loss": 0.5737, "step": 6983 }, { "epoch": 1.4357076780758558, "grad_norm": 0.19400665163993835, "learning_rate": 5.029141462570376e-05, "loss": 0.5492, "step": 6984 }, { "epoch": 1.4359132490492343, "grad_norm": 0.19768649339675903, "learning_rate": 5.028149567987271e-05, "loss": 0.5461, "step": 6985 }, { "epoch": 1.436118820022613, "grad_norm": 0.164305180311203, "learning_rate": 5.027157647384708e-05, "loss": 0.5386, "step": 6986 }, { "epoch": 1.4363243909959913, "grad_norm": 0.16050846874713898, "learning_rate": 5.02616570081155e-05, "loss": 0.5472, "step": 6987 }, { "epoch": 1.4365299619693699, "grad_norm": 0.19127194583415985, "learning_rate": 5.025173728316668e-05, "loss": 0.5656, "step": 6988 }, { "epoch": 1.4367355329427485, "grad_norm": 0.1859859675168991, "learning_rate": 5.02418172994893e-05, "loss": 0.5506, "step": 6989 }, { "epoch": 1.436941103916127, "grad_norm": 0.16769689321517944, "learning_rate": 5.0231897057572085e-05, "loss": 0.5391, "step": 6990 }, { "epoch": 1.4371466748895056, "grad_norm": 0.16699868440628052, "learning_rate": 5.0221976557903755e-05, "loss": 0.5287, "step": 6991 }, { "epoch": 1.4373522458628842, "grad_norm": 0.19447840750217438, "learning_rate": 5.021205580097305e-05, "loss": 0.5451, "step": 6992 }, { "epoch": 1.4375578168362626, "grad_norm": 0.1894395351409912, "learning_rate": 5.020213478726871e-05, "loss": 0.546, "step": 6993 }, { "epoch": 1.4377633878096412, "grad_norm": 0.20027700066566467, "learning_rate": 5.0192213517279524e-05, "loss": 0.5488, "step": 6994 }, { "epoch": 1.4379689587830198, "grad_norm": 0.15890729427337646, "learning_rate": 5.0182291991494224e-05, "loss": 0.5155, "step": 6995 }, { "epoch": 1.4381745297563984, "grad_norm": 0.16410616040229797, "learning_rate": 5.017237021040163e-05, "loss": 0.5709, "step": 6996 }, { "epoch": 1.438380100729777, "grad_norm": 0.19332385063171387, "learning_rate": 5.016244817449054e-05, "loss": 0.5472, "step": 6997 }, { "epoch": 1.4385856717031555, "grad_norm": 0.18809527158737183, "learning_rate": 5.015252588424975e-05, "loss": 0.5594, "step": 6998 }, { "epoch": 1.4387912426765341, "grad_norm": 0.19198375940322876, "learning_rate": 5.0142603340168084e-05, "loss": 0.5545, "step": 6999 }, { "epoch": 1.4389968136499127, "grad_norm": 0.1915784478187561, "learning_rate": 5.0132680542734396e-05, "loss": 0.5627, "step": 7000 }, { "epoch": 1.4392023846232913, "grad_norm": 0.19142676889896393, "learning_rate": 5.012275749243752e-05, "loss": 0.5473, "step": 7001 }, { "epoch": 1.4394079555966697, "grad_norm": 0.18919003009796143, "learning_rate": 5.011283418976633e-05, "loss": 0.5513, "step": 7002 }, { "epoch": 1.4396135265700483, "grad_norm": 0.16133341193199158, "learning_rate": 5.010291063520969e-05, "loss": 0.4986, "step": 7003 }, { "epoch": 1.4398190975434269, "grad_norm": 0.15433275699615479, "learning_rate": 5.009298682925651e-05, "loss": 0.5429, "step": 7004 }, { "epoch": 1.4400246685168054, "grad_norm": 0.17464013397693634, "learning_rate": 5.008306277239567e-05, "loss": 0.524, "step": 7005 }, { "epoch": 1.440230239490184, "grad_norm": 0.15277941524982452, "learning_rate": 5.0073138465116075e-05, "loss": 0.5293, "step": 7006 }, { "epoch": 1.4404358104635626, "grad_norm": 0.1988225281238556, "learning_rate": 5.0063213907906665e-05, "loss": 0.5324, "step": 7007 }, { "epoch": 1.440641381436941, "grad_norm": 0.2008810192346573, "learning_rate": 5.005328910125638e-05, "loss": 0.5634, "step": 7008 }, { "epoch": 1.4408469524103196, "grad_norm": 0.19552162289619446, "learning_rate": 5.004336404565415e-05, "loss": 0.5382, "step": 7009 }, { "epoch": 1.4410525233836982, "grad_norm": 0.1576053947210312, "learning_rate": 5.003343874158895e-05, "loss": 0.4966, "step": 7010 }, { "epoch": 1.4412580943570767, "grad_norm": 0.18060800433158875, "learning_rate": 5.002351318954975e-05, "loss": 0.5758, "step": 7011 }, { "epoch": 1.4414636653304553, "grad_norm": 0.19537772238254547, "learning_rate": 5.001358739002553e-05, "loss": 0.5713, "step": 7012 }, { "epoch": 1.441669236303834, "grad_norm": 0.18666040897369385, "learning_rate": 5.0003661343505284e-05, "loss": 0.5334, "step": 7013 }, { "epoch": 1.4418748072772125, "grad_norm": 0.16254711151123047, "learning_rate": 4.9993735050478045e-05, "loss": 0.5159, "step": 7014 }, { "epoch": 1.442080378250591, "grad_norm": 0.1602196842432022, "learning_rate": 4.9983808511432824e-05, "loss": 0.5267, "step": 7015 }, { "epoch": 1.4422859492239697, "grad_norm": 0.1874070167541504, "learning_rate": 4.9973881726858644e-05, "loss": 0.5258, "step": 7016 }, { "epoch": 1.442491520197348, "grad_norm": 0.19187650084495544, "learning_rate": 4.996395469724456e-05, "loss": 0.5574, "step": 7017 }, { "epoch": 1.4426970911707266, "grad_norm": 0.1952408105134964, "learning_rate": 4.995402742307963e-05, "loss": 0.5735, "step": 7018 }, { "epoch": 1.4429026621441052, "grad_norm": 0.20097225904464722, "learning_rate": 4.9944099904852926e-05, "loss": 0.572, "step": 7019 }, { "epoch": 1.4431082331174838, "grad_norm": 0.16808289289474487, "learning_rate": 4.993417214305352e-05, "loss": 0.5367, "step": 7020 }, { "epoch": 1.4433138040908624, "grad_norm": 0.16581854224205017, "learning_rate": 4.992424413817053e-05, "loss": 0.5764, "step": 7021 }, { "epoch": 1.443519375064241, "grad_norm": 0.15527617931365967, "learning_rate": 4.9914315890693035e-05, "loss": 0.5166, "step": 7022 }, { "epoch": 1.4437249460376194, "grad_norm": 0.15834735333919525, "learning_rate": 4.990438740111017e-05, "loss": 0.5397, "step": 7023 }, { "epoch": 1.443930517010998, "grad_norm": 0.1944034993648529, "learning_rate": 4.989445866991105e-05, "loss": 0.5449, "step": 7024 }, { "epoch": 1.4441360879843765, "grad_norm": 0.1605810672044754, "learning_rate": 4.988452969758485e-05, "loss": 0.5229, "step": 7025 }, { "epoch": 1.4443416589577551, "grad_norm": 0.15166768431663513, "learning_rate": 4.9874600484620684e-05, "loss": 0.5337, "step": 7026 }, { "epoch": 1.4445472299311337, "grad_norm": 0.19105499982833862, "learning_rate": 4.9864671031507746e-05, "loss": 0.5351, "step": 7027 }, { "epoch": 1.4447528009045123, "grad_norm": 0.18772821128368378, "learning_rate": 4.98547413387352e-05, "loss": 0.5418, "step": 7028 }, { "epoch": 1.444958371877891, "grad_norm": 0.1658894121646881, "learning_rate": 4.984481140679224e-05, "loss": 0.5272, "step": 7029 }, { "epoch": 1.4451639428512695, "grad_norm": 0.17171718180179596, "learning_rate": 4.983488123616807e-05, "loss": 0.5593, "step": 7030 }, { "epoch": 1.445369513824648, "grad_norm": 0.18422532081604004, "learning_rate": 4.9824950827351894e-05, "loss": 0.5262, "step": 7031 }, { "epoch": 1.4455750847980267, "grad_norm": 0.19110561907291412, "learning_rate": 4.981502018083295e-05, "loss": 0.5546, "step": 7032 }, { "epoch": 1.445780655771405, "grad_norm": 0.18570828437805176, "learning_rate": 4.980508929710045e-05, "loss": 0.5493, "step": 7033 }, { "epoch": 1.4459862267447836, "grad_norm": 0.19072416424751282, "learning_rate": 4.9795158176643665e-05, "loss": 0.5656, "step": 7034 }, { "epoch": 1.4461917977181622, "grad_norm": 0.18956297636032104, "learning_rate": 4.978522681995186e-05, "loss": 0.5594, "step": 7035 }, { "epoch": 1.4463973686915408, "grad_norm": 0.1876407116651535, "learning_rate": 4.977529522751429e-05, "loss": 0.5668, "step": 7036 }, { "epoch": 1.4466029396649194, "grad_norm": 0.1943429410457611, "learning_rate": 4.976536339982024e-05, "loss": 0.5389, "step": 7037 }, { "epoch": 1.4468085106382977, "grad_norm": 0.19916300475597382, "learning_rate": 4.975543133735901e-05, "loss": 0.5564, "step": 7038 }, { "epoch": 1.4470140816116763, "grad_norm": 0.19892625510692596, "learning_rate": 4.974549904061991e-05, "loss": 0.5782, "step": 7039 }, { "epoch": 1.447219652585055, "grad_norm": 0.19441033899784088, "learning_rate": 4.9735566510092245e-05, "loss": 0.5703, "step": 7040 }, { "epoch": 1.4474252235584335, "grad_norm": 0.1984698474407196, "learning_rate": 4.972563374626536e-05, "loss": 0.5614, "step": 7041 }, { "epoch": 1.447630794531812, "grad_norm": 0.16778507828712463, "learning_rate": 4.971570074962859e-05, "loss": 0.5299, "step": 7042 }, { "epoch": 1.4478363655051907, "grad_norm": 0.14573578536510468, "learning_rate": 4.970576752067128e-05, "loss": 0.5233, "step": 7043 }, { "epoch": 1.4480419364785693, "grad_norm": 0.14844007790088654, "learning_rate": 4.9695834059882796e-05, "loss": 0.5304, "step": 7044 }, { "epoch": 1.4482475074519479, "grad_norm": 0.19099220633506775, "learning_rate": 4.968590036775251e-05, "loss": 0.5603, "step": 7045 }, { "epoch": 1.4484530784253264, "grad_norm": 0.16473321616649628, "learning_rate": 4.967596644476983e-05, "loss": 0.5134, "step": 7046 }, { "epoch": 1.448658649398705, "grad_norm": 0.17135196924209595, "learning_rate": 4.966603229142412e-05, "loss": 0.5579, "step": 7047 }, { "epoch": 1.4488642203720834, "grad_norm": 0.19533687829971313, "learning_rate": 4.9656097908204825e-05, "loss": 0.5617, "step": 7048 }, { "epoch": 1.449069791345462, "grad_norm": 0.1876286268234253, "learning_rate": 4.964616329560136e-05, "loss": 0.554, "step": 7049 }, { "epoch": 1.4492753623188406, "grad_norm": 0.16037873923778534, "learning_rate": 4.9636228454103126e-05, "loss": 0.529, "step": 7050 }, { "epoch": 1.4494809332922192, "grad_norm": 0.1680610179901123, "learning_rate": 4.962629338419958e-05, "loss": 0.5376, "step": 7051 }, { "epoch": 1.4496865042655978, "grad_norm": 0.1924995481967926, "learning_rate": 4.9616358086380196e-05, "loss": 0.5543, "step": 7052 }, { "epoch": 1.4498920752389761, "grad_norm": 0.1638346016407013, "learning_rate": 4.9606422561134425e-05, "loss": 0.5091, "step": 7053 }, { "epoch": 1.4500976462123547, "grad_norm": 0.16642382740974426, "learning_rate": 4.9596486808951735e-05, "loss": 0.5628, "step": 7054 }, { "epoch": 1.4503032171857333, "grad_norm": 0.16534394025802612, "learning_rate": 4.958655083032164e-05, "loss": 0.5297, "step": 7055 }, { "epoch": 1.4505087881591119, "grad_norm": 0.16639864444732666, "learning_rate": 4.95766146257336e-05, "loss": 0.5561, "step": 7056 }, { "epoch": 1.4507143591324905, "grad_norm": 0.190561905503273, "learning_rate": 4.956667819567717e-05, "loss": 0.5604, "step": 7057 }, { "epoch": 1.450919930105869, "grad_norm": 0.19295108318328857, "learning_rate": 4.955674154064182e-05, "loss": 0.5524, "step": 7058 }, { "epoch": 1.4511255010792476, "grad_norm": 0.19699627161026, "learning_rate": 4.9546804661117146e-05, "loss": 0.5482, "step": 7059 }, { "epoch": 1.4513310720526262, "grad_norm": 0.18727873265743256, "learning_rate": 4.953686755759265e-05, "loss": 0.5565, "step": 7060 }, { "epoch": 1.4515366430260048, "grad_norm": 0.19223269820213318, "learning_rate": 4.952693023055788e-05, "loss": 0.5661, "step": 7061 }, { "epoch": 1.4517422139993834, "grad_norm": 0.19679668545722961, "learning_rate": 4.951699268050243e-05, "loss": 0.5632, "step": 7062 }, { "epoch": 1.4519477849727618, "grad_norm": 0.19206634163856506, "learning_rate": 4.9507054907915866e-05, "loss": 0.5459, "step": 7063 }, { "epoch": 1.4521533559461404, "grad_norm": 0.19624993205070496, "learning_rate": 4.949711691328777e-05, "loss": 0.5741, "step": 7064 }, { "epoch": 1.452358926919519, "grad_norm": 0.19353879988193512, "learning_rate": 4.948717869710773e-05, "loss": 0.5228, "step": 7065 }, { "epoch": 1.4525644978928975, "grad_norm": 0.1924706995487213, "learning_rate": 4.947724025986538e-05, "loss": 0.5716, "step": 7066 }, { "epoch": 1.4527700688662761, "grad_norm": 0.19107024371623993, "learning_rate": 4.946730160205033e-05, "loss": 0.555, "step": 7067 }, { "epoch": 1.4529756398396547, "grad_norm": 0.18900389969348907, "learning_rate": 4.94573627241522e-05, "loss": 0.5505, "step": 7068 }, { "epoch": 1.453181210813033, "grad_norm": 0.16496512293815613, "learning_rate": 4.944742362666065e-05, "loss": 0.5272, "step": 7069 }, { "epoch": 1.4533867817864117, "grad_norm": 0.16446129977703094, "learning_rate": 4.9437484310065326e-05, "loss": 0.5483, "step": 7070 }, { "epoch": 1.4535923527597903, "grad_norm": 0.1935243159532547, "learning_rate": 4.942754477485588e-05, "loss": 0.5516, "step": 7071 }, { "epoch": 1.4537979237331689, "grad_norm": 0.1573350727558136, "learning_rate": 4.9417605021522016e-05, "loss": 0.5269, "step": 7072 }, { "epoch": 1.4540034947065474, "grad_norm": 0.1570722460746765, "learning_rate": 4.9407665050553395e-05, "loss": 0.5599, "step": 7073 }, { "epoch": 1.454209065679926, "grad_norm": 0.19235976040363312, "learning_rate": 4.9397724862439726e-05, "loss": 0.5488, "step": 7074 }, { "epoch": 1.4544146366533046, "grad_norm": 0.19353123009204865, "learning_rate": 4.938778445767069e-05, "loss": 0.5436, "step": 7075 }, { "epoch": 1.4546202076266832, "grad_norm": 0.192392498254776, "learning_rate": 4.9377843836736026e-05, "loss": 0.547, "step": 7076 }, { "epoch": 1.4548257786000618, "grad_norm": 0.1857522875070572, "learning_rate": 4.936790300012545e-05, "loss": 0.5477, "step": 7077 }, { "epoch": 1.4550313495734402, "grad_norm": 0.20272956788539886, "learning_rate": 4.935796194832872e-05, "loss": 0.5526, "step": 7078 }, { "epoch": 1.4552369205468187, "grad_norm": 0.1533660888671875, "learning_rate": 4.9348020681835573e-05, "loss": 0.5079, "step": 7079 }, { "epoch": 1.4554424915201973, "grad_norm": 0.15885986387729645, "learning_rate": 4.9338079201135777e-05, "loss": 0.544, "step": 7080 }, { "epoch": 1.455648062493576, "grad_norm": 0.19332925975322723, "learning_rate": 4.932813750671909e-05, "loss": 0.5493, "step": 7081 }, { "epoch": 1.4558536334669545, "grad_norm": 0.16609343886375427, "learning_rate": 4.931819559907529e-05, "loss": 0.5295, "step": 7082 }, { "epoch": 1.456059204440333, "grad_norm": 0.12420736253261566, "learning_rate": 4.930825347869418e-05, "loss": 0.5104, "step": 7083 }, { "epoch": 1.4562647754137115, "grad_norm": 0.12772247195243835, "learning_rate": 4.9298311146065565e-05, "loss": 0.5214, "step": 7084 }, { "epoch": 1.45647034638709, "grad_norm": 0.1771061271429062, "learning_rate": 4.9288368601679235e-05, "loss": 0.5358, "step": 7085 }, { "epoch": 1.4566759173604686, "grad_norm": 0.20758508145809174, "learning_rate": 4.9278425846025047e-05, "loss": 0.5321, "step": 7086 }, { "epoch": 1.4568814883338472, "grad_norm": 0.16325919330120087, "learning_rate": 4.926848287959281e-05, "loss": 0.5155, "step": 7087 }, { "epoch": 1.4570870593072258, "grad_norm": 0.15556760132312775, "learning_rate": 4.925853970287236e-05, "loss": 0.5374, "step": 7088 }, { "epoch": 1.4572926302806044, "grad_norm": 0.19319914281368256, "learning_rate": 4.924859631635356e-05, "loss": 0.5403, "step": 7089 }, { "epoch": 1.457498201253983, "grad_norm": 0.19514033198356628, "learning_rate": 4.9238652720526295e-05, "loss": 0.5609, "step": 7090 }, { "epoch": 1.4577037722273616, "grad_norm": 0.18153122067451477, "learning_rate": 4.922870891588042e-05, "loss": 0.5313, "step": 7091 }, { "epoch": 1.4579093432007402, "grad_norm": 0.19177407026290894, "learning_rate": 4.9218764902905814e-05, "loss": 0.5595, "step": 7092 }, { "epoch": 1.4581149141741185, "grad_norm": 0.18836280703544617, "learning_rate": 4.920882068209238e-05, "loss": 0.544, "step": 7093 }, { "epoch": 1.4583204851474971, "grad_norm": 0.19115997850894928, "learning_rate": 4.919887625393003e-05, "loss": 0.5544, "step": 7094 }, { "epoch": 1.4585260561208757, "grad_norm": 0.1862732619047165, "learning_rate": 4.918893161890867e-05, "loss": 0.5515, "step": 7095 }, { "epoch": 1.4587316270942543, "grad_norm": 0.15882770717144012, "learning_rate": 4.917898677751822e-05, "loss": 0.5248, "step": 7096 }, { "epoch": 1.458937198067633, "grad_norm": 0.16427573561668396, "learning_rate": 4.9169041730248634e-05, "loss": 0.5654, "step": 7097 }, { "epoch": 1.4591427690410115, "grad_norm": 0.19142089784145355, "learning_rate": 4.915909647758984e-05, "loss": 0.5522, "step": 7098 }, { "epoch": 1.4593483400143898, "grad_norm": 0.19446474313735962, "learning_rate": 4.914915102003181e-05, "loss": 0.5274, "step": 7099 }, { "epoch": 1.4595539109877684, "grad_norm": 0.1596178114414215, "learning_rate": 4.9139205358064495e-05, "loss": 0.5138, "step": 7100 }, { "epoch": 1.459759481961147, "grad_norm": 0.1602422297000885, "learning_rate": 4.912925949217788e-05, "loss": 0.5237, "step": 7101 }, { "epoch": 1.4599650529345256, "grad_norm": 0.19484317302703857, "learning_rate": 4.911931342286195e-05, "loss": 0.5393, "step": 7102 }, { "epoch": 1.4601706239079042, "grad_norm": 0.2035979926586151, "learning_rate": 4.91093671506067e-05, "loss": 0.5555, "step": 7103 }, { "epoch": 1.4603761948812828, "grad_norm": 0.19783945381641388, "learning_rate": 4.909942067590215e-05, "loss": 0.5507, "step": 7104 }, { "epoch": 1.4605817658546614, "grad_norm": 0.19101816415786743, "learning_rate": 4.9089473999238294e-05, "loss": 0.5457, "step": 7105 }, { "epoch": 1.46078733682804, "grad_norm": 0.18535058200359344, "learning_rate": 4.907952712110516e-05, "loss": 0.5209, "step": 7106 }, { "epoch": 1.4609929078014185, "grad_norm": 0.1839088499546051, "learning_rate": 4.906958004199281e-05, "loss": 0.5424, "step": 7107 }, { "epoch": 1.461198478774797, "grad_norm": 0.18688786029815674, "learning_rate": 4.905963276239127e-05, "loss": 0.5383, "step": 7108 }, { "epoch": 1.4614040497481755, "grad_norm": 0.19204580783843994, "learning_rate": 4.904968528279058e-05, "loss": 0.5667, "step": 7109 }, { "epoch": 1.461609620721554, "grad_norm": 0.19083940982818604, "learning_rate": 4.903973760368084e-05, "loss": 0.5628, "step": 7110 }, { "epoch": 1.4618151916949327, "grad_norm": 0.1922621876001358, "learning_rate": 4.9029789725552105e-05, "loss": 0.536, "step": 7111 }, { "epoch": 1.4620207626683113, "grad_norm": 0.19811585545539856, "learning_rate": 4.901984164889447e-05, "loss": 0.571, "step": 7112 }, { "epoch": 1.4622263336416899, "grad_norm": 0.1963101178407669, "learning_rate": 4.9009893374198015e-05, "loss": 0.568, "step": 7113 }, { "epoch": 1.4624319046150682, "grad_norm": 0.19826072454452515, "learning_rate": 4.899994490195286e-05, "loss": 0.541, "step": 7114 }, { "epoch": 1.4626374755884468, "grad_norm": 0.19222994148731232, "learning_rate": 4.898999623264913e-05, "loss": 0.5699, "step": 7115 }, { "epoch": 1.4628430465618254, "grad_norm": 0.19945533573627472, "learning_rate": 4.898004736677692e-05, "loss": 0.5663, "step": 7116 }, { "epoch": 1.463048617535204, "grad_norm": 0.18743856251239777, "learning_rate": 4.8970098304826384e-05, "loss": 0.5423, "step": 7117 }, { "epoch": 1.4632541885085826, "grad_norm": 0.1742721050977707, "learning_rate": 4.896014904728766e-05, "loss": 0.5273, "step": 7118 }, { "epoch": 1.4634597594819612, "grad_norm": 0.15842121839523315, "learning_rate": 4.895019959465091e-05, "loss": 0.5392, "step": 7119 }, { "epoch": 1.4636653304553398, "grad_norm": 0.1904791295528412, "learning_rate": 4.894024994740627e-05, "loss": 0.565, "step": 7120 }, { "epoch": 1.4638709014287183, "grad_norm": 0.18996872007846832, "learning_rate": 4.893030010604393e-05, "loss": 0.5624, "step": 7121 }, { "epoch": 1.464076472402097, "grad_norm": 0.18377164006233215, "learning_rate": 4.89203500710541e-05, "loss": 0.5628, "step": 7122 }, { "epoch": 1.4642820433754755, "grad_norm": 0.19251424074172974, "learning_rate": 4.891039984292693e-05, "loss": 0.5489, "step": 7123 }, { "epoch": 1.4644876143488539, "grad_norm": 0.1817564070224762, "learning_rate": 4.890044942215263e-05, "loss": 0.5592, "step": 7124 }, { "epoch": 1.4646931853222325, "grad_norm": 0.1885865181684494, "learning_rate": 4.8890498809221434e-05, "loss": 0.5447, "step": 7125 }, { "epoch": 1.464898756295611, "grad_norm": 0.19473087787628174, "learning_rate": 4.8880548004623545e-05, "loss": 0.5545, "step": 7126 }, { "epoch": 1.4651043272689896, "grad_norm": 0.18976017832756042, "learning_rate": 4.8870597008849175e-05, "loss": 0.5323, "step": 7127 }, { "epoch": 1.4653098982423682, "grad_norm": 0.1930120289325714, "learning_rate": 4.88606458223886e-05, "loss": 0.5459, "step": 7128 }, { "epoch": 1.4655154692157466, "grad_norm": 0.18661560118198395, "learning_rate": 4.885069444573205e-05, "loss": 0.5345, "step": 7129 }, { "epoch": 1.4657210401891252, "grad_norm": 0.1941232681274414, "learning_rate": 4.884074287936977e-05, "loss": 0.5289, "step": 7130 }, { "epoch": 1.4659266111625038, "grad_norm": 0.19508835673332214, "learning_rate": 4.883079112379204e-05, "loss": 0.5421, "step": 7131 }, { "epoch": 1.4661321821358824, "grad_norm": 0.200748473405838, "learning_rate": 4.882083917948914e-05, "loss": 0.5602, "step": 7132 }, { "epoch": 1.466337753109261, "grad_norm": 0.19630691409111023, "learning_rate": 4.8810887046951356e-05, "loss": 0.5469, "step": 7133 }, { "epoch": 1.4665433240826395, "grad_norm": 0.18631185591220856, "learning_rate": 4.880093472666897e-05, "loss": 0.5349, "step": 7134 }, { "epoch": 1.4667488950560181, "grad_norm": 0.20446190237998962, "learning_rate": 4.879098221913231e-05, "loss": 0.5395, "step": 7135 }, { "epoch": 1.4669544660293967, "grad_norm": 0.19369782507419586, "learning_rate": 4.8781029524831676e-05, "loss": 0.548, "step": 7136 }, { "epoch": 1.4671600370027753, "grad_norm": 0.19022773206233978, "learning_rate": 4.8771076644257365e-05, "loss": 0.5499, "step": 7137 }, { "epoch": 1.467365607976154, "grad_norm": 0.19664426147937775, "learning_rate": 4.876112357789977e-05, "loss": 0.5629, "step": 7138 }, { "epoch": 1.4675711789495323, "grad_norm": 0.19032470881938934, "learning_rate": 4.875117032624917e-05, "loss": 0.546, "step": 7139 }, { "epoch": 1.4677767499229109, "grad_norm": 0.18640637397766113, "learning_rate": 4.874121688979595e-05, "loss": 0.5317, "step": 7140 }, { "epoch": 1.4679823208962894, "grad_norm": 0.19098687171936035, "learning_rate": 4.873126326903045e-05, "loss": 0.5494, "step": 7141 }, { "epoch": 1.468187891869668, "grad_norm": 0.19771692156791687, "learning_rate": 4.872130946444305e-05, "loss": 0.5562, "step": 7142 }, { "epoch": 1.4683934628430466, "grad_norm": 0.18976187705993652, "learning_rate": 4.871135547652414e-05, "loss": 0.5607, "step": 7143 }, { "epoch": 1.4685990338164252, "grad_norm": 0.19151365756988525, "learning_rate": 4.870140130576408e-05, "loss": 0.5471, "step": 7144 }, { "epoch": 1.4688046047898036, "grad_norm": 0.19620567560195923, "learning_rate": 4.869144695265328e-05, "loss": 0.562, "step": 7145 }, { "epoch": 1.4690101757631822, "grad_norm": 0.19159796833992004, "learning_rate": 4.8681492417682154e-05, "loss": 0.5638, "step": 7146 }, { "epoch": 1.4692157467365607, "grad_norm": 0.20116734504699707, "learning_rate": 4.867153770134108e-05, "loss": 0.5677, "step": 7147 }, { "epoch": 1.4694213177099393, "grad_norm": 0.19330163300037384, "learning_rate": 4.866158280412053e-05, "loss": 0.5546, "step": 7148 }, { "epoch": 1.469626888683318, "grad_norm": 0.18877775967121124, "learning_rate": 4.86516277265109e-05, "loss": 0.559, "step": 7149 }, { "epoch": 1.4698324596566965, "grad_norm": 0.1901031881570816, "learning_rate": 4.864167246900265e-05, "loss": 0.5388, "step": 7150 }, { "epoch": 1.470038030630075, "grad_norm": 0.18822161853313446, "learning_rate": 4.8631717032086195e-05, "loss": 0.5466, "step": 7151 }, { "epoch": 1.4702436016034537, "grad_norm": 0.16988466680049896, "learning_rate": 4.862176141625203e-05, "loss": 0.5347, "step": 7152 }, { "epoch": 1.4704491725768323, "grad_norm": 0.12935671210289001, "learning_rate": 4.86118056219906e-05, "loss": 0.5038, "step": 7153 }, { "epoch": 1.4706547435502106, "grad_norm": 0.16515877842903137, "learning_rate": 4.860184964979239e-05, "loss": 0.5383, "step": 7154 }, { "epoch": 1.4708603145235892, "grad_norm": 0.2031169980764389, "learning_rate": 4.859189350014789e-05, "loss": 0.558, "step": 7155 }, { "epoch": 1.4710658854969678, "grad_norm": 0.1971338540315628, "learning_rate": 4.858193717354759e-05, "loss": 0.5552, "step": 7156 }, { "epoch": 1.4712714564703464, "grad_norm": 0.18545454740524292, "learning_rate": 4.857198067048199e-05, "loss": 0.5499, "step": 7157 }, { "epoch": 1.471477027443725, "grad_norm": 0.18908904492855072, "learning_rate": 4.856202399144157e-05, "loss": 0.5331, "step": 7158 }, { "epoch": 1.4716825984171036, "grad_norm": 0.18228811025619507, "learning_rate": 4.855206713691691e-05, "loss": 0.5181, "step": 7159 }, { "epoch": 1.471888169390482, "grad_norm": 0.1866607964038849, "learning_rate": 4.8542110107398483e-05, "loss": 0.5157, "step": 7160 }, { "epoch": 1.4720937403638605, "grad_norm": 0.19502104818820953, "learning_rate": 4.853215290337685e-05, "loss": 0.5462, "step": 7161 }, { "epoch": 1.4722993113372391, "grad_norm": 0.16694171726703644, "learning_rate": 4.852219552534256e-05, "loss": 0.5123, "step": 7162 }, { "epoch": 1.4725048823106177, "grad_norm": 0.1643698364496231, "learning_rate": 4.851223797378614e-05, "loss": 0.5402, "step": 7163 }, { "epoch": 1.4727104532839963, "grad_norm": 0.20267751812934875, "learning_rate": 4.85022802491982e-05, "loss": 0.5493, "step": 7164 }, { "epoch": 1.472916024257375, "grad_norm": 0.19984979927539825, "learning_rate": 4.849232235206927e-05, "loss": 0.5387, "step": 7165 }, { "epoch": 1.4731215952307535, "grad_norm": 0.19350376725196838, "learning_rate": 4.848236428288993e-05, "loss": 0.5465, "step": 7166 }, { "epoch": 1.473327166204132, "grad_norm": 0.2067371904850006, "learning_rate": 4.84724060421508e-05, "loss": 0.5688, "step": 7167 }, { "epoch": 1.4735327371775107, "grad_norm": 0.20047098398208618, "learning_rate": 4.846244763034243e-05, "loss": 0.5426, "step": 7168 }, { "epoch": 1.473738308150889, "grad_norm": 0.1930703967809677, "learning_rate": 4.845248904795547e-05, "loss": 0.5556, "step": 7169 }, { "epoch": 1.4739438791242676, "grad_norm": 0.19122304022312164, "learning_rate": 4.8442530295480496e-05, "loss": 0.5323, "step": 7170 }, { "epoch": 1.4741494500976462, "grad_norm": 0.1875450760126114, "learning_rate": 4.843257137340816e-05, "loss": 0.519, "step": 7171 }, { "epoch": 1.4743550210710248, "grad_norm": 0.18695366382598877, "learning_rate": 4.842261228222906e-05, "loss": 0.538, "step": 7172 }, { "epoch": 1.4745605920444034, "grad_norm": 0.19884580373764038, "learning_rate": 4.841265302243386e-05, "loss": 0.5696, "step": 7173 }, { "epoch": 1.474766163017782, "grad_norm": 0.19241276383399963, "learning_rate": 4.840269359451319e-05, "loss": 0.5595, "step": 7174 }, { "epoch": 1.4749717339911603, "grad_norm": 0.16710297763347626, "learning_rate": 4.839273399895772e-05, "loss": 0.5195, "step": 7175 }, { "epoch": 1.475177304964539, "grad_norm": 0.15979520976543427, "learning_rate": 4.8382774236258085e-05, "loss": 0.5616, "step": 7176 }, { "epoch": 1.4753828759379175, "grad_norm": 0.2003268003463745, "learning_rate": 4.8372814306904984e-05, "loss": 0.5718, "step": 7177 }, { "epoch": 1.475588446911296, "grad_norm": 0.18857726454734802, "learning_rate": 4.83628542113891e-05, "loss": 0.5305, "step": 7178 }, { "epoch": 1.4757940178846747, "grad_norm": 0.15321624279022217, "learning_rate": 4.8352893950201096e-05, "loss": 0.5213, "step": 7179 }, { "epoch": 1.4759995888580533, "grad_norm": 0.15973275899887085, "learning_rate": 4.834293352383168e-05, "loss": 0.5575, "step": 7180 }, { "epoch": 1.4762051598314319, "grad_norm": 0.18778233230113983, "learning_rate": 4.8332972932771556e-05, "loss": 0.5239, "step": 7181 }, { "epoch": 1.4764107308048104, "grad_norm": 0.15525855123996735, "learning_rate": 4.832301217751142e-05, "loss": 0.4881, "step": 7182 }, { "epoch": 1.476616301778189, "grad_norm": 0.15355351567268372, "learning_rate": 4.8313051258542024e-05, "loss": 0.5315, "step": 7183 }, { "epoch": 1.4768218727515674, "grad_norm": 0.2030985951423645, "learning_rate": 4.830309017635407e-05, "loss": 0.5901, "step": 7184 }, { "epoch": 1.477027443724946, "grad_norm": 0.19170239567756653, "learning_rate": 4.82931289314383e-05, "loss": 0.5517, "step": 7185 }, { "epoch": 1.4772330146983246, "grad_norm": 0.19333000481128693, "learning_rate": 4.828316752428545e-05, "loss": 0.5547, "step": 7186 }, { "epoch": 1.4774385856717032, "grad_norm": 0.19361145794391632, "learning_rate": 4.82732059553863e-05, "loss": 0.5518, "step": 7187 }, { "epoch": 1.4776441566450818, "grad_norm": 0.16968531906604767, "learning_rate": 4.8263244225231586e-05, "loss": 0.5055, "step": 7188 }, { "epoch": 1.4778497276184603, "grad_norm": 0.1647455245256424, "learning_rate": 4.825328233431207e-05, "loss": 0.5489, "step": 7189 }, { "epoch": 1.4780552985918387, "grad_norm": 0.18998976051807404, "learning_rate": 4.824332028311856e-05, "loss": 0.5302, "step": 7190 }, { "epoch": 1.4782608695652173, "grad_norm": 0.18618905544281006, "learning_rate": 4.8233358072141806e-05, "loss": 0.5217, "step": 7191 }, { "epoch": 1.4784664405385959, "grad_norm": 0.19258539378643036, "learning_rate": 4.822339570187261e-05, "loss": 0.5551, "step": 7192 }, { "epoch": 1.4786720115119745, "grad_norm": 0.1874276101589203, "learning_rate": 4.821343317280179e-05, "loss": 0.5409, "step": 7193 }, { "epoch": 1.478877582485353, "grad_norm": 0.18570971488952637, "learning_rate": 4.8203470485420126e-05, "loss": 0.5524, "step": 7194 }, { "epoch": 1.4790831534587316, "grad_norm": 0.19946832954883575, "learning_rate": 4.819350764021844e-05, "loss": 0.5618, "step": 7195 }, { "epoch": 1.4792887244321102, "grad_norm": 0.1732860952615738, "learning_rate": 4.818354463768756e-05, "loss": 0.5354, "step": 7196 }, { "epoch": 1.4794942954054888, "grad_norm": 0.16083048284053802, "learning_rate": 4.817358147831831e-05, "loss": 0.539, "step": 7197 }, { "epoch": 1.4796998663788674, "grad_norm": 0.1897859424352646, "learning_rate": 4.816361816260155e-05, "loss": 0.54, "step": 7198 }, { "epoch": 1.479905437352246, "grad_norm": 0.1890067458152771, "learning_rate": 4.815365469102809e-05, "loss": 0.5339, "step": 7199 }, { "epoch": 1.4801110083256244, "grad_norm": 0.19852851331233978, "learning_rate": 4.8143691064088823e-05, "loss": 0.555, "step": 7200 }, { "epoch": 1.480316579299003, "grad_norm": 0.1849977821111679, "learning_rate": 4.813372728227459e-05, "loss": 0.5255, "step": 7201 }, { "epoch": 1.4805221502723815, "grad_norm": 0.1914818435907364, "learning_rate": 4.8123763346076256e-05, "loss": 0.5525, "step": 7202 }, { "epoch": 1.4807277212457601, "grad_norm": 0.2014429122209549, "learning_rate": 4.811379925598469e-05, "loss": 0.5693, "step": 7203 }, { "epoch": 1.4809332922191387, "grad_norm": 0.1984141618013382, "learning_rate": 4.81038350124908e-05, "loss": 0.5566, "step": 7204 }, { "epoch": 1.481138863192517, "grad_norm": 0.19716762006282806, "learning_rate": 4.809387061608548e-05, "loss": 0.5513, "step": 7205 }, { "epoch": 1.4813444341658957, "grad_norm": 0.19718822836875916, "learning_rate": 4.8083906067259585e-05, "loss": 0.5376, "step": 7206 }, { "epoch": 1.4815500051392743, "grad_norm": 0.1910613626241684, "learning_rate": 4.807394136650406e-05, "loss": 0.5604, "step": 7207 }, { "epoch": 1.4817555761126529, "grad_norm": 0.19918161630630493, "learning_rate": 4.806397651430983e-05, "loss": 0.549, "step": 7208 }, { "epoch": 1.4819611470860314, "grad_norm": 0.18760617077350616, "learning_rate": 4.805401151116778e-05, "loss": 0.5507, "step": 7209 }, { "epoch": 1.48216671805941, "grad_norm": 0.15669982135295868, "learning_rate": 4.804404635756886e-05, "loss": 0.5268, "step": 7210 }, { "epoch": 1.4823722890327886, "grad_norm": 0.16258768737316132, "learning_rate": 4.803408105400401e-05, "loss": 0.5557, "step": 7211 }, { "epoch": 1.4825778600061672, "grad_norm": 0.200164794921875, "learning_rate": 4.802411560096418e-05, "loss": 0.5652, "step": 7212 }, { "epoch": 1.4827834309795458, "grad_norm": 0.1986524760723114, "learning_rate": 4.801414999894028e-05, "loss": 0.5608, "step": 7213 }, { "epoch": 1.4829890019529244, "grad_norm": 0.15464936196804047, "learning_rate": 4.8004184248423325e-05, "loss": 0.519, "step": 7214 }, { "epoch": 1.4831945729263027, "grad_norm": 0.15096427500247955, "learning_rate": 4.799421834990424e-05, "loss": 0.5417, "step": 7215 }, { "epoch": 1.4834001438996813, "grad_norm": 0.15722674131393433, "learning_rate": 4.798425230387402e-05, "loss": 0.5158, "step": 7216 }, { "epoch": 1.48360571487306, "grad_norm": 0.15923316776752472, "learning_rate": 4.797428611082362e-05, "loss": 0.5495, "step": 7217 }, { "epoch": 1.4838112858464385, "grad_norm": 0.16226224601268768, "learning_rate": 4.796431977124405e-05, "loss": 0.5213, "step": 7218 }, { "epoch": 1.484016856819817, "grad_norm": 0.16145376861095428, "learning_rate": 4.7954353285626314e-05, "loss": 0.568, "step": 7219 }, { "epoch": 1.4842224277931955, "grad_norm": 0.15974651277065277, "learning_rate": 4.7944386654461385e-05, "loss": 0.512, "step": 7220 }, { "epoch": 1.484427998766574, "grad_norm": 0.15350697934627533, "learning_rate": 4.7934419878240296e-05, "loss": 0.5473, "step": 7221 }, { "epoch": 1.4846335697399526, "grad_norm": 0.19197656214237213, "learning_rate": 4.792445295745406e-05, "loss": 0.5461, "step": 7222 }, { "epoch": 1.4848391407133312, "grad_norm": 0.19040462374687195, "learning_rate": 4.7914485892593686e-05, "loss": 0.5372, "step": 7223 }, { "epoch": 1.4850447116867098, "grad_norm": 0.1572524458169937, "learning_rate": 4.790451868415021e-05, "loss": 0.5145, "step": 7224 }, { "epoch": 1.4852502826600884, "grad_norm": 0.15703527629375458, "learning_rate": 4.7894551332614686e-05, "loss": 0.5627, "step": 7225 }, { "epoch": 1.485455853633467, "grad_norm": 0.16500575840473175, "learning_rate": 4.788458383847816e-05, "loss": 0.5229, "step": 7226 }, { "epoch": 1.4856614246068456, "grad_norm": 0.16244147717952728, "learning_rate": 4.787461620223164e-05, "loss": 0.5392, "step": 7227 }, { "epoch": 1.4858669955802242, "grad_norm": 0.19701159000396729, "learning_rate": 4.786464842436623e-05, "loss": 0.5197, "step": 7228 }, { "epoch": 1.4860725665536028, "grad_norm": 0.18858790397644043, "learning_rate": 4.785468050537298e-05, "loss": 0.5707, "step": 7229 }, { "epoch": 1.4862781375269811, "grad_norm": 0.1888207048177719, "learning_rate": 4.784471244574295e-05, "loss": 0.5432, "step": 7230 }, { "epoch": 1.4864837085003597, "grad_norm": 0.19446338713169098, "learning_rate": 4.783474424596726e-05, "loss": 0.5676, "step": 7231 }, { "epoch": 1.4866892794737383, "grad_norm": 0.19412629306316376, "learning_rate": 4.782477590653696e-05, "loss": 0.5435, "step": 7232 }, { "epoch": 1.486894850447117, "grad_norm": 0.18198393285274506, "learning_rate": 4.781480742794316e-05, "loss": 0.5172, "step": 7233 }, { "epoch": 1.4871004214204955, "grad_norm": 0.2016136646270752, "learning_rate": 4.7804838810676935e-05, "loss": 0.5872, "step": 7234 }, { "epoch": 1.487305992393874, "grad_norm": 0.17606668174266815, "learning_rate": 4.779487005522943e-05, "loss": 0.5324, "step": 7235 }, { "epoch": 1.4875115633672524, "grad_norm": 0.16043418645858765, "learning_rate": 4.778490116209174e-05, "loss": 0.5447, "step": 7236 }, { "epoch": 1.487717134340631, "grad_norm": 0.19674460589885712, "learning_rate": 4.7774932131754975e-05, "loss": 0.5595, "step": 7237 }, { "epoch": 1.4879227053140096, "grad_norm": 0.2002599984407425, "learning_rate": 4.776496296471029e-05, "loss": 0.5289, "step": 7238 }, { "epoch": 1.4881282762873882, "grad_norm": 0.18798843026161194, "learning_rate": 4.775499366144878e-05, "loss": 0.5465, "step": 7239 }, { "epoch": 1.4883338472607668, "grad_norm": 0.18151499330997467, "learning_rate": 4.7745024222461626e-05, "loss": 0.5398, "step": 7240 }, { "epoch": 1.4885394182341454, "grad_norm": 0.16490262746810913, "learning_rate": 4.773505464823995e-05, "loss": 0.5314, "step": 7241 }, { "epoch": 1.488744989207524, "grad_norm": 0.16644752025604248, "learning_rate": 4.772508493927492e-05, "loss": 0.5573, "step": 7242 }, { "epoch": 1.4889505601809025, "grad_norm": 0.1932040899991989, "learning_rate": 4.77151150960577e-05, "loss": 0.5464, "step": 7243 }, { "epoch": 1.4891561311542811, "grad_norm": 0.19342085719108582, "learning_rate": 4.770514511907943e-05, "loss": 0.5528, "step": 7244 }, { "epoch": 1.4893617021276595, "grad_norm": 0.1693827509880066, "learning_rate": 4.7695175008831317e-05, "loss": 0.5318, "step": 7245 }, { "epoch": 1.489567273101038, "grad_norm": 0.15933051705360413, "learning_rate": 4.768520476580454e-05, "loss": 0.5436, "step": 7246 }, { "epoch": 1.4897728440744167, "grad_norm": 0.19581708312034607, "learning_rate": 4.767523439049026e-05, "loss": 0.5502, "step": 7247 }, { "epoch": 1.4899784150477953, "grad_norm": 0.1909896582365036, "learning_rate": 4.7665263883379685e-05, "loss": 0.5415, "step": 7248 }, { "epoch": 1.4901839860211739, "grad_norm": 0.1643315702676773, "learning_rate": 4.765529324496402e-05, "loss": 0.5078, "step": 7249 }, { "epoch": 1.4903895569945524, "grad_norm": 0.15782994031906128, "learning_rate": 4.764532247573446e-05, "loss": 0.5169, "step": 7250 }, { "epoch": 1.4905951279679308, "grad_norm": 0.16611091792583466, "learning_rate": 4.763535157618222e-05, "loss": 0.5207, "step": 7251 }, { "epoch": 1.4908006989413094, "grad_norm": 0.1263076364994049, "learning_rate": 4.7625380546798546e-05, "loss": 0.5362, "step": 7252 }, { "epoch": 1.491006269914688, "grad_norm": 0.16741037368774414, "learning_rate": 4.761540938807464e-05, "loss": 0.5364, "step": 7253 }, { "epoch": 1.4912118408880666, "grad_norm": 0.19533216953277588, "learning_rate": 4.760543810050174e-05, "loss": 0.5505, "step": 7254 }, { "epoch": 1.4914174118614452, "grad_norm": 0.19828902184963226, "learning_rate": 4.759546668457107e-05, "loss": 0.5722, "step": 7255 }, { "epoch": 1.4916229828348238, "grad_norm": 0.19037294387817383, "learning_rate": 4.7585495140773894e-05, "loss": 0.557, "step": 7256 }, { "epoch": 1.4918285538082023, "grad_norm": 0.1699882447719574, "learning_rate": 4.7575523469601464e-05, "loss": 0.5252, "step": 7257 }, { "epoch": 1.492034124781581, "grad_norm": 0.16292616724967957, "learning_rate": 4.7565551671545003e-05, "loss": 0.5557, "step": 7258 }, { "epoch": 1.4922396957549595, "grad_norm": 0.1898314654827118, "learning_rate": 4.755557974709584e-05, "loss": 0.5341, "step": 7259 }, { "epoch": 1.4924452667283379, "grad_norm": 0.18847902119159698, "learning_rate": 4.7545607696745186e-05, "loss": 0.557, "step": 7260 }, { "epoch": 1.4926508377017165, "grad_norm": 0.16627360880374908, "learning_rate": 4.753563552098433e-05, "loss": 0.5283, "step": 7261 }, { "epoch": 1.492856408675095, "grad_norm": 0.16107410192489624, "learning_rate": 4.752566322030457e-05, "loss": 0.5447, "step": 7262 }, { "epoch": 1.4930619796484736, "grad_norm": 0.16727054119110107, "learning_rate": 4.751569079519721e-05, "loss": 0.5214, "step": 7263 }, { "epoch": 1.4932675506218522, "grad_norm": 0.1620626598596573, "learning_rate": 4.75057182461535e-05, "loss": 0.5117, "step": 7264 }, { "epoch": 1.4934731215952308, "grad_norm": 0.1607995629310608, "learning_rate": 4.749574557366477e-05, "loss": 0.5112, "step": 7265 }, { "epoch": 1.4936786925686092, "grad_norm": 0.16359218955039978, "learning_rate": 4.748577277822232e-05, "loss": 0.5528, "step": 7266 }, { "epoch": 1.4938842635419878, "grad_norm": 0.19799359142780304, "learning_rate": 4.747579986031747e-05, "loss": 0.5505, "step": 7267 }, { "epoch": 1.4940898345153664, "grad_norm": 0.1984180063009262, "learning_rate": 4.746582682044153e-05, "loss": 0.5721, "step": 7268 }, { "epoch": 1.494295405488745, "grad_norm": 0.196151003241539, "learning_rate": 4.745585365908582e-05, "loss": 0.5405, "step": 7269 }, { "epoch": 1.4945009764621235, "grad_norm": 0.16846486926078796, "learning_rate": 4.744588037674169e-05, "loss": 0.5246, "step": 7270 }, { "epoch": 1.4947065474355021, "grad_norm": 0.16317616403102875, "learning_rate": 4.743590697390045e-05, "loss": 0.5584, "step": 7271 }, { "epoch": 1.4949121184088807, "grad_norm": 0.18906491994857788, "learning_rate": 4.7425933451053474e-05, "loss": 0.5638, "step": 7272 }, { "epoch": 1.4951176893822593, "grad_norm": 0.16003085672855377, "learning_rate": 4.7415959808692085e-05, "loss": 0.5194, "step": 7273 }, { "epoch": 1.495323260355638, "grad_norm": 0.12602053582668304, "learning_rate": 4.740598604730766e-05, "loss": 0.5273, "step": 7274 }, { "epoch": 1.4955288313290163, "grad_norm": 0.15797413885593414, "learning_rate": 4.7396012167391536e-05, "loss": 0.537, "step": 7275 }, { "epoch": 1.4957344023023948, "grad_norm": 0.19751828908920288, "learning_rate": 4.73860381694351e-05, "loss": 0.5497, "step": 7276 }, { "epoch": 1.4959399732757734, "grad_norm": 0.1944907009601593, "learning_rate": 4.7376064053929724e-05, "loss": 0.5645, "step": 7277 }, { "epoch": 1.496145544249152, "grad_norm": 0.18657876551151276, "learning_rate": 4.736608982136676e-05, "loss": 0.5405, "step": 7278 }, { "epoch": 1.4963511152225306, "grad_norm": 0.19843631982803345, "learning_rate": 4.735611547223761e-05, "loss": 0.5417, "step": 7279 }, { "epoch": 1.4965566861959092, "grad_norm": 0.19256174564361572, "learning_rate": 4.7346141007033676e-05, "loss": 0.541, "step": 7280 }, { "epoch": 1.4967622571692876, "grad_norm": 0.18709734082221985, "learning_rate": 4.733616642624634e-05, "loss": 0.539, "step": 7281 }, { "epoch": 1.4969678281426662, "grad_norm": 0.1940479278564453, "learning_rate": 4.732619173036699e-05, "loss": 0.5556, "step": 7282 }, { "epoch": 1.4971733991160447, "grad_norm": 0.2690550684928894, "learning_rate": 4.731621691988705e-05, "loss": 0.5554, "step": 7283 }, { "epoch": 1.4973789700894233, "grad_norm": 0.19769832491874695, "learning_rate": 4.730624199529793e-05, "loss": 0.5355, "step": 7284 }, { "epoch": 1.497584541062802, "grad_norm": 0.23557159304618835, "learning_rate": 4.729626695709105e-05, "loss": 0.5675, "step": 7285 }, { "epoch": 1.4977901120361805, "grad_norm": 0.19808165729045868, "learning_rate": 4.728629180575783e-05, "loss": 0.5494, "step": 7286 }, { "epoch": 1.497995683009559, "grad_norm": 0.19333380460739136, "learning_rate": 4.7276316541789694e-05, "loss": 0.5534, "step": 7287 }, { "epoch": 1.4982012539829377, "grad_norm": 0.185968816280365, "learning_rate": 4.726634116567809e-05, "loss": 0.5273, "step": 7288 }, { "epoch": 1.4984068249563163, "grad_norm": 0.19186194241046906, "learning_rate": 4.725636567791443e-05, "loss": 0.5485, "step": 7289 }, { "epoch": 1.4986123959296949, "grad_norm": 0.16538295149803162, "learning_rate": 4.7246390078990195e-05, "loss": 0.5292, "step": 7290 }, { "epoch": 1.4988179669030732, "grad_norm": 0.16167549788951874, "learning_rate": 4.723641436939683e-05, "loss": 0.5493, "step": 7291 }, { "epoch": 1.4990235378764518, "grad_norm": 0.19126403331756592, "learning_rate": 4.722643854962577e-05, "loss": 0.531, "step": 7292 }, { "epoch": 1.4992291088498304, "grad_norm": 0.19075235724449158, "learning_rate": 4.721646262016849e-05, "loss": 0.5507, "step": 7293 }, { "epoch": 1.499434679823209, "grad_norm": 0.19539231061935425, "learning_rate": 4.720648658151645e-05, "loss": 0.5525, "step": 7294 }, { "epoch": 1.4996402507965876, "grad_norm": 0.19356007874011993, "learning_rate": 4.719651043416114e-05, "loss": 0.5398, "step": 7295 }, { "epoch": 1.499845821769966, "grad_norm": 0.19021181762218475, "learning_rate": 4.7186534178594016e-05, "loss": 0.5507, "step": 7296 }, { "epoch": 1.5000513927433445, "grad_norm": 0.1926860511302948, "learning_rate": 4.717655781530658e-05, "loss": 0.5716, "step": 7297 }, { "epoch": 1.5002569637167231, "grad_norm": 0.16523759067058563, "learning_rate": 4.716658134479031e-05, "loss": 0.4999, "step": 7298 }, { "epoch": 1.5004625346901017, "grad_norm": 0.15917497873306274, "learning_rate": 4.7156604767536716e-05, "loss": 0.5651, "step": 7299 }, { "epoch": 1.5006681056634803, "grad_norm": 0.16436144709587097, "learning_rate": 4.714662808403727e-05, "loss": 0.5352, "step": 7300 }, { "epoch": 1.5008736766368589, "grad_norm": 0.15653958916664124, "learning_rate": 4.71366512947835e-05, "loss": 0.5314, "step": 7301 }, { "epoch": 1.5010792476102375, "grad_norm": 0.1997946798801422, "learning_rate": 4.71266744002669e-05, "loss": 0.5585, "step": 7302 }, { "epoch": 1.501284818583616, "grad_norm": 0.1864425539970398, "learning_rate": 4.7116697400979e-05, "loss": 0.5312, "step": 7303 }, { "epoch": 1.5014903895569947, "grad_norm": 0.19595369696617126, "learning_rate": 4.710672029741131e-05, "loss": 0.5518, "step": 7304 }, { "epoch": 1.5016959605303732, "grad_norm": 0.16003580391407013, "learning_rate": 4.7096743090055354e-05, "loss": 0.5241, "step": 7305 }, { "epoch": 1.5019015315037518, "grad_norm": 0.16186951100826263, "learning_rate": 4.708676577940266e-05, "loss": 0.5391, "step": 7306 }, { "epoch": 1.5021071024771302, "grad_norm": 0.16420379281044006, "learning_rate": 4.707678836594478e-05, "loss": 0.4949, "step": 7307 }, { "epoch": 1.5023126734505088, "grad_norm": 0.15808852016925812, "learning_rate": 4.706681085017325e-05, "loss": 0.5566, "step": 7308 }, { "epoch": 1.5025182444238874, "grad_norm": 0.18840067088603973, "learning_rate": 4.7056833232579604e-05, "loss": 0.5491, "step": 7309 }, { "epoch": 1.502723815397266, "grad_norm": 0.16313523054122925, "learning_rate": 4.70468555136554e-05, "loss": 0.5258, "step": 7310 }, { "epoch": 1.5029293863706443, "grad_norm": 0.15801596641540527, "learning_rate": 4.703687769389219e-05, "loss": 0.5443, "step": 7311 }, { "epoch": 1.503134957344023, "grad_norm": 0.19635756313800812, "learning_rate": 4.702689977378154e-05, "loss": 0.5529, "step": 7312 }, { "epoch": 1.5033405283174015, "grad_norm": 0.1938237100839615, "learning_rate": 4.7016921753815e-05, "loss": 0.564, "step": 7313 }, { "epoch": 1.50354609929078, "grad_norm": 0.22758108377456665, "learning_rate": 4.7006943634484154e-05, "loss": 0.5604, "step": 7314 }, { "epoch": 1.5037516702641587, "grad_norm": 0.2014021873474121, "learning_rate": 4.699696541628058e-05, "loss": 0.5574, "step": 7315 }, { "epoch": 1.5039572412375373, "grad_norm": 0.1863914430141449, "learning_rate": 4.698698709969585e-05, "loss": 0.5268, "step": 7316 }, { "epoch": 1.5041628122109159, "grad_norm": 0.19100484251976013, "learning_rate": 4.6977008685221556e-05, "loss": 0.5515, "step": 7317 }, { "epoch": 1.5043683831842944, "grad_norm": 0.1965937614440918, "learning_rate": 4.6967030173349285e-05, "loss": 0.557, "step": 7318 }, { "epoch": 1.504573954157673, "grad_norm": 0.16544751822948456, "learning_rate": 4.695705156457064e-05, "loss": 0.5139, "step": 7319 }, { "epoch": 1.5047795251310516, "grad_norm": 0.1309744417667389, "learning_rate": 4.69470728593772e-05, "loss": 0.5451, "step": 7320 }, { "epoch": 1.5049850961044302, "grad_norm": 0.16225290298461914, "learning_rate": 4.6937094058260585e-05, "loss": 0.5624, "step": 7321 }, { "epoch": 1.5051906670778086, "grad_norm": 0.19539402425289154, "learning_rate": 4.69271151617124e-05, "loss": 0.5485, "step": 7322 }, { "epoch": 1.5053962380511872, "grad_norm": 0.19238321483135223, "learning_rate": 4.691713617022427e-05, "loss": 0.537, "step": 7323 }, { "epoch": 1.5056018090245658, "grad_norm": 0.19159597158432007, "learning_rate": 4.6907157084287774e-05, "loss": 0.5662, "step": 7324 }, { "epoch": 1.5058073799979443, "grad_norm": 0.16289053857326508, "learning_rate": 4.689717790439459e-05, "loss": 0.5125, "step": 7325 }, { "epoch": 1.5060129509713227, "grad_norm": 0.16851918399333954, "learning_rate": 4.6887198631036295e-05, "loss": 0.5272, "step": 7326 }, { "epoch": 1.5062185219447013, "grad_norm": 0.1957252323627472, "learning_rate": 4.687721926470455e-05, "loss": 0.5669, "step": 7327 }, { "epoch": 1.5064240929180799, "grad_norm": 0.1756441295146942, "learning_rate": 4.686723980589099e-05, "loss": 0.5055, "step": 7328 }, { "epoch": 1.5066296638914585, "grad_norm": 0.16394411027431488, "learning_rate": 4.685726025508726e-05, "loss": 0.5624, "step": 7329 }, { "epoch": 1.506835234864837, "grad_norm": 0.16520611941814423, "learning_rate": 4.684728061278499e-05, "loss": 0.5223, "step": 7330 }, { "epoch": 1.5070408058382156, "grad_norm": 0.12648457288742065, "learning_rate": 4.683730087947584e-05, "loss": 0.5101, "step": 7331 }, { "epoch": 1.5072463768115942, "grad_norm": 0.11723072826862335, "learning_rate": 4.682732105565146e-05, "loss": 0.5237, "step": 7332 }, { "epoch": 1.5074519477849728, "grad_norm": 0.12541693449020386, "learning_rate": 4.681734114180352e-05, "loss": 0.5185, "step": 7333 }, { "epoch": 1.5076575187583514, "grad_norm": 0.15850338339805603, "learning_rate": 4.6807361138423664e-05, "loss": 0.5335, "step": 7334 }, { "epoch": 1.50786308973173, "grad_norm": 0.1691320687532425, "learning_rate": 4.679738104600359e-05, "loss": 0.5226, "step": 7335 }, { "epoch": 1.5080686607051086, "grad_norm": 0.16223326325416565, "learning_rate": 4.678740086503494e-05, "loss": 0.5376, "step": 7336 }, { "epoch": 1.508274231678487, "grad_norm": 0.18564042448997498, "learning_rate": 4.6777420596009406e-05, "loss": 0.5129, "step": 7337 }, { "epoch": 1.5084798026518655, "grad_norm": 0.1631714105606079, "learning_rate": 4.676744023941866e-05, "loss": 0.5274, "step": 7338 }, { "epoch": 1.5086853736252441, "grad_norm": 0.15576131641864777, "learning_rate": 4.67574597957544e-05, "loss": 0.5431, "step": 7339 }, { "epoch": 1.5088909445986227, "grad_norm": 0.19135643541812897, "learning_rate": 4.6747479265508314e-05, "loss": 0.5605, "step": 7340 }, { "epoch": 1.509096515572001, "grad_norm": 0.2023853212594986, "learning_rate": 4.673749864917209e-05, "loss": 0.5562, "step": 7341 }, { "epoch": 1.5093020865453797, "grad_norm": 0.1936071515083313, "learning_rate": 4.672751794723743e-05, "loss": 0.5556, "step": 7342 }, { "epoch": 1.5095076575187583, "grad_norm": 0.18370911478996277, "learning_rate": 4.671753716019604e-05, "loss": 0.5524, "step": 7343 }, { "epoch": 1.5097132284921368, "grad_norm": 0.15776073932647705, "learning_rate": 4.6707556288539605e-05, "loss": 0.4955, "step": 7344 }, { "epoch": 1.5099187994655154, "grad_norm": 0.15899749100208282, "learning_rate": 4.6697575332759865e-05, "loss": 0.5609, "step": 7345 }, { "epoch": 1.510124370438894, "grad_norm": 0.19149565696716309, "learning_rate": 4.668759429334852e-05, "loss": 0.5453, "step": 7346 }, { "epoch": 1.5103299414122726, "grad_norm": 0.20891959965229034, "learning_rate": 4.667761317079729e-05, "loss": 0.5634, "step": 7347 }, { "epoch": 1.5105355123856512, "grad_norm": 0.18865418434143066, "learning_rate": 4.666763196559791e-05, "loss": 0.5462, "step": 7348 }, { "epoch": 1.5107410833590298, "grad_norm": 0.18833813071250916, "learning_rate": 4.6657650678242085e-05, "loss": 0.5334, "step": 7349 }, { "epoch": 1.5109466543324084, "grad_norm": 0.18930873274803162, "learning_rate": 4.664766930922157e-05, "loss": 0.5332, "step": 7350 }, { "epoch": 1.511152225305787, "grad_norm": 0.19529637694358826, "learning_rate": 4.663768785902807e-05, "loss": 0.5644, "step": 7351 }, { "epoch": 1.5113577962791653, "grad_norm": 0.1973542720079422, "learning_rate": 4.662770632815337e-05, "loss": 0.5617, "step": 7352 }, { "epoch": 1.511563367252544, "grad_norm": 0.18992508947849274, "learning_rate": 4.6617724717089174e-05, "loss": 0.5536, "step": 7353 }, { "epoch": 1.5117689382259225, "grad_norm": 0.16945968568325043, "learning_rate": 4.660774302632724e-05, "loss": 0.5303, "step": 7354 }, { "epoch": 1.511974509199301, "grad_norm": 0.15689992904663086, "learning_rate": 4.659776125635932e-05, "loss": 0.5519, "step": 7355 }, { "epoch": 1.5121800801726795, "grad_norm": 0.8934375643730164, "learning_rate": 4.6587779407677185e-05, "loss": 0.579, "step": 7356 }, { "epoch": 1.512385651146058, "grad_norm": 0.1862555593252182, "learning_rate": 4.657779748077257e-05, "loss": 0.5403, "step": 7357 }, { "epoch": 1.5125912221194366, "grad_norm": 0.19881917536258698, "learning_rate": 4.656781547613724e-05, "loss": 0.5623, "step": 7358 }, { "epoch": 1.5127967930928152, "grad_norm": 0.16885186731815338, "learning_rate": 4.655783339426297e-05, "loss": 0.5123, "step": 7359 }, { "epoch": 1.5130023640661938, "grad_norm": 0.1638081818819046, "learning_rate": 4.654785123564155e-05, "loss": 0.5536, "step": 7360 }, { "epoch": 1.5132079350395724, "grad_norm": 0.19882342219352722, "learning_rate": 4.653786900076472e-05, "loss": 0.5512, "step": 7361 }, { "epoch": 1.513413506012951, "grad_norm": 0.20189371705055237, "learning_rate": 4.652788669012427e-05, "loss": 0.5612, "step": 7362 }, { "epoch": 1.5136190769863296, "grad_norm": 0.1760426163673401, "learning_rate": 4.651790430421199e-05, "loss": 0.5255, "step": 7363 }, { "epoch": 1.5138246479597082, "grad_norm": 0.16108988225460052, "learning_rate": 4.6507921843519664e-05, "loss": 0.5382, "step": 7364 }, { "epoch": 1.5140302189330868, "grad_norm": 0.19698002934455872, "learning_rate": 4.649793930853907e-05, "loss": 0.5369, "step": 7365 }, { "epoch": 1.5142357899064653, "grad_norm": 0.20208927989006042, "learning_rate": 4.6487956699762004e-05, "loss": 0.5455, "step": 7366 }, { "epoch": 1.5144413608798437, "grad_norm": 0.1949499249458313, "learning_rate": 4.6477974017680275e-05, "loss": 0.5547, "step": 7367 }, { "epoch": 1.5146469318532223, "grad_norm": 0.19195735454559326, "learning_rate": 4.646799126278567e-05, "loss": 0.5309, "step": 7368 }, { "epoch": 1.5148525028266009, "grad_norm": 0.16471721231937408, "learning_rate": 4.645800843556999e-05, "loss": 0.5248, "step": 7369 }, { "epoch": 1.5150580737999795, "grad_norm": 0.16040369868278503, "learning_rate": 4.644802553652505e-05, "loss": 0.5192, "step": 7370 }, { "epoch": 1.5152636447733578, "grad_norm": 0.16164757311344147, "learning_rate": 4.643804256614267e-05, "loss": 0.5253, "step": 7371 }, { "epoch": 1.5154692157467364, "grad_norm": 0.15787971019744873, "learning_rate": 4.6428059524914643e-05, "loss": 0.5589, "step": 7372 }, { "epoch": 1.515674786720115, "grad_norm": 0.19109466671943665, "learning_rate": 4.641807641333281e-05, "loss": 0.5557, "step": 7373 }, { "epoch": 1.5158803576934936, "grad_norm": 0.19500547647476196, "learning_rate": 4.640809323188897e-05, "loss": 0.5407, "step": 7374 }, { "epoch": 1.5160859286668722, "grad_norm": 0.1970156580209732, "learning_rate": 4.639810998107497e-05, "loss": 0.5453, "step": 7375 }, { "epoch": 1.5162914996402508, "grad_norm": 0.20001158118247986, "learning_rate": 4.638812666138261e-05, "loss": 0.5552, "step": 7376 }, { "epoch": 1.5164970706136294, "grad_norm": 0.16510051488876343, "learning_rate": 4.637814327330376e-05, "loss": 0.5262, "step": 7377 }, { "epoch": 1.516702641587008, "grad_norm": 0.161884605884552, "learning_rate": 4.636815981733022e-05, "loss": 0.5346, "step": 7378 }, { "epoch": 1.5169082125603865, "grad_norm": 0.17277652025222778, "learning_rate": 4.635817629395383e-05, "loss": 0.5142, "step": 7379 }, { "epoch": 1.5171137835337651, "grad_norm": 0.15767474472522736, "learning_rate": 4.6348192703666444e-05, "loss": 0.529, "step": 7380 }, { "epoch": 1.5173193545071437, "grad_norm": 0.19689583778381348, "learning_rate": 4.633820904695992e-05, "loss": 0.5467, "step": 7381 }, { "epoch": 1.5175249254805223, "grad_norm": 0.19332459568977356, "learning_rate": 4.6328225324326066e-05, "loss": 0.5505, "step": 7382 }, { "epoch": 1.5177304964539007, "grad_norm": 0.19339875876903534, "learning_rate": 4.631824153625679e-05, "loss": 0.5504, "step": 7383 }, { "epoch": 1.5179360674272793, "grad_norm": 0.19665616750717163, "learning_rate": 4.63082576832439e-05, "loss": 0.5474, "step": 7384 }, { "epoch": 1.5181416384006579, "grad_norm": 0.19962632656097412, "learning_rate": 4.629827376577927e-05, "loss": 0.5514, "step": 7385 }, { "epoch": 1.5183472093740362, "grad_norm": 0.19536101818084717, "learning_rate": 4.628828978435475e-05, "loss": 0.55, "step": 7386 }, { "epoch": 1.5185527803474148, "grad_norm": 0.19217143952846527, "learning_rate": 4.627830573946223e-05, "loss": 0.5404, "step": 7387 }, { "epoch": 1.5187583513207934, "grad_norm": 0.20492962002754211, "learning_rate": 4.6268321631593556e-05, "loss": 0.5701, "step": 7388 }, { "epoch": 1.518963922294172, "grad_norm": 0.16076092422008514, "learning_rate": 4.6258337461240595e-05, "loss": 0.5199, "step": 7389 }, { "epoch": 1.5191694932675506, "grad_norm": 0.16766008734703064, "learning_rate": 4.624835322889524e-05, "loss": 0.544, "step": 7390 }, { "epoch": 1.5193750642409292, "grad_norm": 0.19400693476200104, "learning_rate": 4.623836893504934e-05, "loss": 0.526, "step": 7391 }, { "epoch": 1.5195806352143078, "grad_norm": 0.19015835225582123, "learning_rate": 4.62283845801948e-05, "loss": 0.5383, "step": 7392 }, { "epoch": 1.5197862061876863, "grad_norm": 0.19058318436145782, "learning_rate": 4.6218400164823495e-05, "loss": 0.5406, "step": 7393 }, { "epoch": 1.519991777161065, "grad_norm": 0.1955268830060959, "learning_rate": 4.620841568942731e-05, "loss": 0.5357, "step": 7394 }, { "epoch": 1.5201973481344435, "grad_norm": 0.16312715411186218, "learning_rate": 4.619843115449814e-05, "loss": 0.5241, "step": 7395 }, { "epoch": 1.520402919107822, "grad_norm": 0.16432897746562958, "learning_rate": 4.6188446560527846e-05, "loss": 0.5364, "step": 7396 }, { "epoch": 1.5206084900812007, "grad_norm": 0.1991865038871765, "learning_rate": 4.617846190800837e-05, "loss": 0.5332, "step": 7397 }, { "epoch": 1.520814061054579, "grad_norm": 0.19771799445152283, "learning_rate": 4.616847719743157e-05, "loss": 0.5473, "step": 7398 }, { "epoch": 1.5210196320279576, "grad_norm": 0.21633638441562653, "learning_rate": 4.615849242928936e-05, "loss": 0.582, "step": 7399 }, { "epoch": 1.5212252030013362, "grad_norm": 0.19637715816497803, "learning_rate": 4.614850760407364e-05, "loss": 0.5619, "step": 7400 }, { "epoch": 1.5214307739747148, "grad_norm": 0.1928258240222931, "learning_rate": 4.613852272227633e-05, "loss": 0.5578, "step": 7401 }, { "epoch": 1.5216363449480932, "grad_norm": 0.19066447019577026, "learning_rate": 4.612853778438931e-05, "loss": 0.5507, "step": 7402 }, { "epoch": 1.5218419159214718, "grad_norm": 0.19168606400489807, "learning_rate": 4.611855279090452e-05, "loss": 0.5625, "step": 7403 }, { "epoch": 1.5220474868948504, "grad_norm": 0.18386611342430115, "learning_rate": 4.610856774231386e-05, "loss": 0.5484, "step": 7404 }, { "epoch": 1.522253057868229, "grad_norm": 0.1938936412334442, "learning_rate": 4.609858263910925e-05, "loss": 0.5629, "step": 7405 }, { "epoch": 1.5224586288416075, "grad_norm": 0.1900719851255417, "learning_rate": 4.6088597481782606e-05, "loss": 0.5491, "step": 7406 }, { "epoch": 1.5226641998149861, "grad_norm": 0.18934617936611176, "learning_rate": 4.607861227082585e-05, "loss": 0.5377, "step": 7407 }, { "epoch": 1.5228697707883647, "grad_norm": 0.20040073990821838, "learning_rate": 4.606862700673091e-05, "loss": 0.5384, "step": 7408 }, { "epoch": 1.5230753417617433, "grad_norm": 0.19345182180404663, "learning_rate": 4.6058641689989724e-05, "loss": 0.5519, "step": 7409 }, { "epoch": 1.523280912735122, "grad_norm": 0.19998955726623535, "learning_rate": 4.6048656321094196e-05, "loss": 0.5499, "step": 7410 }, { "epoch": 1.5234864837085005, "grad_norm": 0.2003701776266098, "learning_rate": 4.603867090053627e-05, "loss": 0.5471, "step": 7411 }, { "epoch": 1.523692054681879, "grad_norm": 0.1997435837984085, "learning_rate": 4.6028685428807896e-05, "loss": 0.5349, "step": 7412 }, { "epoch": 1.5238976256552574, "grad_norm": 0.19210022687911987, "learning_rate": 4.6018699906400996e-05, "loss": 0.5452, "step": 7413 }, { "epoch": 1.524103196628636, "grad_norm": 0.19292627274990082, "learning_rate": 4.6008714333807496e-05, "loss": 0.5605, "step": 7414 }, { "epoch": 1.5243087676020146, "grad_norm": 0.18850092589855194, "learning_rate": 4.599872871151937e-05, "loss": 0.5521, "step": 7415 }, { "epoch": 1.5245143385753932, "grad_norm": 0.19602644443511963, "learning_rate": 4.5988743040028554e-05, "loss": 0.55, "step": 7416 }, { "epoch": 1.5247199095487716, "grad_norm": 0.19302399456501007, "learning_rate": 4.597875731982697e-05, "loss": 0.5361, "step": 7417 }, { "epoch": 1.5249254805221502, "grad_norm": 0.16675427556037903, "learning_rate": 4.596877155140661e-05, "loss": 0.5136, "step": 7418 }, { "epoch": 1.5251310514955287, "grad_norm": 0.15877321362495422, "learning_rate": 4.59587857352594e-05, "loss": 0.5591, "step": 7419 }, { "epoch": 1.5253366224689073, "grad_norm": 0.16738201677799225, "learning_rate": 4.594879987187729e-05, "loss": 0.5191, "step": 7420 }, { "epoch": 1.525542193442286, "grad_norm": 0.16919690370559692, "learning_rate": 4.5938813961752254e-05, "loss": 0.5439, "step": 7421 }, { "epoch": 1.5257477644156645, "grad_norm": 0.15980926156044006, "learning_rate": 4.592882800537624e-05, "loss": 0.5099, "step": 7422 }, { "epoch": 1.525953335389043, "grad_norm": 0.1241704598069191, "learning_rate": 4.5918842003241195e-05, "loss": 0.5069, "step": 7423 }, { "epoch": 1.5261589063624217, "grad_norm": 0.1193804070353508, "learning_rate": 4.59088559558391e-05, "loss": 0.5091, "step": 7424 }, { "epoch": 1.5263644773358003, "grad_norm": 0.12635476887226105, "learning_rate": 4.589886986366194e-05, "loss": 0.5111, "step": 7425 }, { "epoch": 1.5265700483091789, "grad_norm": 0.11729497462511063, "learning_rate": 4.5888883727201665e-05, "loss": 0.5215, "step": 7426 }, { "epoch": 1.5267756192825575, "grad_norm": 0.16425076127052307, "learning_rate": 4.5878897546950225e-05, "loss": 0.5357, "step": 7427 }, { "epoch": 1.5269811902559358, "grad_norm": 0.20362845063209534, "learning_rate": 4.586891132339962e-05, "loss": 0.5392, "step": 7428 }, { "epoch": 1.5271867612293144, "grad_norm": 0.1934981644153595, "learning_rate": 4.585892505704182e-05, "loss": 0.5484, "step": 7429 }, { "epoch": 1.527392332202693, "grad_norm": 0.19643427431583405, "learning_rate": 4.584893874836879e-05, "loss": 0.5564, "step": 7430 }, { "epoch": 1.5275979031760716, "grad_norm": 0.1882271021604538, "learning_rate": 4.583895239787251e-05, "loss": 0.5667, "step": 7431 }, { "epoch": 1.52780347414945, "grad_norm": 0.15838836133480072, "learning_rate": 4.5828966006044974e-05, "loss": 0.5059, "step": 7432 }, { "epoch": 1.5280090451228285, "grad_norm": 0.16002227365970612, "learning_rate": 4.581897957337817e-05, "loss": 0.5405, "step": 7433 }, { "epoch": 1.5282146160962071, "grad_norm": 0.19433261454105377, "learning_rate": 4.5808993100364055e-05, "loss": 0.5678, "step": 7434 }, { "epoch": 1.5284201870695857, "grad_norm": 0.16582860052585602, "learning_rate": 4.579900658749462e-05, "loss": 0.5538, "step": 7435 }, { "epoch": 1.5286257580429643, "grad_norm": 0.1574729084968567, "learning_rate": 4.5789020035261886e-05, "loss": 0.5472, "step": 7436 }, { "epoch": 1.5288313290163429, "grad_norm": 0.20113399624824524, "learning_rate": 4.577903344415781e-05, "loss": 0.568, "step": 7437 }, { "epoch": 1.5290368999897215, "grad_norm": 0.19250795245170593, "learning_rate": 4.57690468146744e-05, "loss": 0.548, "step": 7438 }, { "epoch": 1.5292424709631, "grad_norm": 0.1601334810256958, "learning_rate": 4.5759060147303655e-05, "loss": 0.4955, "step": 7439 }, { "epoch": 1.5294480419364787, "grad_norm": 0.16352780163288116, "learning_rate": 4.5749073442537566e-05, "loss": 0.5445, "step": 7440 }, { "epoch": 1.5296536129098572, "grad_norm": 0.1970401108264923, "learning_rate": 4.573908670086812e-05, "loss": 0.5818, "step": 7441 }, { "epoch": 1.5298591838832358, "grad_norm": 0.19766905903816223, "learning_rate": 4.572909992278734e-05, "loss": 0.5515, "step": 7442 }, { "epoch": 1.5300647548566142, "grad_norm": 0.19481036067008972, "learning_rate": 4.57191131087872e-05, "loss": 0.5512, "step": 7443 }, { "epoch": 1.5302703258299928, "grad_norm": 0.20617318153381348, "learning_rate": 4.570912625935972e-05, "loss": 0.5534, "step": 7444 }, { "epoch": 1.5304758968033714, "grad_norm": 0.20254306495189667, "learning_rate": 4.5699139374996906e-05, "loss": 0.5534, "step": 7445 }, { "epoch": 1.53068146777675, "grad_norm": 0.1929122805595398, "learning_rate": 4.568915245619076e-05, "loss": 0.5436, "step": 7446 }, { "epoch": 1.5308870387501283, "grad_norm": 0.19024674594402313, "learning_rate": 4.5679165503433306e-05, "loss": 0.5508, "step": 7447 }, { "epoch": 1.531092609723507, "grad_norm": 0.19227847456932068, "learning_rate": 4.5669178517216525e-05, "loss": 0.5456, "step": 7448 }, { "epoch": 1.5312981806968855, "grad_norm": 0.1958528608083725, "learning_rate": 4.5659191498032456e-05, "loss": 0.5482, "step": 7449 }, { "epoch": 1.531503751670264, "grad_norm": 0.19175393879413605, "learning_rate": 4.564920444637311e-05, "loss": 0.5557, "step": 7450 }, { "epoch": 1.5317093226436427, "grad_norm": 0.19114267826080322, "learning_rate": 4.5639217362730484e-05, "loss": 0.5439, "step": 7451 }, { "epoch": 1.5319148936170213, "grad_norm": 0.16341425478458405, "learning_rate": 4.56292302475966e-05, "loss": 0.507, "step": 7452 }, { "epoch": 1.5321204645903999, "grad_norm": 0.15693975985050201, "learning_rate": 4.56192431014635e-05, "loss": 0.5558, "step": 7453 }, { "epoch": 1.5323260355637784, "grad_norm": 0.21227800846099854, "learning_rate": 4.560925592482319e-05, "loss": 0.5398, "step": 7454 }, { "epoch": 1.532531606537157, "grad_norm": 0.19406823813915253, "learning_rate": 4.559926871816767e-05, "loss": 0.5334, "step": 7455 }, { "epoch": 1.5327371775105356, "grad_norm": 0.19032882153987885, "learning_rate": 4.558928148198898e-05, "loss": 0.5247, "step": 7456 }, { "epoch": 1.5329427484839142, "grad_norm": 0.19708728790283203, "learning_rate": 4.557929421677916e-05, "loss": 0.5549, "step": 7457 }, { "epoch": 1.5331483194572928, "grad_norm": 0.1929347962141037, "learning_rate": 4.556930692303021e-05, "loss": 0.5586, "step": 7458 }, { "epoch": 1.5333538904306712, "grad_norm": 0.19860495626926422, "learning_rate": 4.555931960123418e-05, "loss": 0.5539, "step": 7459 }, { "epoch": 1.5335594614040498, "grad_norm": 0.1928236037492752, "learning_rate": 4.554933225188308e-05, "loss": 0.5639, "step": 7460 }, { "epoch": 1.5337650323774283, "grad_norm": 0.19600355625152588, "learning_rate": 4.553934487546895e-05, "loss": 0.5587, "step": 7461 }, { "epoch": 1.5339706033508067, "grad_norm": 0.1872026026248932, "learning_rate": 4.5529357472483815e-05, "loss": 0.5292, "step": 7462 }, { "epoch": 1.5341761743241853, "grad_norm": 0.19457010924816132, "learning_rate": 4.551937004341971e-05, "loss": 0.5526, "step": 7463 }, { "epoch": 1.5343817452975639, "grad_norm": 0.19338703155517578, "learning_rate": 4.5509382588768684e-05, "loss": 0.5475, "step": 7464 }, { "epoch": 1.5345873162709425, "grad_norm": 0.16978971660137177, "learning_rate": 4.549939510902274e-05, "loss": 0.5315, "step": 7465 }, { "epoch": 1.534792887244321, "grad_norm": 0.16673077642917633, "learning_rate": 4.548940760467395e-05, "loss": 0.5475, "step": 7466 }, { "epoch": 1.5349984582176996, "grad_norm": 0.195562481880188, "learning_rate": 4.5479420076214315e-05, "loss": 0.5599, "step": 7467 }, { "epoch": 1.5352040291910782, "grad_norm": 0.1955966353416443, "learning_rate": 4.5469432524135913e-05, "loss": 0.5538, "step": 7468 }, { "epoch": 1.5354096001644568, "grad_norm": 0.20345093309879303, "learning_rate": 4.5459444948930754e-05, "loss": 0.5529, "step": 7469 }, { "epoch": 1.5356151711378354, "grad_norm": 0.16392046213150024, "learning_rate": 4.5449457351090896e-05, "loss": 0.53, "step": 7470 }, { "epoch": 1.535820742111214, "grad_norm": 0.1566355973482132, "learning_rate": 4.5439469731108383e-05, "loss": 0.5523, "step": 7471 }, { "epoch": 1.5360263130845926, "grad_norm": 0.1888071596622467, "learning_rate": 4.542948208947523e-05, "loss": 0.5527, "step": 7472 }, { "epoch": 1.5362318840579712, "grad_norm": 0.19896787405014038, "learning_rate": 4.5419494426683514e-05, "loss": 0.5568, "step": 7473 }, { "epoch": 1.5364374550313495, "grad_norm": 0.1599314957857132, "learning_rate": 4.5409506743225274e-05, "loss": 0.5418, "step": 7474 }, { "epoch": 1.5366430260047281, "grad_norm": 0.15871824324131012, "learning_rate": 4.5399519039592546e-05, "loss": 0.5393, "step": 7475 }, { "epoch": 1.5368485969781067, "grad_norm": 0.18515051901340485, "learning_rate": 4.538953131627737e-05, "loss": 0.5383, "step": 7476 }, { "epoch": 1.537054167951485, "grad_norm": 0.1832568496465683, "learning_rate": 4.5379543573771823e-05, "loss": 0.5393, "step": 7477 }, { "epoch": 1.5372597389248637, "grad_norm": 0.188548281788826, "learning_rate": 4.5369555812567926e-05, "loss": 0.5413, "step": 7478 }, { "epoch": 1.5374653098982423, "grad_norm": 0.16678757965564728, "learning_rate": 4.535956803315774e-05, "loss": 0.5216, "step": 7479 }, { "epoch": 1.5376708808716208, "grad_norm": 0.12842969596385956, "learning_rate": 4.534958023603333e-05, "loss": 0.5017, "step": 7480 }, { "epoch": 1.5378764518449994, "grad_norm": 0.16010682284832, "learning_rate": 4.5339592421686734e-05, "loss": 0.5213, "step": 7481 }, { "epoch": 1.538082022818378, "grad_norm": 0.20323491096496582, "learning_rate": 4.5329604590610004e-05, "loss": 0.5543, "step": 7482 }, { "epoch": 1.5382875937917566, "grad_norm": 0.19236190617084503, "learning_rate": 4.531961674329519e-05, "loss": 0.5641, "step": 7483 }, { "epoch": 1.5384931647651352, "grad_norm": 0.19376271963119507, "learning_rate": 4.5309628880234356e-05, "loss": 0.542, "step": 7484 }, { "epoch": 1.5386987357385138, "grad_norm": 0.18914787471294403, "learning_rate": 4.529964100191957e-05, "loss": 0.5481, "step": 7485 }, { "epoch": 1.5389043067118924, "grad_norm": 0.19532737135887146, "learning_rate": 4.5289653108842845e-05, "loss": 0.5634, "step": 7486 }, { "epoch": 1.539109877685271, "grad_norm": 0.1869991570711136, "learning_rate": 4.527966520149629e-05, "loss": 0.5536, "step": 7487 }, { "epoch": 1.5393154486586496, "grad_norm": 0.18661408126354218, "learning_rate": 4.526967728037191e-05, "loss": 0.5466, "step": 7488 }, { "epoch": 1.539521019632028, "grad_norm": 0.2640432119369507, "learning_rate": 4.525968934596181e-05, "loss": 0.5553, "step": 7489 }, { "epoch": 1.5397265906054065, "grad_norm": 0.20137301087379456, "learning_rate": 4.524970139875803e-05, "loss": 0.5563, "step": 7490 }, { "epoch": 1.539932161578785, "grad_norm": 0.17082248628139496, "learning_rate": 4.523971343925263e-05, "loss": 0.5198, "step": 7491 }, { "epoch": 1.5401377325521637, "grad_norm": 0.13131971657276154, "learning_rate": 4.5229725467937666e-05, "loss": 0.5375, "step": 7492 }, { "epoch": 1.540343303525542, "grad_norm": 0.16236910223960876, "learning_rate": 4.5219737485305194e-05, "loss": 0.5435, "step": 7493 }, { "epoch": 1.5405488744989206, "grad_norm": 0.19899526238441467, "learning_rate": 4.5209749491847295e-05, "loss": 0.5685, "step": 7494 }, { "epoch": 1.5407544454722992, "grad_norm": 0.19995881617069244, "learning_rate": 4.519976148805602e-05, "loss": 0.5646, "step": 7495 }, { "epoch": 1.5409600164456778, "grad_norm": 0.20216208696365356, "learning_rate": 4.518977347442341e-05, "loss": 0.5596, "step": 7496 }, { "epoch": 1.5411655874190564, "grad_norm": 0.17260567843914032, "learning_rate": 4.5179785451441574e-05, "loss": 0.5084, "step": 7497 }, { "epoch": 1.541371158392435, "grad_norm": 0.15725255012512207, "learning_rate": 4.516979741960254e-05, "loss": 0.5399, "step": 7498 }, { "epoch": 1.5415767293658136, "grad_norm": 0.1909477263689041, "learning_rate": 4.515980937939837e-05, "loss": 0.5416, "step": 7499 }, { "epoch": 1.5417823003391922, "grad_norm": 0.1896287351846695, "learning_rate": 4.514982133132114e-05, "loss": 0.5395, "step": 7500 }, { "epoch": 1.5419878713125708, "grad_norm": 0.188772514462471, "learning_rate": 4.5139833275862925e-05, "loss": 0.5456, "step": 7501 }, { "epoch": 1.5421934422859493, "grad_norm": 0.18162913620471954, "learning_rate": 4.5129845213515775e-05, "loss": 0.543, "step": 7502 }, { "epoch": 1.542399013259328, "grad_norm": 0.19076716899871826, "learning_rate": 4.511985714477175e-05, "loss": 0.5502, "step": 7503 }, { "epoch": 1.5426045842327063, "grad_norm": 0.20053020119667053, "learning_rate": 4.5109869070122946e-05, "loss": 0.5675, "step": 7504 }, { "epoch": 1.5428101552060849, "grad_norm": 0.19717735052108765, "learning_rate": 4.509988099006138e-05, "loss": 0.5525, "step": 7505 }, { "epoch": 1.5430157261794635, "grad_norm": 0.1972462683916092, "learning_rate": 4.5089892905079175e-05, "loss": 0.561, "step": 7506 }, { "epoch": 1.543221297152842, "grad_norm": 0.1987045705318451, "learning_rate": 4.507990481566833e-05, "loss": 0.5333, "step": 7507 }, { "epoch": 1.5434268681262204, "grad_norm": 0.18806061148643494, "learning_rate": 4.506991672232097e-05, "loss": 0.5213, "step": 7508 }, { "epoch": 1.543632439099599, "grad_norm": 0.19716767966747284, "learning_rate": 4.505992862552913e-05, "loss": 0.5605, "step": 7509 }, { "epoch": 1.5438380100729776, "grad_norm": 0.18911804258823395, "learning_rate": 4.50499405257849e-05, "loss": 0.559, "step": 7510 }, { "epoch": 1.5440435810463562, "grad_norm": 0.18609070777893066, "learning_rate": 4.5039952423580324e-05, "loss": 0.5176, "step": 7511 }, { "epoch": 1.5442491520197348, "grad_norm": 0.19210830330848694, "learning_rate": 4.502996431940748e-05, "loss": 0.5397, "step": 7512 }, { "epoch": 1.5444547229931134, "grad_norm": 0.1905742585659027, "learning_rate": 4.5019976213758434e-05, "loss": 0.5585, "step": 7513 }, { "epoch": 1.544660293966492, "grad_norm": 0.16525664925575256, "learning_rate": 4.500998810712525e-05, "loss": 0.5138, "step": 7514 }, { "epoch": 1.5448658649398705, "grad_norm": 0.16021090745925903, "learning_rate": 4.5e-05, "loss": 0.5536, "step": 7515 }, { "epoch": 1.5450714359132491, "grad_norm": 0.1621478945016861, "learning_rate": 4.499001189287476e-05, "loss": 0.5065, "step": 7516 }, { "epoch": 1.5452770068866277, "grad_norm": 0.19542866945266724, "learning_rate": 4.4980023786241585e-05, "loss": 0.5389, "step": 7517 }, { "epoch": 1.5454825778600063, "grad_norm": 0.18569281697273254, "learning_rate": 4.497003568059254e-05, "loss": 0.5289, "step": 7518 }, { "epoch": 1.5456881488333847, "grad_norm": 0.19323447346687317, "learning_rate": 4.496004757641968e-05, "loss": 0.5605, "step": 7519 }, { "epoch": 1.5458937198067633, "grad_norm": 0.18728816509246826, "learning_rate": 4.495005947421511e-05, "loss": 0.5522, "step": 7520 }, { "epoch": 1.5460992907801419, "grad_norm": 0.19524379074573517, "learning_rate": 4.4940071374470875e-05, "loss": 0.5501, "step": 7521 }, { "epoch": 1.5463048617535204, "grad_norm": 0.19686923921108246, "learning_rate": 4.4930083277679036e-05, "loss": 0.5574, "step": 7522 }, { "epoch": 1.5465104327268988, "grad_norm": 0.19316346943378448, "learning_rate": 4.492009518433167e-05, "loss": 0.5493, "step": 7523 }, { "epoch": 1.5467160037002774, "grad_norm": 0.19701054692268372, "learning_rate": 4.491010709492085e-05, "loss": 0.5269, "step": 7524 }, { "epoch": 1.546921574673656, "grad_norm": 0.1707211434841156, "learning_rate": 4.490011900993863e-05, "loss": 0.5326, "step": 7525 }, { "epoch": 1.5471271456470346, "grad_norm": 0.16687439382076263, "learning_rate": 4.489013092987706e-05, "loss": 0.5514, "step": 7526 }, { "epoch": 1.5473327166204132, "grad_norm": 0.1970919817686081, "learning_rate": 4.488014285522825e-05, "loss": 0.5512, "step": 7527 }, { "epoch": 1.5475382875937918, "grad_norm": 0.20226997137069702, "learning_rate": 4.487015478648423e-05, "loss": 0.5549, "step": 7528 }, { "epoch": 1.5477438585671703, "grad_norm": 0.1875869780778885, "learning_rate": 4.486016672413708e-05, "loss": 0.5532, "step": 7529 }, { "epoch": 1.547949429540549, "grad_norm": 0.19215047359466553, "learning_rate": 4.4850178668678864e-05, "loss": 0.5533, "step": 7530 }, { "epoch": 1.5481550005139275, "grad_norm": 0.18497878313064575, "learning_rate": 4.484019062060164e-05, "loss": 0.5389, "step": 7531 }, { "epoch": 1.548360571487306, "grad_norm": 0.18966837227344513, "learning_rate": 4.483020258039748e-05, "loss": 0.5352, "step": 7532 }, { "epoch": 1.5485661424606847, "grad_norm": 0.19131658971309662, "learning_rate": 4.482021454855844e-05, "loss": 0.5429, "step": 7533 }, { "epoch": 1.548771713434063, "grad_norm": 0.18846401572227478, "learning_rate": 4.481022652557658e-05, "loss": 0.5442, "step": 7534 }, { "epoch": 1.5489772844074416, "grad_norm": 0.16239413619041443, "learning_rate": 4.480023851194399e-05, "loss": 0.5047, "step": 7535 }, { "epoch": 1.5491828553808202, "grad_norm": 0.13217657804489136, "learning_rate": 4.479025050815272e-05, "loss": 0.4997, "step": 7536 }, { "epoch": 1.5493884263541988, "grad_norm": 0.12488622963428497, "learning_rate": 4.478026251469482e-05, "loss": 0.5081, "step": 7537 }, { "epoch": 1.5495939973275772, "grad_norm": 0.1763962060213089, "learning_rate": 4.477027453206236e-05, "loss": 0.5517, "step": 7538 }, { "epoch": 1.5497995683009558, "grad_norm": 0.20494931936264038, "learning_rate": 4.476028656074739e-05, "loss": 0.5535, "step": 7539 }, { "epoch": 1.5500051392743344, "grad_norm": 0.2072146087884903, "learning_rate": 4.4750298601241976e-05, "loss": 0.5409, "step": 7540 }, { "epoch": 1.550210710247713, "grad_norm": 0.1965474635362625, "learning_rate": 4.4740310654038194e-05, "loss": 0.5307, "step": 7541 }, { "epoch": 1.5504162812210915, "grad_norm": 0.16837544739246368, "learning_rate": 4.47303227196281e-05, "loss": 0.5289, "step": 7542 }, { "epoch": 1.5506218521944701, "grad_norm": 0.16805261373519897, "learning_rate": 4.4720334798503725e-05, "loss": 0.5413, "step": 7543 }, { "epoch": 1.5508274231678487, "grad_norm": 0.203588604927063, "learning_rate": 4.471034689115717e-05, "loss": 0.5474, "step": 7544 }, { "epoch": 1.5510329941412273, "grad_norm": 0.20456770062446594, "learning_rate": 4.470035899808046e-05, "loss": 0.5409, "step": 7545 }, { "epoch": 1.551238565114606, "grad_norm": 0.18718034029006958, "learning_rate": 4.469037111976566e-05, "loss": 0.537, "step": 7546 }, { "epoch": 1.5514441360879845, "grad_norm": 0.19375449419021606, "learning_rate": 4.4680383256704814e-05, "loss": 0.5322, "step": 7547 }, { "epoch": 1.551649707061363, "grad_norm": 0.23705141246318817, "learning_rate": 4.467039540939001e-05, "loss": 0.5616, "step": 7548 }, { "epoch": 1.5518552780347417, "grad_norm": 0.16841238737106323, "learning_rate": 4.466040757831328e-05, "loss": 0.4964, "step": 7549 }, { "epoch": 1.55206084900812, "grad_norm": 0.16423995792865753, "learning_rate": 4.465041976396668e-05, "loss": 0.5511, "step": 7550 }, { "epoch": 1.5522664199814986, "grad_norm": 0.1915719360113144, "learning_rate": 4.464043196684227e-05, "loss": 0.5412, "step": 7551 }, { "epoch": 1.5524719909548772, "grad_norm": 0.19022904336452484, "learning_rate": 4.463044418743209e-05, "loss": 0.5372, "step": 7552 }, { "epoch": 1.5526775619282556, "grad_norm": 0.19907855987548828, "learning_rate": 4.4620456426228196e-05, "loss": 0.5657, "step": 7553 }, { "epoch": 1.5528831329016342, "grad_norm": 0.1949799507856369, "learning_rate": 4.461046868372264e-05, "loss": 0.5452, "step": 7554 }, { "epoch": 1.5530887038750127, "grad_norm": 0.1677858829498291, "learning_rate": 4.4600480960407467e-05, "loss": 0.5087, "step": 7555 }, { "epoch": 1.5532942748483913, "grad_norm": 0.1660327911376953, "learning_rate": 4.459049325677474e-05, "loss": 0.5361, "step": 7556 }, { "epoch": 1.55349984582177, "grad_norm": 0.16196422278881073, "learning_rate": 4.45805055733165e-05, "loss": 0.5322, "step": 7557 }, { "epoch": 1.5537054167951485, "grad_norm": 0.1612974852323532, "learning_rate": 4.457051791052478e-05, "loss": 0.5549, "step": 7558 }, { "epoch": 1.553910987768527, "grad_norm": 0.19015921652317047, "learning_rate": 4.456053026889164e-05, "loss": 0.5375, "step": 7559 }, { "epoch": 1.5541165587419057, "grad_norm": 0.19856490194797516, "learning_rate": 4.45505426489091e-05, "loss": 0.5626, "step": 7560 }, { "epoch": 1.5543221297152843, "grad_norm": 0.18954843282699585, "learning_rate": 4.454055505106925e-05, "loss": 0.5461, "step": 7561 }, { "epoch": 1.5545277006886629, "grad_norm": 0.16355063021183014, "learning_rate": 4.45305674758641e-05, "loss": 0.5308, "step": 7562 }, { "epoch": 1.5547332716620414, "grad_norm": 0.16068147122859955, "learning_rate": 4.452057992378569e-05, "loss": 0.5596, "step": 7563 }, { "epoch": 1.55493884263542, "grad_norm": 0.18733803927898407, "learning_rate": 4.4510592395326064e-05, "loss": 0.5618, "step": 7564 }, { "epoch": 1.5551444136087984, "grad_norm": 0.16565637290477753, "learning_rate": 4.4500604890977264e-05, "loss": 0.533, "step": 7565 }, { "epoch": 1.555349984582177, "grad_norm": 0.1654541790485382, "learning_rate": 4.449061741123134e-05, "loss": 0.5562, "step": 7566 }, { "epoch": 1.5555555555555556, "grad_norm": 0.20242147147655487, "learning_rate": 4.448062995658028e-05, "loss": 0.5494, "step": 7567 }, { "epoch": 1.5557611265289342, "grad_norm": 0.19619537889957428, "learning_rate": 4.447064252751619e-05, "loss": 0.5455, "step": 7568 }, { "epoch": 1.5559666975023125, "grad_norm": 0.16296258568763733, "learning_rate": 4.446065512453106e-05, "loss": 0.5202, "step": 7569 }, { "epoch": 1.5561722684756911, "grad_norm": 0.15891185402870178, "learning_rate": 4.4450667748116935e-05, "loss": 0.5455, "step": 7570 }, { "epoch": 1.5563778394490697, "grad_norm": 0.19792260229587555, "learning_rate": 4.444068039876584e-05, "loss": 0.5495, "step": 7571 }, { "epoch": 1.5565834104224483, "grad_norm": 0.19216637313365936, "learning_rate": 4.4430693076969805e-05, "loss": 0.5576, "step": 7572 }, { "epoch": 1.5567889813958269, "grad_norm": 0.18915432691574097, "learning_rate": 4.442070578322086e-05, "loss": 0.5269, "step": 7573 }, { "epoch": 1.5569945523692055, "grad_norm": 0.19710315763950348, "learning_rate": 4.441071851801102e-05, "loss": 0.589, "step": 7574 }, { "epoch": 1.557200123342584, "grad_norm": 0.19663040339946747, "learning_rate": 4.4400731281832346e-05, "loss": 0.5445, "step": 7575 }, { "epoch": 1.5574056943159627, "grad_norm": 0.16456833481788635, "learning_rate": 4.4390744075176826e-05, "loss": 0.5084, "step": 7576 }, { "epoch": 1.5576112652893412, "grad_norm": 0.16168387234210968, "learning_rate": 4.438075689853651e-05, "loss": 0.5335, "step": 7577 }, { "epoch": 1.5578168362627198, "grad_norm": 0.19194790720939636, "learning_rate": 4.43707697524034e-05, "loss": 0.5517, "step": 7578 }, { "epoch": 1.5580224072360984, "grad_norm": 0.19601012766361237, "learning_rate": 4.4360782637269535e-05, "loss": 0.5568, "step": 7579 }, { "epoch": 1.5582279782094768, "grad_norm": 0.18594755232334137, "learning_rate": 4.435079555362691e-05, "loss": 0.5313, "step": 7580 }, { "epoch": 1.5584335491828554, "grad_norm": 0.16498349606990814, "learning_rate": 4.434080850196754e-05, "loss": 0.5261, "step": 7581 }, { "epoch": 1.558639120156234, "grad_norm": 0.15921123325824738, "learning_rate": 4.433082148278348e-05, "loss": 0.5481, "step": 7582 }, { "epoch": 1.5588446911296125, "grad_norm": 0.19702661037445068, "learning_rate": 4.4320834496566706e-05, "loss": 0.565, "step": 7583 }, { "epoch": 1.559050262102991, "grad_norm": 0.19030775129795074, "learning_rate": 4.431084754380925e-05, "loss": 0.5561, "step": 7584 }, { "epoch": 1.5592558330763695, "grad_norm": 0.19048479199409485, "learning_rate": 4.43008606250031e-05, "loss": 0.5367, "step": 7585 }, { "epoch": 1.559461404049748, "grad_norm": 0.189329594373703, "learning_rate": 4.429087374064029e-05, "loss": 0.5271, "step": 7586 }, { "epoch": 1.5596669750231267, "grad_norm": 0.1947106570005417, "learning_rate": 4.428088689121282e-05, "loss": 0.5415, "step": 7587 }, { "epoch": 1.5598725459965053, "grad_norm": 0.19340308010578156, "learning_rate": 4.427090007721267e-05, "loss": 0.5465, "step": 7588 }, { "epoch": 1.5600781169698839, "grad_norm": 0.19165843725204468, "learning_rate": 4.4260913299131885e-05, "loss": 0.5478, "step": 7589 }, { "epoch": 1.5602836879432624, "grad_norm": 0.20227845013141632, "learning_rate": 4.425092655746244e-05, "loss": 0.5432, "step": 7590 }, { "epoch": 1.560489258916641, "grad_norm": 0.20343764126300812, "learning_rate": 4.424093985269635e-05, "loss": 0.5508, "step": 7591 }, { "epoch": 1.5606948298900196, "grad_norm": 0.19420337677001953, "learning_rate": 4.423095318532561e-05, "loss": 0.5483, "step": 7592 }, { "epoch": 1.5609004008633982, "grad_norm": 0.19176806509494781, "learning_rate": 4.42209665558422e-05, "loss": 0.5431, "step": 7593 }, { "epoch": 1.5611059718367768, "grad_norm": 0.1622324138879776, "learning_rate": 4.421097996473813e-05, "loss": 0.5213, "step": 7594 }, { "epoch": 1.5613115428101552, "grad_norm": 0.1601867824792862, "learning_rate": 4.420099341250538e-05, "loss": 0.5538, "step": 7595 }, { "epoch": 1.5615171137835338, "grad_norm": 0.1894841194152832, "learning_rate": 4.4191006899635964e-05, "loss": 0.5515, "step": 7596 }, { "epoch": 1.5617226847569123, "grad_norm": 0.15804892778396606, "learning_rate": 4.418102042662184e-05, "loss": 0.493, "step": 7597 }, { "epoch": 1.561928255730291, "grad_norm": 0.15905854105949402, "learning_rate": 4.417103399395503e-05, "loss": 0.5405, "step": 7598 }, { "epoch": 1.5621338267036693, "grad_norm": 0.19244399666786194, "learning_rate": 4.4161047602127494e-05, "loss": 0.5372, "step": 7599 }, { "epoch": 1.5623393976770479, "grad_norm": 0.18696913123130798, "learning_rate": 4.415106125163123e-05, "loss": 0.534, "step": 7600 }, { "epoch": 1.5625449686504265, "grad_norm": 0.19538486003875732, "learning_rate": 4.41410749429582e-05, "loss": 0.5348, "step": 7601 }, { "epoch": 1.562750539623805, "grad_norm": 0.19690623879432678, "learning_rate": 4.4131088676600386e-05, "loss": 0.5461, "step": 7602 }, { "epoch": 1.5629561105971836, "grad_norm": 0.19831502437591553, "learning_rate": 4.412110245304978e-05, "loss": 0.5541, "step": 7603 }, { "epoch": 1.5631616815705622, "grad_norm": 0.20122960209846497, "learning_rate": 4.411111627279835e-05, "loss": 0.5473, "step": 7604 }, { "epoch": 1.5633672525439408, "grad_norm": 0.1640729159116745, "learning_rate": 4.410113013633807e-05, "loss": 0.5054, "step": 7605 }, { "epoch": 1.5635728235173194, "grad_norm": 0.16052688658237457, "learning_rate": 4.4091144044160905e-05, "loss": 0.5322, "step": 7606 }, { "epoch": 1.563778394490698, "grad_norm": 0.19739840924739838, "learning_rate": 4.408115799675881e-05, "loss": 0.5606, "step": 7607 }, { "epoch": 1.5639839654640766, "grad_norm": 0.19876334071159363, "learning_rate": 4.407117199462378e-05, "loss": 0.5147, "step": 7608 }, { "epoch": 1.5641895364374552, "grad_norm": 0.19272910058498383, "learning_rate": 4.406118603824775e-05, "loss": 0.5433, "step": 7609 }, { "epoch": 1.5643951074108335, "grad_norm": 0.1927374005317688, "learning_rate": 4.4051200128122715e-05, "loss": 0.5351, "step": 7610 }, { "epoch": 1.5646006783842121, "grad_norm": 0.19942370057106018, "learning_rate": 4.404121426474061e-05, "loss": 0.543, "step": 7611 }, { "epoch": 1.5648062493575907, "grad_norm": 0.15870188176631927, "learning_rate": 4.4031228448593395e-05, "loss": 0.5113, "step": 7612 }, { "epoch": 1.5650118203309693, "grad_norm": 0.1612454354763031, "learning_rate": 4.402124268017303e-05, "loss": 0.54, "step": 7613 }, { "epoch": 1.5652173913043477, "grad_norm": 0.19843849539756775, "learning_rate": 4.4011256959971465e-05, "loss": 0.5468, "step": 7614 }, { "epoch": 1.5654229622777263, "grad_norm": 0.1602935492992401, "learning_rate": 4.400127128848065e-05, "loss": 0.5168, "step": 7615 }, { "epoch": 1.5656285332511048, "grad_norm": 0.18167522549629211, "learning_rate": 4.39912856661925e-05, "loss": 0.5568, "step": 7616 }, { "epoch": 1.5658341042244834, "grad_norm": 0.16602426767349243, "learning_rate": 4.398130009359902e-05, "loss": 0.5254, "step": 7617 }, { "epoch": 1.566039675197862, "grad_norm": 0.16260112822055817, "learning_rate": 4.397131457119212e-05, "loss": 0.5646, "step": 7618 }, { "epoch": 1.5662452461712406, "grad_norm": 0.19944046437740326, "learning_rate": 4.396132909946373e-05, "loss": 0.5459, "step": 7619 }, { "epoch": 1.5664508171446192, "grad_norm": 0.19292668998241425, "learning_rate": 4.3951343678905816e-05, "loss": 0.5421, "step": 7620 }, { "epoch": 1.5666563881179978, "grad_norm": 0.19421285390853882, "learning_rate": 4.3941358310010295e-05, "loss": 0.5649, "step": 7621 }, { "epoch": 1.5668619590913764, "grad_norm": 0.1894664317369461, "learning_rate": 4.393137299326911e-05, "loss": 0.5683, "step": 7622 }, { "epoch": 1.567067530064755, "grad_norm": 0.18972072005271912, "learning_rate": 4.392138772917415e-05, "loss": 0.5459, "step": 7623 }, { "epoch": 1.5672731010381336, "grad_norm": 0.16586807370185852, "learning_rate": 4.39114025182174e-05, "loss": 0.5409, "step": 7624 }, { "epoch": 1.567478672011512, "grad_norm": 0.13293050229549408, "learning_rate": 4.390141736089076e-05, "loss": 0.5069, "step": 7625 }, { "epoch": 1.5676842429848905, "grad_norm": 0.15764681994915009, "learning_rate": 4.389143225768616e-05, "loss": 0.5475, "step": 7626 }, { "epoch": 1.567889813958269, "grad_norm": 0.1995992809534073, "learning_rate": 4.3881447209095495e-05, "loss": 0.5426, "step": 7627 }, { "epoch": 1.5680953849316477, "grad_norm": 0.1619638353586197, "learning_rate": 4.3871462215610696e-05, "loss": 0.5103, "step": 7628 }, { "epoch": 1.568300955905026, "grad_norm": 0.16626045107841492, "learning_rate": 4.386147727772369e-05, "loss": 0.5347, "step": 7629 }, { "epoch": 1.5685065268784046, "grad_norm": 0.20278498530387878, "learning_rate": 4.3851492395926364e-05, "loss": 0.5572, "step": 7630 }, { "epoch": 1.5687120978517832, "grad_norm": 0.2107708603143692, "learning_rate": 4.384150757071064e-05, "loss": 0.5623, "step": 7631 }, { "epoch": 1.5689176688251618, "grad_norm": 0.19431017339229584, "learning_rate": 4.383152280256844e-05, "loss": 0.5589, "step": 7632 }, { "epoch": 1.5691232397985404, "grad_norm": 0.1882307529449463, "learning_rate": 4.3821538091991645e-05, "loss": 0.5481, "step": 7633 }, { "epoch": 1.569328810771919, "grad_norm": 0.19112688302993774, "learning_rate": 4.3811553439472166e-05, "loss": 0.5419, "step": 7634 }, { "epoch": 1.5695343817452976, "grad_norm": 0.19997398555278778, "learning_rate": 4.380156884550188e-05, "loss": 0.5692, "step": 7635 }, { "epoch": 1.5697399527186762, "grad_norm": 0.19339673221111298, "learning_rate": 4.3791584310572686e-05, "loss": 0.5366, "step": 7636 }, { "epoch": 1.5699455236920548, "grad_norm": 0.18707948923110962, "learning_rate": 4.3781599835176504e-05, "loss": 0.5303, "step": 7637 }, { "epoch": 1.5701510946654333, "grad_norm": 0.1914735585451126, "learning_rate": 4.37716154198052e-05, "loss": 0.5569, "step": 7638 }, { "epoch": 1.570356665638812, "grad_norm": 0.19773781299591064, "learning_rate": 4.376163106495067e-05, "loss": 0.5482, "step": 7639 }, { "epoch": 1.5705622366121905, "grad_norm": 0.17177283763885498, "learning_rate": 4.3751646771104774e-05, "loss": 0.5203, "step": 7640 }, { "epoch": 1.5707678075855689, "grad_norm": 0.16656096279621124, "learning_rate": 4.374166253875942e-05, "loss": 0.5528, "step": 7641 }, { "epoch": 1.5709733785589475, "grad_norm": 0.19667677581310272, "learning_rate": 4.3731678368406464e-05, "loss": 0.5588, "step": 7642 }, { "epoch": 1.571178949532326, "grad_norm": 0.15893961489200592, "learning_rate": 4.372169426053777e-05, "loss": 0.5165, "step": 7643 }, { "epoch": 1.5713845205057044, "grad_norm": 0.15546555817127228, "learning_rate": 4.371171021564525e-05, "loss": 0.5631, "step": 7644 }, { "epoch": 1.571590091479083, "grad_norm": 0.16072389483451843, "learning_rate": 4.3701726234220744e-05, "loss": 0.5273, "step": 7645 }, { "epoch": 1.5717956624524616, "grad_norm": 0.15544024109840393, "learning_rate": 4.369174231675611e-05, "loss": 0.5508, "step": 7646 }, { "epoch": 1.5720012334258402, "grad_norm": 0.15451103448867798, "learning_rate": 4.3681758463743225e-05, "loss": 0.5066, "step": 7647 }, { "epoch": 1.5722068043992188, "grad_norm": 0.15433375537395477, "learning_rate": 4.367177467567394e-05, "loss": 0.5444, "step": 7648 }, { "epoch": 1.5724123753725974, "grad_norm": 0.16077595949172974, "learning_rate": 4.36617909530401e-05, "loss": 0.5234, "step": 7649 }, { "epoch": 1.572617946345976, "grad_norm": 0.15683984756469727, "learning_rate": 4.3651807296333555e-05, "loss": 0.5316, "step": 7650 }, { "epoch": 1.5728235173193545, "grad_norm": 0.1868003010749817, "learning_rate": 4.3641823706046186e-05, "loss": 0.5313, "step": 7651 }, { "epoch": 1.5730290882927331, "grad_norm": 0.1609300971031189, "learning_rate": 4.363184018266979e-05, "loss": 0.5225, "step": 7652 }, { "epoch": 1.5732346592661117, "grad_norm": 0.15994325280189514, "learning_rate": 4.362185672669626e-05, "loss": 0.5298, "step": 7653 }, { "epoch": 1.5734402302394903, "grad_norm": 0.1932908594608307, "learning_rate": 4.3611873338617393e-05, "loss": 0.5419, "step": 7654 }, { "epoch": 1.573645801212869, "grad_norm": 0.1590869426727295, "learning_rate": 4.3601890018925046e-05, "loss": 0.5014, "step": 7655 }, { "epoch": 1.5738513721862473, "grad_norm": 0.16261689364910126, "learning_rate": 4.359190676811104e-05, "loss": 0.5592, "step": 7656 }, { "epoch": 1.5740569431596259, "grad_norm": 0.20458675920963287, "learning_rate": 4.3581923586667196e-05, "loss": 0.5704, "step": 7657 }, { "epoch": 1.5742625141330044, "grad_norm": 0.189193993806839, "learning_rate": 4.3571940475085355e-05, "loss": 0.5508, "step": 7658 }, { "epoch": 1.574468085106383, "grad_norm": 0.18907295167446136, "learning_rate": 4.356195743385734e-05, "loss": 0.5312, "step": 7659 }, { "epoch": 1.5746736560797614, "grad_norm": 0.18863658607006073, "learning_rate": 4.3551974463474956e-05, "loss": 0.5668, "step": 7660 }, { "epoch": 1.57487922705314, "grad_norm": 0.1917717009782791, "learning_rate": 4.354199156443002e-05, "loss": 0.5327, "step": 7661 }, { "epoch": 1.5750847980265186, "grad_norm": 0.19521358609199524, "learning_rate": 4.353200873721435e-05, "loss": 0.5242, "step": 7662 }, { "epoch": 1.5752903689998972, "grad_norm": 0.19762447476387024, "learning_rate": 4.352202598231975e-05, "loss": 0.5609, "step": 7663 }, { "epoch": 1.5754959399732757, "grad_norm": 0.19108183681964874, "learning_rate": 4.3512043300237994e-05, "loss": 0.5453, "step": 7664 }, { "epoch": 1.5757015109466543, "grad_norm": 0.19864460825920105, "learning_rate": 4.3502060691460935e-05, "loss": 0.5685, "step": 7665 }, { "epoch": 1.575907081920033, "grad_norm": 0.1909678727388382, "learning_rate": 4.349207815648035e-05, "loss": 0.5466, "step": 7666 }, { "epoch": 1.5761126528934115, "grad_norm": 0.19452133774757385, "learning_rate": 4.348209569578802e-05, "loss": 0.5522, "step": 7667 }, { "epoch": 1.57631822386679, "grad_norm": 0.1838688850402832, "learning_rate": 4.3472113309875744e-05, "loss": 0.5366, "step": 7668 }, { "epoch": 1.5765237948401687, "grad_norm": 0.18900097906589508, "learning_rate": 4.3462130999235295e-05, "loss": 0.5301, "step": 7669 }, { "epoch": 1.5767293658135473, "grad_norm": 0.19407951831817627, "learning_rate": 4.345214876435847e-05, "loss": 0.544, "step": 7670 }, { "epoch": 1.5769349367869256, "grad_norm": 0.19032980501651764, "learning_rate": 4.344216660573703e-05, "loss": 0.5525, "step": 7671 }, { "epoch": 1.5771405077603042, "grad_norm": 0.19637268781661987, "learning_rate": 4.343218452386277e-05, "loss": 0.5492, "step": 7672 }, { "epoch": 1.5773460787336828, "grad_norm": 0.18958862125873566, "learning_rate": 4.342220251922744e-05, "loss": 0.5393, "step": 7673 }, { "epoch": 1.5775516497070614, "grad_norm": 0.1648726463317871, "learning_rate": 4.341222059232283e-05, "loss": 0.4955, "step": 7674 }, { "epoch": 1.5777572206804398, "grad_norm": 0.16251088678836823, "learning_rate": 4.340223874364069e-05, "loss": 0.5312, "step": 7675 }, { "epoch": 1.5779627916538184, "grad_norm": 0.19399689137935638, "learning_rate": 4.3392256973672776e-05, "loss": 0.5527, "step": 7676 }, { "epoch": 1.578168362627197, "grad_norm": 0.1864946484565735, "learning_rate": 4.338227528291085e-05, "loss": 0.5352, "step": 7677 }, { "epoch": 1.5783739336005755, "grad_norm": 0.19393518567085266, "learning_rate": 4.337229367184664e-05, "loss": 0.5451, "step": 7678 }, { "epoch": 1.5785795045739541, "grad_norm": 0.19147159159183502, "learning_rate": 4.3362312140971927e-05, "loss": 0.5515, "step": 7679 }, { "epoch": 1.5787850755473327, "grad_norm": 0.19576434791088104, "learning_rate": 4.3352330690778445e-05, "loss": 0.5504, "step": 7680 }, { "epoch": 1.5789906465207113, "grad_norm": 0.19198796153068542, "learning_rate": 4.3342349321757934e-05, "loss": 0.5452, "step": 7681 }, { "epoch": 1.57919621749409, "grad_norm": 0.19014614820480347, "learning_rate": 4.3332368034402105e-05, "loss": 0.5615, "step": 7682 }, { "epoch": 1.5794017884674685, "grad_norm": 0.1940838247537613, "learning_rate": 4.332238682920272e-05, "loss": 0.5369, "step": 7683 }, { "epoch": 1.579607359440847, "grad_norm": 0.1929844617843628, "learning_rate": 4.3312405706651496e-05, "loss": 0.5502, "step": 7684 }, { "epoch": 1.5798129304142257, "grad_norm": 0.1682363599538803, "learning_rate": 4.330242466724014e-05, "loss": 0.5245, "step": 7685 }, { "epoch": 1.580018501387604, "grad_norm": 0.14466369152069092, "learning_rate": 4.32924437114604e-05, "loss": 0.5162, "step": 7686 }, { "epoch": 1.5802240723609826, "grad_norm": 0.16307014226913452, "learning_rate": 4.3282462839803976e-05, "loss": 0.5432, "step": 7687 }, { "epoch": 1.5804296433343612, "grad_norm": 0.1943131685256958, "learning_rate": 4.3272482052762584e-05, "loss": 0.5377, "step": 7688 }, { "epoch": 1.5806352143077398, "grad_norm": 0.1945241242647171, "learning_rate": 4.3262501350827925e-05, "loss": 0.5425, "step": 7689 }, { "epoch": 1.5808407852811182, "grad_norm": 0.1876905858516693, "learning_rate": 4.3252520734491706e-05, "loss": 0.5435, "step": 7690 }, { "epoch": 1.5810463562544967, "grad_norm": 0.18484771251678467, "learning_rate": 4.3242540204245625e-05, "loss": 0.5292, "step": 7691 }, { "epoch": 1.5812519272278753, "grad_norm": 0.19600705802440643, "learning_rate": 4.323255976058135e-05, "loss": 0.5593, "step": 7692 }, { "epoch": 1.581457498201254, "grad_norm": 0.18599645793437958, "learning_rate": 4.3222579403990614e-05, "loss": 0.5226, "step": 7693 }, { "epoch": 1.5816630691746325, "grad_norm": 0.18677088618278503, "learning_rate": 4.321259913496508e-05, "loss": 0.517, "step": 7694 }, { "epoch": 1.581868640148011, "grad_norm": 0.19142663478851318, "learning_rate": 4.3202618953996425e-05, "loss": 0.5486, "step": 7695 }, { "epoch": 1.5820742111213897, "grad_norm": 0.19013050198554993, "learning_rate": 4.319263886157634e-05, "loss": 0.5584, "step": 7696 }, { "epoch": 1.5822797820947683, "grad_norm": 0.1859898418188095, "learning_rate": 4.31826588581965e-05, "loss": 0.5319, "step": 7697 }, { "epoch": 1.5824853530681469, "grad_norm": 0.19170920550823212, "learning_rate": 4.3172678944348556e-05, "loss": 0.5519, "step": 7698 }, { "epoch": 1.5826909240415254, "grad_norm": 0.21785251796245575, "learning_rate": 4.3162699120524165e-05, "loss": 0.5545, "step": 7699 }, { "epoch": 1.582896495014904, "grad_norm": 0.19362372159957886, "learning_rate": 4.3152719387215016e-05, "loss": 0.5692, "step": 7700 }, { "epoch": 1.5831020659882824, "grad_norm": 0.19510303437709808, "learning_rate": 4.3142739744912754e-05, "loss": 0.53, "step": 7701 }, { "epoch": 1.583307636961661, "grad_norm": 0.18352247774600983, "learning_rate": 4.3132760194109017e-05, "loss": 0.5042, "step": 7702 }, { "epoch": 1.5835132079350396, "grad_norm": 0.17170487344264984, "learning_rate": 4.312278073529546e-05, "loss": 0.5543, "step": 7703 }, { "epoch": 1.5837187789084182, "grad_norm": 0.19856008887290955, "learning_rate": 4.311280136896372e-05, "loss": 0.5696, "step": 7704 }, { "epoch": 1.5839243498817965, "grad_norm": 0.19567757844924927, "learning_rate": 4.310282209560543e-05, "loss": 0.5493, "step": 7705 }, { "epoch": 1.5841299208551751, "grad_norm": 0.20032745599746704, "learning_rate": 4.309284291571223e-05, "loss": 0.5603, "step": 7706 }, { "epoch": 1.5843354918285537, "grad_norm": 0.19758538901805878, "learning_rate": 4.308286382977575e-05, "loss": 0.5574, "step": 7707 }, { "epoch": 1.5845410628019323, "grad_norm": 0.1984431892633438, "learning_rate": 4.3072884838287605e-05, "loss": 0.5502, "step": 7708 }, { "epoch": 1.5847466337753109, "grad_norm": 0.18602418899536133, "learning_rate": 4.306290594173942e-05, "loss": 0.5592, "step": 7709 }, { "epoch": 1.5849522047486895, "grad_norm": 0.19030845165252686, "learning_rate": 4.3052927140622814e-05, "loss": 0.5444, "step": 7710 }, { "epoch": 1.585157775722068, "grad_norm": 0.1725304126739502, "learning_rate": 4.304294843542938e-05, "loss": 0.5359, "step": 7711 }, { "epoch": 1.5853633466954467, "grad_norm": 0.16047422587871552, "learning_rate": 4.3032969826650714e-05, "loss": 0.5433, "step": 7712 }, { "epoch": 1.5855689176688252, "grad_norm": 0.19161836802959442, "learning_rate": 4.302299131477844e-05, "loss": 0.5271, "step": 7713 }, { "epoch": 1.5857744886422038, "grad_norm": 0.15936709940433502, "learning_rate": 4.301301290030415e-05, "loss": 0.542, "step": 7714 }, { "epoch": 1.5859800596155824, "grad_norm": 0.16099698841571808, "learning_rate": 4.3003034583719435e-05, "loss": 0.5483, "step": 7715 }, { "epoch": 1.586185630588961, "grad_norm": 0.19221562147140503, "learning_rate": 4.299305636551585e-05, "loss": 0.5501, "step": 7716 }, { "epoch": 1.5863912015623394, "grad_norm": 0.15940634906291962, "learning_rate": 4.2983078246185015e-05, "loss": 0.5228, "step": 7717 }, { "epoch": 1.586596772535718, "grad_norm": 0.15413826704025269, "learning_rate": 4.297310022621849e-05, "loss": 0.5451, "step": 7718 }, { "epoch": 1.5868023435090965, "grad_norm": 0.18997204303741455, "learning_rate": 4.2963122306107816e-05, "loss": 0.5558, "step": 7719 }, { "epoch": 1.587007914482475, "grad_norm": 0.18921561539173126, "learning_rate": 4.295314448634461e-05, "loss": 0.5325, "step": 7720 }, { "epoch": 1.5872134854558535, "grad_norm": 0.19362856447696686, "learning_rate": 4.29431667674204e-05, "loss": 0.5475, "step": 7721 }, { "epoch": 1.587419056429232, "grad_norm": 0.1657908409833908, "learning_rate": 4.293318914982676e-05, "loss": 0.4907, "step": 7722 }, { "epoch": 1.5876246274026107, "grad_norm": 0.16281838715076447, "learning_rate": 4.2923211634055226e-05, "loss": 0.5385, "step": 7723 }, { "epoch": 1.5878301983759893, "grad_norm": 0.19449788331985474, "learning_rate": 4.291323422059735e-05, "loss": 0.5253, "step": 7724 }, { "epoch": 1.5880357693493679, "grad_norm": 0.19563239812850952, "learning_rate": 4.2903256909944665e-05, "loss": 0.5349, "step": 7725 }, { "epoch": 1.5882413403227464, "grad_norm": 0.19291435182094574, "learning_rate": 4.28932797025887e-05, "loss": 0.5294, "step": 7726 }, { "epoch": 1.588446911296125, "grad_norm": 0.21474219858646393, "learning_rate": 4.288330259902101e-05, "loss": 0.5389, "step": 7727 }, { "epoch": 1.5886524822695036, "grad_norm": 0.19437165558338165, "learning_rate": 4.28733255997331e-05, "loss": 0.5459, "step": 7728 }, { "epoch": 1.5888580532428822, "grad_norm": 0.18734323978424072, "learning_rate": 4.2863348705216516e-05, "loss": 0.5381, "step": 7729 }, { "epoch": 1.5890636242162608, "grad_norm": 0.19250360131263733, "learning_rate": 4.285337191596274e-05, "loss": 0.5357, "step": 7730 }, { "epoch": 1.5892691951896394, "grad_norm": 0.19198143482208252, "learning_rate": 4.284339523246331e-05, "loss": 0.5375, "step": 7731 }, { "epoch": 1.5894747661630177, "grad_norm": 0.18675874173641205, "learning_rate": 4.2833418655209703e-05, "loss": 0.5385, "step": 7732 }, { "epoch": 1.5896803371363963, "grad_norm": 0.19431853294372559, "learning_rate": 4.282344218469342e-05, "loss": 0.5468, "step": 7733 }, { "epoch": 1.589885908109775, "grad_norm": 0.16220088303089142, "learning_rate": 4.281346582140599e-05, "loss": 0.5035, "step": 7734 }, { "epoch": 1.5900914790831535, "grad_norm": 0.1646573841571808, "learning_rate": 4.2803489565838874e-05, "loss": 0.5567, "step": 7735 }, { "epoch": 1.5902970500565319, "grad_norm": 0.16230642795562744, "learning_rate": 4.2793513418483565e-05, "loss": 0.5234, "step": 7736 }, { "epoch": 1.5905026210299105, "grad_norm": 0.15772514045238495, "learning_rate": 4.2783537379831524e-05, "loss": 0.5393, "step": 7737 }, { "epoch": 1.590708192003289, "grad_norm": 0.18681733310222626, "learning_rate": 4.277356145037425e-05, "loss": 0.5332, "step": 7738 }, { "epoch": 1.5909137629766676, "grad_norm": 0.19121171534061432, "learning_rate": 4.276358563060319e-05, "loss": 0.5351, "step": 7739 }, { "epoch": 1.5911193339500462, "grad_norm": 0.2163754552602768, "learning_rate": 4.27536099210098e-05, "loss": 0.5121, "step": 7740 }, { "epoch": 1.5913249049234248, "grad_norm": 0.17165131866931915, "learning_rate": 4.274363432208556e-05, "loss": 0.5342, "step": 7741 }, { "epoch": 1.5915304758968034, "grad_norm": 0.17426596581935883, "learning_rate": 4.273365883432192e-05, "loss": 0.5432, "step": 7742 }, { "epoch": 1.591736046870182, "grad_norm": 0.16686050593852997, "learning_rate": 4.272368345821031e-05, "loss": 0.5046, "step": 7743 }, { "epoch": 1.5919416178435606, "grad_norm": 0.1610487550497055, "learning_rate": 4.2713708194242184e-05, "loss": 0.5472, "step": 7744 }, { "epoch": 1.5921471888169392, "grad_norm": 0.1983231157064438, "learning_rate": 4.270373304290897e-05, "loss": 0.5526, "step": 7745 }, { "epoch": 1.5923527597903178, "grad_norm": 0.19409148395061493, "learning_rate": 4.2693758004702076e-05, "loss": 0.5521, "step": 7746 }, { "epoch": 1.5925583307636961, "grad_norm": 0.18833288550376892, "learning_rate": 4.268378308011296e-05, "loss": 0.5263, "step": 7747 }, { "epoch": 1.5927639017370747, "grad_norm": 0.18587639927864075, "learning_rate": 4.2673808269633016e-05, "loss": 0.5297, "step": 7748 }, { "epoch": 1.5929694727104533, "grad_norm": 0.18532033264636993, "learning_rate": 4.266383357375367e-05, "loss": 0.5309, "step": 7749 }, { "epoch": 1.593175043683832, "grad_norm": 0.1910453587770462, "learning_rate": 4.2653858992966336e-05, "loss": 0.5683, "step": 7750 }, { "epoch": 1.5933806146572103, "grad_norm": 0.19505764544010162, "learning_rate": 4.26438845277624e-05, "loss": 0.5421, "step": 7751 }, { "epoch": 1.5935861856305888, "grad_norm": 0.19671325385570526, "learning_rate": 4.263391017863326e-05, "loss": 0.5408, "step": 7752 }, { "epoch": 1.5937917566039674, "grad_norm": 0.1978052705526352, "learning_rate": 4.26239359460703e-05, "loss": 0.5512, "step": 7753 }, { "epoch": 1.593997327577346, "grad_norm": 0.1925462931394577, "learning_rate": 4.26139618305649e-05, "loss": 0.5285, "step": 7754 }, { "epoch": 1.5942028985507246, "grad_norm": 0.1875825971364975, "learning_rate": 4.260398783260846e-05, "loss": 0.5481, "step": 7755 }, { "epoch": 1.5944084695241032, "grad_norm": 0.1970067173242569, "learning_rate": 4.2594013952692353e-05, "loss": 0.528, "step": 7756 }, { "epoch": 1.5946140404974818, "grad_norm": 0.19316576421260834, "learning_rate": 4.258404019130792e-05, "loss": 0.5348, "step": 7757 }, { "epoch": 1.5948196114708604, "grad_norm": 0.19398510456085205, "learning_rate": 4.257406654894653e-05, "loss": 0.5404, "step": 7758 }, { "epoch": 1.595025182444239, "grad_norm": 0.19227631390094757, "learning_rate": 4.256409302609956e-05, "loss": 0.5298, "step": 7759 }, { "epoch": 1.5952307534176176, "grad_norm": 0.16509932279586792, "learning_rate": 4.255411962325833e-05, "loss": 0.5097, "step": 7760 }, { "epoch": 1.5954363243909961, "grad_norm": 0.16759321093559265, "learning_rate": 4.254414634091418e-05, "loss": 0.5725, "step": 7761 }, { "epoch": 1.5956418953643745, "grad_norm": 0.19898711144924164, "learning_rate": 4.253417317955848e-05, "loss": 0.5409, "step": 7762 }, { "epoch": 1.595847466337753, "grad_norm": 0.19673512876033783, "learning_rate": 4.252420013968254e-05, "loss": 0.5403, "step": 7763 }, { "epoch": 1.5960530373111317, "grad_norm": 0.19727066159248352, "learning_rate": 4.251422722177769e-05, "loss": 0.5597, "step": 7764 }, { "epoch": 1.5962586082845103, "grad_norm": 0.16265854239463806, "learning_rate": 4.250425442633524e-05, "loss": 0.5227, "step": 7765 }, { "epoch": 1.5964641792578886, "grad_norm": 0.15699994564056396, "learning_rate": 4.2494281753846515e-05, "loss": 0.5637, "step": 7766 }, { "epoch": 1.5966697502312672, "grad_norm": 0.1968710571527481, "learning_rate": 4.2484309204802816e-05, "loss": 0.5566, "step": 7767 }, { "epoch": 1.5968753212046458, "grad_norm": 0.19877804815769196, "learning_rate": 4.2474336779695427e-05, "loss": 0.5463, "step": 7768 }, { "epoch": 1.5970808921780244, "grad_norm": 0.18838095664978027, "learning_rate": 4.246436447901567e-05, "loss": 0.5483, "step": 7769 }, { "epoch": 1.597286463151403, "grad_norm": 0.18883812427520752, "learning_rate": 4.245439230325483e-05, "loss": 0.5465, "step": 7770 }, { "epoch": 1.5974920341247816, "grad_norm": 0.20267321169376373, "learning_rate": 4.244442025290418e-05, "loss": 0.5651, "step": 7771 }, { "epoch": 1.5976976050981602, "grad_norm": 0.19783546030521393, "learning_rate": 4.2434448328455e-05, "loss": 0.5623, "step": 7772 }, { "epoch": 1.5979031760715388, "grad_norm": 0.20209753513336182, "learning_rate": 4.242447653039856e-05, "loss": 0.5378, "step": 7773 }, { "epoch": 1.5981087470449173, "grad_norm": 0.16521279513835907, "learning_rate": 4.2414504859226125e-05, "loss": 0.4948, "step": 7774 }, { "epoch": 1.598314318018296, "grad_norm": 0.15857960283756256, "learning_rate": 4.240453331542894e-05, "loss": 0.5269, "step": 7775 }, { "epoch": 1.5985198889916745, "grad_norm": 0.16982486844062805, "learning_rate": 4.239456189949828e-05, "loss": 0.5311, "step": 7776 }, { "epoch": 1.5987254599650529, "grad_norm": 0.15772342681884766, "learning_rate": 4.238459061192537e-05, "loss": 0.5586, "step": 7777 }, { "epoch": 1.5989310309384315, "grad_norm": 0.18286247551441193, "learning_rate": 4.2374619453201466e-05, "loss": 0.527, "step": 7778 }, { "epoch": 1.59913660191181, "grad_norm": 0.19069987535476685, "learning_rate": 4.236464842381778e-05, "loss": 0.5576, "step": 7779 }, { "epoch": 1.5993421728851887, "grad_norm": 0.19216850399971008, "learning_rate": 4.235467752426555e-05, "loss": 0.5289, "step": 7780 }, { "epoch": 1.599547743858567, "grad_norm": 0.1922430843114853, "learning_rate": 4.2344706755036e-05, "loss": 0.568, "step": 7781 }, { "epoch": 1.5997533148319456, "grad_norm": 0.18228840827941895, "learning_rate": 4.2334736116620314e-05, "loss": 0.531, "step": 7782 }, { "epoch": 1.5999588858053242, "grad_norm": 0.18847499787807465, "learning_rate": 4.2324765609509746e-05, "loss": 0.5421, "step": 7783 }, { "epoch": 1.6001644567787028, "grad_norm": 0.1916157454252243, "learning_rate": 4.231479523419547e-05, "loss": 0.5423, "step": 7784 }, { "epoch": 1.6003700277520814, "grad_norm": 0.19695116579532623, "learning_rate": 4.230482499116869e-05, "loss": 0.5403, "step": 7785 }, { "epoch": 1.60057559872546, "grad_norm": 0.20415250957012177, "learning_rate": 4.2294854880920575e-05, "loss": 0.5381, "step": 7786 }, { "epoch": 1.6007811696988385, "grad_norm": 0.20049957931041718, "learning_rate": 4.228488490394232e-05, "loss": 0.5711, "step": 7787 }, { "epoch": 1.6009867406722171, "grad_norm": 0.16379691660404205, "learning_rate": 4.227491506072508e-05, "loss": 0.5004, "step": 7788 }, { "epoch": 1.6011923116455957, "grad_norm": 0.16042593121528625, "learning_rate": 4.226494535176005e-05, "loss": 0.5595, "step": 7789 }, { "epoch": 1.6013978826189743, "grad_norm": 0.18765395879745483, "learning_rate": 4.2254975777538386e-05, "loss": 0.5608, "step": 7790 }, { "epoch": 1.601603453592353, "grad_norm": 0.16303540766239166, "learning_rate": 4.224500633855123e-05, "loss": 0.5161, "step": 7791 }, { "epoch": 1.6018090245657313, "grad_norm": 0.16182848811149597, "learning_rate": 4.223503703528973e-05, "loss": 0.5586, "step": 7792 }, { "epoch": 1.6020145955391099, "grad_norm": 0.1916949301958084, "learning_rate": 4.222506786824504e-05, "loss": 0.5563, "step": 7793 }, { "epoch": 1.6022201665124884, "grad_norm": 0.18221786618232727, "learning_rate": 4.221509883790828e-05, "loss": 0.5474, "step": 7794 }, { "epoch": 1.602425737485867, "grad_norm": 0.1872803419828415, "learning_rate": 4.2205129944770574e-05, "loss": 0.5405, "step": 7795 }, { "epoch": 1.6026313084592454, "grad_norm": 0.1901916116476059, "learning_rate": 4.2195161189323064e-05, "loss": 0.5595, "step": 7796 }, { "epoch": 1.602836879432624, "grad_norm": 0.48569947481155396, "learning_rate": 4.2185192572056856e-05, "loss": 0.5194, "step": 7797 }, { "epoch": 1.6030424504060026, "grad_norm": 0.1648416817188263, "learning_rate": 4.217522409346305e-05, "loss": 0.5493, "step": 7798 }, { "epoch": 1.6032480213793812, "grad_norm": 0.2003573477268219, "learning_rate": 4.216525575403275e-05, "loss": 0.5516, "step": 7799 }, { "epoch": 1.6034535923527597, "grad_norm": 0.16360460221767426, "learning_rate": 4.2155287554257056e-05, "loss": 0.5167, "step": 7800 }, { "epoch": 1.6036591633261383, "grad_norm": 0.12889930605888367, "learning_rate": 4.2145319494627034e-05, "loss": 0.4986, "step": 7801 }, { "epoch": 1.603864734299517, "grad_norm": 0.16201470792293549, "learning_rate": 4.213535157563378e-05, "loss": 0.5439, "step": 7802 }, { "epoch": 1.6040703052728955, "grad_norm": 0.19083839654922485, "learning_rate": 4.212538379776837e-05, "loss": 0.5315, "step": 7803 }, { "epoch": 1.604275876246274, "grad_norm": 0.19757793843746185, "learning_rate": 4.211541616152186e-05, "loss": 0.5288, "step": 7804 }, { "epoch": 1.6044814472196527, "grad_norm": 0.21021337807178497, "learning_rate": 4.210544866738532e-05, "loss": 0.5579, "step": 7805 }, { "epoch": 1.6046870181930313, "grad_norm": 0.18950164318084717, "learning_rate": 4.2095481315849796e-05, "loss": 0.5447, "step": 7806 }, { "epoch": 1.6048925891664099, "grad_norm": 0.18903128802776337, "learning_rate": 4.2085514107406326e-05, "loss": 0.5478, "step": 7807 }, { "epoch": 1.6050981601397882, "grad_norm": 0.1963806450366974, "learning_rate": 4.207554704254596e-05, "loss": 0.5411, "step": 7808 }, { "epoch": 1.6053037311131668, "grad_norm": 0.19509243965148926, "learning_rate": 4.20655801217597e-05, "loss": 0.5447, "step": 7809 }, { "epoch": 1.6055093020865454, "grad_norm": 0.18859466910362244, "learning_rate": 4.205561334553862e-05, "loss": 0.5434, "step": 7810 }, { "epoch": 1.6057148730599238, "grad_norm": 0.1625402718782425, "learning_rate": 4.20456467143737e-05, "loss": 0.5226, "step": 7811 }, { "epoch": 1.6059204440333024, "grad_norm": 0.16071906685829163, "learning_rate": 4.203568022875596e-05, "loss": 0.5362, "step": 7812 }, { "epoch": 1.606126015006681, "grad_norm": 0.19820047914981842, "learning_rate": 4.202571388917638e-05, "loss": 0.5452, "step": 7813 }, { "epoch": 1.6063315859800595, "grad_norm": 0.1983959972858429, "learning_rate": 4.2015747696126e-05, "loss": 0.526, "step": 7814 }, { "epoch": 1.6065371569534381, "grad_norm": 0.19241683185100555, "learning_rate": 4.200578165009578e-05, "loss": 0.5378, "step": 7815 }, { "epoch": 1.6067427279268167, "grad_norm": 0.19365909695625305, "learning_rate": 4.199581575157668e-05, "loss": 0.5589, "step": 7816 }, { "epoch": 1.6069482989001953, "grad_norm": 0.1934269517660141, "learning_rate": 4.198585000105971e-05, "loss": 0.5438, "step": 7817 }, { "epoch": 1.607153869873574, "grad_norm": 0.19813553988933563, "learning_rate": 4.1975884399035834e-05, "loss": 0.5569, "step": 7818 }, { "epoch": 1.6073594408469525, "grad_norm": 0.1831195056438446, "learning_rate": 4.1965918945995994e-05, "loss": 0.5217, "step": 7819 }, { "epoch": 1.607565011820331, "grad_norm": 0.16004779934883118, "learning_rate": 4.1955953642431144e-05, "loss": 0.5526, "step": 7820 }, { "epoch": 1.6077705827937097, "grad_norm": 0.19440321624279022, "learning_rate": 4.1945988488832236e-05, "loss": 0.5287, "step": 7821 }, { "epoch": 1.6079761537670882, "grad_norm": 0.18852464854717255, "learning_rate": 4.1936023485690185e-05, "loss": 0.526, "step": 7822 }, { "epoch": 1.6081817247404666, "grad_norm": 0.18994298577308655, "learning_rate": 4.192605863349594e-05, "loss": 0.5729, "step": 7823 }, { "epoch": 1.6083872957138452, "grad_norm": 0.18983709812164307, "learning_rate": 4.191609393274042e-05, "loss": 0.5418, "step": 7824 }, { "epoch": 1.6085928666872238, "grad_norm": 0.19144746661186218, "learning_rate": 4.190612938391454e-05, "loss": 0.5502, "step": 7825 }, { "epoch": 1.6087984376606024, "grad_norm": 0.18976972997188568, "learning_rate": 4.18961649875092e-05, "loss": 0.5363, "step": 7826 }, { "epoch": 1.6090040086339807, "grad_norm": 0.19141483306884766, "learning_rate": 4.188620074401532e-05, "loss": 0.5285, "step": 7827 }, { "epoch": 1.6092095796073593, "grad_norm": 0.19065243005752563, "learning_rate": 4.187623665392377e-05, "loss": 0.5374, "step": 7828 }, { "epoch": 1.609415150580738, "grad_norm": 0.19287769496440887, "learning_rate": 4.186627271772544e-05, "loss": 0.5363, "step": 7829 }, { "epoch": 1.6096207215541165, "grad_norm": 0.19527852535247803, "learning_rate": 4.1856308935911175e-05, "loss": 0.562, "step": 7830 }, { "epoch": 1.609826292527495, "grad_norm": 0.16113971173763275, "learning_rate": 4.184634530897191e-05, "loss": 0.5236, "step": 7831 }, { "epoch": 1.6100318635008737, "grad_norm": 0.16417936980724335, "learning_rate": 4.183638183739846e-05, "loss": 0.5406, "step": 7832 }, { "epoch": 1.6102374344742523, "grad_norm": 0.19486621022224426, "learning_rate": 4.1826418521681696e-05, "loss": 0.5471, "step": 7833 }, { "epoch": 1.6104430054476309, "grad_norm": 0.19623447954654694, "learning_rate": 4.181645536231245e-05, "loss": 0.5402, "step": 7834 }, { "epoch": 1.6106485764210094, "grad_norm": 0.19563686847686768, "learning_rate": 4.180649235978158e-05, "loss": 0.559, "step": 7835 }, { "epoch": 1.610854147394388, "grad_norm": 0.19006025791168213, "learning_rate": 4.17965295145799e-05, "loss": 0.5595, "step": 7836 }, { "epoch": 1.6110597183677666, "grad_norm": 0.1941699981689453, "learning_rate": 4.178656682719822e-05, "loss": 0.5391, "step": 7837 }, { "epoch": 1.611265289341145, "grad_norm": 0.20085136592388153, "learning_rate": 4.177660429812739e-05, "loss": 0.5546, "step": 7838 }, { "epoch": 1.6114708603145236, "grad_norm": 0.19179563224315643, "learning_rate": 4.1766641927858206e-05, "loss": 0.5336, "step": 7839 }, { "epoch": 1.6116764312879022, "grad_norm": 0.19993935525417328, "learning_rate": 4.175667971688145e-05, "loss": 0.5577, "step": 7840 }, { "epoch": 1.6118820022612808, "grad_norm": 0.19750361144542694, "learning_rate": 4.1746717665687934e-05, "loss": 0.5378, "step": 7841 }, { "epoch": 1.6120875732346591, "grad_norm": 0.1938353031873703, "learning_rate": 4.173675577476843e-05, "loss": 0.5184, "step": 7842 }, { "epoch": 1.6122931442080377, "grad_norm": 0.16070544719696045, "learning_rate": 4.172679404461371e-05, "loss": 0.521, "step": 7843 }, { "epoch": 1.6124987151814163, "grad_norm": 0.17982181906700134, "learning_rate": 4.171683247571455e-05, "loss": 0.546, "step": 7844 }, { "epoch": 1.6127042861547949, "grad_norm": 0.19503210484981537, "learning_rate": 4.170687106856171e-05, "loss": 0.557, "step": 7845 }, { "epoch": 1.6129098571281735, "grad_norm": 0.19316428899765015, "learning_rate": 4.1696909823645936e-05, "loss": 0.5496, "step": 7846 }, { "epoch": 1.613115428101552, "grad_norm": 0.19165056943893433, "learning_rate": 4.168694874145799e-05, "loss": 0.5376, "step": 7847 }, { "epoch": 1.6133209990749307, "grad_norm": 0.1679886281490326, "learning_rate": 4.167698782248859e-05, "loss": 0.5215, "step": 7848 }, { "epoch": 1.6135265700483092, "grad_norm": 0.16243119537830353, "learning_rate": 4.166702706722847e-05, "loss": 0.5333, "step": 7849 }, { "epoch": 1.6137321410216878, "grad_norm": 0.19812798500061035, "learning_rate": 4.1657066476168345e-05, "loss": 0.542, "step": 7850 }, { "epoch": 1.6139377119950664, "grad_norm": 0.20092356204986572, "learning_rate": 4.164710604979891e-05, "loss": 0.5599, "step": 7851 }, { "epoch": 1.614143282968445, "grad_norm": 0.1670868694782257, "learning_rate": 4.1637145788610914e-05, "loss": 0.5134, "step": 7852 }, { "epoch": 1.6143488539418234, "grad_norm": 0.1713995337486267, "learning_rate": 4.162718569309502e-05, "loss": 0.5345, "step": 7853 }, { "epoch": 1.614554424915202, "grad_norm": 0.1977371871471405, "learning_rate": 4.161722576374192e-05, "loss": 0.5363, "step": 7854 }, { "epoch": 1.6147599958885805, "grad_norm": 0.16193437576293945, "learning_rate": 4.1607266001042295e-05, "loss": 0.4881, "step": 7855 }, { "epoch": 1.6149655668619591, "grad_norm": 0.16229775547981262, "learning_rate": 4.159730640548683e-05, "loss": 0.5395, "step": 7856 }, { "epoch": 1.6151711378353375, "grad_norm": 0.1984437257051468, "learning_rate": 4.158734697756616e-05, "loss": 0.5634, "step": 7857 }, { "epoch": 1.615376708808716, "grad_norm": 0.1987016797065735, "learning_rate": 4.157738771777094e-05, "loss": 0.5676, "step": 7858 }, { "epoch": 1.6155822797820947, "grad_norm": 0.15975748002529144, "learning_rate": 4.156742862659185e-05, "loss": 0.5129, "step": 7859 }, { "epoch": 1.6157878507554733, "grad_norm": 0.15861687064170837, "learning_rate": 4.155746970451951e-05, "loss": 0.5272, "step": 7860 }, { "epoch": 1.6159934217288519, "grad_norm": 0.18841111660003662, "learning_rate": 4.154751095204455e-05, "loss": 0.54, "step": 7861 }, { "epoch": 1.6161989927022304, "grad_norm": 0.19690489768981934, "learning_rate": 4.153755236965758e-05, "loss": 0.5461, "step": 7862 }, { "epoch": 1.616404563675609, "grad_norm": 0.1665157824754715, "learning_rate": 4.1527593957849224e-05, "loss": 0.5081, "step": 7863 }, { "epoch": 1.6166101346489876, "grad_norm": 0.15810109674930573, "learning_rate": 4.1517635717110087e-05, "loss": 0.5394, "step": 7864 }, { "epoch": 1.6168157056223662, "grad_norm": 0.1974000781774521, "learning_rate": 4.150767764793074e-05, "loss": 0.5227, "step": 7865 }, { "epoch": 1.6170212765957448, "grad_norm": 0.19814777374267578, "learning_rate": 4.149771975080181e-05, "loss": 0.5464, "step": 7866 }, { "epoch": 1.6172268475691234, "grad_norm": 0.1915402114391327, "learning_rate": 4.148776202621386e-05, "loss": 0.541, "step": 7867 }, { "epoch": 1.6174324185425017, "grad_norm": 0.19537873566150665, "learning_rate": 4.147780447465745e-05, "loss": 0.5508, "step": 7868 }, { "epoch": 1.6176379895158803, "grad_norm": 0.1904834657907486, "learning_rate": 4.146784709662316e-05, "loss": 0.4971, "step": 7869 }, { "epoch": 1.617843560489259, "grad_norm": 0.19342583417892456, "learning_rate": 4.1457889892601536e-05, "loss": 0.5577, "step": 7870 }, { "epoch": 1.6180491314626375, "grad_norm": 0.19713959097862244, "learning_rate": 4.14479328630831e-05, "loss": 0.5568, "step": 7871 }, { "epoch": 1.6182547024360159, "grad_norm": 0.19126051664352417, "learning_rate": 4.143797600855843e-05, "loss": 0.5551, "step": 7872 }, { "epoch": 1.6184602734093945, "grad_norm": 0.16810829937458038, "learning_rate": 4.142801932951803e-05, "loss": 0.5213, "step": 7873 }, { "epoch": 1.618665844382773, "grad_norm": 0.15974818170070648, "learning_rate": 4.1418062826452424e-05, "loss": 0.5456, "step": 7874 }, { "epoch": 1.6188714153561516, "grad_norm": 0.19179581105709076, "learning_rate": 4.140810649985212e-05, "loss": 0.5298, "step": 7875 }, { "epoch": 1.6190769863295302, "grad_norm": 0.19233964383602142, "learning_rate": 4.139815035020762e-05, "loss": 0.5471, "step": 7876 }, { "epoch": 1.6192825573029088, "grad_norm": 0.18875513970851898, "learning_rate": 4.1388194378009406e-05, "loss": 0.5382, "step": 7877 }, { "epoch": 1.6194881282762874, "grad_norm": 0.18729184567928314, "learning_rate": 4.1378238583747975e-05, "loss": 0.5342, "step": 7878 }, { "epoch": 1.619693699249666, "grad_norm": 0.19150425493717194, "learning_rate": 4.136828296791382e-05, "loss": 0.565, "step": 7879 }, { "epoch": 1.6198992702230446, "grad_norm": 0.18844369053840637, "learning_rate": 4.1358327530997366e-05, "loss": 0.5405, "step": 7880 }, { "epoch": 1.6201048411964232, "grad_norm": 0.19033032655715942, "learning_rate": 4.1348372273489106e-05, "loss": 0.5274, "step": 7881 }, { "epoch": 1.6203104121698018, "grad_norm": 0.16202832758426666, "learning_rate": 4.133841719587948e-05, "loss": 0.4988, "step": 7882 }, { "epoch": 1.6205159831431801, "grad_norm": 0.16193822026252747, "learning_rate": 4.132846229865892e-05, "loss": 0.542, "step": 7883 }, { "epoch": 1.6207215541165587, "grad_norm": 0.1977519690990448, "learning_rate": 4.131850758231787e-05, "loss": 0.5588, "step": 7884 }, { "epoch": 1.6209271250899373, "grad_norm": 0.20576632022857666, "learning_rate": 4.1308553047346713e-05, "loss": 0.5583, "step": 7885 }, { "epoch": 1.621132696063316, "grad_norm": 0.1919194608926773, "learning_rate": 4.129859869423592e-05, "loss": 0.5519, "step": 7886 }, { "epoch": 1.6213382670366943, "grad_norm": 0.19272786378860474, "learning_rate": 4.128864452347587e-05, "loss": 0.5368, "step": 7887 }, { "epoch": 1.6215438380100728, "grad_norm": 0.19439461827278137, "learning_rate": 4.127869053555696e-05, "loss": 0.5374, "step": 7888 }, { "epoch": 1.6217494089834514, "grad_norm": 0.19123432040214539, "learning_rate": 4.126873673096956e-05, "loss": 0.5392, "step": 7889 }, { "epoch": 1.62195497995683, "grad_norm": 0.18603573739528656, "learning_rate": 4.1258783110204074e-05, "loss": 0.5217, "step": 7890 }, { "epoch": 1.6221605509302086, "grad_norm": 0.1992233395576477, "learning_rate": 4.1248829673750846e-05, "loss": 0.5625, "step": 7891 }, { "epoch": 1.6223661219035872, "grad_norm": 0.18787723779678345, "learning_rate": 4.123887642210024e-05, "loss": 0.554, "step": 7892 }, { "epoch": 1.6225716928769658, "grad_norm": 0.18760953843593597, "learning_rate": 4.122892335574263e-05, "loss": 0.5411, "step": 7893 }, { "epoch": 1.6227772638503444, "grad_norm": 0.19207806885242462, "learning_rate": 4.121897047516834e-05, "loss": 0.5274, "step": 7894 }, { "epoch": 1.622982834823723, "grad_norm": 0.1640760600566864, "learning_rate": 4.12090177808677e-05, "loss": 0.5044, "step": 7895 }, { "epoch": 1.6231884057971016, "grad_norm": 0.1595536321401596, "learning_rate": 4.1199065273331035e-05, "loss": 0.5382, "step": 7896 }, { "epoch": 1.6233939767704801, "grad_norm": 0.7902474403381348, "learning_rate": 4.118911295304866e-05, "loss": 0.5773, "step": 7897 }, { "epoch": 1.6235995477438587, "grad_norm": 0.15795102715492249, "learning_rate": 4.1179160820510866e-05, "loss": 0.5388, "step": 7898 }, { "epoch": 1.623805118717237, "grad_norm": 0.2210693508386612, "learning_rate": 4.116920887620797e-05, "loss": 0.5724, "step": 7899 }, { "epoch": 1.6240106896906157, "grad_norm": 0.16837280988693237, "learning_rate": 4.1159257120630244e-05, "loss": 0.5361, "step": 7900 }, { "epoch": 1.6242162606639943, "grad_norm": 0.16610947251319885, "learning_rate": 4.1149305554267965e-05, "loss": 0.5441, "step": 7901 }, { "epoch": 1.6244218316373726, "grad_norm": 0.19282789528369904, "learning_rate": 4.1139354177611413e-05, "loss": 0.5416, "step": 7902 }, { "epoch": 1.6246274026107512, "grad_norm": 0.19123776257038116, "learning_rate": 4.112940299115083e-05, "loss": 0.5602, "step": 7903 }, { "epoch": 1.6248329735841298, "grad_norm": 0.19537465274333954, "learning_rate": 4.111945199537648e-05, "loss": 0.5568, "step": 7904 }, { "epoch": 1.6250385445575084, "grad_norm": 0.1960020512342453, "learning_rate": 4.1109501190778585e-05, "loss": 0.5366, "step": 7905 }, { "epoch": 1.625244115530887, "grad_norm": 0.16584603488445282, "learning_rate": 4.109955057784737e-05, "loss": 0.5022, "step": 7906 }, { "epoch": 1.6254496865042656, "grad_norm": 0.14005246758460999, "learning_rate": 4.108960015707308e-05, "loss": 0.5147, "step": 7907 }, { "epoch": 1.6256552574776442, "grad_norm": 0.16588489711284637, "learning_rate": 4.107964992894592e-05, "loss": 0.5522, "step": 7908 }, { "epoch": 1.6258608284510228, "grad_norm": 0.19331607222557068, "learning_rate": 4.1069699893956074e-05, "loss": 0.5099, "step": 7909 }, { "epoch": 1.6260663994244013, "grad_norm": 0.1920442134141922, "learning_rate": 4.105975005259374e-05, "loss": 0.543, "step": 7910 }, { "epoch": 1.62627197039778, "grad_norm": 0.19395653903484344, "learning_rate": 4.1049800405349116e-05, "loss": 0.5589, "step": 7911 }, { "epoch": 1.6264775413711585, "grad_norm": 0.19541949033737183, "learning_rate": 4.103985095271236e-05, "loss": 0.5376, "step": 7912 }, { "epoch": 1.626683112344537, "grad_norm": 0.16967599093914032, "learning_rate": 4.102990169517362e-05, "loss": 0.5135, "step": 7913 }, { "epoch": 1.6268886833179155, "grad_norm": 0.16106168925762177, "learning_rate": 4.101995263322308e-05, "loss": 0.5548, "step": 7914 }, { "epoch": 1.627094254291294, "grad_norm": 0.20895619690418243, "learning_rate": 4.101000376735088e-05, "loss": 0.536, "step": 7915 }, { "epoch": 1.6272998252646727, "grad_norm": 0.1945531964302063, "learning_rate": 4.1000055098047144e-05, "loss": 0.5196, "step": 7916 }, { "epoch": 1.6275053962380512, "grad_norm": 0.19166290760040283, "learning_rate": 4.099010662580199e-05, "loss": 0.5329, "step": 7917 }, { "epoch": 1.6277109672114296, "grad_norm": 0.1970268040895462, "learning_rate": 4.0980158351105554e-05, "loss": 0.5334, "step": 7918 }, { "epoch": 1.6279165381848082, "grad_norm": 0.19781675934791565, "learning_rate": 4.097021027444791e-05, "loss": 0.557, "step": 7919 }, { "epoch": 1.6281221091581868, "grad_norm": 0.2081199437379837, "learning_rate": 4.0960262396319165e-05, "loss": 0.5316, "step": 7920 }, { "epoch": 1.6283276801315654, "grad_norm": 0.19772003591060638, "learning_rate": 4.0950314717209425e-05, "loss": 0.5286, "step": 7921 }, { "epoch": 1.628533251104944, "grad_norm": 0.1967727690935135, "learning_rate": 4.094036723760875e-05, "loss": 0.55, "step": 7922 }, { "epoch": 1.6287388220783225, "grad_norm": 0.1625976264476776, "learning_rate": 4.09304199580072e-05, "loss": 0.5024, "step": 7923 }, { "epoch": 1.6289443930517011, "grad_norm": 0.16001035273075104, "learning_rate": 4.092047287889484e-05, "loss": 0.5347, "step": 7924 }, { "epoch": 1.6291499640250797, "grad_norm": 0.20354917645454407, "learning_rate": 4.0910526000761725e-05, "loss": 0.5271, "step": 7925 }, { "epoch": 1.6293555349984583, "grad_norm": 0.20167338848114014, "learning_rate": 4.0900579324097874e-05, "loss": 0.547, "step": 7926 }, { "epoch": 1.629561105971837, "grad_norm": 0.1940862089395523, "learning_rate": 4.08906328493933e-05, "loss": 0.5388, "step": 7927 }, { "epoch": 1.6297666769452155, "grad_norm": 0.20124763250350952, "learning_rate": 4.088068657713805e-05, "loss": 0.5325, "step": 7928 }, { "epoch": 1.6299722479185939, "grad_norm": 0.1647825688123703, "learning_rate": 4.087074050782213e-05, "loss": 0.5181, "step": 7929 }, { "epoch": 1.6301778188919724, "grad_norm": 0.13776123523712158, "learning_rate": 4.0860794641935524e-05, "loss": 0.5102, "step": 7930 }, { "epoch": 1.630383389865351, "grad_norm": 0.1585695892572403, "learning_rate": 4.0850848979968205e-05, "loss": 0.5194, "step": 7931 }, { "epoch": 1.6305889608387296, "grad_norm": 0.19522860646247864, "learning_rate": 4.084090352241017e-05, "loss": 0.5335, "step": 7932 }, { "epoch": 1.630794531812108, "grad_norm": 0.200296089053154, "learning_rate": 4.0830958269751385e-05, "loss": 0.5442, "step": 7933 }, { "epoch": 1.6310001027854866, "grad_norm": 0.19578911364078522, "learning_rate": 4.0821013222481786e-05, "loss": 0.5368, "step": 7934 }, { "epoch": 1.6312056737588652, "grad_norm": 0.19223348796367645, "learning_rate": 4.0811068381091336e-05, "loss": 0.5389, "step": 7935 }, { "epoch": 1.6314112447322437, "grad_norm": 0.19662773609161377, "learning_rate": 4.080112374606998e-05, "loss": 0.5493, "step": 7936 }, { "epoch": 1.6316168157056223, "grad_norm": 0.1632963865995407, "learning_rate": 4.0791179317907626e-05, "loss": 0.4949, "step": 7937 }, { "epoch": 1.631822386679001, "grad_norm": 0.16675293445587158, "learning_rate": 4.0781235097094205e-05, "loss": 0.5681, "step": 7938 }, { "epoch": 1.6320279576523795, "grad_norm": 0.2007942795753479, "learning_rate": 4.0771291084119603e-05, "loss": 0.551, "step": 7939 }, { "epoch": 1.632233528625758, "grad_norm": 0.1977294385433197, "learning_rate": 4.076134727947373e-05, "loss": 0.5417, "step": 7940 }, { "epoch": 1.6324390995991367, "grad_norm": 0.20973463356494904, "learning_rate": 4.075140368364644e-05, "loss": 0.5623, "step": 7941 }, { "epoch": 1.6326446705725153, "grad_norm": 0.2024088203907013, "learning_rate": 4.074146029712765e-05, "loss": 0.5461, "step": 7942 }, { "epoch": 1.6328502415458939, "grad_norm": 0.18343862891197205, "learning_rate": 4.0731517120407205e-05, "loss": 0.5329, "step": 7943 }, { "epoch": 1.6330558125192722, "grad_norm": 0.19498711824417114, "learning_rate": 4.0721574153974966e-05, "loss": 0.5226, "step": 7944 }, { "epoch": 1.6332613834926508, "grad_norm": 0.1982509046792984, "learning_rate": 4.071163139832077e-05, "loss": 0.5745, "step": 7945 }, { "epoch": 1.6334669544660294, "grad_norm": 0.19435621798038483, "learning_rate": 4.0701688853934454e-05, "loss": 0.5448, "step": 7946 }, { "epoch": 1.633672525439408, "grad_norm": 0.19986435770988464, "learning_rate": 4.069174652130582e-05, "loss": 0.535, "step": 7947 }, { "epoch": 1.6338780964127864, "grad_norm": 0.2016473263502121, "learning_rate": 4.068180440092471e-05, "loss": 0.5354, "step": 7948 }, { "epoch": 1.634083667386165, "grad_norm": 0.1967112123966217, "learning_rate": 4.067186249328092e-05, "loss": 0.5405, "step": 7949 }, { "epoch": 1.6342892383595435, "grad_norm": 0.1958150565624237, "learning_rate": 4.0661920798864236e-05, "loss": 0.5235, "step": 7950 }, { "epoch": 1.6344948093329221, "grad_norm": 0.19553299248218536, "learning_rate": 4.065197931816444e-05, "loss": 0.5356, "step": 7951 }, { "epoch": 1.6347003803063007, "grad_norm": 0.19405850768089294, "learning_rate": 4.064203805167129e-05, "loss": 0.536, "step": 7952 }, { "epoch": 1.6349059512796793, "grad_norm": 0.20262351632118225, "learning_rate": 4.0632096999874556e-05, "loss": 0.546, "step": 7953 }, { "epoch": 1.635111522253058, "grad_norm": 0.1994638741016388, "learning_rate": 4.0622156163263986e-05, "loss": 0.5446, "step": 7954 }, { "epoch": 1.6353170932264365, "grad_norm": 0.19563588500022888, "learning_rate": 4.0612215542329316e-05, "loss": 0.5533, "step": 7955 }, { "epoch": 1.635522664199815, "grad_norm": 0.19695055484771729, "learning_rate": 4.060227513756029e-05, "loss": 0.522, "step": 7956 }, { "epoch": 1.6357282351731937, "grad_norm": 0.1933106780052185, "learning_rate": 4.059233494944662e-05, "loss": 0.5295, "step": 7957 }, { "epoch": 1.6359338061465722, "grad_norm": 0.1970299780368805, "learning_rate": 4.0582394978477997e-05, "loss": 0.5533, "step": 7958 }, { "epoch": 1.6361393771199506, "grad_norm": 0.19385181367397308, "learning_rate": 4.0572455225144124e-05, "loss": 0.5575, "step": 7959 }, { "epoch": 1.6363449480933292, "grad_norm": 0.19552960991859436, "learning_rate": 4.056251568993469e-05, "loss": 0.5427, "step": 7960 }, { "epoch": 1.6365505190667078, "grad_norm": 0.20119963586330414, "learning_rate": 4.055257637333935e-05, "loss": 0.5481, "step": 7961 }, { "epoch": 1.6367560900400864, "grad_norm": 0.19214770197868347, "learning_rate": 4.05426372758478e-05, "loss": 0.5258, "step": 7962 }, { "epoch": 1.6369616610134647, "grad_norm": 0.19121824204921722, "learning_rate": 4.0532698397949686e-05, "loss": 0.5093, "step": 7963 }, { "epoch": 1.6371672319868433, "grad_norm": 0.18680913746356964, "learning_rate": 4.052275974013464e-05, "loss": 0.54, "step": 7964 }, { "epoch": 1.637372802960222, "grad_norm": 0.1893320232629776, "learning_rate": 4.051282130289228e-05, "loss": 0.5448, "step": 7965 }, { "epoch": 1.6375783739336005, "grad_norm": 0.1885337233543396, "learning_rate": 4.050288308671225e-05, "loss": 0.5424, "step": 7966 }, { "epoch": 1.637783944906979, "grad_norm": 0.1980556845664978, "learning_rate": 4.049294509208415e-05, "loss": 0.5693, "step": 7967 }, { "epoch": 1.6379895158803577, "grad_norm": 0.194559246301651, "learning_rate": 4.0483007319497566e-05, "loss": 0.5378, "step": 7968 }, { "epoch": 1.6381950868537363, "grad_norm": 0.1900004744529724, "learning_rate": 4.047306976944211e-05, "loss": 0.5471, "step": 7969 }, { "epoch": 1.6384006578271149, "grad_norm": 0.19014038145542145, "learning_rate": 4.0463132442407365e-05, "loss": 0.5482, "step": 7970 }, { "epoch": 1.6386062288004934, "grad_norm": 0.17057844996452332, "learning_rate": 4.0453195338882867e-05, "loss": 0.5261, "step": 7971 }, { "epoch": 1.638811799773872, "grad_norm": 0.1279505342245102, "learning_rate": 4.044325845935818e-05, "loss": 0.5028, "step": 7972 }, { "epoch": 1.6390173707472506, "grad_norm": 0.16361773014068604, "learning_rate": 4.043332180432286e-05, "loss": 0.5524, "step": 7973 }, { "epoch": 1.6392229417206292, "grad_norm": 0.20223143696784973, "learning_rate": 4.042338537426641e-05, "loss": 0.5304, "step": 7974 }, { "epoch": 1.6394285126940076, "grad_norm": 0.20231173932552338, "learning_rate": 4.041344916967838e-05, "loss": 0.5483, "step": 7975 }, { "epoch": 1.6396340836673862, "grad_norm": 0.20245333015918732, "learning_rate": 4.040351319104828e-05, "loss": 0.5334, "step": 7976 }, { "epoch": 1.6398396546407648, "grad_norm": 0.15880252420902252, "learning_rate": 4.039357743886559e-05, "loss": 0.5144, "step": 7977 }, { "epoch": 1.6400452256141431, "grad_norm": 0.1283801794052124, "learning_rate": 4.0383641913619816e-05, "loss": 0.5137, "step": 7978 }, { "epoch": 1.6402507965875217, "grad_norm": 0.1789664328098297, "learning_rate": 4.0373706615800426e-05, "loss": 0.5355, "step": 7979 }, { "epoch": 1.6404563675609003, "grad_norm": 0.196334108710289, "learning_rate": 4.0363771545896894e-05, "loss": 0.5392, "step": 7980 }, { "epoch": 1.6406619385342789, "grad_norm": 0.19602881371974945, "learning_rate": 4.035383670439867e-05, "loss": 0.5369, "step": 7981 }, { "epoch": 1.6408675095076575, "grad_norm": 0.19509628415107727, "learning_rate": 4.0343902091795174e-05, "loss": 0.5494, "step": 7982 }, { "epoch": 1.641073080481036, "grad_norm": 0.19635361433029175, "learning_rate": 4.033396770857588e-05, "loss": 0.5527, "step": 7983 }, { "epoch": 1.6412786514544146, "grad_norm": 0.19803519546985626, "learning_rate": 4.0324033555230184e-05, "loss": 0.537, "step": 7984 }, { "epoch": 1.6414842224277932, "grad_norm": 0.20085453987121582, "learning_rate": 4.03140996322475e-05, "loss": 0.5394, "step": 7985 }, { "epoch": 1.6416897934011718, "grad_norm": 0.18997138738632202, "learning_rate": 4.030416594011722e-05, "loss": 0.5145, "step": 7986 }, { "epoch": 1.6418953643745504, "grad_norm": 0.16585613787174225, "learning_rate": 4.029423247932874e-05, "loss": 0.511, "step": 7987 }, { "epoch": 1.642100935347929, "grad_norm": 0.1544012725353241, "learning_rate": 4.028429925037143e-05, "loss": 0.5345, "step": 7988 }, { "epoch": 1.6423065063213076, "grad_norm": 0.2430618703365326, "learning_rate": 4.0274366253734644e-05, "loss": 0.5486, "step": 7989 }, { "epoch": 1.642512077294686, "grad_norm": 0.19470450282096863, "learning_rate": 4.0264433489907753e-05, "loss": 0.5318, "step": 7990 }, { "epoch": 1.6427176482680645, "grad_norm": 0.196413055062294, "learning_rate": 4.0254500959380096e-05, "loss": 0.5383, "step": 7991 }, { "epoch": 1.6429232192414431, "grad_norm": 0.19302628934383392, "learning_rate": 4.0244568662641e-05, "loss": 0.5368, "step": 7992 }, { "epoch": 1.6431287902148217, "grad_norm": 0.19250887632369995, "learning_rate": 4.023463660017978e-05, "loss": 0.5225, "step": 7993 }, { "epoch": 1.6433343611882, "grad_norm": 0.18870443105697632, "learning_rate": 4.022470477248573e-05, "loss": 0.5322, "step": 7994 }, { "epoch": 1.6435399321615787, "grad_norm": 0.19748498499393463, "learning_rate": 4.0214773180048155e-05, "loss": 0.5266, "step": 7995 }, { "epoch": 1.6437455031349573, "grad_norm": 0.19181567430496216, "learning_rate": 4.020484182335634e-05, "loss": 0.5553, "step": 7996 }, { "epoch": 1.6439510741083359, "grad_norm": 0.18883375823497772, "learning_rate": 4.019491070289956e-05, "loss": 0.536, "step": 7997 }, { "epoch": 1.6441566450817144, "grad_norm": 0.19764509797096252, "learning_rate": 4.0184979819167066e-05, "loss": 0.5231, "step": 7998 }, { "epoch": 1.644362216055093, "grad_norm": 0.1661233752965927, "learning_rate": 4.017504917264812e-05, "loss": 0.5223, "step": 7999 }, { "epoch": 1.6445677870284716, "grad_norm": 0.1361915022134781, "learning_rate": 4.016511876383195e-05, "loss": 0.5278, "step": 8000 }, { "epoch": 1.6447733580018502, "grad_norm": 0.16932383179664612, "learning_rate": 4.015518859320778e-05, "loss": 0.5341, "step": 8001 }, { "epoch": 1.6449789289752288, "grad_norm": 0.19935861229896545, "learning_rate": 4.014525866126482e-05, "loss": 0.5358, "step": 8002 }, { "epoch": 1.6451844999486074, "grad_norm": 0.20055261254310608, "learning_rate": 4.013532896849226e-05, "loss": 0.5611, "step": 8003 }, { "epoch": 1.645390070921986, "grad_norm": 0.1982363760471344, "learning_rate": 4.012539951537932e-05, "loss": 0.5271, "step": 8004 }, { "epoch": 1.6455956418953643, "grad_norm": 0.16576005518436432, "learning_rate": 4.011547030241516e-05, "loss": 0.5156, "step": 8005 }, { "epoch": 1.645801212868743, "grad_norm": 0.13087031245231628, "learning_rate": 4.010554133008895e-05, "loss": 0.5298, "step": 8006 }, { "epoch": 1.6460067838421215, "grad_norm": 0.16294503211975098, "learning_rate": 4.0095612598889837e-05, "loss": 0.5526, "step": 8007 }, { "epoch": 1.6462123548155, "grad_norm": 0.20266200602054596, "learning_rate": 4.008568410930698e-05, "loss": 0.5262, "step": 8008 }, { "epoch": 1.6464179257888785, "grad_norm": 0.16137059032917023, "learning_rate": 4.007575586182949e-05, "loss": 0.5215, "step": 8009 }, { "epoch": 1.646623496762257, "grad_norm": 0.16377897560596466, "learning_rate": 4.006582785694648e-05, "loss": 0.5292, "step": 8010 }, { "epoch": 1.6468290677356356, "grad_norm": 0.19530196487903595, "learning_rate": 4.005590009514708e-05, "loss": 0.5454, "step": 8011 }, { "epoch": 1.6470346387090142, "grad_norm": 0.19677075743675232, "learning_rate": 4.0045972576920374e-05, "loss": 0.5499, "step": 8012 }, { "epoch": 1.6472402096823928, "grad_norm": 0.19411884248256683, "learning_rate": 4.003604530275545e-05, "loss": 0.5623, "step": 8013 }, { "epoch": 1.6474457806557714, "grad_norm": 0.15870682895183563, "learning_rate": 4.002611827314137e-05, "loss": 0.5135, "step": 8014 }, { "epoch": 1.64765135162915, "grad_norm": 0.1609289050102234, "learning_rate": 4.0016191488567195e-05, "loss": 0.5723, "step": 8015 }, { "epoch": 1.6478569226025286, "grad_norm": 0.19486412405967712, "learning_rate": 4.000626494952196e-05, "loss": 0.5615, "step": 8016 }, { "epoch": 1.6480624935759072, "grad_norm": 0.20491555333137512, "learning_rate": 3.9996338656494715e-05, "loss": 0.5451, "step": 8017 }, { "epoch": 1.6482680645492858, "grad_norm": 0.19133470952510834, "learning_rate": 3.998641260997449e-05, "loss": 0.5251, "step": 8018 }, { "epoch": 1.6484736355226643, "grad_norm": 0.1599549949169159, "learning_rate": 3.997648681045026e-05, "loss": 0.5172, "step": 8019 }, { "epoch": 1.6486792064960427, "grad_norm": 0.1676701456308365, "learning_rate": 3.996656125841106e-05, "loss": 0.5404, "step": 8020 }, { "epoch": 1.6488847774694213, "grad_norm": 0.1984013020992279, "learning_rate": 3.995663595434587e-05, "loss": 0.5757, "step": 8021 }, { "epoch": 1.6490903484428, "grad_norm": 0.1664489060640335, "learning_rate": 3.994671089874364e-05, "loss": 0.5177, "step": 8022 }, { "epoch": 1.6492959194161785, "grad_norm": 0.15646716952323914, "learning_rate": 3.993678609209333e-05, "loss": 0.548, "step": 8023 }, { "epoch": 1.6495014903895568, "grad_norm": 0.1926644891500473, "learning_rate": 3.9926861534883924e-05, "loss": 0.5528, "step": 8024 }, { "epoch": 1.6497070613629354, "grad_norm": 0.20535780489444733, "learning_rate": 3.991693722760434e-05, "loss": 0.5611, "step": 8025 }, { "epoch": 1.649912632336314, "grad_norm": 0.19756321609020233, "learning_rate": 3.9907013170743504e-05, "loss": 0.5244, "step": 8026 }, { "epoch": 1.6501182033096926, "grad_norm": 0.16199225187301636, "learning_rate": 3.9897089364790315e-05, "loss": 0.5097, "step": 8027 }, { "epoch": 1.6503237742830712, "grad_norm": 0.1658937931060791, "learning_rate": 3.988716581023368e-05, "loss": 0.556, "step": 8028 }, { "epoch": 1.6505293452564498, "grad_norm": 0.1961878091096878, "learning_rate": 3.98772425075625e-05, "loss": 0.5337, "step": 8029 }, { "epoch": 1.6507349162298284, "grad_norm": 0.1957957148551941, "learning_rate": 3.9867319457265616e-05, "loss": 0.5322, "step": 8030 }, { "epoch": 1.650940487203207, "grad_norm": 0.19228583574295044, "learning_rate": 3.985739665983192e-05, "loss": 0.5356, "step": 8031 }, { "epoch": 1.6511460581765856, "grad_norm": 0.20034292340278625, "learning_rate": 3.984747411575027e-05, "loss": 0.557, "step": 8032 }, { "epoch": 1.6513516291499641, "grad_norm": 0.1647980958223343, "learning_rate": 3.983755182550948e-05, "loss": 0.5102, "step": 8033 }, { "epoch": 1.6515572001233427, "grad_norm": 0.16599765419960022, "learning_rate": 3.982762978959838e-05, "loss": 0.5501, "step": 8034 }, { "epoch": 1.651762771096721, "grad_norm": 0.193580761551857, "learning_rate": 3.981770800850579e-05, "loss": 0.5507, "step": 8035 }, { "epoch": 1.6519683420700997, "grad_norm": 0.1937427669763565, "learning_rate": 3.98077864827205e-05, "loss": 0.5389, "step": 8036 }, { "epoch": 1.6521739130434783, "grad_norm": 0.1987418383359909, "learning_rate": 3.9797865212731286e-05, "loss": 0.5353, "step": 8037 }, { "epoch": 1.6523794840168569, "grad_norm": 0.1963115632534027, "learning_rate": 3.978794419902696e-05, "loss": 0.5466, "step": 8038 }, { "epoch": 1.6525850549902352, "grad_norm": 0.16360154747962952, "learning_rate": 3.977802344209626e-05, "loss": 0.4988, "step": 8039 }, { "epoch": 1.6527906259636138, "grad_norm": 0.18510453402996063, "learning_rate": 3.976810294242792e-05, "loss": 0.5791, "step": 8040 }, { "epoch": 1.6529961969369924, "grad_norm": 0.2012956142425537, "learning_rate": 3.9758182700510714e-05, "loss": 0.5532, "step": 8041 }, { "epoch": 1.653201767910371, "grad_norm": 0.19226433336734772, "learning_rate": 3.974826271683334e-05, "loss": 0.5284, "step": 8042 }, { "epoch": 1.6534073388837496, "grad_norm": 0.19937126338481903, "learning_rate": 3.973834299188452e-05, "loss": 0.5377, "step": 8043 }, { "epoch": 1.6536129098571282, "grad_norm": 0.19442661106586456, "learning_rate": 3.9728423526152927e-05, "loss": 0.5465, "step": 8044 }, { "epoch": 1.6538184808305068, "grad_norm": 0.19324155151844025, "learning_rate": 3.971850432012729e-05, "loss": 0.5376, "step": 8045 }, { "epoch": 1.6540240518038853, "grad_norm": 0.16883355379104614, "learning_rate": 3.970858537429625e-05, "loss": 0.5119, "step": 8046 }, { "epoch": 1.654229622777264, "grad_norm": 0.16271071135997772, "learning_rate": 3.969866668914848e-05, "loss": 0.5545, "step": 8047 }, { "epoch": 1.6544351937506425, "grad_norm": 0.18823817372322083, "learning_rate": 3.9688748265172625e-05, "loss": 0.5345, "step": 8048 }, { "epoch": 1.654640764724021, "grad_norm": 0.19708384573459625, "learning_rate": 3.9678830102857324e-05, "loss": 0.5677, "step": 8049 }, { "epoch": 1.6548463356973995, "grad_norm": 0.2034367471933365, "learning_rate": 3.966891220269118e-05, "loss": 0.5178, "step": 8050 }, { "epoch": 1.655051906670778, "grad_norm": 0.1986580491065979, "learning_rate": 3.9658994565162816e-05, "loss": 0.5545, "step": 8051 }, { "epoch": 1.6552574776441566, "grad_norm": 0.20059730112552643, "learning_rate": 3.964907719076083e-05, "loss": 0.5635, "step": 8052 }, { "epoch": 1.6554630486175352, "grad_norm": 0.19054940342903137, "learning_rate": 3.963916007997379e-05, "loss": 0.5542, "step": 8053 }, { "epoch": 1.6556686195909136, "grad_norm": 0.19373731315135956, "learning_rate": 3.962924323329029e-05, "loss": 0.5499, "step": 8054 }, { "epoch": 1.6558741905642922, "grad_norm": 0.19648055732250214, "learning_rate": 3.9619326651198875e-05, "loss": 0.5399, "step": 8055 }, { "epoch": 1.6560797615376708, "grad_norm": 0.21236325800418854, "learning_rate": 3.960941033418808e-05, "loss": 0.5433, "step": 8056 }, { "epoch": 1.6562853325110494, "grad_norm": 0.18751861155033112, "learning_rate": 3.959949428274645e-05, "loss": 0.521, "step": 8057 }, { "epoch": 1.656490903484428, "grad_norm": 0.1983969807624817, "learning_rate": 3.958957849736247e-05, "loss": 0.5316, "step": 8058 }, { "epoch": 1.6566964744578065, "grad_norm": 0.6342448592185974, "learning_rate": 3.9579662978524695e-05, "loss": 0.5639, "step": 8059 }, { "epoch": 1.6569020454311851, "grad_norm": 0.19162461161613464, "learning_rate": 3.9569747726721584e-05, "loss": 0.5219, "step": 8060 }, { "epoch": 1.6571076164045637, "grad_norm": 0.152262344956398, "learning_rate": 3.9559832742441625e-05, "loss": 0.4952, "step": 8061 }, { "epoch": 1.6573131873779423, "grad_norm": 0.16122353076934814, "learning_rate": 3.9549918026173265e-05, "loss": 0.548, "step": 8062 }, { "epoch": 1.657518758351321, "grad_norm": 0.19267982244491577, "learning_rate": 3.9540003578404985e-05, "loss": 0.5452, "step": 8063 }, { "epoch": 1.6577243293246995, "grad_norm": 0.17134782671928406, "learning_rate": 3.953008939962521e-05, "loss": 0.5009, "step": 8064 }, { "epoch": 1.657929900298078, "grad_norm": 0.16193920373916626, "learning_rate": 3.952017549032234e-05, "loss": 0.5392, "step": 8065 }, { "epoch": 1.6581354712714564, "grad_norm": 0.1981363743543625, "learning_rate": 3.951026185098483e-05, "loss": 0.5403, "step": 8066 }, { "epoch": 1.658341042244835, "grad_norm": 0.19924452900886536, "learning_rate": 3.950034848210107e-05, "loss": 0.5773, "step": 8067 }, { "epoch": 1.6585466132182136, "grad_norm": 0.16161105036735535, "learning_rate": 3.949043538415942e-05, "loss": 0.5061, "step": 8068 }, { "epoch": 1.658752184191592, "grad_norm": 0.16612055897712708, "learning_rate": 3.948052255764828e-05, "loss": 0.5527, "step": 8069 }, { "epoch": 1.6589577551649706, "grad_norm": 0.20378176867961884, "learning_rate": 3.947061000305599e-05, "loss": 0.5445, "step": 8070 }, { "epoch": 1.6591633261383492, "grad_norm": 0.21650046110153198, "learning_rate": 3.946069772087089e-05, "loss": 0.5334, "step": 8071 }, { "epoch": 1.6593688971117277, "grad_norm": 0.1963663250207901, "learning_rate": 3.9450785711581324e-05, "loss": 0.5466, "step": 8072 }, { "epoch": 1.6595744680851063, "grad_norm": 0.19677862524986267, "learning_rate": 3.944087397567561e-05, "loss": 0.5542, "step": 8073 }, { "epoch": 1.659780039058485, "grad_norm": 0.19894835352897644, "learning_rate": 3.943096251364205e-05, "loss": 0.5259, "step": 8074 }, { "epoch": 1.6599856100318635, "grad_norm": 0.20943677425384521, "learning_rate": 3.942105132596895e-05, "loss": 0.5323, "step": 8075 }, { "epoch": 1.660191181005242, "grad_norm": 0.20376256108283997, "learning_rate": 3.941114041314458e-05, "loss": 0.5369, "step": 8076 }, { "epoch": 1.6603967519786207, "grad_norm": 0.1930057853460312, "learning_rate": 3.9401229775657185e-05, "loss": 0.5351, "step": 8077 }, { "epoch": 1.6606023229519993, "grad_norm": 0.19255690276622772, "learning_rate": 3.939131941399504e-05, "loss": 0.534, "step": 8078 }, { "epoch": 1.6608078939253779, "grad_norm": 0.18883344531059265, "learning_rate": 3.938140932864635e-05, "loss": 0.5353, "step": 8079 }, { "epoch": 1.6610134648987565, "grad_norm": 0.20242716372013092, "learning_rate": 3.937149952009938e-05, "loss": 0.5459, "step": 8080 }, { "epoch": 1.6612190358721348, "grad_norm": 0.19481943547725677, "learning_rate": 3.9361589988842325e-05, "loss": 0.5526, "step": 8081 }, { "epoch": 1.6614246068455134, "grad_norm": 0.19463589787483215, "learning_rate": 3.935168073536337e-05, "loss": 0.5376, "step": 8082 }, { "epoch": 1.661630177818892, "grad_norm": 0.1837586909532547, "learning_rate": 3.93417717601507e-05, "loss": 0.5, "step": 8083 }, { "epoch": 1.6618357487922706, "grad_norm": 0.19010527431964874, "learning_rate": 3.9331863063692494e-05, "loss": 0.5356, "step": 8084 }, { "epoch": 1.662041319765649, "grad_norm": 0.19221745431423187, "learning_rate": 3.932195464647691e-05, "loss": 0.5369, "step": 8085 }, { "epoch": 1.6622468907390275, "grad_norm": 0.19402176141738892, "learning_rate": 3.9312046508992064e-05, "loss": 0.5403, "step": 8086 }, { "epoch": 1.6624524617124061, "grad_norm": 0.19770248234272003, "learning_rate": 3.930213865172611e-05, "loss": 0.5719, "step": 8087 }, { "epoch": 1.6626580326857847, "grad_norm": 0.16622693836688995, "learning_rate": 3.929223107516716e-05, "loss": 0.5024, "step": 8088 }, { "epoch": 1.6628636036591633, "grad_norm": 0.15783652663230896, "learning_rate": 3.92823237798033e-05, "loss": 0.5163, "step": 8089 }, { "epoch": 1.663069174632542, "grad_norm": 0.19830361008644104, "learning_rate": 3.927241676612263e-05, "loss": 0.5191, "step": 8090 }, { "epoch": 1.6632747456059205, "grad_norm": 0.20356783270835876, "learning_rate": 3.9262510034613215e-05, "loss": 0.5717, "step": 8091 }, { "epoch": 1.663480316579299, "grad_norm": 0.17174309492111206, "learning_rate": 3.92526035857631e-05, "loss": 0.5261, "step": 8092 }, { "epoch": 1.6636858875526777, "grad_norm": 0.1699124574661255, "learning_rate": 3.924269742006035e-05, "loss": 0.5411, "step": 8093 }, { "epoch": 1.6638914585260562, "grad_norm": 0.19385066628456116, "learning_rate": 3.923279153799299e-05, "loss": 0.5143, "step": 8094 }, { "epoch": 1.6640970294994348, "grad_norm": 0.1945018619298935, "learning_rate": 3.922288594004903e-05, "loss": 0.5342, "step": 8095 }, { "epoch": 1.6643026004728132, "grad_norm": 0.19037404656410217, "learning_rate": 3.921298062671649e-05, "loss": 0.5635, "step": 8096 }, { "epoch": 1.6645081714461918, "grad_norm": 0.1975833922624588, "learning_rate": 3.9203075598483335e-05, "loss": 0.5451, "step": 8097 }, { "epoch": 1.6647137424195704, "grad_norm": 0.1882157325744629, "learning_rate": 3.9193170855837564e-05, "loss": 0.5283, "step": 8098 }, { "epoch": 1.664919313392949, "grad_norm": 0.19174973666667938, "learning_rate": 3.9183266399267094e-05, "loss": 0.5513, "step": 8099 }, { "epoch": 1.6651248843663273, "grad_norm": 0.19739782810211182, "learning_rate": 3.9173362229259926e-05, "loss": 0.5301, "step": 8100 }, { "epoch": 1.665330455339706, "grad_norm": 0.16633886098861694, "learning_rate": 3.916345834630396e-05, "loss": 0.541, "step": 8101 }, { "epoch": 1.6655360263130845, "grad_norm": 0.16310401260852814, "learning_rate": 3.915355475088714e-05, "loss": 0.5662, "step": 8102 }, { "epoch": 1.665741597286463, "grad_norm": 0.18664813041687012, "learning_rate": 3.914365144349733e-05, "loss": 0.5332, "step": 8103 }, { "epoch": 1.6659471682598417, "grad_norm": 0.19100825488567352, "learning_rate": 3.913374842462244e-05, "loss": 0.5315, "step": 8104 }, { "epoch": 1.6661527392332203, "grad_norm": 0.20404808223247528, "learning_rate": 3.912384569475036e-05, "loss": 0.5542, "step": 8105 }, { "epoch": 1.6663583102065989, "grad_norm": 0.1687227189540863, "learning_rate": 3.9113943254368916e-05, "loss": 0.5423, "step": 8106 }, { "epoch": 1.6665638811799774, "grad_norm": 0.1573527455329895, "learning_rate": 3.9104041103965985e-05, "loss": 0.5323, "step": 8107 }, { "epoch": 1.666769452153356, "grad_norm": 0.16023261845111847, "learning_rate": 3.90941392440294e-05, "loss": 0.5108, "step": 8108 }, { "epoch": 1.6669750231267346, "grad_norm": 0.15852369368076324, "learning_rate": 3.9084237675046975e-05, "loss": 0.5464, "step": 8109 }, { "epoch": 1.6671805941001132, "grad_norm": 0.19316738843917847, "learning_rate": 3.90743363975065e-05, "loss": 0.5283, "step": 8110 }, { "epoch": 1.6673861650734916, "grad_norm": 0.1973247081041336, "learning_rate": 3.906443541189578e-05, "loss": 0.5398, "step": 8111 }, { "epoch": 1.6675917360468702, "grad_norm": 0.1693935990333557, "learning_rate": 3.905453471870259e-05, "loss": 0.509, "step": 8112 }, { "epoch": 1.6677973070202488, "grad_norm": 0.1599174290895462, "learning_rate": 3.9044634318414656e-05, "loss": 0.5468, "step": 8113 }, { "epoch": 1.6680028779936273, "grad_norm": 0.18429811298847198, "learning_rate": 3.903473421151978e-05, "loss": 0.5272, "step": 8114 }, { "epoch": 1.6682084489670057, "grad_norm": 0.19133618474006653, "learning_rate": 3.902483439850566e-05, "loss": 0.538, "step": 8115 }, { "epoch": 1.6684140199403843, "grad_norm": 0.19194607436656952, "learning_rate": 3.901493487986002e-05, "loss": 0.5341, "step": 8116 }, { "epoch": 1.6686195909137629, "grad_norm": 0.16348059475421906, "learning_rate": 3.900503565607057e-05, "loss": 0.5021, "step": 8117 }, { "epoch": 1.6688251618871415, "grad_norm": 0.16237923502922058, "learning_rate": 3.899513672762499e-05, "loss": 0.5647, "step": 8118 }, { "epoch": 1.66903073286052, "grad_norm": 0.19955293834209442, "learning_rate": 3.8985238095010965e-05, "loss": 0.5687, "step": 8119 }, { "epoch": 1.6692363038338986, "grad_norm": 0.16089332103729248, "learning_rate": 3.897533975871612e-05, "loss": 0.51, "step": 8120 }, { "epoch": 1.6694418748072772, "grad_norm": 0.161229208111763, "learning_rate": 3.896544171922815e-05, "loss": 0.5291, "step": 8121 }, { "epoch": 1.6696474457806558, "grad_norm": 0.19278062880039215, "learning_rate": 3.895554397703466e-05, "loss": 0.5307, "step": 8122 }, { "epoch": 1.6698530167540344, "grad_norm": 0.16215354204177856, "learning_rate": 3.8945646532623256e-05, "loss": 0.5, "step": 8123 }, { "epoch": 1.670058587727413, "grad_norm": 0.16377978026866913, "learning_rate": 3.893574938648156e-05, "loss": 0.5362, "step": 8124 }, { "epoch": 1.6702641587007916, "grad_norm": 0.19552935659885406, "learning_rate": 3.892585253909714e-05, "loss": 0.5255, "step": 8125 }, { "epoch": 1.67046972967417, "grad_norm": 0.164475217461586, "learning_rate": 3.8915955990957575e-05, "loss": 0.4989, "step": 8126 }, { "epoch": 1.6706753006475485, "grad_norm": 0.15810781717300415, "learning_rate": 3.890605974255042e-05, "loss": 0.5276, "step": 8127 }, { "epoch": 1.6708808716209271, "grad_norm": 0.1982525885105133, "learning_rate": 3.889616379436321e-05, "loss": 0.5349, "step": 8128 }, { "epoch": 1.6710864425943057, "grad_norm": 0.15992006659507751, "learning_rate": 3.88862681468835e-05, "loss": 0.5116, "step": 8129 }, { "epoch": 1.671292013567684, "grad_norm": 0.15967024862766266, "learning_rate": 3.887637280059878e-05, "loss": 0.5657, "step": 8130 }, { "epoch": 1.6714975845410627, "grad_norm": 0.1937428116798401, "learning_rate": 3.886647775599655e-05, "loss": 0.5581, "step": 8131 }, { "epoch": 1.6717031555144413, "grad_norm": 0.18604367971420288, "learning_rate": 3.885658301356429e-05, "loss": 0.5246, "step": 8132 }, { "epoch": 1.6719087264878199, "grad_norm": 0.18401312828063965, "learning_rate": 3.884668857378947e-05, "loss": 0.5059, "step": 8133 }, { "epoch": 1.6721142974611984, "grad_norm": 0.20079663395881653, "learning_rate": 3.883679443715953e-05, "loss": 0.539, "step": 8134 }, { "epoch": 1.672319868434577, "grad_norm": 0.18646441400051117, "learning_rate": 3.882690060416194e-05, "loss": 0.548, "step": 8135 }, { "epoch": 1.6725254394079556, "grad_norm": 0.19753128290176392, "learning_rate": 3.88170070752841e-05, "loss": 0.5599, "step": 8136 }, { "epoch": 1.6727310103813342, "grad_norm": 0.19681565463542938, "learning_rate": 3.8807113851013425e-05, "loss": 0.543, "step": 8137 }, { "epoch": 1.6729365813547128, "grad_norm": 0.16268804669380188, "learning_rate": 3.879722093183729e-05, "loss": 0.5131, "step": 8138 }, { "epoch": 1.6731421523280914, "grad_norm": 0.16209396719932556, "learning_rate": 3.87873283182431e-05, "loss": 0.5467, "step": 8139 }, { "epoch": 1.67334772330147, "grad_norm": 0.19812874495983124, "learning_rate": 3.877743601071821e-05, "loss": 0.5457, "step": 8140 }, { "epoch": 1.6735532942748486, "grad_norm": 0.15637758374214172, "learning_rate": 3.8767544009749944e-05, "loss": 0.5099, "step": 8141 }, { "epoch": 1.673758865248227, "grad_norm": 0.15744930505752563, "learning_rate": 3.875765231582568e-05, "loss": 0.5549, "step": 8142 }, { "epoch": 1.6739644362216055, "grad_norm": 0.19686995446681976, "learning_rate": 3.874776092943269e-05, "loss": 0.5183, "step": 8143 }, { "epoch": 1.674170007194984, "grad_norm": 0.1597413569688797, "learning_rate": 3.8737869851058315e-05, "loss": 0.5043, "step": 8144 }, { "epoch": 1.6743755781683625, "grad_norm": 0.1251799464225769, "learning_rate": 3.872797908118982e-05, "loss": 0.5108, "step": 8145 }, { "epoch": 1.674581149141741, "grad_norm": 0.16012680530548096, "learning_rate": 3.8718088620314474e-05, "loss": 0.5168, "step": 8146 }, { "epoch": 1.6747867201151196, "grad_norm": 0.19369451701641083, "learning_rate": 3.870819846891953e-05, "loss": 0.5266, "step": 8147 }, { "epoch": 1.6749922910884982, "grad_norm": 0.19420257210731506, "learning_rate": 3.869830862749224e-05, "loss": 0.5269, "step": 8148 }, { "epoch": 1.6751978620618768, "grad_norm": 0.16983704268932343, "learning_rate": 3.8688419096519844e-05, "loss": 0.5027, "step": 8149 }, { "epoch": 1.6754034330352554, "grad_norm": 0.16026097536087036, "learning_rate": 3.8678529876489526e-05, "loss": 0.5368, "step": 8150 }, { "epoch": 1.675609004008634, "grad_norm": 0.1565508395433426, "learning_rate": 3.86686409678885e-05, "loss": 0.5045, "step": 8151 }, { "epoch": 1.6758145749820126, "grad_norm": 0.15121006965637207, "learning_rate": 3.865875237120395e-05, "loss": 0.5409, "step": 8152 }, { "epoch": 1.6760201459553912, "grad_norm": 0.19622927904129028, "learning_rate": 3.864886408692303e-05, "loss": 0.5297, "step": 8153 }, { "epoch": 1.6762257169287698, "grad_norm": 0.20082417130470276, "learning_rate": 3.863897611553289e-05, "loss": 0.5516, "step": 8154 }, { "epoch": 1.6764312879021483, "grad_norm": 0.19279861450195312, "learning_rate": 3.8629088457520645e-05, "loss": 0.5286, "step": 8155 }, { "epoch": 1.676636858875527, "grad_norm": 0.18971529603004456, "learning_rate": 3.861920111337345e-05, "loss": 0.5381, "step": 8156 }, { "epoch": 1.6768424298489053, "grad_norm": 0.18667519092559814, "learning_rate": 3.8609314083578396e-05, "loss": 0.529, "step": 8157 }, { "epoch": 1.677048000822284, "grad_norm": 0.18965506553649902, "learning_rate": 3.859942736862257e-05, "loss": 0.5504, "step": 8158 }, { "epoch": 1.6772535717956625, "grad_norm": 0.1879250854253769, "learning_rate": 3.858954096899303e-05, "loss": 0.521, "step": 8159 }, { "epoch": 1.6774591427690408, "grad_norm": 0.16116970777511597, "learning_rate": 3.8579654885176854e-05, "loss": 0.5171, "step": 8160 }, { "epoch": 1.6776647137424194, "grad_norm": 0.16163001954555511, "learning_rate": 3.856976911766107e-05, "loss": 0.5526, "step": 8161 }, { "epoch": 1.677870284715798, "grad_norm": 0.19858844578266144, "learning_rate": 3.855988366693269e-05, "loss": 0.5105, "step": 8162 }, { "epoch": 1.6780758556891766, "grad_norm": 0.19145843386650085, "learning_rate": 3.854999853347876e-05, "loss": 0.5701, "step": 8163 }, { "epoch": 1.6782814266625552, "grad_norm": 0.19304659962654114, "learning_rate": 3.854011371778625e-05, "loss": 0.5276, "step": 8164 }, { "epoch": 1.6784869976359338, "grad_norm": 0.19083738327026367, "learning_rate": 3.853022922034215e-05, "loss": 0.5204, "step": 8165 }, { "epoch": 1.6786925686093124, "grad_norm": 0.18819309771060944, "learning_rate": 3.852034504163341e-05, "loss": 0.5283, "step": 8166 }, { "epoch": 1.678898139582691, "grad_norm": 0.19191038608551025, "learning_rate": 3.851046118214699e-05, "loss": 0.5261, "step": 8167 }, { "epoch": 1.6791037105560696, "grad_norm": 0.19225665926933289, "learning_rate": 3.850057764236981e-05, "loss": 0.5282, "step": 8168 }, { "epoch": 1.6793092815294481, "grad_norm": 0.19503363966941833, "learning_rate": 3.849069442278878e-05, "loss": 0.5355, "step": 8169 }, { "epoch": 1.6795148525028267, "grad_norm": 0.19625093042850494, "learning_rate": 3.848081152389083e-05, "loss": 0.5557, "step": 8170 }, { "epoch": 1.6797204234762053, "grad_norm": 0.19365637004375458, "learning_rate": 3.8470928946162813e-05, "loss": 0.5369, "step": 8171 }, { "epoch": 1.6799259944495837, "grad_norm": 0.19885706901550293, "learning_rate": 3.8461046690091616e-05, "loss": 0.5276, "step": 8172 }, { "epoch": 1.6801315654229623, "grad_norm": 0.19316908717155457, "learning_rate": 3.845116475616409e-05, "loss": 0.5332, "step": 8173 }, { "epoch": 1.6803371363963409, "grad_norm": 0.1912158727645874, "learning_rate": 3.844128314486706e-05, "loss": 0.542, "step": 8174 }, { "epoch": 1.6805427073697194, "grad_norm": 0.1975051760673523, "learning_rate": 3.843140185668737e-05, "loss": 0.5467, "step": 8175 }, { "epoch": 1.6807482783430978, "grad_norm": 0.16475236415863037, "learning_rate": 3.8421520892111776e-05, "loss": 0.5106, "step": 8176 }, { "epoch": 1.6809538493164764, "grad_norm": 0.16820210218429565, "learning_rate": 3.841164025162713e-05, "loss": 0.5522, "step": 8177 }, { "epoch": 1.681159420289855, "grad_norm": 0.19619794189929962, "learning_rate": 3.840175993572016e-05, "loss": 0.5367, "step": 8178 }, { "epoch": 1.6813649912632336, "grad_norm": 0.19805863499641418, "learning_rate": 3.839187994487765e-05, "loss": 0.5383, "step": 8179 }, { "epoch": 1.6815705622366122, "grad_norm": 0.18975287675857544, "learning_rate": 3.838200027958632e-05, "loss": 0.5476, "step": 8180 }, { "epoch": 1.6817761332099908, "grad_norm": 0.18960921466350555, "learning_rate": 3.837212094033291e-05, "loss": 0.5452, "step": 8181 }, { "epoch": 1.6819817041833693, "grad_norm": 0.1594635397195816, "learning_rate": 3.8362241927604106e-05, "loss": 0.5045, "step": 8182 }, { "epoch": 1.682187275156748, "grad_norm": 0.1598910242319107, "learning_rate": 3.835236324188662e-05, "loss": 0.5456, "step": 8183 }, { "epoch": 1.6823928461301265, "grad_norm": 0.19848540425300598, "learning_rate": 3.834248488366714e-05, "loss": 0.5193, "step": 8184 }, { "epoch": 1.682598417103505, "grad_norm": 0.2017425149679184, "learning_rate": 3.833260685343231e-05, "loss": 0.5427, "step": 8185 }, { "epoch": 1.6828039880768837, "grad_norm": 0.19509434700012207, "learning_rate": 3.832272915166878e-05, "loss": 0.5208, "step": 8186 }, { "epoch": 1.683009559050262, "grad_norm": 0.19122706353664398, "learning_rate": 3.8312851778863176e-05, "loss": 0.5213, "step": 8187 }, { "epoch": 1.6832151300236406, "grad_norm": 0.18763068318367004, "learning_rate": 3.8302974735502104e-05, "loss": 0.5363, "step": 8188 }, { "epoch": 1.6834207009970192, "grad_norm": 0.2000308781862259, "learning_rate": 3.829309802207215e-05, "loss": 0.5397, "step": 8189 }, { "epoch": 1.6836262719703978, "grad_norm": 0.19013464450836182, "learning_rate": 3.828322163905993e-05, "loss": 0.5073, "step": 8190 }, { "epoch": 1.6838318429437762, "grad_norm": 0.19034752249717712, "learning_rate": 3.827334558695198e-05, "loss": 0.5318, "step": 8191 }, { "epoch": 1.6840374139171548, "grad_norm": 0.16001807153224945, "learning_rate": 3.8263469866234844e-05, "loss": 0.4987, "step": 8192 }, { "epoch": 1.6842429848905334, "grad_norm": 0.15920346975326538, "learning_rate": 3.825359447739507e-05, "loss": 0.5404, "step": 8193 }, { "epoch": 1.684448555863912, "grad_norm": 0.19532343745231628, "learning_rate": 3.8243719420919165e-05, "loss": 0.5134, "step": 8194 }, { "epoch": 1.6846541268372905, "grad_norm": 0.19484242796897888, "learning_rate": 3.823384469729363e-05, "loss": 0.5334, "step": 8195 }, { "epoch": 1.6848596978106691, "grad_norm": 0.20333658158779144, "learning_rate": 3.822397030700491e-05, "loss": 0.5491, "step": 8196 }, { "epoch": 1.6850652687840477, "grad_norm": 0.20554953813552856, "learning_rate": 3.821409625053953e-05, "loss": 0.5479, "step": 8197 }, { "epoch": 1.6852708397574263, "grad_norm": 0.19656214118003845, "learning_rate": 3.820422252838391e-05, "loss": 0.5334, "step": 8198 }, { "epoch": 1.685476410730805, "grad_norm": 0.19906407594680786, "learning_rate": 3.819434914102448e-05, "loss": 0.5302, "step": 8199 }, { "epoch": 1.6856819817041835, "grad_norm": 0.16761255264282227, "learning_rate": 3.818447608894767e-05, "loss": 0.5145, "step": 8200 }, { "epoch": 1.685887552677562, "grad_norm": 0.16284339129924774, "learning_rate": 3.8174603372639846e-05, "loss": 0.5399, "step": 8201 }, { "epoch": 1.6860931236509404, "grad_norm": 0.19720837473869324, "learning_rate": 3.816473099258742e-05, "loss": 0.5452, "step": 8202 }, { "epoch": 1.686298694624319, "grad_norm": 0.19352254271507263, "learning_rate": 3.8154858949276744e-05, "loss": 0.5399, "step": 8203 }, { "epoch": 1.6865042655976976, "grad_norm": 0.16425921022891998, "learning_rate": 3.814498724319418e-05, "loss": 0.5016, "step": 8204 }, { "epoch": 1.6867098365710762, "grad_norm": 0.15797263383865356, "learning_rate": 3.813511587482606e-05, "loss": 0.5325, "step": 8205 }, { "epoch": 1.6869154075444546, "grad_norm": 0.16672199964523315, "learning_rate": 3.812524484465869e-05, "loss": 0.4982, "step": 8206 }, { "epoch": 1.6871209785178332, "grad_norm": 0.2091359794139862, "learning_rate": 3.811537415317837e-05, "loss": 0.5206, "step": 8207 }, { "epoch": 1.6873265494912117, "grad_norm": 0.19015903770923615, "learning_rate": 3.81055038008714e-05, "loss": 0.5234, "step": 8208 }, { "epoch": 1.6875321204645903, "grad_norm": 0.20703433454036713, "learning_rate": 3.8095633788224024e-05, "loss": 0.5743, "step": 8209 }, { "epoch": 1.687737691437969, "grad_norm": 0.19770927727222443, "learning_rate": 3.8085764115722484e-05, "loss": 0.5482, "step": 8210 }, { "epoch": 1.6879432624113475, "grad_norm": 0.15969951450824738, "learning_rate": 3.8075894783853054e-05, "loss": 0.4893, "step": 8211 }, { "epoch": 1.688148833384726, "grad_norm": 0.15302079916000366, "learning_rate": 3.806602579310191e-05, "loss": 0.5153, "step": 8212 }, { "epoch": 1.6883544043581047, "grad_norm": 0.19498853385448456, "learning_rate": 3.805615714395527e-05, "loss": 0.546, "step": 8213 }, { "epoch": 1.6885599753314833, "grad_norm": 0.1922113597393036, "learning_rate": 3.804628883689931e-05, "loss": 0.5351, "step": 8214 }, { "epoch": 1.6887655463048619, "grad_norm": 0.19428758323192596, "learning_rate": 3.803642087242021e-05, "loss": 0.5452, "step": 8215 }, { "epoch": 1.6889711172782405, "grad_norm": 0.19760240614414215, "learning_rate": 3.8026553251004096e-05, "loss": 0.5356, "step": 8216 }, { "epoch": 1.6891766882516188, "grad_norm": 0.16687412559986115, "learning_rate": 3.8016685973137095e-05, "loss": 0.5093, "step": 8217 }, { "epoch": 1.6893822592249974, "grad_norm": 0.1642359495162964, "learning_rate": 3.800681903930535e-05, "loss": 0.5485, "step": 8218 }, { "epoch": 1.689587830198376, "grad_norm": 0.1901901662349701, "learning_rate": 3.799695244999495e-05, "loss": 0.5102, "step": 8219 }, { "epoch": 1.6897934011717546, "grad_norm": 0.19654683768749237, "learning_rate": 3.798708620569197e-05, "loss": 0.5338, "step": 8220 }, { "epoch": 1.689998972145133, "grad_norm": 0.1945556253194809, "learning_rate": 3.797722030688248e-05, "loss": 0.5369, "step": 8221 }, { "epoch": 1.6902045431185115, "grad_norm": 0.19918568432331085, "learning_rate": 3.7967354754052514e-05, "loss": 0.5409, "step": 8222 }, { "epoch": 1.6904101140918901, "grad_norm": 0.19335860013961792, "learning_rate": 3.7957489547688096e-05, "loss": 0.5451, "step": 8223 }, { "epoch": 1.6906156850652687, "grad_norm": 0.19655676186084747, "learning_rate": 3.794762468827526e-05, "loss": 0.5484, "step": 8224 }, { "epoch": 1.6908212560386473, "grad_norm": 0.20534905791282654, "learning_rate": 3.79377601763e-05, "loss": 0.5321, "step": 8225 }, { "epoch": 1.691026827012026, "grad_norm": 0.16838058829307556, "learning_rate": 3.7927896012248275e-05, "loss": 0.4978, "step": 8226 }, { "epoch": 1.6912323979854045, "grad_norm": 0.16190923750400543, "learning_rate": 3.7918032196606064e-05, "loss": 0.5627, "step": 8227 }, { "epoch": 1.691437968958783, "grad_norm": 0.197221040725708, "learning_rate": 3.790816872985931e-05, "loss": 0.5287, "step": 8228 }, { "epoch": 1.6916435399321617, "grad_norm": 0.19407358765602112, "learning_rate": 3.789830561249394e-05, "loss": 0.5409, "step": 8229 }, { "epoch": 1.6918491109055402, "grad_norm": 0.19359079003334045, "learning_rate": 3.7888442844995856e-05, "loss": 0.5378, "step": 8230 }, { "epoch": 1.6920546818789188, "grad_norm": 0.21710537374019623, "learning_rate": 3.7878580427850937e-05, "loss": 0.5545, "step": 8231 }, { "epoch": 1.6922602528522974, "grad_norm": 0.19026683270931244, "learning_rate": 3.786871836154509e-05, "loss": 0.549, "step": 8232 }, { "epoch": 1.6924658238256758, "grad_norm": 0.19044183194637299, "learning_rate": 3.785885664656415e-05, "loss": 0.5286, "step": 8233 }, { "epoch": 1.6926713947990544, "grad_norm": 0.20085959136486053, "learning_rate": 3.7848995283393984e-05, "loss": 0.5414, "step": 8234 }, { "epoch": 1.692876965772433, "grad_norm": 0.16187427937984467, "learning_rate": 3.783913427252038e-05, "loss": 0.5116, "step": 8235 }, { "epoch": 1.6930825367458113, "grad_norm": 0.16329748928546906, "learning_rate": 3.782927361442916e-05, "loss": 0.5387, "step": 8236 }, { "epoch": 1.69328810771919, "grad_norm": 0.1952928751707077, "learning_rate": 3.781941330960612e-05, "loss": 0.569, "step": 8237 }, { "epoch": 1.6934936786925685, "grad_norm": 0.19381776452064514, "learning_rate": 3.780955335853701e-05, "loss": 0.5336, "step": 8238 }, { "epoch": 1.693699249665947, "grad_norm": 0.2035483717918396, "learning_rate": 3.779969376170761e-05, "loss": 0.5385, "step": 8239 }, { "epoch": 1.6939048206393257, "grad_norm": 0.19148887693881989, "learning_rate": 3.778983451960365e-05, "loss": 0.5156, "step": 8240 }, { "epoch": 1.6941103916127043, "grad_norm": 0.17306075990200043, "learning_rate": 3.7779975632710836e-05, "loss": 0.5245, "step": 8241 }, { "epoch": 1.6943159625860829, "grad_norm": 0.12030526250600815, "learning_rate": 3.7770117101514885e-05, "loss": 0.5117, "step": 8242 }, { "epoch": 1.6945215335594614, "grad_norm": 0.15814997255802155, "learning_rate": 3.776025892650147e-05, "loss": 0.55, "step": 8243 }, { "epoch": 1.69472710453284, "grad_norm": 0.18655110895633698, "learning_rate": 3.775040110815624e-05, "loss": 0.4924, "step": 8244 }, { "epoch": 1.6949326755062186, "grad_norm": 0.19482672214508057, "learning_rate": 3.7740543646964876e-05, "loss": 0.5431, "step": 8245 }, { "epoch": 1.6951382464795972, "grad_norm": 0.1627287119626999, "learning_rate": 3.7730686543412994e-05, "loss": 0.5191, "step": 8246 }, { "epoch": 1.6953438174529758, "grad_norm": 0.17188504338264465, "learning_rate": 3.772082979798621e-05, "loss": 0.5403, "step": 8247 }, { "epoch": 1.6955493884263542, "grad_norm": 0.21016332507133484, "learning_rate": 3.7710973411170126e-05, "loss": 0.5456, "step": 8248 }, { "epoch": 1.6957549593997328, "grad_norm": 0.1925675868988037, "learning_rate": 3.770111738345031e-05, "loss": 0.5214, "step": 8249 }, { "epoch": 1.6959605303731113, "grad_norm": 0.19163696467876434, "learning_rate": 3.769126171531232e-05, "loss": 0.5354, "step": 8250 }, { "epoch": 1.69616610134649, "grad_norm": 0.16222819685935974, "learning_rate": 3.7681406407241716e-05, "loss": 0.5241, "step": 8251 }, { "epoch": 1.6963716723198683, "grad_norm": 0.16099952161312103, "learning_rate": 3.767155145972399e-05, "loss": 0.5321, "step": 8252 }, { "epoch": 1.6965772432932469, "grad_norm": 0.1959654986858368, "learning_rate": 3.766169687324468e-05, "loss": 0.54, "step": 8253 }, { "epoch": 1.6967828142666255, "grad_norm": 0.19316841661930084, "learning_rate": 3.7651842648289276e-05, "loss": 0.5356, "step": 8254 }, { "epoch": 1.696988385240004, "grad_norm": 0.202810600399971, "learning_rate": 3.7641988785343236e-05, "loss": 0.5506, "step": 8255 }, { "epoch": 1.6971939562133826, "grad_norm": 0.1944981962442398, "learning_rate": 3.763213528489201e-05, "loss": 0.5019, "step": 8256 }, { "epoch": 1.6973995271867612, "grad_norm": 0.16003461182117462, "learning_rate": 3.762228214742105e-05, "loss": 0.504, "step": 8257 }, { "epoch": 1.6976050981601398, "grad_norm": 0.15627720952033997, "learning_rate": 3.7612429373415754e-05, "loss": 0.5165, "step": 8258 }, { "epoch": 1.6978106691335184, "grad_norm": 0.19209109246730804, "learning_rate": 3.760257696336154e-05, "loss": 0.5114, "step": 8259 }, { "epoch": 1.698016240106897, "grad_norm": 0.16496042907238007, "learning_rate": 3.759272491774378e-05, "loss": 0.5113, "step": 8260 }, { "epoch": 1.6982218110802756, "grad_norm": 0.16691668331623077, "learning_rate": 3.758287323704785e-05, "loss": 0.5469, "step": 8261 }, { "epoch": 1.6984273820536542, "grad_norm": 0.20020011067390442, "learning_rate": 3.757302192175909e-05, "loss": 0.5397, "step": 8262 }, { "epoch": 1.6986329530270325, "grad_norm": 0.19349105656147003, "learning_rate": 3.756317097236282e-05, "loss": 0.5422, "step": 8263 }, { "epoch": 1.6988385240004111, "grad_norm": 0.16651464998722076, "learning_rate": 3.755332038934436e-05, "loss": 0.4836, "step": 8264 }, { "epoch": 1.6990440949737897, "grad_norm": 0.12502692639827728, "learning_rate": 3.754347017318897e-05, "loss": 0.5132, "step": 8265 }, { "epoch": 1.6992496659471683, "grad_norm": 0.12334790080785751, "learning_rate": 3.7533620324381984e-05, "loss": 0.5108, "step": 8266 }, { "epoch": 1.6994552369205467, "grad_norm": 0.12631775438785553, "learning_rate": 3.752377084340863e-05, "loss": 0.5167, "step": 8267 }, { "epoch": 1.6996608078939253, "grad_norm": 0.16306687891483307, "learning_rate": 3.7513921730754125e-05, "loss": 0.5369, "step": 8268 }, { "epoch": 1.6998663788673039, "grad_norm": 0.19654233753681183, "learning_rate": 3.750407298690372e-05, "loss": 0.52, "step": 8269 }, { "epoch": 1.7000719498406824, "grad_norm": 0.1925351619720459, "learning_rate": 3.74942246123426e-05, "loss": 0.5356, "step": 8270 }, { "epoch": 1.700277520814061, "grad_norm": 0.165648952126503, "learning_rate": 3.7484376607555954e-05, "loss": 0.5244, "step": 8271 }, { "epoch": 1.7004830917874396, "grad_norm": 0.1643042266368866, "learning_rate": 3.747452897302892e-05, "loss": 0.5356, "step": 8272 }, { "epoch": 1.7006886627608182, "grad_norm": 0.1931808739900589, "learning_rate": 3.7464681709246696e-05, "loss": 0.5371, "step": 8273 }, { "epoch": 1.7008942337341968, "grad_norm": 0.19738541543483734, "learning_rate": 3.745483481669438e-05, "loss": 0.5506, "step": 8274 }, { "epoch": 1.7010998047075754, "grad_norm": 0.1938776969909668, "learning_rate": 3.744498829585709e-05, "loss": 0.5548, "step": 8275 }, { "epoch": 1.701305375680954, "grad_norm": 0.192849263548851, "learning_rate": 3.743514214721991e-05, "loss": 0.5506, "step": 8276 }, { "epoch": 1.7015109466543326, "grad_norm": 0.19882553815841675, "learning_rate": 3.742529637126791e-05, "loss": 0.5341, "step": 8277 }, { "epoch": 1.701716517627711, "grad_norm": 0.1962941288948059, "learning_rate": 3.741545096848617e-05, "loss": 0.5582, "step": 8278 }, { "epoch": 1.7019220886010895, "grad_norm": 0.1944989264011383, "learning_rate": 3.7405605939359694e-05, "loss": 0.5621, "step": 8279 }, { "epoch": 1.702127659574468, "grad_norm": 0.19153885543346405, "learning_rate": 3.7395761284373516e-05, "loss": 0.5256, "step": 8280 }, { "epoch": 1.7023332305478467, "grad_norm": 0.1736639142036438, "learning_rate": 3.738591700401265e-05, "loss": 0.5124, "step": 8281 }, { "epoch": 1.702538801521225, "grad_norm": 0.160393625497818, "learning_rate": 3.7376073098762065e-05, "loss": 0.5521, "step": 8282 }, { "epoch": 1.7027443724946036, "grad_norm": 0.17553849518299103, "learning_rate": 3.736622956910673e-05, "loss": 0.5196, "step": 8283 }, { "epoch": 1.7029499434679822, "grad_norm": 0.16473659873008728, "learning_rate": 3.735638641553157e-05, "loss": 0.5314, "step": 8284 }, { "epoch": 1.7031555144413608, "grad_norm": 0.15864580869674683, "learning_rate": 3.734654363852153e-05, "loss": 0.4975, "step": 8285 }, { "epoch": 1.7033610854147394, "grad_norm": 0.15445110201835632, "learning_rate": 3.7336701238561504e-05, "loss": 0.5165, "step": 8286 }, { "epoch": 1.703566656388118, "grad_norm": 0.19712099432945251, "learning_rate": 3.73268592161364e-05, "loss": 0.5323, "step": 8287 }, { "epoch": 1.7037722273614966, "grad_norm": 0.9865581393241882, "learning_rate": 3.731701757173108e-05, "loss": 0.5455, "step": 8288 }, { "epoch": 1.7039777983348752, "grad_norm": 0.1986110508441925, "learning_rate": 3.730717630583038e-05, "loss": 0.5355, "step": 8289 }, { "epoch": 1.7041833693082538, "grad_norm": 0.1951410174369812, "learning_rate": 3.729733541891917e-05, "loss": 0.5557, "step": 8290 }, { "epoch": 1.7043889402816323, "grad_norm": 0.19467894732952118, "learning_rate": 3.728749491148223e-05, "loss": 0.523, "step": 8291 }, { "epoch": 1.704594511255011, "grad_norm": 0.2036120444536209, "learning_rate": 3.727765478400437e-05, "loss": 0.5361, "step": 8292 }, { "epoch": 1.7048000822283893, "grad_norm": 0.17244306206703186, "learning_rate": 3.726781503697034e-05, "loss": 0.5073, "step": 8293 }, { "epoch": 1.705005653201768, "grad_norm": 0.18532809615135193, "learning_rate": 3.7257975670864954e-05, "loss": 0.5654, "step": 8294 }, { "epoch": 1.7052112241751465, "grad_norm": 0.1989675760269165, "learning_rate": 3.724813668617292e-05, "loss": 0.5094, "step": 8295 }, { "epoch": 1.705416795148525, "grad_norm": 0.15224121510982513, "learning_rate": 3.723829808337895e-05, "loss": 0.5202, "step": 8296 }, { "epoch": 1.7056223661219034, "grad_norm": 0.1623305380344391, "learning_rate": 3.722845986296776e-05, "loss": 0.5323, "step": 8297 }, { "epoch": 1.705827937095282, "grad_norm": 0.20320528745651245, "learning_rate": 3.721862202542403e-05, "loss": 0.5185, "step": 8298 }, { "epoch": 1.7060335080686606, "grad_norm": 0.17598193883895874, "learning_rate": 3.7208784571232404e-05, "loss": 0.5198, "step": 8299 }, { "epoch": 1.7062390790420392, "grad_norm": 0.1603892594575882, "learning_rate": 3.7198947500877554e-05, "loss": 0.5437, "step": 8300 }, { "epoch": 1.7064446500154178, "grad_norm": 0.16339556872844696, "learning_rate": 3.71891108148441e-05, "loss": 0.507, "step": 8301 }, { "epoch": 1.7066502209887964, "grad_norm": 0.12459365278482437, "learning_rate": 3.717927451361665e-05, "loss": 0.5091, "step": 8302 }, { "epoch": 1.706855791962175, "grad_norm": 0.15585310757160187, "learning_rate": 3.7169438597679804e-05, "loss": 0.5252, "step": 8303 }, { "epoch": 1.7070613629355536, "grad_norm": 0.19386261701583862, "learning_rate": 3.7159603067518105e-05, "loss": 0.552, "step": 8304 }, { "epoch": 1.7072669339089321, "grad_norm": 0.19203509390354156, "learning_rate": 3.714976792361612e-05, "loss": 0.5265, "step": 8305 }, { "epoch": 1.7074725048823107, "grad_norm": 0.1666734516620636, "learning_rate": 3.713993316645839e-05, "loss": 0.5117, "step": 8306 }, { "epoch": 1.7076780758556893, "grad_norm": 0.1642848700284958, "learning_rate": 3.713009879652938e-05, "loss": 0.5366, "step": 8307 }, { "epoch": 1.7078836468290677, "grad_norm": 0.19008807837963104, "learning_rate": 3.712026481431364e-05, "loss": 0.5266, "step": 8308 }, { "epoch": 1.7080892178024463, "grad_norm": 0.19728736579418182, "learning_rate": 3.711043122029563e-05, "loss": 0.5425, "step": 8309 }, { "epoch": 1.7082947887758249, "grad_norm": 0.1897844821214676, "learning_rate": 3.71005980149598e-05, "loss": 0.5437, "step": 8310 }, { "epoch": 1.7085003597492034, "grad_norm": 0.19176128506660461, "learning_rate": 3.709076519879057e-05, "loss": 0.5434, "step": 8311 }, { "epoch": 1.7087059307225818, "grad_norm": 0.1628829389810562, "learning_rate": 3.7080932772272376e-05, "loss": 0.503, "step": 8312 }, { "epoch": 1.7089115016959604, "grad_norm": 0.16427487134933472, "learning_rate": 3.707110073588962e-05, "loss": 0.5303, "step": 8313 }, { "epoch": 1.709117072669339, "grad_norm": 0.19906549155712128, "learning_rate": 3.706126909012664e-05, "loss": 0.5186, "step": 8314 }, { "epoch": 1.7093226436427176, "grad_norm": 0.19773396849632263, "learning_rate": 3.7051437835467854e-05, "loss": 0.544, "step": 8315 }, { "epoch": 1.7095282146160962, "grad_norm": 0.19623300433158875, "learning_rate": 3.7041606972397575e-05, "loss": 0.5391, "step": 8316 }, { "epoch": 1.7097337855894748, "grad_norm": 0.1944045126438141, "learning_rate": 3.703177650140011e-05, "loss": 0.5314, "step": 8317 }, { "epoch": 1.7099393565628533, "grad_norm": 0.187159925699234, "learning_rate": 3.702194642295979e-05, "loss": 0.5218, "step": 8318 }, { "epoch": 1.710144927536232, "grad_norm": 0.19343869388103485, "learning_rate": 3.701211673756087e-05, "loss": 0.5396, "step": 8319 }, { "epoch": 1.7103504985096105, "grad_norm": 0.1905796229839325, "learning_rate": 3.700228744568762e-05, "loss": 0.5374, "step": 8320 }, { "epoch": 1.710556069482989, "grad_norm": 0.19070343673229218, "learning_rate": 3.6992458547824285e-05, "loss": 0.5484, "step": 8321 }, { "epoch": 1.7107616404563677, "grad_norm": 0.1975802779197693, "learning_rate": 3.69826300444551e-05, "loss": 0.5266, "step": 8322 }, { "epoch": 1.7109672114297463, "grad_norm": 0.18827404081821442, "learning_rate": 3.6972801936064244e-05, "loss": 0.5176, "step": 8323 }, { "epoch": 1.7111727824031246, "grad_norm": 0.21654710173606873, "learning_rate": 3.6962974223135936e-05, "loss": 0.5777, "step": 8324 }, { "epoch": 1.7113783533765032, "grad_norm": 0.19230619072914124, "learning_rate": 3.695314690615432e-05, "loss": 0.5154, "step": 8325 }, { "epoch": 1.7115839243498818, "grad_norm": 0.16557452082633972, "learning_rate": 3.694331998560354e-05, "loss": 0.5188, "step": 8326 }, { "epoch": 1.7117894953232602, "grad_norm": 0.15893855690956116, "learning_rate": 3.693349346196773e-05, "loss": 0.5331, "step": 8327 }, { "epoch": 1.7119950662966388, "grad_norm": 0.19809909164905548, "learning_rate": 3.692366733573098e-05, "loss": 0.5245, "step": 8328 }, { "epoch": 1.7122006372700174, "grad_norm": 0.19923657178878784, "learning_rate": 3.691384160737741e-05, "loss": 0.5283, "step": 8329 }, { "epoch": 1.712406208243396, "grad_norm": 0.2039818912744522, "learning_rate": 3.690401627739107e-05, "loss": 0.5497, "step": 8330 }, { "epoch": 1.7126117792167745, "grad_norm": 0.194504514336586, "learning_rate": 3.6894191346255995e-05, "loss": 0.5249, "step": 8331 }, { "epoch": 1.7128173501901531, "grad_norm": 0.535821795463562, "learning_rate": 3.688436681445623e-05, "loss": 0.5977, "step": 8332 }, { "epoch": 1.7130229211635317, "grad_norm": 0.19687367975711823, "learning_rate": 3.687454268247578e-05, "loss": 0.5581, "step": 8333 }, { "epoch": 1.7132284921369103, "grad_norm": 0.1983097642660141, "learning_rate": 3.686471895079863e-05, "loss": 0.5311, "step": 8334 }, { "epoch": 1.713434063110289, "grad_norm": 0.19524888694286346, "learning_rate": 3.685489561990875e-05, "loss": 0.558, "step": 8335 }, { "epoch": 1.7136396340836675, "grad_norm": 0.19910888373851776, "learning_rate": 3.684507269029011e-05, "loss": 0.547, "step": 8336 }, { "epoch": 1.713845205057046, "grad_norm": 0.1981588751077652, "learning_rate": 3.683525016242662e-05, "loss": 0.5375, "step": 8337 }, { "epoch": 1.7140507760304247, "grad_norm": 0.20191727578639984, "learning_rate": 3.6825428036802184e-05, "loss": 0.5574, "step": 8338 }, { "epoch": 1.714256347003803, "grad_norm": 0.16322053968906403, "learning_rate": 3.681560631390071e-05, "loss": 0.5131, "step": 8339 }, { "epoch": 1.7144619179771816, "grad_norm": 0.19834211468696594, "learning_rate": 3.6805784994206056e-05, "loss": 0.5583, "step": 8340 }, { "epoch": 1.7146674889505602, "grad_norm": 0.20397832989692688, "learning_rate": 3.679596407820205e-05, "loss": 0.5192, "step": 8341 }, { "epoch": 1.7148730599239388, "grad_norm": 0.19556482136249542, "learning_rate": 3.678614356637258e-05, "loss": 0.528, "step": 8342 }, { "epoch": 1.7150786308973172, "grad_norm": 0.18456198275089264, "learning_rate": 3.6776323459201415e-05, "loss": 0.4952, "step": 8343 }, { "epoch": 1.7152842018706957, "grad_norm": 0.19880907237529755, "learning_rate": 3.676650375717235e-05, "loss": 0.5528, "step": 8344 }, { "epoch": 1.7154897728440743, "grad_norm": 0.19653092324733734, "learning_rate": 3.6756684460769175e-05, "loss": 0.5341, "step": 8345 }, { "epoch": 1.715695343817453, "grad_norm": 0.5179283022880554, "learning_rate": 3.674686557047562e-05, "loss": 0.5772, "step": 8346 }, { "epoch": 1.7159009147908315, "grad_norm": 0.1959078460931778, "learning_rate": 3.673704708677543e-05, "loss": 0.5262, "step": 8347 }, { "epoch": 1.71610648576421, "grad_norm": 0.19857066869735718, "learning_rate": 3.672722901015228e-05, "loss": 0.5572, "step": 8348 }, { "epoch": 1.7163120567375887, "grad_norm": 0.19778084754943848, "learning_rate": 3.6717411341089914e-05, "loss": 0.5264, "step": 8349 }, { "epoch": 1.7165176277109673, "grad_norm": 0.2006417065858841, "learning_rate": 3.670759408007199e-05, "loss": 0.5192, "step": 8350 }, { "epoch": 1.7167231986843459, "grad_norm": 0.19210219383239746, "learning_rate": 3.669777722758213e-05, "loss": 0.5253, "step": 8351 }, { "epoch": 1.7169287696577245, "grad_norm": 0.19173528254032135, "learning_rate": 3.668796078410399e-05, "loss": 0.5447, "step": 8352 }, { "epoch": 1.717134340631103, "grad_norm": 0.19798819720745087, "learning_rate": 3.667814475012116e-05, "loss": 0.5544, "step": 8353 }, { "epoch": 1.7173399116044814, "grad_norm": 0.19367478787899017, "learning_rate": 3.666832912611725e-05, "loss": 0.546, "step": 8354 }, { "epoch": 1.71754548257786, "grad_norm": 0.19712290167808533, "learning_rate": 3.665851391257582e-05, "loss": 0.5339, "step": 8355 }, { "epoch": 1.7177510535512386, "grad_norm": 0.19337862730026245, "learning_rate": 3.6648699109980416e-05, "loss": 0.5559, "step": 8356 }, { "epoch": 1.7179566245246172, "grad_norm": 0.19475507736206055, "learning_rate": 3.6638884718814584e-05, "loss": 0.5432, "step": 8357 }, { "epoch": 1.7181621954979955, "grad_norm": 0.18744108080863953, "learning_rate": 3.6629070739561816e-05, "loss": 0.5275, "step": 8358 }, { "epoch": 1.7183677664713741, "grad_norm": 0.18683594465255737, "learning_rate": 3.661925717270561e-05, "loss": 0.5345, "step": 8359 }, { "epoch": 1.7185733374447527, "grad_norm": 0.1923644095659256, "learning_rate": 3.660944401872944e-05, "loss": 0.534, "step": 8360 }, { "epoch": 1.7187789084181313, "grad_norm": 0.19515560567378998, "learning_rate": 3.6599631278116735e-05, "loss": 0.5591, "step": 8361 }, { "epoch": 1.71898447939151, "grad_norm": 0.19667771458625793, "learning_rate": 3.658981895135092e-05, "loss": 0.5374, "step": 8362 }, { "epoch": 1.7191900503648885, "grad_norm": 0.19800591468811035, "learning_rate": 3.6580007038915436e-05, "loss": 0.5309, "step": 8363 }, { "epoch": 1.719395621338267, "grad_norm": 0.1907908171415329, "learning_rate": 3.657019554129365e-05, "loss": 0.5261, "step": 8364 }, { "epoch": 1.7196011923116457, "grad_norm": 0.195295050740242, "learning_rate": 3.656038445896891e-05, "loss": 0.5586, "step": 8365 }, { "epoch": 1.7198067632850242, "grad_norm": 0.1947198510169983, "learning_rate": 3.6550573792424606e-05, "loss": 0.5474, "step": 8366 }, { "epoch": 1.7200123342584028, "grad_norm": 0.1944238245487213, "learning_rate": 3.654076354214403e-05, "loss": 0.543, "step": 8367 }, { "epoch": 1.7202179052317814, "grad_norm": 0.18714429438114166, "learning_rate": 3.6530953708610496e-05, "loss": 0.5199, "step": 8368 }, { "epoch": 1.7204234762051598, "grad_norm": 0.16641157865524292, "learning_rate": 3.652114429230727e-05, "loss": 0.5063, "step": 8369 }, { "epoch": 1.7206290471785384, "grad_norm": 0.1247912049293518, "learning_rate": 3.651133529371765e-05, "loss": 0.5055, "step": 8370 }, { "epoch": 1.720834618151917, "grad_norm": 0.12845945358276367, "learning_rate": 3.650152671332487e-05, "loss": 0.5052, "step": 8371 }, { "epoch": 1.7210401891252955, "grad_norm": 0.15474404394626617, "learning_rate": 3.6491718551612146e-05, "loss": 0.5453, "step": 8372 }, { "epoch": 1.721245760098674, "grad_norm": 0.19407010078430176, "learning_rate": 3.648191080906268e-05, "loss": 0.5121, "step": 8373 }, { "epoch": 1.7214513310720525, "grad_norm": 0.16241449117660522, "learning_rate": 3.647210348615964e-05, "loss": 0.5118, "step": 8374 }, { "epoch": 1.721656902045431, "grad_norm": 0.1552891731262207, "learning_rate": 3.6462296583386225e-05, "loss": 0.5206, "step": 8375 }, { "epoch": 1.7218624730188097, "grad_norm": 0.194035142660141, "learning_rate": 3.6452490101225536e-05, "loss": 0.5528, "step": 8376 }, { "epoch": 1.7220680439921883, "grad_norm": 0.2007959634065628, "learning_rate": 3.6442684040160704e-05, "loss": 0.5216, "step": 8377 }, { "epoch": 1.7222736149655669, "grad_norm": 0.17652183771133423, "learning_rate": 3.643287840067485e-05, "loss": 0.5151, "step": 8378 }, { "epoch": 1.7224791859389454, "grad_norm": 0.15610848367214203, "learning_rate": 3.6423073183251024e-05, "loss": 0.5385, "step": 8379 }, { "epoch": 1.722684756912324, "grad_norm": 0.19610700011253357, "learning_rate": 3.641326838837231e-05, "loss": 0.5363, "step": 8380 }, { "epoch": 1.7228903278857026, "grad_norm": 0.1605963259935379, "learning_rate": 3.6403464016521716e-05, "loss": 0.5102, "step": 8381 }, { "epoch": 1.7230958988590812, "grad_norm": 0.15825892984867096, "learning_rate": 3.639366006818227e-05, "loss": 0.5264, "step": 8382 }, { "epoch": 1.7233014698324598, "grad_norm": 0.156993106007576, "learning_rate": 3.638385654383695e-05, "loss": 0.4904, "step": 8383 }, { "epoch": 1.7235070408058382, "grad_norm": 0.1612616777420044, "learning_rate": 3.637405344396877e-05, "loss": 0.5488, "step": 8384 }, { "epoch": 1.7237126117792168, "grad_norm": 0.19856800138950348, "learning_rate": 3.6364250769060654e-05, "loss": 0.5246, "step": 8385 }, { "epoch": 1.7239181827525953, "grad_norm": 0.19383041560649872, "learning_rate": 3.6354448519595526e-05, "loss": 0.5251, "step": 8386 }, { "epoch": 1.724123753725974, "grad_norm": 0.15535280108451843, "learning_rate": 3.634464669605633e-05, "loss": 0.4938, "step": 8387 }, { "epoch": 1.7243293246993523, "grad_norm": 0.1630435734987259, "learning_rate": 3.633484529892593e-05, "loss": 0.5444, "step": 8388 }, { "epoch": 1.7245348956727309, "grad_norm": 0.2020839899778366, "learning_rate": 3.6325044328687194e-05, "loss": 0.5377, "step": 8389 }, { "epoch": 1.7247404666461095, "grad_norm": 0.16391253471374512, "learning_rate": 3.631524378582297e-05, "loss": 0.4937, "step": 8390 }, { "epoch": 1.724946037619488, "grad_norm": 0.12773092091083527, "learning_rate": 3.630544367081611e-05, "loss": 0.5292, "step": 8391 }, { "epoch": 1.7251516085928666, "grad_norm": 0.16146111488342285, "learning_rate": 3.62956439841494e-05, "loss": 0.5185, "step": 8392 }, { "epoch": 1.7253571795662452, "grad_norm": 0.19887956976890564, "learning_rate": 3.6285844726305624e-05, "loss": 0.5424, "step": 8393 }, { "epoch": 1.7255627505396238, "grad_norm": 0.19647051393985748, "learning_rate": 3.627604589776755e-05, "loss": 0.5365, "step": 8394 }, { "epoch": 1.7257683215130024, "grad_norm": 0.16327311098575592, "learning_rate": 3.626624749901792e-05, "loss": 0.523, "step": 8395 }, { "epoch": 1.725973892486381, "grad_norm": 0.16111861169338226, "learning_rate": 3.625644953053945e-05, "loss": 0.5296, "step": 8396 }, { "epoch": 1.7261794634597596, "grad_norm": 0.20019720494747162, "learning_rate": 3.624665199281483e-05, "loss": 0.5494, "step": 8397 }, { "epoch": 1.7263850344331382, "grad_norm": 0.1936234086751938, "learning_rate": 3.623685488632678e-05, "loss": 0.5291, "step": 8398 }, { "epoch": 1.7265906054065168, "grad_norm": 0.20377790927886963, "learning_rate": 3.6227058211557906e-05, "loss": 0.5492, "step": 8399 }, { "epoch": 1.7267961763798951, "grad_norm": 0.16904407739639282, "learning_rate": 3.621726196899089e-05, "loss": 0.5124, "step": 8400 }, { "epoch": 1.7270017473532737, "grad_norm": 0.1632084995508194, "learning_rate": 3.620746615910832e-05, "loss": 0.5514, "step": 8401 }, { "epoch": 1.7272073183266523, "grad_norm": 0.16341902315616608, "learning_rate": 3.61976707823928e-05, "loss": 0.5038, "step": 8402 }, { "epoch": 1.7274128893000307, "grad_norm": 0.1626911461353302, "learning_rate": 3.61878758393269e-05, "loss": 0.5417, "step": 8403 }, { "epoch": 1.7276184602734093, "grad_norm": 0.2055915892124176, "learning_rate": 3.617808133039314e-05, "loss": 0.531, "step": 8404 }, { "epoch": 1.7278240312467879, "grad_norm": 0.1969294250011444, "learning_rate": 3.616828725607411e-05, "loss": 0.5347, "step": 8405 }, { "epoch": 1.7280296022201664, "grad_norm": 0.1655907779932022, "learning_rate": 3.6158493616852276e-05, "loss": 0.5059, "step": 8406 }, { "epoch": 1.728235173193545, "grad_norm": 0.1626054346561432, "learning_rate": 3.6148700413210144e-05, "loss": 0.5243, "step": 8407 }, { "epoch": 1.7284407441669236, "grad_norm": 0.19298885762691498, "learning_rate": 3.613890764563016e-05, "loss": 0.5355, "step": 8408 }, { "epoch": 1.7286463151403022, "grad_norm": 0.20283274352550507, "learning_rate": 3.61291153145948e-05, "loss": 0.5398, "step": 8409 }, { "epoch": 1.7288518861136808, "grad_norm": 0.19936981797218323, "learning_rate": 3.6119323420586446e-05, "loss": 0.5374, "step": 8410 }, { "epoch": 1.7290574570870594, "grad_norm": 0.16159012913703918, "learning_rate": 3.610953196408752e-05, "loss": 0.4839, "step": 8411 }, { "epoch": 1.729263028060438, "grad_norm": 0.16305240988731384, "learning_rate": 3.609974094558041e-05, "loss": 0.5284, "step": 8412 }, { "epoch": 1.7294685990338166, "grad_norm": 0.1939508616924286, "learning_rate": 3.608995036554746e-05, "loss": 0.5127, "step": 8413 }, { "epoch": 1.7296741700071951, "grad_norm": 0.1960534304380417, "learning_rate": 3.608016022447102e-05, "loss": 0.5506, "step": 8414 }, { "epoch": 1.7298797409805735, "grad_norm": 0.18489257991313934, "learning_rate": 3.607037052283339e-05, "loss": 0.5321, "step": 8415 }, { "epoch": 1.730085311953952, "grad_norm": 0.1943347156047821, "learning_rate": 3.606058126111686e-05, "loss": 0.5447, "step": 8416 }, { "epoch": 1.7302908829273307, "grad_norm": 0.199358269572258, "learning_rate": 3.60507924398037e-05, "loss": 0.553, "step": 8417 }, { "epoch": 1.7304964539007093, "grad_norm": 0.16631248593330383, "learning_rate": 3.6041004059376176e-05, "loss": 0.4963, "step": 8418 }, { "epoch": 1.7307020248740876, "grad_norm": 0.210128515958786, "learning_rate": 3.603121612031652e-05, "loss": 0.5068, "step": 8419 }, { "epoch": 1.7309075958474662, "grad_norm": 0.16205939650535583, "learning_rate": 3.602142862310691e-05, "loss": 0.5304, "step": 8420 }, { "epoch": 1.7311131668208448, "grad_norm": 0.1637234389781952, "learning_rate": 3.601164156822956e-05, "loss": 0.498, "step": 8421 }, { "epoch": 1.7313187377942234, "grad_norm": 0.12157563865184784, "learning_rate": 3.600185495616661e-05, "loss": 0.5283, "step": 8422 }, { "epoch": 1.731524308767602, "grad_norm": 0.1593407392501831, "learning_rate": 3.599206878740021e-05, "loss": 0.5318, "step": 8423 }, { "epoch": 1.7317298797409806, "grad_norm": 0.16835933923721313, "learning_rate": 3.598228306241247e-05, "loss": 0.5268, "step": 8424 }, { "epoch": 1.7319354507143592, "grad_norm": 0.12342957407236099, "learning_rate": 3.59724977816855e-05, "loss": 0.5118, "step": 8425 }, { "epoch": 1.7321410216877378, "grad_norm": 0.15400569140911102, "learning_rate": 3.596271294570138e-05, "loss": 0.535, "step": 8426 }, { "epoch": 1.7323465926611163, "grad_norm": 0.19436071813106537, "learning_rate": 3.595292855494215e-05, "loss": 0.5485, "step": 8427 }, { "epoch": 1.732552163634495, "grad_norm": 0.16538384556770325, "learning_rate": 3.594314460988984e-05, "loss": 0.4909, "step": 8428 }, { "epoch": 1.7327577346078735, "grad_norm": 0.15564298629760742, "learning_rate": 3.5933361111026453e-05, "loss": 0.5438, "step": 8429 }, { "epoch": 1.732963305581252, "grad_norm": 0.19588908553123474, "learning_rate": 3.5923578058834e-05, "loss": 0.5485, "step": 8430 }, { "epoch": 1.7331688765546305, "grad_norm": 0.19124017655849457, "learning_rate": 3.5913795453794427e-05, "loss": 0.5295, "step": 8431 }, { "epoch": 1.733374447528009, "grad_norm": 0.15818458795547485, "learning_rate": 3.5904013296389686e-05, "loss": 0.5142, "step": 8432 }, { "epoch": 1.7335800185013877, "grad_norm": 0.15775617957115173, "learning_rate": 3.5894231587101694e-05, "loss": 0.5282, "step": 8433 }, { "epoch": 1.733785589474766, "grad_norm": 0.16275940835475922, "learning_rate": 3.588445032641236e-05, "loss": 0.5012, "step": 8434 }, { "epoch": 1.7339911604481446, "grad_norm": 0.15710069239139557, "learning_rate": 3.5874669514803545e-05, "loss": 0.5309, "step": 8435 }, { "epoch": 1.7341967314215232, "grad_norm": 0.19356967508792877, "learning_rate": 3.586488915275711e-05, "loss": 0.5344, "step": 8436 }, { "epoch": 1.7344023023949018, "grad_norm": 0.19396322965621948, "learning_rate": 3.58551092407549e-05, "loss": 0.5279, "step": 8437 }, { "epoch": 1.7346078733682804, "grad_norm": 0.20493246614933014, "learning_rate": 3.5845329779278694e-05, "loss": 0.5537, "step": 8438 }, { "epoch": 1.734813444341659, "grad_norm": 0.18893173336982727, "learning_rate": 3.583555076881031e-05, "loss": 0.5145, "step": 8439 }, { "epoch": 1.7350190153150375, "grad_norm": 0.19152796268463135, "learning_rate": 3.5825772209831517e-05, "loss": 0.514, "step": 8440 }, { "epoch": 1.7352245862884161, "grad_norm": 0.1870860904455185, "learning_rate": 3.581599410282403e-05, "loss": 0.5234, "step": 8441 }, { "epoch": 1.7354301572617947, "grad_norm": 0.1898457258939743, "learning_rate": 3.58062164482696e-05, "loss": 0.5324, "step": 8442 }, { "epoch": 1.7356357282351733, "grad_norm": 0.19367991387844086, "learning_rate": 3.579643924664991e-05, "loss": 0.5443, "step": 8443 }, { "epoch": 1.735841299208552, "grad_norm": 0.1994738131761551, "learning_rate": 3.5786662498446645e-05, "loss": 0.5449, "step": 8444 }, { "epoch": 1.7360468701819303, "grad_norm": 0.16127611696720123, "learning_rate": 3.577688620414143e-05, "loss": 0.5126, "step": 8445 }, { "epoch": 1.7362524411553089, "grad_norm": 0.16468468308448792, "learning_rate": 3.5767110364215954e-05, "loss": 0.5467, "step": 8446 }, { "epoch": 1.7364580121286874, "grad_norm": 0.20566272735595703, "learning_rate": 3.575733497915179e-05, "loss": 0.5503, "step": 8447 }, { "epoch": 1.736663583102066, "grad_norm": 0.2016957551240921, "learning_rate": 3.5747560049430526e-05, "loss": 0.534, "step": 8448 }, { "epoch": 1.7368691540754444, "grad_norm": 0.6397230625152588, "learning_rate": 3.573778557553374e-05, "loss": 0.5599, "step": 8449 }, { "epoch": 1.737074725048823, "grad_norm": 0.191917285323143, "learning_rate": 3.572801155794295e-05, "loss": 0.5208, "step": 8450 }, { "epoch": 1.7372802960222016, "grad_norm": 0.20242702960968018, "learning_rate": 3.571823799713971e-05, "loss": 0.5409, "step": 8451 }, { "epoch": 1.7374858669955802, "grad_norm": 0.16706420481204987, "learning_rate": 3.570846489360549e-05, "loss": 0.5102, "step": 8452 }, { "epoch": 1.7376914379689588, "grad_norm": 0.1416233628988266, "learning_rate": 3.569869224782177e-05, "loss": 0.5132, "step": 8453 }, { "epoch": 1.7378970089423373, "grad_norm": 0.16062797605991364, "learning_rate": 3.568892006027003e-05, "loss": 0.5522, "step": 8454 }, { "epoch": 1.738102579915716, "grad_norm": 0.16195809841156006, "learning_rate": 3.5679148331431666e-05, "loss": 0.4988, "step": 8455 }, { "epoch": 1.7383081508890945, "grad_norm": 0.13933859765529633, "learning_rate": 3.5669377061788104e-05, "loss": 0.5216, "step": 8456 }, { "epoch": 1.738513721862473, "grad_norm": 0.16723297536373138, "learning_rate": 3.565960625182073e-05, "loss": 0.5381, "step": 8457 }, { "epoch": 1.7387192928358517, "grad_norm": 0.20578205585479736, "learning_rate": 3.564983590201089e-05, "loss": 0.5171, "step": 8458 }, { "epoch": 1.7389248638092303, "grad_norm": 0.20782290399074554, "learning_rate": 3.564006601283992e-05, "loss": 0.5442, "step": 8459 }, { "epoch": 1.7391304347826086, "grad_norm": 0.19257017970085144, "learning_rate": 3.563029658478916e-05, "loss": 0.5502, "step": 8460 }, { "epoch": 1.7393360057559872, "grad_norm": 0.19143828749656677, "learning_rate": 3.56205276183399e-05, "loss": 0.5325, "step": 8461 }, { "epoch": 1.7395415767293658, "grad_norm": 0.19385689496994019, "learning_rate": 3.5610759113973395e-05, "loss": 0.5194, "step": 8462 }, { "epoch": 1.7397471477027444, "grad_norm": 0.1967114955186844, "learning_rate": 3.560099107217091e-05, "loss": 0.5313, "step": 8463 }, { "epoch": 1.7399527186761228, "grad_norm": 0.16215933859348297, "learning_rate": 3.559122349341366e-05, "loss": 0.5018, "step": 8464 }, { "epoch": 1.7401582896495014, "grad_norm": 0.1343732327222824, "learning_rate": 3.558145637818286e-05, "loss": 0.519, "step": 8465 }, { "epoch": 1.74036386062288, "grad_norm": 0.15892748534679413, "learning_rate": 3.557168972695966e-05, "loss": 0.5512, "step": 8466 }, { "epoch": 1.7405694315962585, "grad_norm": 0.2068302482366562, "learning_rate": 3.556192354022525e-05, "loss": 0.5618, "step": 8467 }, { "epoch": 1.7407750025696371, "grad_norm": 0.20231375098228455, "learning_rate": 3.555215781846077e-05, "loss": 0.5403, "step": 8468 }, { "epoch": 1.7409805735430157, "grad_norm": 0.18931826949119568, "learning_rate": 3.5542392562147305e-05, "loss": 0.5406, "step": 8469 }, { "epoch": 1.7411861445163943, "grad_norm": 0.1967364251613617, "learning_rate": 3.553262777176596e-05, "loss": 0.5488, "step": 8470 }, { "epoch": 1.741391715489773, "grad_norm": 0.16582554578781128, "learning_rate": 3.552286344779779e-05, "loss": 0.5162, "step": 8471 }, { "epoch": 1.7415972864631515, "grad_norm": 0.16116267442703247, "learning_rate": 3.551309959072383e-05, "loss": 0.5275, "step": 8472 }, { "epoch": 1.74180285743653, "grad_norm": 0.19118881225585938, "learning_rate": 3.550333620102512e-05, "loss": 0.5363, "step": 8473 }, { "epoch": 1.7420084284099087, "grad_norm": 0.1949569284915924, "learning_rate": 3.549357327918264e-05, "loss": 0.5546, "step": 8474 }, { "epoch": 1.742213999383287, "grad_norm": 0.2024715095758438, "learning_rate": 3.548381082567738e-05, "loss": 0.5318, "step": 8475 }, { "epoch": 1.7424195703566656, "grad_norm": 0.21180285513401031, "learning_rate": 3.5474048840990286e-05, "loss": 0.5362, "step": 8476 }, { "epoch": 1.7426251413300442, "grad_norm": 0.1865611970424652, "learning_rate": 3.546428732560228e-05, "loss": 0.4995, "step": 8477 }, { "epoch": 1.7428307123034228, "grad_norm": 0.17171883583068848, "learning_rate": 3.545452627999427e-05, "loss": 0.5554, "step": 8478 }, { "epoch": 1.7430362832768012, "grad_norm": 0.2119421362876892, "learning_rate": 3.544476570464713e-05, "loss": 0.5312, "step": 8479 }, { "epoch": 1.7432418542501797, "grad_norm": 0.1979297697544098, "learning_rate": 3.543500560004171e-05, "loss": 0.5277, "step": 8480 }, { "epoch": 1.7434474252235583, "grad_norm": 0.19431854784488678, "learning_rate": 3.542524596665887e-05, "loss": 0.5307, "step": 8481 }, { "epoch": 1.743652996196937, "grad_norm": 0.18718986213207245, "learning_rate": 3.5415486804979417e-05, "loss": 0.5164, "step": 8482 }, { "epoch": 1.7438585671703155, "grad_norm": 0.19408833980560303, "learning_rate": 3.540572811548412e-05, "loss": 0.5484, "step": 8483 }, { "epoch": 1.744064138143694, "grad_norm": 0.19318553805351257, "learning_rate": 3.539596989865375e-05, "loss": 0.523, "step": 8484 }, { "epoch": 1.7442697091170727, "grad_norm": 0.16727426648139954, "learning_rate": 3.538621215496907e-05, "loss": 0.5504, "step": 8485 }, { "epoch": 1.7444752800904513, "grad_norm": 0.16280822455883026, "learning_rate": 3.537645488491078e-05, "loss": 0.538, "step": 8486 }, { "epoch": 1.7446808510638299, "grad_norm": 0.19524060189723969, "learning_rate": 3.5366698088959557e-05, "loss": 0.5333, "step": 8487 }, { "epoch": 1.7448864220372085, "grad_norm": 0.1601538062095642, "learning_rate": 3.535694176759611e-05, "loss": 0.4854, "step": 8488 }, { "epoch": 1.745091993010587, "grad_norm": 0.16524933278560638, "learning_rate": 3.534718592130107e-05, "loss": 0.5261, "step": 8489 }, { "epoch": 1.7452975639839656, "grad_norm": 0.20658870041370392, "learning_rate": 3.5337430550555065e-05, "loss": 0.5592, "step": 8490 }, { "epoch": 1.745503134957344, "grad_norm": 0.20213808119297028, "learning_rate": 3.5327675655838694e-05, "loss": 0.5413, "step": 8491 }, { "epoch": 1.7457087059307226, "grad_norm": 0.20643405616283417, "learning_rate": 3.531792123763253e-05, "loss": 0.5504, "step": 8492 }, { "epoch": 1.7459142769041012, "grad_norm": 0.1972249150276184, "learning_rate": 3.5308167296417125e-05, "loss": 0.5359, "step": 8493 }, { "epoch": 1.7461198478774795, "grad_norm": 0.19785918295383453, "learning_rate": 3.529841383267303e-05, "loss": 0.5605, "step": 8494 }, { "epoch": 1.7463254188508581, "grad_norm": 0.3983357846736908, "learning_rate": 3.528866084688074e-05, "loss": 0.5958, "step": 8495 }, { "epoch": 1.7465309898242367, "grad_norm": 0.21267639100551605, "learning_rate": 3.527890833952073e-05, "loss": 0.5268, "step": 8496 }, { "epoch": 1.7467365607976153, "grad_norm": 0.16559986770153046, "learning_rate": 3.5269156311073484e-05, "loss": 0.5056, "step": 8497 }, { "epoch": 1.746942131770994, "grad_norm": 0.15838290750980377, "learning_rate": 3.5259404762019416e-05, "loss": 0.552, "step": 8498 }, { "epoch": 1.7471477027443725, "grad_norm": 0.19080090522766113, "learning_rate": 3.524965369283896e-05, "loss": 0.5234, "step": 8499 }, { "epoch": 1.747353273717751, "grad_norm": 0.166726753115654, "learning_rate": 3.5239903104012464e-05, "loss": 0.5099, "step": 8500 }, { "epoch": 1.7475588446911297, "grad_norm": 0.15904779732227325, "learning_rate": 3.5230152996020346e-05, "loss": 0.5136, "step": 8501 }, { "epoch": 1.7477644156645082, "grad_norm": 0.19941627979278564, "learning_rate": 3.522040336934293e-05, "loss": 0.5499, "step": 8502 }, { "epoch": 1.7479699866378868, "grad_norm": 0.20110583305358887, "learning_rate": 3.521065422446052e-05, "loss": 0.5503, "step": 8503 }, { "epoch": 1.7481755576112654, "grad_norm": 0.20107027888298035, "learning_rate": 3.520090556185343e-05, "loss": 0.5605, "step": 8504 }, { "epoch": 1.748381128584644, "grad_norm": 0.18705639243125916, "learning_rate": 3.51911573820019e-05, "loss": 0.5163, "step": 8505 }, { "epoch": 1.7485866995580224, "grad_norm": 0.19800741970539093, "learning_rate": 3.518140968538622e-05, "loss": 0.4896, "step": 8506 }, { "epoch": 1.748792270531401, "grad_norm": 0.19224296510219574, "learning_rate": 3.517166247248659e-05, "loss": 0.5034, "step": 8507 }, { "epoch": 1.7489978415047795, "grad_norm": 0.20960035920143127, "learning_rate": 3.51619157437832e-05, "loss": 0.5245, "step": 8508 }, { "epoch": 1.7492034124781581, "grad_norm": 0.19137395918369293, "learning_rate": 3.5152169499756256e-05, "loss": 0.5284, "step": 8509 }, { "epoch": 1.7494089834515365, "grad_norm": 0.19862139225006104, "learning_rate": 3.514242374088588e-05, "loss": 0.5506, "step": 8510 }, { "epoch": 1.749614554424915, "grad_norm": 0.19606275856494904, "learning_rate": 3.5132678467652226e-05, "loss": 0.5297, "step": 8511 }, { "epoch": 1.7498201253982937, "grad_norm": 0.1958342045545578, "learning_rate": 3.512293368053537e-05, "loss": 0.5255, "step": 8512 }, { "epoch": 1.7500256963716723, "grad_norm": 0.16824734210968018, "learning_rate": 3.511318938001542e-05, "loss": 0.5178, "step": 8513 }, { "epoch": 1.7502312673450509, "grad_norm": 0.162201389670372, "learning_rate": 3.510344556657239e-05, "loss": 0.5169, "step": 8514 }, { "epoch": 1.7504368383184294, "grad_norm": 0.20394515991210938, "learning_rate": 3.509370224068637e-05, "loss": 0.5215, "step": 8515 }, { "epoch": 1.750642409291808, "grad_norm": 0.2038257122039795, "learning_rate": 3.508395940283733e-05, "loss": 0.5277, "step": 8516 }, { "epoch": 1.7508479802651866, "grad_norm": 0.19794686138629913, "learning_rate": 3.507421705350526e-05, "loss": 0.5379, "step": 8517 }, { "epoch": 1.7510535512385652, "grad_norm": 0.19401569664478302, "learning_rate": 3.506447519317012e-05, "loss": 0.5313, "step": 8518 }, { "epoch": 1.7512591222119438, "grad_norm": 0.1934097856283188, "learning_rate": 3.5054733822311856e-05, "loss": 0.5291, "step": 8519 }, { "epoch": 1.7514646931853224, "grad_norm": 0.19061771035194397, "learning_rate": 3.5044992941410374e-05, "loss": 0.5239, "step": 8520 }, { "epoch": 1.7516702641587008, "grad_norm": 0.19829559326171875, "learning_rate": 3.503525255094554e-05, "loss": 0.5532, "step": 8521 }, { "epoch": 1.7518758351320793, "grad_norm": 0.1951601654291153, "learning_rate": 3.502551265139726e-05, "loss": 0.5366, "step": 8522 }, { "epoch": 1.752081406105458, "grad_norm": 0.18550780415534973, "learning_rate": 3.501577324324535e-05, "loss": 0.5199, "step": 8523 }, { "epoch": 1.7522869770788365, "grad_norm": 0.19197461009025574, "learning_rate": 3.500603432696962e-05, "loss": 0.5048, "step": 8524 }, { "epoch": 1.7524925480522149, "grad_norm": 0.1984768956899643, "learning_rate": 3.4996295903049874e-05, "loss": 0.5334, "step": 8525 }, { "epoch": 1.7526981190255935, "grad_norm": 0.1615784913301468, "learning_rate": 3.498655797196586e-05, "loss": 0.5212, "step": 8526 }, { "epoch": 1.752903689998972, "grad_norm": 0.16125060617923737, "learning_rate": 3.4976820534197335e-05, "loss": 0.5676, "step": 8527 }, { "epoch": 1.7531092609723506, "grad_norm": 0.19413301348686218, "learning_rate": 3.4967083590224016e-05, "loss": 0.5531, "step": 8528 }, { "epoch": 1.7533148319457292, "grad_norm": 0.19663669168949127, "learning_rate": 3.4957347140525585e-05, "loss": 0.5442, "step": 8529 }, { "epoch": 1.7535204029191078, "grad_norm": 0.16446875035762787, "learning_rate": 3.4947611185581735e-05, "loss": 0.5127, "step": 8530 }, { "epoch": 1.7537259738924864, "grad_norm": 0.15640254318714142, "learning_rate": 3.4937875725872095e-05, "loss": 0.5259, "step": 8531 }, { "epoch": 1.753931544865865, "grad_norm": 0.19805364310741425, "learning_rate": 3.492814076187629e-05, "loss": 0.535, "step": 8532 }, { "epoch": 1.7541371158392436, "grad_norm": 0.16201485693454742, "learning_rate": 3.491840629407391e-05, "loss": 0.4878, "step": 8533 }, { "epoch": 1.7543426868126222, "grad_norm": 0.1226087361574173, "learning_rate": 3.490867232294454e-05, "loss": 0.5218, "step": 8534 }, { "epoch": 1.7545482577860008, "grad_norm": 0.16910824179649353, "learning_rate": 3.4898938848967695e-05, "loss": 0.5276, "step": 8535 }, { "epoch": 1.7547538287593791, "grad_norm": 0.19596606492996216, "learning_rate": 3.4889205872622936e-05, "loss": 0.5526, "step": 8536 }, { "epoch": 1.7549593997327577, "grad_norm": 0.20501984655857086, "learning_rate": 3.4879473394389745e-05, "loss": 0.5593, "step": 8537 }, { "epoch": 1.7551649707061363, "grad_norm": 0.1966264247894287, "learning_rate": 3.486974141474759e-05, "loss": 0.5296, "step": 8538 }, { "epoch": 1.755370541679515, "grad_norm": 0.18841052055358887, "learning_rate": 3.4860009934175934e-05, "loss": 0.5222, "step": 8539 }, { "epoch": 1.7555761126528933, "grad_norm": 0.19071705639362335, "learning_rate": 3.48502789531542e-05, "loss": 0.5494, "step": 8540 }, { "epoch": 1.7557816836262718, "grad_norm": 0.20280326902866364, "learning_rate": 3.4840548472161777e-05, "loss": 0.547, "step": 8541 }, { "epoch": 1.7559872545996504, "grad_norm": 0.18994936347007751, "learning_rate": 3.483081849167803e-05, "loss": 0.5271, "step": 8542 }, { "epoch": 1.756192825573029, "grad_norm": 0.19104993343353271, "learning_rate": 3.482108901218234e-05, "loss": 0.5228, "step": 8543 }, { "epoch": 1.7563983965464076, "grad_norm": 0.19525660574436188, "learning_rate": 3.481136003415402e-05, "loss": 0.5298, "step": 8544 }, { "epoch": 1.7566039675197862, "grad_norm": 0.19333256781101227, "learning_rate": 3.4801631558072374e-05, "loss": 0.5217, "step": 8545 }, { "epoch": 1.7568095384931648, "grad_norm": 0.19645366072654724, "learning_rate": 3.4791903584416667e-05, "loss": 0.5334, "step": 8546 }, { "epoch": 1.7570151094665434, "grad_norm": 0.1938944011926651, "learning_rate": 3.478217611366615e-05, "loss": 0.5287, "step": 8547 }, { "epoch": 1.757220680439922, "grad_norm": 0.1910870373249054, "learning_rate": 3.477244914630007e-05, "loss": 0.5285, "step": 8548 }, { "epoch": 1.7574262514133006, "grad_norm": 0.20212024450302124, "learning_rate": 3.4762722682797614e-05, "loss": 0.5529, "step": 8549 }, { "epoch": 1.7576318223866791, "grad_norm": 0.19146008789539337, "learning_rate": 3.475299672363795e-05, "loss": 0.5124, "step": 8550 }, { "epoch": 1.7578373933600575, "grad_norm": 0.1885506808757782, "learning_rate": 3.474327126930026e-05, "loss": 0.4892, "step": 8551 }, { "epoch": 1.758042964333436, "grad_norm": 0.18597213923931122, "learning_rate": 3.473354632026365e-05, "loss": 0.5208, "step": 8552 }, { "epoch": 1.7582485353068147, "grad_norm": 0.19762767851352692, "learning_rate": 3.472382187700723e-05, "loss": 0.5474, "step": 8553 }, { "epoch": 1.7584541062801933, "grad_norm": 0.1776442676782608, "learning_rate": 3.471409794001008e-05, "loss": 0.5056, "step": 8554 }, { "epoch": 1.7586596772535716, "grad_norm": 0.16057129204273224, "learning_rate": 3.4704374509751246e-05, "loss": 0.5486, "step": 8555 }, { "epoch": 1.7588652482269502, "grad_norm": 0.1970880925655365, "learning_rate": 3.469465158670973e-05, "loss": 0.5392, "step": 8556 }, { "epoch": 1.7590708192003288, "grad_norm": 0.16427253186702728, "learning_rate": 3.4684929171364594e-05, "loss": 0.5139, "step": 8557 }, { "epoch": 1.7592763901737074, "grad_norm": 0.13471728563308716, "learning_rate": 3.4675207264194776e-05, "loss": 0.5029, "step": 8558 }, { "epoch": 1.759481961147086, "grad_norm": 0.15767881274223328, "learning_rate": 3.4665485865679233e-05, "loss": 0.5285, "step": 8559 }, { "epoch": 1.7596875321204646, "grad_norm": 0.19089291989803314, "learning_rate": 3.465576497629691e-05, "loss": 0.52, "step": 8560 }, { "epoch": 1.7598931030938432, "grad_norm": 0.1997915655374527, "learning_rate": 3.46460445965267e-05, "loss": 0.5741, "step": 8561 }, { "epoch": 1.7600986740672218, "grad_norm": 0.20209218561649323, "learning_rate": 3.4636324726847474e-05, "loss": 0.5263, "step": 8562 }, { "epoch": 1.7603042450406003, "grad_norm": 0.1946118324995041, "learning_rate": 3.4626605367738065e-05, "loss": 0.5425, "step": 8563 }, { "epoch": 1.760509816013979, "grad_norm": 0.1665966659784317, "learning_rate": 3.4616886519677345e-05, "loss": 0.4985, "step": 8564 }, { "epoch": 1.7607153869873575, "grad_norm": 0.16211137175559998, "learning_rate": 3.4607168183144104e-05, "loss": 0.5197, "step": 8565 }, { "epoch": 1.7609209579607359, "grad_norm": 0.20239417254924774, "learning_rate": 3.4597450358617106e-05, "loss": 0.5397, "step": 8566 }, { "epoch": 1.7611265289341145, "grad_norm": 0.20297926664352417, "learning_rate": 3.458773304657511e-05, "loss": 0.5313, "step": 8567 }, { "epoch": 1.761332099907493, "grad_norm": 0.1988510936498642, "learning_rate": 3.457801624749683e-05, "loss": 0.5136, "step": 8568 }, { "epoch": 1.7615376708808717, "grad_norm": 0.19971226155757904, "learning_rate": 3.4568299961860965e-05, "loss": 0.5421, "step": 8569 }, { "epoch": 1.76174324185425, "grad_norm": 0.19609498977661133, "learning_rate": 3.4558584190146226e-05, "loss": 0.5286, "step": 8570 }, { "epoch": 1.7619488128276286, "grad_norm": 0.20023983716964722, "learning_rate": 3.4548868932831235e-05, "loss": 0.5378, "step": 8571 }, { "epoch": 1.7621543838010072, "grad_norm": 0.20097847282886505, "learning_rate": 3.453915419039462e-05, "loss": 0.5509, "step": 8572 }, { "epoch": 1.7623599547743858, "grad_norm": 0.1950797438621521, "learning_rate": 3.452943996331499e-05, "loss": 0.5162, "step": 8573 }, { "epoch": 1.7625655257477644, "grad_norm": 0.19853217899799347, "learning_rate": 3.451972625207091e-05, "loss": 0.5326, "step": 8574 }, { "epoch": 1.762771096721143, "grad_norm": 0.19258736073970795, "learning_rate": 3.451001305714094e-05, "loss": 0.5194, "step": 8575 }, { "epoch": 1.7629766676945215, "grad_norm": 0.1989012360572815, "learning_rate": 3.450030037900357e-05, "loss": 0.5142, "step": 8576 }, { "epoch": 1.7631822386679001, "grad_norm": 0.19085493683815002, "learning_rate": 3.4490588218137356e-05, "loss": 0.5305, "step": 8577 }, { "epoch": 1.7633878096412787, "grad_norm": 0.16426925361156464, "learning_rate": 3.448087657502073e-05, "loss": 0.5152, "step": 8578 }, { "epoch": 1.7635933806146573, "grad_norm": 0.16579680144786835, "learning_rate": 3.447116545013215e-05, "loss": 0.5536, "step": 8579 }, { "epoch": 1.763798951588036, "grad_norm": 0.19505342841148376, "learning_rate": 3.4461454843950035e-05, "loss": 0.5208, "step": 8580 }, { "epoch": 1.7640045225614145, "grad_norm": 0.19521526992321014, "learning_rate": 3.445174475695277e-05, "loss": 0.5593, "step": 8581 }, { "epoch": 1.7642100935347929, "grad_norm": 0.19059976935386658, "learning_rate": 3.4442035189618756e-05, "loss": 0.5199, "step": 8582 }, { "epoch": 1.7644156645081714, "grad_norm": 0.1944594383239746, "learning_rate": 3.443232614242631e-05, "loss": 0.5527, "step": 8583 }, { "epoch": 1.76462123548155, "grad_norm": 0.1929233968257904, "learning_rate": 3.442261761585376e-05, "loss": 0.5206, "step": 8584 }, { "epoch": 1.7648268064549284, "grad_norm": 0.1909506469964981, "learning_rate": 3.441290961037941e-05, "loss": 0.518, "step": 8585 }, { "epoch": 1.765032377428307, "grad_norm": 0.20194295048713684, "learning_rate": 3.440320212648152e-05, "loss": 0.5559, "step": 8586 }, { "epoch": 1.7652379484016856, "grad_norm": 0.20173272490501404, "learning_rate": 3.439349516463833e-05, "loss": 0.5235, "step": 8587 }, { "epoch": 1.7654435193750642, "grad_norm": 0.1989864706993103, "learning_rate": 3.438378872532806e-05, "loss": 0.5219, "step": 8588 }, { "epoch": 1.7656490903484428, "grad_norm": 0.17310731112957, "learning_rate": 3.43740828090289e-05, "loss": 0.506, "step": 8589 }, { "epoch": 1.7658546613218213, "grad_norm": 0.16371743381023407, "learning_rate": 3.4364377416219e-05, "loss": 0.5388, "step": 8590 }, { "epoch": 1.7660602322952, "grad_norm": 0.19252368807792664, "learning_rate": 3.4354672547376524e-05, "loss": 0.5251, "step": 8591 }, { "epoch": 1.7662658032685785, "grad_norm": 0.1986730545759201, "learning_rate": 3.4344968202979584e-05, "loss": 0.5353, "step": 8592 }, { "epoch": 1.766471374241957, "grad_norm": 0.22330817580223083, "learning_rate": 3.433526438350625e-05, "loss": 0.5355, "step": 8593 }, { "epoch": 1.7666769452153357, "grad_norm": 0.19446399807929993, "learning_rate": 3.43255610894346e-05, "loss": 0.518, "step": 8594 }, { "epoch": 1.7668825161887143, "grad_norm": 0.19539190828800201, "learning_rate": 3.431585832124266e-05, "loss": 0.5334, "step": 8595 }, { "epoch": 1.7670880871620929, "grad_norm": 0.20236273109912872, "learning_rate": 3.430615607940844e-05, "loss": 0.5315, "step": 8596 }, { "epoch": 1.7672936581354712, "grad_norm": 0.1652330905199051, "learning_rate": 3.429645436440991e-05, "loss": 0.5177, "step": 8597 }, { "epoch": 1.7674992291088498, "grad_norm": 0.16170786321163177, "learning_rate": 3.428675317672507e-05, "loss": 0.508, "step": 8598 }, { "epoch": 1.7677048000822284, "grad_norm": 0.1644188016653061, "learning_rate": 3.427705251683182e-05, "loss": 0.5064, "step": 8599 }, { "epoch": 1.767910371055607, "grad_norm": 0.1265815794467926, "learning_rate": 3.4267352385208086e-05, "loss": 0.4951, "step": 8600 }, { "epoch": 1.7681159420289854, "grad_norm": 0.16070230305194855, "learning_rate": 3.425765278233172e-05, "loss": 0.5369, "step": 8601 }, { "epoch": 1.768321513002364, "grad_norm": 0.19323338568210602, "learning_rate": 3.42479537086806e-05, "loss": 0.5374, "step": 8602 }, { "epoch": 1.7685270839757425, "grad_norm": 0.19410564005374908, "learning_rate": 3.423825516473254e-05, "loss": 0.5405, "step": 8603 }, { "epoch": 1.7687326549491211, "grad_norm": 0.19003941118717194, "learning_rate": 3.422855715096534e-05, "loss": 0.5468, "step": 8604 }, { "epoch": 1.7689382259224997, "grad_norm": 0.19323213398456573, "learning_rate": 3.421885966785679e-05, "loss": 0.5257, "step": 8605 }, { "epoch": 1.7691437968958783, "grad_norm": 0.1951300948858261, "learning_rate": 3.420916271588464e-05, "loss": 0.5289, "step": 8606 }, { "epoch": 1.769349367869257, "grad_norm": 0.17118534445762634, "learning_rate": 3.419946629552661e-05, "loss": 0.5018, "step": 8607 }, { "epoch": 1.7695549388426355, "grad_norm": 0.18546664714813232, "learning_rate": 3.418977040726039e-05, "loss": 0.5171, "step": 8608 }, { "epoch": 1.769760509816014, "grad_norm": 0.2011442631483078, "learning_rate": 3.418007505156365e-05, "loss": 0.5485, "step": 8609 }, { "epoch": 1.7699660807893927, "grad_norm": 0.20571096241474152, "learning_rate": 3.417038022891405e-05, "loss": 0.5286, "step": 8610 }, { "epoch": 1.7701716517627712, "grad_norm": 0.19118675589561462, "learning_rate": 3.416068593978917e-05, "loss": 0.5185, "step": 8611 }, { "epoch": 1.7703772227361496, "grad_norm": 0.19263485074043274, "learning_rate": 3.415099218466666e-05, "loss": 0.5285, "step": 8612 }, { "epoch": 1.7705827937095282, "grad_norm": 0.15957173705101013, "learning_rate": 3.4141298964024046e-05, "loss": 0.4959, "step": 8613 }, { "epoch": 1.7707883646829068, "grad_norm": 0.13335727155208588, "learning_rate": 3.4131606278338875e-05, "loss": 0.5116, "step": 8614 }, { "epoch": 1.7709939356562854, "grad_norm": 0.15437600016593933, "learning_rate": 3.412191412808868e-05, "loss": 0.545, "step": 8615 }, { "epoch": 1.7711995066296637, "grad_norm": 0.19646428525447845, "learning_rate": 3.411222251375092e-05, "loss": 0.5433, "step": 8616 }, { "epoch": 1.7714050776030423, "grad_norm": 0.21242785453796387, "learning_rate": 3.410253143580307e-05, "loss": 0.546, "step": 8617 }, { "epoch": 1.771610648576421, "grad_norm": 0.19566522538661957, "learning_rate": 3.4092840894722545e-05, "loss": 0.5379, "step": 8618 }, { "epoch": 1.7718162195497995, "grad_norm": 0.19648124277591705, "learning_rate": 3.40831508909868e-05, "loss": 0.566, "step": 8619 }, { "epoch": 1.772021790523178, "grad_norm": 0.17227233946323395, "learning_rate": 3.407346142507317e-05, "loss": 0.5122, "step": 8620 }, { "epoch": 1.7722273614965567, "grad_norm": 0.1685340255498886, "learning_rate": 3.406377249745902e-05, "loss": 0.5275, "step": 8621 }, { "epoch": 1.7724329324699353, "grad_norm": 0.19663850963115692, "learning_rate": 3.4054084108621695e-05, "loss": 0.5189, "step": 8622 }, { "epoch": 1.7726385034433139, "grad_norm": 0.19546058773994446, "learning_rate": 3.4044396259038475e-05, "loss": 0.5577, "step": 8623 }, { "epoch": 1.7728440744166925, "grad_norm": 0.19245782494544983, "learning_rate": 3.4034708949186655e-05, "loss": 0.5378, "step": 8624 }, { "epoch": 1.773049645390071, "grad_norm": 0.191994771361351, "learning_rate": 3.402502217954346e-05, "loss": 0.5207, "step": 8625 }, { "epoch": 1.7732552163634496, "grad_norm": 0.19610409438610077, "learning_rate": 3.401533595058612e-05, "loss": 0.5512, "step": 8626 }, { "epoch": 1.773460787336828, "grad_norm": 0.19996674358844757, "learning_rate": 3.400565026279186e-05, "loss": 0.5401, "step": 8627 }, { "epoch": 1.7736663583102066, "grad_norm": 0.19628667831420898, "learning_rate": 3.3995965116637814e-05, "loss": 0.5596, "step": 8628 }, { "epoch": 1.7738719292835852, "grad_norm": 0.2043389528989792, "learning_rate": 3.398628051260114e-05, "loss": 0.545, "step": 8629 }, { "epoch": 1.7740775002569638, "grad_norm": 0.1604812741279602, "learning_rate": 3.397659645115894e-05, "loss": 0.4899, "step": 8630 }, { "epoch": 1.7742830712303421, "grad_norm": 0.15960481762886047, "learning_rate": 3.396691293278831e-05, "loss": 0.5164, "step": 8631 }, { "epoch": 1.7744886422037207, "grad_norm": 0.1938653588294983, "learning_rate": 3.395722995796629e-05, "loss": 0.537, "step": 8632 }, { "epoch": 1.7746942131770993, "grad_norm": 0.202159583568573, "learning_rate": 3.3947547527169964e-05, "loss": 0.5427, "step": 8633 }, { "epoch": 1.7748997841504779, "grad_norm": 0.19612862169742584, "learning_rate": 3.3937865640876305e-05, "loss": 0.5301, "step": 8634 }, { "epoch": 1.7751053551238565, "grad_norm": 0.20528623461723328, "learning_rate": 3.39281842995623e-05, "loss": 0.5452, "step": 8635 }, { "epoch": 1.775310926097235, "grad_norm": 0.16790783405303955, "learning_rate": 3.3918503503704905e-05, "loss": 0.5233, "step": 8636 }, { "epoch": 1.7755164970706137, "grad_norm": 0.12118061631917953, "learning_rate": 3.390882325378105e-05, "loss": 0.5104, "step": 8637 }, { "epoch": 1.7757220680439922, "grad_norm": 0.1575068235397339, "learning_rate": 3.389914355026764e-05, "loss": 0.5293, "step": 8638 }, { "epoch": 1.7759276390173708, "grad_norm": 0.16683286428451538, "learning_rate": 3.3889464393641516e-05, "loss": 0.5083, "step": 8639 }, { "epoch": 1.7761332099907494, "grad_norm": 0.15598100423812866, "learning_rate": 3.387978578437957e-05, "loss": 0.5133, "step": 8640 }, { "epoch": 1.776338780964128, "grad_norm": 0.202442929148674, "learning_rate": 3.387010772295861e-05, "loss": 0.5476, "step": 8641 }, { "epoch": 1.7765443519375064, "grad_norm": 0.16517791152000427, "learning_rate": 3.3860430209855415e-05, "loss": 0.504, "step": 8642 }, { "epoch": 1.776749922910885, "grad_norm": 0.18405590951442719, "learning_rate": 3.3850753245546756e-05, "loss": 0.5372, "step": 8643 }, { "epoch": 1.7769554938842635, "grad_norm": 0.16793282330036163, "learning_rate": 3.384107683050938e-05, "loss": 0.5214, "step": 8644 }, { "epoch": 1.7771610648576421, "grad_norm": 0.1541900336742401, "learning_rate": 3.383140096521997e-05, "loss": 0.5294, "step": 8645 }, { "epoch": 1.7773666358310205, "grad_norm": 0.22302818298339844, "learning_rate": 3.3821725650155247e-05, "loss": 0.5564, "step": 8646 }, { "epoch": 1.777572206804399, "grad_norm": 0.1978428214788437, "learning_rate": 3.381205088579185e-05, "loss": 0.5236, "step": 8647 }, { "epoch": 1.7777777777777777, "grad_norm": 0.1918904036283493, "learning_rate": 3.380237667260642e-05, "loss": 0.5082, "step": 8648 }, { "epoch": 1.7779833487511563, "grad_norm": 0.1877550482749939, "learning_rate": 3.379270301107555e-05, "loss": 0.5303, "step": 8649 }, { "epoch": 1.7781889197245349, "grad_norm": 0.1972031146287918, "learning_rate": 3.3783029901675826e-05, "loss": 0.5553, "step": 8650 }, { "epoch": 1.7783944906979134, "grad_norm": 0.19708669185638428, "learning_rate": 3.377335734488379e-05, "loss": 0.5414, "step": 8651 }, { "epoch": 1.778600061671292, "grad_norm": 0.1898190975189209, "learning_rate": 3.376368534117595e-05, "loss": 0.5227, "step": 8652 }, { "epoch": 1.7788056326446706, "grad_norm": 0.19257086515426636, "learning_rate": 3.3754013891028826e-05, "loss": 0.5288, "step": 8653 }, { "epoch": 1.7790112036180492, "grad_norm": 0.20055457949638367, "learning_rate": 3.374434299491888e-05, "loss": 0.5422, "step": 8654 }, { "epoch": 1.7792167745914278, "grad_norm": 0.19281229376792908, "learning_rate": 3.373467265332254e-05, "loss": 0.5286, "step": 8655 }, { "epoch": 1.7794223455648064, "grad_norm": 0.16794267296791077, "learning_rate": 3.372500286671622e-05, "loss": 0.5123, "step": 8656 }, { "epoch": 1.779627916538185, "grad_norm": 0.17391237616539001, "learning_rate": 3.371533363557631e-05, "loss": 0.5325, "step": 8657 }, { "epoch": 1.7798334875115633, "grad_norm": 0.19158299267292023, "learning_rate": 3.3705664960379176e-05, "loss": 0.5259, "step": 8658 }, { "epoch": 1.780039058484942, "grad_norm": 0.19506706297397614, "learning_rate": 3.3695996841601114e-05, "loss": 0.5304, "step": 8659 }, { "epoch": 1.7802446294583205, "grad_norm": 0.19340963661670685, "learning_rate": 3.3686329279718484e-05, "loss": 0.5321, "step": 8660 }, { "epoch": 1.7804502004316989, "grad_norm": 0.16678109765052795, "learning_rate": 3.367666227520752e-05, "loss": 0.4986, "step": 8661 }, { "epoch": 1.7806557714050775, "grad_norm": 0.1627744436264038, "learning_rate": 3.366699582854449e-05, "loss": 0.5676, "step": 8662 }, { "epoch": 1.780861342378456, "grad_norm": 0.2138591855764389, "learning_rate": 3.365732994020559e-05, "loss": 0.5439, "step": 8663 }, { "epoch": 1.7810669133518346, "grad_norm": 0.197159543633461, "learning_rate": 3.3647664610667036e-05, "loss": 0.5174, "step": 8664 }, { "epoch": 1.7812724843252132, "grad_norm": 0.16026876866817474, "learning_rate": 3.363799984040499e-05, "loss": 0.5147, "step": 8665 }, { "epoch": 1.7814780552985918, "grad_norm": 0.1291634738445282, "learning_rate": 3.3628335629895564e-05, "loss": 0.5185, "step": 8666 }, { "epoch": 1.7816836262719704, "grad_norm": 0.12628033757209778, "learning_rate": 3.3618671979614906e-05, "loss": 0.5011, "step": 8667 }, { "epoch": 1.781889197245349, "grad_norm": 0.16729123890399933, "learning_rate": 3.360900889003909e-05, "loss": 0.5241, "step": 8668 }, { "epoch": 1.7820947682187276, "grad_norm": 0.199641615152359, "learning_rate": 3.3599346361644154e-05, "loss": 0.5325, "step": 8669 }, { "epoch": 1.7823003391921062, "grad_norm": 0.1920914649963379, "learning_rate": 3.3589684394906144e-05, "loss": 0.5149, "step": 8670 }, { "epoch": 1.7825059101654848, "grad_norm": 0.19104242324829102, "learning_rate": 3.358002299030105e-05, "loss": 0.5151, "step": 8671 }, { "epoch": 1.7827114811388634, "grad_norm": 0.1941048502922058, "learning_rate": 3.3570362148304846e-05, "loss": 0.5251, "step": 8672 }, { "epoch": 1.7829170521122417, "grad_norm": 0.18796855211257935, "learning_rate": 3.356070186939346e-05, "loss": 0.5222, "step": 8673 }, { "epoch": 1.7831226230856203, "grad_norm": 0.1918378323316574, "learning_rate": 3.355104215404284e-05, "loss": 0.5433, "step": 8674 }, { "epoch": 1.783328194058999, "grad_norm": 0.19748041033744812, "learning_rate": 3.354138300272887e-05, "loss": 0.5324, "step": 8675 }, { "epoch": 1.7835337650323775, "grad_norm": 0.20158739387989044, "learning_rate": 3.35317244159274e-05, "loss": 0.5326, "step": 8676 }, { "epoch": 1.7837393360057558, "grad_norm": 0.19620271027088165, "learning_rate": 3.352206639411426e-05, "loss": 0.5195, "step": 8677 }, { "epoch": 1.7839449069791344, "grad_norm": 0.2002163529396057, "learning_rate": 3.3512408937765256e-05, "loss": 0.5183, "step": 8678 }, { "epoch": 1.784150477952513, "grad_norm": 0.16237804293632507, "learning_rate": 3.350275204735618e-05, "loss": 0.5099, "step": 8679 }, { "epoch": 1.7843560489258916, "grad_norm": 0.16660307347774506, "learning_rate": 3.349309572336276e-05, "loss": 0.5543, "step": 8680 }, { "epoch": 1.7845616198992702, "grad_norm": 0.2031785398721695, "learning_rate": 3.3483439966260734e-05, "loss": 0.5341, "step": 8681 }, { "epoch": 1.7847671908726488, "grad_norm": 0.19732213020324707, "learning_rate": 3.34737847765258e-05, "loss": 0.5229, "step": 8682 }, { "epoch": 1.7849727618460274, "grad_norm": 0.20520782470703125, "learning_rate": 3.3464130154633616e-05, "loss": 0.5356, "step": 8683 }, { "epoch": 1.785178332819406, "grad_norm": 0.1953929215669632, "learning_rate": 3.345447610105983e-05, "loss": 0.546, "step": 8684 }, { "epoch": 1.7853839037927846, "grad_norm": 0.19621872901916504, "learning_rate": 3.344482261628003e-05, "loss": 0.5344, "step": 8685 }, { "epoch": 1.7855894747661631, "grad_norm": 0.18925665318965912, "learning_rate": 3.3435169700769815e-05, "loss": 0.5191, "step": 8686 }, { "epoch": 1.7857950457395417, "grad_norm": 0.16739846765995026, "learning_rate": 3.3425517355004715e-05, "loss": 0.5152, "step": 8687 }, { "epoch": 1.78600061671292, "grad_norm": 0.198704332113266, "learning_rate": 3.3415865579460305e-05, "loss": 0.5538, "step": 8688 }, { "epoch": 1.7862061876862987, "grad_norm": 0.24151252210140228, "learning_rate": 3.340621437461204e-05, "loss": 0.5001, "step": 8689 }, { "epoch": 1.7864117586596773, "grad_norm": 0.20363937318325043, "learning_rate": 3.3396563740935406e-05, "loss": 0.5489, "step": 8690 }, { "epoch": 1.7866173296330559, "grad_norm": 0.19773469865322113, "learning_rate": 3.338691367890584e-05, "loss": 0.5149, "step": 8691 }, { "epoch": 1.7868229006064342, "grad_norm": 0.1598690301179886, "learning_rate": 3.3377264188998764e-05, "loss": 0.516, "step": 8692 }, { "epoch": 1.7870284715798128, "grad_norm": 0.12722936272621155, "learning_rate": 3.3367615271689555e-05, "loss": 0.5108, "step": 8693 }, { "epoch": 1.7872340425531914, "grad_norm": 0.15668757259845734, "learning_rate": 3.335796692745356e-05, "loss": 0.5482, "step": 8694 }, { "epoch": 1.78743961352657, "grad_norm": 0.20184186100959778, "learning_rate": 3.3348319156766126e-05, "loss": 0.5621, "step": 8695 }, { "epoch": 1.7876451844999486, "grad_norm": 0.20065537095069885, "learning_rate": 3.333867196010255e-05, "loss": 0.5341, "step": 8696 }, { "epoch": 1.7878507554733272, "grad_norm": 0.19662179052829742, "learning_rate": 3.3329025337938106e-05, "loss": 0.5398, "step": 8697 }, { "epoch": 1.7880563264467058, "grad_norm": 0.1874885857105255, "learning_rate": 3.331937929074804e-05, "loss": 0.53, "step": 8698 }, { "epoch": 1.7882618974200843, "grad_norm": 0.19309687614440918, "learning_rate": 3.330973381900754e-05, "loss": 0.5446, "step": 8699 }, { "epoch": 1.788467468393463, "grad_norm": 0.1995777040719986, "learning_rate": 3.330008892319183e-05, "loss": 0.5365, "step": 8700 }, { "epoch": 1.7886730393668415, "grad_norm": 0.20065602660179138, "learning_rate": 3.3290444603776045e-05, "loss": 0.5344, "step": 8701 }, { "epoch": 1.78887861034022, "grad_norm": 0.19749754667282104, "learning_rate": 3.328080086123532e-05, "loss": 0.5441, "step": 8702 }, { "epoch": 1.7890841813135985, "grad_norm": 0.1977740079164505, "learning_rate": 3.3271157696044774e-05, "loss": 0.4977, "step": 8703 }, { "epoch": 1.789289752286977, "grad_norm": 0.19205152988433838, "learning_rate": 3.3261515108679465e-05, "loss": 0.5375, "step": 8704 }, { "epoch": 1.7894953232603557, "grad_norm": 0.19639678299427032, "learning_rate": 3.325187309961445e-05, "loss": 0.5507, "step": 8705 }, { "epoch": 1.7897008942337342, "grad_norm": 0.17259171605110168, "learning_rate": 3.3242231669324727e-05, "loss": 0.5051, "step": 8706 }, { "epoch": 1.7899064652071126, "grad_norm": 0.13026978075504303, "learning_rate": 3.323259081828529e-05, "loss": 0.5009, "step": 8707 }, { "epoch": 1.7901120361804912, "grad_norm": 0.16744323074817657, "learning_rate": 3.322295054697109e-05, "loss": 0.5441, "step": 8708 }, { "epoch": 1.7903176071538698, "grad_norm": 0.1634387969970703, "learning_rate": 3.3213310855857096e-05, "loss": 0.5119, "step": 8709 }, { "epoch": 1.7905231781272484, "grad_norm": 0.15612611174583435, "learning_rate": 3.3203671745418175e-05, "loss": 0.5536, "step": 8710 }, { "epoch": 1.790728749100627, "grad_norm": 0.23640312254428864, "learning_rate": 3.31940332161292e-05, "loss": 0.5475, "step": 8711 }, { "epoch": 1.7909343200740055, "grad_norm": 0.19410596787929535, "learning_rate": 3.318439526846505e-05, "loss": 0.5559, "step": 8712 }, { "epoch": 1.7911398910473841, "grad_norm": 0.19222721457481384, "learning_rate": 3.317475790290051e-05, "loss": 0.5342, "step": 8713 }, { "epoch": 1.7913454620207627, "grad_norm": 0.17586657404899597, "learning_rate": 3.316512111991038e-05, "loss": 0.4957, "step": 8714 }, { "epoch": 1.7915510329941413, "grad_norm": 0.1588892936706543, "learning_rate": 3.31554849199694e-05, "loss": 0.5331, "step": 8715 }, { "epoch": 1.79175660396752, "grad_norm": 0.202567458152771, "learning_rate": 3.3145849303552333e-05, "loss": 0.5418, "step": 8716 }, { "epoch": 1.7919621749408985, "grad_norm": 0.19263319671154022, "learning_rate": 3.3136214271133865e-05, "loss": 0.5144, "step": 8717 }, { "epoch": 1.7921677459142769, "grad_norm": 0.17071235179901123, "learning_rate": 3.312657982318866e-05, "loss": 0.5028, "step": 8718 }, { "epoch": 1.7923733168876554, "grad_norm": 0.16436687111854553, "learning_rate": 3.311694596019138e-05, "loss": 0.5452, "step": 8719 }, { "epoch": 1.792578887861034, "grad_norm": 0.16867224872112274, "learning_rate": 3.310731268261662e-05, "loss": 0.5006, "step": 8720 }, { "epoch": 1.7927844588344126, "grad_norm": 0.16634447872638702, "learning_rate": 3.3097679990938975e-05, "loss": 0.5141, "step": 8721 }, { "epoch": 1.792990029807791, "grad_norm": 0.16255205869674683, "learning_rate": 3.308804788563302e-05, "loss": 0.5167, "step": 8722 }, { "epoch": 1.7931956007811696, "grad_norm": 0.12086722999811172, "learning_rate": 3.307841636717326e-05, "loss": 0.5256, "step": 8723 }, { "epoch": 1.7934011717545482, "grad_norm": 0.16073068976402283, "learning_rate": 3.3068785436034214e-05, "loss": 0.522, "step": 8724 }, { "epoch": 1.7936067427279268, "grad_norm": 0.19669640064239502, "learning_rate": 3.305915509269034e-05, "loss": 0.5543, "step": 8725 }, { "epoch": 1.7938123137013053, "grad_norm": 0.17151986062526703, "learning_rate": 3.304952533761608e-05, "loss": 0.5286, "step": 8726 }, { "epoch": 1.794017884674684, "grad_norm": 0.15375934541225433, "learning_rate": 3.303989617128586e-05, "loss": 0.5302, "step": 8727 }, { "epoch": 1.7942234556480625, "grad_norm": 0.19120700657367706, "learning_rate": 3.303026759417403e-05, "loss": 0.5134, "step": 8728 }, { "epoch": 1.794429026621441, "grad_norm": 0.15886104106903076, "learning_rate": 3.302063960675498e-05, "loss": 0.4965, "step": 8729 }, { "epoch": 1.7946345975948197, "grad_norm": 0.1626490205526352, "learning_rate": 3.3011012209503034e-05, "loss": 0.5338, "step": 8730 }, { "epoch": 1.7948401685681983, "grad_norm": 0.20152784883975983, "learning_rate": 3.300138540289248e-05, "loss": 0.5339, "step": 8731 }, { "epoch": 1.7950457395415769, "grad_norm": 0.1927708387374878, "learning_rate": 3.2991759187397575e-05, "loss": 0.5188, "step": 8732 }, { "epoch": 1.7952513105149552, "grad_norm": 0.16235662996768951, "learning_rate": 3.2982133563492586e-05, "loss": 0.4898, "step": 8733 }, { "epoch": 1.7954568814883338, "grad_norm": 0.1614857167005539, "learning_rate": 3.2972508531651686e-05, "loss": 0.5315, "step": 8734 }, { "epoch": 1.7956624524617124, "grad_norm": 0.19275487959384918, "learning_rate": 3.2962884092349074e-05, "loss": 0.532, "step": 8735 }, { "epoch": 1.795868023435091, "grad_norm": 0.1601790487766266, "learning_rate": 3.295326024605891e-05, "loss": 0.4982, "step": 8736 }, { "epoch": 1.7960735944084694, "grad_norm": 0.16727516055107117, "learning_rate": 3.2943636993255316e-05, "loss": 0.5415, "step": 8737 }, { "epoch": 1.796279165381848, "grad_norm": 0.19914865493774414, "learning_rate": 3.293401433441237e-05, "loss": 0.507, "step": 8738 }, { "epoch": 1.7964847363552265, "grad_norm": 0.20361186563968658, "learning_rate": 3.2924392270004136e-05, "loss": 0.5369, "step": 8739 }, { "epoch": 1.7966903073286051, "grad_norm": 0.19120000302791595, "learning_rate": 3.2914770800504665e-05, "loss": 0.5204, "step": 8740 }, { "epoch": 1.7968958783019837, "grad_norm": 0.19319793581962585, "learning_rate": 3.2905149926387946e-05, "loss": 0.5346, "step": 8741 }, { "epoch": 1.7971014492753623, "grad_norm": 0.19608697295188904, "learning_rate": 3.289552964812793e-05, "loss": 0.528, "step": 8742 }, { "epoch": 1.797307020248741, "grad_norm": 0.19638018310070038, "learning_rate": 3.2885909966198625e-05, "loss": 0.5554, "step": 8743 }, { "epoch": 1.7975125912221195, "grad_norm": 0.19244056940078735, "learning_rate": 3.28762908810739e-05, "loss": 0.5331, "step": 8744 }, { "epoch": 1.797718162195498, "grad_norm": 0.1934266835451126, "learning_rate": 3.2866672393227665e-05, "loss": 0.523, "step": 8745 }, { "epoch": 1.7979237331688767, "grad_norm": 0.1938556432723999, "learning_rate": 3.285705450313377e-05, "loss": 0.5195, "step": 8746 }, { "epoch": 1.7981293041422552, "grad_norm": 0.1935025453567505, "learning_rate": 3.284743721126605e-05, "loss": 0.5507, "step": 8747 }, { "epoch": 1.7983348751156338, "grad_norm": 0.16398179531097412, "learning_rate": 3.2837820518098294e-05, "loss": 0.5202, "step": 8748 }, { "epoch": 1.7985404460890122, "grad_norm": 0.13340577483177185, "learning_rate": 3.2828204424104256e-05, "loss": 0.5141, "step": 8749 }, { "epoch": 1.7987460170623908, "grad_norm": 0.16972105205059052, "learning_rate": 3.2818588929757714e-05, "loss": 0.5168, "step": 8750 }, { "epoch": 1.7989515880357694, "grad_norm": 0.19210562109947205, "learning_rate": 3.2808974035532354e-05, "loss": 0.5188, "step": 8751 }, { "epoch": 1.7991571590091477, "grad_norm": 0.19385598599910736, "learning_rate": 3.279935974190187e-05, "loss": 0.5101, "step": 8752 }, { "epoch": 1.7993627299825263, "grad_norm": 0.20557603240013123, "learning_rate": 3.278974604933991e-05, "loss": 0.5596, "step": 8753 }, { "epoch": 1.799568300955905, "grad_norm": 0.17109614610671997, "learning_rate": 3.2780132958320075e-05, "loss": 0.5028, "step": 8754 }, { "epoch": 1.7997738719292835, "grad_norm": 0.1690118908882141, "learning_rate": 3.277052046931598e-05, "loss": 0.5425, "step": 8755 }, { "epoch": 1.799979442902662, "grad_norm": 0.20591634511947632, "learning_rate": 3.276090858280118e-05, "loss": 0.5446, "step": 8756 }, { "epoch": 1.8001850138760407, "grad_norm": 0.19887815415859222, "learning_rate": 3.27512972992492e-05, "loss": 0.557, "step": 8757 }, { "epoch": 1.8003905848494193, "grad_norm": 0.1681029200553894, "learning_rate": 3.274168661913357e-05, "loss": 0.5276, "step": 8758 }, { "epoch": 1.8005961558227979, "grad_norm": 0.1590951383113861, "learning_rate": 3.273207654292774e-05, "loss": 0.5437, "step": 8759 }, { "epoch": 1.8008017267961764, "grad_norm": 0.19525672495365143, "learning_rate": 3.272246707110516e-05, "loss": 0.5255, "step": 8760 }, { "epoch": 1.801007297769555, "grad_norm": 0.19866180419921875, "learning_rate": 3.271285820413924e-05, "loss": 0.5169, "step": 8761 }, { "epoch": 1.8012128687429336, "grad_norm": 0.18859532475471497, "learning_rate": 3.270324994250337e-05, "loss": 0.5348, "step": 8762 }, { "epoch": 1.8014184397163122, "grad_norm": 0.19412027299404144, "learning_rate": 3.2693642286670884e-05, "loss": 0.5319, "step": 8763 }, { "epoch": 1.8016240106896906, "grad_norm": 0.1961035430431366, "learning_rate": 3.2684035237115134e-05, "loss": 0.5443, "step": 8764 }, { "epoch": 1.8018295816630692, "grad_norm": 0.19343046844005585, "learning_rate": 3.2674428794309405e-05, "loss": 0.5361, "step": 8765 }, { "epoch": 1.8020351526364478, "grad_norm": 0.1937210112810135, "learning_rate": 3.266482295872695e-05, "loss": 0.5418, "step": 8766 }, { "epoch": 1.8022407236098263, "grad_norm": 0.19837218523025513, "learning_rate": 3.265521773084103e-05, "loss": 0.5194, "step": 8767 }, { "epoch": 1.8024462945832047, "grad_norm": 0.19206029176712036, "learning_rate": 3.264561311112483e-05, "loss": 0.535, "step": 8768 }, { "epoch": 1.8026518655565833, "grad_norm": 0.19339123368263245, "learning_rate": 3.263600910005152e-05, "loss": 0.509, "step": 8769 }, { "epoch": 1.8028574365299619, "grad_norm": 0.16199949383735657, "learning_rate": 3.262640569809424e-05, "loss": 0.4946, "step": 8770 }, { "epoch": 1.8030630075033405, "grad_norm": 0.1284702867269516, "learning_rate": 3.261680290572613e-05, "loss": 0.5095, "step": 8771 }, { "epoch": 1.803268578476719, "grad_norm": 0.16122448444366455, "learning_rate": 3.2607200723420274e-05, "loss": 0.5157, "step": 8772 }, { "epoch": 1.8034741494500977, "grad_norm": 0.1663200855255127, "learning_rate": 3.259759915164971e-05, "loss": 0.5245, "step": 8773 }, { "epoch": 1.8036797204234762, "grad_norm": 0.15676259994506836, "learning_rate": 3.258799819088746e-05, "loss": 0.5267, "step": 8774 }, { "epoch": 1.8038852913968548, "grad_norm": 0.20118048787117004, "learning_rate": 3.257839784160652e-05, "loss": 0.5446, "step": 8775 }, { "epoch": 1.8040908623702334, "grad_norm": 0.198233500123024, "learning_rate": 3.256879810427987e-05, "loss": 0.5508, "step": 8776 }, { "epoch": 1.804296433343612, "grad_norm": 0.20336700975894928, "learning_rate": 3.255919897938043e-05, "loss": 0.5365, "step": 8777 }, { "epoch": 1.8045020043169906, "grad_norm": 0.16567686200141907, "learning_rate": 3.2549600467381096e-05, "loss": 0.5033, "step": 8778 }, { "epoch": 1.804707575290369, "grad_norm": 0.16237860918045044, "learning_rate": 3.2540002568754776e-05, "loss": 0.5379, "step": 8779 }, { "epoch": 1.8049131462637475, "grad_norm": 0.19675461947917938, "learning_rate": 3.2530405283974284e-05, "loss": 0.5328, "step": 8780 }, { "epoch": 1.8051187172371261, "grad_norm": 0.19670812785625458, "learning_rate": 3.2520808613512446e-05, "loss": 0.5439, "step": 8781 }, { "epoch": 1.8053242882105047, "grad_norm": 0.19091184437274933, "learning_rate": 3.2511212557842036e-05, "loss": 0.5355, "step": 8782 }, { "epoch": 1.805529859183883, "grad_norm": 0.19207298755645752, "learning_rate": 3.250161711743581e-05, "loss": 0.5185, "step": 8783 }, { "epoch": 1.8057354301572617, "grad_norm": 0.19454807043075562, "learning_rate": 3.2492022292766476e-05, "loss": 0.5098, "step": 8784 }, { "epoch": 1.8059410011306403, "grad_norm": 0.1931590735912323, "learning_rate": 3.248242808430676e-05, "loss": 0.5535, "step": 8785 }, { "epoch": 1.8061465721040189, "grad_norm": 0.19954814016819, "learning_rate": 3.24728344925293e-05, "loss": 0.5226, "step": 8786 }, { "epoch": 1.8063521430773974, "grad_norm": 0.18844476342201233, "learning_rate": 3.2463241517906725e-05, "loss": 0.502, "step": 8787 }, { "epoch": 1.806557714050776, "grad_norm": 0.20117510855197906, "learning_rate": 3.245364916091166e-05, "loss": 0.5558, "step": 8788 }, { "epoch": 1.8067632850241546, "grad_norm": 0.16829055547714233, "learning_rate": 3.244405742201665e-05, "loss": 0.5025, "step": 8789 }, { "epoch": 1.8069688559975332, "grad_norm": 0.16325733065605164, "learning_rate": 3.243446630169425e-05, "loss": 0.5352, "step": 8790 }, { "epoch": 1.8071744269709118, "grad_norm": 0.2082752287387848, "learning_rate": 3.242487580041695e-05, "loss": 0.5324, "step": 8791 }, { "epoch": 1.8073799979442904, "grad_norm": 0.19825617969036102, "learning_rate": 3.2415285918657254e-05, "loss": 0.5495, "step": 8792 }, { "epoch": 1.807585568917669, "grad_norm": 0.19314411282539368, "learning_rate": 3.24056966568876e-05, "loss": 0.5279, "step": 8793 }, { "epoch": 1.8077911398910473, "grad_norm": 0.18960903584957123, "learning_rate": 3.2396108015580414e-05, "loss": 0.5288, "step": 8794 }, { "epoch": 1.807996710864426, "grad_norm": 0.1918916255235672, "learning_rate": 3.2386519995208064e-05, "loss": 0.5223, "step": 8795 }, { "epoch": 1.8082022818378045, "grad_norm": 0.1647498458623886, "learning_rate": 3.2376932596242916e-05, "loss": 0.4907, "step": 8796 }, { "epoch": 1.808407852811183, "grad_norm": 0.296530157327652, "learning_rate": 3.236734581915732e-05, "loss": 0.5286, "step": 8797 }, { "epoch": 1.8086134237845615, "grad_norm": 0.20278790593147278, "learning_rate": 3.235775966442352e-05, "loss": 0.5266, "step": 8798 }, { "epoch": 1.80881899475794, "grad_norm": 0.19407640397548676, "learning_rate": 3.234817413251382e-05, "loss": 0.5374, "step": 8799 }, { "epoch": 1.8090245657313186, "grad_norm": 0.1970399171113968, "learning_rate": 3.233858922390045e-05, "loss": 0.5392, "step": 8800 }, { "epoch": 1.8092301367046972, "grad_norm": 0.19362105429172516, "learning_rate": 3.232900493905562e-05, "loss": 0.5208, "step": 8801 }, { "epoch": 1.8094357076780758, "grad_norm": 0.17109599709510803, "learning_rate": 3.2319421278451495e-05, "loss": 0.5229, "step": 8802 }, { "epoch": 1.8096412786514544, "grad_norm": 0.16294682025909424, "learning_rate": 3.230983824256021e-05, "loss": 0.5206, "step": 8803 }, { "epoch": 1.809846849624833, "grad_norm": 0.2055048942565918, "learning_rate": 3.2300255831853856e-05, "loss": 0.5383, "step": 8804 }, { "epoch": 1.8100524205982116, "grad_norm": 0.1635579615831375, "learning_rate": 3.229067404680456e-05, "loss": 0.492, "step": 8805 }, { "epoch": 1.8102579915715902, "grad_norm": 0.11798587441444397, "learning_rate": 3.228109288788435e-05, "loss": 0.493, "step": 8806 }, { "epoch": 1.8104635625449688, "grad_norm": 0.16314202547073364, "learning_rate": 3.227151235556525e-05, "loss": 0.5462, "step": 8807 }, { "epoch": 1.8106691335183474, "grad_norm": 0.19977326691150665, "learning_rate": 3.2261932450319237e-05, "loss": 0.5128, "step": 8808 }, { "epoch": 1.8108747044917257, "grad_norm": 0.19161002337932587, "learning_rate": 3.2252353172618275e-05, "loss": 0.5166, "step": 8809 }, { "epoch": 1.8110802754651043, "grad_norm": 0.19649870693683624, "learning_rate": 3.2242774522934294e-05, "loss": 0.5205, "step": 8810 }, { "epoch": 1.811285846438483, "grad_norm": 0.19603271782398224, "learning_rate": 3.2233196501739164e-05, "loss": 0.5314, "step": 8811 }, { "epoch": 1.8114914174118615, "grad_norm": 0.19260643422603607, "learning_rate": 3.222361910950479e-05, "loss": 0.4841, "step": 8812 }, { "epoch": 1.8116969883852398, "grad_norm": 0.19248011708259583, "learning_rate": 3.221404234670299e-05, "loss": 0.5179, "step": 8813 }, { "epoch": 1.8119025593586184, "grad_norm": 0.21127529442310333, "learning_rate": 3.2204466213805556e-05, "loss": 0.5432, "step": 8814 }, { "epoch": 1.812108130331997, "grad_norm": 0.20110765099525452, "learning_rate": 3.219489071128427e-05, "loss": 0.5333, "step": 8815 }, { "epoch": 1.8123137013053756, "grad_norm": 0.19356440007686615, "learning_rate": 3.2185315839610864e-05, "loss": 0.5167, "step": 8816 }, { "epoch": 1.8125192722787542, "grad_norm": 0.19028930366039276, "learning_rate": 3.217574159925706e-05, "loss": 0.526, "step": 8817 }, { "epoch": 1.8127248432521328, "grad_norm": 0.16638804972171783, "learning_rate": 3.21661679906945e-05, "loss": 0.5041, "step": 8818 }, { "epoch": 1.8129304142255114, "grad_norm": 0.13588784635066986, "learning_rate": 3.2156595014394874e-05, "loss": 0.5016, "step": 8819 }, { "epoch": 1.81313598519889, "grad_norm": 0.16222389042377472, "learning_rate": 3.214702267082978e-05, "loss": 0.5194, "step": 8820 }, { "epoch": 1.8133415561722686, "grad_norm": 0.1995917111635208, "learning_rate": 3.213745096047081e-05, "loss": 0.5335, "step": 8821 }, { "epoch": 1.8135471271456471, "grad_norm": 0.20120279490947723, "learning_rate": 3.212787988378951e-05, "loss": 0.5491, "step": 8822 }, { "epoch": 1.8137526981190257, "grad_norm": 0.19153755903244019, "learning_rate": 3.211830944125741e-05, "loss": 0.5137, "step": 8823 }, { "epoch": 1.8139582690924043, "grad_norm": 0.19813428819179535, "learning_rate": 3.210873963334599e-05, "loss": 0.5302, "step": 8824 }, { "epoch": 1.8141638400657827, "grad_norm": 0.16227731108665466, "learning_rate": 3.20991704605267e-05, "loss": 0.5092, "step": 8825 }, { "epoch": 1.8143694110391613, "grad_norm": 0.13235917687416077, "learning_rate": 3.2089601923270996e-05, "loss": 0.5202, "step": 8826 }, { "epoch": 1.8145749820125399, "grad_norm": 0.16478480398654938, "learning_rate": 3.208003402205027e-05, "loss": 0.5245, "step": 8827 }, { "epoch": 1.8147805529859182, "grad_norm": 0.19610688090324402, "learning_rate": 3.207046675733587e-05, "loss": 0.514, "step": 8828 }, { "epoch": 1.8149861239592968, "grad_norm": 0.19508203864097595, "learning_rate": 3.206090012959915e-05, "loss": 0.5138, "step": 8829 }, { "epoch": 1.8151916949326754, "grad_norm": 0.20295090973377228, "learning_rate": 3.205133413931139e-05, "loss": 0.5225, "step": 8830 }, { "epoch": 1.815397265906054, "grad_norm": 0.17680945992469788, "learning_rate": 3.204176878694388e-05, "loss": 0.5177, "step": 8831 }, { "epoch": 1.8156028368794326, "grad_norm": 0.16306188702583313, "learning_rate": 3.203220407296784e-05, "loss": 0.5388, "step": 8832 }, { "epoch": 1.8158084078528112, "grad_norm": 0.22620275616645813, "learning_rate": 3.2022639997854516e-05, "loss": 0.5522, "step": 8833 }, { "epoch": 1.8160139788261898, "grad_norm": 0.19404473900794983, "learning_rate": 3.201307656207506e-05, "loss": 0.5213, "step": 8834 }, { "epoch": 1.8162195497995683, "grad_norm": 0.1629197895526886, "learning_rate": 3.200351376610062e-05, "loss": 0.4795, "step": 8835 }, { "epoch": 1.816425120772947, "grad_norm": 0.16560040414333344, "learning_rate": 3.199395161040231e-05, "loss": 0.524, "step": 8836 }, { "epoch": 1.8166306917463255, "grad_norm": 0.19243641197681427, "learning_rate": 3.198439009545122e-05, "loss": 0.5346, "step": 8837 }, { "epoch": 1.816836262719704, "grad_norm": 0.16431495547294617, "learning_rate": 3.1974829221718386e-05, "loss": 0.5181, "step": 8838 }, { "epoch": 1.8170418336930827, "grad_norm": 0.15657220780849457, "learning_rate": 3.196526898967483e-05, "loss": 0.5453, "step": 8839 }, { "epoch": 1.817247404666461, "grad_norm": 0.1584593802690506, "learning_rate": 3.1955709399791556e-05, "loss": 0.4755, "step": 8840 }, { "epoch": 1.8174529756398397, "grad_norm": 0.16270606219768524, "learning_rate": 3.194615045253952e-05, "loss": 0.5515, "step": 8841 }, { "epoch": 1.8176585466132182, "grad_norm": 0.2008228451013565, "learning_rate": 3.193659214838962e-05, "loss": 0.5282, "step": 8842 }, { "epoch": 1.8178641175865966, "grad_norm": 0.17246867716312408, "learning_rate": 3.192703448781278e-05, "loss": 0.5119, "step": 8843 }, { "epoch": 1.8180696885599752, "grad_norm": 0.124653160572052, "learning_rate": 3.1917477471279846e-05, "loss": 0.509, "step": 8844 }, { "epoch": 1.8182752595333538, "grad_norm": 0.1591031700372696, "learning_rate": 3.1907921099261654e-05, "loss": 0.5118, "step": 8845 }, { "epoch": 1.8184808305067324, "grad_norm": 0.20176099240779877, "learning_rate": 3.189836537222897e-05, "loss": 0.5179, "step": 8846 }, { "epoch": 1.818686401480111, "grad_norm": 0.19873826205730438, "learning_rate": 3.1888810290652606e-05, "loss": 0.5189, "step": 8847 }, { "epoch": 1.8188919724534895, "grad_norm": 0.19450412690639496, "learning_rate": 3.187925585500329e-05, "loss": 0.5346, "step": 8848 }, { "epoch": 1.8190975434268681, "grad_norm": 0.19407886266708374, "learning_rate": 3.18697020657517e-05, "loss": 0.5105, "step": 8849 }, { "epoch": 1.8193031144002467, "grad_norm": 0.1899595409631729, "learning_rate": 3.186014892336852e-05, "loss": 0.5173, "step": 8850 }, { "epoch": 1.8195086853736253, "grad_norm": 0.19497114419937134, "learning_rate": 3.185059642832438e-05, "loss": 0.5314, "step": 8851 }, { "epoch": 1.819714256347004, "grad_norm": 0.18996943533420563, "learning_rate": 3.184104458108991e-05, "loss": 0.5371, "step": 8852 }, { "epoch": 1.8199198273203825, "grad_norm": 0.20072153210639954, "learning_rate": 3.1831493382135644e-05, "loss": 0.5398, "step": 8853 }, { "epoch": 1.820125398293761, "grad_norm": 0.18781349062919617, "learning_rate": 3.182194283193216e-05, "loss": 0.5452, "step": 8854 }, { "epoch": 1.8203309692671394, "grad_norm": 0.19404610991477966, "learning_rate": 3.181239293094997e-05, "loss": 0.5322, "step": 8855 }, { "epoch": 1.820536540240518, "grad_norm": 0.19891172647476196, "learning_rate": 3.180284367965953e-05, "loss": 0.5382, "step": 8856 }, { "epoch": 1.8207421112138966, "grad_norm": 0.19251051545143127, "learning_rate": 3.179329507853131e-05, "loss": 0.5373, "step": 8857 }, { "epoch": 1.8209476821872752, "grad_norm": 0.16882584989070892, "learning_rate": 3.178374712803571e-05, "loss": 0.5063, "step": 8858 }, { "epoch": 1.8211532531606536, "grad_norm": 0.12265215069055557, "learning_rate": 3.177419982864312e-05, "loss": 0.5056, "step": 8859 }, { "epoch": 1.8213588241340322, "grad_norm": 0.17124712467193604, "learning_rate": 3.176465318082386e-05, "loss": 0.5412, "step": 8860 }, { "epoch": 1.8215643951074107, "grad_norm": 0.20257225632667542, "learning_rate": 3.1755107185048296e-05, "loss": 0.5302, "step": 8861 }, { "epoch": 1.8217699660807893, "grad_norm": 0.2040639966726303, "learning_rate": 3.17455618417867e-05, "loss": 0.5245, "step": 8862 }, { "epoch": 1.821975537054168, "grad_norm": 0.16710929572582245, "learning_rate": 3.173601715150931e-05, "loss": 0.5054, "step": 8863 }, { "epoch": 1.8221811080275465, "grad_norm": 0.16893459856510162, "learning_rate": 3.172647311468637e-05, "loss": 0.5471, "step": 8864 }, { "epoch": 1.822386679000925, "grad_norm": 0.208717480301857, "learning_rate": 3.171692973178805e-05, "loss": 0.5122, "step": 8865 }, { "epoch": 1.8225922499743037, "grad_norm": 0.20160967111587524, "learning_rate": 3.170738700328453e-05, "loss": 0.537, "step": 8866 }, { "epoch": 1.8227978209476823, "grad_norm": 0.20131829380989075, "learning_rate": 3.16978449296459e-05, "loss": 0.5303, "step": 8867 }, { "epoch": 1.8230033919210609, "grad_norm": 0.1935744732618332, "learning_rate": 3.168830351134229e-05, "loss": 0.5284, "step": 8868 }, { "epoch": 1.8232089628944395, "grad_norm": 0.16306568682193756, "learning_rate": 3.167876274884375e-05, "loss": 0.5169, "step": 8869 }, { "epoch": 1.8234145338678178, "grad_norm": 0.17691202461719513, "learning_rate": 3.166922264262031e-05, "loss": 0.5176, "step": 8870 }, { "epoch": 1.8236201048411964, "grad_norm": 0.1908418834209442, "learning_rate": 3.165968319314196e-05, "loss": 0.5321, "step": 8871 }, { "epoch": 1.823825675814575, "grad_norm": 0.2010890543460846, "learning_rate": 3.1650144400878655e-05, "loss": 0.534, "step": 8872 }, { "epoch": 1.8240312467879536, "grad_norm": 0.19382749497890472, "learning_rate": 3.164060626630035e-05, "loss": 0.528, "step": 8873 }, { "epoch": 1.824236817761332, "grad_norm": 0.16458258032798767, "learning_rate": 3.163106878987692e-05, "loss": 0.5106, "step": 8874 }, { "epoch": 1.8244423887347105, "grad_norm": 0.15716025233268738, "learning_rate": 3.162153197207825e-05, "loss": 0.5123, "step": 8875 }, { "epoch": 1.8246479597080891, "grad_norm": 0.19664350152015686, "learning_rate": 3.161199581337418e-05, "loss": 0.5322, "step": 8876 }, { "epoch": 1.8248535306814677, "grad_norm": 0.19546431303024292, "learning_rate": 3.160246031423449e-05, "loss": 0.5382, "step": 8877 }, { "epoch": 1.8250591016548463, "grad_norm": 0.15982358157634735, "learning_rate": 3.1592925475128965e-05, "loss": 0.5175, "step": 8878 }, { "epoch": 1.825264672628225, "grad_norm": 0.1651008278131485, "learning_rate": 3.1583391296527345e-05, "loss": 0.55, "step": 8879 }, { "epoch": 1.8254702436016035, "grad_norm": 0.19452379643917084, "learning_rate": 3.15738577788993e-05, "loss": 0.5374, "step": 8880 }, { "epoch": 1.825675814574982, "grad_norm": 0.1954164355993271, "learning_rate": 3.1564324922714546e-05, "loss": 0.5137, "step": 8881 }, { "epoch": 1.8258813855483607, "grad_norm": 0.19587047398090363, "learning_rate": 3.155479272844271e-05, "loss": 0.5334, "step": 8882 }, { "epoch": 1.8260869565217392, "grad_norm": 0.19509205222129822, "learning_rate": 3.154526119655339e-05, "loss": 0.5252, "step": 8883 }, { "epoch": 1.8262925274951178, "grad_norm": 0.1942068487405777, "learning_rate": 3.153573032751616e-05, "loss": 0.487, "step": 8884 }, { "epoch": 1.8264980984684962, "grad_norm": 0.19760790467262268, "learning_rate": 3.152620012180057e-05, "loss": 0.5228, "step": 8885 }, { "epoch": 1.8267036694418748, "grad_norm": 0.20716530084609985, "learning_rate": 3.151667057987612e-05, "loss": 0.5397, "step": 8886 }, { "epoch": 1.8269092404152534, "grad_norm": 0.17616085708141327, "learning_rate": 3.1507141702212276e-05, "loss": 0.5052, "step": 8887 }, { "epoch": 1.827114811388632, "grad_norm": 0.16189555823802948, "learning_rate": 3.149761348927851e-05, "loss": 0.54, "step": 8888 }, { "epoch": 1.8273203823620103, "grad_norm": 0.19330181181430817, "learning_rate": 3.148808594154422e-05, "loss": 0.5388, "step": 8889 }, { "epoch": 1.827525953335389, "grad_norm": 0.1901981681585312, "learning_rate": 3.1478559059478784e-05, "loss": 0.522, "step": 8890 }, { "epoch": 1.8277315243087675, "grad_norm": 0.2037983387708664, "learning_rate": 3.146903284355154e-05, "loss": 0.552, "step": 8891 }, { "epoch": 1.827937095282146, "grad_norm": 0.19158220291137695, "learning_rate": 3.14595072942318e-05, "loss": 0.5287, "step": 8892 }, { "epoch": 1.8281426662555247, "grad_norm": 0.19278709590435028, "learning_rate": 3.1449982411988846e-05, "loss": 0.5276, "step": 8893 }, { "epoch": 1.8283482372289033, "grad_norm": 0.19211730360984802, "learning_rate": 3.144045819729193e-05, "loss": 0.5246, "step": 8894 }, { "epoch": 1.8285538082022819, "grad_norm": 0.16746920347213745, "learning_rate": 3.143093465061026e-05, "loss": 0.5085, "step": 8895 }, { "epoch": 1.8287593791756604, "grad_norm": 0.16652396321296692, "learning_rate": 3.142141177241301e-05, "loss": 0.5325, "step": 8896 }, { "epoch": 1.828964950149039, "grad_norm": 0.19498294591903687, "learning_rate": 3.141188956316935e-05, "loss": 0.5344, "step": 8897 }, { "epoch": 1.8291705211224176, "grad_norm": 0.19246521592140198, "learning_rate": 3.140236802334837e-05, "loss": 0.5317, "step": 8898 }, { "epoch": 1.8293760920957962, "grad_norm": 0.15903492271900177, "learning_rate": 3.139284715341918e-05, "loss": 0.4802, "step": 8899 }, { "epoch": 1.8295816630691746, "grad_norm": 0.16071657836437225, "learning_rate": 3.1383326953850794e-05, "loss": 0.5418, "step": 8900 }, { "epoch": 1.8297872340425532, "grad_norm": 0.19449485838413239, "learning_rate": 3.1373807425112236e-05, "loss": 0.5342, "step": 8901 }, { "epoch": 1.8299928050159318, "grad_norm": 0.19907018542289734, "learning_rate": 3.136428856767252e-05, "loss": 0.5059, "step": 8902 }, { "epoch": 1.8301983759893103, "grad_norm": 0.19666697084903717, "learning_rate": 3.135477038200057e-05, "loss": 0.5349, "step": 8903 }, { "epoch": 1.8304039469626887, "grad_norm": 0.20423884689807892, "learning_rate": 3.13452528685653e-05, "loss": 0.5282, "step": 8904 }, { "epoch": 1.8306095179360673, "grad_norm": 0.1999506801366806, "learning_rate": 3.133573602783559e-05, "loss": 0.5322, "step": 8905 }, { "epoch": 1.8308150889094459, "grad_norm": 0.1950768530368805, "learning_rate": 3.132621986028031e-05, "loss": 0.5047, "step": 8906 }, { "epoch": 1.8310206598828245, "grad_norm": 0.1928025186061859, "learning_rate": 3.131670436636827e-05, "loss": 0.5322, "step": 8907 }, { "epoch": 1.831226230856203, "grad_norm": 0.18942581117153168, "learning_rate": 3.1307189546568223e-05, "loss": 0.5073, "step": 8908 }, { "epoch": 1.8314318018295817, "grad_norm": 0.19761119782924652, "learning_rate": 3.129767540134898e-05, "loss": 0.5366, "step": 8909 }, { "epoch": 1.8316373728029602, "grad_norm": 0.19659826159477234, "learning_rate": 3.1288161931179216e-05, "loss": 0.5442, "step": 8910 }, { "epoch": 1.8318429437763388, "grad_norm": 0.19665639102458954, "learning_rate": 3.1278649136527626e-05, "loss": 0.5245, "step": 8911 }, { "epoch": 1.8320485147497174, "grad_norm": 0.19645392894744873, "learning_rate": 3.1269137017862864e-05, "loss": 0.5327, "step": 8912 }, { "epoch": 1.832254085723096, "grad_norm": 0.1934535652399063, "learning_rate": 3.1259625575653535e-05, "loss": 0.5234, "step": 8913 }, { "epoch": 1.8324596566964746, "grad_norm": 0.19229261577129364, "learning_rate": 3.125011481036823e-05, "loss": 0.528, "step": 8914 }, { "epoch": 1.8326652276698532, "grad_norm": 0.1917877197265625, "learning_rate": 3.124060472247549e-05, "loss": 0.5341, "step": 8915 }, { "epoch": 1.8328707986432315, "grad_norm": 0.1936071366071701, "learning_rate": 3.1231095312443864e-05, "loss": 0.5224, "step": 8916 }, { "epoch": 1.8330763696166101, "grad_norm": 0.2054835557937622, "learning_rate": 3.12215865807418e-05, "loss": 0.5125, "step": 8917 }, { "epoch": 1.8332819405899887, "grad_norm": 0.1977832019329071, "learning_rate": 3.121207852783778e-05, "loss": 0.5194, "step": 8918 }, { "epoch": 1.833487511563367, "grad_norm": 0.19401463866233826, "learning_rate": 3.1202571154200206e-05, "loss": 0.5293, "step": 8919 }, { "epoch": 1.8336930825367457, "grad_norm": 0.16132505238056183, "learning_rate": 3.119306446029746e-05, "loss": 0.4925, "step": 8920 }, { "epoch": 1.8338986535101243, "grad_norm": 0.15574200451374054, "learning_rate": 3.1183558446597894e-05, "loss": 0.5394, "step": 8921 }, { "epoch": 1.8341042244835029, "grad_norm": 0.2005411684513092, "learning_rate": 3.117405311356981e-05, "loss": 0.5304, "step": 8922 }, { "epoch": 1.8343097954568814, "grad_norm": 0.1618259698152542, "learning_rate": 3.116454846168153e-05, "loss": 0.5146, "step": 8923 }, { "epoch": 1.83451536643026, "grad_norm": 0.17110760509967804, "learning_rate": 3.115504449140127e-05, "loss": 0.5491, "step": 8924 }, { "epoch": 1.8347209374036386, "grad_norm": 0.1980321705341339, "learning_rate": 3.114554120319726e-05, "loss": 0.523, "step": 8925 }, { "epoch": 1.8349265083770172, "grad_norm": 0.19496552646160126, "learning_rate": 3.113603859753768e-05, "loss": 0.5267, "step": 8926 }, { "epoch": 1.8351320793503958, "grad_norm": 0.19820396602153778, "learning_rate": 3.112653667489067e-05, "loss": 0.5187, "step": 8927 }, { "epoch": 1.8353376503237744, "grad_norm": 0.16340111196041107, "learning_rate": 3.111703543572436e-05, "loss": 0.5077, "step": 8928 }, { "epoch": 1.835543221297153, "grad_norm": 0.17557556927204132, "learning_rate": 3.110753488050682e-05, "loss": 0.5238, "step": 8929 }, { "epoch": 1.8357487922705316, "grad_norm": 0.2018451690673828, "learning_rate": 3.10980350097061e-05, "loss": 0.5438, "step": 8930 }, { "epoch": 1.83595436324391, "grad_norm": 0.18654681742191315, "learning_rate": 3.108853582379023e-05, "loss": 0.5106, "step": 8931 }, { "epoch": 1.8361599342172885, "grad_norm": 0.18761597573757172, "learning_rate": 3.1079037323227176e-05, "loss": 0.5089, "step": 8932 }, { "epoch": 1.836365505190667, "grad_norm": 0.19022609293460846, "learning_rate": 3.1069539508484894e-05, "loss": 0.5145, "step": 8933 }, { "epoch": 1.8365710761640457, "grad_norm": 0.18628591299057007, "learning_rate": 3.106004238003128e-05, "loss": 0.5176, "step": 8934 }, { "epoch": 1.836776647137424, "grad_norm": 0.19824586808681488, "learning_rate": 3.105054593833422e-05, "loss": 0.522, "step": 8935 }, { "epoch": 1.8369822181108026, "grad_norm": 0.16452063620090485, "learning_rate": 3.1041050183861545e-05, "loss": 0.5126, "step": 8936 }, { "epoch": 1.8371877890841812, "grad_norm": 0.1635677069425583, "learning_rate": 3.103155511708111e-05, "loss": 0.5449, "step": 8937 }, { "epoch": 1.8373933600575598, "grad_norm": 0.17322902381420135, "learning_rate": 3.1022060738460663e-05, "loss": 0.5107, "step": 8938 }, { "epoch": 1.8375989310309384, "grad_norm": 0.17800618708133698, "learning_rate": 3.101256704846794e-05, "loss": 0.5426, "step": 8939 }, { "epoch": 1.837804502004317, "grad_norm": 0.1971377432346344, "learning_rate": 3.100307404757067e-05, "loss": 0.5059, "step": 8940 }, { "epoch": 1.8380100729776956, "grad_norm": 0.18664588034152985, "learning_rate": 3.099358173623652e-05, "loss": 0.5143, "step": 8941 }, { "epoch": 1.8382156439510742, "grad_norm": 0.1906706988811493, "learning_rate": 3.0984090114933135e-05, "loss": 0.5337, "step": 8942 }, { "epoch": 1.8384212149244528, "grad_norm": 0.19608831405639648, "learning_rate": 3.09745991841281e-05, "loss": 0.5284, "step": 8943 }, { "epoch": 1.8386267858978314, "grad_norm": 0.19849687814712524, "learning_rate": 3.096510894428902e-05, "loss": 0.5223, "step": 8944 }, { "epoch": 1.83883235687121, "grad_norm": 0.19968105852603912, "learning_rate": 3.095561939588344e-05, "loss": 0.5307, "step": 8945 }, { "epoch": 1.8390379278445883, "grad_norm": 0.17240165174007416, "learning_rate": 3.094613053937883e-05, "loss": 0.5226, "step": 8946 }, { "epoch": 1.839243498817967, "grad_norm": 0.16927485167980194, "learning_rate": 3.0936642375242697e-05, "loss": 0.5411, "step": 8947 }, { "epoch": 1.8394490697913455, "grad_norm": 0.19402731955051422, "learning_rate": 3.092715490394245e-05, "loss": 0.5159, "step": 8948 }, { "epoch": 1.839654640764724, "grad_norm": 0.16462527215480804, "learning_rate": 3.091766812594551e-05, "loss": 0.5177, "step": 8949 }, { "epoch": 1.8398602117381024, "grad_norm": 0.16749082505702972, "learning_rate": 3.0908182041719226e-05, "loss": 0.5446, "step": 8950 }, { "epoch": 1.840065782711481, "grad_norm": 0.1971343457698822, "learning_rate": 3.089869665173095e-05, "loss": 0.5092, "step": 8951 }, { "epoch": 1.8402713536848596, "grad_norm": 0.20525288581848145, "learning_rate": 3.0889211956447994e-05, "loss": 0.5572, "step": 8952 }, { "epoch": 1.8404769246582382, "grad_norm": 0.20502051711082458, "learning_rate": 3.0879727956337605e-05, "loss": 0.5269, "step": 8953 }, { "epoch": 1.8406824956316168, "grad_norm": 0.20041027665138245, "learning_rate": 3.087024465186704e-05, "loss": 0.5216, "step": 8954 }, { "epoch": 1.8408880666049954, "grad_norm": 0.1678602546453476, "learning_rate": 3.086076204350346e-05, "loss": 0.4868, "step": 8955 }, { "epoch": 1.841093637578374, "grad_norm": 0.1751408874988556, "learning_rate": 3.085128013171403e-05, "loss": 0.549, "step": 8956 }, { "epoch": 1.8412992085517526, "grad_norm": 0.19476006925106049, "learning_rate": 3.084179891696592e-05, "loss": 0.5015, "step": 8957 }, { "epoch": 1.8415047795251311, "grad_norm": 0.20983824133872986, "learning_rate": 3.083231839972621e-05, "loss": 0.5351, "step": 8958 }, { "epoch": 1.8417103504985097, "grad_norm": 0.19999557733535767, "learning_rate": 3.082283858046194e-05, "loss": 0.5268, "step": 8959 }, { "epoch": 1.8419159214718883, "grad_norm": 0.2033097892999649, "learning_rate": 3.081335945964014e-05, "loss": 0.5243, "step": 8960 }, { "epoch": 1.8421214924452667, "grad_norm": 0.19662059843540192, "learning_rate": 3.080388103772783e-05, "loss": 0.5197, "step": 8961 }, { "epoch": 1.8423270634186453, "grad_norm": 0.19115544855594635, "learning_rate": 3.079440331519194e-05, "loss": 0.5119, "step": 8962 }, { "epoch": 1.8425326343920239, "grad_norm": 0.19383569061756134, "learning_rate": 3.078492629249939e-05, "loss": 0.5221, "step": 8963 }, { "epoch": 1.8427382053654024, "grad_norm": 0.19358788430690765, "learning_rate": 3.077544997011709e-05, "loss": 0.5366, "step": 8964 }, { "epoch": 1.8429437763387808, "grad_norm": 0.19317568838596344, "learning_rate": 3.0765974348511895e-05, "loss": 0.5127, "step": 8965 }, { "epoch": 1.8431493473121594, "grad_norm": 0.19126683473587036, "learning_rate": 3.075649942815061e-05, "loss": 0.5027, "step": 8966 }, { "epoch": 1.843354918285538, "grad_norm": 0.2007630318403244, "learning_rate": 3.0747025209500024e-05, "loss": 0.5352, "step": 8967 }, { "epoch": 1.8435604892589166, "grad_norm": 0.1700150966644287, "learning_rate": 3.073755169302689e-05, "loss": 0.4973, "step": 8968 }, { "epoch": 1.8437660602322952, "grad_norm": 0.1250450313091278, "learning_rate": 3.0728078879197913e-05, "loss": 0.5154, "step": 8969 }, { "epoch": 1.8439716312056738, "grad_norm": 0.16313976049423218, "learning_rate": 3.071860676847978e-05, "loss": 0.5528, "step": 8970 }, { "epoch": 1.8441772021790523, "grad_norm": 0.16907833516597748, "learning_rate": 3.070913536133915e-05, "loss": 0.5112, "step": 8971 }, { "epoch": 1.844382773152431, "grad_norm": 0.11831056326627731, "learning_rate": 3.0699664658242614e-05, "loss": 0.4941, "step": 8972 }, { "epoch": 1.8445883441258095, "grad_norm": 0.15390439331531525, "learning_rate": 3.0690194659656774e-05, "loss": 0.514, "step": 8973 }, { "epoch": 1.844793915099188, "grad_norm": 0.16013920307159424, "learning_rate": 3.0680725366048155e-05, "loss": 0.4877, "step": 8974 }, { "epoch": 1.8449994860725667, "grad_norm": 0.1253557801246643, "learning_rate": 3.067125677788327e-05, "loss": 0.5135, "step": 8975 }, { "epoch": 1.845205057045945, "grad_norm": 0.16094715893268585, "learning_rate": 3.0661788895628595e-05, "loss": 0.533, "step": 8976 }, { "epoch": 1.8454106280193237, "grad_norm": 0.190200075507164, "learning_rate": 3.065232171975054e-05, "loss": 0.5279, "step": 8977 }, { "epoch": 1.8456161989927022, "grad_norm": 0.20202942192554474, "learning_rate": 3.064285525071556e-05, "loss": 0.5234, "step": 8978 }, { "epoch": 1.8458217699660808, "grad_norm": 0.20393583178520203, "learning_rate": 3.063338948898999e-05, "loss": 0.5437, "step": 8979 }, { "epoch": 1.8460273409394592, "grad_norm": 0.19702088832855225, "learning_rate": 3.062392443504017e-05, "loss": 0.5375, "step": 8980 }, { "epoch": 1.8462329119128378, "grad_norm": 0.19736789166927338, "learning_rate": 3.061446008933239e-05, "loss": 0.5485, "step": 8981 }, { "epoch": 1.8464384828862164, "grad_norm": 0.19195613265037537, "learning_rate": 3.060499645233294e-05, "loss": 0.5325, "step": 8982 }, { "epoch": 1.846644053859595, "grad_norm": 0.19624346494674683, "learning_rate": 3.059553352450803e-05, "loss": 0.5307, "step": 8983 }, { "epoch": 1.8468496248329735, "grad_norm": 0.17116032540798187, "learning_rate": 3.058607130632383e-05, "loss": 0.4922, "step": 8984 }, { "epoch": 1.8470551958063521, "grad_norm": 0.1617693156003952, "learning_rate": 3.057660979824655e-05, "loss": 0.5326, "step": 8985 }, { "epoch": 1.8472607667797307, "grad_norm": 0.19013215601444244, "learning_rate": 3.05671490007423e-05, "loss": 0.5203, "step": 8986 }, { "epoch": 1.8474663377531093, "grad_norm": 0.1693909615278244, "learning_rate": 3.055768891427715e-05, "loss": 0.5208, "step": 8987 }, { "epoch": 1.847671908726488, "grad_norm": 0.1591087430715561, "learning_rate": 3.054822953931716e-05, "loss": 0.5186, "step": 8988 }, { "epoch": 1.8478774796998665, "grad_norm": 0.19975587725639343, "learning_rate": 3.0538770876328365e-05, "loss": 0.5238, "step": 8989 }, { "epoch": 1.848083050673245, "grad_norm": 0.21245107054710388, "learning_rate": 3.052931292577673e-05, "loss": 0.5405, "step": 8990 }, { "epoch": 1.8482886216466234, "grad_norm": 0.19569487869739532, "learning_rate": 3.051985568812819e-05, "loss": 0.5452, "step": 8991 }, { "epoch": 1.848494192620002, "grad_norm": 0.19539184868335724, "learning_rate": 3.0510399163848704e-05, "loss": 0.518, "step": 8992 }, { "epoch": 1.8486997635933806, "grad_norm": 0.1992693692445755, "learning_rate": 3.0500943353404117e-05, "loss": 0.5521, "step": 8993 }, { "epoch": 1.8489053345667592, "grad_norm": 0.1907494217157364, "learning_rate": 3.0491488257260293e-05, "loss": 0.5105, "step": 8994 }, { "epoch": 1.8491109055401376, "grad_norm": 0.1949932873249054, "learning_rate": 3.0482033875883026e-05, "loss": 0.5214, "step": 8995 }, { "epoch": 1.8493164765135162, "grad_norm": 0.19968056678771973, "learning_rate": 3.0472580209738096e-05, "loss": 0.5388, "step": 8996 }, { "epoch": 1.8495220474868947, "grad_norm": 0.1978997439146042, "learning_rate": 3.0463127259291236e-05, "loss": 0.5319, "step": 8997 }, { "epoch": 1.8497276184602733, "grad_norm": 0.2019454538822174, "learning_rate": 3.0453675025008134e-05, "loss": 0.532, "step": 8998 }, { "epoch": 1.849933189433652, "grad_norm": 0.20041054487228394, "learning_rate": 3.0444223507354492e-05, "loss": 0.5036, "step": 8999 }, { "epoch": 1.8501387604070305, "grad_norm": 0.20030958950519562, "learning_rate": 3.0434772706795925e-05, "loss": 0.5458, "step": 9000 }, { "epoch": 1.850344331380409, "grad_norm": 0.16151051223278046, "learning_rate": 3.042532262379803e-05, "loss": 0.5085, "step": 9001 }, { "epoch": 1.8505499023537877, "grad_norm": 0.15830279886722565, "learning_rate": 3.0415873258826368e-05, "loss": 0.536, "step": 9002 }, { "epoch": 1.8507554733271663, "grad_norm": 0.19460676610469818, "learning_rate": 3.040642461234645e-05, "loss": 0.5357, "step": 9003 }, { "epoch": 1.8509610443005449, "grad_norm": 0.1874227076768875, "learning_rate": 3.0396976684823795e-05, "loss": 0.5028, "step": 9004 }, { "epoch": 1.8511666152739235, "grad_norm": 0.19529518485069275, "learning_rate": 3.0387529476723823e-05, "loss": 0.548, "step": 9005 }, { "epoch": 1.851372186247302, "grad_norm": 0.19249314069747925, "learning_rate": 3.0378082988511997e-05, "loss": 0.4975, "step": 9006 }, { "epoch": 1.8515777572206804, "grad_norm": 0.15955211222171783, "learning_rate": 3.0368637220653672e-05, "loss": 0.4934, "step": 9007 }, { "epoch": 1.851783328194059, "grad_norm": 0.16296181082725525, "learning_rate": 3.0359192173614212e-05, "loss": 0.5391, "step": 9008 }, { "epoch": 1.8519888991674376, "grad_norm": 0.18920543789863586, "learning_rate": 3.0349747847858923e-05, "loss": 0.5126, "step": 9009 }, { "epoch": 1.852194470140816, "grad_norm": 0.20026175677776337, "learning_rate": 3.0340304243853077e-05, "loss": 0.5336, "step": 9010 }, { "epoch": 1.8524000411141945, "grad_norm": 0.1883440762758255, "learning_rate": 3.0330861362061927e-05, "loss": 0.4898, "step": 9011 }, { "epoch": 1.8526056120875731, "grad_norm": 0.19095604121685028, "learning_rate": 3.0321419202950652e-05, "loss": 0.5055, "step": 9012 }, { "epoch": 1.8528111830609517, "grad_norm": 0.19625356793403625, "learning_rate": 3.0311977766984462e-05, "loss": 0.5161, "step": 9013 }, { "epoch": 1.8530167540343303, "grad_norm": 0.19662852585315704, "learning_rate": 3.0302537054628483e-05, "loss": 0.5448, "step": 9014 }, { "epoch": 1.853222325007709, "grad_norm": 0.20150268077850342, "learning_rate": 3.0293097066347794e-05, "loss": 0.503, "step": 9015 }, { "epoch": 1.8534278959810875, "grad_norm": 0.20207509398460388, "learning_rate": 3.0283657802607484e-05, "loss": 0.5437, "step": 9016 }, { "epoch": 1.853633466954466, "grad_norm": 0.20044514536857605, "learning_rate": 3.027421926387257e-05, "loss": 0.5406, "step": 9017 }, { "epoch": 1.8538390379278447, "grad_norm": 0.2027798891067505, "learning_rate": 3.026478145060804e-05, "loss": 0.5493, "step": 9018 }, { "epoch": 1.8540446089012232, "grad_norm": 0.19402191042900085, "learning_rate": 3.025534436327884e-05, "loss": 0.5346, "step": 9019 }, { "epoch": 1.8542501798746018, "grad_norm": 0.2023455947637558, "learning_rate": 3.0245908002349927e-05, "loss": 0.5521, "step": 9020 }, { "epoch": 1.8544557508479804, "grad_norm": 0.19758723676204681, "learning_rate": 3.0236472368286162e-05, "loss": 0.5314, "step": 9021 }, { "epoch": 1.8546613218213588, "grad_norm": 0.19898824393749237, "learning_rate": 3.0227037461552405e-05, "loss": 0.5221, "step": 9022 }, { "epoch": 1.8548668927947374, "grad_norm": 0.18745093047618866, "learning_rate": 3.021760328261346e-05, "loss": 0.5196, "step": 9023 }, { "epoch": 1.855072463768116, "grad_norm": 0.1959155946969986, "learning_rate": 3.0208169831934095e-05, "loss": 0.515, "step": 9024 }, { "epoch": 1.8552780347414946, "grad_norm": 0.19238829612731934, "learning_rate": 3.0198737109979084e-05, "loss": 0.5023, "step": 9025 }, { "epoch": 1.855483605714873, "grad_norm": 0.19325855374336243, "learning_rate": 3.01893051172131e-05, "loss": 0.5261, "step": 9026 }, { "epoch": 1.8556891766882515, "grad_norm": 0.1941802203655243, "learning_rate": 3.017987385410083e-05, "loss": 0.5381, "step": 9027 }, { "epoch": 1.85589474766163, "grad_norm": 0.19294960796833038, "learning_rate": 3.0170443321106913e-05, "loss": 0.5493, "step": 9028 }, { "epoch": 1.8561003186350087, "grad_norm": 0.20181645452976227, "learning_rate": 3.0161013518695943e-05, "loss": 0.5268, "step": 9029 }, { "epoch": 1.8563058896083873, "grad_norm": 0.1975722759962082, "learning_rate": 3.0151584447332476e-05, "loss": 0.5342, "step": 9030 }, { "epoch": 1.8565114605817659, "grad_norm": 0.20087282359600067, "learning_rate": 3.0142156107481048e-05, "loss": 0.522, "step": 9031 }, { "epoch": 1.8567170315551444, "grad_norm": 0.19749726355075836, "learning_rate": 3.013272849960612e-05, "loss": 0.5077, "step": 9032 }, { "epoch": 1.856922602528523, "grad_norm": 0.19727249443531036, "learning_rate": 3.0123301624172185e-05, "loss": 0.5261, "step": 9033 }, { "epoch": 1.8571281735019016, "grad_norm": 0.2018827497959137, "learning_rate": 3.0113875481643647e-05, "loss": 0.5258, "step": 9034 }, { "epoch": 1.8573337444752802, "grad_norm": 0.17497631907463074, "learning_rate": 3.0104450072484895e-05, "loss": 0.525, "step": 9035 }, { "epoch": 1.8575393154486588, "grad_norm": 0.16717809438705444, "learning_rate": 3.0095025397160248e-05, "loss": 0.5311, "step": 9036 }, { "epoch": 1.8577448864220372, "grad_norm": 0.19906377792358398, "learning_rate": 3.0085601456134044e-05, "loss": 0.521, "step": 9037 }, { "epoch": 1.8579504573954158, "grad_norm": 0.19669370353221893, "learning_rate": 3.0076178249870547e-05, "loss": 0.495, "step": 9038 }, { "epoch": 1.8581560283687943, "grad_norm": 0.1930094212293625, "learning_rate": 3.006675577883398e-05, "loss": 0.5243, "step": 9039 }, { "epoch": 1.858361599342173, "grad_norm": 0.1566167026758194, "learning_rate": 3.0057334043488573e-05, "loss": 0.4969, "step": 9040 }, { "epoch": 1.8585671703155513, "grad_norm": 0.1628788709640503, "learning_rate": 3.0047913044298474e-05, "loss": 0.534, "step": 9041 }, { "epoch": 1.8587727412889299, "grad_norm": 0.19704844057559967, "learning_rate": 3.0038492781727817e-05, "loss": 0.5278, "step": 9042 }, { "epoch": 1.8589783122623085, "grad_norm": 0.2023809403181076, "learning_rate": 3.002907325624069e-05, "loss": 0.5197, "step": 9043 }, { "epoch": 1.859183883235687, "grad_norm": 0.1649642139673233, "learning_rate": 3.0019654468301153e-05, "loss": 0.5043, "step": 9044 }, { "epoch": 1.8593894542090657, "grad_norm": 0.16661269962787628, "learning_rate": 3.001023641837321e-05, "loss": 0.5443, "step": 9045 }, { "epoch": 1.8595950251824442, "grad_norm": 0.19846822321414948, "learning_rate": 3.000081910692085e-05, "loss": 0.5275, "step": 9046 }, { "epoch": 1.8598005961558228, "grad_norm": 0.1986059993505478, "learning_rate": 2.9991402534408043e-05, "loss": 0.5404, "step": 9047 }, { "epoch": 1.8600061671292014, "grad_norm": 0.20389589667320251, "learning_rate": 2.9981986701298672e-05, "loss": 0.5433, "step": 9048 }, { "epoch": 1.86021173810258, "grad_norm": 0.1978558897972107, "learning_rate": 2.9972571608056634e-05, "loss": 0.5279, "step": 9049 }, { "epoch": 1.8604173090759586, "grad_norm": 0.19354282319545746, "learning_rate": 2.996315725514575e-05, "loss": 0.5127, "step": 9050 }, { "epoch": 1.8606228800493372, "grad_norm": 0.2036632001399994, "learning_rate": 2.995374364302983e-05, "loss": 0.5386, "step": 9051 }, { "epoch": 1.8608284510227155, "grad_norm": 0.19390377402305603, "learning_rate": 2.9944330772172635e-05, "loss": 0.5256, "step": 9052 }, { "epoch": 1.8610340219960941, "grad_norm": 0.16378752887248993, "learning_rate": 2.9934918643037872e-05, "loss": 0.523, "step": 9053 }, { "epoch": 1.8612395929694727, "grad_norm": 0.28664878010749817, "learning_rate": 2.9925507256089277e-05, "loss": 0.5099, "step": 9054 }, { "epoch": 1.8614451639428513, "grad_norm": 0.16860786080360413, "learning_rate": 2.9916096611790473e-05, "loss": 0.5528, "step": 9055 }, { "epoch": 1.8616507349162297, "grad_norm": 0.1950398087501526, "learning_rate": 2.990668671060509e-05, "loss": 0.5442, "step": 9056 }, { "epoch": 1.8618563058896083, "grad_norm": 0.1937715858221054, "learning_rate": 2.98972775529967e-05, "loss": 0.5393, "step": 9057 }, { "epoch": 1.8620618768629869, "grad_norm": 0.1997014582157135, "learning_rate": 2.988786913942886e-05, "loss": 0.5328, "step": 9058 }, { "epoch": 1.8622674478363654, "grad_norm": 0.19975896179676056, "learning_rate": 2.9878461470365082e-05, "loss": 0.5348, "step": 9059 }, { "epoch": 1.862473018809744, "grad_norm": 0.15818095207214355, "learning_rate": 2.986905454626881e-05, "loss": 0.4831, "step": 9060 }, { "epoch": 1.8626785897831226, "grad_norm": 0.1348692923784256, "learning_rate": 2.9859648367603506e-05, "loss": 0.4821, "step": 9061 }, { "epoch": 1.8628841607565012, "grad_norm": 0.1564972698688507, "learning_rate": 2.9850242934832573e-05, "loss": 0.5206, "step": 9062 }, { "epoch": 1.8630897317298798, "grad_norm": 0.19233213365077972, "learning_rate": 2.9840838248419352e-05, "loss": 0.5317, "step": 9063 }, { "epoch": 1.8632953027032584, "grad_norm": 0.20065823197364807, "learning_rate": 2.983143430882718e-05, "loss": 0.5267, "step": 9064 }, { "epoch": 1.863500873676637, "grad_norm": 0.20056359469890594, "learning_rate": 2.9822031116519345e-05, "loss": 0.5365, "step": 9065 }, { "epoch": 1.8637064446500156, "grad_norm": 0.2093392014503479, "learning_rate": 2.9812628671959084e-05, "loss": 0.53, "step": 9066 }, { "epoch": 1.863912015623394, "grad_norm": 0.20502184331417084, "learning_rate": 2.9803226975609622e-05, "loss": 0.5227, "step": 9067 }, { "epoch": 1.8641175865967725, "grad_norm": 0.19816361367702484, "learning_rate": 2.9793826027934147e-05, "loss": 0.5054, "step": 9068 }, { "epoch": 1.864323157570151, "grad_norm": 0.2030927836894989, "learning_rate": 2.9784425829395777e-05, "loss": 0.5327, "step": 9069 }, { "epoch": 1.8645287285435297, "grad_norm": 0.20948849618434906, "learning_rate": 2.9775026380457645e-05, "loss": 0.5415, "step": 9070 }, { "epoch": 1.864734299516908, "grad_norm": 0.19347041845321655, "learning_rate": 2.97656276815828e-05, "loss": 0.5216, "step": 9071 }, { "epoch": 1.8649398704902866, "grad_norm": 0.16669385135173798, "learning_rate": 2.975622973323427e-05, "loss": 0.4946, "step": 9072 }, { "epoch": 1.8651454414636652, "grad_norm": 0.1577424257993698, "learning_rate": 2.9746832535875054e-05, "loss": 0.5255, "step": 9073 }, { "epoch": 1.8653510124370438, "grad_norm": 0.19290731847286224, "learning_rate": 2.973743608996809e-05, "loss": 0.5162, "step": 9074 }, { "epoch": 1.8655565834104224, "grad_norm": 0.22282882034778595, "learning_rate": 2.9728040395976326e-05, "loss": 0.5466, "step": 9075 }, { "epoch": 1.865762154383801, "grad_norm": 0.19410258531570435, "learning_rate": 2.9718645454362635e-05, "loss": 0.5002, "step": 9076 }, { "epoch": 1.8659677253571796, "grad_norm": 0.199889674782753, "learning_rate": 2.9709251265589857e-05, "loss": 0.5468, "step": 9077 }, { "epoch": 1.8661732963305582, "grad_norm": 0.19312036037445068, "learning_rate": 2.969985783012079e-05, "loss": 0.5278, "step": 9078 }, { "epoch": 1.8663788673039368, "grad_norm": 0.1675236076116562, "learning_rate": 2.9690465148418225e-05, "loss": 0.5274, "step": 9079 }, { "epoch": 1.8665844382773153, "grad_norm": 0.1646019071340561, "learning_rate": 2.9681073220944887e-05, "loss": 0.5227, "step": 9080 }, { "epoch": 1.866790009250694, "grad_norm": 0.19794802367687225, "learning_rate": 2.9671682048163452e-05, "loss": 0.5234, "step": 9081 }, { "epoch": 1.8669955802240725, "grad_norm": 0.19309046864509583, "learning_rate": 2.9662291630536612e-05, "loss": 0.5235, "step": 9082 }, { "epoch": 1.867201151197451, "grad_norm": 0.19643832743167877, "learning_rate": 2.965290196852698e-05, "loss": 0.5161, "step": 9083 }, { "epoch": 1.8674067221708295, "grad_norm": 0.19640390574932098, "learning_rate": 2.964351306259713e-05, "loss": 0.5374, "step": 9084 }, { "epoch": 1.867612293144208, "grad_norm": 0.2005051225423813, "learning_rate": 2.9634124913209623e-05, "loss": 0.5183, "step": 9085 }, { "epoch": 1.8678178641175864, "grad_norm": 0.20132414996623993, "learning_rate": 2.9624737520826958e-05, "loss": 0.5101, "step": 9086 }, { "epoch": 1.868023435090965, "grad_norm": 0.19228623807430267, "learning_rate": 2.9615350885911618e-05, "loss": 0.5274, "step": 9087 }, { "epoch": 1.8682290060643436, "grad_norm": 0.16477905213832855, "learning_rate": 2.9605965008926004e-05, "loss": 0.4958, "step": 9088 }, { "epoch": 1.8684345770377222, "grad_norm": 0.16282616555690765, "learning_rate": 2.9596579890332563e-05, "loss": 0.5274, "step": 9089 }, { "epoch": 1.8686401480111008, "grad_norm": 0.19304989278316498, "learning_rate": 2.958719553059363e-05, "loss": 0.5014, "step": 9090 }, { "epoch": 1.8688457189844794, "grad_norm": 0.16735190153121948, "learning_rate": 2.957781193017154e-05, "loss": 0.513, "step": 9091 }, { "epoch": 1.869051289957858, "grad_norm": 0.11929447948932648, "learning_rate": 2.9568429089528573e-05, "loss": 0.502, "step": 9092 }, { "epoch": 1.8692568609312366, "grad_norm": 0.16071327030658722, "learning_rate": 2.955904700912698e-05, "loss": 0.5299, "step": 9093 }, { "epoch": 1.8694624319046151, "grad_norm": 0.20334213972091675, "learning_rate": 2.954966568942897e-05, "loss": 0.534, "step": 9094 }, { "epoch": 1.8696680028779937, "grad_norm": 0.19918020069599152, "learning_rate": 2.9540285130896692e-05, "loss": 0.5146, "step": 9095 }, { "epoch": 1.8698735738513723, "grad_norm": 0.16279511153697968, "learning_rate": 2.9530905333992337e-05, "loss": 0.4845, "step": 9096 }, { "epoch": 1.870079144824751, "grad_norm": 0.16142159700393677, "learning_rate": 2.9521526299177962e-05, "loss": 0.5215, "step": 9097 }, { "epoch": 1.8702847157981293, "grad_norm": 0.20098471641540527, "learning_rate": 2.951214802691565e-05, "loss": 0.5314, "step": 9098 }, { "epoch": 1.8704902867715079, "grad_norm": 0.19500714540481567, "learning_rate": 2.950277051766741e-05, "loss": 0.5257, "step": 9099 }, { "epoch": 1.8706958577448864, "grad_norm": 0.19438640773296356, "learning_rate": 2.949339377189522e-05, "loss": 0.5412, "step": 9100 }, { "epoch": 1.870901428718265, "grad_norm": 0.16201166808605194, "learning_rate": 2.9484017790061058e-05, "loss": 0.4902, "step": 9101 }, { "epoch": 1.8711069996916434, "grad_norm": 0.15816009044647217, "learning_rate": 2.9474642572626804e-05, "loss": 0.5344, "step": 9102 }, { "epoch": 1.871312570665022, "grad_norm": 0.19865500926971436, "learning_rate": 2.9465268120054347e-05, "loss": 0.5286, "step": 9103 }, { "epoch": 1.8715181416384006, "grad_norm": 0.19558370113372803, "learning_rate": 2.945589443280553e-05, "loss": 0.5003, "step": 9104 }, { "epoch": 1.8717237126117792, "grad_norm": 0.19835351407527924, "learning_rate": 2.944652151134214e-05, "loss": 0.5106, "step": 9105 }, { "epoch": 1.8719292835851578, "grad_norm": 0.1939878761768341, "learning_rate": 2.9437149356125937e-05, "loss": 0.5295, "step": 9106 }, { "epoch": 1.8721348545585363, "grad_norm": 0.18890373408794403, "learning_rate": 2.9427777967618645e-05, "loss": 0.5072, "step": 9107 }, { "epoch": 1.872340425531915, "grad_norm": 0.20133374631404877, "learning_rate": 2.9418407346281948e-05, "loss": 0.5436, "step": 9108 }, { "epoch": 1.8725459965052935, "grad_norm": 0.19078055024147034, "learning_rate": 2.940903749257748e-05, "loss": 0.4905, "step": 9109 }, { "epoch": 1.872751567478672, "grad_norm": 0.1644534170627594, "learning_rate": 2.9399668406966874e-05, "loss": 0.5029, "step": 9110 }, { "epoch": 1.8729571384520507, "grad_norm": 0.1681102067232132, "learning_rate": 2.9390300089911696e-05, "loss": 0.54, "step": 9111 }, { "epoch": 1.8731627094254293, "grad_norm": 0.16144701838493347, "learning_rate": 2.938093254187346e-05, "loss": 0.5035, "step": 9112 }, { "epoch": 1.8733682803988077, "grad_norm": 0.16440553963184357, "learning_rate": 2.937156576331368e-05, "loss": 0.5317, "step": 9113 }, { "epoch": 1.8735738513721862, "grad_norm": 0.19571755826473236, "learning_rate": 2.936219975469382e-05, "loss": 0.5346, "step": 9114 }, { "epoch": 1.8737794223455648, "grad_norm": 0.6120141744613647, "learning_rate": 2.9352834516475254e-05, "loss": 0.5264, "step": 9115 }, { "epoch": 1.8739849933189434, "grad_norm": 0.1942664086818695, "learning_rate": 2.9343470049119426e-05, "loss": 0.5409, "step": 9116 }, { "epoch": 1.8741905642923218, "grad_norm": 0.19954350590705872, "learning_rate": 2.9334106353087646e-05, "loss": 0.5159, "step": 9117 }, { "epoch": 1.8743961352657004, "grad_norm": 0.156526118516922, "learning_rate": 2.9324743428841223e-05, "loss": 0.4767, "step": 9118 }, { "epoch": 1.874601706239079, "grad_norm": 0.1730726808309555, "learning_rate": 2.9315381276841425e-05, "loss": 0.5267, "step": 9119 }, { "epoch": 1.8748072772124575, "grad_norm": 0.20543117821216583, "learning_rate": 2.9306019897549483e-05, "loss": 0.5323, "step": 9120 }, { "epoch": 1.8750128481858361, "grad_norm": 0.17114831507205963, "learning_rate": 2.9296659291426576e-05, "loss": 0.5179, "step": 9121 }, { "epoch": 1.8752184191592147, "grad_norm": 0.16466084122657776, "learning_rate": 2.928729945893387e-05, "loss": 0.5224, "step": 9122 }, { "epoch": 1.8754239901325933, "grad_norm": 0.19490589201450348, "learning_rate": 2.927794040053249e-05, "loss": 0.5288, "step": 9123 }, { "epoch": 1.875629561105972, "grad_norm": 0.165072500705719, "learning_rate": 2.926858211668349e-05, "loss": 0.4984, "step": 9124 }, { "epoch": 1.8758351320793505, "grad_norm": 0.1546640247106552, "learning_rate": 2.9259224607847928e-05, "loss": 0.544, "step": 9125 }, { "epoch": 1.876040703052729, "grad_norm": 0.18832944333553314, "learning_rate": 2.9249867874486802e-05, "loss": 0.5269, "step": 9126 }, { "epoch": 1.8762462740261077, "grad_norm": 0.19997960329055786, "learning_rate": 2.924051191706107e-05, "loss": 0.539, "step": 9127 }, { "epoch": 1.876451844999486, "grad_norm": 0.21653611958026886, "learning_rate": 2.9231156736031653e-05, "loss": 0.5414, "step": 9128 }, { "epoch": 1.8766574159728646, "grad_norm": 0.1963309794664383, "learning_rate": 2.922180233185942e-05, "loss": 0.5308, "step": 9129 }, { "epoch": 1.8768629869462432, "grad_norm": 0.19767159223556519, "learning_rate": 2.921244870500526e-05, "loss": 0.5479, "step": 9130 }, { "epoch": 1.8770685579196218, "grad_norm": 0.19611725211143494, "learning_rate": 2.9203095855929962e-05, "loss": 0.5213, "step": 9131 }, { "epoch": 1.8772741288930002, "grad_norm": 0.19497352838516235, "learning_rate": 2.91937437850943e-05, "loss": 0.5212, "step": 9132 }, { "epoch": 1.8774796998663787, "grad_norm": 0.19085940718650818, "learning_rate": 2.918439249295899e-05, "loss": 0.532, "step": 9133 }, { "epoch": 1.8776852708397573, "grad_norm": 0.1957186907529831, "learning_rate": 2.917504197998475e-05, "loss": 0.5046, "step": 9134 }, { "epoch": 1.877890841813136, "grad_norm": 0.17413191497325897, "learning_rate": 2.916569224663223e-05, "loss": 0.5181, "step": 9135 }, { "epoch": 1.8780964127865145, "grad_norm": 0.16276034712791443, "learning_rate": 2.9156343293362013e-05, "loss": 0.5378, "step": 9136 }, { "epoch": 1.878301983759893, "grad_norm": 0.19842852652072906, "learning_rate": 2.914699512063474e-05, "loss": 0.5238, "step": 9137 }, { "epoch": 1.8785075547332717, "grad_norm": 0.20142436027526855, "learning_rate": 2.9137647728910915e-05, "loss": 0.53, "step": 9138 }, { "epoch": 1.8787131257066503, "grad_norm": 0.1989341378211975, "learning_rate": 2.9128301118651043e-05, "loss": 0.5447, "step": 9139 }, { "epoch": 1.8789186966800289, "grad_norm": 0.15917402505874634, "learning_rate": 2.9118955290315593e-05, "loss": 0.4962, "step": 9140 }, { "epoch": 1.8791242676534075, "grad_norm": 0.16550025343894958, "learning_rate": 2.9109610244364994e-05, "loss": 0.5044, "step": 9141 }, { "epoch": 1.879329838626786, "grad_norm": 0.19528605043888092, "learning_rate": 2.9100265981259613e-05, "loss": 0.52, "step": 9142 }, { "epoch": 1.8795354096001644, "grad_norm": 0.15918217599391937, "learning_rate": 2.909092250145981e-05, "loss": 0.5113, "step": 9143 }, { "epoch": 1.879740980573543, "grad_norm": 0.19821493327617645, "learning_rate": 2.9081579805425912e-05, "loss": 0.5407, "step": 9144 }, { "epoch": 1.8799465515469216, "grad_norm": 0.19132786989212036, "learning_rate": 2.9072237893618154e-05, "loss": 0.5243, "step": 9145 }, { "epoch": 1.8801521225203002, "grad_norm": 0.20594055950641632, "learning_rate": 2.9062896766496812e-05, "loss": 0.5245, "step": 9146 }, { "epoch": 1.8803576934936785, "grad_norm": 0.1643124520778656, "learning_rate": 2.9053556424522043e-05, "loss": 0.5056, "step": 9147 }, { "epoch": 1.8805632644670571, "grad_norm": 0.16157592833042145, "learning_rate": 2.9044216868154028e-05, "loss": 0.5309, "step": 9148 }, { "epoch": 1.8807688354404357, "grad_norm": 0.1968626081943512, "learning_rate": 2.9034878097852863e-05, "loss": 0.5417, "step": 9149 }, { "epoch": 1.8809744064138143, "grad_norm": 0.1947098821401596, "learning_rate": 2.9025540114078615e-05, "loss": 0.5476, "step": 9150 }, { "epoch": 1.881179977387193, "grad_norm": 0.1969742625951767, "learning_rate": 2.9016202917291363e-05, "loss": 0.5182, "step": 9151 }, { "epoch": 1.8813855483605715, "grad_norm": 0.199255108833313, "learning_rate": 2.9006866507951085e-05, "loss": 0.5049, "step": 9152 }, { "epoch": 1.88159111933395, "grad_norm": 0.1936180591583252, "learning_rate": 2.899753088651774e-05, "loss": 0.5345, "step": 9153 }, { "epoch": 1.8817966903073287, "grad_norm": 0.1694704294204712, "learning_rate": 2.8988196053451242e-05, "loss": 0.4989, "step": 9154 }, { "epoch": 1.8820022612807072, "grad_norm": 0.16625025868415833, "learning_rate": 2.89788620092115e-05, "loss": 0.5487, "step": 9155 }, { "epoch": 1.8822078322540858, "grad_norm": 0.2028672993183136, "learning_rate": 2.8969528754258344e-05, "loss": 0.5241, "step": 9156 }, { "epoch": 1.8824134032274644, "grad_norm": 0.20288680493831635, "learning_rate": 2.896019628905156e-05, "loss": 0.5528, "step": 9157 }, { "epoch": 1.8826189742008428, "grad_norm": 0.1986982226371765, "learning_rate": 2.8950864614050947e-05, "loss": 0.5214, "step": 9158 }, { "epoch": 1.8828245451742214, "grad_norm": 0.16761691868305206, "learning_rate": 2.8941533729716225e-05, "loss": 0.5164, "step": 9159 }, { "epoch": 1.8830301161476, "grad_norm": 0.17081286013126373, "learning_rate": 2.8932203636507085e-05, "loss": 0.5364, "step": 9160 }, { "epoch": 1.8832356871209786, "grad_norm": 0.19834405183792114, "learning_rate": 2.8922874334883166e-05, "loss": 0.5329, "step": 9161 }, { "epoch": 1.883441258094357, "grad_norm": 0.1979091614484787, "learning_rate": 2.8913545825304082e-05, "loss": 0.5363, "step": 9162 }, { "epoch": 1.8836468290677355, "grad_norm": 0.19581232964992523, "learning_rate": 2.8904218108229417e-05, "loss": 0.5278, "step": 9163 }, { "epoch": 1.883852400041114, "grad_norm": 0.1917477548122406, "learning_rate": 2.8894891184118666e-05, "loss": 0.5051, "step": 9164 }, { "epoch": 1.8840579710144927, "grad_norm": 0.1971418410539627, "learning_rate": 2.888556505343137e-05, "loss": 0.5333, "step": 9165 }, { "epoch": 1.8842635419878713, "grad_norm": 0.19777396321296692, "learning_rate": 2.8876239716626963e-05, "loss": 0.5299, "step": 9166 }, { "epoch": 1.8844691129612499, "grad_norm": 0.20348910987377167, "learning_rate": 2.8866915174164866e-05, "loss": 0.5299, "step": 9167 }, { "epoch": 1.8846746839346284, "grad_norm": 0.16637156903743744, "learning_rate": 2.8857591426504452e-05, "loss": 0.5043, "step": 9168 }, { "epoch": 1.884880254908007, "grad_norm": 0.1301034539937973, "learning_rate": 2.8848268474105064e-05, "loss": 0.5234, "step": 9169 }, { "epoch": 1.8850858258813856, "grad_norm": 0.16645324230194092, "learning_rate": 2.8838946317425992e-05, "loss": 0.5215, "step": 9170 }, { "epoch": 1.8852913968547642, "grad_norm": 0.19991381466388702, "learning_rate": 2.882962495692648e-05, "loss": 0.5436, "step": 9171 }, { "epoch": 1.8854969678281428, "grad_norm": 0.19237017631530762, "learning_rate": 2.8820304393065785e-05, "loss": 0.5136, "step": 9172 }, { "epoch": 1.8857025388015214, "grad_norm": 0.19153942167758942, "learning_rate": 2.8810984626303068e-05, "loss": 0.5263, "step": 9173 }, { "epoch": 1.8859081097748998, "grad_norm": 0.20043647289276123, "learning_rate": 2.8801665657097478e-05, "loss": 0.5285, "step": 9174 }, { "epoch": 1.8861136807482783, "grad_norm": 0.1646028459072113, "learning_rate": 2.87923474859081e-05, "loss": 0.4842, "step": 9175 }, { "epoch": 1.886319251721657, "grad_norm": 0.1615920066833496, "learning_rate": 2.8783030113194004e-05, "loss": 0.5396, "step": 9176 }, { "epoch": 1.8865248226950353, "grad_norm": 0.19795995950698853, "learning_rate": 2.8773713539414224e-05, "loss": 0.5248, "step": 9177 }, { "epoch": 1.8867303936684139, "grad_norm": 0.19956757128238678, "learning_rate": 2.8764397765027717e-05, "loss": 0.5554, "step": 9178 }, { "epoch": 1.8869359646417925, "grad_norm": 0.20267313718795776, "learning_rate": 2.8755082790493463e-05, "loss": 0.5167, "step": 9179 }, { "epoch": 1.887141535615171, "grad_norm": 0.2114168256521225, "learning_rate": 2.8745768616270358e-05, "loss": 0.5346, "step": 9180 }, { "epoch": 1.8873471065885497, "grad_norm": 0.1968614161014557, "learning_rate": 2.873645524281726e-05, "loss": 0.5359, "step": 9181 }, { "epoch": 1.8875526775619282, "grad_norm": 0.19454684853553772, "learning_rate": 2.8727142670592992e-05, "loss": 0.5156, "step": 9182 }, { "epoch": 1.8877582485353068, "grad_norm": 0.19672390818595886, "learning_rate": 2.8717830900056353e-05, "loss": 0.521, "step": 9183 }, { "epoch": 1.8879638195086854, "grad_norm": 0.1960788369178772, "learning_rate": 2.8708519931666074e-05, "loss": 0.5307, "step": 9184 }, { "epoch": 1.888169390482064, "grad_norm": 0.20308300852775574, "learning_rate": 2.869920976588086e-05, "loss": 0.535, "step": 9185 }, { "epoch": 1.8883749614554426, "grad_norm": 0.2008272409439087, "learning_rate": 2.86899004031594e-05, "loss": 0.5472, "step": 9186 }, { "epoch": 1.8885805324288212, "grad_norm": 0.20526312291622162, "learning_rate": 2.8680591843960325e-05, "loss": 0.5531, "step": 9187 }, { "epoch": 1.8887861034021998, "grad_norm": 0.19478276371955872, "learning_rate": 2.8671284088742203e-05, "loss": 0.5059, "step": 9188 }, { "epoch": 1.8889916743755781, "grad_norm": 0.19619226455688477, "learning_rate": 2.86619771379636e-05, "loss": 0.5273, "step": 9189 }, { "epoch": 1.8891972453489567, "grad_norm": 0.1992175430059433, "learning_rate": 2.8652670992083012e-05, "loss": 0.502, "step": 9190 }, { "epoch": 1.8894028163223353, "grad_norm": 0.20220716297626495, "learning_rate": 2.864336565155891e-05, "loss": 0.5146, "step": 9191 }, { "epoch": 1.889608387295714, "grad_norm": 0.2015795111656189, "learning_rate": 2.863406111684975e-05, "loss": 0.5589, "step": 9192 }, { "epoch": 1.8898139582690923, "grad_norm": 0.19961073994636536, "learning_rate": 2.86247573884139e-05, "loss": 0.5222, "step": 9193 }, { "epoch": 1.8900195292424709, "grad_norm": 0.1948811113834381, "learning_rate": 2.8615454466709714e-05, "loss": 0.5291, "step": 9194 }, { "epoch": 1.8902251002158494, "grad_norm": 0.16690470278263092, "learning_rate": 2.8606152352195506e-05, "loss": 0.4997, "step": 9195 }, { "epoch": 1.890430671189228, "grad_norm": 0.12287265807390213, "learning_rate": 2.8596851045329547e-05, "loss": 0.4877, "step": 9196 }, { "epoch": 1.8906362421626066, "grad_norm": 0.1636972278356552, "learning_rate": 2.8587550546570063e-05, "loss": 0.5293, "step": 9197 }, { "epoch": 1.8908418131359852, "grad_norm": 0.1975325345993042, "learning_rate": 2.8578250856375253e-05, "loss": 0.5406, "step": 9198 }, { "epoch": 1.8910473841093638, "grad_norm": 0.19792711734771729, "learning_rate": 2.8568951975203272e-05, "loss": 0.5435, "step": 9199 }, { "epoch": 1.8912529550827424, "grad_norm": 0.1665029525756836, "learning_rate": 2.8559653903512225e-05, "loss": 0.5264, "step": 9200 }, { "epoch": 1.891458526056121, "grad_norm": 0.1603616178035736, "learning_rate": 2.855035664176019e-05, "loss": 0.5334, "step": 9201 }, { "epoch": 1.8916640970294996, "grad_norm": 0.16326431930065155, "learning_rate": 2.8541060190405204e-05, "loss": 0.5195, "step": 9202 }, { "epoch": 1.8918696680028781, "grad_norm": 0.17168152332305908, "learning_rate": 2.8531764549905253e-05, "loss": 0.5336, "step": 9203 }, { "epoch": 1.8920752389762565, "grad_norm": 0.19132192432880402, "learning_rate": 2.8522469720718287e-05, "loss": 0.5346, "step": 9204 }, { "epoch": 1.892280809949635, "grad_norm": 0.20156370103359222, "learning_rate": 2.851317570330221e-05, "loss": 0.53, "step": 9205 }, { "epoch": 1.8924863809230137, "grad_norm": 0.19648964703083038, "learning_rate": 2.850388249811492e-05, "loss": 0.5264, "step": 9206 }, { "epoch": 1.8926919518963923, "grad_norm": 0.19839881360530853, "learning_rate": 2.8494590105614233e-05, "loss": 0.5382, "step": 9207 }, { "epoch": 1.8928975228697706, "grad_norm": 0.19902363419532776, "learning_rate": 2.8485298526257956e-05, "loss": 0.501, "step": 9208 }, { "epoch": 1.8931030938431492, "grad_norm": 0.19624055922031403, "learning_rate": 2.8476007760503814e-05, "loss": 0.5242, "step": 9209 }, { "epoch": 1.8933086648165278, "grad_norm": 0.1998245269060135, "learning_rate": 2.8466717808809548e-05, "loss": 0.5281, "step": 9210 }, { "epoch": 1.8935142357899064, "grad_norm": 0.2012401521205902, "learning_rate": 2.845742867163282e-05, "loss": 0.5282, "step": 9211 }, { "epoch": 1.893719806763285, "grad_norm": 0.22999098896980286, "learning_rate": 2.844814034943124e-05, "loss": 0.4983, "step": 9212 }, { "epoch": 1.8939253777366636, "grad_norm": 0.1972244828939438, "learning_rate": 2.8438852842662445e-05, "loss": 0.5242, "step": 9213 }, { "epoch": 1.8941309487100422, "grad_norm": 0.19226615130901337, "learning_rate": 2.8429566151783957e-05, "loss": 0.5224, "step": 9214 }, { "epoch": 1.8943365196834208, "grad_norm": 0.19878603518009186, "learning_rate": 2.8420280277253303e-05, "loss": 0.5235, "step": 9215 }, { "epoch": 1.8945420906567993, "grad_norm": 0.19549743831157684, "learning_rate": 2.8410995219527937e-05, "loss": 0.5459, "step": 9216 }, { "epoch": 1.894747661630178, "grad_norm": 0.19706833362579346, "learning_rate": 2.8401710979065313e-05, "loss": 0.5388, "step": 9217 }, { "epoch": 1.8949532326035565, "grad_norm": 0.19738836586475372, "learning_rate": 2.839242755632279e-05, "loss": 0.5026, "step": 9218 }, { "epoch": 1.895158803576935, "grad_norm": 0.1891833394765854, "learning_rate": 2.838314495175774e-05, "loss": 0.5232, "step": 9219 }, { "epoch": 1.8953643745503135, "grad_norm": 0.2042219191789627, "learning_rate": 2.837386316582748e-05, "loss": 0.5171, "step": 9220 }, { "epoch": 1.895569945523692, "grad_norm": 0.19573958218097687, "learning_rate": 2.8364582198989256e-05, "loss": 0.521, "step": 9221 }, { "epoch": 1.8957755164970707, "grad_norm": 0.1948520541191101, "learning_rate": 2.835530205170033e-05, "loss": 0.5316, "step": 9222 }, { "epoch": 1.895981087470449, "grad_norm": 0.1974736452102661, "learning_rate": 2.8346022724417877e-05, "loss": 0.5227, "step": 9223 }, { "epoch": 1.8961866584438276, "grad_norm": 0.19401709735393524, "learning_rate": 2.8336744217599044e-05, "loss": 0.5546, "step": 9224 }, { "epoch": 1.8963922294172062, "grad_norm": 0.2048596292734146, "learning_rate": 2.832746653170093e-05, "loss": 0.536, "step": 9225 }, { "epoch": 1.8965978003905848, "grad_norm": 0.19587990641593933, "learning_rate": 2.8318189667180604e-05, "loss": 0.5109, "step": 9226 }, { "epoch": 1.8968033713639634, "grad_norm": 0.20599375665187836, "learning_rate": 2.8308913624495113e-05, "loss": 0.5001, "step": 9227 }, { "epoch": 1.897008942337342, "grad_norm": 0.12338798493146896, "learning_rate": 2.829963840410144e-05, "loss": 0.5183, "step": 9228 }, { "epoch": 1.8972145133107206, "grad_norm": 0.16888199746608734, "learning_rate": 2.829036400645652e-05, "loss": 0.5375, "step": 9229 }, { "epoch": 1.8974200842840991, "grad_norm": 0.1930612325668335, "learning_rate": 2.8281090432017264e-05, "loss": 0.5138, "step": 9230 }, { "epoch": 1.8976256552574777, "grad_norm": 0.19552965462207794, "learning_rate": 2.827181768124054e-05, "loss": 0.5154, "step": 9231 }, { "epoch": 1.8978312262308563, "grad_norm": 0.1639067679643631, "learning_rate": 2.8262545754583176e-05, "loss": 0.5142, "step": 9232 }, { "epoch": 1.898036797204235, "grad_norm": 0.15904250741004944, "learning_rate": 2.8253274652501932e-05, "loss": 0.5298, "step": 9233 }, { "epoch": 1.8982423681776133, "grad_norm": 0.19867335259914398, "learning_rate": 2.824400437545359e-05, "loss": 0.5261, "step": 9234 }, { "epoch": 1.8984479391509919, "grad_norm": 0.16743482649326324, "learning_rate": 2.8234734923894837e-05, "loss": 0.4987, "step": 9235 }, { "epoch": 1.8986535101243704, "grad_norm": 0.15749593079090118, "learning_rate": 2.822546629828233e-05, "loss": 0.5142, "step": 9236 }, { "epoch": 1.898859081097749, "grad_norm": 0.21486081182956696, "learning_rate": 2.8216198499072697e-05, "loss": 0.5442, "step": 9237 }, { "epoch": 1.8990646520711274, "grad_norm": 0.16459722816944122, "learning_rate": 2.8206931526722516e-05, "loss": 0.5078, "step": 9238 }, { "epoch": 1.899270223044506, "grad_norm": 0.16410863399505615, "learning_rate": 2.819766538168832e-05, "loss": 0.5079, "step": 9239 }, { "epoch": 1.8994757940178846, "grad_norm": 0.20152071118354797, "learning_rate": 2.8188400064426613e-05, "loss": 0.5097, "step": 9240 }, { "epoch": 1.8996813649912632, "grad_norm": 0.19304706156253815, "learning_rate": 2.8179135575393867e-05, "loss": 0.5271, "step": 9241 }, { "epoch": 1.8998869359646418, "grad_norm": 0.19228653609752655, "learning_rate": 2.8169871915046488e-05, "loss": 0.5202, "step": 9242 }, { "epoch": 1.9000925069380203, "grad_norm": 0.20129820704460144, "learning_rate": 2.816060908384086e-05, "loss": 0.527, "step": 9243 }, { "epoch": 1.900298077911399, "grad_norm": 0.203932523727417, "learning_rate": 2.8151347082233317e-05, "loss": 0.5486, "step": 9244 }, { "epoch": 1.9005036488847775, "grad_norm": 0.1936814785003662, "learning_rate": 2.8142085910680153e-05, "loss": 0.5111, "step": 9245 }, { "epoch": 1.900709219858156, "grad_norm": 0.198873832821846, "learning_rate": 2.813282556963762e-05, "loss": 0.5324, "step": 9246 }, { "epoch": 1.9009147908315347, "grad_norm": 0.19669774174690247, "learning_rate": 2.8123566059561917e-05, "loss": 0.5218, "step": 9247 }, { "epoch": 1.9011203618049133, "grad_norm": 0.19902367889881134, "learning_rate": 2.8114307380909255e-05, "loss": 0.5155, "step": 9248 }, { "epoch": 1.9013259327782919, "grad_norm": 0.1605267971754074, "learning_rate": 2.8105049534135744e-05, "loss": 0.502, "step": 9249 }, { "epoch": 1.9015315037516702, "grad_norm": 0.16073958575725555, "learning_rate": 2.8095792519697472e-05, "loss": 0.5487, "step": 9250 }, { "epoch": 1.9017370747250488, "grad_norm": 0.19656385481357574, "learning_rate": 2.8086536338050488e-05, "loss": 0.5008, "step": 9251 }, { "epoch": 1.9019426456984274, "grad_norm": 0.20274598896503448, "learning_rate": 2.807728098965081e-05, "loss": 0.5181, "step": 9252 }, { "epoch": 1.9021482166718058, "grad_norm": 0.1996408849954605, "learning_rate": 2.8068026474954407e-05, "loss": 0.5041, "step": 9253 }, { "epoch": 1.9023537876451844, "grad_norm": 0.20199070870876312, "learning_rate": 2.805877279441717e-05, "loss": 0.5173, "step": 9254 }, { "epoch": 1.902559358618563, "grad_norm": 0.16346138715744019, "learning_rate": 2.804951994849504e-05, "loss": 0.5007, "step": 9255 }, { "epoch": 1.9027649295919415, "grad_norm": 0.16361282765865326, "learning_rate": 2.8040267937643842e-05, "loss": 0.5564, "step": 9256 }, { "epoch": 1.9029705005653201, "grad_norm": 0.1939617544412613, "learning_rate": 2.8031016762319366e-05, "loss": 0.5369, "step": 9257 }, { "epoch": 1.9031760715386987, "grad_norm": 0.20072026550769806, "learning_rate": 2.802176642297738e-05, "loss": 0.5177, "step": 9258 }, { "epoch": 1.9033816425120773, "grad_norm": 0.20429256558418274, "learning_rate": 2.801251692007361e-05, "loss": 0.5275, "step": 9259 }, { "epoch": 1.903587213485456, "grad_norm": 0.20607547461986542, "learning_rate": 2.8003268254063734e-05, "loss": 0.5278, "step": 9260 }, { "epoch": 1.9037927844588345, "grad_norm": 0.19499348104000092, "learning_rate": 2.7994020425403363e-05, "loss": 0.5303, "step": 9261 }, { "epoch": 1.903998355432213, "grad_norm": 0.19774754345417023, "learning_rate": 2.7984773434548146e-05, "loss": 0.5161, "step": 9262 }, { "epoch": 1.9042039264055917, "grad_norm": 0.2001447230577469, "learning_rate": 2.79755272819536e-05, "loss": 0.5278, "step": 9263 }, { "epoch": 1.9044094973789703, "grad_norm": 0.18907634913921356, "learning_rate": 2.7966281968075258e-05, "loss": 0.5056, "step": 9264 }, { "epoch": 1.9046150683523486, "grad_norm": 0.19662658870220184, "learning_rate": 2.7957037493368595e-05, "loss": 0.5241, "step": 9265 }, { "epoch": 1.9048206393257272, "grad_norm": 0.16739560663700104, "learning_rate": 2.794779385828903e-05, "loss": 0.5178, "step": 9266 }, { "epoch": 1.9050262102991058, "grad_norm": 0.16151371598243713, "learning_rate": 2.7938551063291945e-05, "loss": 0.5285, "step": 9267 }, { "epoch": 1.9052317812724842, "grad_norm": 0.20817650854587555, "learning_rate": 2.7929309108832727e-05, "loss": 0.5066, "step": 9268 }, { "epoch": 1.9054373522458627, "grad_norm": 0.20188304781913757, "learning_rate": 2.7920067995366655e-05, "loss": 0.5425, "step": 9269 }, { "epoch": 1.9056429232192413, "grad_norm": 0.20524436235427856, "learning_rate": 2.7910827723348997e-05, "loss": 0.5229, "step": 9270 }, { "epoch": 1.90584849419262, "grad_norm": 0.1990385204553604, "learning_rate": 2.790158829323499e-05, "loss": 0.5126, "step": 9271 }, { "epoch": 1.9060540651659985, "grad_norm": 0.19942565262317657, "learning_rate": 2.7892349705479808e-05, "loss": 0.5362, "step": 9272 }, { "epoch": 1.906259636139377, "grad_norm": 0.2039434313774109, "learning_rate": 2.7883111960538585e-05, "loss": 0.517, "step": 9273 }, { "epoch": 1.9064652071127557, "grad_norm": 0.20296645164489746, "learning_rate": 2.7873875058866438e-05, "loss": 0.5199, "step": 9274 }, { "epoch": 1.9066707780861343, "grad_norm": 0.16485995054244995, "learning_rate": 2.786463900091842e-05, "loss": 0.4991, "step": 9275 }, { "epoch": 1.9068763490595129, "grad_norm": 0.12144458293914795, "learning_rate": 2.7855403787149536e-05, "loss": 0.5215, "step": 9276 }, { "epoch": 1.9070819200328915, "grad_norm": 0.12911517918109894, "learning_rate": 2.7846169418014794e-05, "loss": 0.5127, "step": 9277 }, { "epoch": 1.90728749100627, "grad_norm": 0.16436396539211273, "learning_rate": 2.78369358939691e-05, "loss": 0.5474, "step": 9278 }, { "epoch": 1.9074930619796486, "grad_norm": 0.19795219600200653, "learning_rate": 2.782770321546736e-05, "loss": 0.5031, "step": 9279 }, { "epoch": 1.907698632953027, "grad_norm": 0.20450328290462494, "learning_rate": 2.7818471382964418e-05, "loss": 0.5389, "step": 9280 }, { "epoch": 1.9079042039264056, "grad_norm": 0.19718262553215027, "learning_rate": 2.7809240396915066e-05, "loss": 0.5057, "step": 9281 }, { "epoch": 1.9081097748997842, "grad_norm": 0.19920390844345093, "learning_rate": 2.7800010257774107e-05, "loss": 0.5347, "step": 9282 }, { "epoch": 1.9083153458731628, "grad_norm": 0.19880931079387665, "learning_rate": 2.7790780965996248e-05, "loss": 0.525, "step": 9283 }, { "epoch": 1.9085209168465411, "grad_norm": 0.19791699945926666, "learning_rate": 2.778155252203618e-05, "loss": 0.5229, "step": 9284 }, { "epoch": 1.9087264878199197, "grad_norm": 0.19774897396564484, "learning_rate": 2.7772324926348524e-05, "loss": 0.4856, "step": 9285 }, { "epoch": 1.9089320587932983, "grad_norm": 0.20268942415714264, "learning_rate": 2.7763098179387917e-05, "loss": 0.5158, "step": 9286 }, { "epoch": 1.909137629766677, "grad_norm": 0.19894084334373474, "learning_rate": 2.7753872281608892e-05, "loss": 0.5155, "step": 9287 }, { "epoch": 1.9093432007400555, "grad_norm": 0.20365378260612488, "learning_rate": 2.774464723346595e-05, "loss": 0.5379, "step": 9288 }, { "epoch": 1.909548771713434, "grad_norm": 0.20348501205444336, "learning_rate": 2.773542303541361e-05, "loss": 0.5442, "step": 9289 }, { "epoch": 1.9097543426868127, "grad_norm": 0.20363953709602356, "learning_rate": 2.772619968790628e-05, "loss": 0.5028, "step": 9290 }, { "epoch": 1.9099599136601912, "grad_norm": 0.20089566707611084, "learning_rate": 2.771697719139836e-05, "loss": 0.5419, "step": 9291 }, { "epoch": 1.9101654846335698, "grad_norm": 0.19938144087791443, "learning_rate": 2.7707755546344185e-05, "loss": 0.5269, "step": 9292 }, { "epoch": 1.9103710556069484, "grad_norm": 0.1974303126335144, "learning_rate": 2.7698534753198074e-05, "loss": 0.5038, "step": 9293 }, { "epoch": 1.910576626580327, "grad_norm": 0.16670559346675873, "learning_rate": 2.768931481241428e-05, "loss": 0.498, "step": 9294 }, { "epoch": 1.9107821975537054, "grad_norm": 0.16241280734539032, "learning_rate": 2.768009572444703e-05, "loss": 0.4979, "step": 9295 }, { "epoch": 1.910987768527084, "grad_norm": 0.2039078176021576, "learning_rate": 2.767087748975053e-05, "loss": 0.5426, "step": 9296 }, { "epoch": 1.9111933395004626, "grad_norm": 0.20147615671157837, "learning_rate": 2.766166010877889e-05, "loss": 0.5098, "step": 9297 }, { "epoch": 1.9113989104738411, "grad_norm": 0.19318887591362, "learning_rate": 2.765244358198621e-05, "loss": 0.5412, "step": 9298 }, { "epoch": 1.9116044814472195, "grad_norm": 0.19322159886360168, "learning_rate": 2.7643227909826573e-05, "loss": 0.5412, "step": 9299 }, { "epoch": 1.911810052420598, "grad_norm": 0.1994897574186325, "learning_rate": 2.7634013092753962e-05, "loss": 0.5362, "step": 9300 }, { "epoch": 1.9120156233939767, "grad_norm": 0.19642673432826996, "learning_rate": 2.762479913122236e-05, "loss": 0.5088, "step": 9301 }, { "epoch": 1.9122211943673553, "grad_norm": 0.20559348165988922, "learning_rate": 2.761558602568567e-05, "loss": 0.5465, "step": 9302 }, { "epoch": 1.9124267653407339, "grad_norm": 0.1719941794872284, "learning_rate": 2.760637377659782e-05, "loss": 0.4991, "step": 9303 }, { "epoch": 1.9126323363141124, "grad_norm": 0.1635911911725998, "learning_rate": 2.7597162384412645e-05, "loss": 0.5127, "step": 9304 }, { "epoch": 1.912837907287491, "grad_norm": 0.19266277551651, "learning_rate": 2.7587951849583936e-05, "loss": 0.5235, "step": 9305 }, { "epoch": 1.9130434782608696, "grad_norm": 0.20263995230197906, "learning_rate": 2.757874217256544e-05, "loss": 0.5287, "step": 9306 }, { "epoch": 1.9132490492342482, "grad_norm": 0.21031515300273895, "learning_rate": 2.75695333538109e-05, "loss": 0.5176, "step": 9307 }, { "epoch": 1.9134546202076268, "grad_norm": 0.19321498274803162, "learning_rate": 2.7560325393773992e-05, "loss": 0.5296, "step": 9308 }, { "epoch": 1.9136601911810054, "grad_norm": 0.16547061502933502, "learning_rate": 2.7551118292908317e-05, "loss": 0.5214, "step": 9309 }, { "epoch": 1.9138657621543838, "grad_norm": 0.15889470279216766, "learning_rate": 2.7541912051667503e-05, "loss": 0.5355, "step": 9310 }, { "epoch": 1.9140713331277623, "grad_norm": 0.19868826866149902, "learning_rate": 2.7532706670505082e-05, "loss": 0.5194, "step": 9311 }, { "epoch": 1.914276904101141, "grad_norm": 0.20042477548122406, "learning_rate": 2.7523502149874562e-05, "loss": 0.4973, "step": 9312 }, { "epoch": 1.9144824750745195, "grad_norm": 0.2017516791820526, "learning_rate": 2.7514298490229408e-05, "loss": 0.5121, "step": 9313 }, { "epoch": 1.9146880460478979, "grad_norm": 0.19849123060703278, "learning_rate": 2.7505095692023043e-05, "loss": 0.527, "step": 9314 }, { "epoch": 1.9148936170212765, "grad_norm": 0.19380970299243927, "learning_rate": 2.7495893755708823e-05, "loss": 0.5175, "step": 9315 }, { "epoch": 1.915099187994655, "grad_norm": 0.16672258079051971, "learning_rate": 2.748669268174011e-05, "loss": 0.4853, "step": 9316 }, { "epoch": 1.9153047589680336, "grad_norm": 0.1614876687526703, "learning_rate": 2.74774924705702e-05, "loss": 0.5163, "step": 9317 }, { "epoch": 1.9155103299414122, "grad_norm": 0.1949508935213089, "learning_rate": 2.746829312265233e-05, "loss": 0.515, "step": 9318 }, { "epoch": 1.9157159009147908, "grad_norm": 0.2158740758895874, "learning_rate": 2.745909463843972e-05, "loss": 0.5089, "step": 9319 }, { "epoch": 1.9159214718881694, "grad_norm": 0.16678479313850403, "learning_rate": 2.744989701838553e-05, "loss": 0.5147, "step": 9320 }, { "epoch": 1.916127042861548, "grad_norm": 0.16045857965946198, "learning_rate": 2.7440700262942893e-05, "loss": 0.505, "step": 9321 }, { "epoch": 1.9163326138349266, "grad_norm": 0.17274217307567596, "learning_rate": 2.7431504372564874e-05, "loss": 0.5002, "step": 9322 }, { "epoch": 1.9165381848083052, "grad_norm": 0.16283760964870453, "learning_rate": 2.7422309347704505e-05, "loss": 0.5303, "step": 9323 }, { "epoch": 1.9167437557816838, "grad_norm": 0.1970645785331726, "learning_rate": 2.741311518881481e-05, "loss": 0.5198, "step": 9324 }, { "epoch": 1.9169493267550621, "grad_norm": 0.20442216098308563, "learning_rate": 2.7403921896348735e-05, "loss": 0.4928, "step": 9325 }, { "epoch": 1.9171548977284407, "grad_norm": 0.19744066894054413, "learning_rate": 2.739472947075918e-05, "loss": 0.5099, "step": 9326 }, { "epoch": 1.9173604687018193, "grad_norm": 0.20807257294654846, "learning_rate": 2.7385537912499014e-05, "loss": 0.5173, "step": 9327 }, { "epoch": 1.917566039675198, "grad_norm": 0.19921061396598816, "learning_rate": 2.7376347222021067e-05, "loss": 0.5094, "step": 9328 }, { "epoch": 1.9177716106485763, "grad_norm": 0.1887097805738449, "learning_rate": 2.7367157399778125e-05, "loss": 0.514, "step": 9329 }, { "epoch": 1.9179771816219549, "grad_norm": 0.1967703402042389, "learning_rate": 2.7357968446222903e-05, "loss": 0.5085, "step": 9330 }, { "epoch": 1.9181827525953334, "grad_norm": 0.1980697363615036, "learning_rate": 2.734878036180813e-05, "loss": 0.5417, "step": 9331 }, { "epoch": 1.918388323568712, "grad_norm": 0.20071645081043243, "learning_rate": 2.733959314698645e-05, "loss": 0.5293, "step": 9332 }, { "epoch": 1.9185938945420906, "grad_norm": 0.1977865993976593, "learning_rate": 2.7330406802210472e-05, "loss": 0.5359, "step": 9333 }, { "epoch": 1.9187994655154692, "grad_norm": 0.19883140921592712, "learning_rate": 2.7321221327932762e-05, "loss": 0.5049, "step": 9334 }, { "epoch": 1.9190050364888478, "grad_norm": 0.19968102872371674, "learning_rate": 2.7312036724605848e-05, "loss": 0.5255, "step": 9335 }, { "epoch": 1.9192106074622264, "grad_norm": 0.19368182122707367, "learning_rate": 2.7302852992682212e-05, "loss": 0.5299, "step": 9336 }, { "epoch": 1.919416178435605, "grad_norm": 0.18962502479553223, "learning_rate": 2.7293670132614277e-05, "loss": 0.505, "step": 9337 }, { "epoch": 1.9196217494089836, "grad_norm": 0.19553595781326294, "learning_rate": 2.7284488144854465e-05, "loss": 0.5214, "step": 9338 }, { "epoch": 1.9198273203823621, "grad_norm": 0.1957550048828125, "learning_rate": 2.7275307029855118e-05, "loss": 0.5377, "step": 9339 }, { "epoch": 1.9200328913557407, "grad_norm": 0.19873984158039093, "learning_rate": 2.726612678806856e-05, "loss": 0.53, "step": 9340 }, { "epoch": 1.920238462329119, "grad_norm": 0.2044048011302948, "learning_rate": 2.7256947419947038e-05, "loss": 0.5364, "step": 9341 }, { "epoch": 1.9204440333024977, "grad_norm": 0.1971905678510666, "learning_rate": 2.7247768925942793e-05, "loss": 0.5233, "step": 9342 }, { "epoch": 1.9206496042758763, "grad_norm": 0.15951332449913025, "learning_rate": 2.7238591306507985e-05, "loss": 0.5017, "step": 9343 }, { "epoch": 1.9208551752492546, "grad_norm": 0.172995924949646, "learning_rate": 2.722941456209478e-05, "loss": 0.5254, "step": 9344 }, { "epoch": 1.9210607462226332, "grad_norm": 0.2066241055727005, "learning_rate": 2.7220238693155255e-05, "loss": 0.5268, "step": 9345 }, { "epoch": 1.9212663171960118, "grad_norm": 0.19944432377815247, "learning_rate": 2.721106370014147e-05, "loss": 0.5281, "step": 9346 }, { "epoch": 1.9214718881693904, "grad_norm": 0.20762419700622559, "learning_rate": 2.7201889583505433e-05, "loss": 0.5314, "step": 9347 }, { "epoch": 1.921677459142769, "grad_norm": 0.16759265959262848, "learning_rate": 2.7192716343699114e-05, "loss": 0.4948, "step": 9348 }, { "epoch": 1.9218830301161476, "grad_norm": 0.12171138823032379, "learning_rate": 2.718354398117441e-05, "loss": 0.4984, "step": 9349 }, { "epoch": 1.9220886010895262, "grad_norm": 0.17452505230903625, "learning_rate": 2.7174372496383224e-05, "loss": 0.5404, "step": 9350 }, { "epoch": 1.9222941720629048, "grad_norm": 0.16276037693023682, "learning_rate": 2.716520188977739e-05, "loss": 0.5044, "step": 9351 }, { "epoch": 1.9224997430362833, "grad_norm": 0.1610327959060669, "learning_rate": 2.7156032161808704e-05, "loss": 0.5304, "step": 9352 }, { "epoch": 1.922705314009662, "grad_norm": 0.2447415590286255, "learning_rate": 2.7146863312928917e-05, "loss": 0.5119, "step": 9353 }, { "epoch": 1.9229108849830405, "grad_norm": 0.19157683849334717, "learning_rate": 2.7137695343589725e-05, "loss": 0.5232, "step": 9354 }, { "epoch": 1.9231164559564191, "grad_norm": 0.20079728960990906, "learning_rate": 2.71285282542428e-05, "loss": 0.5146, "step": 9355 }, { "epoch": 1.9233220269297975, "grad_norm": 0.20246592164039612, "learning_rate": 2.7119362045339755e-05, "loss": 0.5289, "step": 9356 }, { "epoch": 1.923527597903176, "grad_norm": 0.1998445987701416, "learning_rate": 2.7110196717332144e-05, "loss": 0.537, "step": 9357 }, { "epoch": 1.9237331688765547, "grad_norm": 0.20412832498550415, "learning_rate": 2.7101032270671548e-05, "loss": 0.5388, "step": 9358 }, { "epoch": 1.9239387398499332, "grad_norm": 0.19689737260341644, "learning_rate": 2.709186870580943e-05, "loss": 0.529, "step": 9359 }, { "epoch": 1.9241443108233116, "grad_norm": 0.19693289697170258, "learning_rate": 2.7082706023197238e-05, "loss": 0.5362, "step": 9360 }, { "epoch": 1.9243498817966902, "grad_norm": 0.1994449496269226, "learning_rate": 2.707354422328637e-05, "loss": 0.5326, "step": 9361 }, { "epoch": 1.9245554527700688, "grad_norm": 0.20162896811962128, "learning_rate": 2.7064383306528194e-05, "loss": 0.5167, "step": 9362 }, { "epoch": 1.9247610237434474, "grad_norm": 0.19568750262260437, "learning_rate": 2.7055223273374027e-05, "loss": 0.5314, "step": 9363 }, { "epoch": 1.924966594716826, "grad_norm": 0.20198176801204681, "learning_rate": 2.7046064124275115e-05, "loss": 0.5225, "step": 9364 }, { "epoch": 1.9251721656902046, "grad_norm": 0.22592489421367645, "learning_rate": 2.7036905859682726e-05, "loss": 0.5282, "step": 9365 }, { "epoch": 1.9253777366635831, "grad_norm": 0.19349443912506104, "learning_rate": 2.7027748480048022e-05, "loss": 0.4946, "step": 9366 }, { "epoch": 1.9255833076369617, "grad_norm": 0.20024524629116058, "learning_rate": 2.701859198582215e-05, "loss": 0.5214, "step": 9367 }, { "epoch": 1.9257888786103403, "grad_norm": 0.19572319090366364, "learning_rate": 2.700943637745621e-05, "loss": 0.5243, "step": 9368 }, { "epoch": 1.925994449583719, "grad_norm": 0.20359370112419128, "learning_rate": 2.7000281655401248e-05, "loss": 0.5192, "step": 9369 }, { "epoch": 1.9262000205570975, "grad_norm": 0.17284277081489563, "learning_rate": 2.6991127820108274e-05, "loss": 0.5126, "step": 9370 }, { "epoch": 1.9264055915304759, "grad_norm": 0.17155306041240692, "learning_rate": 2.6981974872028255e-05, "loss": 0.5354, "step": 9371 }, { "epoch": 1.9266111625038544, "grad_norm": 0.20635953545570374, "learning_rate": 2.6972822811612127e-05, "loss": 0.5047, "step": 9372 }, { "epoch": 1.926816733477233, "grad_norm": 0.16604094207286835, "learning_rate": 2.696367163931075e-05, "loss": 0.5067, "step": 9373 }, { "epoch": 1.9270223044506116, "grad_norm": 0.15949425101280212, "learning_rate": 2.695452135557498e-05, "loss": 0.5084, "step": 9374 }, { "epoch": 1.92722787542399, "grad_norm": 0.19722892343997955, "learning_rate": 2.69453719608556e-05, "loss": 0.5247, "step": 9375 }, { "epoch": 1.9274334463973686, "grad_norm": 0.189317524433136, "learning_rate": 2.6936223455603357e-05, "loss": 0.5275, "step": 9376 }, { "epoch": 1.9276390173707472, "grad_norm": 0.193404883146286, "learning_rate": 2.6927075840268952e-05, "loss": 0.5003, "step": 9377 }, { "epoch": 1.9278445883441258, "grad_norm": 0.18967877328395844, "learning_rate": 2.6917929115303032e-05, "loss": 0.5414, "step": 9378 }, { "epoch": 1.9280501593175043, "grad_norm": 0.2023673802614212, "learning_rate": 2.690878328115625e-05, "loss": 0.5172, "step": 9379 }, { "epoch": 1.928255730290883, "grad_norm": 0.1624782383441925, "learning_rate": 2.6899638338279148e-05, "loss": 0.5076, "step": 9380 }, { "epoch": 1.9284613012642615, "grad_norm": 0.1585642248392105, "learning_rate": 2.6890494287122268e-05, "loss": 0.5232, "step": 9381 }, { "epoch": 1.92866687223764, "grad_norm": 0.20032867789268494, "learning_rate": 2.6881351128136084e-05, "loss": 0.5015, "step": 9382 }, { "epoch": 1.9288724432110187, "grad_norm": 0.20595210790634155, "learning_rate": 2.6872208861771055e-05, "loss": 0.5079, "step": 9383 }, { "epoch": 1.9290780141843973, "grad_norm": 0.2049880176782608, "learning_rate": 2.6863067488477565e-05, "loss": 0.5073, "step": 9384 }, { "epoch": 1.9292835851577759, "grad_norm": 0.19586196541786194, "learning_rate": 2.6853927008705945e-05, "loss": 0.5362, "step": 9385 }, { "epoch": 1.9294891561311542, "grad_norm": 0.19678068161010742, "learning_rate": 2.684478742290655e-05, "loss": 0.507, "step": 9386 }, { "epoch": 1.9296947271045328, "grad_norm": 0.15755969285964966, "learning_rate": 2.683564873152962e-05, "loss": 0.5057, "step": 9387 }, { "epoch": 1.9299002980779114, "grad_norm": 0.16498331725597382, "learning_rate": 2.6826510935025375e-05, "loss": 0.5442, "step": 9388 }, { "epoch": 1.93010586905129, "grad_norm": 0.19928227365016937, "learning_rate": 2.681737403384399e-05, "loss": 0.521, "step": 9389 }, { "epoch": 1.9303114400246684, "grad_norm": 0.1977323740720749, "learning_rate": 2.680823802843561e-05, "loss": 0.528, "step": 9390 }, { "epoch": 1.930517010998047, "grad_norm": 0.1704244613647461, "learning_rate": 2.67991029192503e-05, "loss": 0.509, "step": 9391 }, { "epoch": 1.9307225819714255, "grad_norm": 0.16151131689548492, "learning_rate": 2.6789968706738123e-05, "loss": 0.5298, "step": 9392 }, { "epoch": 1.9309281529448041, "grad_norm": 0.18972033262252808, "learning_rate": 2.678083539134908e-05, "loss": 0.5135, "step": 9393 }, { "epoch": 1.9311337239181827, "grad_norm": 0.19905173778533936, "learning_rate": 2.677170297353311e-05, "loss": 0.496, "step": 9394 }, { "epoch": 1.9313392948915613, "grad_norm": 0.21623218059539795, "learning_rate": 2.6762571453740148e-05, "loss": 0.539, "step": 9395 }, { "epoch": 1.93154486586494, "grad_norm": 0.16825906932353973, "learning_rate": 2.675344083242005e-05, "loss": 0.5124, "step": 9396 }, { "epoch": 1.9317504368383185, "grad_norm": 0.12174926698207855, "learning_rate": 2.674431111002263e-05, "loss": 0.5125, "step": 9397 }, { "epoch": 1.931956007811697, "grad_norm": 0.16127155721187592, "learning_rate": 2.6735182286997685e-05, "loss": 0.5231, "step": 9398 }, { "epoch": 1.9321615787850757, "grad_norm": 0.19533561170101166, "learning_rate": 2.6726054363794914e-05, "loss": 0.5015, "step": 9399 }, { "epoch": 1.9323671497584543, "grad_norm": 0.1924934983253479, "learning_rate": 2.671692734086405e-05, "loss": 0.5085, "step": 9400 }, { "epoch": 1.9325727207318326, "grad_norm": 0.1985793113708496, "learning_rate": 2.6707801218654726e-05, "loss": 0.5133, "step": 9401 }, { "epoch": 1.9327782917052112, "grad_norm": 0.17007775604724884, "learning_rate": 2.669867599761654e-05, "loss": 0.5008, "step": 9402 }, { "epoch": 1.9329838626785898, "grad_norm": 0.16425763070583344, "learning_rate": 2.6689551678199035e-05, "loss": 0.5018, "step": 9403 }, { "epoch": 1.9331894336519684, "grad_norm": 0.17384882271289825, "learning_rate": 2.6680428260851744e-05, "loss": 0.4811, "step": 9404 }, { "epoch": 1.9333950046253467, "grad_norm": 0.1561937779188156, "learning_rate": 2.6671305746024126e-05, "loss": 0.5158, "step": 9405 }, { "epoch": 1.9336005755987253, "grad_norm": 0.20057018101215363, "learning_rate": 2.6662184134165594e-05, "loss": 0.5178, "step": 9406 }, { "epoch": 1.933806146572104, "grad_norm": 0.17240118980407715, "learning_rate": 2.6653063425725552e-05, "loss": 0.4964, "step": 9407 }, { "epoch": 1.9340117175454825, "grad_norm": 0.16643132269382477, "learning_rate": 2.664394362115332e-05, "loss": 0.5116, "step": 9408 }, { "epoch": 1.934217288518861, "grad_norm": 0.19673089683055878, "learning_rate": 2.6634824720898195e-05, "loss": 0.5233, "step": 9409 }, { "epoch": 1.9344228594922397, "grad_norm": 0.19296656548976898, "learning_rate": 2.6625706725409412e-05, "loss": 0.5305, "step": 9410 }, { "epoch": 1.9346284304656183, "grad_norm": 0.18779776990413666, "learning_rate": 2.6616589635136185e-05, "loss": 0.5354, "step": 9411 }, { "epoch": 1.9348340014389969, "grad_norm": 0.19164229929447174, "learning_rate": 2.6607473450527648e-05, "loss": 0.5135, "step": 9412 }, { "epoch": 1.9350395724123755, "grad_norm": 0.19808048009872437, "learning_rate": 2.6598358172032928e-05, "loss": 0.4932, "step": 9413 }, { "epoch": 1.935245143385754, "grad_norm": 0.19213752448558807, "learning_rate": 2.65892438001011e-05, "loss": 0.5196, "step": 9414 }, { "epoch": 1.9354507143591326, "grad_norm": 0.19726723432540894, "learning_rate": 2.658013033518117e-05, "loss": 0.523, "step": 9415 }, { "epoch": 1.935656285332511, "grad_norm": 0.19998745620250702, "learning_rate": 2.657101777772214e-05, "loss": 0.5311, "step": 9416 }, { "epoch": 1.9358618563058896, "grad_norm": 0.2027643620967865, "learning_rate": 2.6561906128172917e-05, "loss": 0.5243, "step": 9417 }, { "epoch": 1.9360674272792682, "grad_norm": 0.20316363871097565, "learning_rate": 2.6552795386982405e-05, "loss": 0.5291, "step": 9418 }, { "epoch": 1.9362729982526468, "grad_norm": 0.20627467334270477, "learning_rate": 2.6543685554599437e-05, "loss": 0.4963, "step": 9419 }, { "epoch": 1.9364785692260251, "grad_norm": 0.19964690506458282, "learning_rate": 2.6534576631472806e-05, "loss": 0.5131, "step": 9420 }, { "epoch": 1.9366841401994037, "grad_norm": 0.19893944263458252, "learning_rate": 2.6525468618051296e-05, "loss": 0.5256, "step": 9421 }, { "epoch": 1.9368897111727823, "grad_norm": 0.17132525146007538, "learning_rate": 2.6516361514783592e-05, "loss": 0.5057, "step": 9422 }, { "epoch": 1.937095282146161, "grad_norm": 0.16164752840995789, "learning_rate": 2.6507255322118362e-05, "loss": 0.5165, "step": 9423 }, { "epoch": 1.9373008531195395, "grad_norm": 0.19539949297904968, "learning_rate": 2.6498150040504224e-05, "loss": 0.5299, "step": 9424 }, { "epoch": 1.937506424092918, "grad_norm": 0.1996447741985321, "learning_rate": 2.6489045670389765e-05, "loss": 0.5199, "step": 9425 }, { "epoch": 1.9377119950662967, "grad_norm": 0.20690996944904327, "learning_rate": 2.6479942212223494e-05, "loss": 0.4806, "step": 9426 }, { "epoch": 1.9379175660396752, "grad_norm": 0.19668295979499817, "learning_rate": 2.6470839666453906e-05, "loss": 0.5259, "step": 9427 }, { "epoch": 1.9381231370130538, "grad_norm": 0.200824573636055, "learning_rate": 2.6461738033529452e-05, "loss": 0.5383, "step": 9428 }, { "epoch": 1.9383287079864324, "grad_norm": 0.1936202496290207, "learning_rate": 2.6452637313898524e-05, "loss": 0.5199, "step": 9429 }, { "epoch": 1.938534278959811, "grad_norm": 0.1961507350206375, "learning_rate": 2.644353750800946e-05, "loss": 0.5261, "step": 9430 }, { "epoch": 1.9387398499331896, "grad_norm": 0.1959598958492279, "learning_rate": 2.643443861631057e-05, "loss": 0.5204, "step": 9431 }, { "epoch": 1.938945420906568, "grad_norm": 0.200755774974823, "learning_rate": 2.642534063925012e-05, "loss": 0.5277, "step": 9432 }, { "epoch": 1.9391509918799466, "grad_norm": 0.1906225085258484, "learning_rate": 2.6416243577276295e-05, "loss": 0.5116, "step": 9433 }, { "epoch": 1.9393565628533251, "grad_norm": 0.20025970041751862, "learning_rate": 2.6407147430837307e-05, "loss": 0.5146, "step": 9434 }, { "epoch": 1.9395621338267035, "grad_norm": 0.19855552911758423, "learning_rate": 2.6398052200381266e-05, "loss": 0.5334, "step": 9435 }, { "epoch": 1.939767704800082, "grad_norm": 0.19425593316555023, "learning_rate": 2.638895788635623e-05, "loss": 0.5349, "step": 9436 }, { "epoch": 1.9399732757734607, "grad_norm": 0.1955750733613968, "learning_rate": 2.637986448921027e-05, "loss": 0.5267, "step": 9437 }, { "epoch": 1.9401788467468393, "grad_norm": 0.19604718685150146, "learning_rate": 2.637077200939135e-05, "loss": 0.5303, "step": 9438 }, { "epoch": 1.9403844177202179, "grad_norm": 0.1608019322156906, "learning_rate": 2.6361680447347424e-05, "loss": 0.4905, "step": 9439 }, { "epoch": 1.9405899886935964, "grad_norm": 0.16983415186405182, "learning_rate": 2.635258980352637e-05, "loss": 0.533, "step": 9440 }, { "epoch": 1.940795559666975, "grad_norm": 0.2078002691268921, "learning_rate": 2.6343500078376077e-05, "loss": 0.5277, "step": 9441 }, { "epoch": 1.9410011306403536, "grad_norm": 0.20735982060432434, "learning_rate": 2.6334411272344328e-05, "loss": 0.5188, "step": 9442 }, { "epoch": 1.9412067016137322, "grad_norm": 0.1942051202058792, "learning_rate": 2.63253233858789e-05, "loss": 0.5166, "step": 9443 }, { "epoch": 1.9414122725871108, "grad_norm": 0.1942778080701828, "learning_rate": 2.6316236419427502e-05, "loss": 0.5302, "step": 9444 }, { "epoch": 1.9416178435604894, "grad_norm": 0.19624213874340057, "learning_rate": 2.6307150373437803e-05, "loss": 0.5376, "step": 9445 }, { "epoch": 1.941823414533868, "grad_norm": 0.19899539649486542, "learning_rate": 2.629806524835743e-05, "loss": 0.5369, "step": 9446 }, { "epoch": 1.9420289855072463, "grad_norm": 0.1954500824213028, "learning_rate": 2.628898104463397e-05, "loss": 0.5101, "step": 9447 }, { "epoch": 1.942234556480625, "grad_norm": 0.17353855073451996, "learning_rate": 2.627989776271496e-05, "loss": 0.5164, "step": 9448 }, { "epoch": 1.9424401274540035, "grad_norm": 0.16081948578357697, "learning_rate": 2.6270815403047906e-05, "loss": 0.5429, "step": 9449 }, { "epoch": 1.942645698427382, "grad_norm": 0.19543206691741943, "learning_rate": 2.626173396608023e-05, "loss": 0.5165, "step": 9450 }, { "epoch": 1.9428512694007605, "grad_norm": 0.20097336173057556, "learning_rate": 2.6252653452259336e-05, "loss": 0.5329, "step": 9451 }, { "epoch": 1.943056840374139, "grad_norm": 0.20384319126605988, "learning_rate": 2.6243573862032566e-05, "loss": 0.5314, "step": 9452 }, { "epoch": 1.9432624113475176, "grad_norm": 0.19779393076896667, "learning_rate": 2.6234495195847262e-05, "loss": 0.489, "step": 9453 }, { "epoch": 1.9434679823208962, "grad_norm": 0.16127046942710876, "learning_rate": 2.6225417454150668e-05, "loss": 0.5033, "step": 9454 }, { "epoch": 1.9436735532942748, "grad_norm": 0.1624097228050232, "learning_rate": 2.6216340637389987e-05, "loss": 0.532, "step": 9455 }, { "epoch": 1.9438791242676534, "grad_norm": 0.16971097886562347, "learning_rate": 2.620726474601243e-05, "loss": 0.5058, "step": 9456 }, { "epoch": 1.944084695241032, "grad_norm": 0.12277817726135254, "learning_rate": 2.619818978046509e-05, "loss": 0.4925, "step": 9457 }, { "epoch": 1.9442902662144106, "grad_norm": 0.16644692420959473, "learning_rate": 2.618911574119507e-05, "loss": 0.5217, "step": 9458 }, { "epoch": 1.9444958371877892, "grad_norm": 0.16563105583190918, "learning_rate": 2.61800426286494e-05, "loss": 0.5091, "step": 9459 }, { "epoch": 1.9447014081611678, "grad_norm": 0.1673881858587265, "learning_rate": 2.6170970443275054e-05, "loss": 0.5416, "step": 9460 }, { "epoch": 1.9449069791345464, "grad_norm": 0.20645494759082794, "learning_rate": 2.6161899185518977e-05, "loss": 0.5182, "step": 9461 }, { "epoch": 1.9451125501079247, "grad_norm": 0.19935904443264008, "learning_rate": 2.615282885582809e-05, "loss": 0.5234, "step": 9462 }, { "epoch": 1.9453181210813033, "grad_norm": 0.1983654797077179, "learning_rate": 2.614375945464924e-05, "loss": 0.5292, "step": 9463 }, { "epoch": 1.945523692054682, "grad_norm": 0.20159868896007538, "learning_rate": 2.6134690982429228e-05, "loss": 0.5162, "step": 9464 }, { "epoch": 1.9457292630280605, "grad_norm": 0.2034175992012024, "learning_rate": 2.612562343961481e-05, "loss": 0.5495, "step": 9465 }, { "epoch": 1.9459348340014389, "grad_norm": 0.16713906824588776, "learning_rate": 2.611655682665271e-05, "loss": 0.5031, "step": 9466 }, { "epoch": 1.9461404049748174, "grad_norm": 0.1686525195837021, "learning_rate": 2.6107491143989593e-05, "loss": 0.5444, "step": 9467 }, { "epoch": 1.946345975948196, "grad_norm": 0.19990558922290802, "learning_rate": 2.6098426392072068e-05, "loss": 0.5149, "step": 9468 }, { "epoch": 1.9465515469215746, "grad_norm": 0.1923760622739792, "learning_rate": 2.608936257134675e-05, "loss": 0.5353, "step": 9469 }, { "epoch": 1.9467571178949532, "grad_norm": 0.20133623480796814, "learning_rate": 2.6080299682260142e-05, "loss": 0.5079, "step": 9470 }, { "epoch": 1.9469626888683318, "grad_norm": 0.19276608526706696, "learning_rate": 2.6071237725258744e-05, "loss": 0.5221, "step": 9471 }, { "epoch": 1.9471682598417104, "grad_norm": 0.20174479484558105, "learning_rate": 2.6062176700788986e-05, "loss": 0.5174, "step": 9472 }, { "epoch": 1.947373830815089, "grad_norm": 0.2010992169380188, "learning_rate": 2.605311660929725e-05, "loss": 0.5325, "step": 9473 }, { "epoch": 1.9475794017884676, "grad_norm": 0.16488604247570038, "learning_rate": 2.604405745122992e-05, "loss": 0.4957, "step": 9474 }, { "epoch": 1.9477849727618461, "grad_norm": 0.12317883968353271, "learning_rate": 2.6034999227033278e-05, "loss": 0.527, "step": 9475 }, { "epoch": 1.9479905437352247, "grad_norm": 0.16236087679862976, "learning_rate": 2.602594193715357e-05, "loss": 0.5246, "step": 9476 }, { "epoch": 1.948196114708603, "grad_norm": 0.20070423185825348, "learning_rate": 2.6016885582037027e-05, "loss": 0.5258, "step": 9477 }, { "epoch": 1.9484016856819817, "grad_norm": 0.20194244384765625, "learning_rate": 2.6007830162129808e-05, "loss": 0.5142, "step": 9478 }, { "epoch": 1.9486072566553603, "grad_norm": 0.20240890979766846, "learning_rate": 2.599877567787803e-05, "loss": 0.5443, "step": 9479 }, { "epoch": 1.9488128276287389, "grad_norm": 0.19648049771785736, "learning_rate": 2.598972212972776e-05, "loss": 0.534, "step": 9480 }, { "epoch": 1.9490183986021172, "grad_norm": 0.2065919041633606, "learning_rate": 2.5980669518125028e-05, "loss": 0.5381, "step": 9481 }, { "epoch": 1.9492239695754958, "grad_norm": 0.20330984890460968, "learning_rate": 2.59716178435158e-05, "loss": 0.5169, "step": 9482 }, { "epoch": 1.9494295405488744, "grad_norm": 0.20162275433540344, "learning_rate": 2.5962567106346034e-05, "loss": 0.521, "step": 9483 }, { "epoch": 1.949635111522253, "grad_norm": 0.16646580398082733, "learning_rate": 2.5953517307061608e-05, "loss": 0.5098, "step": 9484 }, { "epoch": 1.9498406824956316, "grad_norm": 0.16191188991069794, "learning_rate": 2.594446844610836e-05, "loss": 0.5327, "step": 9485 }, { "epoch": 1.9500462534690102, "grad_norm": 0.1962418407201767, "learning_rate": 2.593542052393209e-05, "loss": 0.5035, "step": 9486 }, { "epoch": 1.9502518244423888, "grad_norm": 0.16942986845970154, "learning_rate": 2.5926373540978536e-05, "loss": 0.4919, "step": 9487 }, { "epoch": 1.9504573954157673, "grad_norm": 0.16431602835655212, "learning_rate": 2.5917327497693413e-05, "loss": 0.5368, "step": 9488 }, { "epoch": 1.950662966389146, "grad_norm": 0.1935006082057953, "learning_rate": 2.590828239452235e-05, "loss": 0.5189, "step": 9489 }, { "epoch": 1.9508685373625245, "grad_norm": 0.20640498399734497, "learning_rate": 2.5899238231911006e-05, "loss": 0.4955, "step": 9490 }, { "epoch": 1.9510741083359031, "grad_norm": 0.19904139637947083, "learning_rate": 2.5890195010304913e-05, "loss": 0.5199, "step": 9491 }, { "epoch": 1.9512796793092815, "grad_norm": 0.19545705616474152, "learning_rate": 2.5881152730149588e-05, "loss": 0.491, "step": 9492 }, { "epoch": 1.95148525028266, "grad_norm": 0.203142449259758, "learning_rate": 2.5872111391890512e-05, "loss": 0.5364, "step": 9493 }, { "epoch": 1.9516908212560387, "grad_norm": 0.2026265263557434, "learning_rate": 2.586307099597308e-05, "loss": 0.5117, "step": 9494 }, { "epoch": 1.9518963922294172, "grad_norm": 0.1932077556848526, "learning_rate": 2.585403154284272e-05, "loss": 0.4905, "step": 9495 }, { "epoch": 1.9521019632027956, "grad_norm": 0.19804999232292175, "learning_rate": 2.5844993032944735e-05, "loss": 0.5318, "step": 9496 }, { "epoch": 1.9523075341761742, "grad_norm": 0.19540899991989136, "learning_rate": 2.58359554667244e-05, "loss": 0.5114, "step": 9497 }, { "epoch": 1.9525131051495528, "grad_norm": 0.1968623399734497, "learning_rate": 2.5826918844626975e-05, "loss": 0.5148, "step": 9498 }, { "epoch": 1.9527186761229314, "grad_norm": 0.19433245062828064, "learning_rate": 2.5817883167097644e-05, "loss": 0.5073, "step": 9499 }, { "epoch": 1.95292424709631, "grad_norm": 0.2015180140733719, "learning_rate": 2.580884843458156e-05, "loss": 0.5178, "step": 9500 }, { "epoch": 1.9531298180696886, "grad_norm": 0.199843630194664, "learning_rate": 2.579981464752381e-05, "loss": 0.4852, "step": 9501 }, { "epoch": 1.9533353890430671, "grad_norm": 0.16600465774536133, "learning_rate": 2.5790781806369435e-05, "loss": 0.4928, "step": 9502 }, { "epoch": 1.9535409600164457, "grad_norm": 0.16455240547657013, "learning_rate": 2.578174991156347e-05, "loss": 0.5443, "step": 9503 }, { "epoch": 1.9537465309898243, "grad_norm": 0.16569803655147552, "learning_rate": 2.5772718963550868e-05, "loss": 0.5102, "step": 9504 }, { "epoch": 1.953952101963203, "grad_norm": 0.1648106575012207, "learning_rate": 2.5763688962776526e-05, "loss": 0.5247, "step": 9505 }, { "epoch": 1.9541576729365815, "grad_norm": 0.20244595408439636, "learning_rate": 2.5754659909685322e-05, "loss": 0.5192, "step": 9506 }, { "epoch": 1.95436324390996, "grad_norm": 0.20293334126472473, "learning_rate": 2.5745631804722077e-05, "loss": 0.5294, "step": 9507 }, { "epoch": 1.9545688148833384, "grad_norm": 0.19975414872169495, "learning_rate": 2.5736604648331552e-05, "loss": 0.5245, "step": 9508 }, { "epoch": 1.954774385856717, "grad_norm": 0.19464215636253357, "learning_rate": 2.5727578440958465e-05, "loss": 0.5115, "step": 9509 }, { "epoch": 1.9549799568300956, "grad_norm": 0.19542162120342255, "learning_rate": 2.571855318304753e-05, "loss": 0.5251, "step": 9510 }, { "epoch": 1.955185527803474, "grad_norm": 0.19343827664852142, "learning_rate": 2.570952887504335e-05, "loss": 0.5204, "step": 9511 }, { "epoch": 1.9553910987768526, "grad_norm": 0.17137175798416138, "learning_rate": 2.5700505517390526e-05, "loss": 0.5097, "step": 9512 }, { "epoch": 1.9555966697502312, "grad_norm": 0.11898645013570786, "learning_rate": 2.569148311053358e-05, "loss": 0.5127, "step": 9513 }, { "epoch": 1.9558022407236098, "grad_norm": 0.12215547263622284, "learning_rate": 2.5682461654917025e-05, "loss": 0.5129, "step": 9514 }, { "epoch": 1.9560078116969883, "grad_norm": 0.1605924665927887, "learning_rate": 2.5673441150985286e-05, "loss": 0.508, "step": 9515 }, { "epoch": 1.956213382670367, "grad_norm": 0.16376885771751404, "learning_rate": 2.5664421599182757e-05, "loss": 0.4932, "step": 9516 }, { "epoch": 1.9564189536437455, "grad_norm": 0.15705506503582, "learning_rate": 2.5655402999953816e-05, "loss": 0.5217, "step": 9517 }, { "epoch": 1.956624524617124, "grad_norm": 0.19561244547367096, "learning_rate": 2.5646385353742732e-05, "loss": 0.5084, "step": 9518 }, { "epoch": 1.9568300955905027, "grad_norm": 0.19796496629714966, "learning_rate": 2.563736866099381e-05, "loss": 0.5076, "step": 9519 }, { "epoch": 1.9570356665638813, "grad_norm": 0.20186658203601837, "learning_rate": 2.562835292215123e-05, "loss": 0.5518, "step": 9520 }, { "epoch": 1.9572412375372599, "grad_norm": 0.16234740614891052, "learning_rate": 2.5619338137659155e-05, "loss": 0.4855, "step": 9521 }, { "epoch": 1.9574468085106385, "grad_norm": 0.1610114425420761, "learning_rate": 2.5610324307961708e-05, "loss": 0.5212, "step": 9522 }, { "epoch": 1.9576523794840168, "grad_norm": 0.19542771577835083, "learning_rate": 2.560131143350294e-05, "loss": 0.5029, "step": 9523 }, { "epoch": 1.9578579504573954, "grad_norm": 0.20270508527755737, "learning_rate": 2.55922995147269e-05, "loss": 0.5271, "step": 9524 }, { "epoch": 1.958063521430774, "grad_norm": 0.18990576267242432, "learning_rate": 2.5583288552077552e-05, "loss": 0.5104, "step": 9525 }, { "epoch": 1.9582690924041524, "grad_norm": 0.15766002237796783, "learning_rate": 2.5574278545998827e-05, "loss": 0.4951, "step": 9526 }, { "epoch": 1.958474663377531, "grad_norm": 0.16321411728858948, "learning_rate": 2.5565269496934602e-05, "loss": 0.5406, "step": 9527 }, { "epoch": 1.9586802343509095, "grad_norm": 0.2016243040561676, "learning_rate": 2.5556261405328712e-05, "loss": 0.5152, "step": 9528 }, { "epoch": 1.9588858053242881, "grad_norm": 0.17193591594696045, "learning_rate": 2.554725427162494e-05, "loss": 0.5029, "step": 9529 }, { "epoch": 1.9590913762976667, "grad_norm": 0.16781920194625854, "learning_rate": 2.553824809626701e-05, "loss": 0.5322, "step": 9530 }, { "epoch": 1.9592969472710453, "grad_norm": 0.17390578985214233, "learning_rate": 2.5529242879698655e-05, "loss": 0.5052, "step": 9531 }, { "epoch": 1.959502518244424, "grad_norm": 0.12395156174898148, "learning_rate": 2.552023862236349e-05, "loss": 0.4978, "step": 9532 }, { "epoch": 1.9597080892178025, "grad_norm": 0.16266000270843506, "learning_rate": 2.5511235324705127e-05, "loss": 0.5345, "step": 9533 }, { "epoch": 1.959913660191181, "grad_norm": 0.2078227996826172, "learning_rate": 2.5502232987167103e-05, "loss": 0.5167, "step": 9534 }, { "epoch": 1.9601192311645597, "grad_norm": 0.20280295610427856, "learning_rate": 2.549323161019293e-05, "loss": 0.5316, "step": 9535 }, { "epoch": 1.9603248021379382, "grad_norm": 0.20383380353450775, "learning_rate": 2.5484231194226058e-05, "loss": 0.5124, "step": 9536 }, { "epoch": 1.9605303731113168, "grad_norm": 0.19895561039447784, "learning_rate": 2.547523173970989e-05, "loss": 0.5198, "step": 9537 }, { "epoch": 1.9607359440846952, "grad_norm": 0.20123358070850372, "learning_rate": 2.546623324708781e-05, "loss": 0.5255, "step": 9538 }, { "epoch": 1.9609415150580738, "grad_norm": 0.2038145512342453, "learning_rate": 2.5457235716803115e-05, "loss": 0.5309, "step": 9539 }, { "epoch": 1.9611470860314524, "grad_norm": 0.20116189122200012, "learning_rate": 2.5448239149299055e-05, "loss": 0.5075, "step": 9540 }, { "epoch": 1.961352657004831, "grad_norm": 0.2058117836713791, "learning_rate": 2.5439243545018884e-05, "loss": 0.533, "step": 9541 }, { "epoch": 1.9615582279782093, "grad_norm": 0.2008356899023056, "learning_rate": 2.543024890440576e-05, "loss": 0.5321, "step": 9542 }, { "epoch": 1.961763798951588, "grad_norm": 0.19685760140419006, "learning_rate": 2.5421255227902804e-05, "loss": 0.4969, "step": 9543 }, { "epoch": 1.9619693699249665, "grad_norm": 0.1951378434896469, "learning_rate": 2.541226251595307e-05, "loss": 0.4999, "step": 9544 }, { "epoch": 1.962174940898345, "grad_norm": 0.19807179272174835, "learning_rate": 2.5403270768999633e-05, "loss": 0.5146, "step": 9545 }, { "epoch": 1.9623805118717237, "grad_norm": 0.19595085084438324, "learning_rate": 2.539427998748544e-05, "loss": 0.5223, "step": 9546 }, { "epoch": 1.9625860828451023, "grad_norm": 0.19711394608020782, "learning_rate": 2.5385290171853446e-05, "loss": 0.5196, "step": 9547 }, { "epoch": 1.9627916538184809, "grad_norm": 0.20173287391662598, "learning_rate": 2.5376301322546523e-05, "loss": 0.5277, "step": 9548 }, { "epoch": 1.9629972247918595, "grad_norm": 0.20318298041820526, "learning_rate": 2.5367313440007513e-05, "loss": 0.5174, "step": 9549 }, { "epoch": 1.963202795765238, "grad_norm": 0.20232440531253815, "learning_rate": 2.5358326524679206e-05, "loss": 0.5325, "step": 9550 }, { "epoch": 1.9634083667386166, "grad_norm": 0.2035774439573288, "learning_rate": 2.534934057700433e-05, "loss": 0.516, "step": 9551 }, { "epoch": 1.9636139377119952, "grad_norm": 0.20142172276973724, "learning_rate": 2.534035559742561e-05, "loss": 0.5189, "step": 9552 }, { "epoch": 1.9638195086853736, "grad_norm": 0.2012597769498825, "learning_rate": 2.5331371586385683e-05, "loss": 0.5166, "step": 9553 }, { "epoch": 1.9640250796587522, "grad_norm": 0.1986485868692398, "learning_rate": 2.532238854432715e-05, "loss": 0.5134, "step": 9554 }, { "epoch": 1.9642306506321308, "grad_norm": 0.20366504788398743, "learning_rate": 2.531340647169256e-05, "loss": 0.5146, "step": 9555 }, { "epoch": 1.9644362216055093, "grad_norm": 0.19817805290222168, "learning_rate": 2.530442536892442e-05, "loss": 0.4911, "step": 9556 }, { "epoch": 1.9646417925788877, "grad_norm": 0.20008954405784607, "learning_rate": 2.529544523646518e-05, "loss": 0.574, "step": 9557 }, { "epoch": 1.9648473635522663, "grad_norm": 0.2054361253976822, "learning_rate": 2.5286466074757237e-05, "loss": 0.5204, "step": 9558 }, { "epoch": 1.965052934525645, "grad_norm": 0.19738180935382843, "learning_rate": 2.527748788424299e-05, "loss": 0.5198, "step": 9559 }, { "epoch": 1.9652585054990235, "grad_norm": 0.20528697967529297, "learning_rate": 2.526851066536473e-05, "loss": 0.5439, "step": 9560 }, { "epoch": 1.965464076472402, "grad_norm": 0.21813803911209106, "learning_rate": 2.5259534418564713e-05, "loss": 0.5442, "step": 9561 }, { "epoch": 1.9656696474457807, "grad_norm": 0.20172588527202606, "learning_rate": 2.5250559144285174e-05, "loss": 0.5133, "step": 9562 }, { "epoch": 1.9658752184191592, "grad_norm": 0.19807198643684387, "learning_rate": 2.5241584842968285e-05, "loss": 0.5051, "step": 9563 }, { "epoch": 1.9660807893925378, "grad_norm": 0.2078738957643509, "learning_rate": 2.5232611515056168e-05, "loss": 0.5716, "step": 9564 }, { "epoch": 1.9662863603659164, "grad_norm": 0.19806239008903503, "learning_rate": 2.522363916099086e-05, "loss": 0.5293, "step": 9565 }, { "epoch": 1.966491931339295, "grad_norm": 0.20789627730846405, "learning_rate": 2.5214667781214436e-05, "loss": 0.5446, "step": 9566 }, { "epoch": 1.9666975023126736, "grad_norm": 0.20237933099269867, "learning_rate": 2.5205697376168853e-05, "loss": 0.5286, "step": 9567 }, { "epoch": 1.966903073286052, "grad_norm": 0.2071990966796875, "learning_rate": 2.5196727946296043e-05, "loss": 0.5321, "step": 9568 }, { "epoch": 1.9671086442594305, "grad_norm": 0.19845061004161835, "learning_rate": 2.518775949203789e-05, "loss": 0.5272, "step": 9569 }, { "epoch": 1.9673142152328091, "grad_norm": 0.2033272236585617, "learning_rate": 2.5178792013836224e-05, "loss": 0.513, "step": 9570 }, { "epoch": 1.9675197862061877, "grad_norm": 0.20528094470500946, "learning_rate": 2.5169825512132833e-05, "loss": 0.5322, "step": 9571 }, { "epoch": 1.967725357179566, "grad_norm": 0.19687287509441376, "learning_rate": 2.516085998736943e-05, "loss": 0.5129, "step": 9572 }, { "epoch": 1.9679309281529447, "grad_norm": 0.16771896183490753, "learning_rate": 2.5151895439987746e-05, "loss": 0.5116, "step": 9573 }, { "epoch": 1.9681364991263233, "grad_norm": 0.16580241918563843, "learning_rate": 2.5142931870429404e-05, "loss": 0.527, "step": 9574 }, { "epoch": 1.9683420700997019, "grad_norm": 0.20436574518680573, "learning_rate": 2.5133969279136e-05, "loss": 0.521, "step": 9575 }, { "epoch": 1.9685476410730804, "grad_norm": 0.1928415149450302, "learning_rate": 2.5125007666549074e-05, "loss": 0.5062, "step": 9576 }, { "epoch": 1.968753212046459, "grad_norm": 0.19831101596355438, "learning_rate": 2.5116047033110125e-05, "loss": 0.5124, "step": 9577 }, { "epoch": 1.9689587830198376, "grad_norm": 0.1986418068408966, "learning_rate": 2.510708737926058e-05, "loss": 0.547, "step": 9578 }, { "epoch": 1.9691643539932162, "grad_norm": 0.19999928772449493, "learning_rate": 2.509812870544189e-05, "loss": 0.5286, "step": 9579 }, { "epoch": 1.9693699249665948, "grad_norm": 0.1935226321220398, "learning_rate": 2.5089171012095367e-05, "loss": 0.5285, "step": 9580 }, { "epoch": 1.9695754959399734, "grad_norm": 0.18808215856552124, "learning_rate": 2.5080214299662322e-05, "loss": 0.5051, "step": 9581 }, { "epoch": 1.969781066913352, "grad_norm": 0.20196162164211273, "learning_rate": 2.507125856858401e-05, "loss": 0.5222, "step": 9582 }, { "epoch": 1.9699866378867303, "grad_norm": 0.20096677541732788, "learning_rate": 2.5062303819301645e-05, "loss": 0.5405, "step": 9583 }, { "epoch": 1.970192208860109, "grad_norm": 0.20000407099723816, "learning_rate": 2.5053350052256393e-05, "loss": 0.5173, "step": 9584 }, { "epoch": 1.9703977798334875, "grad_norm": 0.19387024641036987, "learning_rate": 2.5044397267889327e-05, "loss": 0.4956, "step": 9585 }, { "epoch": 1.970603350806866, "grad_norm": 0.16809746623039246, "learning_rate": 2.5035445466641558e-05, "loss": 0.5046, "step": 9586 }, { "epoch": 1.9708089217802445, "grad_norm": 0.16820058226585388, "learning_rate": 2.502649464895408e-05, "loss": 0.5309, "step": 9587 }, { "epoch": 1.971014492753623, "grad_norm": 0.21059322357177734, "learning_rate": 2.501754481526785e-05, "loss": 0.5047, "step": 9588 }, { "epoch": 1.9712200637270016, "grad_norm": 0.20109686255455017, "learning_rate": 2.5008595966023786e-05, "loss": 0.5069, "step": 9589 }, { "epoch": 1.9714256347003802, "grad_norm": 0.20082977414131165, "learning_rate": 2.4999648101662763e-05, "loss": 0.5329, "step": 9590 }, { "epoch": 1.9716312056737588, "grad_norm": 0.1989169418811798, "learning_rate": 2.4990701222625602e-05, "loss": 0.5102, "step": 9591 }, { "epoch": 1.9718367766471374, "grad_norm": 0.19520479440689087, "learning_rate": 2.4981755329353043e-05, "loss": 0.5116, "step": 9592 }, { "epoch": 1.972042347620516, "grad_norm": 0.17147661745548248, "learning_rate": 2.4972810422285853e-05, "loss": 0.4902, "step": 9593 }, { "epoch": 1.9722479185938946, "grad_norm": 0.16414588689804077, "learning_rate": 2.496386650186469e-05, "loss": 0.5109, "step": 9594 }, { "epoch": 1.9724534895672732, "grad_norm": 0.20732592046260834, "learning_rate": 2.4954923568530175e-05, "loss": 0.5128, "step": 9595 }, { "epoch": 1.9726590605406518, "grad_norm": 0.19795072078704834, "learning_rate": 2.4945981622722878e-05, "loss": 0.5122, "step": 9596 }, { "epoch": 1.9728646315140304, "grad_norm": 0.2000289112329483, "learning_rate": 2.493704066488334e-05, "loss": 0.5282, "step": 9597 }, { "epoch": 1.973070202487409, "grad_norm": 0.1769014447927475, "learning_rate": 2.4928100695452037e-05, "loss": 0.4991, "step": 9598 }, { "epoch": 1.9732757734607873, "grad_norm": 0.16739298403263092, "learning_rate": 2.4919161714869377e-05, "loss": 0.507, "step": 9599 }, { "epoch": 1.973481344434166, "grad_norm": 0.199861079454422, "learning_rate": 2.4910223723575778e-05, "loss": 0.5178, "step": 9600 }, { "epoch": 1.9736869154075445, "grad_norm": 0.16744980216026306, "learning_rate": 2.490128672201156e-05, "loss": 0.4671, "step": 9601 }, { "epoch": 1.9738924863809229, "grad_norm": 0.16180412471294403, "learning_rate": 2.4892350710617003e-05, "loss": 0.5274, "step": 9602 }, { "epoch": 1.9740980573543014, "grad_norm": 0.2564503848552704, "learning_rate": 2.488341568983232e-05, "loss": 0.5285, "step": 9603 }, { "epoch": 1.97430362832768, "grad_norm": 0.16161498427391052, "learning_rate": 2.4874481660097748e-05, "loss": 0.4968, "step": 9604 }, { "epoch": 1.9745091993010586, "grad_norm": 0.11919713020324707, "learning_rate": 2.4865548621853394e-05, "loss": 0.5128, "step": 9605 }, { "epoch": 1.9747147702744372, "grad_norm": 0.16267365217208862, "learning_rate": 2.4856616575539334e-05, "loss": 0.5247, "step": 9606 }, { "epoch": 1.9749203412478158, "grad_norm": 0.16840054094791412, "learning_rate": 2.4847685521595643e-05, "loss": 0.4839, "step": 9607 }, { "epoch": 1.9751259122211944, "grad_norm": 0.17324216663837433, "learning_rate": 2.48387554604623e-05, "loss": 0.5092, "step": 9608 }, { "epoch": 1.975331483194573, "grad_norm": 0.16955405473709106, "learning_rate": 2.4829826392579227e-05, "loss": 0.4955, "step": 9609 }, { "epoch": 1.9755370541679516, "grad_norm": 0.16968326270580292, "learning_rate": 2.4820898318386345e-05, "loss": 0.5285, "step": 9610 }, { "epoch": 1.9757426251413301, "grad_norm": 0.2073184996843338, "learning_rate": 2.481197123832348e-05, "loss": 0.5258, "step": 9611 }, { "epoch": 1.9759481961147087, "grad_norm": 0.2012372761964798, "learning_rate": 2.4803045152830442e-05, "loss": 0.5157, "step": 9612 }, { "epoch": 1.9761537670880873, "grad_norm": 0.1959368884563446, "learning_rate": 2.4794120062346946e-05, "loss": 0.5346, "step": 9613 }, { "epoch": 1.9763593380614657, "grad_norm": 0.19632303714752197, "learning_rate": 2.478519596731273e-05, "loss": 0.5138, "step": 9614 }, { "epoch": 1.9765649090348443, "grad_norm": 0.19955292344093323, "learning_rate": 2.4776272868167424e-05, "loss": 0.535, "step": 9615 }, { "epoch": 1.9767704800082229, "grad_norm": 0.19841422140598297, "learning_rate": 2.476735076535063e-05, "loss": 0.5054, "step": 9616 }, { "epoch": 1.9769760509816015, "grad_norm": 0.19676409661769867, "learning_rate": 2.4758429659301894e-05, "loss": 0.5238, "step": 9617 }, { "epoch": 1.9771816219549798, "grad_norm": 0.19223178923130035, "learning_rate": 2.4749509550460724e-05, "loss": 0.5013, "step": 9618 }, { "epoch": 1.9773871929283584, "grad_norm": 0.20213696360588074, "learning_rate": 2.474059043926656e-05, "loss": 0.5086, "step": 9619 }, { "epoch": 1.977592763901737, "grad_norm": 0.2001548409461975, "learning_rate": 2.4731672326158804e-05, "loss": 0.4985, "step": 9620 }, { "epoch": 1.9777983348751156, "grad_norm": 0.20245525240898132, "learning_rate": 2.4722755211576836e-05, "loss": 0.5327, "step": 9621 }, { "epoch": 1.9780039058484942, "grad_norm": 0.2233567237854004, "learning_rate": 2.4713839095959936e-05, "loss": 0.5095, "step": 9622 }, { "epoch": 1.9782094768218728, "grad_norm": 0.19729016721248627, "learning_rate": 2.470492397974737e-05, "loss": 0.4831, "step": 9623 }, { "epoch": 1.9784150477952513, "grad_norm": 0.20027440786361694, "learning_rate": 2.4696009863378342e-05, "loss": 0.5315, "step": 9624 }, { "epoch": 1.97862061876863, "grad_norm": 0.20336763560771942, "learning_rate": 2.4687096747291987e-05, "loss": 0.5019, "step": 9625 }, { "epoch": 1.9788261897420085, "grad_norm": 0.16322872042655945, "learning_rate": 2.4678184631927453e-05, "loss": 0.4873, "step": 9626 }, { "epoch": 1.9790317607153871, "grad_norm": 0.1632460653781891, "learning_rate": 2.4669273517723777e-05, "loss": 0.524, "step": 9627 }, { "epoch": 1.9792373316887657, "grad_norm": 0.19479408860206604, "learning_rate": 2.466036340511995e-05, "loss": 0.5186, "step": 9628 }, { "epoch": 1.979442902662144, "grad_norm": 0.19414758682250977, "learning_rate": 2.4651454294554972e-05, "loss": 0.5153, "step": 9629 }, { "epoch": 1.9796484736355227, "grad_norm": 0.1960826814174652, "learning_rate": 2.464254618646773e-05, "loss": 0.5356, "step": 9630 }, { "epoch": 1.9798540446089012, "grad_norm": 0.19612587988376617, "learning_rate": 2.4633639081297088e-05, "loss": 0.5033, "step": 9631 }, { "epoch": 1.9800596155822798, "grad_norm": 1.9576839208602905, "learning_rate": 2.462473297948186e-05, "loss": 0.5465, "step": 9632 }, { "epoch": 1.9802651865556582, "grad_norm": 0.2153571993112564, "learning_rate": 2.4615827881460797e-05, "loss": 0.531, "step": 9633 }, { "epoch": 1.9804707575290368, "grad_norm": 0.20636354386806488, "learning_rate": 2.4606923787672607e-05, "loss": 0.5394, "step": 9634 }, { "epoch": 1.9806763285024154, "grad_norm": 0.19910024106502533, "learning_rate": 2.4598020698555975e-05, "loss": 0.5212, "step": 9635 }, { "epoch": 1.980881899475794, "grad_norm": 0.19475533068180084, "learning_rate": 2.458911861454951e-05, "loss": 0.5175, "step": 9636 }, { "epoch": 1.9810874704491725, "grad_norm": 0.20673874020576477, "learning_rate": 2.4580217536091772e-05, "loss": 0.5258, "step": 9637 }, { "epoch": 1.9812930414225511, "grad_norm": 0.20791196823120117, "learning_rate": 2.4571317463621278e-05, "loss": 0.5278, "step": 9638 }, { "epoch": 1.9814986123959297, "grad_norm": 0.20311853289604187, "learning_rate": 2.4562418397576482e-05, "loss": 0.5103, "step": 9639 }, { "epoch": 1.9817041833693083, "grad_norm": 0.18043197691440582, "learning_rate": 2.4553520338395808e-05, "loss": 0.5009, "step": 9640 }, { "epoch": 1.981909754342687, "grad_norm": 0.16400253772735596, "learning_rate": 2.45446232865176e-05, "loss": 0.5219, "step": 9641 }, { "epoch": 1.9821153253160655, "grad_norm": 0.20592088997364044, "learning_rate": 2.453572724238022e-05, "loss": 0.5247, "step": 9642 }, { "epoch": 1.982320896289444, "grad_norm": 0.22053800523281097, "learning_rate": 2.45268322064219e-05, "loss": 0.5272, "step": 9643 }, { "epoch": 1.9825264672628224, "grad_norm": 0.21963202953338623, "learning_rate": 2.451793817908087e-05, "loss": 0.51, "step": 9644 }, { "epoch": 1.982732038236201, "grad_norm": 0.21020135283470154, "learning_rate": 2.4509045160795295e-05, "loss": 0.5338, "step": 9645 }, { "epoch": 1.9829376092095796, "grad_norm": 0.17611977458000183, "learning_rate": 2.450015315200327e-05, "loss": 0.5083, "step": 9646 }, { "epoch": 1.9831431801829582, "grad_norm": 0.16838988661766052, "learning_rate": 2.44912621531429e-05, "loss": 0.5075, "step": 9647 }, { "epoch": 1.9833487511563366, "grad_norm": 0.20639371871948242, "learning_rate": 2.448237216465219e-05, "loss": 0.5329, "step": 9648 }, { "epoch": 1.9835543221297152, "grad_norm": 0.20562691986560822, "learning_rate": 2.4473483186969085e-05, "loss": 0.5001, "step": 9649 }, { "epoch": 1.9837598931030938, "grad_norm": 0.20028932392597198, "learning_rate": 2.4464595220531542e-05, "loss": 0.5145, "step": 9650 }, { "epoch": 1.9839654640764723, "grad_norm": 0.205689936876297, "learning_rate": 2.4455708265777406e-05, "loss": 0.5347, "step": 9651 }, { "epoch": 1.984171035049851, "grad_norm": 0.20499835908412933, "learning_rate": 2.4446822323144497e-05, "loss": 0.5239, "step": 9652 }, { "epoch": 1.9843766060232295, "grad_norm": 0.20297472178936005, "learning_rate": 2.4437937393070596e-05, "loss": 0.5307, "step": 9653 }, { "epoch": 1.984582176996608, "grad_norm": 0.1985624134540558, "learning_rate": 2.442905347599339e-05, "loss": 0.5076, "step": 9654 }, { "epoch": 1.9847877479699867, "grad_norm": 0.20252910256385803, "learning_rate": 2.442017057235059e-05, "loss": 0.528, "step": 9655 }, { "epoch": 1.9849933189433653, "grad_norm": 0.2101006656885147, "learning_rate": 2.441128868257979e-05, "loss": 0.5188, "step": 9656 }, { "epoch": 1.9851988899167439, "grad_norm": 0.1986953169107437, "learning_rate": 2.4402407807118577e-05, "loss": 0.5267, "step": 9657 }, { "epoch": 1.9854044608901225, "grad_norm": 0.20518286526203156, "learning_rate": 2.4393527946404447e-05, "loss": 0.5362, "step": 9658 }, { "epoch": 1.9856100318635008, "grad_norm": 0.20495247840881348, "learning_rate": 2.438464910087489e-05, "loss": 0.5306, "step": 9659 }, { "epoch": 1.9858156028368794, "grad_norm": 0.20301851630210876, "learning_rate": 2.437577127096731e-05, "loss": 0.5106, "step": 9660 }, { "epoch": 1.986021173810258, "grad_norm": 0.17709769308567047, "learning_rate": 2.4366894457119066e-05, "loss": 0.4954, "step": 9661 }, { "epoch": 1.9862267447836366, "grad_norm": 0.1601599156856537, "learning_rate": 2.4358018659767514e-05, "loss": 0.4917, "step": 9662 }, { "epoch": 1.986432315757015, "grad_norm": 0.19886882603168488, "learning_rate": 2.4349143879349898e-05, "loss": 0.5363, "step": 9663 }, { "epoch": 1.9866378867303935, "grad_norm": 0.16597384214401245, "learning_rate": 2.434027011630344e-05, "loss": 0.5121, "step": 9664 }, { "epoch": 1.9868434577037721, "grad_norm": 0.163084477186203, "learning_rate": 2.4331397371065314e-05, "loss": 0.5358, "step": 9665 }, { "epoch": 1.9870490286771507, "grad_norm": 0.19397611916065216, "learning_rate": 2.4322525644072636e-05, "loss": 0.4968, "step": 9666 }, { "epoch": 1.9872545996505293, "grad_norm": 0.19655869901180267, "learning_rate": 2.4313654935762452e-05, "loss": 0.5081, "step": 9667 }, { "epoch": 1.987460170623908, "grad_norm": 0.20611554384231567, "learning_rate": 2.4304785246571817e-05, "loss": 0.5285, "step": 9668 }, { "epoch": 1.9876657415972865, "grad_norm": 0.20290662348270416, "learning_rate": 2.4295916576937687e-05, "loss": 0.5153, "step": 9669 }, { "epoch": 1.987871312570665, "grad_norm": 0.20132143795490265, "learning_rate": 2.428704892729696e-05, "loss": 0.5342, "step": 9670 }, { "epoch": 1.9880768835440437, "grad_norm": 0.20216117799282074, "learning_rate": 2.4278182298086535e-05, "loss": 0.517, "step": 9671 }, { "epoch": 1.9882824545174222, "grad_norm": 0.19936327636241913, "learning_rate": 2.426931668974322e-05, "loss": 0.5252, "step": 9672 }, { "epoch": 1.9884880254908008, "grad_norm": 0.33940476179122925, "learning_rate": 2.426045210270377e-05, "loss": 0.5247, "step": 9673 }, { "epoch": 1.9886935964641792, "grad_norm": 0.20160600543022156, "learning_rate": 2.4251588537404913e-05, "loss": 0.5223, "step": 9674 }, { "epoch": 1.9888991674375578, "grad_norm": 0.2030128389596939, "learning_rate": 2.4242725994283292e-05, "loss": 0.5135, "step": 9675 }, { "epoch": 1.9891047384109364, "grad_norm": 0.22344298660755157, "learning_rate": 2.4233864473775556e-05, "loss": 0.5226, "step": 9676 }, { "epoch": 1.989310309384315, "grad_norm": 0.20270341634750366, "learning_rate": 2.422500397631826e-05, "loss": 0.5173, "step": 9677 }, { "epoch": 1.9895158803576933, "grad_norm": 0.17036183178424835, "learning_rate": 2.421614450234792e-05, "loss": 0.4997, "step": 9678 }, { "epoch": 1.989721451331072, "grad_norm": 0.16131217777729034, "learning_rate": 2.420728605230099e-05, "loss": 0.5233, "step": 9679 }, { "epoch": 1.9899270223044505, "grad_norm": 0.19689194858074188, "learning_rate": 2.4198428626613895e-05, "loss": 0.5235, "step": 9680 }, { "epoch": 1.990132593277829, "grad_norm": 0.1997881680727005, "learning_rate": 2.418957222572299e-05, "loss": 0.5469, "step": 9681 }, { "epoch": 1.9903381642512077, "grad_norm": 0.1594388335943222, "learning_rate": 2.4180716850064584e-05, "loss": 0.4688, "step": 9682 }, { "epoch": 1.9905437352245863, "grad_norm": 0.1173081025481224, "learning_rate": 2.4171862500074968e-05, "loss": 0.5151, "step": 9683 }, { "epoch": 1.9907493061979649, "grad_norm": 0.16193978488445282, "learning_rate": 2.416300917619033e-05, "loss": 0.507, "step": 9684 }, { "epoch": 1.9909548771713435, "grad_norm": 0.19650469720363617, "learning_rate": 2.415415687884684e-05, "loss": 0.512, "step": 9685 }, { "epoch": 1.991160448144722, "grad_norm": 0.19806897640228271, "learning_rate": 2.414530560848061e-05, "loss": 0.5165, "step": 9686 }, { "epoch": 1.9913660191181006, "grad_norm": 0.20564566552639008, "learning_rate": 2.4136455365527692e-05, "loss": 0.5088, "step": 9687 }, { "epoch": 1.9915715900914792, "grad_norm": 0.20067964494228363, "learning_rate": 2.412760615042411e-05, "loss": 0.5163, "step": 9688 }, { "epoch": 1.9917771610648578, "grad_norm": 0.20195259153842926, "learning_rate": 2.4118757963605788e-05, "loss": 0.5013, "step": 9689 }, { "epoch": 1.9919827320382362, "grad_norm": 0.2007036656141281, "learning_rate": 2.410991080550869e-05, "loss": 0.5301, "step": 9690 }, { "epoch": 1.9921883030116148, "grad_norm": 0.16521452367305756, "learning_rate": 2.4101064676568624e-05, "loss": 0.4947, "step": 9691 }, { "epoch": 1.9923938739849933, "grad_norm": 0.16318975389003754, "learning_rate": 2.4092219577221435e-05, "loss": 0.5409, "step": 9692 }, { "epoch": 1.9925994449583717, "grad_norm": 0.20644515752792358, "learning_rate": 2.4083375507902872e-05, "loss": 0.5451, "step": 9693 }, { "epoch": 1.9928050159317503, "grad_norm": 0.19570566713809967, "learning_rate": 2.407453246904863e-05, "loss": 0.5005, "step": 9694 }, { "epoch": 1.993010586905129, "grad_norm": 0.19532164931297302, "learning_rate": 2.4065690461094367e-05, "loss": 0.5377, "step": 9695 }, { "epoch": 1.9932161578785075, "grad_norm": 0.20121091604232788, "learning_rate": 2.405684948447567e-05, "loss": 0.5096, "step": 9696 }, { "epoch": 1.993421728851886, "grad_norm": 0.1667921096086502, "learning_rate": 2.4048009539628128e-05, "loss": 0.5165, "step": 9697 }, { "epoch": 1.9936272998252647, "grad_norm": 0.12459738552570343, "learning_rate": 2.403917062698723e-05, "loss": 0.5162, "step": 9698 }, { "epoch": 1.9938328707986432, "grad_norm": 0.16275346279144287, "learning_rate": 2.4030332746988426e-05, "loss": 0.513, "step": 9699 }, { "epoch": 1.9940384417720218, "grad_norm": 0.2167256772518158, "learning_rate": 2.4021495900067113e-05, "loss": 0.5138, "step": 9700 }, { "epoch": 1.9942440127454004, "grad_norm": 0.20247885584831238, "learning_rate": 2.4012660086658642e-05, "loss": 0.5086, "step": 9701 }, { "epoch": 1.994449583718779, "grad_norm": 0.20237302780151367, "learning_rate": 2.400382530719832e-05, "loss": 0.4994, "step": 9702 }, { "epoch": 1.9946551546921576, "grad_norm": 0.193708136677742, "learning_rate": 2.3994991562121362e-05, "loss": 0.5112, "step": 9703 }, { "epoch": 1.9948607256655362, "grad_norm": 0.20271430909633636, "learning_rate": 2.3986158851863016e-05, "loss": 0.5148, "step": 9704 }, { "epoch": 1.9950662966389145, "grad_norm": 0.16858288645744324, "learning_rate": 2.39773271768584e-05, "loss": 0.502, "step": 9705 }, { "epoch": 1.9952718676122931, "grad_norm": 0.1224452555179596, "learning_rate": 2.3968496537542624e-05, "loss": 0.5069, "step": 9706 }, { "epoch": 1.9954774385856717, "grad_norm": 0.1615760177373886, "learning_rate": 2.3959666934350715e-05, "loss": 0.5327, "step": 9707 }, { "epoch": 1.9956830095590503, "grad_norm": 0.19293002784252167, "learning_rate": 2.3950838367717675e-05, "loss": 0.5051, "step": 9708 }, { "epoch": 1.9958885805324287, "grad_norm": 0.20506036281585693, "learning_rate": 2.394201083807845e-05, "loss": 0.5306, "step": 9709 }, { "epoch": 1.9960941515058073, "grad_norm": 0.19566957652568817, "learning_rate": 2.3933184345867902e-05, "loss": 0.5146, "step": 9710 }, { "epoch": 1.9962997224791859, "grad_norm": 0.19693787395954132, "learning_rate": 2.3924358891520916e-05, "loss": 0.5098, "step": 9711 }, { "epoch": 1.9965052934525644, "grad_norm": 0.20601771771907806, "learning_rate": 2.391553447547226e-05, "loss": 0.5345, "step": 9712 }, { "epoch": 1.996710864425943, "grad_norm": 0.19721956551074982, "learning_rate": 2.3906711098156654e-05, "loss": 0.5034, "step": 9713 }, { "epoch": 1.9969164353993216, "grad_norm": 0.19830164313316345, "learning_rate": 2.389788876000882e-05, "loss": 0.5055, "step": 9714 }, { "epoch": 1.9971220063727002, "grad_norm": 0.19704151153564453, "learning_rate": 2.3889067461463375e-05, "loss": 0.4994, "step": 9715 }, { "epoch": 1.9973275773460788, "grad_norm": 0.2041328102350235, "learning_rate": 2.3880247202954906e-05, "loss": 0.5322, "step": 9716 }, { "epoch": 1.9975331483194574, "grad_norm": 0.20206472277641296, "learning_rate": 2.387142798491792e-05, "loss": 0.5115, "step": 9717 }, { "epoch": 1.997738719292836, "grad_norm": 0.20135797560214996, "learning_rate": 2.386260980778695e-05, "loss": 0.5294, "step": 9718 }, { "epoch": 1.9979442902662146, "grad_norm": 0.19181190431118011, "learning_rate": 2.3853792671996394e-05, "loss": 0.5249, "step": 9719 }, { "epoch": 1.998149861239593, "grad_norm": 0.199905663728714, "learning_rate": 2.3844976577980637e-05, "loss": 0.5133, "step": 9720 }, { "epoch": 1.9983554322129715, "grad_norm": 0.19756287336349487, "learning_rate": 2.3836161526173998e-05, "loss": 0.491, "step": 9721 }, { "epoch": 1.99856100318635, "grad_norm": 0.16492635011672974, "learning_rate": 2.382734751701077e-05, "loss": 0.4839, "step": 9722 }, { "epoch": 1.9987665741597287, "grad_norm": 0.16064047813415527, "learning_rate": 2.3818534550925166e-05, "loss": 0.525, "step": 9723 }, { "epoch": 1.998972145133107, "grad_norm": 0.1621170938014984, "learning_rate": 2.3809722628351345e-05, "loss": 0.5041, "step": 9724 }, { "epoch": 1.9991777161064856, "grad_norm": 0.1653175801038742, "learning_rate": 2.3800911749723466e-05, "loss": 0.5125, "step": 9725 }, { "epoch": 1.9993832870798642, "grad_norm": 0.16732336580753326, "learning_rate": 2.3792101915475583e-05, "loss": 0.5047, "step": 9726 }, { "epoch": 1.9995888580532428, "grad_norm": 0.1221918985247612, "learning_rate": 2.378329312604171e-05, "loss": 0.5094, "step": 9727 }, { "epoch": 1.9997944290266214, "grad_norm": 0.15841197967529297, "learning_rate": 2.3774485381855812e-05, "loss": 0.5167, "step": 9728 }, { "epoch": 2.0, "grad_norm": 0.17381541430950165, "learning_rate": 2.3765678683351824e-05, "loss": 0.5104, "step": 9729 }, { "epoch": 2.0002055709733786, "grad_norm": 0.35462313890457153, "learning_rate": 2.375687303096359e-05, "loss": 0.4014, "step": 9730 }, { "epoch": 2.000411141946757, "grad_norm": 0.3547631800174713, "learning_rate": 2.3748068425124914e-05, "loss": 0.4388, "step": 9731 }, { "epoch": 2.0006167129201358, "grad_norm": 0.28014928102493286, "learning_rate": 2.373926486626959e-05, "loss": 0.4034, "step": 9732 }, { "epoch": 2.0008222838935144, "grad_norm": 0.23848789930343628, "learning_rate": 2.3730462354831326e-05, "loss": 0.4079, "step": 9733 }, { "epoch": 2.001027854866893, "grad_norm": 0.22783653438091278, "learning_rate": 2.3721660891243738e-05, "loss": 0.4117, "step": 9734 }, { "epoch": 2.0012334258402715, "grad_norm": 0.27238190174102783, "learning_rate": 2.371286047594049e-05, "loss": 0.3957, "step": 9735 }, { "epoch": 2.00143899681365, "grad_norm": 0.35664230585098267, "learning_rate": 2.3704061109355107e-05, "loss": 0.4034, "step": 9736 }, { "epoch": 2.0016445677870283, "grad_norm": 0.3655121326446533, "learning_rate": 2.369526279192108e-05, "loss": 0.3992, "step": 9737 }, { "epoch": 2.001850138760407, "grad_norm": 0.31957703828811646, "learning_rate": 2.3686465524071887e-05, "loss": 0.4125, "step": 9738 }, { "epoch": 2.0020557097337854, "grad_norm": 0.2534150183200836, "learning_rate": 2.3677669306240927e-05, "loss": 0.3829, "step": 9739 }, { "epoch": 2.002261280707164, "grad_norm": 0.23875583708286285, "learning_rate": 2.3668874138861533e-05, "loss": 0.4053, "step": 9740 }, { "epoch": 2.0024668516805426, "grad_norm": 0.24184350669384003, "learning_rate": 2.366008002236702e-05, "loss": 0.4061, "step": 9741 }, { "epoch": 2.002672422653921, "grad_norm": 0.232225701212883, "learning_rate": 2.3651286957190612e-05, "loss": 0.4083, "step": 9742 }, { "epoch": 2.0028779936273, "grad_norm": 0.1986769735813141, "learning_rate": 2.3642494943765516e-05, "loss": 0.4456, "step": 9743 }, { "epoch": 2.0030835646006784, "grad_norm": 0.1628189980983734, "learning_rate": 2.363370398252485e-05, "loss": 0.4615, "step": 9744 }, { "epoch": 2.003289135574057, "grad_norm": 0.13968214392662048, "learning_rate": 2.362491407390174e-05, "loss": 0.4571, "step": 9745 }, { "epoch": 2.0034947065474356, "grad_norm": 0.2863624095916748, "learning_rate": 2.3616125218329208e-05, "loss": 0.3981, "step": 9746 }, { "epoch": 2.003700277520814, "grad_norm": 0.27160152792930603, "learning_rate": 2.360733741624024e-05, "loss": 0.3855, "step": 9747 }, { "epoch": 2.0039058484941927, "grad_norm": 0.24677185714244843, "learning_rate": 2.3598550668067765e-05, "loss": 0.373, "step": 9748 }, { "epoch": 2.0041114194675713, "grad_norm": 0.23863226175308228, "learning_rate": 2.358976497424467e-05, "loss": 0.4122, "step": 9749 }, { "epoch": 2.00431699044095, "grad_norm": 0.23597677052021027, "learning_rate": 2.3580980335203787e-05, "loss": 0.4114, "step": 9750 }, { "epoch": 2.0045225614143285, "grad_norm": 0.24519526958465576, "learning_rate": 2.357219675137787e-05, "loss": 0.4, "step": 9751 }, { "epoch": 2.0047281323877066, "grad_norm": 0.26484453678131104, "learning_rate": 2.356341422319968e-05, "loss": 0.4106, "step": 9752 }, { "epoch": 2.0049337033610852, "grad_norm": 0.2829241156578064, "learning_rate": 2.3554632751101882e-05, "loss": 0.4101, "step": 9753 }, { "epoch": 2.005139274334464, "grad_norm": 0.2894810438156128, "learning_rate": 2.354585233551709e-05, "loss": 0.4072, "step": 9754 }, { "epoch": 2.0053448453078424, "grad_norm": 0.26924699544906616, "learning_rate": 2.3537072976877862e-05, "loss": 0.3941, "step": 9755 }, { "epoch": 2.005550416281221, "grad_norm": 0.26175355911254883, "learning_rate": 2.352829467561675e-05, "loss": 0.4, "step": 9756 }, { "epoch": 2.0057559872545996, "grad_norm": 0.15815532207489014, "learning_rate": 2.3519517432166195e-05, "loss": 0.4488, "step": 9757 }, { "epoch": 2.005961558227978, "grad_norm": 0.1532447189092636, "learning_rate": 2.3510741246958602e-05, "loss": 0.4491, "step": 9758 }, { "epoch": 2.0061671292013568, "grad_norm": 0.2337024062871933, "learning_rate": 2.3501966120426364e-05, "loss": 0.4038, "step": 9759 }, { "epoch": 2.0063727001747353, "grad_norm": 0.2317887842655182, "learning_rate": 2.3493192053001774e-05, "loss": 0.4058, "step": 9760 }, { "epoch": 2.006578271148114, "grad_norm": 0.22884777188301086, "learning_rate": 2.3484419045117088e-05, "loss": 0.3987, "step": 9761 }, { "epoch": 2.0067838421214925, "grad_norm": 0.2271248698234558, "learning_rate": 2.3475647097204513e-05, "loss": 0.3916, "step": 9762 }, { "epoch": 2.006989413094871, "grad_norm": 0.2272649109363556, "learning_rate": 2.3466876209696204e-05, "loss": 0.4061, "step": 9763 }, { "epoch": 2.0071949840682497, "grad_norm": 0.22100196778774261, "learning_rate": 2.345810638302425e-05, "loss": 0.4063, "step": 9764 }, { "epoch": 2.0074005550416283, "grad_norm": 0.22727227210998535, "learning_rate": 2.3449337617620705e-05, "loss": 0.3931, "step": 9765 }, { "epoch": 2.007606126015007, "grad_norm": 0.24030756950378418, "learning_rate": 2.344056991391757e-05, "loss": 0.4071, "step": 9766 }, { "epoch": 2.007811696988385, "grad_norm": 0.2378872036933899, "learning_rate": 2.3431803272346795e-05, "loss": 0.4167, "step": 9767 }, { "epoch": 2.0080172679617636, "grad_norm": 0.23873169720172882, "learning_rate": 2.3423037693340263e-05, "loss": 0.4025, "step": 9768 }, { "epoch": 2.008222838935142, "grad_norm": 0.16406850516796112, "learning_rate": 2.341427317732981e-05, "loss": 0.4482, "step": 9769 }, { "epoch": 2.008428409908521, "grad_norm": 0.24554254114627838, "learning_rate": 2.340550972474723e-05, "loss": 0.4149, "step": 9770 }, { "epoch": 2.0086339808818994, "grad_norm": 0.24509701132774353, "learning_rate": 2.339674733602425e-05, "loss": 0.3931, "step": 9771 }, { "epoch": 2.008839551855278, "grad_norm": 0.2255314290523529, "learning_rate": 2.3387986011592542e-05, "loss": 0.4023, "step": 9772 }, { "epoch": 2.0090451228286565, "grad_norm": 0.22587113082408905, "learning_rate": 2.3379225751883768e-05, "loss": 0.403, "step": 9773 }, { "epoch": 2.009250693802035, "grad_norm": 0.13071568310260773, "learning_rate": 2.337046655732948e-05, "loss": 0.4701, "step": 9774 }, { "epoch": 2.0094562647754137, "grad_norm": 0.2212098240852356, "learning_rate": 2.336170842836121e-05, "loss": 0.394, "step": 9775 }, { "epoch": 2.0096618357487923, "grad_norm": 0.23073311150074005, "learning_rate": 2.3352951365410414e-05, "loss": 0.421, "step": 9776 }, { "epoch": 2.009867406722171, "grad_norm": 0.21537451446056366, "learning_rate": 2.334419536890854e-05, "loss": 0.3929, "step": 9777 }, { "epoch": 2.0100729776955495, "grad_norm": 0.21932470798492432, "learning_rate": 2.3335440439286943e-05, "loss": 0.3989, "step": 9778 }, { "epoch": 2.010278548668928, "grad_norm": 0.2174750566482544, "learning_rate": 2.332668657697692e-05, "loss": 0.3909, "step": 9779 }, { "epoch": 2.0104841196423067, "grad_norm": 0.21708469092845917, "learning_rate": 2.3317933782409764e-05, "loss": 0.3854, "step": 9780 }, { "epoch": 2.0106896906156853, "grad_norm": 0.22329485416412354, "learning_rate": 2.330918205601667e-05, "loss": 0.4068, "step": 9781 }, { "epoch": 2.010895261589064, "grad_norm": 0.22749973833560944, "learning_rate": 2.3300431398228786e-05, "loss": 0.4065, "step": 9782 }, { "epoch": 2.011100832562442, "grad_norm": 0.2266959398984909, "learning_rate": 2.3291681809477235e-05, "loss": 0.4044, "step": 9783 }, { "epoch": 2.0113064035358206, "grad_norm": 0.22487907111644745, "learning_rate": 2.3282933290193048e-05, "loss": 0.3902, "step": 9784 }, { "epoch": 2.011511974509199, "grad_norm": 0.22450290620326996, "learning_rate": 2.327418584080724e-05, "loss": 0.4144, "step": 9785 }, { "epoch": 2.0117175454825778, "grad_norm": 0.13316728174686432, "learning_rate": 2.3265439461750727e-05, "loss": 0.4369, "step": 9786 }, { "epoch": 2.0119231164559563, "grad_norm": 0.23068048059940338, "learning_rate": 2.3256694153454446e-05, "loss": 0.4071, "step": 9787 }, { "epoch": 2.012128687429335, "grad_norm": 0.22546036541461945, "learning_rate": 2.324794991634921e-05, "loss": 0.392, "step": 9788 }, { "epoch": 2.0123342584027135, "grad_norm": 0.2214207649230957, "learning_rate": 2.3239206750865813e-05, "loss": 0.3871, "step": 9789 }, { "epoch": 2.012539829376092, "grad_norm": 0.12996140122413635, "learning_rate": 2.3230464657434995e-05, "loss": 0.4446, "step": 9790 }, { "epoch": 2.0127454003494707, "grad_norm": 0.126758873462677, "learning_rate": 2.322172363648743e-05, "loss": 0.4344, "step": 9791 }, { "epoch": 2.0129509713228493, "grad_norm": 0.21626314520835876, "learning_rate": 2.3212983688453753e-05, "loss": 0.4197, "step": 9792 }, { "epoch": 2.013156542296228, "grad_norm": 0.11778894811868668, "learning_rate": 2.3204244813764516e-05, "loss": 0.4603, "step": 9793 }, { "epoch": 2.0133621132696065, "grad_norm": 0.13116705417633057, "learning_rate": 2.3195507012850284e-05, "loss": 0.4376, "step": 9794 }, { "epoch": 2.013567684242985, "grad_norm": 0.21736088395118713, "learning_rate": 2.3186770286141507e-05, "loss": 0.3973, "step": 9795 }, { "epoch": 2.0137732552163636, "grad_norm": 0.2278052121400833, "learning_rate": 2.31780346340686e-05, "loss": 0.4055, "step": 9796 }, { "epoch": 2.013978826189742, "grad_norm": 0.2270914614200592, "learning_rate": 2.3169300057061935e-05, "loss": 0.3941, "step": 9797 }, { "epoch": 2.0141843971631204, "grad_norm": 0.22449646890163422, "learning_rate": 2.31605665555518e-05, "loss": 0.3728, "step": 9798 }, { "epoch": 2.014389968136499, "grad_norm": 0.12993952631950378, "learning_rate": 2.3151834129968495e-05, "loss": 0.4542, "step": 9799 }, { "epoch": 2.0145955391098775, "grad_norm": 0.21774081885814667, "learning_rate": 2.3143102780742185e-05, "loss": 0.3867, "step": 9800 }, { "epoch": 2.014801110083256, "grad_norm": 0.13234397768974304, "learning_rate": 2.3134372508303055e-05, "loss": 0.4441, "step": 9801 }, { "epoch": 2.0150066810566347, "grad_norm": 0.22552914917469025, "learning_rate": 2.3125643313081194e-05, "loss": 0.3967, "step": 9802 }, { "epoch": 2.0152122520300133, "grad_norm": 0.22355657815933228, "learning_rate": 2.311691519550665e-05, "loss": 0.3994, "step": 9803 }, { "epoch": 2.015417823003392, "grad_norm": 0.22515852749347687, "learning_rate": 2.3108188156009412e-05, "loss": 0.3941, "step": 9804 }, { "epoch": 2.0156233939767705, "grad_norm": 0.2237560749053955, "learning_rate": 2.3099462195019416e-05, "loss": 0.4045, "step": 9805 }, { "epoch": 2.015828964950149, "grad_norm": 0.1298869103193283, "learning_rate": 2.309073731296656e-05, "loss": 0.4567, "step": 9806 }, { "epoch": 2.0160345359235277, "grad_norm": 0.22776378691196442, "learning_rate": 2.3082013510280656e-05, "loss": 0.4082, "step": 9807 }, { "epoch": 2.0162401068969062, "grad_norm": 0.22463855147361755, "learning_rate": 2.307329078739152e-05, "loss": 0.4022, "step": 9808 }, { "epoch": 2.016445677870285, "grad_norm": 0.22342638671398163, "learning_rate": 2.3064569144728855e-05, "loss": 0.4131, "step": 9809 }, { "epoch": 2.0166512488436634, "grad_norm": 0.22417156398296356, "learning_rate": 2.3055848582722352e-05, "loss": 0.3981, "step": 9810 }, { "epoch": 2.016856819817042, "grad_norm": 0.2322673499584198, "learning_rate": 2.3047129101801618e-05, "loss": 0.4035, "step": 9811 }, { "epoch": 2.0170623907904206, "grad_norm": 0.2153014838695526, "learning_rate": 2.303841070239622e-05, "loss": 0.3957, "step": 9812 }, { "epoch": 2.0172679617637987, "grad_norm": 0.22393642365932465, "learning_rate": 2.302969338493567e-05, "loss": 0.3947, "step": 9813 }, { "epoch": 2.0174735327371773, "grad_norm": 0.23003719747066498, "learning_rate": 2.302097714984945e-05, "loss": 0.3909, "step": 9814 }, { "epoch": 2.017679103710556, "grad_norm": 0.22402851283550262, "learning_rate": 2.301226199756696e-05, "loss": 0.3974, "step": 9815 }, { "epoch": 2.0178846746839345, "grad_norm": 0.2208302617073059, "learning_rate": 2.3003547928517547e-05, "loss": 0.3763, "step": 9816 }, { "epoch": 2.018090245657313, "grad_norm": 0.1260402798652649, "learning_rate": 2.299483494313052e-05, "loss": 0.4457, "step": 9817 }, { "epoch": 2.0182958166306917, "grad_norm": 0.226173534989357, "learning_rate": 2.298612304183512e-05, "loss": 0.4093, "step": 9818 }, { "epoch": 2.0185013876040703, "grad_norm": 0.12185024470090866, "learning_rate": 2.297741222506053e-05, "loss": 0.4517, "step": 9819 }, { "epoch": 2.018706958577449, "grad_norm": 0.2621656358242035, "learning_rate": 2.2968702493235923e-05, "loss": 0.4059, "step": 9820 }, { "epoch": 2.0189125295508275, "grad_norm": 0.2253510057926178, "learning_rate": 2.2959993846790372e-05, "loss": 0.4052, "step": 9821 }, { "epoch": 2.019118100524206, "grad_norm": 0.12481515854597092, "learning_rate": 2.2951286286152893e-05, "loss": 0.4528, "step": 9822 }, { "epoch": 2.0193236714975846, "grad_norm": 0.21684333682060242, "learning_rate": 2.2942579811752496e-05, "loss": 0.3872, "step": 9823 }, { "epoch": 2.019529242470963, "grad_norm": 0.13086971640586853, "learning_rate": 2.2933874424018093e-05, "loss": 0.4632, "step": 9824 }, { "epoch": 2.019734813444342, "grad_norm": 0.21728526055812836, "learning_rate": 2.292517012337857e-05, "loss": 0.3812, "step": 9825 }, { "epoch": 2.0199403844177204, "grad_norm": 0.23790940642356873, "learning_rate": 2.291646691026273e-05, "loss": 0.4011, "step": 9826 }, { "epoch": 2.020145955391099, "grad_norm": 0.12328074872493744, "learning_rate": 2.290776478509933e-05, "loss": 0.4421, "step": 9827 }, { "epoch": 2.020351526364477, "grad_norm": 0.23319554328918457, "learning_rate": 2.2899063748317123e-05, "loss": 0.3795, "step": 9828 }, { "epoch": 2.0205570973378557, "grad_norm": 0.21926866471767426, "learning_rate": 2.2890363800344744e-05, "loss": 0.3943, "step": 9829 }, { "epoch": 2.0207626683112343, "grad_norm": 0.2243729531764984, "learning_rate": 2.2881664941610796e-05, "loss": 0.4123, "step": 9830 }, { "epoch": 2.020968239284613, "grad_norm": 0.12581419944763184, "learning_rate": 2.2872967172543843e-05, "loss": 0.4629, "step": 9831 }, { "epoch": 2.0211738102579915, "grad_norm": 0.12486526370048523, "learning_rate": 2.286427049357237e-05, "loss": 0.4541, "step": 9832 }, { "epoch": 2.02137938123137, "grad_norm": 0.2228085696697235, "learning_rate": 2.2855574905124826e-05, "loss": 0.4007, "step": 9833 }, { "epoch": 2.0215849522047487, "grad_norm": 0.1320047229528427, "learning_rate": 2.284688040762959e-05, "loss": 0.4513, "step": 9834 }, { "epoch": 2.0217905231781272, "grad_norm": 0.22697174549102783, "learning_rate": 2.283818700151503e-05, "loss": 0.3806, "step": 9835 }, { "epoch": 2.021996094151506, "grad_norm": 0.12552069127559662, "learning_rate": 2.2829494687209413e-05, "loss": 0.4545, "step": 9836 }, { "epoch": 2.0222016651248844, "grad_norm": 0.22603359818458557, "learning_rate": 2.282080346514097e-05, "loss": 0.3866, "step": 9837 }, { "epoch": 2.022407236098263, "grad_norm": 0.22030943632125854, "learning_rate": 2.2812113335737867e-05, "loss": 0.3983, "step": 9838 }, { "epoch": 2.0226128070716416, "grad_norm": 0.23014822602272034, "learning_rate": 2.280342429942824e-05, "loss": 0.4008, "step": 9839 }, { "epoch": 2.02281837804502, "grad_norm": 0.2164926677942276, "learning_rate": 2.279473635664013e-05, "loss": 0.4113, "step": 9840 }, { "epoch": 2.0230239490183988, "grad_norm": 0.23505493998527527, "learning_rate": 2.2786049507801594e-05, "loss": 0.4073, "step": 9841 }, { "epoch": 2.0232295199917774, "grad_norm": 0.21695363521575928, "learning_rate": 2.277736375334057e-05, "loss": 0.3937, "step": 9842 }, { "epoch": 2.0234350909651555, "grad_norm": 0.21634046733379364, "learning_rate": 2.2768679093684948e-05, "loss": 0.4001, "step": 9843 }, { "epoch": 2.023640661938534, "grad_norm": 0.22589579224586487, "learning_rate": 2.2759995529262617e-05, "loss": 0.3816, "step": 9844 }, { "epoch": 2.0238462329119127, "grad_norm": 0.22080455720424652, "learning_rate": 2.2751313060501353e-05, "loss": 0.3994, "step": 9845 }, { "epoch": 2.0240518038852913, "grad_norm": 0.23890239000320435, "learning_rate": 2.2742631687828906e-05, "loss": 0.4072, "step": 9846 }, { "epoch": 2.02425737485867, "grad_norm": 0.2339673787355423, "learning_rate": 2.2733951411672963e-05, "loss": 0.4084, "step": 9847 }, { "epoch": 2.0244629458320484, "grad_norm": 0.22778230905532837, "learning_rate": 2.272527223246115e-05, "loss": 0.3973, "step": 9848 }, { "epoch": 2.024668516805427, "grad_norm": 0.22321897745132446, "learning_rate": 2.271659415062108e-05, "loss": 0.4052, "step": 9849 }, { "epoch": 2.0248740877788056, "grad_norm": 0.13747207820415497, "learning_rate": 2.270791716658026e-05, "loss": 0.4596, "step": 9850 }, { "epoch": 2.025079658752184, "grad_norm": 0.22815275192260742, "learning_rate": 2.2699241280766174e-05, "loss": 0.3894, "step": 9851 }, { "epoch": 2.025285229725563, "grad_norm": 0.219502255320549, "learning_rate": 2.269056649360623e-05, "loss": 0.3969, "step": 9852 }, { "epoch": 2.0254908006989414, "grad_norm": 0.229275181889534, "learning_rate": 2.26818928055278e-05, "loss": 0.4055, "step": 9853 }, { "epoch": 2.02569637167232, "grad_norm": 0.21822713315486908, "learning_rate": 2.2673220216958206e-05, "loss": 0.3896, "step": 9854 }, { "epoch": 2.0259019426456986, "grad_norm": 0.218753844499588, "learning_rate": 2.266454872832467e-05, "loss": 0.3951, "step": 9855 }, { "epoch": 2.026107513619077, "grad_norm": 0.2237304002046585, "learning_rate": 2.2655878340054446e-05, "loss": 0.4035, "step": 9856 }, { "epoch": 2.0263130845924557, "grad_norm": 0.2183140218257904, "learning_rate": 2.2647209052574658e-05, "loss": 0.3968, "step": 9857 }, { "epoch": 2.026518655565834, "grad_norm": 0.22163569927215576, "learning_rate": 2.26385408663124e-05, "loss": 0.3805, "step": 9858 }, { "epoch": 2.0267242265392125, "grad_norm": 0.22751082479953766, "learning_rate": 2.2629873781694717e-05, "loss": 0.3994, "step": 9859 }, { "epoch": 2.026929797512591, "grad_norm": 0.21998751163482666, "learning_rate": 2.2621207799148598e-05, "loss": 0.3864, "step": 9860 }, { "epoch": 2.0271353684859696, "grad_norm": 0.1337684839963913, "learning_rate": 2.2612542919100973e-05, "loss": 0.444, "step": 9861 }, { "epoch": 2.0273409394593482, "grad_norm": 0.23163475096225739, "learning_rate": 2.2603879141978702e-05, "loss": 0.4133, "step": 9862 }, { "epoch": 2.027546510432727, "grad_norm": 0.1254424899816513, "learning_rate": 2.2595216468208643e-05, "loss": 0.4527, "step": 9863 }, { "epoch": 2.0277520814061054, "grad_norm": 0.23382841050624847, "learning_rate": 2.258655489821753e-05, "loss": 0.4075, "step": 9864 }, { "epoch": 2.027957652379484, "grad_norm": 0.2241084724664688, "learning_rate": 2.2577894432432115e-05, "loss": 0.4089, "step": 9865 }, { "epoch": 2.0281632233528626, "grad_norm": 0.12018263339996338, "learning_rate": 2.2569235071279042e-05, "loss": 0.4616, "step": 9866 }, { "epoch": 2.028368794326241, "grad_norm": 0.21912699937820435, "learning_rate": 2.256057681518491e-05, "loss": 0.4057, "step": 9867 }, { "epoch": 2.0285743652996198, "grad_norm": 0.12558940052986145, "learning_rate": 2.255191966457629e-05, "loss": 0.437, "step": 9868 }, { "epoch": 2.0287799362729984, "grad_norm": 0.22069305181503296, "learning_rate": 2.254326361987964e-05, "loss": 0.3903, "step": 9869 }, { "epoch": 2.028985507246377, "grad_norm": 0.12789428234100342, "learning_rate": 2.2534608681521443e-05, "loss": 0.4686, "step": 9870 }, { "epoch": 2.0291910782197555, "grad_norm": 0.22064268589019775, "learning_rate": 2.252595484992808e-05, "loss": 0.3867, "step": 9871 }, { "epoch": 2.029396649193134, "grad_norm": 0.1297440379858017, "learning_rate": 2.251730212552587e-05, "loss": 0.471, "step": 9872 }, { "epoch": 2.0296022201665127, "grad_norm": 0.227555051445961, "learning_rate": 2.2508650508741107e-05, "loss": 0.4138, "step": 9873 }, { "epoch": 2.029807791139891, "grad_norm": 0.2229832112789154, "learning_rate": 2.250000000000001e-05, "loss": 0.3846, "step": 9874 }, { "epoch": 2.0300133621132694, "grad_norm": 0.12331897765398026, "learning_rate": 2.2491350599728745e-05, "loss": 0.4309, "step": 9875 }, { "epoch": 2.030218933086648, "grad_norm": 0.12525731325149536, "learning_rate": 2.2482702308353416e-05, "loss": 0.4642, "step": 9876 }, { "epoch": 2.0304245040600266, "grad_norm": 0.22697319090366364, "learning_rate": 2.2474055126300116e-05, "loss": 0.3967, "step": 9877 }, { "epoch": 2.030630075033405, "grad_norm": 0.21771733462810516, "learning_rate": 2.2465409053994835e-05, "loss": 0.396, "step": 9878 }, { "epoch": 2.030835646006784, "grad_norm": 0.21557028591632843, "learning_rate": 2.2456764091863518e-05, "loss": 0.3904, "step": 9879 }, { "epoch": 2.0310412169801624, "grad_norm": 0.22535440325737, "learning_rate": 2.244812024033207e-05, "loss": 0.4019, "step": 9880 }, { "epoch": 2.031246787953541, "grad_norm": 0.22445163130760193, "learning_rate": 2.243947749982633e-05, "loss": 0.3986, "step": 9881 }, { "epoch": 2.0314523589269196, "grad_norm": 0.21911373734474182, "learning_rate": 2.243083587077209e-05, "loss": 0.3931, "step": 9882 }, { "epoch": 2.031657929900298, "grad_norm": 0.21471014618873596, "learning_rate": 2.2422195353595056e-05, "loss": 0.3839, "step": 9883 }, { "epoch": 2.0318635008736767, "grad_norm": 0.2156352996826172, "learning_rate": 2.2413555948720952e-05, "loss": 0.3843, "step": 9884 }, { "epoch": 2.0320690718470553, "grad_norm": 0.22156722843647003, "learning_rate": 2.240491765657537e-05, "loss": 0.4147, "step": 9885 }, { "epoch": 2.032274642820434, "grad_norm": 0.22945941984653473, "learning_rate": 2.2396280477583874e-05, "loss": 0.4038, "step": 9886 }, { "epoch": 2.0324802137938125, "grad_norm": 0.217056542634964, "learning_rate": 2.2387644412172005e-05, "loss": 0.3978, "step": 9887 }, { "epoch": 2.032685784767191, "grad_norm": 0.22490544617176056, "learning_rate": 2.2379009460765203e-05, "loss": 0.3874, "step": 9888 }, { "epoch": 2.0328913557405692, "grad_norm": 0.224374920129776, "learning_rate": 2.2370375623788862e-05, "loss": 0.4149, "step": 9889 }, { "epoch": 2.033096926713948, "grad_norm": 0.13248522579669952, "learning_rate": 2.236174290166836e-05, "loss": 0.4294, "step": 9890 }, { "epoch": 2.0333024976873264, "grad_norm": 0.23234902322292328, "learning_rate": 2.235311129482897e-05, "loss": 0.395, "step": 9891 }, { "epoch": 2.033508068660705, "grad_norm": 0.2269185483455658, "learning_rate": 2.234448080369594e-05, "loss": 0.3915, "step": 9892 }, { "epoch": 2.0337136396340836, "grad_norm": 0.22412073612213135, "learning_rate": 2.2335851428694447e-05, "loss": 0.3766, "step": 9893 }, { "epoch": 2.033919210607462, "grad_norm": 0.22921979427337646, "learning_rate": 2.2327223170249626e-05, "loss": 0.4075, "step": 9894 }, { "epoch": 2.0341247815808408, "grad_norm": 0.12206049263477325, "learning_rate": 2.2318596028786543e-05, "loss": 0.4533, "step": 9895 }, { "epoch": 2.0343303525542193, "grad_norm": 0.22003917396068573, "learning_rate": 2.2309970004730204e-05, "loss": 0.3874, "step": 9896 }, { "epoch": 2.034535923527598, "grad_norm": 0.22223718464374542, "learning_rate": 2.2301345098505608e-05, "loss": 0.4057, "step": 9897 }, { "epoch": 2.0347414945009765, "grad_norm": 0.2259814292192459, "learning_rate": 2.2292721310537645e-05, "loss": 0.3888, "step": 9898 }, { "epoch": 2.034947065474355, "grad_norm": 0.21883010864257812, "learning_rate": 2.2284098641251172e-05, "loss": 0.4222, "step": 9899 }, { "epoch": 2.0351526364477337, "grad_norm": 0.12656092643737793, "learning_rate": 2.227547709107098e-05, "loss": 0.4542, "step": 9900 }, { "epoch": 2.0353582074211123, "grad_norm": 0.22307392954826355, "learning_rate": 2.2266856660421823e-05, "loss": 0.4201, "step": 9901 }, { "epoch": 2.035563778394491, "grad_norm": 0.2214750051498413, "learning_rate": 2.2258237349728382e-05, "loss": 0.3773, "step": 9902 }, { "epoch": 2.0357693493678695, "grad_norm": 0.22282758355140686, "learning_rate": 2.2249619159415273e-05, "loss": 0.4047, "step": 9903 }, { "epoch": 2.0359749203412476, "grad_norm": 0.12212900072336197, "learning_rate": 2.2241002089907114e-05, "loss": 0.4447, "step": 9904 }, { "epoch": 2.036180491314626, "grad_norm": 0.2368995100259781, "learning_rate": 2.2232386141628407e-05, "loss": 0.3965, "step": 9905 }, { "epoch": 2.036386062288005, "grad_norm": 0.12619584798812866, "learning_rate": 2.222377131500361e-05, "loss": 0.453, "step": 9906 }, { "epoch": 2.0365916332613834, "grad_norm": 0.1249145120382309, "learning_rate": 2.221515761045714e-05, "loss": 0.4397, "step": 9907 }, { "epoch": 2.036797204234762, "grad_norm": 0.22991523146629333, "learning_rate": 2.220654502841337e-05, "loss": 0.3909, "step": 9908 }, { "epoch": 2.0370027752081405, "grad_norm": 0.2213556170463562, "learning_rate": 2.2197933569296587e-05, "loss": 0.3813, "step": 9909 }, { "epoch": 2.037208346181519, "grad_norm": 0.24218927323818207, "learning_rate": 2.218932323353103e-05, "loss": 0.4047, "step": 9910 }, { "epoch": 2.0374139171548977, "grad_norm": 0.21407100558280945, "learning_rate": 2.2180714021540913e-05, "loss": 0.3848, "step": 9911 }, { "epoch": 2.0376194881282763, "grad_norm": 0.12527808547019958, "learning_rate": 2.217210593375036e-05, "loss": 0.4478, "step": 9912 }, { "epoch": 2.037825059101655, "grad_norm": 0.22188331186771393, "learning_rate": 2.216349897058345e-05, "loss": 0.3957, "step": 9913 }, { "epoch": 2.0380306300750335, "grad_norm": 0.23192797601222992, "learning_rate": 2.2154893132464207e-05, "loss": 0.3889, "step": 9914 }, { "epoch": 2.038236201048412, "grad_norm": 0.2198922336101532, "learning_rate": 2.21462884198166e-05, "loss": 0.3865, "step": 9915 }, { "epoch": 2.0384417720217907, "grad_norm": 0.1324300318956375, "learning_rate": 2.213768483306455e-05, "loss": 0.4286, "step": 9916 }, { "epoch": 2.0386473429951693, "grad_norm": 0.22883708775043488, "learning_rate": 2.212908237263189e-05, "loss": 0.3945, "step": 9917 }, { "epoch": 2.038852913968548, "grad_norm": 0.2269202619791031, "learning_rate": 2.212048103894246e-05, "loss": 0.4058, "step": 9918 }, { "epoch": 2.039058484941926, "grad_norm": 0.23420077562332153, "learning_rate": 2.2111880832419995e-05, "loss": 0.4064, "step": 9919 }, { "epoch": 2.0392640559153046, "grad_norm": 0.12553973495960236, "learning_rate": 2.210328175348818e-05, "loss": 0.4317, "step": 9920 }, { "epoch": 2.039469626888683, "grad_norm": 0.22346656024456024, "learning_rate": 2.209468380257065e-05, "loss": 0.3767, "step": 9921 }, { "epoch": 2.0396751978620618, "grad_norm": 0.2343178391456604, "learning_rate": 2.208608698009099e-05, "loss": 0.3972, "step": 9922 }, { "epoch": 2.0398807688354403, "grad_norm": 0.21974226832389832, "learning_rate": 2.207749128647273e-05, "loss": 0.3937, "step": 9923 }, { "epoch": 2.040086339808819, "grad_norm": 0.12265095114707947, "learning_rate": 2.206889672213932e-05, "loss": 0.4691, "step": 9924 }, { "epoch": 2.0402919107821975, "grad_norm": 0.13189628720283508, "learning_rate": 2.2060303287514198e-05, "loss": 0.4569, "step": 9925 }, { "epoch": 2.040497481755576, "grad_norm": 0.22592967748641968, "learning_rate": 2.2051710983020714e-05, "loss": 0.411, "step": 9926 }, { "epoch": 2.0407030527289547, "grad_norm": 0.22591936588287354, "learning_rate": 2.2043119809082176e-05, "loss": 0.4061, "step": 9927 }, { "epoch": 2.0409086237023333, "grad_norm": 0.22242794930934906, "learning_rate": 2.2034529766121802e-05, "loss": 0.398, "step": 9928 }, { "epoch": 2.041114194675712, "grad_norm": 0.21335627138614655, "learning_rate": 2.2025940854562824e-05, "loss": 0.4109, "step": 9929 }, { "epoch": 2.0413197656490905, "grad_norm": 0.2250111699104309, "learning_rate": 2.2017353074828363e-05, "loss": 0.3865, "step": 9930 }, { "epoch": 2.041525336622469, "grad_norm": 0.2197580337524414, "learning_rate": 2.2008766427341477e-05, "loss": 0.379, "step": 9931 }, { "epoch": 2.0417309075958476, "grad_norm": 0.23078951239585876, "learning_rate": 2.2000180912525225e-05, "loss": 0.4046, "step": 9932 }, { "epoch": 2.041936478569226, "grad_norm": 0.22051231563091278, "learning_rate": 2.1991596530802558e-05, "loss": 0.3925, "step": 9933 }, { "epoch": 2.042142049542605, "grad_norm": 0.23026688396930695, "learning_rate": 2.198301328259639e-05, "loss": 0.3941, "step": 9934 }, { "epoch": 2.042347620515983, "grad_norm": 0.23431305587291718, "learning_rate": 2.197443116832958e-05, "loss": 0.3928, "step": 9935 }, { "epoch": 2.0425531914893615, "grad_norm": 0.22884812951087952, "learning_rate": 2.1965850188424914e-05, "loss": 0.3915, "step": 9936 }, { "epoch": 2.04275876246274, "grad_norm": 0.22704505920410156, "learning_rate": 2.195727034330516e-05, "loss": 0.4077, "step": 9937 }, { "epoch": 2.0429643334361187, "grad_norm": 0.13225297629833221, "learning_rate": 2.194869163339297e-05, "loss": 0.4588, "step": 9938 }, { "epoch": 2.0431699044094973, "grad_norm": 0.220863476395607, "learning_rate": 2.194011405911102e-05, "loss": 0.3732, "step": 9939 }, { "epoch": 2.043375475382876, "grad_norm": 0.22376231849193573, "learning_rate": 2.193153762088187e-05, "loss": 0.4105, "step": 9940 }, { "epoch": 2.0435810463562545, "grad_norm": 0.22367540001869202, "learning_rate": 2.192296231912804e-05, "loss": 0.393, "step": 9941 }, { "epoch": 2.043786617329633, "grad_norm": 0.22071625292301178, "learning_rate": 2.1914388154271993e-05, "loss": 0.3973, "step": 9942 }, { "epoch": 2.0439921883030117, "grad_norm": 0.22081826627254486, "learning_rate": 2.1905815126736143e-05, "loss": 0.4125, "step": 9943 }, { "epoch": 2.0441977592763902, "grad_norm": 0.22600281238555908, "learning_rate": 2.1897243236942836e-05, "loss": 0.3986, "step": 9944 }, { "epoch": 2.044403330249769, "grad_norm": 0.2240431308746338, "learning_rate": 2.1888672485314357e-05, "loss": 0.4019, "step": 9945 }, { "epoch": 2.0446089012231474, "grad_norm": 0.22377148270606995, "learning_rate": 2.188010287227298e-05, "loss": 0.4098, "step": 9946 }, { "epoch": 2.044814472196526, "grad_norm": 0.2262306958436966, "learning_rate": 2.1871534398240877e-05, "loss": 0.3999, "step": 9947 }, { "epoch": 2.0450200431699046, "grad_norm": 0.22286969423294067, "learning_rate": 2.1862967063640164e-05, "loss": 0.3974, "step": 9948 }, { "epoch": 2.045225614143283, "grad_norm": 0.1264716535806656, "learning_rate": 2.1854400868892905e-05, "loss": 0.4572, "step": 9949 }, { "epoch": 2.0454311851166613, "grad_norm": 0.22342973947525024, "learning_rate": 2.1845835814421155e-05, "loss": 0.3999, "step": 9950 }, { "epoch": 2.04563675609004, "grad_norm": 0.22479073703289032, "learning_rate": 2.1837271900646852e-05, "loss": 0.3997, "step": 9951 }, { "epoch": 2.0458423270634185, "grad_norm": 0.22151948511600494, "learning_rate": 2.1828709127991884e-05, "loss": 0.3914, "step": 9952 }, { "epoch": 2.046047898036797, "grad_norm": 0.1296972632408142, "learning_rate": 2.1820147496878126e-05, "loss": 0.4305, "step": 9953 }, { "epoch": 2.0462534690101757, "grad_norm": 0.25065821409225464, "learning_rate": 2.181158700772736e-05, "loss": 0.3911, "step": 9954 }, { "epoch": 2.0464590399835543, "grad_norm": 0.2304956465959549, "learning_rate": 2.180302766096132e-05, "loss": 0.3961, "step": 9955 }, { "epoch": 2.046664610956933, "grad_norm": 0.22731968760490417, "learning_rate": 2.179446945700169e-05, "loss": 0.3846, "step": 9956 }, { "epoch": 2.0468701819303114, "grad_norm": 0.23249146342277527, "learning_rate": 2.1785912396270084e-05, "loss": 0.4109, "step": 9957 }, { "epoch": 2.04707575290369, "grad_norm": 0.22886785864830017, "learning_rate": 2.177735647918807e-05, "loss": 0.3894, "step": 9958 }, { "epoch": 2.0472813238770686, "grad_norm": 0.22079876065254211, "learning_rate": 2.176880170617715e-05, "loss": 0.4036, "step": 9959 }, { "epoch": 2.047486894850447, "grad_norm": 0.21782319247722626, "learning_rate": 2.1760248077658796e-05, "loss": 0.3954, "step": 9960 }, { "epoch": 2.047692465823826, "grad_norm": 0.22487705945968628, "learning_rate": 2.1751695594054398e-05, "loss": 0.4007, "step": 9961 }, { "epoch": 2.0478980367972044, "grad_norm": 0.22865137457847595, "learning_rate": 2.1743144255785294e-05, "loss": 0.3998, "step": 9962 }, { "epoch": 2.048103607770583, "grad_norm": 0.2298915535211563, "learning_rate": 2.173459406327278e-05, "loss": 0.4107, "step": 9963 }, { "epoch": 2.0483091787439616, "grad_norm": 0.2230944037437439, "learning_rate": 2.1726045016938065e-05, "loss": 0.3866, "step": 9964 }, { "epoch": 2.0485147497173397, "grad_norm": 0.23378700017929077, "learning_rate": 2.1717497117202314e-05, "loss": 0.4049, "step": 9965 }, { "epoch": 2.0487203206907183, "grad_norm": 0.22423069179058075, "learning_rate": 2.170895036448668e-05, "loss": 0.3989, "step": 9966 }, { "epoch": 2.048925891664097, "grad_norm": 0.2279648631811142, "learning_rate": 2.17004047592122e-05, "loss": 0.4052, "step": 9967 }, { "epoch": 2.0491314626374755, "grad_norm": 0.2262582629919052, "learning_rate": 2.1691860301799867e-05, "loss": 0.391, "step": 9968 }, { "epoch": 2.049337033610854, "grad_norm": 0.2182939350605011, "learning_rate": 2.1683316992670644e-05, "loss": 0.3879, "step": 9969 }, { "epoch": 2.0495426045842327, "grad_norm": 0.21680088341236115, "learning_rate": 2.1674774832245406e-05, "loss": 0.3804, "step": 9970 }, { "epoch": 2.0497481755576112, "grad_norm": 0.22588318586349487, "learning_rate": 2.166623382094497e-05, "loss": 0.4107, "step": 9971 }, { "epoch": 2.04995374653099, "grad_norm": 0.22498705983161926, "learning_rate": 2.165769395919015e-05, "loss": 0.3904, "step": 9972 }, { "epoch": 2.0501593175043684, "grad_norm": 0.1259543001651764, "learning_rate": 2.1649155247401637e-05, "loss": 0.4644, "step": 9973 }, { "epoch": 2.050364888477747, "grad_norm": 0.22000350058078766, "learning_rate": 2.1640617686000116e-05, "loss": 0.3917, "step": 9974 }, { "epoch": 2.0505704594511256, "grad_norm": 0.23319876194000244, "learning_rate": 2.163208127540618e-05, "loss": 0.4, "step": 9975 }, { "epoch": 2.050776030424504, "grad_norm": 0.22796432673931122, "learning_rate": 2.1623546016040378e-05, "loss": 0.4044, "step": 9976 }, { "epoch": 2.0509816013978828, "grad_norm": 0.2386104017496109, "learning_rate": 2.16150119083232e-05, "loss": 0.4046, "step": 9977 }, { "epoch": 2.0511871723712614, "grad_norm": 0.22699424624443054, "learning_rate": 2.160647895267509e-05, "loss": 0.3846, "step": 9978 }, { "epoch": 2.05139274334464, "grad_norm": 0.22776249051094055, "learning_rate": 2.1597947149516403e-05, "loss": 0.4042, "step": 9979 }, { "epoch": 2.051598314318018, "grad_norm": 0.22444364428520203, "learning_rate": 2.1589416499267495e-05, "loss": 0.4076, "step": 9980 }, { "epoch": 2.0518038852913967, "grad_norm": 0.21514415740966797, "learning_rate": 2.158088700234861e-05, "loss": 0.391, "step": 9981 }, { "epoch": 2.0520094562647753, "grad_norm": 0.12512782216072083, "learning_rate": 2.1572358659179968e-05, "loss": 0.4546, "step": 9982 }, { "epoch": 2.052215027238154, "grad_norm": 0.217271625995636, "learning_rate": 2.1563831470181714e-05, "loss": 0.392, "step": 9983 }, { "epoch": 2.0524205982115324, "grad_norm": 0.12956684827804565, "learning_rate": 2.155530543577394e-05, "loss": 0.4561, "step": 9984 }, { "epoch": 2.052626169184911, "grad_norm": 0.2247815728187561, "learning_rate": 2.1546780556376692e-05, "loss": 0.401, "step": 9985 }, { "epoch": 2.0528317401582896, "grad_norm": 0.22784893214702606, "learning_rate": 2.1538256832409923e-05, "loss": 0.3878, "step": 9986 }, { "epoch": 2.053037311131668, "grad_norm": 0.22039231657981873, "learning_rate": 2.1529734264293597e-05, "loss": 0.4089, "step": 9987 }, { "epoch": 2.053242882105047, "grad_norm": 0.22087042033672333, "learning_rate": 2.152121285244757e-05, "loss": 0.4153, "step": 9988 }, { "epoch": 2.0534484530784254, "grad_norm": 0.15735208988189697, "learning_rate": 2.1512692597291642e-05, "loss": 0.4635, "step": 9989 }, { "epoch": 2.053654024051804, "grad_norm": 0.22379711270332336, "learning_rate": 2.1504173499245572e-05, "loss": 0.4056, "step": 9990 }, { "epoch": 2.0538595950251826, "grad_norm": 0.22105872631072998, "learning_rate": 2.1495655558729053e-05, "loss": 0.407, "step": 9991 }, { "epoch": 2.054065165998561, "grad_norm": 0.2312091439962387, "learning_rate": 2.1487138776161708e-05, "loss": 0.3885, "step": 9992 }, { "epoch": 2.0542707369719397, "grad_norm": 0.22999829053878784, "learning_rate": 2.1478623151963156e-05, "loss": 0.3916, "step": 9993 }, { "epoch": 2.0544763079453183, "grad_norm": 0.2265433371067047, "learning_rate": 2.14701086865529e-05, "loss": 0.3997, "step": 9994 }, { "epoch": 2.0546818789186965, "grad_norm": 0.21633121371269226, "learning_rate": 2.1461595380350395e-05, "loss": 0.3746, "step": 9995 }, { "epoch": 2.054887449892075, "grad_norm": 0.22249945998191833, "learning_rate": 2.1453083233775083e-05, "loss": 0.3946, "step": 9996 }, { "epoch": 2.0550930208654536, "grad_norm": 0.22257232666015625, "learning_rate": 2.1444572247246306e-05, "loss": 0.4039, "step": 9997 }, { "epoch": 2.0552985918388322, "grad_norm": 0.1395193338394165, "learning_rate": 2.143606242118335e-05, "loss": 0.4434, "step": 9998 }, { "epoch": 2.055504162812211, "grad_norm": 0.22854886949062347, "learning_rate": 2.1427553756005467e-05, "loss": 0.409, "step": 9999 }, { "epoch": 2.0557097337855894, "grad_norm": 0.23623695969581604, "learning_rate": 2.1419046252131813e-05, "loss": 0.3945, "step": 10000 }, { "epoch": 2.055915304758968, "grad_norm": 0.22533413767814636, "learning_rate": 2.1410539909981554e-05, "loss": 0.4078, "step": 10001 }, { "epoch": 2.0561208757323466, "grad_norm": 0.21484293043613434, "learning_rate": 2.1402034729973735e-05, "loss": 0.3971, "step": 10002 }, { "epoch": 2.056326446705725, "grad_norm": 0.12295730412006378, "learning_rate": 2.1393530712527364e-05, "loss": 0.4583, "step": 10003 }, { "epoch": 2.0565320176791038, "grad_norm": 0.21692106127738953, "learning_rate": 2.1385027858061404e-05, "loss": 0.3951, "step": 10004 }, { "epoch": 2.0567375886524824, "grad_norm": 0.23760221898555756, "learning_rate": 2.137652616699474e-05, "loss": 0.4146, "step": 10005 }, { "epoch": 2.056943159625861, "grad_norm": 0.2326803058385849, "learning_rate": 2.1368025639746222e-05, "loss": 0.3751, "step": 10006 }, { "epoch": 2.0571487305992395, "grad_norm": 0.12141763418912888, "learning_rate": 2.13595262767346e-05, "loss": 0.4688, "step": 10007 }, { "epoch": 2.057354301572618, "grad_norm": 0.1330864131450653, "learning_rate": 2.135102807837865e-05, "loss": 0.463, "step": 10008 }, { "epoch": 2.0575598725459967, "grad_norm": 0.12697000801563263, "learning_rate": 2.1342531045097006e-05, "loss": 0.4498, "step": 10009 }, { "epoch": 2.057765443519375, "grad_norm": 0.12423637509346008, "learning_rate": 2.1334035177308284e-05, "loss": 0.4417, "step": 10010 }, { "epoch": 2.0579710144927534, "grad_norm": 0.23774953186511993, "learning_rate": 2.1325540475431032e-05, "loss": 0.4171, "step": 10011 }, { "epoch": 2.058176585466132, "grad_norm": 0.12215947359800339, "learning_rate": 2.131704693988375e-05, "loss": 0.4431, "step": 10012 }, { "epoch": 2.0583821564395106, "grad_norm": 0.22526676952838898, "learning_rate": 2.130855457108485e-05, "loss": 0.4086, "step": 10013 }, { "epoch": 2.058587727412889, "grad_norm": 0.2246025949716568, "learning_rate": 2.1300063369452754e-05, "loss": 0.3882, "step": 10014 }, { "epoch": 2.058793298386268, "grad_norm": 0.22365763783454895, "learning_rate": 2.1291573335405763e-05, "loss": 0.3854, "step": 10015 }, { "epoch": 2.0589988693596464, "grad_norm": 0.2273135632276535, "learning_rate": 2.1283084469362117e-05, "loss": 0.4483, "step": 10016 }, { "epoch": 2.059204440333025, "grad_norm": 0.2241649329662323, "learning_rate": 2.1274596771740074e-05, "loss": 0.4028, "step": 10017 }, { "epoch": 2.0594100113064036, "grad_norm": 0.1520613133907318, "learning_rate": 2.1266110242957747e-05, "loss": 0.4413, "step": 10018 }, { "epoch": 2.059615582279782, "grad_norm": 0.22100979089736938, "learning_rate": 2.125762488343324e-05, "loss": 0.4095, "step": 10019 }, { "epoch": 2.0598211532531607, "grad_norm": 0.22822456061840057, "learning_rate": 2.1249140693584583e-05, "loss": 0.4182, "step": 10020 }, { "epoch": 2.0600267242265393, "grad_norm": 0.22433196008205414, "learning_rate": 2.1240657673829736e-05, "loss": 0.3938, "step": 10021 }, { "epoch": 2.060232295199918, "grad_norm": 0.2217511087656021, "learning_rate": 2.1232175824586653e-05, "loss": 0.3991, "step": 10022 }, { "epoch": 2.0604378661732965, "grad_norm": 0.2158900797367096, "learning_rate": 2.1223695146273172e-05, "loss": 0.3928, "step": 10023 }, { "epoch": 2.060643437146675, "grad_norm": 0.22462232410907745, "learning_rate": 2.1215215639307106e-05, "loss": 0.4, "step": 10024 }, { "epoch": 2.0608490081200532, "grad_norm": 0.235184445977211, "learning_rate": 2.1206737304106196e-05, "loss": 0.4223, "step": 10025 }, { "epoch": 2.061054579093432, "grad_norm": 0.23646195232868195, "learning_rate": 2.1198260141088127e-05, "loss": 0.377, "step": 10026 }, { "epoch": 2.0612601500668104, "grad_norm": 0.23219510912895203, "learning_rate": 2.1189784150670534e-05, "loss": 0.4182, "step": 10027 }, { "epoch": 2.061465721040189, "grad_norm": 0.22460506856441498, "learning_rate": 2.1181309333270966e-05, "loss": 0.3969, "step": 10028 }, { "epoch": 2.0616712920135676, "grad_norm": 0.2338314950466156, "learning_rate": 2.1172835689306973e-05, "loss": 0.3975, "step": 10029 }, { "epoch": 2.061876862986946, "grad_norm": 0.22709804773330688, "learning_rate": 2.116436321919601e-05, "loss": 0.4034, "step": 10030 }, { "epoch": 2.0620824339603248, "grad_norm": 0.2227647751569748, "learning_rate": 2.115589192335545e-05, "loss": 0.3797, "step": 10031 }, { "epoch": 2.0622880049337033, "grad_norm": 0.2209719717502594, "learning_rate": 2.1147421802202655e-05, "loss": 0.3913, "step": 10032 }, { "epoch": 2.062493575907082, "grad_norm": 0.2267482727766037, "learning_rate": 2.1138952856154907e-05, "loss": 0.4176, "step": 10033 }, { "epoch": 2.0626991468804605, "grad_norm": 0.22682222723960876, "learning_rate": 2.1130485085629413e-05, "loss": 0.4015, "step": 10034 }, { "epoch": 2.062904717853839, "grad_norm": 0.23114748299121857, "learning_rate": 2.1122018491043344e-05, "loss": 0.3889, "step": 10035 }, { "epoch": 2.0631102888272177, "grad_norm": 0.22637394070625305, "learning_rate": 2.1113553072813834e-05, "loss": 0.4254, "step": 10036 }, { "epoch": 2.0633158598005963, "grad_norm": 0.2336263358592987, "learning_rate": 2.1105088831357904e-05, "loss": 0.4082, "step": 10037 }, { "epoch": 2.063521430773975, "grad_norm": 0.6490523815155029, "learning_rate": 2.1096625767092575e-05, "loss": 0.438, "step": 10038 }, { "epoch": 2.0637270017473535, "grad_norm": 0.22613218426704407, "learning_rate": 2.108816388043477e-05, "loss": 0.3998, "step": 10039 }, { "epoch": 2.0639325727207316, "grad_norm": 0.23520736396312714, "learning_rate": 2.1079703171801374e-05, "loss": 0.4229, "step": 10040 }, { "epoch": 2.06413814369411, "grad_norm": 0.22257588803768158, "learning_rate": 2.1071243641609196e-05, "loss": 0.3859, "step": 10041 }, { "epoch": 2.064343714667489, "grad_norm": 0.22676822543144226, "learning_rate": 2.106278529027498e-05, "loss": 0.3839, "step": 10042 }, { "epoch": 2.0645492856408674, "grad_norm": 0.22315295040607452, "learning_rate": 2.1054328118215475e-05, "loss": 0.3921, "step": 10043 }, { "epoch": 2.064754856614246, "grad_norm": 0.22379836440086365, "learning_rate": 2.1045872125847298e-05, "loss": 0.3746, "step": 10044 }, { "epoch": 2.0649604275876245, "grad_norm": 0.21513979136943817, "learning_rate": 2.103741731358704e-05, "loss": 0.393, "step": 10045 }, { "epoch": 2.065165998561003, "grad_norm": 0.24278521537780762, "learning_rate": 2.102896368185123e-05, "loss": 0.4002, "step": 10046 }, { "epoch": 2.0653715695343817, "grad_norm": 0.1328233927488327, "learning_rate": 2.1020511231056337e-05, "loss": 0.4638, "step": 10047 }, { "epoch": 2.0655771405077603, "grad_norm": 0.23675784468650818, "learning_rate": 2.101205996161876e-05, "loss": 0.3953, "step": 10048 }, { "epoch": 2.065782711481139, "grad_norm": 0.22523106634616852, "learning_rate": 2.1003609873954888e-05, "loss": 0.4019, "step": 10049 }, { "epoch": 2.0659882824545175, "grad_norm": 0.12683424353599548, "learning_rate": 2.0995160968480998e-05, "loss": 0.4565, "step": 10050 }, { "epoch": 2.066193853427896, "grad_norm": 0.22555489838123322, "learning_rate": 2.098671324561333e-05, "loss": 0.4062, "step": 10051 }, { "epoch": 2.0663994244012747, "grad_norm": 0.23419348895549774, "learning_rate": 2.0978266705768064e-05, "loss": 0.4253, "step": 10052 }, { "epoch": 2.0666049953746533, "grad_norm": 0.2320510447025299, "learning_rate": 2.0969821349361312e-05, "loss": 0.4052, "step": 10053 }, { "epoch": 2.066810566348032, "grad_norm": 0.2119479775428772, "learning_rate": 2.0961377176809152e-05, "loss": 0.3983, "step": 10054 }, { "epoch": 2.0670161373214104, "grad_norm": 0.21941865980625153, "learning_rate": 2.0952934188527566e-05, "loss": 0.3949, "step": 10055 }, { "epoch": 2.0672217082947886, "grad_norm": 0.1271030455827713, "learning_rate": 2.094449238493253e-05, "loss": 0.4436, "step": 10056 }, { "epoch": 2.067427279268167, "grad_norm": 0.22050043940544128, "learning_rate": 2.093605176643992e-05, "loss": 0.4041, "step": 10057 }, { "epoch": 2.0676328502415457, "grad_norm": 0.22902661561965942, "learning_rate": 2.0927612333465567e-05, "loss": 0.4003, "step": 10058 }, { "epoch": 2.0678384212149243, "grad_norm": 0.2170822024345398, "learning_rate": 2.091917408642522e-05, "loss": 0.391, "step": 10059 }, { "epoch": 2.068043992188303, "grad_norm": 0.2229936420917511, "learning_rate": 2.0910737025734634e-05, "loss": 0.403, "step": 10060 }, { "epoch": 2.0682495631616815, "grad_norm": 0.2259387969970703, "learning_rate": 2.090230115180944e-05, "loss": 0.3887, "step": 10061 }, { "epoch": 2.06845513413506, "grad_norm": 0.22917728126049042, "learning_rate": 2.0893866465065215e-05, "loss": 0.4047, "step": 10062 }, { "epoch": 2.0686607051084387, "grad_norm": 0.22916476428508759, "learning_rate": 2.088543296591754e-05, "loss": 0.3906, "step": 10063 }, { "epoch": 2.0688662760818173, "grad_norm": 0.22529999911785126, "learning_rate": 2.087700065478187e-05, "loss": 0.4009, "step": 10064 }, { "epoch": 2.069071847055196, "grad_norm": 0.22376291453838348, "learning_rate": 2.0868569532073623e-05, "loss": 0.4003, "step": 10065 }, { "epoch": 2.0692774180285745, "grad_norm": 0.21545644104480743, "learning_rate": 2.0860139598208166e-05, "loss": 0.4031, "step": 10066 }, { "epoch": 2.069482989001953, "grad_norm": 0.1322476714849472, "learning_rate": 2.0851710853600806e-05, "loss": 0.4664, "step": 10067 }, { "epoch": 2.0696885599753316, "grad_norm": 0.22991631925106049, "learning_rate": 2.0843283298666783e-05, "loss": 0.4024, "step": 10068 }, { "epoch": 2.06989413094871, "grad_norm": 0.22085146605968475, "learning_rate": 2.0834856933821267e-05, "loss": 0.3827, "step": 10069 }, { "epoch": 2.070099701922089, "grad_norm": 0.1257437914609909, "learning_rate": 2.0826431759479416e-05, "loss": 0.4524, "step": 10070 }, { "epoch": 2.070305272895467, "grad_norm": 0.1249329000711441, "learning_rate": 2.081800777605628e-05, "loss": 0.4446, "step": 10071 }, { "epoch": 2.0705108438688455, "grad_norm": 0.12916463613510132, "learning_rate": 2.0809584983966886e-05, "loss": 0.4477, "step": 10072 }, { "epoch": 2.070716414842224, "grad_norm": 0.22638201713562012, "learning_rate": 2.080116338362617e-05, "loss": 0.3862, "step": 10073 }, { "epoch": 2.0709219858156027, "grad_norm": 0.21907664835453033, "learning_rate": 2.0792742975449027e-05, "loss": 0.3962, "step": 10074 }, { "epoch": 2.0711275567889813, "grad_norm": 0.12063062191009521, "learning_rate": 2.0784323759850295e-05, "loss": 0.4442, "step": 10075 }, { "epoch": 2.07133312776236, "grad_norm": 0.22785618901252747, "learning_rate": 2.0775905737244727e-05, "loss": 0.4005, "step": 10076 }, { "epoch": 2.0715386987357385, "grad_norm": 0.2289772778749466, "learning_rate": 2.076748890804708e-05, "loss": 0.4268, "step": 10077 }, { "epoch": 2.071744269709117, "grad_norm": 0.22283616662025452, "learning_rate": 2.0759073272671997e-05, "loss": 0.4003, "step": 10078 }, { "epoch": 2.0719498406824957, "grad_norm": 0.23021160066127777, "learning_rate": 2.0750658831534067e-05, "loss": 0.3948, "step": 10079 }, { "epoch": 2.0721554116558742, "grad_norm": 0.22141693532466888, "learning_rate": 2.0742245585047817e-05, "loss": 0.4089, "step": 10080 }, { "epoch": 2.072360982629253, "grad_norm": 0.2241126000881195, "learning_rate": 2.0733833533627767e-05, "loss": 0.3935, "step": 10081 }, { "epoch": 2.0725665536026314, "grad_norm": 0.1273168921470642, "learning_rate": 2.0725422677688313e-05, "loss": 0.4539, "step": 10082 }, { "epoch": 2.07277212457601, "grad_norm": 0.2204464226961136, "learning_rate": 2.0717013017643815e-05, "loss": 0.4002, "step": 10083 }, { "epoch": 2.0729776955493886, "grad_norm": 0.22708940505981445, "learning_rate": 2.0708604553908598e-05, "loss": 0.4088, "step": 10084 }, { "epoch": 2.073183266522767, "grad_norm": 0.23681271076202393, "learning_rate": 2.07001972868969e-05, "loss": 0.4163, "step": 10085 }, { "epoch": 2.0733888374961453, "grad_norm": 0.22358982264995575, "learning_rate": 2.0691791217022905e-05, "loss": 0.4071, "step": 10086 }, { "epoch": 2.073594408469524, "grad_norm": 0.2268630564212799, "learning_rate": 2.068338634470074e-05, "loss": 0.4045, "step": 10087 }, { "epoch": 2.0737999794429025, "grad_norm": 0.22552597522735596, "learning_rate": 2.0674982670344475e-05, "loss": 0.4144, "step": 10088 }, { "epoch": 2.074005550416281, "grad_norm": 0.22645661234855652, "learning_rate": 2.0666580194368117e-05, "loss": 0.4017, "step": 10089 }, { "epoch": 2.0742111213896597, "grad_norm": 0.2267918735742569, "learning_rate": 2.0658178917185603e-05, "loss": 0.3807, "step": 10090 }, { "epoch": 2.0744166923630383, "grad_norm": 0.2314879298210144, "learning_rate": 2.0649778839210855e-05, "loss": 0.405, "step": 10091 }, { "epoch": 2.074622263336417, "grad_norm": 0.22707362473011017, "learning_rate": 2.0641379960857693e-05, "loss": 0.4071, "step": 10092 }, { "epoch": 2.0748278343097954, "grad_norm": 0.22233855724334717, "learning_rate": 2.0632982282539892e-05, "loss": 0.409, "step": 10093 }, { "epoch": 2.075033405283174, "grad_norm": 0.2284967601299286, "learning_rate": 2.0624585804671157e-05, "loss": 0.3873, "step": 10094 }, { "epoch": 2.0752389762565526, "grad_norm": 0.22250832617282867, "learning_rate": 2.0616190527665155e-05, "loss": 0.4054, "step": 10095 }, { "epoch": 2.075444547229931, "grad_norm": 0.2331288605928421, "learning_rate": 2.0607796451935468e-05, "loss": 0.3975, "step": 10096 }, { "epoch": 2.07565011820331, "grad_norm": 0.2304941862821579, "learning_rate": 2.059940357789563e-05, "loss": 0.3924, "step": 10097 }, { "epoch": 2.0758556891766884, "grad_norm": 0.2210913896560669, "learning_rate": 2.0591011905959142e-05, "loss": 0.383, "step": 10098 }, { "epoch": 2.076061260150067, "grad_norm": 0.22776024043560028, "learning_rate": 2.0582621436539415e-05, "loss": 0.4058, "step": 10099 }, { "epoch": 2.0762668311234456, "grad_norm": 0.21400035917758942, "learning_rate": 2.0574232170049804e-05, "loss": 0.3827, "step": 10100 }, { "epoch": 2.076472402096824, "grad_norm": 0.2280118465423584, "learning_rate": 2.0565844106903584e-05, "loss": 0.4127, "step": 10101 }, { "epoch": 2.0766779730702023, "grad_norm": 0.2156902402639389, "learning_rate": 2.0557457247514045e-05, "loss": 0.4023, "step": 10102 }, { "epoch": 2.076883544043581, "grad_norm": 0.22840487957000732, "learning_rate": 2.0549071592294338e-05, "loss": 0.411, "step": 10103 }, { "epoch": 2.0770891150169595, "grad_norm": 0.22176077961921692, "learning_rate": 2.0540687141657576e-05, "loss": 0.3836, "step": 10104 }, { "epoch": 2.077294685990338, "grad_norm": 0.2274215966463089, "learning_rate": 2.053230389601685e-05, "loss": 0.4141, "step": 10105 }, { "epoch": 2.0775002569637167, "grad_norm": 0.2207675725221634, "learning_rate": 2.052392185578515e-05, "loss": 0.3992, "step": 10106 }, { "epoch": 2.0777058279370952, "grad_norm": 0.23283138871192932, "learning_rate": 2.051554102137542e-05, "loss": 0.3971, "step": 10107 }, { "epoch": 2.077911398910474, "grad_norm": 0.24435223639011383, "learning_rate": 2.0507161393200547e-05, "loss": 0.3989, "step": 10108 }, { "epoch": 2.0781169698838524, "grad_norm": 0.22710062563419342, "learning_rate": 2.0498782971673353e-05, "loss": 0.3999, "step": 10109 }, { "epoch": 2.078322540857231, "grad_norm": 0.22904515266418457, "learning_rate": 2.0490405757206597e-05, "loss": 0.3923, "step": 10110 }, { "epoch": 2.0785281118306096, "grad_norm": 0.13042299449443817, "learning_rate": 2.0482029750212982e-05, "loss": 0.4425, "step": 10111 }, { "epoch": 2.078733682803988, "grad_norm": 0.13699179887771606, "learning_rate": 2.0473654951105176e-05, "loss": 0.4472, "step": 10112 }, { "epoch": 2.0789392537773668, "grad_norm": 0.228811115026474, "learning_rate": 2.046528136029576e-05, "loss": 0.4027, "step": 10113 }, { "epoch": 2.0791448247507454, "grad_norm": 0.21991683542728424, "learning_rate": 2.0456908978197252e-05, "loss": 0.3894, "step": 10114 }, { "epoch": 2.079350395724124, "grad_norm": 0.2570091485977173, "learning_rate": 2.0448537805222124e-05, "loss": 0.3982, "step": 10115 }, { "epoch": 2.0795559666975025, "grad_norm": 0.22256134450435638, "learning_rate": 2.0440167841782787e-05, "loss": 0.387, "step": 10116 }, { "epoch": 2.0797615376708807, "grad_norm": 0.22056585550308228, "learning_rate": 2.0431799088291588e-05, "loss": 0.3988, "step": 10117 }, { "epoch": 2.0799671086442593, "grad_norm": 0.21491390466690063, "learning_rate": 2.04234315451608e-05, "loss": 0.4163, "step": 10118 }, { "epoch": 2.080172679617638, "grad_norm": 0.21639686822891235, "learning_rate": 2.0415065212802687e-05, "loss": 0.3965, "step": 10119 }, { "epoch": 2.0803782505910164, "grad_norm": 0.2295675426721573, "learning_rate": 2.04067000916294e-05, "loss": 0.3914, "step": 10120 }, { "epoch": 2.080583821564395, "grad_norm": 0.12334294617176056, "learning_rate": 2.039833618205305e-05, "loss": 0.4585, "step": 10121 }, { "epoch": 2.0807893925377736, "grad_norm": 0.221688911318779, "learning_rate": 2.0389973484485674e-05, "loss": 0.3932, "step": 10122 }, { "epoch": 2.080994963511152, "grad_norm": 0.22646862268447876, "learning_rate": 2.0381611999339288e-05, "loss": 0.3961, "step": 10123 }, { "epoch": 2.081200534484531, "grad_norm": 0.12576346099376678, "learning_rate": 2.037325172702582e-05, "loss": 0.4689, "step": 10124 }, { "epoch": 2.0814061054579094, "grad_norm": 0.2239895462989807, "learning_rate": 2.0364892667957114e-05, "loss": 0.3882, "step": 10125 }, { "epoch": 2.081611676431288, "grad_norm": 0.2263174057006836, "learning_rate": 2.035653482254502e-05, "loss": 0.4017, "step": 10126 }, { "epoch": 2.0818172474046666, "grad_norm": 0.22486351430416107, "learning_rate": 2.034817819120127e-05, "loss": 0.3867, "step": 10127 }, { "epoch": 2.082022818378045, "grad_norm": 0.12152829766273499, "learning_rate": 2.0339822774337562e-05, "loss": 0.467, "step": 10128 }, { "epoch": 2.0822283893514237, "grad_norm": 0.2354230135679245, "learning_rate": 2.0331468572365525e-05, "loss": 0.4021, "step": 10129 }, { "epoch": 2.0824339603248023, "grad_norm": 0.23212236166000366, "learning_rate": 2.0323115585696726e-05, "loss": 0.3827, "step": 10130 }, { "epoch": 2.082639531298181, "grad_norm": 0.2421758621931076, "learning_rate": 2.031476381474267e-05, "loss": 0.3984, "step": 10131 }, { "epoch": 2.082845102271559, "grad_norm": 0.21930502355098724, "learning_rate": 2.0306413259914836e-05, "loss": 0.3948, "step": 10132 }, { "epoch": 2.0830506732449376, "grad_norm": 0.2258896678686142, "learning_rate": 2.0298063921624603e-05, "loss": 0.3935, "step": 10133 }, { "epoch": 2.0832562442183162, "grad_norm": 0.2229209989309311, "learning_rate": 2.0289715800283306e-05, "loss": 0.3999, "step": 10134 }, { "epoch": 2.083461815191695, "grad_norm": 0.22103843092918396, "learning_rate": 2.0281368896302212e-05, "loss": 0.3988, "step": 10135 }, { "epoch": 2.0836673861650734, "grad_norm": 0.22075578570365906, "learning_rate": 2.0273023210092543e-05, "loss": 0.394, "step": 10136 }, { "epoch": 2.083872957138452, "grad_norm": 0.22386351227760315, "learning_rate": 2.026467874206545e-05, "loss": 0.3929, "step": 10137 }, { "epoch": 2.0840785281118306, "grad_norm": 0.22971957921981812, "learning_rate": 2.0256335492631997e-05, "loss": 0.402, "step": 10138 }, { "epoch": 2.084284099085209, "grad_norm": 0.2303125262260437, "learning_rate": 2.024799346220326e-05, "loss": 0.3955, "step": 10139 }, { "epoch": 2.0844896700585878, "grad_norm": 0.23009240627288818, "learning_rate": 2.0239652651190203e-05, "loss": 0.3969, "step": 10140 }, { "epoch": 2.0846952410319664, "grad_norm": 0.22021248936653137, "learning_rate": 2.0231313060003725e-05, "loss": 0.4248, "step": 10141 }, { "epoch": 2.084900812005345, "grad_norm": 0.2278214991092682, "learning_rate": 2.0222974689054684e-05, "loss": 0.4051, "step": 10142 }, { "epoch": 2.0851063829787235, "grad_norm": 0.2289620041847229, "learning_rate": 2.0214637538753872e-05, "loss": 0.3883, "step": 10143 }, { "epoch": 2.085311953952102, "grad_norm": 0.21833720803260803, "learning_rate": 2.0206301609512006e-05, "loss": 0.3837, "step": 10144 }, { "epoch": 2.0855175249254807, "grad_norm": 0.225063756108284, "learning_rate": 2.0197966901739792e-05, "loss": 0.4063, "step": 10145 }, { "epoch": 2.0857230958988593, "grad_norm": 0.22253869473934174, "learning_rate": 2.0189633415847808e-05, "loss": 0.3882, "step": 10146 }, { "epoch": 2.0859286668722374, "grad_norm": 0.22410309314727783, "learning_rate": 2.0181301152246636e-05, "loss": 0.4163, "step": 10147 }, { "epoch": 2.086134237845616, "grad_norm": 0.2274530529975891, "learning_rate": 2.0172970111346756e-05, "loss": 0.405, "step": 10148 }, { "epoch": 2.0863398088189946, "grad_norm": 0.12682540714740753, "learning_rate": 2.01646402935586e-05, "loss": 0.4591, "step": 10149 }, { "epoch": 2.086545379792373, "grad_norm": 0.22507302463054657, "learning_rate": 2.015631169929253e-05, "loss": 0.4096, "step": 10150 }, { "epoch": 2.086750950765752, "grad_norm": 0.2234722524881363, "learning_rate": 2.014798432895887e-05, "loss": 0.3815, "step": 10151 }, { "epoch": 2.0869565217391304, "grad_norm": 0.231471449136734, "learning_rate": 2.0139658182967842e-05, "loss": 0.4016, "step": 10152 }, { "epoch": 2.087162092712509, "grad_norm": 0.2210719883441925, "learning_rate": 2.0131333261729683e-05, "loss": 0.3896, "step": 10153 }, { "epoch": 2.0873676636858876, "grad_norm": 0.22725726664066315, "learning_rate": 2.012300956565449e-05, "loss": 0.3893, "step": 10154 }, { "epoch": 2.087573234659266, "grad_norm": 0.21838733553886414, "learning_rate": 2.011468709515234e-05, "loss": 0.3981, "step": 10155 }, { "epoch": 2.0877788056326447, "grad_norm": 0.22779439389705658, "learning_rate": 2.010636585063325e-05, "loss": 0.4055, "step": 10156 }, { "epoch": 2.0879843766060233, "grad_norm": 0.2215360850095749, "learning_rate": 2.009804583250716e-05, "loss": 0.3861, "step": 10157 }, { "epoch": 2.088189947579402, "grad_norm": 0.22047077119350433, "learning_rate": 2.008972704118396e-05, "loss": 0.3813, "step": 10158 }, { "epoch": 2.0883955185527805, "grad_norm": 0.22012098133563995, "learning_rate": 2.008140947707346e-05, "loss": 0.4157, "step": 10159 }, { "epoch": 2.088601089526159, "grad_norm": 0.22172100841999054, "learning_rate": 2.0073093140585463e-05, "loss": 0.4031, "step": 10160 }, { "epoch": 2.0888066604995377, "grad_norm": 0.2272823601961136, "learning_rate": 2.0064778032129662e-05, "loss": 0.4071, "step": 10161 }, { "epoch": 2.089012231472916, "grad_norm": 0.22149771451950073, "learning_rate": 2.0056464152115694e-05, "loss": 0.3809, "step": 10162 }, { "epoch": 2.0892178024462944, "grad_norm": 0.1271590292453766, "learning_rate": 2.004815150095316e-05, "loss": 0.4552, "step": 10163 }, { "epoch": 2.089423373419673, "grad_norm": 0.21896865963935852, "learning_rate": 2.003984007905157e-05, "loss": 0.3918, "step": 10164 }, { "epoch": 2.0896289443930516, "grad_norm": 0.1302296221256256, "learning_rate": 2.003152988682038e-05, "loss": 0.4527, "step": 10165 }, { "epoch": 2.08983451536643, "grad_norm": 0.2259882539510727, "learning_rate": 2.002322092466903e-05, "loss": 0.3874, "step": 10166 }, { "epoch": 2.0900400863398088, "grad_norm": 0.22086426615715027, "learning_rate": 2.001491319300684e-05, "loss": 0.3821, "step": 10167 }, { "epoch": 2.0902456573131873, "grad_norm": 0.1255428045988083, "learning_rate": 2.0006606692243083e-05, "loss": 0.4736, "step": 10168 }, { "epoch": 2.090451228286566, "grad_norm": 0.21999509632587433, "learning_rate": 1.9998301422787013e-05, "loss": 0.3945, "step": 10169 }, { "epoch": 2.0906567992599445, "grad_norm": 0.22573313117027283, "learning_rate": 1.9989997385047776e-05, "loss": 0.4072, "step": 10170 }, { "epoch": 2.090862370233323, "grad_norm": 0.13236026465892792, "learning_rate": 1.9981694579434462e-05, "loss": 0.4539, "step": 10171 }, { "epoch": 2.0910679412067017, "grad_norm": 0.22156447172164917, "learning_rate": 1.997339300635613e-05, "loss": 0.3903, "step": 10172 }, { "epoch": 2.0912735121800803, "grad_norm": 0.22315345704555511, "learning_rate": 1.996509266622173e-05, "loss": 0.4011, "step": 10173 }, { "epoch": 2.091479083153459, "grad_norm": 0.210943341255188, "learning_rate": 1.9956793559440223e-05, "loss": 0.4072, "step": 10174 }, { "epoch": 2.0916846541268375, "grad_norm": 0.21411919593811035, "learning_rate": 1.994849568642044e-05, "loss": 0.3907, "step": 10175 }, { "epoch": 2.091890225100216, "grad_norm": 0.21381069719791412, "learning_rate": 1.9940199047571183e-05, "loss": 0.3825, "step": 10176 }, { "epoch": 2.092095796073594, "grad_norm": 0.22450023889541626, "learning_rate": 1.9931903643301194e-05, "loss": 0.4092, "step": 10177 }, { "epoch": 2.092301367046973, "grad_norm": 0.22319452464580536, "learning_rate": 1.9923609474019144e-05, "loss": 0.3992, "step": 10178 }, { "epoch": 2.0925069380203514, "grad_norm": 0.22775287926197052, "learning_rate": 1.9915316540133648e-05, "loss": 0.4082, "step": 10179 }, { "epoch": 2.09271250899373, "grad_norm": 0.22660957276821136, "learning_rate": 1.990702484205324e-05, "loss": 0.4158, "step": 10180 }, { "epoch": 2.0929180799671085, "grad_norm": 0.21837587654590607, "learning_rate": 1.9898734380186455e-05, "loss": 0.4005, "step": 10181 }, { "epoch": 2.093123650940487, "grad_norm": 0.22248844802379608, "learning_rate": 1.98904451549417e-05, "loss": 0.4156, "step": 10182 }, { "epoch": 2.0933292219138657, "grad_norm": 0.2321203351020813, "learning_rate": 1.988215716672736e-05, "loss": 0.3997, "step": 10183 }, { "epoch": 2.0935347928872443, "grad_norm": 0.22257383167743683, "learning_rate": 1.9873870415951728e-05, "loss": 0.4017, "step": 10184 }, { "epoch": 2.093740363860623, "grad_norm": 0.22243481874465942, "learning_rate": 1.986558490302306e-05, "loss": 0.3935, "step": 10185 }, { "epoch": 2.0939459348340015, "grad_norm": 0.2248910516500473, "learning_rate": 1.9857300628349532e-05, "loss": 0.3968, "step": 10186 }, { "epoch": 2.09415150580738, "grad_norm": 0.22491000592708588, "learning_rate": 1.98490175923393e-05, "loss": 0.3925, "step": 10187 }, { "epoch": 2.0943570767807587, "grad_norm": 0.22509317100048065, "learning_rate": 1.9840735795400418e-05, "loss": 0.4006, "step": 10188 }, { "epoch": 2.0945626477541373, "grad_norm": 0.2187567800283432, "learning_rate": 1.9832455237940873e-05, "loss": 0.4097, "step": 10189 }, { "epoch": 2.094768218727516, "grad_norm": 0.12614451348781586, "learning_rate": 1.9824175920368644e-05, "loss": 0.4585, "step": 10190 }, { "epoch": 2.0949737897008944, "grad_norm": 0.2270839512348175, "learning_rate": 1.981589784309159e-05, "loss": 0.4005, "step": 10191 }, { "epoch": 2.0951793606742726, "grad_norm": 0.22762618958950043, "learning_rate": 1.9807621006517543e-05, "loss": 0.386, "step": 10192 }, { "epoch": 2.095384931647651, "grad_norm": 0.23058810830116272, "learning_rate": 1.9799345411054263e-05, "loss": 0.3889, "step": 10193 }, { "epoch": 2.0955905026210297, "grad_norm": 0.22418002784252167, "learning_rate": 1.9791071057109426e-05, "loss": 0.3864, "step": 10194 }, { "epoch": 2.0957960735944083, "grad_norm": 0.23092950880527496, "learning_rate": 1.9782797945090707e-05, "loss": 0.4238, "step": 10195 }, { "epoch": 2.096001644567787, "grad_norm": 0.2287166863679886, "learning_rate": 1.977452607540567e-05, "loss": 0.3985, "step": 10196 }, { "epoch": 2.0962072155411655, "grad_norm": 0.22596527636051178, "learning_rate": 1.9766255448461836e-05, "loss": 0.4052, "step": 10197 }, { "epoch": 2.096412786514544, "grad_norm": 0.1205587163567543, "learning_rate": 1.9757986064666647e-05, "loss": 0.4629, "step": 10198 }, { "epoch": 2.0966183574879227, "grad_norm": 0.2247573435306549, "learning_rate": 1.9749717924427508e-05, "loss": 0.389, "step": 10199 }, { "epoch": 2.0968239284613013, "grad_norm": 0.12539906799793243, "learning_rate": 1.9741451028151723e-05, "loss": 0.4471, "step": 10200 }, { "epoch": 2.09702949943468, "grad_norm": 0.22345465421676636, "learning_rate": 1.9733185376246612e-05, "loss": 0.3977, "step": 10201 }, { "epoch": 2.0972350704080585, "grad_norm": 0.21945199370384216, "learning_rate": 1.9724920969119356e-05, "loss": 0.3732, "step": 10202 }, { "epoch": 2.097440641381437, "grad_norm": 0.2249259501695633, "learning_rate": 1.9716657807177112e-05, "loss": 0.3822, "step": 10203 }, { "epoch": 2.0976462123548156, "grad_norm": 0.22166243195533752, "learning_rate": 1.9708395890826962e-05, "loss": 0.3932, "step": 10204 }, { "epoch": 2.097851783328194, "grad_norm": 0.22853392362594604, "learning_rate": 1.9700135220475934e-05, "loss": 0.4078, "step": 10205 }, { "epoch": 2.098057354301573, "grad_norm": 0.22204367816448212, "learning_rate": 1.969187579653099e-05, "loss": 0.3897, "step": 10206 }, { "epoch": 2.098262925274951, "grad_norm": 0.21821551024913788, "learning_rate": 1.968361761939902e-05, "loss": 0.4099, "step": 10207 }, { "epoch": 2.0984684962483295, "grad_norm": 0.21198779344558716, "learning_rate": 1.96753606894869e-05, "loss": 0.4046, "step": 10208 }, { "epoch": 2.098674067221708, "grad_norm": 0.22482189536094666, "learning_rate": 1.966710500720139e-05, "loss": 0.4052, "step": 10209 }, { "epoch": 2.0988796381950867, "grad_norm": 0.22468796372413635, "learning_rate": 1.9658850572949195e-05, "loss": 0.3828, "step": 10210 }, { "epoch": 2.0990852091684653, "grad_norm": 0.1260218471288681, "learning_rate": 1.9650597387137008e-05, "loss": 0.4485, "step": 10211 }, { "epoch": 2.099290780141844, "grad_norm": 0.23379628360271454, "learning_rate": 1.96423454501714e-05, "loss": 0.4066, "step": 10212 }, { "epoch": 2.0994963511152225, "grad_norm": 0.22664855420589447, "learning_rate": 1.9634094762458916e-05, "loss": 0.4069, "step": 10213 }, { "epoch": 2.099701922088601, "grad_norm": 0.23156146705150604, "learning_rate": 1.9625845324406e-05, "loss": 0.4082, "step": 10214 }, { "epoch": 2.0999074930619797, "grad_norm": 0.12826383113861084, "learning_rate": 1.9617597136419107e-05, "loss": 0.4626, "step": 10215 }, { "epoch": 2.1001130640353582, "grad_norm": 0.2237342894077301, "learning_rate": 1.960935019890456e-05, "loss": 0.4013, "step": 10216 }, { "epoch": 2.100318635008737, "grad_norm": 0.1258496642112732, "learning_rate": 1.960110451226866e-05, "loss": 0.4512, "step": 10217 }, { "epoch": 2.1005242059821154, "grad_norm": 0.23888415098190308, "learning_rate": 1.9592860076917626e-05, "loss": 0.4139, "step": 10218 }, { "epoch": 2.100729776955494, "grad_norm": 0.12545999884605408, "learning_rate": 1.9584616893257618e-05, "loss": 0.4433, "step": 10219 }, { "epoch": 2.1009353479288726, "grad_norm": 0.22233633697032928, "learning_rate": 1.9576374961694747e-05, "loss": 0.4026, "step": 10220 }, { "epoch": 2.101140918902251, "grad_norm": 0.218837171792984, "learning_rate": 1.956813428263504e-05, "loss": 0.3964, "step": 10221 }, { "epoch": 2.1013464898756298, "grad_norm": 0.22407136857509613, "learning_rate": 1.9559894856484503e-05, "loss": 0.3996, "step": 10222 }, { "epoch": 2.101552060849008, "grad_norm": 0.22463653981685638, "learning_rate": 1.9551656683649034e-05, "loss": 0.3896, "step": 10223 }, { "epoch": 2.1017576318223865, "grad_norm": 0.22171586751937866, "learning_rate": 1.95434197645345e-05, "loss": 0.3992, "step": 10224 }, { "epoch": 2.101963202795765, "grad_norm": 0.2200179100036621, "learning_rate": 1.9535184099546695e-05, "loss": 0.4082, "step": 10225 }, { "epoch": 2.1021687737691437, "grad_norm": 0.11994064599275589, "learning_rate": 1.952694968909134e-05, "loss": 0.4613, "step": 10226 }, { "epoch": 2.1023743447425223, "grad_norm": 0.23581825196743011, "learning_rate": 1.9518716533574114e-05, "loss": 0.4014, "step": 10227 }, { "epoch": 2.102579915715901, "grad_norm": 0.2292327582836151, "learning_rate": 1.9510484633400608e-05, "loss": 0.3876, "step": 10228 }, { "epoch": 2.1027854866892794, "grad_norm": 0.22429661452770233, "learning_rate": 1.9502253988976407e-05, "loss": 0.3974, "step": 10229 }, { "epoch": 2.102991057662658, "grad_norm": 0.1279507577419281, "learning_rate": 1.9494024600706973e-05, "loss": 0.458, "step": 10230 }, { "epoch": 2.1031966286360366, "grad_norm": 0.14293161034584045, "learning_rate": 1.9485796468997733e-05, "loss": 0.4781, "step": 10231 }, { "epoch": 2.103402199609415, "grad_norm": 2.012324571609497, "learning_rate": 1.947756959425403e-05, "loss": 0.417, "step": 10232 }, { "epoch": 2.103607770582794, "grad_norm": 0.2304982990026474, "learning_rate": 1.94693439768812e-05, "loss": 0.4055, "step": 10233 }, { "epoch": 2.1038133415561724, "grad_norm": 0.23225271701812744, "learning_rate": 1.946111961728446e-05, "loss": 0.4127, "step": 10234 }, { "epoch": 2.104018912529551, "grad_norm": 0.2252538651227951, "learning_rate": 1.9452896515868974e-05, "loss": 0.3986, "step": 10235 }, { "epoch": 2.1042244835029296, "grad_norm": 0.2263312041759491, "learning_rate": 1.9444674673039884e-05, "loss": 0.3912, "step": 10236 }, { "epoch": 2.104430054476308, "grad_norm": 0.13533739745616913, "learning_rate": 1.9436454089202226e-05, "loss": 0.4608, "step": 10237 }, { "epoch": 2.1046356254496863, "grad_norm": 0.22458425164222717, "learning_rate": 1.9428234764760997e-05, "loss": 0.4091, "step": 10238 }, { "epoch": 2.104841196423065, "grad_norm": 0.23281507194042206, "learning_rate": 1.9420016700121114e-05, "loss": 0.4005, "step": 10239 }, { "epoch": 2.1050467673964435, "grad_norm": 0.13586680591106415, "learning_rate": 1.941179989568745e-05, "loss": 0.4477, "step": 10240 }, { "epoch": 2.105252338369822, "grad_norm": 0.23734326660633087, "learning_rate": 1.9403584351864806e-05, "loss": 0.4007, "step": 10241 }, { "epoch": 2.1054579093432007, "grad_norm": 0.14735311269760132, "learning_rate": 1.9395370069057907e-05, "loss": 0.437, "step": 10242 }, { "epoch": 2.1056634803165792, "grad_norm": 0.22844909131526947, "learning_rate": 1.9387157047671467e-05, "loss": 0.3974, "step": 10243 }, { "epoch": 2.105869051289958, "grad_norm": 0.23056820034980774, "learning_rate": 1.9378945288110086e-05, "loss": 0.4101, "step": 10244 }, { "epoch": 2.1060746222633364, "grad_norm": 0.3185328543186188, "learning_rate": 1.937073479077831e-05, "loss": 0.4083, "step": 10245 }, { "epoch": 2.106280193236715, "grad_norm": 0.22201012074947357, "learning_rate": 1.9362525556080648e-05, "loss": 0.3922, "step": 10246 }, { "epoch": 2.1064857642100936, "grad_norm": 0.2239248901605606, "learning_rate": 1.935431758442152e-05, "loss": 0.3834, "step": 10247 }, { "epoch": 2.106691335183472, "grad_norm": 0.23866835236549377, "learning_rate": 1.93461108762053e-05, "loss": 0.4183, "step": 10248 }, { "epoch": 2.1068969061568508, "grad_norm": 0.22566145658493042, "learning_rate": 1.933790543183627e-05, "loss": 0.3999, "step": 10249 }, { "epoch": 2.1071024771302294, "grad_norm": 0.12605851888656616, "learning_rate": 1.9329701251718715e-05, "loss": 0.4435, "step": 10250 }, { "epoch": 2.107308048103608, "grad_norm": 0.2505359947681427, "learning_rate": 1.9321498336256792e-05, "loss": 0.3997, "step": 10251 }, { "epoch": 2.1075136190769865, "grad_norm": 0.22146162390708923, "learning_rate": 1.9313296685854628e-05, "loss": 0.3939, "step": 10252 }, { "epoch": 2.1077191900503647, "grad_norm": 0.23050841689109802, "learning_rate": 1.9305096300916266e-05, "loss": 0.4322, "step": 10253 }, { "epoch": 2.1079247610237433, "grad_norm": 0.12850402295589447, "learning_rate": 1.929689718184572e-05, "loss": 0.4712, "step": 10254 }, { "epoch": 2.108130331997122, "grad_norm": 0.22386091947555542, "learning_rate": 1.9288699329046917e-05, "loss": 0.3985, "step": 10255 }, { "epoch": 2.1083359029705004, "grad_norm": 0.21962010860443115, "learning_rate": 1.9280502742923706e-05, "loss": 0.3824, "step": 10256 }, { "epoch": 2.108541473943879, "grad_norm": 0.2260153442621231, "learning_rate": 1.927230742387993e-05, "loss": 0.3941, "step": 10257 }, { "epoch": 2.1087470449172576, "grad_norm": 0.22931505739688873, "learning_rate": 1.926411337231932e-05, "loss": 0.3826, "step": 10258 }, { "epoch": 2.108952615890636, "grad_norm": 0.22665323317050934, "learning_rate": 1.9255920588645544e-05, "loss": 0.3905, "step": 10259 }, { "epoch": 2.109158186864015, "grad_norm": 0.12365376204252243, "learning_rate": 1.924772907326224e-05, "loss": 0.4274, "step": 10260 }, { "epoch": 2.1093637578373934, "grad_norm": 0.12186730653047562, "learning_rate": 1.923953882657296e-05, "loss": 0.4518, "step": 10261 }, { "epoch": 2.109569328810772, "grad_norm": 0.2289813756942749, "learning_rate": 1.9231349848981198e-05, "loss": 0.4068, "step": 10262 }, { "epoch": 2.1097748997841506, "grad_norm": 0.13003799319267273, "learning_rate": 1.922316214089037e-05, "loss": 0.4646, "step": 10263 }, { "epoch": 2.109980470757529, "grad_norm": 0.22192876040935516, "learning_rate": 1.921497570270388e-05, "loss": 0.3899, "step": 10264 }, { "epoch": 2.1101860417309077, "grad_norm": 0.24169708788394928, "learning_rate": 1.9206790534825012e-05, "loss": 0.3991, "step": 10265 }, { "epoch": 2.1103916127042863, "grad_norm": 0.12277937680482864, "learning_rate": 1.919860663765702e-05, "loss": 0.4448, "step": 10266 }, { "epoch": 2.110597183677665, "grad_norm": 0.22904759645462036, "learning_rate": 1.919042401160309e-05, "loss": 0.3916, "step": 10267 }, { "epoch": 2.1108027546510435, "grad_norm": 0.22709167003631592, "learning_rate": 1.9182242657066326e-05, "loss": 0.3872, "step": 10268 }, { "epoch": 2.1110083256244216, "grad_norm": 0.229196235537529, "learning_rate": 1.9174062574449796e-05, "loss": 0.4137, "step": 10269 }, { "epoch": 2.1112138965978002, "grad_norm": 0.2226954847574234, "learning_rate": 1.916588376415648e-05, "loss": 0.3845, "step": 10270 }, { "epoch": 2.111419467571179, "grad_norm": 0.21747919917106628, "learning_rate": 1.915770622658934e-05, "loss": 0.404, "step": 10271 }, { "epoch": 2.1116250385445574, "grad_norm": 0.22931161522865295, "learning_rate": 1.9149529962151223e-05, "loss": 0.4024, "step": 10272 }, { "epoch": 2.111830609517936, "grad_norm": 0.23304495215415955, "learning_rate": 1.9141354971244945e-05, "loss": 0.3922, "step": 10273 }, { "epoch": 2.1120361804913146, "grad_norm": 0.2212096005678177, "learning_rate": 1.9133181254273226e-05, "loss": 0.4006, "step": 10274 }, { "epoch": 2.112241751464693, "grad_norm": 0.13250161707401276, "learning_rate": 1.912500881163878e-05, "loss": 0.4599, "step": 10275 }, { "epoch": 2.1124473224380718, "grad_norm": 0.22626225650310516, "learning_rate": 1.911683764374421e-05, "loss": 0.4085, "step": 10276 }, { "epoch": 2.1126528934114503, "grad_norm": 0.12602867186069489, "learning_rate": 1.9108667750992057e-05, "loss": 0.4627, "step": 10277 }, { "epoch": 2.112858464384829, "grad_norm": 0.23502740263938904, "learning_rate": 1.9100499133784848e-05, "loss": 0.4113, "step": 10278 }, { "epoch": 2.1130640353582075, "grad_norm": 0.24214279651641846, "learning_rate": 1.9092331792524986e-05, "loss": 0.3842, "step": 10279 }, { "epoch": 2.113269606331586, "grad_norm": 0.23367543518543243, "learning_rate": 1.908416572761485e-05, "loss": 0.3974, "step": 10280 }, { "epoch": 2.1134751773049647, "grad_norm": 0.2227569818496704, "learning_rate": 1.907600093945674e-05, "loss": 0.4011, "step": 10281 }, { "epoch": 2.1136807482783433, "grad_norm": 0.222117081284523, "learning_rate": 1.906783742845289e-05, "loss": 0.4013, "step": 10282 }, { "epoch": 2.113886319251722, "grad_norm": 0.13070207834243774, "learning_rate": 1.9059675195005468e-05, "loss": 0.4754, "step": 10283 }, { "epoch": 2.1140918902251, "grad_norm": 0.23519377410411835, "learning_rate": 1.905151423951662e-05, "loss": 0.4061, "step": 10284 }, { "epoch": 2.1142974611984786, "grad_norm": 0.16713115572929382, "learning_rate": 1.9043354562388385e-05, "loss": 0.4556, "step": 10285 }, { "epoch": 2.114503032171857, "grad_norm": 0.12903447449207306, "learning_rate": 1.903519616402275e-05, "loss": 0.4728, "step": 10286 }, { "epoch": 2.114708603145236, "grad_norm": 0.22464367747306824, "learning_rate": 1.9027039044821635e-05, "loss": 0.4061, "step": 10287 }, { "epoch": 2.1149141741186144, "grad_norm": 0.21755559742450714, "learning_rate": 1.9018883205186913e-05, "loss": 0.3932, "step": 10288 }, { "epoch": 2.115119745091993, "grad_norm": 0.133756622672081, "learning_rate": 1.901072864552038e-05, "loss": 0.457, "step": 10289 }, { "epoch": 2.1153253160653716, "grad_norm": 0.23208466172218323, "learning_rate": 1.9002575366223756e-05, "loss": 0.4064, "step": 10290 }, { "epoch": 2.11553088703875, "grad_norm": 0.13155633211135864, "learning_rate": 1.8994423367698753e-05, "loss": 0.4419, "step": 10291 }, { "epoch": 2.1157364580121287, "grad_norm": 0.2295832335948944, "learning_rate": 1.8986272650346955e-05, "loss": 0.3953, "step": 10292 }, { "epoch": 2.1159420289855073, "grad_norm": 0.2247355431318283, "learning_rate": 1.8978123214569915e-05, "loss": 0.3978, "step": 10293 }, { "epoch": 2.116147599958886, "grad_norm": 0.22480376064777374, "learning_rate": 1.8969975060769123e-05, "loss": 0.4201, "step": 10294 }, { "epoch": 2.1163531709322645, "grad_norm": 0.12718500196933746, "learning_rate": 1.896182818934598e-05, "loss": 0.4484, "step": 10295 }, { "epoch": 2.116558741905643, "grad_norm": 0.2338053286075592, "learning_rate": 1.8953682600701873e-05, "loss": 0.4009, "step": 10296 }, { "epoch": 2.1167643128790217, "grad_norm": 0.23438557982444763, "learning_rate": 1.894553829523808e-05, "loss": 0.3935, "step": 10297 }, { "epoch": 2.1169698838524003, "grad_norm": 0.2157134860754013, "learning_rate": 1.8937395273355834e-05, "loss": 0.3973, "step": 10298 }, { "epoch": 2.1171754548257784, "grad_norm": 0.2266354262828827, "learning_rate": 1.8929253535456313e-05, "loss": 0.406, "step": 10299 }, { "epoch": 2.117381025799157, "grad_norm": 0.2172161191701889, "learning_rate": 1.8921113081940612e-05, "loss": 0.3979, "step": 10300 }, { "epoch": 2.1175865967725356, "grad_norm": 0.22894109785556793, "learning_rate": 1.8912973913209784e-05, "loss": 0.4039, "step": 10301 }, { "epoch": 2.117792167745914, "grad_norm": 0.218611940741539, "learning_rate": 1.8904836029664802e-05, "loss": 0.3832, "step": 10302 }, { "epoch": 2.1179977387192928, "grad_norm": 0.22846931219100952, "learning_rate": 1.8896699431706573e-05, "loss": 0.4059, "step": 10303 }, { "epoch": 2.1182033096926713, "grad_norm": 0.22411483526229858, "learning_rate": 1.888856411973595e-05, "loss": 0.3933, "step": 10304 }, { "epoch": 2.11840888066605, "grad_norm": 0.22474461793899536, "learning_rate": 1.8880430094153738e-05, "loss": 0.4027, "step": 10305 }, { "epoch": 2.1186144516394285, "grad_norm": 0.22956325113773346, "learning_rate": 1.8872297355360653e-05, "loss": 0.397, "step": 10306 }, { "epoch": 2.118820022612807, "grad_norm": 0.23198306560516357, "learning_rate": 1.886416590375736e-05, "loss": 0.41, "step": 10307 }, { "epoch": 2.1190255935861857, "grad_norm": 0.22490225732326508, "learning_rate": 1.8856035739744447e-05, "loss": 0.396, "step": 10308 }, { "epoch": 2.1192311645595643, "grad_norm": 0.23693934082984924, "learning_rate": 1.8847906863722467e-05, "loss": 0.4054, "step": 10309 }, { "epoch": 2.119436735532943, "grad_norm": 0.22398188710212708, "learning_rate": 1.8839779276091875e-05, "loss": 0.399, "step": 10310 }, { "epoch": 2.1196423065063215, "grad_norm": 0.23093253374099731, "learning_rate": 1.883165297725307e-05, "loss": 0.4094, "step": 10311 }, { "epoch": 2.1198478774797, "grad_norm": 0.22496986389160156, "learning_rate": 1.8823527967606428e-05, "loss": 0.3819, "step": 10312 }, { "epoch": 2.1200534484530786, "grad_norm": 0.22796480357646942, "learning_rate": 1.8815404247552213e-05, "loss": 0.3996, "step": 10313 }, { "epoch": 2.120259019426457, "grad_norm": 0.22607813775539398, "learning_rate": 1.8807281817490647e-05, "loss": 0.3882, "step": 10314 }, { "epoch": 2.1204645903998354, "grad_norm": 0.2205992192029953, "learning_rate": 1.8799160677821882e-05, "loss": 0.3846, "step": 10315 }, { "epoch": 2.120670161373214, "grad_norm": 0.12466558814048767, "learning_rate": 1.879104082894601e-05, "loss": 0.4445, "step": 10316 }, { "epoch": 2.1208757323465925, "grad_norm": 0.12291921675205231, "learning_rate": 1.8782922271263033e-05, "loss": 0.4429, "step": 10317 }, { "epoch": 2.121081303319971, "grad_norm": 0.22178338468074799, "learning_rate": 1.8774805005172958e-05, "loss": 0.3842, "step": 10318 }, { "epoch": 2.1212868742933497, "grad_norm": 0.22737297415733337, "learning_rate": 1.8766689031075644e-05, "loss": 0.3988, "step": 10319 }, { "epoch": 2.1214924452667283, "grad_norm": 0.12307467311620712, "learning_rate": 1.875857434937097e-05, "loss": 0.4426, "step": 10320 }, { "epoch": 2.121698016240107, "grad_norm": 0.21922807395458221, "learning_rate": 1.8750460960458682e-05, "loss": 0.4063, "step": 10321 }, { "epoch": 2.1219035872134855, "grad_norm": 0.12798526883125305, "learning_rate": 1.8742348864738494e-05, "loss": 0.4517, "step": 10322 }, { "epoch": 2.122109158186864, "grad_norm": 0.12603412568569183, "learning_rate": 1.8734238062610044e-05, "loss": 0.4614, "step": 10323 }, { "epoch": 2.1223147291602427, "grad_norm": 0.22325001657009125, "learning_rate": 1.8726128554472924e-05, "loss": 0.3954, "step": 10324 }, { "epoch": 2.1225203001336213, "grad_norm": 0.2292872816324234, "learning_rate": 1.8718020340726634e-05, "loss": 0.3985, "step": 10325 }, { "epoch": 2.122725871107, "grad_norm": 0.23180240392684937, "learning_rate": 1.8709913421770648e-05, "loss": 0.4131, "step": 10326 }, { "epoch": 2.1229314420803784, "grad_norm": 0.12431956827640533, "learning_rate": 1.870180779800435e-05, "loss": 0.4345, "step": 10327 }, { "epoch": 2.123137013053757, "grad_norm": 0.13498254120349884, "learning_rate": 1.8693703469827067e-05, "loss": 0.4681, "step": 10328 }, { "epoch": 2.123342584027135, "grad_norm": 0.12030383944511414, "learning_rate": 1.8685600437638057e-05, "loss": 0.4469, "step": 10329 }, { "epoch": 2.1235481550005137, "grad_norm": 0.22829271852970123, "learning_rate": 1.867749870183652e-05, "loss": 0.3874, "step": 10330 }, { "epoch": 2.1237537259738923, "grad_norm": 0.21957705914974213, "learning_rate": 1.8669398262821593e-05, "loss": 0.3904, "step": 10331 }, { "epoch": 2.123959296947271, "grad_norm": 0.22618070244789124, "learning_rate": 1.8661299120992332e-05, "loss": 0.4029, "step": 10332 }, { "epoch": 2.1241648679206495, "grad_norm": 0.2359391301870346, "learning_rate": 1.8653201276747767e-05, "loss": 0.4119, "step": 10333 }, { "epoch": 2.124370438894028, "grad_norm": 0.21867458522319794, "learning_rate": 1.8645104730486828e-05, "loss": 0.3953, "step": 10334 }, { "epoch": 2.1245760098674067, "grad_norm": 0.22511562705039978, "learning_rate": 1.86370094826084e-05, "loss": 0.3824, "step": 10335 }, { "epoch": 2.1247815808407853, "grad_norm": 0.12738649547100067, "learning_rate": 1.8628915533511296e-05, "loss": 0.4281, "step": 10336 }, { "epoch": 2.124987151814164, "grad_norm": 0.22026711702346802, "learning_rate": 1.8620822883594267e-05, "loss": 0.3925, "step": 10337 }, { "epoch": 2.1251927227875425, "grad_norm": 0.22602379322052002, "learning_rate": 1.8612731533255976e-05, "loss": 0.3959, "step": 10338 }, { "epoch": 2.125398293760921, "grad_norm": 0.22942064702510834, "learning_rate": 1.860464148289509e-05, "loss": 0.4084, "step": 10339 }, { "epoch": 2.1256038647342996, "grad_norm": 0.22742587327957153, "learning_rate": 1.8596552732910148e-05, "loss": 0.4137, "step": 10340 }, { "epoch": 2.125809435707678, "grad_norm": 0.12401507049798965, "learning_rate": 1.8588465283699622e-05, "loss": 0.434, "step": 10341 }, { "epoch": 2.126015006681057, "grad_norm": 0.21955260634422302, "learning_rate": 1.858037913566198e-05, "loss": 0.4068, "step": 10342 }, { "epoch": 2.1262205776544354, "grad_norm": 0.1239282488822937, "learning_rate": 1.8572294289195576e-05, "loss": 0.4364, "step": 10343 }, { "epoch": 2.1264261486278135, "grad_norm": 0.2231699824333191, "learning_rate": 1.8564210744698707e-05, "loss": 0.3928, "step": 10344 }, { "epoch": 2.126631719601192, "grad_norm": 0.12479789555072784, "learning_rate": 1.8556128502569618e-05, "loss": 0.4482, "step": 10345 }, { "epoch": 2.1268372905745707, "grad_norm": 0.2382933497428894, "learning_rate": 1.8548047563206465e-05, "loss": 0.4012, "step": 10346 }, { "epoch": 2.1270428615479493, "grad_norm": 0.23470161855220795, "learning_rate": 1.853996792700738e-05, "loss": 0.3967, "step": 10347 }, { "epoch": 2.127248432521328, "grad_norm": 0.22285513579845428, "learning_rate": 1.8531889594370406e-05, "loss": 0.4076, "step": 10348 }, { "epoch": 2.1274540034947065, "grad_norm": 0.23410557210445404, "learning_rate": 1.8523812565693522e-05, "loss": 0.4086, "step": 10349 }, { "epoch": 2.127659574468085, "grad_norm": 0.2240322232246399, "learning_rate": 1.8515736841374643e-05, "loss": 0.4091, "step": 10350 }, { "epoch": 2.1278651454414637, "grad_norm": 0.22299052774906158, "learning_rate": 1.8507662421811618e-05, "loss": 0.3762, "step": 10351 }, { "epoch": 2.1280707164148422, "grad_norm": 0.23107583820819855, "learning_rate": 1.8499589307402244e-05, "loss": 0.3983, "step": 10352 }, { "epoch": 2.128276287388221, "grad_norm": 0.22548127174377441, "learning_rate": 1.8491517498544227e-05, "loss": 0.4028, "step": 10353 }, { "epoch": 2.1284818583615994, "grad_norm": 0.2753831744194031, "learning_rate": 1.848344699563526e-05, "loss": 0.423, "step": 10354 }, { "epoch": 2.128687429334978, "grad_norm": 0.22851170599460602, "learning_rate": 1.847537779907292e-05, "loss": 0.3987, "step": 10355 }, { "epoch": 2.1288930003083566, "grad_norm": 0.2307175248861313, "learning_rate": 1.8467309909254737e-05, "loss": 0.4081, "step": 10356 }, { "epoch": 2.129098571281735, "grad_norm": 0.2297874242067337, "learning_rate": 1.8459243326578183e-05, "loss": 0.406, "step": 10357 }, { "epoch": 2.1293041422551138, "grad_norm": 0.12997567653656006, "learning_rate": 1.845117805144066e-05, "loss": 0.436, "step": 10358 }, { "epoch": 2.129509713228492, "grad_norm": 0.133535698056221, "learning_rate": 1.844311408423949e-05, "loss": 0.4471, "step": 10359 }, { "epoch": 2.1297152842018705, "grad_norm": 0.1276518702507019, "learning_rate": 1.843505142537198e-05, "loss": 0.4424, "step": 10360 }, { "epoch": 2.129920855175249, "grad_norm": 0.12228768318891525, "learning_rate": 1.842699007523532e-05, "loss": 0.4467, "step": 10361 }, { "epoch": 2.1301264261486277, "grad_norm": 0.2285146862268448, "learning_rate": 1.841893003422664e-05, "loss": 0.4019, "step": 10362 }, { "epoch": 2.1303319971220063, "grad_norm": 0.22430342435836792, "learning_rate": 1.8410871302743054e-05, "loss": 0.4207, "step": 10363 }, { "epoch": 2.130537568095385, "grad_norm": 0.214961439371109, "learning_rate": 1.8402813881181563e-05, "loss": 0.3986, "step": 10364 }, { "epoch": 2.1307431390687634, "grad_norm": 0.23033976554870605, "learning_rate": 1.8394757769939117e-05, "loss": 0.3853, "step": 10365 }, { "epoch": 2.130948710042142, "grad_norm": 0.12601739168167114, "learning_rate": 1.8386702969412583e-05, "loss": 0.438, "step": 10366 }, { "epoch": 2.1311542810155206, "grad_norm": 0.23412902653217316, "learning_rate": 1.8378649479998827e-05, "loss": 0.3996, "step": 10367 }, { "epoch": 2.131359851988899, "grad_norm": 0.2274925261735916, "learning_rate": 1.8370597302094577e-05, "loss": 0.388, "step": 10368 }, { "epoch": 2.131565422962278, "grad_norm": 0.1298505961894989, "learning_rate": 1.8362546436096537e-05, "loss": 0.4471, "step": 10369 }, { "epoch": 2.1317709939356564, "grad_norm": 0.23555243015289307, "learning_rate": 1.8354496882401327e-05, "loss": 0.3892, "step": 10370 }, { "epoch": 2.131976564909035, "grad_norm": 0.2312725931406021, "learning_rate": 1.8346448641405517e-05, "loss": 0.386, "step": 10371 }, { "epoch": 2.1321821358824136, "grad_norm": 0.22951969504356384, "learning_rate": 1.8338401713505603e-05, "loss": 0.407, "step": 10372 }, { "epoch": 2.132387706855792, "grad_norm": 0.22569020092487335, "learning_rate": 1.8330356099098006e-05, "loss": 0.3961, "step": 10373 }, { "epoch": 2.1325932778291703, "grad_norm": 0.2186949998140335, "learning_rate": 1.8322311798579125e-05, "loss": 0.3827, "step": 10374 }, { "epoch": 2.132798848802549, "grad_norm": 0.22884011268615723, "learning_rate": 1.8314268812345248e-05, "loss": 0.3973, "step": 10375 }, { "epoch": 2.1330044197759275, "grad_norm": 0.1293371617794037, "learning_rate": 1.8306227140792622e-05, "loss": 0.4564, "step": 10376 }, { "epoch": 2.133209990749306, "grad_norm": 0.22477327287197113, "learning_rate": 1.829818678431742e-05, "loss": 0.3865, "step": 10377 }, { "epoch": 2.1334155617226847, "grad_norm": 0.22367890179157257, "learning_rate": 1.8290147743315746e-05, "loss": 0.3733, "step": 10378 }, { "epoch": 2.1336211326960632, "grad_norm": 0.23502875864505768, "learning_rate": 1.8282110018183656e-05, "loss": 0.4037, "step": 10379 }, { "epoch": 2.133826703669442, "grad_norm": 0.12768757343292236, "learning_rate": 1.8274073609317106e-05, "loss": 0.4562, "step": 10380 }, { "epoch": 2.1340322746428204, "grad_norm": 0.23585356771945953, "learning_rate": 1.826603851711205e-05, "loss": 0.3938, "step": 10381 }, { "epoch": 2.134237845616199, "grad_norm": 0.23149564862251282, "learning_rate": 1.825800474196432e-05, "loss": 0.3848, "step": 10382 }, { "epoch": 2.1344434165895776, "grad_norm": 0.23342165350914001, "learning_rate": 1.824997228426969e-05, "loss": 0.4179, "step": 10383 }, { "epoch": 2.134648987562956, "grad_norm": 0.2237035036087036, "learning_rate": 1.8241941144423916e-05, "loss": 0.4023, "step": 10384 }, { "epoch": 2.1348545585363348, "grad_norm": 0.2252335101366043, "learning_rate": 1.8233911322822632e-05, "loss": 0.3956, "step": 10385 }, { "epoch": 2.1350601295097134, "grad_norm": 0.2148154377937317, "learning_rate": 1.822588281986143e-05, "loss": 0.3835, "step": 10386 }, { "epoch": 2.135265700483092, "grad_norm": 0.11948797851800919, "learning_rate": 1.8217855635935827e-05, "loss": 0.4476, "step": 10387 }, { "epoch": 2.1354712714564705, "grad_norm": 0.22916093468666077, "learning_rate": 1.8209829771441314e-05, "loss": 0.3903, "step": 10388 }, { "epoch": 2.1356768424298487, "grad_norm": 0.21855413913726807, "learning_rate": 1.820180522677327e-05, "loss": 0.3972, "step": 10389 }, { "epoch": 2.1358824134032273, "grad_norm": 0.13248126208782196, "learning_rate": 1.819378200232703e-05, "loss": 0.4453, "step": 10390 }, { "epoch": 2.136087984376606, "grad_norm": 0.22880522906780243, "learning_rate": 1.818576009849786e-05, "loss": 0.3987, "step": 10391 }, { "epoch": 2.1362935553499844, "grad_norm": 0.24837420880794525, "learning_rate": 1.8177739515680953e-05, "loss": 0.3857, "step": 10392 }, { "epoch": 2.136499126323363, "grad_norm": 0.23082508146762848, "learning_rate": 1.816972025427146e-05, "loss": 0.421, "step": 10393 }, { "epoch": 2.1367046972967416, "grad_norm": 0.1307905912399292, "learning_rate": 1.8161702314664423e-05, "loss": 0.4584, "step": 10394 }, { "epoch": 2.13691026827012, "grad_norm": 0.23677071928977966, "learning_rate": 1.815368569725489e-05, "loss": 0.4082, "step": 10395 }, { "epoch": 2.137115839243499, "grad_norm": 0.1245460957288742, "learning_rate": 1.8145670402437787e-05, "loss": 0.4332, "step": 10396 }, { "epoch": 2.1373214102168774, "grad_norm": 0.2281726449728012, "learning_rate": 1.8137656430607986e-05, "loss": 0.3907, "step": 10397 }, { "epoch": 2.137526981190256, "grad_norm": 0.11899819225072861, "learning_rate": 1.8129643782160294e-05, "loss": 0.449, "step": 10398 }, { "epoch": 2.1377325521636346, "grad_norm": 0.23056533932685852, "learning_rate": 1.8121632457489465e-05, "loss": 0.4015, "step": 10399 }, { "epoch": 2.137938123137013, "grad_norm": 0.2260628640651703, "learning_rate": 1.8113622456990175e-05, "loss": 0.3938, "step": 10400 }, { "epoch": 2.1381436941103917, "grad_norm": 0.22494405508041382, "learning_rate": 1.810561378105702e-05, "loss": 0.3994, "step": 10401 }, { "epoch": 2.1383492650837703, "grad_norm": 0.22264499962329865, "learning_rate": 1.809760643008459e-05, "loss": 0.3904, "step": 10402 }, { "epoch": 2.138554836057149, "grad_norm": 0.2253665328025818, "learning_rate": 1.808960040446735e-05, "loss": 0.3998, "step": 10403 }, { "epoch": 2.138760407030527, "grad_norm": 0.12751929461956024, "learning_rate": 1.8081595704599718e-05, "loss": 0.4584, "step": 10404 }, { "epoch": 2.1389659780039056, "grad_norm": 0.1251654028892517, "learning_rate": 1.8073592330876034e-05, "loss": 0.4494, "step": 10405 }, { "epoch": 2.1391715489772842, "grad_norm": 0.12770125269889832, "learning_rate": 1.8065590283690614e-05, "loss": 0.436, "step": 10406 }, { "epoch": 2.139377119950663, "grad_norm": 0.22460491955280304, "learning_rate": 1.8057589563437675e-05, "loss": 0.3837, "step": 10407 }, { "epoch": 2.1395826909240414, "grad_norm": 0.2189689576625824, "learning_rate": 1.8049590170511354e-05, "loss": 0.4027, "step": 10408 }, { "epoch": 2.13978826189742, "grad_norm": 0.22947020828723907, "learning_rate": 1.804159210530577e-05, "loss": 0.3883, "step": 10409 }, { "epoch": 2.1399938328707986, "grad_norm": 0.22392447292804718, "learning_rate": 1.8033595368214945e-05, "loss": 0.3933, "step": 10410 }, { "epoch": 2.140199403844177, "grad_norm": 0.23469264805316925, "learning_rate": 1.8025599959632835e-05, "loss": 0.4153, "step": 10411 }, { "epoch": 2.1404049748175558, "grad_norm": 0.2271226942539215, "learning_rate": 1.8017605879953335e-05, "loss": 0.396, "step": 10412 }, { "epoch": 2.1406105457909343, "grad_norm": 0.2269534021615982, "learning_rate": 1.8009613129570278e-05, "loss": 0.401, "step": 10413 }, { "epoch": 2.140816116764313, "grad_norm": 0.22716417908668518, "learning_rate": 1.800162170887743e-05, "loss": 0.3846, "step": 10414 }, { "epoch": 2.1410216877376915, "grad_norm": 0.13274461030960083, "learning_rate": 1.7993631618268472e-05, "loss": 0.448, "step": 10415 }, { "epoch": 2.14122725871107, "grad_norm": 0.22133229672908783, "learning_rate": 1.7985642858137076e-05, "loss": 0.3983, "step": 10416 }, { "epoch": 2.1414328296844487, "grad_norm": 0.21587035059928894, "learning_rate": 1.797765542887679e-05, "loss": 0.3917, "step": 10417 }, { "epoch": 2.1416384006578273, "grad_norm": 0.2158806473016739, "learning_rate": 1.796966933088112e-05, "loss": 0.3887, "step": 10418 }, { "epoch": 2.141843971631206, "grad_norm": 0.23333343863487244, "learning_rate": 1.7961684564543503e-05, "loss": 0.393, "step": 10419 }, { "epoch": 2.1420495426045845, "grad_norm": 0.21826335787773132, "learning_rate": 1.7953701130257313e-05, "loss": 0.3817, "step": 10420 }, { "epoch": 2.1422551135779626, "grad_norm": 0.12297184020280838, "learning_rate": 1.794571902841585e-05, "loss": 0.4548, "step": 10421 }, { "epoch": 2.142460684551341, "grad_norm": 0.12231001257896423, "learning_rate": 1.793773825941234e-05, "loss": 0.4505, "step": 10422 }, { "epoch": 2.14266625552472, "grad_norm": 0.2218412458896637, "learning_rate": 1.792975882364e-05, "loss": 0.3939, "step": 10423 }, { "epoch": 2.1428718264980984, "grad_norm": 0.1286546289920807, "learning_rate": 1.7921780721491914e-05, "loss": 0.4586, "step": 10424 }, { "epoch": 2.143077397471477, "grad_norm": 0.22066746652126312, "learning_rate": 1.7913803953361125e-05, "loss": 0.3819, "step": 10425 }, { "epoch": 2.1432829684448556, "grad_norm": 0.22369948029518127, "learning_rate": 1.7905828519640602e-05, "loss": 0.4186, "step": 10426 }, { "epoch": 2.143488539418234, "grad_norm": 0.12636181712150574, "learning_rate": 1.789785442072329e-05, "loss": 0.4643, "step": 10427 }, { "epoch": 2.1436941103916127, "grad_norm": 0.22555802762508392, "learning_rate": 1.788988165700201e-05, "loss": 0.3877, "step": 10428 }, { "epoch": 2.1438996813649913, "grad_norm": 0.2376098334789276, "learning_rate": 1.7881910228869535e-05, "loss": 0.3993, "step": 10429 }, { "epoch": 2.14410525233837, "grad_norm": 0.2282724678516388, "learning_rate": 1.787394013671861e-05, "loss": 0.3815, "step": 10430 }, { "epoch": 2.1443108233117485, "grad_norm": 0.22976957261562347, "learning_rate": 1.7865971380941866e-05, "loss": 0.3869, "step": 10431 }, { "epoch": 2.144516394285127, "grad_norm": 0.2277589738368988, "learning_rate": 1.7858003961931885e-05, "loss": 0.3927, "step": 10432 }, { "epoch": 2.1447219652585057, "grad_norm": 0.21987488865852356, "learning_rate": 1.785003788008119e-05, "loss": 0.3971, "step": 10433 }, { "epoch": 2.1449275362318843, "grad_norm": 0.22373713552951813, "learning_rate": 1.784207313578223e-05, "loss": 0.4124, "step": 10434 }, { "epoch": 2.145133107205263, "grad_norm": 0.22595758736133575, "learning_rate": 1.7834109729427376e-05, "loss": 0.4053, "step": 10435 }, { "epoch": 2.145338678178641, "grad_norm": 0.22213847935199738, "learning_rate": 1.782614766140898e-05, "loss": 0.3875, "step": 10436 }, { "epoch": 2.1455442491520196, "grad_norm": 0.127987802028656, "learning_rate": 1.7818186932119277e-05, "loss": 0.4445, "step": 10437 }, { "epoch": 2.145749820125398, "grad_norm": 0.22547675669193268, "learning_rate": 1.781022754195045e-05, "loss": 0.3897, "step": 10438 }, { "epoch": 2.1459553910987768, "grad_norm": 0.23386697471141815, "learning_rate": 1.780226949129464e-05, "loss": 0.3906, "step": 10439 }, { "epoch": 2.1461609620721553, "grad_norm": 0.22901882231235504, "learning_rate": 1.7794312780543883e-05, "loss": 0.3978, "step": 10440 }, { "epoch": 2.146366533045534, "grad_norm": 0.22975675761699677, "learning_rate": 1.7786357410090173e-05, "loss": 0.3855, "step": 10441 }, { "epoch": 2.1465721040189125, "grad_norm": 0.22928237915039062, "learning_rate": 1.7778403380325427e-05, "loss": 0.3919, "step": 10442 }, { "epoch": 2.146777674992291, "grad_norm": 0.22319789230823517, "learning_rate": 1.7770450691641526e-05, "loss": 0.3921, "step": 10443 }, { "epoch": 2.1469832459656697, "grad_norm": 0.23228733241558075, "learning_rate": 1.7762499344430253e-05, "loss": 0.395, "step": 10444 }, { "epoch": 2.1471888169390483, "grad_norm": 0.22841905057430267, "learning_rate": 1.7754549339083323e-05, "loss": 0.4022, "step": 10445 }, { "epoch": 2.147394387912427, "grad_norm": 0.1279844492673874, "learning_rate": 1.7746600675992408e-05, "loss": 0.4415, "step": 10446 }, { "epoch": 2.1475999588858055, "grad_norm": 0.2246563881635666, "learning_rate": 1.7738653355549078e-05, "loss": 0.3858, "step": 10447 }, { "epoch": 2.147805529859184, "grad_norm": 0.225599467754364, "learning_rate": 1.773070737814489e-05, "loss": 0.4025, "step": 10448 }, { "epoch": 2.1480111008325626, "grad_norm": 0.2247907519340515, "learning_rate": 1.7722762744171298e-05, "loss": 0.4245, "step": 10449 }, { "epoch": 2.1482166718059412, "grad_norm": 0.23618023097515106, "learning_rate": 1.7714819454019672e-05, "loss": 0.4155, "step": 10450 }, { "epoch": 2.1484222427793194, "grad_norm": 0.12265011668205261, "learning_rate": 1.770687750808138e-05, "loss": 0.4512, "step": 10451 }, { "epoch": 2.148627813752698, "grad_norm": 0.23683376610279083, "learning_rate": 1.7698936906747665e-05, "loss": 0.4045, "step": 10452 }, { "epoch": 2.1488333847260765, "grad_norm": 0.2286202311515808, "learning_rate": 1.7690997650409725e-05, "loss": 0.401, "step": 10453 }, { "epoch": 2.149038955699455, "grad_norm": 0.21446064114570618, "learning_rate": 1.7683059739458683e-05, "loss": 0.3898, "step": 10454 }, { "epoch": 2.1492445266728337, "grad_norm": 0.12255129218101501, "learning_rate": 1.7675123174285614e-05, "loss": 0.46, "step": 10455 }, { "epoch": 2.1494500976462123, "grad_norm": 0.22888119518756866, "learning_rate": 1.766718795528149e-05, "loss": 0.3708, "step": 10456 }, { "epoch": 2.149655668619591, "grad_norm": 0.2274254858493805, "learning_rate": 1.7659254082837288e-05, "loss": 0.3951, "step": 10457 }, { "epoch": 2.1498612395929695, "grad_norm": 0.12422723323106766, "learning_rate": 1.7651321557343836e-05, "loss": 0.4547, "step": 10458 }, { "epoch": 2.150066810566348, "grad_norm": 0.23370634019374847, "learning_rate": 1.7643390379191948e-05, "loss": 0.3956, "step": 10459 }, { "epoch": 2.1502723815397267, "grad_norm": 0.2372375875711441, "learning_rate": 1.7635460548772353e-05, "loss": 0.4031, "step": 10460 }, { "epoch": 2.1504779525131053, "grad_norm": 0.23817671835422516, "learning_rate": 1.762753206647571e-05, "loss": 0.3945, "step": 10461 }, { "epoch": 2.150683523486484, "grad_norm": 0.23152542114257812, "learning_rate": 1.7619604932692628e-05, "loss": 0.3837, "step": 10462 }, { "epoch": 2.1508890944598624, "grad_norm": 0.21996726095676422, "learning_rate": 1.7611679147813618e-05, "loss": 0.3971, "step": 10463 }, { "epoch": 2.151094665433241, "grad_norm": 0.22144795954227448, "learning_rate": 1.760375471222918e-05, "loss": 0.3999, "step": 10464 }, { "epoch": 2.1513002364066196, "grad_norm": 0.23396509885787964, "learning_rate": 1.7595831626329697e-05, "loss": 0.3977, "step": 10465 }, { "epoch": 2.1515058073799977, "grad_norm": 0.2290705144405365, "learning_rate": 1.7587909890505503e-05, "loss": 0.3953, "step": 10466 }, { "epoch": 2.1517113783533763, "grad_norm": 0.22540703415870667, "learning_rate": 1.7579989505146866e-05, "loss": 0.3971, "step": 10467 }, { "epoch": 2.151916949326755, "grad_norm": 0.12446384131908417, "learning_rate": 1.7572070470643973e-05, "loss": 0.4507, "step": 10468 }, { "epoch": 2.1521225203001335, "grad_norm": 0.12616395950317383, "learning_rate": 1.7564152787386977e-05, "loss": 0.44, "step": 10469 }, { "epoch": 2.152328091273512, "grad_norm": 0.23691080510616302, "learning_rate": 1.7556236455765943e-05, "loss": 0.3804, "step": 10470 }, { "epoch": 2.1525336622468907, "grad_norm": 0.2261635661125183, "learning_rate": 1.7548321476170854e-05, "loss": 0.3727, "step": 10471 }, { "epoch": 2.1527392332202693, "grad_norm": 0.22439588606357574, "learning_rate": 1.7540407848991672e-05, "loss": 0.3903, "step": 10472 }, { "epoch": 2.152944804193648, "grad_norm": 0.13026651740074158, "learning_rate": 1.7532495574618246e-05, "loss": 0.4672, "step": 10473 }, { "epoch": 2.1531503751670265, "grad_norm": 0.21984946727752686, "learning_rate": 1.7524584653440377e-05, "loss": 0.4064, "step": 10474 }, { "epoch": 2.153355946140405, "grad_norm": 0.22405663132667542, "learning_rate": 1.7516675085847812e-05, "loss": 0.4067, "step": 10475 }, { "epoch": 2.1535615171137836, "grad_norm": 0.22605964541435242, "learning_rate": 1.75087668722302e-05, "loss": 0.4045, "step": 10476 }, { "epoch": 2.153767088087162, "grad_norm": 0.1273018717765808, "learning_rate": 1.7500860012977142e-05, "loss": 0.4456, "step": 10477 }, { "epoch": 2.153972659060541, "grad_norm": 0.23210304975509644, "learning_rate": 1.7492954508478192e-05, "loss": 0.4067, "step": 10478 }, { "epoch": 2.1541782300339194, "grad_norm": 0.2308957576751709, "learning_rate": 1.7485050359122806e-05, "loss": 0.4144, "step": 10479 }, { "epoch": 2.154383801007298, "grad_norm": 0.2237699329853058, "learning_rate": 1.7477147565300388e-05, "loss": 0.3946, "step": 10480 }, { "epoch": 2.154589371980676, "grad_norm": 0.12873926758766174, "learning_rate": 1.7469246127400262e-05, "loss": 0.4475, "step": 10481 }, { "epoch": 2.1547949429540547, "grad_norm": 0.24316054582595825, "learning_rate": 1.7461346045811703e-05, "loss": 0.4043, "step": 10482 }, { "epoch": 2.1550005139274333, "grad_norm": 0.21882621943950653, "learning_rate": 1.7453447320923914e-05, "loss": 0.4072, "step": 10483 }, { "epoch": 2.155206084900812, "grad_norm": 0.2260080724954605, "learning_rate": 1.7445549953126e-05, "loss": 0.3984, "step": 10484 }, { "epoch": 2.1554116558741905, "grad_norm": 0.22015734016895294, "learning_rate": 1.743765394280707e-05, "loss": 0.3975, "step": 10485 }, { "epoch": 2.155617226847569, "grad_norm": 0.22426630556583405, "learning_rate": 1.7429759290356103e-05, "loss": 0.3925, "step": 10486 }, { "epoch": 2.1558227978209477, "grad_norm": 0.23523494601249695, "learning_rate": 1.7421865996162033e-05, "loss": 0.4133, "step": 10487 }, { "epoch": 2.1560283687943262, "grad_norm": 0.22726291418075562, "learning_rate": 1.7413974060613727e-05, "loss": 0.3988, "step": 10488 }, { "epoch": 2.156233939767705, "grad_norm": 0.2152286171913147, "learning_rate": 1.740608348409998e-05, "loss": 0.3935, "step": 10489 }, { "epoch": 2.1564395107410834, "grad_norm": 0.22603079676628113, "learning_rate": 1.7398194267009514e-05, "loss": 0.3965, "step": 10490 }, { "epoch": 2.156645081714462, "grad_norm": 0.1339533030986786, "learning_rate": 1.739030640973102e-05, "loss": 0.435, "step": 10491 }, { "epoch": 2.1568506526878406, "grad_norm": 0.23634931445121765, "learning_rate": 1.7382419912653064e-05, "loss": 0.4006, "step": 10492 }, { "epoch": 2.157056223661219, "grad_norm": 0.23838773369789124, "learning_rate": 1.7374534776164215e-05, "loss": 0.4042, "step": 10493 }, { "epoch": 2.1572617946345978, "grad_norm": 0.23160769045352936, "learning_rate": 1.736665100065291e-05, "loss": 0.3908, "step": 10494 }, { "epoch": 2.1574673656079764, "grad_norm": 0.12931808829307556, "learning_rate": 1.7358768586507557e-05, "loss": 0.4381, "step": 10495 }, { "epoch": 2.1576729365813545, "grad_norm": 0.2354772686958313, "learning_rate": 1.735088753411648e-05, "loss": 0.4097, "step": 10496 }, { "epoch": 2.157878507554733, "grad_norm": 0.22520937025547028, "learning_rate": 1.734300784386794e-05, "loss": 0.4014, "step": 10497 }, { "epoch": 2.1580840785281117, "grad_norm": 0.22981365025043488, "learning_rate": 1.7335129516150123e-05, "loss": 0.3952, "step": 10498 }, { "epoch": 2.1582896495014903, "grad_norm": 0.2230282872915268, "learning_rate": 1.7327252551351182e-05, "loss": 0.405, "step": 10499 }, { "epoch": 2.158495220474869, "grad_norm": 0.2350645661354065, "learning_rate": 1.731937694985917e-05, "loss": 0.3821, "step": 10500 }, { "epoch": 2.1587007914482474, "grad_norm": 0.2205275148153305, "learning_rate": 1.7311502712062073e-05, "loss": 0.4014, "step": 10501 }, { "epoch": 2.158906362421626, "grad_norm": 0.2229074090719223, "learning_rate": 1.7303629838347825e-05, "loss": 0.3965, "step": 10502 }, { "epoch": 2.1591119333950046, "grad_norm": 0.2243238240480423, "learning_rate": 1.7295758329104277e-05, "loss": 0.3978, "step": 10503 }, { "epoch": 2.159317504368383, "grad_norm": 0.22528594732284546, "learning_rate": 1.728788818471923e-05, "loss": 0.395, "step": 10504 }, { "epoch": 2.159523075341762, "grad_norm": 0.22361469268798828, "learning_rate": 1.7280019405580394e-05, "loss": 0.3949, "step": 10505 }, { "epoch": 2.1597286463151404, "grad_norm": 0.22868306934833527, "learning_rate": 1.727215199207545e-05, "loss": 0.396, "step": 10506 }, { "epoch": 2.159934217288519, "grad_norm": 0.23044967651367188, "learning_rate": 1.7264285944591975e-05, "loss": 0.4099, "step": 10507 }, { "epoch": 2.1601397882618976, "grad_norm": 0.2305765151977539, "learning_rate": 1.7256421263517503e-05, "loss": 0.3899, "step": 10508 }, { "epoch": 2.160345359235276, "grad_norm": 0.21992215514183044, "learning_rate": 1.724855794923948e-05, "loss": 0.3854, "step": 10509 }, { "epoch": 2.1605509302086547, "grad_norm": 0.21878063678741455, "learning_rate": 1.7240696002145292e-05, "loss": 0.3825, "step": 10510 }, { "epoch": 2.160756501182033, "grad_norm": 0.12538020312786102, "learning_rate": 1.7232835422622252e-05, "loss": 0.4371, "step": 10511 }, { "epoch": 2.1609620721554115, "grad_norm": 0.23171678185462952, "learning_rate": 1.7224976211057645e-05, "loss": 0.4239, "step": 10512 }, { "epoch": 2.16116764312879, "grad_norm": 0.12217391282320023, "learning_rate": 1.721711836783864e-05, "loss": 0.4505, "step": 10513 }, { "epoch": 2.1613732141021686, "grad_norm": 0.23179614543914795, "learning_rate": 1.7209261893352335e-05, "loss": 0.396, "step": 10514 }, { "epoch": 2.1615787850755472, "grad_norm": 0.2259824126958847, "learning_rate": 1.7201406787985824e-05, "loss": 0.381, "step": 10515 }, { "epoch": 2.161784356048926, "grad_norm": 0.2272365540266037, "learning_rate": 1.719355305212607e-05, "loss": 0.4012, "step": 10516 }, { "epoch": 2.1619899270223044, "grad_norm": 0.2351997047662735, "learning_rate": 1.718570068615999e-05, "loss": 0.4049, "step": 10517 }, { "epoch": 2.162195497995683, "grad_norm": 0.22571827471256256, "learning_rate": 1.7177849690474415e-05, "loss": 0.3954, "step": 10518 }, { "epoch": 2.1624010689690616, "grad_norm": 0.22981050610542297, "learning_rate": 1.7170000065456165e-05, "loss": 0.3959, "step": 10519 }, { "epoch": 2.16260663994244, "grad_norm": 0.2381727695465088, "learning_rate": 1.7162151811491932e-05, "loss": 0.3908, "step": 10520 }, { "epoch": 2.1628122109158188, "grad_norm": 0.2317119836807251, "learning_rate": 1.7154304928968366e-05, "loss": 0.4135, "step": 10521 }, { "epoch": 2.1630177818891974, "grad_norm": 0.2339845448732376, "learning_rate": 1.714645941827205e-05, "loss": 0.3687, "step": 10522 }, { "epoch": 2.163223352862576, "grad_norm": 0.12437080591917038, "learning_rate": 1.7138615279789484e-05, "loss": 0.4476, "step": 10523 }, { "epoch": 2.1634289238359545, "grad_norm": 0.12956155836582184, "learning_rate": 1.7130772513907122e-05, "loss": 0.4388, "step": 10524 }, { "epoch": 2.163634494809333, "grad_norm": 0.22595298290252686, "learning_rate": 1.7122931121011325e-05, "loss": 0.3914, "step": 10525 }, { "epoch": 2.1638400657827113, "grad_norm": 0.23524773120880127, "learning_rate": 1.711509110148843e-05, "loss": 0.394, "step": 10526 }, { "epoch": 2.16404563675609, "grad_norm": 0.229460209608078, "learning_rate": 1.7107252455724658e-05, "loss": 0.3965, "step": 10527 }, { "epoch": 2.1642512077294684, "grad_norm": 0.22869658470153809, "learning_rate": 1.709941518410619e-05, "loss": 0.3887, "step": 10528 }, { "epoch": 2.164456778702847, "grad_norm": 0.2369028925895691, "learning_rate": 1.7091579287019127e-05, "loss": 0.4027, "step": 10529 }, { "epoch": 2.1646623496762256, "grad_norm": 0.23322713375091553, "learning_rate": 1.7083744764849512e-05, "loss": 0.396, "step": 10530 }, { "epoch": 2.164867920649604, "grad_norm": 0.23089557886123657, "learning_rate": 1.707591161798331e-05, "loss": 0.3945, "step": 10531 }, { "epoch": 2.165073491622983, "grad_norm": 0.21757075190544128, "learning_rate": 1.7068079846806413e-05, "loss": 0.3796, "step": 10532 }, { "epoch": 2.1652790625963614, "grad_norm": 0.2164604812860489, "learning_rate": 1.706024945170468e-05, "loss": 0.398, "step": 10533 }, { "epoch": 2.16548463356974, "grad_norm": 0.2306961566209793, "learning_rate": 1.705242043306387e-05, "loss": 0.3956, "step": 10534 }, { "epoch": 2.1656902045431186, "grad_norm": 0.2262311577796936, "learning_rate": 1.704459279126966e-05, "loss": 0.3937, "step": 10535 }, { "epoch": 2.165895775516497, "grad_norm": 0.2339993417263031, "learning_rate": 1.703676652670772e-05, "loss": 0.4147, "step": 10536 }, { "epoch": 2.1661013464898757, "grad_norm": 0.22700749337673187, "learning_rate": 1.7028941639763586e-05, "loss": 0.3932, "step": 10537 }, { "epoch": 2.1663069174632543, "grad_norm": 0.22953462600708008, "learning_rate": 1.7021118130822766e-05, "loss": 0.3856, "step": 10538 }, { "epoch": 2.166512488436633, "grad_norm": 0.12440577894449234, "learning_rate": 1.7013296000270665e-05, "loss": 0.4448, "step": 10539 }, { "epoch": 2.1667180594100115, "grad_norm": 0.22885264456272125, "learning_rate": 1.7005475248492677e-05, "loss": 0.4023, "step": 10540 }, { "epoch": 2.1669236303833896, "grad_norm": 0.22612909972667694, "learning_rate": 1.6997655875874082e-05, "loss": 0.3813, "step": 10541 }, { "epoch": 2.1671292013567682, "grad_norm": 0.22638019919395447, "learning_rate": 1.6989837882800095e-05, "loss": 0.3978, "step": 10542 }, { "epoch": 2.167334772330147, "grad_norm": 0.12233424931764603, "learning_rate": 1.6982021269655878e-05, "loss": 0.4485, "step": 10543 }, { "epoch": 2.1675403433035254, "grad_norm": 0.12629348039627075, "learning_rate": 1.6974206036826516e-05, "loss": 0.4501, "step": 10544 }, { "epoch": 2.167745914276904, "grad_norm": 0.12014532089233398, "learning_rate": 1.696639218469703e-05, "loss": 0.4594, "step": 10545 }, { "epoch": 2.1679514852502826, "grad_norm": 0.2178095281124115, "learning_rate": 1.6958579713652356e-05, "loss": 0.4123, "step": 10546 }, { "epoch": 2.168157056223661, "grad_norm": 0.22389446198940277, "learning_rate": 1.6950768624077412e-05, "loss": 0.3935, "step": 10547 }, { "epoch": 2.1683626271970398, "grad_norm": 0.22835230827331543, "learning_rate": 1.6942958916356995e-05, "loss": 0.4019, "step": 10548 }, { "epoch": 2.1685681981704183, "grad_norm": 0.2239934802055359, "learning_rate": 1.6935150590875852e-05, "loss": 0.4014, "step": 10549 }, { "epoch": 2.168773769143797, "grad_norm": 0.22052869200706482, "learning_rate": 1.6927343648018667e-05, "loss": 0.3964, "step": 10550 }, { "epoch": 2.1689793401171755, "grad_norm": 0.22106504440307617, "learning_rate": 1.691953808817005e-05, "loss": 0.3868, "step": 10551 }, { "epoch": 2.169184911090554, "grad_norm": 0.12797969579696655, "learning_rate": 1.6911733911714544e-05, "loss": 0.4505, "step": 10552 }, { "epoch": 2.1693904820639327, "grad_norm": 0.12730328738689423, "learning_rate": 1.6903931119036607e-05, "loss": 0.4535, "step": 10553 }, { "epoch": 2.1695960530373113, "grad_norm": 0.22867700457572937, "learning_rate": 1.6896129710520677e-05, "loss": 0.4105, "step": 10554 }, { "epoch": 2.16980162401069, "grad_norm": 0.22605451941490173, "learning_rate": 1.688832968655108e-05, "loss": 0.3941, "step": 10555 }, { "epoch": 2.170007194984068, "grad_norm": 0.23293885588645935, "learning_rate": 1.6880531047512074e-05, "loss": 0.4083, "step": 10556 }, { "epoch": 2.1702127659574466, "grad_norm": 0.11922682076692581, "learning_rate": 1.6872733793787882e-05, "loss": 0.449, "step": 10557 }, { "epoch": 2.170418336930825, "grad_norm": 0.12665359675884247, "learning_rate": 1.6864937925762637e-05, "loss": 0.4587, "step": 10558 }, { "epoch": 2.170623907904204, "grad_norm": 0.23081457614898682, "learning_rate": 1.685714344382039e-05, "loss": 0.3861, "step": 10559 }, { "epoch": 2.1708294788775824, "grad_norm": 0.2365112155675888, "learning_rate": 1.6849350348345137e-05, "loss": 0.3958, "step": 10560 }, { "epoch": 2.171035049850961, "grad_norm": 0.12257271260023117, "learning_rate": 1.684155863972083e-05, "loss": 0.46, "step": 10561 }, { "epoch": 2.1712406208243396, "grad_norm": 0.2283942699432373, "learning_rate": 1.6833768318331313e-05, "loss": 0.388, "step": 10562 }, { "epoch": 2.171446191797718, "grad_norm": 0.22442100942134857, "learning_rate": 1.6825979384560385e-05, "loss": 0.3916, "step": 10563 }, { "epoch": 2.1716517627710967, "grad_norm": 0.12442784011363983, "learning_rate": 1.681819183879177e-05, "loss": 0.4635, "step": 10564 }, { "epoch": 2.1718573337444753, "grad_norm": 0.22854554653167725, "learning_rate": 1.681040568140912e-05, "loss": 0.379, "step": 10565 }, { "epoch": 2.172062904717854, "grad_norm": 0.12427257746458054, "learning_rate": 1.680262091279602e-05, "loss": 0.4719, "step": 10566 }, { "epoch": 2.1722684756912325, "grad_norm": 0.22989091277122498, "learning_rate": 1.6794837533335984e-05, "loss": 0.4118, "step": 10567 }, { "epoch": 2.172474046664611, "grad_norm": 0.23249632120132446, "learning_rate": 1.6787055543412484e-05, "loss": 0.3812, "step": 10568 }, { "epoch": 2.1726796176379897, "grad_norm": 0.21678483486175537, "learning_rate": 1.677927494340889e-05, "loss": 0.4007, "step": 10569 }, { "epoch": 2.1728851886113683, "grad_norm": 0.2254790961742401, "learning_rate": 1.677149573370852e-05, "loss": 0.395, "step": 10570 }, { "epoch": 2.1730907595847464, "grad_norm": 0.2205883264541626, "learning_rate": 1.6763717914694613e-05, "loss": 0.3865, "step": 10571 }, { "epoch": 2.173296330558125, "grad_norm": 0.12380865216255188, "learning_rate": 1.675594148675035e-05, "loss": 0.4542, "step": 10572 }, { "epoch": 2.1735019015315036, "grad_norm": 0.22934816777706146, "learning_rate": 1.6748166450258836e-05, "loss": 0.3885, "step": 10573 }, { "epoch": 2.173707472504882, "grad_norm": 0.2283497005701065, "learning_rate": 1.6740392805603097e-05, "loss": 0.385, "step": 10574 }, { "epoch": 2.1739130434782608, "grad_norm": 0.22790871560573578, "learning_rate": 1.6732620553166136e-05, "loss": 0.3862, "step": 10575 }, { "epoch": 2.1741186144516393, "grad_norm": 0.2244972586631775, "learning_rate": 1.6724849693330837e-05, "loss": 0.4012, "step": 10576 }, { "epoch": 2.174324185425018, "grad_norm": 0.23788417875766754, "learning_rate": 1.6717080226480034e-05, "loss": 0.4071, "step": 10577 }, { "epoch": 2.1745297563983965, "grad_norm": 0.22114843130111694, "learning_rate": 1.6709312152996484e-05, "loss": 0.3793, "step": 10578 }, { "epoch": 2.174735327371775, "grad_norm": 0.23666070401668549, "learning_rate": 1.6701545473262907e-05, "loss": 0.4066, "step": 10579 }, { "epoch": 2.1749408983451537, "grad_norm": 0.23616231977939606, "learning_rate": 1.669378018766192e-05, "loss": 0.4042, "step": 10580 }, { "epoch": 2.1751464693185323, "grad_norm": 0.2265489399433136, "learning_rate": 1.668601629657606e-05, "loss": 0.3877, "step": 10581 }, { "epoch": 2.175352040291911, "grad_norm": 0.223519966006279, "learning_rate": 1.6678253800387857e-05, "loss": 0.4095, "step": 10582 }, { "epoch": 2.1755576112652895, "grad_norm": 0.12714464962482452, "learning_rate": 1.6670492699479713e-05, "loss": 0.4789, "step": 10583 }, { "epoch": 2.175763182238668, "grad_norm": 0.22280433773994446, "learning_rate": 1.6662732994233978e-05, "loss": 0.3944, "step": 10584 }, { "epoch": 2.1759687532120466, "grad_norm": 0.2261977344751358, "learning_rate": 1.6654974685032947e-05, "loss": 0.3955, "step": 10585 }, { "epoch": 2.1761743241854252, "grad_norm": 0.23589631915092468, "learning_rate": 1.6647217772258825e-05, "loss": 0.3948, "step": 10586 }, { "epoch": 2.176379895158804, "grad_norm": 0.1299065500497818, "learning_rate": 1.6639462256293747e-05, "loss": 0.4561, "step": 10587 }, { "epoch": 2.176585466132182, "grad_norm": 0.24209356307983398, "learning_rate": 1.6631708137519825e-05, "loss": 0.4137, "step": 10588 }, { "epoch": 2.1767910371055605, "grad_norm": 0.2254961133003235, "learning_rate": 1.6623955416319047e-05, "loss": 0.3962, "step": 10589 }, { "epoch": 2.176996608078939, "grad_norm": 0.1276281327009201, "learning_rate": 1.661620409307336e-05, "loss": 0.4605, "step": 10590 }, { "epoch": 2.1772021790523177, "grad_norm": 0.22398579120635986, "learning_rate": 1.660845416816463e-05, "loss": 0.396, "step": 10591 }, { "epoch": 2.1774077500256963, "grad_norm": 0.22290287911891937, "learning_rate": 1.660070564197466e-05, "loss": 0.4096, "step": 10592 }, { "epoch": 2.177613320999075, "grad_norm": 0.22636477649211884, "learning_rate": 1.6592958514885183e-05, "loss": 0.3942, "step": 10593 }, { "epoch": 2.1778188919724535, "grad_norm": 0.21956631541252136, "learning_rate": 1.6585212787277854e-05, "loss": 0.4021, "step": 10594 }, { "epoch": 2.178024462945832, "grad_norm": 0.2394167184829712, "learning_rate": 1.6577468459534298e-05, "loss": 0.397, "step": 10595 }, { "epoch": 2.1782300339192107, "grad_norm": 0.22891393303871155, "learning_rate": 1.656972553203602e-05, "loss": 0.3938, "step": 10596 }, { "epoch": 2.1784356048925893, "grad_norm": 0.2175266295671463, "learning_rate": 1.6561984005164483e-05, "loss": 0.3902, "step": 10597 }, { "epoch": 2.178641175865968, "grad_norm": 0.22040759027004242, "learning_rate": 1.6554243879301076e-05, "loss": 0.3728, "step": 10598 }, { "epoch": 2.1788467468393464, "grad_norm": 0.22119790315628052, "learning_rate": 1.65465051548271e-05, "loss": 0.4136, "step": 10599 }, { "epoch": 2.179052317812725, "grad_norm": 0.22910022735595703, "learning_rate": 1.6538767832123844e-05, "loss": 0.4046, "step": 10600 }, { "epoch": 2.1792578887861036, "grad_norm": 0.129209503531456, "learning_rate": 1.653103191157247e-05, "loss": 0.439, "step": 10601 }, { "epoch": 2.179463459759482, "grad_norm": 0.23198646306991577, "learning_rate": 1.6523297393554072e-05, "loss": 0.4143, "step": 10602 }, { "epoch": 2.1796690307328603, "grad_norm": 0.22791431844234467, "learning_rate": 1.6515564278449728e-05, "loss": 0.3833, "step": 10603 }, { "epoch": 2.179874601706239, "grad_norm": 0.2255294919013977, "learning_rate": 1.6507832566640392e-05, "loss": 0.3928, "step": 10604 }, { "epoch": 2.1800801726796175, "grad_norm": 0.23165516555309296, "learning_rate": 1.6500102258506978e-05, "loss": 0.3914, "step": 10605 }, { "epoch": 2.180285743652996, "grad_norm": 0.2258346527814865, "learning_rate": 1.6492373354430316e-05, "loss": 0.3953, "step": 10606 }, { "epoch": 2.1804913146263747, "grad_norm": 0.22352395951747894, "learning_rate": 1.6484645854791174e-05, "loss": 0.3852, "step": 10607 }, { "epoch": 2.1806968855997533, "grad_norm": 0.22954273223876953, "learning_rate": 1.6476919759970236e-05, "loss": 0.4085, "step": 10608 }, { "epoch": 2.180902456573132, "grad_norm": 0.22188891470432281, "learning_rate": 1.6469195070348158e-05, "loss": 0.3917, "step": 10609 }, { "epoch": 2.1811080275465105, "grad_norm": 0.12909865379333496, "learning_rate": 1.6461471786305488e-05, "loss": 0.4633, "step": 10610 }, { "epoch": 2.181313598519889, "grad_norm": 0.2231685221195221, "learning_rate": 1.6453749908222718e-05, "loss": 0.3876, "step": 10611 }, { "epoch": 2.1815191694932676, "grad_norm": 0.22691339254379272, "learning_rate": 1.6446029436480263e-05, "loss": 0.3948, "step": 10612 }, { "epoch": 2.181724740466646, "grad_norm": 0.23698212206363678, "learning_rate": 1.643831037145847e-05, "loss": 0.3962, "step": 10613 }, { "epoch": 2.181930311440025, "grad_norm": 0.22960902750492096, "learning_rate": 1.6430592713537634e-05, "loss": 0.3989, "step": 10614 }, { "epoch": 2.1821358824134034, "grad_norm": 0.2320588082075119, "learning_rate": 1.642287646309795e-05, "loss": 0.392, "step": 10615 }, { "epoch": 2.182341453386782, "grad_norm": 0.560815155506134, "learning_rate": 1.641516162051958e-05, "loss": 0.3986, "step": 10616 }, { "epoch": 2.1825470243601606, "grad_norm": 0.12423614412546158, "learning_rate": 1.6407448186182598e-05, "loss": 0.4408, "step": 10617 }, { "epoch": 2.1827525953335387, "grad_norm": 0.2267366200685501, "learning_rate": 1.6399736160467e-05, "loss": 0.3849, "step": 10618 }, { "epoch": 2.1829581663069173, "grad_norm": 0.2252301126718521, "learning_rate": 1.6392025543752726e-05, "loss": 0.3939, "step": 10619 }, { "epoch": 2.183163737280296, "grad_norm": 0.1241535022854805, "learning_rate": 1.6384316336419625e-05, "loss": 0.4509, "step": 10620 }, { "epoch": 2.1833693082536745, "grad_norm": 0.22740307450294495, "learning_rate": 1.637660853884752e-05, "loss": 0.4052, "step": 10621 }, { "epoch": 2.183574879227053, "grad_norm": 0.2271934300661087, "learning_rate": 1.6368902151416132e-05, "loss": 0.3804, "step": 10622 }, { "epoch": 2.1837804502004317, "grad_norm": 0.23072363436222076, "learning_rate": 1.6361197174505098e-05, "loss": 0.3939, "step": 10623 }, { "epoch": 2.1839860211738102, "grad_norm": 0.2331043779850006, "learning_rate": 1.6353493608494032e-05, "loss": 0.3989, "step": 10624 }, { "epoch": 2.184191592147189, "grad_norm": 0.12475959211587906, "learning_rate": 1.634579145376245e-05, "loss": 0.4525, "step": 10625 }, { "epoch": 2.1843971631205674, "grad_norm": 0.22251753509044647, "learning_rate": 1.633809071068979e-05, "loss": 0.4049, "step": 10626 }, { "epoch": 2.184602734093946, "grad_norm": 0.22629208862781525, "learning_rate": 1.633039137965543e-05, "loss": 0.4039, "step": 10627 }, { "epoch": 2.1848083050673246, "grad_norm": 0.22912812232971191, "learning_rate": 1.632269346103869e-05, "loss": 0.4004, "step": 10628 }, { "epoch": 2.185013876040703, "grad_norm": 0.2214146852493286, "learning_rate": 1.6314996955218792e-05, "loss": 0.3727, "step": 10629 }, { "epoch": 2.1852194470140818, "grad_norm": 0.22701111435890198, "learning_rate": 1.6307301862574933e-05, "loss": 0.4044, "step": 10630 }, { "epoch": 2.1854250179874604, "grad_norm": 0.22968102991580963, "learning_rate": 1.6299608183486206e-05, "loss": 0.399, "step": 10631 }, { "epoch": 2.185630588960839, "grad_norm": 0.2261413037776947, "learning_rate": 1.6291915918331637e-05, "loss": 0.3978, "step": 10632 }, { "epoch": 2.185836159934217, "grad_norm": 0.2443215698003769, "learning_rate": 1.6284225067490187e-05, "loss": 0.3938, "step": 10633 }, { "epoch": 2.1860417309075957, "grad_norm": 0.1367214322090149, "learning_rate": 1.6276535631340756e-05, "loss": 0.459, "step": 10634 }, { "epoch": 2.1862473018809743, "grad_norm": 0.1239805743098259, "learning_rate": 1.6268847610262154e-05, "loss": 0.445, "step": 10635 }, { "epoch": 2.186452872854353, "grad_norm": 0.23008181154727936, "learning_rate": 1.626116100463313e-05, "loss": 0.3968, "step": 10636 }, { "epoch": 2.1866584438277314, "grad_norm": 0.22786974906921387, "learning_rate": 1.625347581483239e-05, "loss": 0.3968, "step": 10637 }, { "epoch": 2.18686401480111, "grad_norm": 0.2298787385225296, "learning_rate": 1.6245792041238542e-05, "loss": 0.3913, "step": 10638 }, { "epoch": 2.1870695857744886, "grad_norm": 0.23194655776023865, "learning_rate": 1.623810968423012e-05, "loss": 0.3976, "step": 10639 }, { "epoch": 2.187275156747867, "grad_norm": 0.23695392906665802, "learning_rate": 1.62304287441856e-05, "loss": 0.4161, "step": 10640 }, { "epoch": 2.187480727721246, "grad_norm": 0.22045163810253143, "learning_rate": 1.6222749221483375e-05, "loss": 0.412, "step": 10641 }, { "epoch": 2.1876862986946244, "grad_norm": 0.22696349024772644, "learning_rate": 1.62150711165018e-05, "loss": 0.3791, "step": 10642 }, { "epoch": 2.187891869668003, "grad_norm": 0.23293721675872803, "learning_rate": 1.6207394429619136e-05, "loss": 0.4014, "step": 10643 }, { "epoch": 2.1880974406413816, "grad_norm": 0.12806709110736847, "learning_rate": 1.619971916121356e-05, "loss": 0.449, "step": 10644 }, { "epoch": 2.18830301161476, "grad_norm": 0.21958725154399872, "learning_rate": 1.6192045311663218e-05, "loss": 0.3836, "step": 10645 }, { "epoch": 2.1885085825881387, "grad_norm": 0.22592249512672424, "learning_rate": 1.6184372881346154e-05, "loss": 0.3945, "step": 10646 }, { "epoch": 2.1887141535615173, "grad_norm": 0.12806597352027893, "learning_rate": 1.6176701870640362e-05, "loss": 0.4394, "step": 10647 }, { "epoch": 2.1889197245348955, "grad_norm": 0.2250743955373764, "learning_rate": 1.616903227992374e-05, "loss": 0.3952, "step": 10648 }, { "epoch": 2.189125295508274, "grad_norm": 0.1263757050037384, "learning_rate": 1.616136410957415e-05, "loss": 0.4591, "step": 10649 }, { "epoch": 2.1893308664816526, "grad_norm": 0.237161323428154, "learning_rate": 1.6153697359969344e-05, "loss": 0.4032, "step": 10650 }, { "epoch": 2.1895364374550312, "grad_norm": 0.22208333015441895, "learning_rate": 1.614603203148705e-05, "loss": 0.3927, "step": 10651 }, { "epoch": 2.18974200842841, "grad_norm": 0.22636909782886505, "learning_rate": 1.61383681245049e-05, "loss": 0.3784, "step": 10652 }, { "epoch": 2.1899475794017884, "grad_norm": 0.23345516622066498, "learning_rate": 1.6130705639400447e-05, "loss": 0.4156, "step": 10653 }, { "epoch": 2.190153150375167, "grad_norm": 0.2252190262079239, "learning_rate": 1.6123044576551202e-05, "loss": 0.3922, "step": 10654 }, { "epoch": 2.1903587213485456, "grad_norm": 0.23159563541412354, "learning_rate": 1.6115384936334575e-05, "loss": 0.4089, "step": 10655 }, { "epoch": 2.190564292321924, "grad_norm": 0.22487987577915192, "learning_rate": 1.6107726719127926e-05, "loss": 0.3992, "step": 10656 }, { "epoch": 2.1907698632953028, "grad_norm": 0.23709611594676971, "learning_rate": 1.6100069925308523e-05, "loss": 0.4198, "step": 10657 }, { "epoch": 2.1909754342686814, "grad_norm": 0.21871237456798553, "learning_rate": 1.609241455525361e-05, "loss": 0.4042, "step": 10658 }, { "epoch": 2.19118100524206, "grad_norm": 0.2315407693386078, "learning_rate": 1.6084760609340326e-05, "loss": 0.4062, "step": 10659 }, { "epoch": 2.1913865762154385, "grad_norm": 0.2263568639755249, "learning_rate": 1.6077108087945734e-05, "loss": 0.3908, "step": 10660 }, { "epoch": 2.191592147188817, "grad_norm": 0.12639762461185455, "learning_rate": 1.6069456991446842e-05, "loss": 0.4546, "step": 10661 }, { "epoch": 2.1917977181621957, "grad_norm": 0.2350437194108963, "learning_rate": 1.606180732022058e-05, "loss": 0.4115, "step": 10662 }, { "epoch": 2.192003289135574, "grad_norm": 0.21677015721797943, "learning_rate": 1.60541590746438e-05, "loss": 0.3724, "step": 10663 }, { "epoch": 2.1922088601089524, "grad_norm": 0.22756123542785645, "learning_rate": 1.6046512255093326e-05, "loss": 0.3916, "step": 10664 }, { "epoch": 2.192414431082331, "grad_norm": 0.12300966680049896, "learning_rate": 1.6038866861945847e-05, "loss": 0.4532, "step": 10665 }, { "epoch": 2.1926200020557096, "grad_norm": 0.23039010167121887, "learning_rate": 1.6031222895578052e-05, "loss": 0.3941, "step": 10666 }, { "epoch": 2.192825573029088, "grad_norm": 0.2256508469581604, "learning_rate": 1.6023580356366502e-05, "loss": 0.4022, "step": 10667 }, { "epoch": 2.193031144002467, "grad_norm": 0.21880964934825897, "learning_rate": 1.6015939244687717e-05, "loss": 0.3848, "step": 10668 }, { "epoch": 2.1932367149758454, "grad_norm": 0.23204973340034485, "learning_rate": 1.600829956091813e-05, "loss": 0.3865, "step": 10669 }, { "epoch": 2.193442285949224, "grad_norm": 0.24459494650363922, "learning_rate": 1.6000661305434108e-05, "loss": 0.3947, "step": 10670 }, { "epoch": 2.1936478569226026, "grad_norm": 0.23136425018310547, "learning_rate": 1.5993024478611972e-05, "loss": 0.3957, "step": 10671 }, { "epoch": 2.193853427895981, "grad_norm": 0.22914138436317444, "learning_rate": 1.5985389080827937e-05, "loss": 0.3889, "step": 10672 }, { "epoch": 2.1940589988693597, "grad_norm": 0.22302468121051788, "learning_rate": 1.5977755112458174e-05, "loss": 0.385, "step": 10673 }, { "epoch": 2.1942645698427383, "grad_norm": 0.2292277216911316, "learning_rate": 1.5970122573878766e-05, "loss": 0.4123, "step": 10674 }, { "epoch": 2.194470140816117, "grad_norm": 0.2244681715965271, "learning_rate": 1.5962491465465733e-05, "loss": 0.3681, "step": 10675 }, { "epoch": 2.1946757117894955, "grad_norm": 0.2233274132013321, "learning_rate": 1.5954861787595024e-05, "loss": 0.4046, "step": 10676 }, { "epoch": 2.194881282762874, "grad_norm": 0.23008307814598083, "learning_rate": 1.5947233540642505e-05, "loss": 0.408, "step": 10677 }, { "epoch": 2.1950868537362522, "grad_norm": 0.2235502302646637, "learning_rate": 1.593960672498401e-05, "loss": 0.3884, "step": 10678 }, { "epoch": 2.195292424709631, "grad_norm": 0.12918898463249207, "learning_rate": 1.5931981340995262e-05, "loss": 0.4728, "step": 10679 }, { "epoch": 2.1954979956830094, "grad_norm": 0.21759852766990662, "learning_rate": 1.5924357389051935e-05, "loss": 0.3975, "step": 10680 }, { "epoch": 2.195703566656388, "grad_norm": 0.22451691329479218, "learning_rate": 1.5916734869529616e-05, "loss": 0.3896, "step": 10681 }, { "epoch": 2.1959091376297666, "grad_norm": 0.13441641628742218, "learning_rate": 1.5909113782803837e-05, "loss": 0.4687, "step": 10682 }, { "epoch": 2.196114708603145, "grad_norm": 0.23042891919612885, "learning_rate": 1.5901494129250052e-05, "loss": 0.3967, "step": 10683 }, { "epoch": 2.1963202795765238, "grad_norm": 0.2289479672908783, "learning_rate": 1.589387590924363e-05, "loss": 0.3911, "step": 10684 }, { "epoch": 2.1965258505499023, "grad_norm": 0.22492031753063202, "learning_rate": 1.5886259123159917e-05, "loss": 0.3867, "step": 10685 }, { "epoch": 2.196731421523281, "grad_norm": 0.2289929836988449, "learning_rate": 1.5878643771374133e-05, "loss": 0.3915, "step": 10686 }, { "epoch": 2.1969369924966595, "grad_norm": 0.12365361303091049, "learning_rate": 1.5871029854261445e-05, "loss": 0.4289, "step": 10687 }, { "epoch": 2.197142563470038, "grad_norm": 0.21747228503227234, "learning_rate": 1.5863417372196988e-05, "loss": 0.401, "step": 10688 }, { "epoch": 2.1973481344434167, "grad_norm": 0.21652854979038239, "learning_rate": 1.585580632555577e-05, "loss": 0.3908, "step": 10689 }, { "epoch": 2.1975537054167953, "grad_norm": 0.22147879004478455, "learning_rate": 1.584819671471275e-05, "loss": 0.3968, "step": 10690 }, { "epoch": 2.197759276390174, "grad_norm": 0.2206578552722931, "learning_rate": 1.5840588540042816e-05, "loss": 0.3972, "step": 10691 }, { "epoch": 2.1979648473635525, "grad_norm": 0.23885060846805573, "learning_rate": 1.5832981801920806e-05, "loss": 0.385, "step": 10692 }, { "epoch": 2.1981704183369306, "grad_norm": 0.23165802657604218, "learning_rate": 1.582537650072145e-05, "loss": 0.3954, "step": 10693 }, { "epoch": 2.198375989310309, "grad_norm": 0.23803496360778809, "learning_rate": 1.5817772636819437e-05, "loss": 0.4089, "step": 10694 }, { "epoch": 2.198581560283688, "grad_norm": 0.22591203451156616, "learning_rate": 1.581017021058937e-05, "loss": 0.3965, "step": 10695 }, { "epoch": 2.1987871312570664, "grad_norm": 0.23487183451652527, "learning_rate": 1.5802569222405785e-05, "loss": 0.4041, "step": 10696 }, { "epoch": 2.198992702230445, "grad_norm": 0.12291015684604645, "learning_rate": 1.5794969672643143e-05, "loss": 0.4483, "step": 10697 }, { "epoch": 2.1991982732038236, "grad_norm": 0.2258739024400711, "learning_rate": 1.5787371561675826e-05, "loss": 0.3911, "step": 10698 }, { "epoch": 2.199403844177202, "grad_norm": 0.2271280735731125, "learning_rate": 1.5779774889878188e-05, "loss": 0.39, "step": 10699 }, { "epoch": 2.1996094151505807, "grad_norm": 0.12247934192419052, "learning_rate": 1.5772179657624468e-05, "loss": 0.4543, "step": 10700 }, { "epoch": 2.1998149861239593, "grad_norm": 0.22866493463516235, "learning_rate": 1.5764585865288846e-05, "loss": 0.3903, "step": 10701 }, { "epoch": 2.200020557097338, "grad_norm": 0.12255199253559113, "learning_rate": 1.5756993513245428e-05, "loss": 0.453, "step": 10702 }, { "epoch": 2.2002261280707165, "grad_norm": 0.2146882563829422, "learning_rate": 1.574940260186826e-05, "loss": 0.3789, "step": 10703 }, { "epoch": 2.200431699044095, "grad_norm": 0.23465701937675476, "learning_rate": 1.5741813131531313e-05, "loss": 0.3917, "step": 10704 }, { "epoch": 2.2006372700174737, "grad_norm": 0.2412889301776886, "learning_rate": 1.5734225102608464e-05, "loss": 0.4213, "step": 10705 }, { "epoch": 2.2008428409908523, "grad_norm": 0.22149762511253357, "learning_rate": 1.5726638515473566e-05, "loss": 0.3988, "step": 10706 }, { "epoch": 2.201048411964231, "grad_norm": 0.23268526792526245, "learning_rate": 1.571905337050037e-05, "loss": 0.3857, "step": 10707 }, { "epoch": 2.201253982937609, "grad_norm": 0.22317472100257874, "learning_rate": 1.571146966806254e-05, "loss": 0.3828, "step": 10708 }, { "epoch": 2.2014595539109876, "grad_norm": 0.22195008397102356, "learning_rate": 1.570388740853372e-05, "loss": 0.4056, "step": 10709 }, { "epoch": 2.201665124884366, "grad_norm": 0.21876020729541779, "learning_rate": 1.569630659228744e-05, "loss": 0.4002, "step": 10710 }, { "epoch": 2.2018706958577448, "grad_norm": 0.2204761803150177, "learning_rate": 1.5688727219697163e-05, "loss": 0.3963, "step": 10711 }, { "epoch": 2.2020762668311233, "grad_norm": 0.22541974484920502, "learning_rate": 1.5681149291136285e-05, "loss": 0.3829, "step": 10712 }, { "epoch": 2.202281837804502, "grad_norm": 0.22481369972229004, "learning_rate": 1.567357280697816e-05, "loss": 0.3834, "step": 10713 }, { "epoch": 2.2024874087778805, "grad_norm": 0.23171178996562958, "learning_rate": 1.5665997767596033e-05, "loss": 0.4008, "step": 10714 }, { "epoch": 2.202692979751259, "grad_norm": 0.22620131075382233, "learning_rate": 1.5658424173363085e-05, "loss": 0.3997, "step": 10715 }, { "epoch": 2.2028985507246377, "grad_norm": 0.22562332451343536, "learning_rate": 1.5650852024652435e-05, "loss": 0.4104, "step": 10716 }, { "epoch": 2.2031041216980163, "grad_norm": 0.2276526838541031, "learning_rate": 1.5643281321837135e-05, "loss": 0.392, "step": 10717 }, { "epoch": 2.203309692671395, "grad_norm": 0.12458810210227966, "learning_rate": 1.5635712065290146e-05, "loss": 0.4551, "step": 10718 }, { "epoch": 2.2035152636447735, "grad_norm": 0.23165149986743927, "learning_rate": 1.5628144255384365e-05, "loss": 0.3855, "step": 10719 }, { "epoch": 2.203720834618152, "grad_norm": 0.2240263819694519, "learning_rate": 1.562057789249264e-05, "loss": 0.3825, "step": 10720 }, { "epoch": 2.2039264055915306, "grad_norm": 0.21997642517089844, "learning_rate": 1.5613012976987728e-05, "loss": 0.3813, "step": 10721 }, { "epoch": 2.2041319765649092, "grad_norm": 1.4580494165420532, "learning_rate": 1.5605449509242312e-05, "loss": 0.408, "step": 10722 }, { "epoch": 2.2043375475382874, "grad_norm": 0.23071999847888947, "learning_rate": 1.5597887489629008e-05, "loss": 0.3983, "step": 10723 }, { "epoch": 2.204543118511666, "grad_norm": 0.22993268072605133, "learning_rate": 1.559032691852036e-05, "loss": 0.392, "step": 10724 }, { "epoch": 2.2047486894850445, "grad_norm": 0.12808802723884583, "learning_rate": 1.5582767796288852e-05, "loss": 0.4491, "step": 10725 }, { "epoch": 2.204954260458423, "grad_norm": 0.22585633397102356, "learning_rate": 1.5575210123306855e-05, "loss": 0.4, "step": 10726 }, { "epoch": 2.2051598314318017, "grad_norm": 0.12611474096775055, "learning_rate": 1.5567653899946745e-05, "loss": 0.4577, "step": 10727 }, { "epoch": 2.2053654024051803, "grad_norm": 0.17360465228557587, "learning_rate": 1.5560099126580757e-05, "loss": 0.4583, "step": 10728 }, { "epoch": 2.205570973378559, "grad_norm": 0.23249217867851257, "learning_rate": 1.5552545803581072e-05, "loss": 0.3971, "step": 10729 }, { "epoch": 2.2057765443519375, "grad_norm": 0.2386702597141266, "learning_rate": 1.5544993931319832e-05, "loss": 0.3891, "step": 10730 }, { "epoch": 2.205982115325316, "grad_norm": 0.12809514999389648, "learning_rate": 1.5537443510169068e-05, "loss": 0.4534, "step": 10731 }, { "epoch": 2.2061876862986947, "grad_norm": 0.2297258824110031, "learning_rate": 1.5529894540500755e-05, "loss": 0.3897, "step": 10732 }, { "epoch": 2.2063932572720732, "grad_norm": 0.22300571203231812, "learning_rate": 1.5522347022686782e-05, "loss": 0.3961, "step": 10733 }, { "epoch": 2.206598828245452, "grad_norm": 0.23077335953712463, "learning_rate": 1.5514800957099003e-05, "loss": 0.4094, "step": 10734 }, { "epoch": 2.2068043992188304, "grad_norm": 0.22444140911102295, "learning_rate": 1.550725634410917e-05, "loss": 0.4009, "step": 10735 }, { "epoch": 2.207009970192209, "grad_norm": 0.13065902888774872, "learning_rate": 1.549971318408897e-05, "loss": 0.4443, "step": 10736 }, { "epoch": 2.2072155411655876, "grad_norm": 0.12475431710481644, "learning_rate": 1.5492171477410013e-05, "loss": 0.4383, "step": 10737 }, { "epoch": 2.2074211121389657, "grad_norm": 0.23084284365177155, "learning_rate": 1.5484631224443852e-05, "loss": 0.4043, "step": 10738 }, { "epoch": 2.2076266831123443, "grad_norm": 0.12472715973854065, "learning_rate": 1.5477092425561953e-05, "loss": 0.4307, "step": 10739 }, { "epoch": 2.207832254085723, "grad_norm": 0.1253010481595993, "learning_rate": 1.546955508113571e-05, "loss": 0.4488, "step": 10740 }, { "epoch": 2.2080378250591015, "grad_norm": 0.12054693698883057, "learning_rate": 1.5462019191536478e-05, "loss": 0.4402, "step": 10741 }, { "epoch": 2.20824339603248, "grad_norm": 0.2258850783109665, "learning_rate": 1.5454484757135496e-05, "loss": 0.3804, "step": 10742 }, { "epoch": 2.2084489670058587, "grad_norm": 0.23322363197803497, "learning_rate": 1.5446951778303958e-05, "loss": 0.4058, "step": 10743 }, { "epoch": 2.2086545379792373, "grad_norm": 0.23911800980567932, "learning_rate": 1.543942025541297e-05, "loss": 0.3821, "step": 10744 }, { "epoch": 2.208860108952616, "grad_norm": 0.22474057972431183, "learning_rate": 1.5431890188833585e-05, "loss": 0.3981, "step": 10745 }, { "epoch": 2.2090656799259945, "grad_norm": 0.22120480239391327, "learning_rate": 1.5424361578936754e-05, "loss": 0.4036, "step": 10746 }, { "epoch": 2.209271250899373, "grad_norm": 0.23113922774791718, "learning_rate": 1.5416834426093406e-05, "loss": 0.3996, "step": 10747 }, { "epoch": 2.2094768218727516, "grad_norm": 0.23626331984996796, "learning_rate": 1.5409308730674354e-05, "loss": 0.409, "step": 10748 }, { "epoch": 2.20968239284613, "grad_norm": 0.22344759106636047, "learning_rate": 1.540178449305036e-05, "loss": 0.3952, "step": 10749 }, { "epoch": 2.209887963819509, "grad_norm": 0.23070107400417328, "learning_rate": 1.5394261713592094e-05, "loss": 0.3839, "step": 10750 }, { "epoch": 2.2100935347928874, "grad_norm": 0.22357220947742462, "learning_rate": 1.5386740392670165e-05, "loss": 0.3963, "step": 10751 }, { "epoch": 2.210299105766266, "grad_norm": 0.2235075831413269, "learning_rate": 1.5379220530655138e-05, "loss": 0.3847, "step": 10752 }, { "epoch": 2.2105046767396446, "grad_norm": 0.2250668853521347, "learning_rate": 1.5371702127917458e-05, "loss": 0.3854, "step": 10753 }, { "epoch": 2.2107102477130227, "grad_norm": 0.230119988322258, "learning_rate": 1.5364185184827543e-05, "loss": 0.3914, "step": 10754 }, { "epoch": 2.2109158186864013, "grad_norm": 0.22010499238967896, "learning_rate": 1.5356669701755708e-05, "loss": 0.4028, "step": 10755 }, { "epoch": 2.21112138965978, "grad_norm": 0.22333703935146332, "learning_rate": 1.5349155679072205e-05, "loss": 0.385, "step": 10756 }, { "epoch": 2.2113269606331585, "grad_norm": 0.22866930067539215, "learning_rate": 1.534164311714721e-05, "loss": 0.4027, "step": 10757 }, { "epoch": 2.211532531606537, "grad_norm": 0.22447089850902557, "learning_rate": 1.533413201635084e-05, "loss": 0.4108, "step": 10758 }, { "epoch": 2.2117381025799157, "grad_norm": 0.23292423784732819, "learning_rate": 1.5326622377053125e-05, "loss": 0.4173, "step": 10759 }, { "epoch": 2.2119436735532942, "grad_norm": 0.23067182302474976, "learning_rate": 1.5319114199624018e-05, "loss": 0.3871, "step": 10760 }, { "epoch": 2.212149244526673, "grad_norm": 0.13341167569160461, "learning_rate": 1.5311607484433443e-05, "loss": 0.4604, "step": 10761 }, { "epoch": 2.2123548155000514, "grad_norm": 0.2339571863412857, "learning_rate": 1.53041022318512e-05, "loss": 0.3879, "step": 10762 }, { "epoch": 2.21256038647343, "grad_norm": 0.22482730448246002, "learning_rate": 1.5296598442247045e-05, "loss": 0.4002, "step": 10763 }, { "epoch": 2.2127659574468086, "grad_norm": 0.2297281175851822, "learning_rate": 1.5289096115990654e-05, "loss": 0.4032, "step": 10764 }, { "epoch": 2.212971528420187, "grad_norm": 0.12835589051246643, "learning_rate": 1.5281595253451624e-05, "loss": 0.4497, "step": 10765 }, { "epoch": 2.2131770993935658, "grad_norm": 0.23261982202529907, "learning_rate": 1.52740958549995e-05, "loss": 0.4021, "step": 10766 }, { "epoch": 2.2133826703669444, "grad_norm": 0.22967736423015594, "learning_rate": 1.526659792100371e-05, "loss": 0.3974, "step": 10767 }, { "epoch": 2.213588241340323, "grad_norm": 0.1222897469997406, "learning_rate": 1.5259101451833683e-05, "loss": 0.454, "step": 10768 }, { "epoch": 2.2137938123137015, "grad_norm": 0.22212044894695282, "learning_rate": 1.5251606447858725e-05, "loss": 0.3908, "step": 10769 }, { "epoch": 2.2139993832870797, "grad_norm": 0.23276306688785553, "learning_rate": 1.5244112909448069e-05, "loss": 0.3877, "step": 10770 }, { "epoch": 2.2142049542604583, "grad_norm": 0.12715481221675873, "learning_rate": 1.5236620836970893e-05, "loss": 0.4706, "step": 10771 }, { "epoch": 2.214410525233837, "grad_norm": 0.22773075103759766, "learning_rate": 1.5229130230796281e-05, "loss": 0.4008, "step": 10772 }, { "epoch": 2.2146160962072154, "grad_norm": 0.23511482775211334, "learning_rate": 1.5221641091293283e-05, "loss": 0.4078, "step": 10773 }, { "epoch": 2.214821667180594, "grad_norm": 0.21598058938980103, "learning_rate": 1.521415341883085e-05, "loss": 0.3908, "step": 10774 }, { "epoch": 2.2150272381539726, "grad_norm": 0.23073440790176392, "learning_rate": 1.5206667213777846e-05, "loss": 0.404, "step": 10775 }, { "epoch": 2.215232809127351, "grad_norm": 0.22900259494781494, "learning_rate": 1.5199182476503105e-05, "loss": 0.3845, "step": 10776 }, { "epoch": 2.21543838010073, "grad_norm": 0.26081186532974243, "learning_rate": 1.519169920737536e-05, "loss": 0.397, "step": 10777 }, { "epoch": 2.2156439510741084, "grad_norm": 0.2252834439277649, "learning_rate": 1.5184217406763266e-05, "loss": 0.3678, "step": 10778 }, { "epoch": 2.215849522047487, "grad_norm": 0.2190970927476883, "learning_rate": 1.5176737075035423e-05, "loss": 0.3733, "step": 10779 }, { "epoch": 2.2160550930208656, "grad_norm": 0.23575487732887268, "learning_rate": 1.5169258212560354e-05, "loss": 0.4151, "step": 10780 }, { "epoch": 2.216260663994244, "grad_norm": 0.22723565995693207, "learning_rate": 1.5161780819706485e-05, "loss": 0.382, "step": 10781 }, { "epoch": 2.2164662349676227, "grad_norm": 0.23032769560813904, "learning_rate": 1.5154304896842231e-05, "loss": 0.3863, "step": 10782 }, { "epoch": 2.2166718059410013, "grad_norm": 0.2345583289861679, "learning_rate": 1.5146830444335872e-05, "loss": 0.4049, "step": 10783 }, { "epoch": 2.21687737691438, "grad_norm": 0.22362026572227478, "learning_rate": 1.5139357462555645e-05, "loss": 0.3943, "step": 10784 }, { "epoch": 2.217082947887758, "grad_norm": 0.23059040307998657, "learning_rate": 1.513188595186971e-05, "loss": 0.4008, "step": 10785 }, { "epoch": 2.2172885188611366, "grad_norm": 0.12331248074769974, "learning_rate": 1.5124415912646149e-05, "loss": 0.4494, "step": 10786 }, { "epoch": 2.2174940898345152, "grad_norm": 0.23354892432689667, "learning_rate": 1.5116947345252977e-05, "loss": 0.4016, "step": 10787 }, { "epoch": 2.217699660807894, "grad_norm": 0.232215017080307, "learning_rate": 1.5109480250058124e-05, "loss": 0.403, "step": 10788 }, { "epoch": 2.2179052317812724, "grad_norm": 0.22965744137763977, "learning_rate": 1.5102014627429483e-05, "loss": 0.4111, "step": 10789 }, { "epoch": 2.218110802754651, "grad_norm": 0.22863295674324036, "learning_rate": 1.5094550477734838e-05, "loss": 0.395, "step": 10790 }, { "epoch": 2.2183163737280296, "grad_norm": 0.22686706483364105, "learning_rate": 1.5087087801341914e-05, "loss": 0.4058, "step": 10791 }, { "epoch": 2.218521944701408, "grad_norm": 0.2347644418478012, "learning_rate": 1.5079626598618362e-05, "loss": 0.3953, "step": 10792 }, { "epoch": 2.2187275156747868, "grad_norm": 0.23546837270259857, "learning_rate": 1.5072166869931748e-05, "loss": 0.4049, "step": 10793 }, { "epoch": 2.2189330866481654, "grad_norm": 0.12171991914510727, "learning_rate": 1.5064708615649601e-05, "loss": 0.4516, "step": 10794 }, { "epoch": 2.219138657621544, "grad_norm": 0.23397013545036316, "learning_rate": 1.5057251836139343e-05, "loss": 0.3816, "step": 10795 }, { "epoch": 2.2193442285949225, "grad_norm": 0.22694621980190277, "learning_rate": 1.5049796531768323e-05, "loss": 0.3838, "step": 10796 }, { "epoch": 2.219549799568301, "grad_norm": 0.234305739402771, "learning_rate": 1.5042342702903859e-05, "loss": 0.3874, "step": 10797 }, { "epoch": 2.2197553705416797, "grad_norm": 0.2361372858285904, "learning_rate": 1.5034890349913142e-05, "loss": 0.3964, "step": 10798 }, { "epoch": 2.2199609415150583, "grad_norm": 0.23526331782341003, "learning_rate": 1.502743947316332e-05, "loss": 0.3981, "step": 10799 }, { "epoch": 2.2201665124884364, "grad_norm": 0.23586028814315796, "learning_rate": 1.501999007302147e-05, "loss": 0.4084, "step": 10800 }, { "epoch": 2.220372083461815, "grad_norm": 0.2271769642829895, "learning_rate": 1.5012542149854576e-05, "loss": 0.3905, "step": 10801 }, { "epoch": 2.2205776544351936, "grad_norm": 0.22880828380584717, "learning_rate": 1.5005095704029562e-05, "loss": 0.3896, "step": 10802 }, { "epoch": 2.220783225408572, "grad_norm": 0.2337990701198578, "learning_rate": 1.4997650735913297e-05, "loss": 0.3984, "step": 10803 }, { "epoch": 2.220988796381951, "grad_norm": 0.2161635160446167, "learning_rate": 1.499020724587255e-05, "loss": 0.4006, "step": 10804 }, { "epoch": 2.2211943673553294, "grad_norm": 0.22818011045455933, "learning_rate": 1.4982765234274027e-05, "loss": 0.3912, "step": 10805 }, { "epoch": 2.221399938328708, "grad_norm": 0.22331209480762482, "learning_rate": 1.4975324701484358e-05, "loss": 0.4113, "step": 10806 }, { "epoch": 2.2216055093020866, "grad_norm": 0.21700911223888397, "learning_rate": 1.4967885647870107e-05, "loss": 0.3738, "step": 10807 }, { "epoch": 2.221811080275465, "grad_norm": 0.12261340767145157, "learning_rate": 1.4960448073797765e-05, "loss": 0.4559, "step": 10808 }, { "epoch": 2.2220166512488437, "grad_norm": 0.22570718824863434, "learning_rate": 1.4953011979633725e-05, "loss": 0.4089, "step": 10809 }, { "epoch": 2.2222222222222223, "grad_norm": 0.22284522652626038, "learning_rate": 1.4945577365744356e-05, "loss": 0.406, "step": 10810 }, { "epoch": 2.222427793195601, "grad_norm": 0.2190810590982437, "learning_rate": 1.4938144232495923e-05, "loss": 0.396, "step": 10811 }, { "epoch": 2.2226333641689795, "grad_norm": 0.2320832461118698, "learning_rate": 1.4930712580254612e-05, "loss": 0.4115, "step": 10812 }, { "epoch": 2.222838935142358, "grad_norm": 0.12574470043182373, "learning_rate": 1.4923282409386543e-05, "loss": 0.4488, "step": 10813 }, { "epoch": 2.2230445061157367, "grad_norm": 0.21672125160694122, "learning_rate": 1.4915853720257762e-05, "loss": 0.4069, "step": 10814 }, { "epoch": 2.223250077089115, "grad_norm": 0.2291223555803299, "learning_rate": 1.490842651323427e-05, "loss": 0.4088, "step": 10815 }, { "epoch": 2.2234556480624934, "grad_norm": 0.23085300624370575, "learning_rate": 1.4901000788681959e-05, "loss": 0.3894, "step": 10816 }, { "epoch": 2.223661219035872, "grad_norm": 0.11973418295383453, "learning_rate": 1.489357654696664e-05, "loss": 0.4637, "step": 10817 }, { "epoch": 2.2238667900092506, "grad_norm": 0.2691250741481781, "learning_rate": 1.4886153788454096e-05, "loss": 0.4024, "step": 10818 }, { "epoch": 2.224072360982629, "grad_norm": 0.12348726391792297, "learning_rate": 1.4878732513510012e-05, "loss": 0.4423, "step": 10819 }, { "epoch": 2.2242779319560078, "grad_norm": 0.1290557086467743, "learning_rate": 1.4871312722499987e-05, "loss": 0.4628, "step": 10820 }, { "epoch": 2.2244835029293863, "grad_norm": 0.2316775619983673, "learning_rate": 1.4863894415789562e-05, "loss": 0.3948, "step": 10821 }, { "epoch": 2.224689073902765, "grad_norm": 0.2387668341398239, "learning_rate": 1.4856477593744187e-05, "loss": 0.379, "step": 10822 }, { "epoch": 2.2248946448761435, "grad_norm": 0.22780825197696686, "learning_rate": 1.4849062256729289e-05, "loss": 0.3708, "step": 10823 }, { "epoch": 2.225100215849522, "grad_norm": 0.22622719407081604, "learning_rate": 1.484164840511017e-05, "loss": 0.3871, "step": 10824 }, { "epoch": 2.2253057868229007, "grad_norm": 0.22779934108257294, "learning_rate": 1.4834236039252069e-05, "loss": 0.3736, "step": 10825 }, { "epoch": 2.2255113577962793, "grad_norm": 0.22025705873966217, "learning_rate": 1.4826825159520165e-05, "loss": 0.3883, "step": 10826 }, { "epoch": 2.225716928769658, "grad_norm": 0.21935100853443146, "learning_rate": 1.481941576627956e-05, "loss": 0.3932, "step": 10827 }, { "epoch": 2.2259224997430365, "grad_norm": 0.11909017711877823, "learning_rate": 1.4812007859895275e-05, "loss": 0.4316, "step": 10828 }, { "epoch": 2.226128070716415, "grad_norm": 0.2229301780462265, "learning_rate": 1.4804601440732245e-05, "loss": 0.3889, "step": 10829 }, { "epoch": 2.226333641689793, "grad_norm": 0.2314000278711319, "learning_rate": 1.479719650915539e-05, "loss": 0.4042, "step": 10830 }, { "epoch": 2.226539212663172, "grad_norm": 0.23769402503967285, "learning_rate": 1.4789793065529492e-05, "loss": 0.4003, "step": 10831 }, { "epoch": 2.2267447836365504, "grad_norm": 0.2327127605676651, "learning_rate": 1.478239111021929e-05, "loss": 0.3853, "step": 10832 }, { "epoch": 2.226950354609929, "grad_norm": 0.23596766591072083, "learning_rate": 1.4774990643589441e-05, "loss": 0.4041, "step": 10833 }, { "epoch": 2.2271559255833075, "grad_norm": 0.22967597842216492, "learning_rate": 1.476759166600453e-05, "loss": 0.413, "step": 10834 }, { "epoch": 2.227361496556686, "grad_norm": 0.223694309592247, "learning_rate": 1.476019417782907e-05, "loss": 0.3922, "step": 10835 }, { "epoch": 2.2275670675300647, "grad_norm": 0.22924546897411346, "learning_rate": 1.4752798179427489e-05, "loss": 0.3925, "step": 10836 }, { "epoch": 2.2277726385034433, "grad_norm": 0.2322525531053543, "learning_rate": 1.474540367116418e-05, "loss": 0.4093, "step": 10837 }, { "epoch": 2.227978209476822, "grad_norm": 0.22837835550308228, "learning_rate": 1.4738010653403414e-05, "loss": 0.3959, "step": 10838 }, { "epoch": 2.2281837804502005, "grad_norm": 0.13115087151527405, "learning_rate": 1.4730619126509427e-05, "loss": 0.4592, "step": 10839 }, { "epoch": 2.228389351423579, "grad_norm": 0.24123218655586243, "learning_rate": 1.472322909084636e-05, "loss": 0.389, "step": 10840 }, { "epoch": 2.2285949223969577, "grad_norm": 0.24346770346164703, "learning_rate": 1.4715840546778284e-05, "loss": 0.419, "step": 10841 }, { "epoch": 2.2288004933703363, "grad_norm": 0.2285340279340744, "learning_rate": 1.4708453494669196e-05, "loss": 0.4022, "step": 10842 }, { "epoch": 2.229006064343715, "grad_norm": 0.22701993584632874, "learning_rate": 1.4701067934883007e-05, "loss": 0.3926, "step": 10843 }, { "epoch": 2.2292116353170934, "grad_norm": 0.2268943190574646, "learning_rate": 1.4693683867783597e-05, "loss": 0.3891, "step": 10844 }, { "epoch": 2.2294172062904716, "grad_norm": 0.23047508299350739, "learning_rate": 1.468630129373473e-05, "loss": 0.3973, "step": 10845 }, { "epoch": 2.22962277726385, "grad_norm": 0.2280137687921524, "learning_rate": 1.4678920213100116e-05, "loss": 0.3851, "step": 10846 }, { "epoch": 2.2298283482372288, "grad_norm": 0.2208314836025238, "learning_rate": 1.4671540626243379e-05, "loss": 0.3931, "step": 10847 }, { "epoch": 2.2300339192106073, "grad_norm": 0.23788389563560486, "learning_rate": 1.4664162533528081e-05, "loss": 0.4042, "step": 10848 }, { "epoch": 2.230239490183986, "grad_norm": 0.2255765050649643, "learning_rate": 1.4656785935317708e-05, "loss": 0.3875, "step": 10849 }, { "epoch": 2.2304450611573645, "grad_norm": 0.22221685945987701, "learning_rate": 1.4649410831975656e-05, "loss": 0.3858, "step": 10850 }, { "epoch": 2.230650632130743, "grad_norm": 0.22361934185028076, "learning_rate": 1.4642037223865281e-05, "loss": 0.3891, "step": 10851 }, { "epoch": 2.2308562031041217, "grad_norm": 0.12343227863311768, "learning_rate": 1.4634665111349843e-05, "loss": 0.482, "step": 10852 }, { "epoch": 2.2310617740775003, "grad_norm": 0.12411545217037201, "learning_rate": 1.462729449479253e-05, "loss": 0.4664, "step": 10853 }, { "epoch": 2.231267345050879, "grad_norm": 0.2260737121105194, "learning_rate": 1.4619925374556457e-05, "loss": 0.392, "step": 10854 }, { "epoch": 2.2314729160242575, "grad_norm": 0.2308768928050995, "learning_rate": 1.461255775100466e-05, "loss": 0.4033, "step": 10855 }, { "epoch": 2.231678486997636, "grad_norm": 0.12042105197906494, "learning_rate": 1.460519162450011e-05, "loss": 0.4485, "step": 10856 }, { "epoch": 2.2318840579710146, "grad_norm": 0.22707884013652802, "learning_rate": 1.4597826995405697e-05, "loss": 0.3747, "step": 10857 }, { "epoch": 2.2320896289443932, "grad_norm": 0.23044802248477936, "learning_rate": 1.4590463864084258e-05, "loss": 0.3896, "step": 10858 }, { "epoch": 2.232295199917772, "grad_norm": 0.2284078150987625, "learning_rate": 1.458310223089853e-05, "loss": 0.3806, "step": 10859 }, { "epoch": 2.23250077089115, "grad_norm": 0.12638430297374725, "learning_rate": 1.4575742096211172e-05, "loss": 0.4579, "step": 10860 }, { "epoch": 2.2327063418645285, "grad_norm": 0.12327645719051361, "learning_rate": 1.4568383460384815e-05, "loss": 0.4572, "step": 10861 }, { "epoch": 2.232911912837907, "grad_norm": 0.22871337831020355, "learning_rate": 1.4561026323781969e-05, "loss": 0.3938, "step": 10862 }, { "epoch": 2.2331174838112857, "grad_norm": 0.1175784319639206, "learning_rate": 1.4553670686765082e-05, "loss": 0.4228, "step": 10863 }, { "epoch": 2.2333230547846643, "grad_norm": 0.23156176507472992, "learning_rate": 1.4546316549696521e-05, "loss": 0.3983, "step": 10864 }, { "epoch": 2.233528625758043, "grad_norm": 0.22325018048286438, "learning_rate": 1.453896391293862e-05, "loss": 0.4036, "step": 10865 }, { "epoch": 2.2337341967314215, "grad_norm": 0.2427932471036911, "learning_rate": 1.4531612776853592e-05, "loss": 0.3779, "step": 10866 }, { "epoch": 2.2339397677048, "grad_norm": 0.12050554901361465, "learning_rate": 1.452426314180359e-05, "loss": 0.4408, "step": 10867 }, { "epoch": 2.2341453386781787, "grad_norm": 0.2303098738193512, "learning_rate": 1.4516915008150703e-05, "loss": 0.3944, "step": 10868 }, { "epoch": 2.2343509096515572, "grad_norm": 0.22475799918174744, "learning_rate": 1.4509568376256933e-05, "loss": 0.3911, "step": 10869 }, { "epoch": 2.234556480624936, "grad_norm": 0.12232775241136551, "learning_rate": 1.4502223246484222e-05, "loss": 0.4503, "step": 10870 }, { "epoch": 2.2347620515983144, "grad_norm": 0.23218752443790436, "learning_rate": 1.4494879619194408e-05, "loss": 0.3916, "step": 10871 }, { "epoch": 2.234967622571693, "grad_norm": 0.22913837432861328, "learning_rate": 1.4487537494749308e-05, "loss": 0.3967, "step": 10872 }, { "epoch": 2.2351731935450716, "grad_norm": 0.22640950977802277, "learning_rate": 1.4480196873510623e-05, "loss": 0.3938, "step": 10873 }, { "epoch": 2.23537876451845, "grad_norm": 0.22983142733573914, "learning_rate": 1.4472857755839987e-05, "loss": 0.3957, "step": 10874 }, { "epoch": 2.2355843354918283, "grad_norm": 0.13250325620174408, "learning_rate": 1.4465520142098968e-05, "loss": 0.4521, "step": 10875 }, { "epoch": 2.235789906465207, "grad_norm": 0.12669454514980316, "learning_rate": 1.4458184032649049e-05, "loss": 0.4651, "step": 10876 }, { "epoch": 2.2359954774385855, "grad_norm": 0.22359710931777954, "learning_rate": 1.4450849427851654e-05, "loss": 0.3771, "step": 10877 }, { "epoch": 2.236201048411964, "grad_norm": 0.22868263721466064, "learning_rate": 1.4443516328068107e-05, "loss": 0.3723, "step": 10878 }, { "epoch": 2.2364066193853427, "grad_norm": 0.2262980043888092, "learning_rate": 1.4436184733659704e-05, "loss": 0.3886, "step": 10879 }, { "epoch": 2.2366121903587213, "grad_norm": 0.22829292714595795, "learning_rate": 1.4428854644987623e-05, "loss": 0.3879, "step": 10880 }, { "epoch": 2.2368177613321, "grad_norm": 0.22236782312393188, "learning_rate": 1.4421526062412972e-05, "loss": 0.3716, "step": 10881 }, { "epoch": 2.2370233323054785, "grad_norm": 0.2244395762681961, "learning_rate": 1.4414198986296825e-05, "loss": 0.3716, "step": 10882 }, { "epoch": 2.237228903278857, "grad_norm": 0.23614956438541412, "learning_rate": 1.4406873417000133e-05, "loss": 0.4046, "step": 10883 }, { "epoch": 2.2374344742522356, "grad_norm": 0.23262259364128113, "learning_rate": 1.4399549354883795e-05, "loss": 0.392, "step": 10884 }, { "epoch": 2.237640045225614, "grad_norm": 0.23623405396938324, "learning_rate": 1.439222680030862e-05, "loss": 0.4101, "step": 10885 }, { "epoch": 2.237845616198993, "grad_norm": 0.12626418471336365, "learning_rate": 1.4384905753635388e-05, "loss": 0.436, "step": 10886 }, { "epoch": 2.2380511871723714, "grad_norm": 0.2217606157064438, "learning_rate": 1.437758621522475e-05, "loss": 0.3971, "step": 10887 }, { "epoch": 2.23825675814575, "grad_norm": 0.22895729541778564, "learning_rate": 1.4370268185437314e-05, "loss": 0.4164, "step": 10888 }, { "epoch": 2.2384623291191286, "grad_norm": 0.26154306530952454, "learning_rate": 1.4362951664633601e-05, "loss": 0.411, "step": 10889 }, { "epoch": 2.2386679000925067, "grad_norm": 0.12071531265974045, "learning_rate": 1.4355636653174064e-05, "loss": 0.46, "step": 10890 }, { "epoch": 2.2388734710658853, "grad_norm": 0.23138496279716492, "learning_rate": 1.4348323151419076e-05, "loss": 0.3929, "step": 10891 }, { "epoch": 2.239079042039264, "grad_norm": 0.22143509984016418, "learning_rate": 1.4341011159728923e-05, "loss": 0.3937, "step": 10892 }, { "epoch": 2.2392846130126425, "grad_norm": 0.23120230436325073, "learning_rate": 1.433370067846387e-05, "loss": 0.4061, "step": 10893 }, { "epoch": 2.239490183986021, "grad_norm": 0.22361977398395538, "learning_rate": 1.4326391707984047e-05, "loss": 0.3993, "step": 10894 }, { "epoch": 2.2396957549593997, "grad_norm": 0.1270783543586731, "learning_rate": 1.431908424864954e-05, "loss": 0.424, "step": 10895 }, { "epoch": 2.2399013259327782, "grad_norm": 0.22819988429546356, "learning_rate": 1.4311778300820347e-05, "loss": 0.4009, "step": 10896 }, { "epoch": 2.240106896906157, "grad_norm": 0.22298060357570648, "learning_rate": 1.4304473864856404e-05, "loss": 0.3959, "step": 10897 }, { "epoch": 2.2403124678795354, "grad_norm": 0.22824987769126892, "learning_rate": 1.4297170941117544e-05, "loss": 0.4174, "step": 10898 }, { "epoch": 2.240518038852914, "grad_norm": 0.1287529617547989, "learning_rate": 1.4289869529963582e-05, "loss": 0.4321, "step": 10899 }, { "epoch": 2.2407236098262926, "grad_norm": 0.2339385449886322, "learning_rate": 1.428256963175421e-05, "loss": 0.4036, "step": 10900 }, { "epoch": 2.240929180799671, "grad_norm": 0.22810976207256317, "learning_rate": 1.4275271246849061e-05, "loss": 0.4073, "step": 10901 }, { "epoch": 2.2411347517730498, "grad_norm": 0.22102433443069458, "learning_rate": 1.4267974375607675e-05, "loss": 0.3761, "step": 10902 }, { "epoch": 2.2413403227464284, "grad_norm": 0.2228943556547165, "learning_rate": 1.4260679018389566e-05, "loss": 0.3958, "step": 10903 }, { "epoch": 2.241545893719807, "grad_norm": 0.22356650233268738, "learning_rate": 1.4253385175554126e-05, "loss": 0.3841, "step": 10904 }, { "epoch": 2.241751464693185, "grad_norm": 0.1219724789261818, "learning_rate": 1.4246092847460679e-05, "loss": 0.4373, "step": 10905 }, { "epoch": 2.2419570356665637, "grad_norm": 0.22389782965183258, "learning_rate": 1.42388020344685e-05, "loss": 0.3908, "step": 10906 }, { "epoch": 2.2421626066399423, "grad_norm": 0.22778619825839996, "learning_rate": 1.4231512736936774e-05, "loss": 0.4086, "step": 10907 }, { "epoch": 2.242368177613321, "grad_norm": 0.24095553159713745, "learning_rate": 1.4224224955224604e-05, "loss": 0.3859, "step": 10908 }, { "epoch": 2.2425737485866994, "grad_norm": 0.2397175282239914, "learning_rate": 1.4216938689691019e-05, "loss": 0.4006, "step": 10909 }, { "epoch": 2.242779319560078, "grad_norm": 0.22254031896591187, "learning_rate": 1.4209653940694986e-05, "loss": 0.4021, "step": 10910 }, { "epoch": 2.2429848905334566, "grad_norm": 0.12882784008979797, "learning_rate": 1.4202370708595396e-05, "loss": 0.4369, "step": 10911 }, { "epoch": 2.243190461506835, "grad_norm": 0.13095501065254211, "learning_rate": 1.4195088993751034e-05, "loss": 0.4539, "step": 10912 }, { "epoch": 2.243396032480214, "grad_norm": 0.2357592135667801, "learning_rate": 1.418780879652067e-05, "loss": 0.3915, "step": 10913 }, { "epoch": 2.2436016034535924, "grad_norm": 0.23308870196342468, "learning_rate": 1.4180530117262953e-05, "loss": 0.4003, "step": 10914 }, { "epoch": 2.243807174426971, "grad_norm": 0.22599655389785767, "learning_rate": 1.4173252956336463e-05, "loss": 0.3978, "step": 10915 }, { "epoch": 2.2440127454003496, "grad_norm": 0.23513002693653107, "learning_rate": 1.416597731409972e-05, "loss": 0.3943, "step": 10916 }, { "epoch": 2.244218316373728, "grad_norm": 0.1267446130514145, "learning_rate": 1.4158703190911157e-05, "loss": 0.4464, "step": 10917 }, { "epoch": 2.2444238873471067, "grad_norm": 0.22103582322597504, "learning_rate": 1.4151430587129133e-05, "loss": 0.3842, "step": 10918 }, { "epoch": 2.2446294583204853, "grad_norm": 0.2322588562965393, "learning_rate": 1.4144159503111928e-05, "loss": 0.4096, "step": 10919 }, { "epoch": 2.2448350292938635, "grad_norm": 0.1323188990354538, "learning_rate": 1.4136889939217776e-05, "loss": 0.4459, "step": 10920 }, { "epoch": 2.245040600267242, "grad_norm": 0.2242937535047531, "learning_rate": 1.41296218958048e-05, "loss": 0.3859, "step": 10921 }, { "epoch": 2.2452461712406206, "grad_norm": 0.22466784715652466, "learning_rate": 1.4122355373231073e-05, "loss": 0.3982, "step": 10922 }, { "epoch": 2.2454517422139992, "grad_norm": 0.22480922937393188, "learning_rate": 1.411509037185457e-05, "loss": 0.4073, "step": 10923 }, { "epoch": 2.245657313187378, "grad_norm": 0.12106183916330338, "learning_rate": 1.4107826892033194e-05, "loss": 0.4505, "step": 10924 }, { "epoch": 2.2458628841607564, "grad_norm": 0.2291100174188614, "learning_rate": 1.4100564934124812e-05, "loss": 0.3902, "step": 10925 }, { "epoch": 2.246068455134135, "grad_norm": 0.22419095039367676, "learning_rate": 1.409330449848716e-05, "loss": 0.3931, "step": 10926 }, { "epoch": 2.2462740261075136, "grad_norm": 0.22613660991191864, "learning_rate": 1.4086045585477947e-05, "loss": 0.3922, "step": 10927 }, { "epoch": 2.246479597080892, "grad_norm": 0.22982370853424072, "learning_rate": 1.407878819545478e-05, "loss": 0.399, "step": 10928 }, { "epoch": 2.2466851680542708, "grad_norm": 0.23034709692001343, "learning_rate": 1.4071532328775196e-05, "loss": 0.3812, "step": 10929 }, { "epoch": 2.2468907390276494, "grad_norm": 0.23110920190811157, "learning_rate": 1.4064277985796652e-05, "loss": 0.389, "step": 10930 }, { "epoch": 2.247096310001028, "grad_norm": 0.2307683825492859, "learning_rate": 1.4057025166876537e-05, "loss": 0.4113, "step": 10931 }, { "epoch": 2.2473018809744065, "grad_norm": 0.23556135594844818, "learning_rate": 1.4049773872372172e-05, "loss": 0.3884, "step": 10932 }, { "epoch": 2.247507451947785, "grad_norm": 0.230165496468544, "learning_rate": 1.4042524102640763e-05, "loss": 0.3956, "step": 10933 }, { "epoch": 2.2477130229211637, "grad_norm": 0.22927415370941162, "learning_rate": 1.4035275858039516e-05, "loss": 0.3868, "step": 10934 }, { "epoch": 2.2479185938945423, "grad_norm": 0.22793439030647278, "learning_rate": 1.4028029138925497e-05, "loss": 0.3894, "step": 10935 }, { "epoch": 2.248124164867921, "grad_norm": 0.2283446490764618, "learning_rate": 1.4020783945655724e-05, "loss": 0.3903, "step": 10936 }, { "epoch": 2.248329735841299, "grad_norm": 0.22100144624710083, "learning_rate": 1.4013540278587125e-05, "loss": 0.3942, "step": 10937 }, { "epoch": 2.2485353068146776, "grad_norm": 0.12830045819282532, "learning_rate": 1.4006298138076567e-05, "loss": 0.4512, "step": 10938 }, { "epoch": 2.248740877788056, "grad_norm": 0.2236565202474594, "learning_rate": 1.3999057524480838e-05, "loss": 0.4032, "step": 10939 }, { "epoch": 2.248946448761435, "grad_norm": 0.22065366804599762, "learning_rate": 1.3991818438156628e-05, "loss": 0.3844, "step": 10940 }, { "epoch": 2.2491520197348134, "grad_norm": 0.12815195322036743, "learning_rate": 1.3984580879460613e-05, "loss": 0.4361, "step": 10941 }, { "epoch": 2.249357590708192, "grad_norm": 0.23110713064670563, "learning_rate": 1.3977344848749327e-05, "loss": 0.3976, "step": 10942 }, { "epoch": 2.2495631616815706, "grad_norm": 0.23048558831214905, "learning_rate": 1.3970110346379258e-05, "loss": 0.3893, "step": 10943 }, { "epoch": 2.249768732654949, "grad_norm": 0.12720687687397003, "learning_rate": 1.3962877372706823e-05, "loss": 0.4534, "step": 10944 }, { "epoch": 2.2499743036283277, "grad_norm": 0.2292504608631134, "learning_rate": 1.3955645928088343e-05, "loss": 0.4032, "step": 10945 }, { "epoch": 2.2501798746017063, "grad_norm": 0.26804453134536743, "learning_rate": 1.3948416012880095e-05, "loss": 0.3896, "step": 10946 }, { "epoch": 2.250385445575085, "grad_norm": 0.24208854138851166, "learning_rate": 1.3941187627438255e-05, "loss": 0.4036, "step": 10947 }, { "epoch": 2.2505910165484635, "grad_norm": 0.21898695826530457, "learning_rate": 1.393396077211892e-05, "loss": 0.3847, "step": 10948 }, { "epoch": 2.250796587521842, "grad_norm": 0.24147653579711914, "learning_rate": 1.3926735447278149e-05, "loss": 0.399, "step": 10949 }, { "epoch": 2.2510021584952202, "grad_norm": 0.21761365234851837, "learning_rate": 1.3919511653271885e-05, "loss": 0.3977, "step": 10950 }, { "epoch": 2.2512077294685993, "grad_norm": 0.23133422434329987, "learning_rate": 1.3912289390456018e-05, "loss": 0.3832, "step": 10951 }, { "epoch": 2.2514133004419774, "grad_norm": 0.23142319917678833, "learning_rate": 1.3905068659186345e-05, "loss": 0.4152, "step": 10952 }, { "epoch": 2.251618871415356, "grad_norm": 0.21739207208156586, "learning_rate": 1.3897849459818602e-05, "loss": 0.3866, "step": 10953 }, { "epoch": 2.2518244423887346, "grad_norm": 0.2368880808353424, "learning_rate": 1.389063179270843e-05, "loss": 0.3975, "step": 10954 }, { "epoch": 2.252030013362113, "grad_norm": 0.22230856120586395, "learning_rate": 1.3883415658211439e-05, "loss": 0.3897, "step": 10955 }, { "epoch": 2.2522355843354918, "grad_norm": 0.2135685384273529, "learning_rate": 1.387620105668312e-05, "loss": 0.3953, "step": 10956 }, { "epoch": 2.2524411553088703, "grad_norm": 0.22502809762954712, "learning_rate": 1.3868987988478905e-05, "loss": 0.3849, "step": 10957 }, { "epoch": 2.252646726282249, "grad_norm": 0.12617872655391693, "learning_rate": 1.3861776453954141e-05, "loss": 0.4533, "step": 10958 }, { "epoch": 2.2528522972556275, "grad_norm": 0.12221905589103699, "learning_rate": 1.3854566453464114e-05, "loss": 0.4514, "step": 10959 }, { "epoch": 2.253057868229006, "grad_norm": 0.22371545433998108, "learning_rate": 1.3847357987364026e-05, "loss": 0.4013, "step": 10960 }, { "epoch": 2.2532634392023847, "grad_norm": 0.22430896759033203, "learning_rate": 1.3840151056008989e-05, "loss": 0.3826, "step": 10961 }, { "epoch": 2.2534690101757633, "grad_norm": 0.2251027673482895, "learning_rate": 1.3832945659754084e-05, "loss": 0.39, "step": 10962 }, { "epoch": 2.253674581149142, "grad_norm": 0.21788759529590607, "learning_rate": 1.3825741798954265e-05, "loss": 0.3945, "step": 10963 }, { "epoch": 2.2538801521225205, "grad_norm": 0.2384837120771408, "learning_rate": 1.3818539473964443e-05, "loss": 0.3972, "step": 10964 }, { "epoch": 2.254085723095899, "grad_norm": 0.2365540862083435, "learning_rate": 1.381133868513944e-05, "loss": 0.4051, "step": 10965 }, { "epoch": 2.2542912940692776, "grad_norm": 0.22459320724010468, "learning_rate": 1.3804139432833994e-05, "loss": 0.3933, "step": 10966 }, { "epoch": 2.254496865042656, "grad_norm": 0.2330470085144043, "learning_rate": 1.3796941717402797e-05, "loss": 0.4029, "step": 10967 }, { "epoch": 2.2547024360160344, "grad_norm": 0.2302565574645996, "learning_rate": 1.3789745539200443e-05, "loss": 0.3685, "step": 10968 }, { "epoch": 2.254908006989413, "grad_norm": 0.12435781210660934, "learning_rate": 1.3782550898581435e-05, "loss": 0.465, "step": 10969 }, { "epoch": 2.2551135779627915, "grad_norm": 0.22399941086769104, "learning_rate": 1.377535779590025e-05, "loss": 0.3946, "step": 10970 }, { "epoch": 2.25531914893617, "grad_norm": 0.2299404740333557, "learning_rate": 1.3768166231511242e-05, "loss": 0.3981, "step": 10971 }, { "epoch": 2.2555247199095487, "grad_norm": 0.22755853831768036, "learning_rate": 1.3760976205768704e-05, "loss": 0.4128, "step": 10972 }, { "epoch": 2.2557302908829273, "grad_norm": 0.23051007091999054, "learning_rate": 1.3753787719026858e-05, "loss": 0.4034, "step": 10973 }, { "epoch": 2.255935861856306, "grad_norm": 0.11795416474342346, "learning_rate": 1.3746600771639847e-05, "loss": 0.4349, "step": 10974 }, { "epoch": 2.2561414328296845, "grad_norm": 0.22369509935379028, "learning_rate": 1.3739415363961725e-05, "loss": 0.3958, "step": 10975 }, { "epoch": 2.256347003803063, "grad_norm": 0.224918395280838, "learning_rate": 1.3732231496346506e-05, "loss": 0.4054, "step": 10976 }, { "epoch": 2.2565525747764417, "grad_norm": 0.22502835094928741, "learning_rate": 1.3725049169148101e-05, "loss": 0.3986, "step": 10977 }, { "epoch": 2.2567581457498203, "grad_norm": 0.2298583686351776, "learning_rate": 1.3717868382720342e-05, "loss": 0.4023, "step": 10978 }, { "epoch": 2.256963716723199, "grad_norm": 0.2239440232515335, "learning_rate": 1.3710689137417002e-05, "loss": 0.3776, "step": 10979 }, { "epoch": 2.2571692876965774, "grad_norm": 0.12783947587013245, "learning_rate": 1.3703511433591756e-05, "loss": 0.4592, "step": 10980 }, { "epoch": 2.257374858669956, "grad_norm": 0.23055274784564972, "learning_rate": 1.3696335271598206e-05, "loss": 0.3805, "step": 10981 }, { "epoch": 2.257580429643334, "grad_norm": 0.22777009010314941, "learning_rate": 1.3689160651789923e-05, "loss": 0.3927, "step": 10982 }, { "epoch": 2.2577860006167128, "grad_norm": 0.2232956886291504, "learning_rate": 1.3681987574520346e-05, "loss": 0.3783, "step": 10983 }, { "epoch": 2.2579915715900913, "grad_norm": 0.2353593409061432, "learning_rate": 1.3674816040142864e-05, "loss": 0.4053, "step": 10984 }, { "epoch": 2.25819714256347, "grad_norm": 0.12569645047187805, "learning_rate": 1.3667646049010782e-05, "loss": 0.4533, "step": 10985 }, { "epoch": 2.2584027135368485, "grad_norm": 0.22515416145324707, "learning_rate": 1.3660477601477328e-05, "loss": 0.3757, "step": 10986 }, { "epoch": 2.258608284510227, "grad_norm": 0.13127067685127258, "learning_rate": 1.3653310697895652e-05, "loss": 0.4595, "step": 10987 }, { "epoch": 2.2588138554836057, "grad_norm": 0.22975093126296997, "learning_rate": 1.3646145338618855e-05, "loss": 0.3877, "step": 10988 }, { "epoch": 2.2590194264569843, "grad_norm": 0.22624441981315613, "learning_rate": 1.3638981523999929e-05, "loss": 0.379, "step": 10989 }, { "epoch": 2.259224997430363, "grad_norm": 0.12386941909790039, "learning_rate": 1.3631819254391793e-05, "loss": 0.4457, "step": 10990 }, { "epoch": 2.2594305684037415, "grad_norm": 0.2416963428258896, "learning_rate": 1.3624658530147319e-05, "loss": 0.3763, "step": 10991 }, { "epoch": 2.25963613937712, "grad_norm": 0.22425812482833862, "learning_rate": 1.3617499351619269e-05, "loss": 0.3828, "step": 10992 }, { "epoch": 2.2598417103504986, "grad_norm": 0.13300848007202148, "learning_rate": 1.3610341719160347e-05, "loss": 0.4532, "step": 10993 }, { "epoch": 2.260047281323877, "grad_norm": 0.22609826922416687, "learning_rate": 1.3603185633123177e-05, "loss": 0.3796, "step": 10994 }, { "epoch": 2.260252852297256, "grad_norm": 0.22295403480529785, "learning_rate": 1.3596031093860283e-05, "loss": 0.4128, "step": 10995 }, { "epoch": 2.2604584232706344, "grad_norm": 0.22617916762828827, "learning_rate": 1.3588878101724169e-05, "loss": 0.4004, "step": 10996 }, { "epoch": 2.2606639942440125, "grad_norm": 0.23671671748161316, "learning_rate": 1.3581726657067217e-05, "loss": 0.3947, "step": 10997 }, { "epoch": 2.260869565217391, "grad_norm": 0.2252146303653717, "learning_rate": 1.357457676024175e-05, "loss": 0.3923, "step": 10998 }, { "epoch": 2.2610751361907697, "grad_norm": 0.2305798977613449, "learning_rate": 1.3567428411599997e-05, "loss": 0.4119, "step": 10999 }, { "epoch": 2.2612807071641483, "grad_norm": 0.23965519666671753, "learning_rate": 1.3560281611494131e-05, "loss": 0.3992, "step": 11000 }, { "epoch": 2.261486278137527, "grad_norm": 0.22159597277641296, "learning_rate": 1.355313636027624e-05, "loss": 0.3947, "step": 11001 }, { "epoch": 2.2616918491109055, "grad_norm": 0.23163023591041565, "learning_rate": 1.3545992658298328e-05, "loss": 0.3794, "step": 11002 }, { "epoch": 2.261897420084284, "grad_norm": 0.2376321256160736, "learning_rate": 1.3538850505912354e-05, "loss": 0.3868, "step": 11003 }, { "epoch": 2.2621029910576627, "grad_norm": 0.22760237753391266, "learning_rate": 1.3531709903470169e-05, "loss": 0.3917, "step": 11004 }, { "epoch": 2.2623085620310412, "grad_norm": 0.22676926851272583, "learning_rate": 1.3524570851323556e-05, "loss": 0.3942, "step": 11005 }, { "epoch": 2.26251413300442, "grad_norm": 0.22704067826271057, "learning_rate": 1.351743334982422e-05, "loss": 0.3709, "step": 11006 }, { "epoch": 2.2627197039777984, "grad_norm": 0.24701926112174988, "learning_rate": 1.3510297399323792e-05, "loss": 0.3939, "step": 11007 }, { "epoch": 2.262925274951177, "grad_norm": 0.2252301573753357, "learning_rate": 1.3503163000173827e-05, "loss": 0.373, "step": 11008 }, { "epoch": 2.2631308459245556, "grad_norm": 0.2303270697593689, "learning_rate": 1.3496030152725793e-05, "loss": 0.4049, "step": 11009 }, { "epoch": 2.263336416897934, "grad_norm": 0.22634254395961761, "learning_rate": 1.3488898857331116e-05, "loss": 0.3793, "step": 11010 }, { "epoch": 2.2635419878713128, "grad_norm": 0.231819748878479, "learning_rate": 1.3481769114341098e-05, "loss": 0.3854, "step": 11011 }, { "epoch": 2.263747558844691, "grad_norm": 0.12441035360097885, "learning_rate": 1.3474640924107014e-05, "loss": 0.4482, "step": 11012 }, { "epoch": 2.2639531298180695, "grad_norm": 0.23297782242298126, "learning_rate": 1.3467514286980024e-05, "loss": 0.3978, "step": 11013 }, { "epoch": 2.264158700791448, "grad_norm": 0.23407147824764252, "learning_rate": 1.346038920331122e-05, "loss": 0.3915, "step": 11014 }, { "epoch": 2.2643642717648267, "grad_norm": 0.22615815699100494, "learning_rate": 1.3453265673451623e-05, "loss": 0.3919, "step": 11015 }, { "epoch": 2.2645698427382053, "grad_norm": 0.23967291414737701, "learning_rate": 1.3446143697752166e-05, "loss": 0.3988, "step": 11016 }, { "epoch": 2.264775413711584, "grad_norm": 0.2341252863407135, "learning_rate": 1.3439023276563739e-05, "loss": 0.363, "step": 11017 }, { "epoch": 2.2649809846849625, "grad_norm": 0.22647178173065186, "learning_rate": 1.3431904410237122e-05, "loss": 0.3922, "step": 11018 }, { "epoch": 2.265186555658341, "grad_norm": 0.2393738180398941, "learning_rate": 1.3424787099123023e-05, "loss": 0.3874, "step": 11019 }, { "epoch": 2.2653921266317196, "grad_norm": 0.23167793452739716, "learning_rate": 1.3417671343572087e-05, "loss": 0.3921, "step": 11020 }, { "epoch": 2.265597697605098, "grad_norm": 0.2206806093454361, "learning_rate": 1.3410557143934864e-05, "loss": 0.3988, "step": 11021 }, { "epoch": 2.265803268578477, "grad_norm": 0.22465433180332184, "learning_rate": 1.340344450056184e-05, "loss": 0.3896, "step": 11022 }, { "epoch": 2.2660088395518554, "grad_norm": 0.22498202323913574, "learning_rate": 1.3396333413803412e-05, "loss": 0.3902, "step": 11023 }, { "epoch": 2.266214410525234, "grad_norm": 0.23176932334899902, "learning_rate": 1.3389223884009937e-05, "loss": 0.4043, "step": 11024 }, { "epoch": 2.2664199814986126, "grad_norm": 0.22066771984100342, "learning_rate": 1.3382115911531653e-05, "loss": 0.3588, "step": 11025 }, { "epoch": 2.266625552471991, "grad_norm": 0.23479969799518585, "learning_rate": 1.3375009496718729e-05, "loss": 0.4034, "step": 11026 }, { "epoch": 2.2668311234453693, "grad_norm": 0.21714085340499878, "learning_rate": 1.336790463992128e-05, "loss": 0.4034, "step": 11027 }, { "epoch": 2.267036694418748, "grad_norm": 0.22929847240447998, "learning_rate": 1.336080134148932e-05, "loss": 0.4047, "step": 11028 }, { "epoch": 2.2672422653921265, "grad_norm": 0.23881329596042633, "learning_rate": 1.3353699601772797e-05, "loss": 0.3813, "step": 11029 }, { "epoch": 2.267447836365505, "grad_norm": 0.22318050265312195, "learning_rate": 1.3346599421121562e-05, "loss": 0.4027, "step": 11030 }, { "epoch": 2.2676534073388837, "grad_norm": 0.21505969762802124, "learning_rate": 1.3339500799885443e-05, "loss": 0.3957, "step": 11031 }, { "epoch": 2.2678589783122622, "grad_norm": 0.22498784959316254, "learning_rate": 1.3332403738414138e-05, "loss": 0.3994, "step": 11032 }, { "epoch": 2.268064549285641, "grad_norm": 0.23193588852882385, "learning_rate": 1.3325308237057274e-05, "loss": 0.3767, "step": 11033 }, { "epoch": 2.2682701202590194, "grad_norm": 0.2315264791250229, "learning_rate": 1.3318214296164444e-05, "loss": 0.4012, "step": 11034 }, { "epoch": 2.268475691232398, "grad_norm": 0.2320316731929779, "learning_rate": 1.3311121916085105e-05, "loss": 0.3979, "step": 11035 }, { "epoch": 2.2686812622057766, "grad_norm": 0.22784501314163208, "learning_rate": 1.3304031097168684e-05, "loss": 0.3942, "step": 11036 }, { "epoch": 2.268886833179155, "grad_norm": 0.22963948547840118, "learning_rate": 1.329694183976449e-05, "loss": 0.3872, "step": 11037 }, { "epoch": 2.2690924041525338, "grad_norm": 0.2397637516260147, "learning_rate": 1.32898541442218e-05, "loss": 0.4042, "step": 11038 }, { "epoch": 2.2692979751259124, "grad_norm": 0.22877174615859985, "learning_rate": 1.3282768010889788e-05, "loss": 0.39, "step": 11039 }, { "epoch": 2.269503546099291, "grad_norm": 0.21806636452674866, "learning_rate": 1.3275683440117551e-05, "loss": 0.3721, "step": 11040 }, { "epoch": 2.2697091170726695, "grad_norm": 0.22859534621238708, "learning_rate": 1.3268600432254108e-05, "loss": 0.4001, "step": 11041 }, { "epoch": 2.2699146880460477, "grad_norm": 0.22555097937583923, "learning_rate": 1.3261518987648413e-05, "loss": 0.3969, "step": 11042 }, { "epoch": 2.2701202590194263, "grad_norm": 0.22480298578739166, "learning_rate": 1.3254439106649332e-05, "loss": 0.3929, "step": 11043 }, { "epoch": 2.270325829992805, "grad_norm": 0.13393786549568176, "learning_rate": 1.324736078960564e-05, "loss": 0.4585, "step": 11044 }, { "epoch": 2.2705314009661834, "grad_norm": 0.22970856726169586, "learning_rate": 1.324028403686609e-05, "loss": 0.4069, "step": 11045 }, { "epoch": 2.270736971939562, "grad_norm": 0.22466929256916046, "learning_rate": 1.3233208848779298e-05, "loss": 0.3929, "step": 11046 }, { "epoch": 2.2709425429129406, "grad_norm": 0.12328503280878067, "learning_rate": 1.3226135225693829e-05, "loss": 0.4301, "step": 11047 }, { "epoch": 2.271148113886319, "grad_norm": 0.2344934195280075, "learning_rate": 1.3219063167958165e-05, "loss": 0.3806, "step": 11048 }, { "epoch": 2.271353684859698, "grad_norm": 0.23457783460617065, "learning_rate": 1.3211992675920716e-05, "loss": 0.3918, "step": 11049 }, { "epoch": 2.2715592558330764, "grad_norm": 0.12788406014442444, "learning_rate": 1.3204923749929811e-05, "loss": 0.4623, "step": 11050 }, { "epoch": 2.271764826806455, "grad_norm": 0.12366097420454025, "learning_rate": 1.319785639033369e-05, "loss": 0.431, "step": 11051 }, { "epoch": 2.2719703977798336, "grad_norm": 0.22478674352169037, "learning_rate": 1.3190790597480558e-05, "loss": 0.4044, "step": 11052 }, { "epoch": 2.272175968753212, "grad_norm": 0.2239609956741333, "learning_rate": 1.3183726371718493e-05, "loss": 0.3959, "step": 11053 }, { "epoch": 2.2723815397265907, "grad_norm": 0.22685250639915466, "learning_rate": 1.3176663713395506e-05, "loss": 0.4002, "step": 11054 }, { "epoch": 2.2725871106999693, "grad_norm": 0.2281496375799179, "learning_rate": 1.3169602622859576e-05, "loss": 0.3986, "step": 11055 }, { "epoch": 2.272792681673348, "grad_norm": 0.23187507688999176, "learning_rate": 1.3162543100458542e-05, "loss": 0.4239, "step": 11056 }, { "epoch": 2.272998252646726, "grad_norm": 0.2259424477815628, "learning_rate": 1.3155485146540192e-05, "loss": 0.381, "step": 11057 }, { "epoch": 2.273203823620105, "grad_norm": 0.23765668272972107, "learning_rate": 1.3148428761452263e-05, "loss": 0.4185, "step": 11058 }, { "epoch": 2.2734093945934832, "grad_norm": 0.23085662722587585, "learning_rate": 1.3141373945542375e-05, "loss": 0.4, "step": 11059 }, { "epoch": 2.273614965566862, "grad_norm": 0.22228921949863434, "learning_rate": 1.3134320699158083e-05, "loss": 0.3736, "step": 11060 }, { "epoch": 2.2738205365402404, "grad_norm": 0.21951285004615784, "learning_rate": 1.3127269022646872e-05, "loss": 0.3928, "step": 11061 }, { "epoch": 2.274026107513619, "grad_norm": 0.1213352307677269, "learning_rate": 1.3120218916356144e-05, "loss": 0.4417, "step": 11062 }, { "epoch": 2.2742316784869976, "grad_norm": 0.23710954189300537, "learning_rate": 1.3113170380633223e-05, "loss": 0.3963, "step": 11063 }, { "epoch": 2.274437249460376, "grad_norm": 0.23138689994812012, "learning_rate": 1.310612341582535e-05, "loss": 0.3926, "step": 11064 }, { "epoch": 2.2746428204337548, "grad_norm": 0.12516102194786072, "learning_rate": 1.309907802227971e-05, "loss": 0.4632, "step": 11065 }, { "epoch": 2.2748483914071334, "grad_norm": 0.1229373887181282, "learning_rate": 1.3092034200343395e-05, "loss": 0.4587, "step": 11066 }, { "epoch": 2.275053962380512, "grad_norm": 0.12089274078607559, "learning_rate": 1.308499195036342e-05, "loss": 0.4485, "step": 11067 }, { "epoch": 2.2752595333538905, "grad_norm": 0.23402529954910278, "learning_rate": 1.3077951272686716e-05, "loss": 0.4031, "step": 11068 }, { "epoch": 2.275465104327269, "grad_norm": 0.12246517091989517, "learning_rate": 1.3070912167660153e-05, "loss": 0.4518, "step": 11069 }, { "epoch": 2.2756706753006477, "grad_norm": 0.22479888796806335, "learning_rate": 1.3063874635630514e-05, "loss": 0.4006, "step": 11070 }, { "epoch": 2.2758762462740263, "grad_norm": 0.2248338758945465, "learning_rate": 1.3056838676944483e-05, "loss": 0.3937, "step": 11071 }, { "epoch": 2.2760818172474044, "grad_norm": 0.23100706934928894, "learning_rate": 1.3049804291948727e-05, "loss": 0.3983, "step": 11072 }, { "epoch": 2.2762873882207835, "grad_norm": 0.23669414222240448, "learning_rate": 1.3042771480989777e-05, "loss": 0.4027, "step": 11073 }, { "epoch": 2.2764929591941616, "grad_norm": 0.1265943944454193, "learning_rate": 1.303574024441411e-05, "loss": 0.4579, "step": 11074 }, { "epoch": 2.27669853016754, "grad_norm": 0.23661333322525024, "learning_rate": 1.3028710582568104e-05, "loss": 0.3944, "step": 11075 }, { "epoch": 2.276904101140919, "grad_norm": 0.1238350123167038, "learning_rate": 1.3021682495798108e-05, "loss": 0.4527, "step": 11076 }, { "epoch": 2.2771096721142974, "grad_norm": 0.23075202107429504, "learning_rate": 1.3014655984450351e-05, "loss": 0.4139, "step": 11077 }, { "epoch": 2.277315243087676, "grad_norm": 0.23109117150306702, "learning_rate": 1.300763104887098e-05, "loss": 0.3795, "step": 11078 }, { "epoch": 2.2775208140610546, "grad_norm": 0.13491906225681305, "learning_rate": 1.300060768940611e-05, "loss": 0.4503, "step": 11079 }, { "epoch": 2.277726385034433, "grad_norm": 0.22590011358261108, "learning_rate": 1.2993585906401735e-05, "loss": 0.3878, "step": 11080 }, { "epoch": 2.2779319560078117, "grad_norm": 0.23638883233070374, "learning_rate": 1.2986565700203778e-05, "loss": 0.3989, "step": 11081 }, { "epoch": 2.2781375269811903, "grad_norm": 0.2324167639017105, "learning_rate": 1.2979547071158106e-05, "loss": 0.3983, "step": 11082 }, { "epoch": 2.278343097954569, "grad_norm": 0.22499267756938934, "learning_rate": 1.2972530019610482e-05, "loss": 0.3917, "step": 11083 }, { "epoch": 2.2785486689279475, "grad_norm": 0.23397715389728546, "learning_rate": 1.2965514545906612e-05, "loss": 0.4039, "step": 11084 }, { "epoch": 2.278754239901326, "grad_norm": 0.12136294692754745, "learning_rate": 1.2958500650392098e-05, "loss": 0.4592, "step": 11085 }, { "epoch": 2.2789598108747047, "grad_norm": 0.23275341093540192, "learning_rate": 1.2951488333412505e-05, "loss": 0.3907, "step": 11086 }, { "epoch": 2.279165381848083, "grad_norm": 0.23098520934581757, "learning_rate": 1.294447759531329e-05, "loss": 0.3933, "step": 11087 }, { "epoch": 2.279370952821462, "grad_norm": 0.2239454835653305, "learning_rate": 1.2937468436439835e-05, "loss": 0.3851, "step": 11088 }, { "epoch": 2.27957652379484, "grad_norm": 0.23332616686820984, "learning_rate": 1.2930460857137452e-05, "loss": 0.4186, "step": 11089 }, { "epoch": 2.2797820947682186, "grad_norm": 0.22289900481700897, "learning_rate": 1.2923454857751368e-05, "loss": 0.3918, "step": 11090 }, { "epoch": 2.279987665741597, "grad_norm": 0.11850762367248535, "learning_rate": 1.2916450438626742e-05, "loss": 0.4475, "step": 11091 }, { "epoch": 2.2801932367149758, "grad_norm": 0.22523003816604614, "learning_rate": 1.2909447600108626e-05, "loss": 0.3886, "step": 11092 }, { "epoch": 2.2803988076883543, "grad_norm": 0.23885266482830048, "learning_rate": 1.2902446342542053e-05, "loss": 0.4051, "step": 11093 }, { "epoch": 2.280604378661733, "grad_norm": 0.2248595505952835, "learning_rate": 1.2895446666271926e-05, "loss": 0.3843, "step": 11094 }, { "epoch": 2.2808099496351115, "grad_norm": 0.23855264484882355, "learning_rate": 1.2888448571643081e-05, "loss": 0.3936, "step": 11095 }, { "epoch": 2.28101552060849, "grad_norm": 0.2420293390750885, "learning_rate": 1.2881452059000287e-05, "loss": 0.3967, "step": 11096 }, { "epoch": 2.2812210915818687, "grad_norm": 0.22361691296100616, "learning_rate": 1.2874457128688216e-05, "loss": 0.3815, "step": 11097 }, { "epoch": 2.2814266625552473, "grad_norm": 0.13447174429893494, "learning_rate": 1.28674637810515e-05, "loss": 0.4621, "step": 11098 }, { "epoch": 2.281632233528626, "grad_norm": 0.23001371324062347, "learning_rate": 1.2860472016434645e-05, "loss": 0.3698, "step": 11099 }, { "epoch": 2.2818378045020045, "grad_norm": 0.2274404913187027, "learning_rate": 1.2853481835182129e-05, "loss": 0.3959, "step": 11100 }, { "epoch": 2.282043375475383, "grad_norm": 0.23622088134288788, "learning_rate": 1.2846493237638308e-05, "loss": 0.4038, "step": 11101 }, { "epoch": 2.282248946448761, "grad_norm": 0.11896710842847824, "learning_rate": 1.283950622414748e-05, "loss": 0.4503, "step": 11102 }, { "epoch": 2.2824545174221402, "grad_norm": 0.23470290005207062, "learning_rate": 1.2832520795053865e-05, "loss": 0.3857, "step": 11103 }, { "epoch": 2.2826600883955184, "grad_norm": 0.2171606570482254, "learning_rate": 1.2825536950701594e-05, "loss": 0.4002, "step": 11104 }, { "epoch": 2.282865659368897, "grad_norm": 0.23823009431362152, "learning_rate": 1.281855469143474e-05, "loss": 0.3899, "step": 11105 }, { "epoch": 2.2830712303422755, "grad_norm": 0.22637523710727692, "learning_rate": 1.2811574017597265e-05, "loss": 0.3961, "step": 11106 }, { "epoch": 2.283276801315654, "grad_norm": 0.23832228779792786, "learning_rate": 1.2804594929533107e-05, "loss": 0.4002, "step": 11107 }, { "epoch": 2.2834823722890327, "grad_norm": 0.22340717911720276, "learning_rate": 1.2797617427586071e-05, "loss": 0.3843, "step": 11108 }, { "epoch": 2.2836879432624113, "grad_norm": 0.2311078906059265, "learning_rate": 1.2790641512099914e-05, "loss": 0.3848, "step": 11109 }, { "epoch": 2.28389351423579, "grad_norm": 0.1308235377073288, "learning_rate": 1.2783667183418299e-05, "loss": 0.4372, "step": 11110 }, { "epoch": 2.2840990852091685, "grad_norm": 0.22774946689605713, "learning_rate": 1.2776694441884828e-05, "loss": 0.4162, "step": 11111 }, { "epoch": 2.284304656182547, "grad_norm": 0.23029407858848572, "learning_rate": 1.2769723287843009e-05, "loss": 0.4024, "step": 11112 }, { "epoch": 2.2845102271559257, "grad_norm": 0.126814067363739, "learning_rate": 1.2762753721636263e-05, "loss": 0.4453, "step": 11113 }, { "epoch": 2.2847157981293043, "grad_norm": 0.1285434365272522, "learning_rate": 1.2755785743607981e-05, "loss": 0.4571, "step": 11114 }, { "epoch": 2.284921369102683, "grad_norm": 0.22413338720798492, "learning_rate": 1.2748819354101428e-05, "loss": 0.4142, "step": 11115 }, { "epoch": 2.2851269400760614, "grad_norm": 0.2274656891822815, "learning_rate": 1.2741854553459801e-05, "loss": 0.3934, "step": 11116 }, { "epoch": 2.2853325110494396, "grad_norm": 0.2260764241218567, "learning_rate": 1.2734891342026228e-05, "loss": 0.3912, "step": 11117 }, { "epoch": 2.2855380820228186, "grad_norm": 0.24936430156230927, "learning_rate": 1.2727929720143737e-05, "loss": 0.3797, "step": 11118 }, { "epoch": 2.2857436529961968, "grad_norm": 0.12210172414779663, "learning_rate": 1.2720969688155326e-05, "loss": 0.4556, "step": 11119 }, { "epoch": 2.2859492239695753, "grad_norm": 0.23101243376731873, "learning_rate": 1.2714011246403862e-05, "loss": 0.3901, "step": 11120 }, { "epoch": 2.286154794942954, "grad_norm": 0.22702264785766602, "learning_rate": 1.2707054395232148e-05, "loss": 0.4061, "step": 11121 }, { "epoch": 2.2863603659163325, "grad_norm": 0.12117066979408264, "learning_rate": 1.270009913498294e-05, "loss": 0.4418, "step": 11122 }, { "epoch": 2.286565936889711, "grad_norm": 0.12678340077400208, "learning_rate": 1.2693145465998878e-05, "loss": 0.462, "step": 11123 }, { "epoch": 2.2867715078630897, "grad_norm": 0.1255645453929901, "learning_rate": 1.2686193388622541e-05, "loss": 0.4692, "step": 11124 }, { "epoch": 2.2869770788364683, "grad_norm": 0.2327447086572647, "learning_rate": 1.2679242903196418e-05, "loss": 0.4108, "step": 11125 }, { "epoch": 2.287182649809847, "grad_norm": 0.23680876195430756, "learning_rate": 1.267229401006293e-05, "loss": 0.3892, "step": 11126 }, { "epoch": 2.2873882207832255, "grad_norm": 0.22818145155906677, "learning_rate": 1.2665346709564407e-05, "loss": 0.4014, "step": 11127 }, { "epoch": 2.287593791756604, "grad_norm": 0.2357787936925888, "learning_rate": 1.2658401002043128e-05, "loss": 0.3958, "step": 11128 }, { "epoch": 2.2877993627299826, "grad_norm": 0.12954148650169373, "learning_rate": 1.2651456887841272e-05, "loss": 0.4567, "step": 11129 }, { "epoch": 2.288004933703361, "grad_norm": 0.23145915567874908, "learning_rate": 1.2644514367300932e-05, "loss": 0.4028, "step": 11130 }, { "epoch": 2.28821050467674, "grad_norm": 0.22589780390262604, "learning_rate": 1.2637573440764148e-05, "loss": 0.3977, "step": 11131 }, { "epoch": 2.2884160756501184, "grad_norm": 0.23484013974666595, "learning_rate": 1.2630634108572853e-05, "loss": 0.3964, "step": 11132 }, { "epoch": 2.288621646623497, "grad_norm": 0.23270565271377563, "learning_rate": 1.2623696371068912e-05, "loss": 0.3953, "step": 11133 }, { "epoch": 2.288827217596875, "grad_norm": 0.12677009403705597, "learning_rate": 1.2616760228594133e-05, "loss": 0.4461, "step": 11134 }, { "epoch": 2.2890327885702537, "grad_norm": 0.22877991199493408, "learning_rate": 1.2609825681490221e-05, "loss": 0.3859, "step": 11135 }, { "epoch": 2.2892383595436323, "grad_norm": 0.23278361558914185, "learning_rate": 1.260289273009881e-05, "loss": 0.3986, "step": 11136 }, { "epoch": 2.289443930517011, "grad_norm": 0.2246071696281433, "learning_rate": 1.2595961374761448e-05, "loss": 0.3715, "step": 11137 }, { "epoch": 2.2896495014903895, "grad_norm": 0.23304541409015656, "learning_rate": 1.2589031615819613e-05, "loss": 0.3874, "step": 11138 }, { "epoch": 2.289855072463768, "grad_norm": 0.2341768443584442, "learning_rate": 1.2582103453614684e-05, "loss": 0.3995, "step": 11139 }, { "epoch": 2.2900606434371467, "grad_norm": 0.22343499958515167, "learning_rate": 1.2575176888488016e-05, "loss": 0.3997, "step": 11140 }, { "epoch": 2.2902662144105252, "grad_norm": 0.22474630177021027, "learning_rate": 1.2568251920780829e-05, "loss": 0.4096, "step": 11141 }, { "epoch": 2.290471785383904, "grad_norm": 0.1266659051179886, "learning_rate": 1.2561328550834265e-05, "loss": 0.4552, "step": 11142 }, { "epoch": 2.2906773563572824, "grad_norm": 0.2366304099559784, "learning_rate": 1.2554406778989448e-05, "loss": 0.3886, "step": 11143 }, { "epoch": 2.290882927330661, "grad_norm": 0.23987746238708496, "learning_rate": 1.2547486605587354e-05, "loss": 0.4198, "step": 11144 }, { "epoch": 2.2910884983040396, "grad_norm": 0.12243471294641495, "learning_rate": 1.2540568030968911e-05, "loss": 0.4459, "step": 11145 }, { "epoch": 2.291294069277418, "grad_norm": 0.12086188048124313, "learning_rate": 1.2533651055474965e-05, "loss": 0.4536, "step": 11146 }, { "epoch": 2.2914996402507968, "grad_norm": 0.23374128341674805, "learning_rate": 1.2526735679446273e-05, "loss": 0.3984, "step": 11147 }, { "epoch": 2.2917052112241754, "grad_norm": 0.23066291213035583, "learning_rate": 1.2519821903223552e-05, "loss": 0.4043, "step": 11148 }, { "epoch": 2.2919107821975535, "grad_norm": 0.227426216006279, "learning_rate": 1.2512909727147388e-05, "loss": 0.4083, "step": 11149 }, { "epoch": 2.292116353170932, "grad_norm": 0.22349144518375397, "learning_rate": 1.2505999151558319e-05, "loss": 0.4062, "step": 11150 }, { "epoch": 2.2923219241443107, "grad_norm": 0.22015713155269623, "learning_rate": 1.2499090176796794e-05, "loss": 0.3929, "step": 11151 }, { "epoch": 2.2925274951176893, "grad_norm": 0.22965404391288757, "learning_rate": 1.2492182803203188e-05, "loss": 0.3723, "step": 11152 }, { "epoch": 2.292733066091068, "grad_norm": 0.22359246015548706, "learning_rate": 1.24852770311178e-05, "loss": 0.399, "step": 11153 }, { "epoch": 2.2929386370644464, "grad_norm": 0.2246733158826828, "learning_rate": 1.2478372860880819e-05, "loss": 0.4153, "step": 11154 }, { "epoch": 2.293144208037825, "grad_norm": 0.23003293573856354, "learning_rate": 1.2471470292832414e-05, "loss": 0.4202, "step": 11155 }, { "epoch": 2.2933497790112036, "grad_norm": 0.22609424591064453, "learning_rate": 1.2464569327312634e-05, "loss": 0.3861, "step": 11156 }, { "epoch": 2.293555349984582, "grad_norm": 0.233436718583107, "learning_rate": 1.2457669964661447e-05, "loss": 0.4113, "step": 11157 }, { "epoch": 2.293760920957961, "grad_norm": 0.2230585813522339, "learning_rate": 1.2450772205218768e-05, "loss": 0.3785, "step": 11158 }, { "epoch": 2.2939664919313394, "grad_norm": 0.13363520801067352, "learning_rate": 1.2443876049324401e-05, "loss": 0.4589, "step": 11159 }, { "epoch": 2.294172062904718, "grad_norm": 0.23311814665794373, "learning_rate": 1.2436981497318081e-05, "loss": 0.398, "step": 11160 }, { "epoch": 2.2943776338780966, "grad_norm": 0.23788057267665863, "learning_rate": 1.2430088549539498e-05, "loss": 0.3656, "step": 11161 }, { "epoch": 2.294583204851475, "grad_norm": 0.23247785866260529, "learning_rate": 1.2423197206328219e-05, "loss": 0.416, "step": 11162 }, { "epoch": 2.2947887758248537, "grad_norm": 0.12585797905921936, "learning_rate": 1.2416307468023738e-05, "loss": 0.4245, "step": 11163 }, { "epoch": 2.294994346798232, "grad_norm": 0.11753173917531967, "learning_rate": 1.2409419334965507e-05, "loss": 0.4366, "step": 11164 }, { "epoch": 2.2951999177716105, "grad_norm": 0.11819145828485489, "learning_rate": 1.2402532807492854e-05, "loss": 0.4381, "step": 11165 }, { "epoch": 2.295405488744989, "grad_norm": 0.2348855584859848, "learning_rate": 1.2395647885945055e-05, "loss": 0.3894, "step": 11166 }, { "epoch": 2.2956110597183677, "grad_norm": 0.11923953890800476, "learning_rate": 1.238876457066129e-05, "loss": 0.4363, "step": 11167 }, { "epoch": 2.2958166306917462, "grad_norm": 0.23349328339099884, "learning_rate": 1.2381882861980653e-05, "loss": 0.3905, "step": 11168 }, { "epoch": 2.296022201665125, "grad_norm": 0.2256205677986145, "learning_rate": 1.2375002760242207e-05, "loss": 0.385, "step": 11169 }, { "epoch": 2.2962277726385034, "grad_norm": 0.23128965497016907, "learning_rate": 1.2368124265784888e-05, "loss": 0.3942, "step": 11170 }, { "epoch": 2.296433343611882, "grad_norm": 0.12350024282932281, "learning_rate": 1.2361247378947561e-05, "loss": 0.4333, "step": 11171 }, { "epoch": 2.2966389145852606, "grad_norm": 0.23417676985263824, "learning_rate": 1.2354372100069026e-05, "loss": 0.3891, "step": 11172 }, { "epoch": 2.296844485558639, "grad_norm": 0.22731667757034302, "learning_rate": 1.2347498429487991e-05, "loss": 0.3977, "step": 11173 }, { "epoch": 2.2970500565320178, "grad_norm": 0.2296586036682129, "learning_rate": 1.2340626367543091e-05, "loss": 0.4054, "step": 11174 }, { "epoch": 2.2972556275053964, "grad_norm": 0.13354873657226562, "learning_rate": 1.2333755914572868e-05, "loss": 0.4622, "step": 11175 }, { "epoch": 2.297461198478775, "grad_norm": 0.22536778450012207, "learning_rate": 1.2326887070915823e-05, "loss": 0.3746, "step": 11176 }, { "epoch": 2.2976667694521535, "grad_norm": 0.22419311106204987, "learning_rate": 1.2320019836910335e-05, "loss": 0.4029, "step": 11177 }, { "epoch": 2.297872340425532, "grad_norm": 0.2210252434015274, "learning_rate": 1.231315421289473e-05, "loss": 0.3709, "step": 11178 }, { "epoch": 2.2980779113989103, "grad_norm": 0.22239845991134644, "learning_rate": 1.2306290199207233e-05, "loss": 0.3892, "step": 11179 }, { "epoch": 2.298283482372289, "grad_norm": 0.22236813604831696, "learning_rate": 1.2299427796186008e-05, "loss": 0.4075, "step": 11180 }, { "epoch": 2.2984890533456674, "grad_norm": 0.22609713673591614, "learning_rate": 1.229256700416914e-05, "loss": 0.3968, "step": 11181 }, { "epoch": 2.298694624319046, "grad_norm": 0.23106250166893005, "learning_rate": 1.2285707823494599e-05, "loss": 0.3792, "step": 11182 }, { "epoch": 2.2989001952924246, "grad_norm": 0.22286170721054077, "learning_rate": 1.2278850254500348e-05, "loss": 0.3835, "step": 11183 }, { "epoch": 2.299105766265803, "grad_norm": 0.229881152510643, "learning_rate": 1.227199429752419e-05, "loss": 0.3851, "step": 11184 }, { "epoch": 2.299311337239182, "grad_norm": 0.1258445382118225, "learning_rate": 1.2265139952903916e-05, "loss": 0.4364, "step": 11185 }, { "epoch": 2.2995169082125604, "grad_norm": 0.22773106396198273, "learning_rate": 1.2258287220977196e-05, "loss": 0.4042, "step": 11186 }, { "epoch": 2.299722479185939, "grad_norm": 0.22230634093284607, "learning_rate": 1.225143610208163e-05, "loss": 0.3832, "step": 11187 }, { "epoch": 2.2999280501593176, "grad_norm": 0.23126055300235748, "learning_rate": 1.2244586596554739e-05, "loss": 0.3922, "step": 11188 }, { "epoch": 2.300133621132696, "grad_norm": 0.2347308248281479, "learning_rate": 1.2237738704733954e-05, "loss": 0.3671, "step": 11189 }, { "epoch": 2.3003391921060747, "grad_norm": 0.12365079671144485, "learning_rate": 1.2230892426956669e-05, "loss": 0.4378, "step": 11190 }, { "epoch": 2.3005447630794533, "grad_norm": 0.22160682082176208, "learning_rate": 1.222404776356015e-05, "loss": 0.388, "step": 11191 }, { "epoch": 2.300750334052832, "grad_norm": 0.22561746835708618, "learning_rate": 1.2217204714881603e-05, "loss": 0.3529, "step": 11192 }, { "epoch": 2.3009559050262105, "grad_norm": 0.27136144042015076, "learning_rate": 1.2210363281258155e-05, "loss": 0.3885, "step": 11193 }, { "epoch": 2.3011614759995886, "grad_norm": 0.22475385665893555, "learning_rate": 1.220352346302685e-05, "loss": 0.3874, "step": 11194 }, { "epoch": 2.3013670469729672, "grad_norm": 0.23630446195602417, "learning_rate": 1.2196685260524648e-05, "loss": 0.3871, "step": 11195 }, { "epoch": 2.301572617946346, "grad_norm": 0.12092158198356628, "learning_rate": 1.2189848674088433e-05, "loss": 0.4375, "step": 11196 }, { "epoch": 2.3017781889197244, "grad_norm": 0.23177292943000793, "learning_rate": 1.2183013704055033e-05, "loss": 0.4025, "step": 11197 }, { "epoch": 2.301983759893103, "grad_norm": 0.12416423112154007, "learning_rate": 1.2176180350761157e-05, "loss": 0.4473, "step": 11198 }, { "epoch": 2.3021893308664816, "grad_norm": 0.12276289612054825, "learning_rate": 1.2169348614543464e-05, "loss": 0.4537, "step": 11199 }, { "epoch": 2.30239490183986, "grad_norm": 0.22835765779018402, "learning_rate": 1.216251849573851e-05, "loss": 0.3937, "step": 11200 }, { "epoch": 2.3026004728132388, "grad_norm": 0.22718718647956848, "learning_rate": 1.2155689994682788e-05, "loss": 0.3896, "step": 11201 }, { "epoch": 2.3028060437866174, "grad_norm": 0.1231781542301178, "learning_rate": 1.2148863111712704e-05, "loss": 0.447, "step": 11202 }, { "epoch": 2.303011614759996, "grad_norm": 0.23988062143325806, "learning_rate": 1.214203784716458e-05, "loss": 0.3919, "step": 11203 }, { "epoch": 2.3032171857333745, "grad_norm": 0.21849578619003296, "learning_rate": 1.2135214201374685e-05, "loss": 0.3758, "step": 11204 }, { "epoch": 2.303422756706753, "grad_norm": 0.2158803790807724, "learning_rate": 1.2128392174679179e-05, "loss": 0.3704, "step": 11205 }, { "epoch": 2.3036283276801317, "grad_norm": 0.22733426094055176, "learning_rate": 1.212157176741413e-05, "loss": 0.3694, "step": 11206 }, { "epoch": 2.3038338986535103, "grad_norm": 0.23298750817775726, "learning_rate": 1.2114752979915584e-05, "loss": 0.3798, "step": 11207 }, { "epoch": 2.304039469626889, "grad_norm": 0.22814899682998657, "learning_rate": 1.210793581251945e-05, "loss": 0.3811, "step": 11208 }, { "epoch": 2.304245040600267, "grad_norm": 0.23419663310050964, "learning_rate": 1.2101120265561585e-05, "loss": 0.3799, "step": 11209 }, { "epoch": 2.3044506115736456, "grad_norm": 0.12921544909477234, "learning_rate": 1.2094306339377743e-05, "loss": 0.4378, "step": 11210 }, { "epoch": 2.304656182547024, "grad_norm": 0.22787374258041382, "learning_rate": 1.208749403430364e-05, "loss": 0.4039, "step": 11211 }, { "epoch": 2.304861753520403, "grad_norm": 0.2288065403699875, "learning_rate": 1.2080683350674869e-05, "loss": 0.3922, "step": 11212 }, { "epoch": 2.3050673244937814, "grad_norm": 0.23211759328842163, "learning_rate": 1.2073874288826966e-05, "loss": 0.3804, "step": 11213 }, { "epoch": 2.30527289546716, "grad_norm": 0.23307380080223083, "learning_rate": 1.2067066849095386e-05, "loss": 0.3883, "step": 11214 }, { "epoch": 2.3054784664405386, "grad_norm": 0.22233398258686066, "learning_rate": 1.206026103181549e-05, "loss": 0.3948, "step": 11215 }, { "epoch": 2.305684037413917, "grad_norm": 0.22807008028030396, "learning_rate": 1.2053456837322557e-05, "loss": 0.396, "step": 11216 }, { "epoch": 2.3058896083872957, "grad_norm": 0.23228740692138672, "learning_rate": 1.204665426595183e-05, "loss": 0.4057, "step": 11217 }, { "epoch": 2.3060951793606743, "grad_norm": 0.2424495369195938, "learning_rate": 1.2039853318038428e-05, "loss": 0.4068, "step": 11218 }, { "epoch": 2.306300750334053, "grad_norm": 0.23171810805797577, "learning_rate": 1.2033053993917391e-05, "loss": 0.4152, "step": 11219 }, { "epoch": 2.3065063213074315, "grad_norm": 0.2335965633392334, "learning_rate": 1.2026256293923702e-05, "loss": 0.3733, "step": 11220 }, { "epoch": 2.30671189228081, "grad_norm": 0.12516964972019196, "learning_rate": 1.2019460218392243e-05, "loss": 0.4496, "step": 11221 }, { "epoch": 2.3069174632541887, "grad_norm": 0.2288234382867813, "learning_rate": 1.2012665767657825e-05, "loss": 0.3842, "step": 11222 }, { "epoch": 2.3071230342275673, "grad_norm": 0.23571978509426117, "learning_rate": 1.2005872942055177e-05, "loss": 0.4029, "step": 11223 }, { "epoch": 2.3073286052009454, "grad_norm": 0.23239515721797943, "learning_rate": 1.1999081741918965e-05, "loss": 0.4028, "step": 11224 }, { "epoch": 2.307534176174324, "grad_norm": 0.23048000037670135, "learning_rate": 1.1992292167583748e-05, "loss": 0.3883, "step": 11225 }, { "epoch": 2.3077397471477026, "grad_norm": 0.1262623518705368, "learning_rate": 1.198550421938402e-05, "loss": 0.4509, "step": 11226 }, { "epoch": 2.307945318121081, "grad_norm": 0.2399047166109085, "learning_rate": 1.1978717897654171e-05, "loss": 0.4162, "step": 11227 }, { "epoch": 2.3081508890944598, "grad_norm": 0.22697141766548157, "learning_rate": 1.197193320272857e-05, "loss": 0.3845, "step": 11228 }, { "epoch": 2.3083564600678383, "grad_norm": 0.2281046062707901, "learning_rate": 1.1965150134941447e-05, "loss": 0.3835, "step": 11229 }, { "epoch": 2.308562031041217, "grad_norm": 0.12404376268386841, "learning_rate": 1.1958368694626956e-05, "loss": 0.4376, "step": 11230 }, { "epoch": 2.3087676020145955, "grad_norm": 0.12131867557764053, "learning_rate": 1.195158888211922e-05, "loss": 0.4545, "step": 11231 }, { "epoch": 2.308973172987974, "grad_norm": 0.22881445288658142, "learning_rate": 1.194481069775223e-05, "loss": 0.4063, "step": 11232 }, { "epoch": 2.3091787439613527, "grad_norm": 0.22988468408584595, "learning_rate": 1.1938034141859915e-05, "loss": 0.4105, "step": 11233 }, { "epoch": 2.3093843149347313, "grad_norm": 0.23098687827587128, "learning_rate": 1.1931259214776129e-05, "loss": 0.3975, "step": 11234 }, { "epoch": 2.30958988590811, "grad_norm": 0.12407363951206207, "learning_rate": 1.1924485916834638e-05, "loss": 0.4472, "step": 11235 }, { "epoch": 2.3097954568814885, "grad_norm": 0.12328176200389862, "learning_rate": 1.1917714248369133e-05, "loss": 0.4449, "step": 11236 }, { "epoch": 2.310001027854867, "grad_norm": 0.22142189741134644, "learning_rate": 1.1910944209713205e-05, "loss": 0.3997, "step": 11237 }, { "epoch": 2.3102065988282456, "grad_norm": 0.2281443476676941, "learning_rate": 1.1904175801200417e-05, "loss": 0.3818, "step": 11238 }, { "epoch": 2.310412169801624, "grad_norm": 0.22729991376399994, "learning_rate": 1.1897409023164191e-05, "loss": 0.3928, "step": 11239 }, { "epoch": 2.310617740775003, "grad_norm": 0.12084699422121048, "learning_rate": 1.1890643875937904e-05, "loss": 0.4569, "step": 11240 }, { "epoch": 2.310823311748381, "grad_norm": 0.12548977136611938, "learning_rate": 1.1883880359854836e-05, "loss": 0.4437, "step": 11241 }, { "epoch": 2.3110288827217595, "grad_norm": 0.22213564813137054, "learning_rate": 1.1877118475248204e-05, "loss": 0.4011, "step": 11242 }, { "epoch": 2.311234453695138, "grad_norm": 0.2207585573196411, "learning_rate": 1.1870358222451127e-05, "loss": 0.4, "step": 11243 }, { "epoch": 2.3114400246685167, "grad_norm": 0.2309262752532959, "learning_rate": 1.1863599601796638e-05, "loss": 0.384, "step": 11244 }, { "epoch": 2.3116455956418953, "grad_norm": 0.22863119840621948, "learning_rate": 1.1856842613617734e-05, "loss": 0.3985, "step": 11245 }, { "epoch": 2.311851166615274, "grad_norm": 0.22216136753559113, "learning_rate": 1.1850087258247282e-05, "loss": 0.3878, "step": 11246 }, { "epoch": 2.3120567375886525, "grad_norm": 0.23234418034553528, "learning_rate": 1.1843333536018088e-05, "loss": 0.3844, "step": 11247 }, { "epoch": 2.312262308562031, "grad_norm": 0.22549466788768768, "learning_rate": 1.1836581447262865e-05, "loss": 0.3844, "step": 11248 }, { "epoch": 2.3124678795354097, "grad_norm": 0.2254628688097, "learning_rate": 1.1829830992314282e-05, "loss": 0.38, "step": 11249 }, { "epoch": 2.3126734505087883, "grad_norm": 0.23794369399547577, "learning_rate": 1.1823082171504888e-05, "loss": 0.38, "step": 11250 }, { "epoch": 2.312879021482167, "grad_norm": 0.1556072235107422, "learning_rate": 1.1816334985167152e-05, "loss": 0.4545, "step": 11251 }, { "epoch": 2.3130845924555454, "grad_norm": 0.23473793268203735, "learning_rate": 1.1809589433633507e-05, "loss": 0.4154, "step": 11252 }, { "epoch": 2.313290163428924, "grad_norm": 0.22591789066791534, "learning_rate": 1.1802845517236261e-05, "loss": 0.3782, "step": 11253 }, { "epoch": 2.313495734402302, "grad_norm": 0.22409707307815552, "learning_rate": 1.1796103236307647e-05, "loss": 0.3871, "step": 11254 }, { "epoch": 2.313701305375681, "grad_norm": 0.12136626243591309, "learning_rate": 1.1789362591179836e-05, "loss": 0.4417, "step": 11255 }, { "epoch": 2.3139068763490593, "grad_norm": 0.23068110644817352, "learning_rate": 1.1782623582184907e-05, "loss": 0.3921, "step": 11256 }, { "epoch": 2.314112447322438, "grad_norm": 0.22606144845485687, "learning_rate": 1.1775886209654853e-05, "loss": 0.4033, "step": 11257 }, { "epoch": 2.3143180182958165, "grad_norm": 0.23773600161075592, "learning_rate": 1.1769150473921582e-05, "loss": 0.4094, "step": 11258 }, { "epoch": 2.314523589269195, "grad_norm": 0.23489652574062347, "learning_rate": 1.1762416375316958e-05, "loss": 0.3755, "step": 11259 }, { "epoch": 2.3147291602425737, "grad_norm": 0.12201520800590515, "learning_rate": 1.1755683914172731e-05, "loss": 0.4488, "step": 11260 }, { "epoch": 2.3149347312159523, "grad_norm": 0.22625313699245453, "learning_rate": 1.1748953090820572e-05, "loss": 0.382, "step": 11261 }, { "epoch": 2.315140302189331, "grad_norm": 0.21789546310901642, "learning_rate": 1.1742223905592084e-05, "loss": 0.3877, "step": 11262 }, { "epoch": 2.3153458731627095, "grad_norm": 0.2211894392967224, "learning_rate": 1.1735496358818773e-05, "loss": 0.3978, "step": 11263 }, { "epoch": 2.315551444136088, "grad_norm": 0.22544537484645844, "learning_rate": 1.1728770450832078e-05, "loss": 0.3777, "step": 11264 }, { "epoch": 2.3157570151094666, "grad_norm": 0.23240074515342712, "learning_rate": 1.1722046181963344e-05, "loss": 0.3894, "step": 11265 }, { "epoch": 2.315962586082845, "grad_norm": 0.22723515331745148, "learning_rate": 1.1715323552543861e-05, "loss": 0.3761, "step": 11266 }, { "epoch": 2.316168157056224, "grad_norm": 0.2265399843454361, "learning_rate": 1.170860256290482e-05, "loss": 0.3725, "step": 11267 }, { "epoch": 2.3163737280296024, "grad_norm": 0.22929410636425018, "learning_rate": 1.1701883213377327e-05, "loss": 0.4007, "step": 11268 }, { "epoch": 2.3165792990029805, "grad_norm": 0.2396460622549057, "learning_rate": 1.1695165504292409e-05, "loss": 0.386, "step": 11269 }, { "epoch": 2.3167848699763596, "grad_norm": 0.23619569838047028, "learning_rate": 1.168844943598101e-05, "loss": 0.3854, "step": 11270 }, { "epoch": 2.3169904409497377, "grad_norm": 0.22975857555866241, "learning_rate": 1.168173500877402e-05, "loss": 0.3851, "step": 11271 }, { "epoch": 2.3171960119231163, "grad_norm": 0.23731692135334015, "learning_rate": 1.167502222300221e-05, "loss": 0.3812, "step": 11272 }, { "epoch": 2.317401582896495, "grad_norm": 0.22858087718486786, "learning_rate": 1.1668311078996303e-05, "loss": 0.387, "step": 11273 }, { "epoch": 2.3176071538698735, "grad_norm": 0.22912317514419556, "learning_rate": 1.1661601577086916e-05, "loss": 0.4138, "step": 11274 }, { "epoch": 2.317812724843252, "grad_norm": 0.2295382171869278, "learning_rate": 1.1654893717604597e-05, "loss": 0.4013, "step": 11275 }, { "epoch": 2.3180182958166307, "grad_norm": 0.1292608678340912, "learning_rate": 1.1648187500879812e-05, "loss": 0.4512, "step": 11276 }, { "epoch": 2.3182238667900092, "grad_norm": 0.23045098781585693, "learning_rate": 1.1641482927242945e-05, "loss": 0.4034, "step": 11277 }, { "epoch": 2.318429437763388, "grad_norm": 0.22682234644889832, "learning_rate": 1.1634779997024293e-05, "loss": 0.3821, "step": 11278 }, { "epoch": 2.3186350087367664, "grad_norm": 0.2304777354001999, "learning_rate": 1.1628078710554069e-05, "loss": 0.3779, "step": 11279 }, { "epoch": 2.318840579710145, "grad_norm": 0.2295672744512558, "learning_rate": 1.1621379068162438e-05, "loss": 0.3924, "step": 11280 }, { "epoch": 2.3190461506835236, "grad_norm": 0.23286469280719757, "learning_rate": 1.161468107017945e-05, "loss": 0.3817, "step": 11281 }, { "epoch": 2.319251721656902, "grad_norm": 0.12597419321537018, "learning_rate": 1.1607984716935084e-05, "loss": 0.4553, "step": 11282 }, { "epoch": 2.3194572926302808, "grad_norm": 0.2292589247226715, "learning_rate": 1.160129000875924e-05, "loss": 0.3939, "step": 11283 }, { "epoch": 2.319662863603659, "grad_norm": 0.2388840913772583, "learning_rate": 1.1594596945981732e-05, "loss": 0.3885, "step": 11284 }, { "epoch": 2.319868434577038, "grad_norm": 0.22787928581237793, "learning_rate": 1.1587905528932294e-05, "loss": 0.3977, "step": 11285 }, { "epoch": 2.320074005550416, "grad_norm": 0.23008286952972412, "learning_rate": 1.1581215757940565e-05, "loss": 0.3862, "step": 11286 }, { "epoch": 2.3202795765237947, "grad_norm": 0.22636668384075165, "learning_rate": 1.1574527633336158e-05, "loss": 0.4, "step": 11287 }, { "epoch": 2.3204851474971733, "grad_norm": 0.12164843082427979, "learning_rate": 1.1567841155448539e-05, "loss": 0.4519, "step": 11288 }, { "epoch": 2.320690718470552, "grad_norm": 0.22811272740364075, "learning_rate": 1.1561156324607123e-05, "loss": 0.3912, "step": 11289 }, { "epoch": 2.3208962894439304, "grad_norm": 0.2221514880657196, "learning_rate": 1.1554473141141244e-05, "loss": 0.3612, "step": 11290 }, { "epoch": 2.321101860417309, "grad_norm": 0.23008368909358978, "learning_rate": 1.154779160538014e-05, "loss": 0.3888, "step": 11291 }, { "epoch": 2.3213074313906876, "grad_norm": 0.23193509876728058, "learning_rate": 1.1541111717653002e-05, "loss": 0.3793, "step": 11292 }, { "epoch": 2.321513002364066, "grad_norm": 0.22582639753818512, "learning_rate": 1.1534433478288896e-05, "loss": 0.4062, "step": 11293 }, { "epoch": 2.321718573337445, "grad_norm": 0.5882457494735718, "learning_rate": 1.1527756887616828e-05, "loss": 0.4089, "step": 11294 }, { "epoch": 2.3219241443108234, "grad_norm": 0.23613165318965912, "learning_rate": 1.152108194596574e-05, "loss": 0.3803, "step": 11295 }, { "epoch": 2.322129715284202, "grad_norm": 0.24490775167942047, "learning_rate": 1.1514408653664464e-05, "loss": 0.4217, "step": 11296 }, { "epoch": 2.3223352862575806, "grad_norm": 0.2295404076576233, "learning_rate": 1.1507737011041767e-05, "loss": 0.3876, "step": 11297 }, { "epoch": 2.322540857230959, "grad_norm": 0.22926199436187744, "learning_rate": 1.150106701842632e-05, "loss": 0.4045, "step": 11298 }, { "epoch": 2.3227464282043373, "grad_norm": 0.23146659135818481, "learning_rate": 1.1494398676146716e-05, "loss": 0.3973, "step": 11299 }, { "epoch": 2.3229519991777163, "grad_norm": 0.2279983013868332, "learning_rate": 1.1487731984531497e-05, "loss": 0.3856, "step": 11300 }, { "epoch": 2.3231575701510945, "grad_norm": 0.22734786570072174, "learning_rate": 1.1481066943909086e-05, "loss": 0.395, "step": 11301 }, { "epoch": 2.323363141124473, "grad_norm": 0.12357629090547562, "learning_rate": 1.147440355460784e-05, "loss": 0.4503, "step": 11302 }, { "epoch": 2.3235687120978517, "grad_norm": 0.22878186404705048, "learning_rate": 1.1467741816956036e-05, "loss": 0.3805, "step": 11303 }, { "epoch": 2.3237742830712302, "grad_norm": 0.22551243007183075, "learning_rate": 1.1461081731281857e-05, "loss": 0.3962, "step": 11304 }, { "epoch": 2.323979854044609, "grad_norm": 0.22322127223014832, "learning_rate": 1.1454423297913425e-05, "loss": 0.3839, "step": 11305 }, { "epoch": 2.3241854250179874, "grad_norm": 0.12337585538625717, "learning_rate": 1.1447766517178752e-05, "loss": 0.4513, "step": 11306 }, { "epoch": 2.324390995991366, "grad_norm": 0.22409552335739136, "learning_rate": 1.1441111389405813e-05, "loss": 0.3851, "step": 11307 }, { "epoch": 2.3245965669647446, "grad_norm": 0.2322671264410019, "learning_rate": 1.1434457914922463e-05, "loss": 0.4114, "step": 11308 }, { "epoch": 2.324802137938123, "grad_norm": 0.23481951653957367, "learning_rate": 1.1427806094056486e-05, "loss": 0.4041, "step": 11309 }, { "epoch": 2.3250077089115018, "grad_norm": 0.2358068972826004, "learning_rate": 1.1421155927135584e-05, "loss": 0.404, "step": 11310 }, { "epoch": 2.3252132798848804, "grad_norm": 0.24007724225521088, "learning_rate": 1.1414507414487383e-05, "loss": 0.3907, "step": 11311 }, { "epoch": 2.325418850858259, "grad_norm": 0.2249882072210312, "learning_rate": 1.1407860556439413e-05, "loss": 0.4018, "step": 11312 }, { "epoch": 2.3256244218316375, "grad_norm": 0.21669505536556244, "learning_rate": 1.1401215353319158e-05, "loss": 0.3996, "step": 11313 }, { "epoch": 2.325829992805016, "grad_norm": 0.2299477905035019, "learning_rate": 1.139457180545398e-05, "loss": 0.3819, "step": 11314 }, { "epoch": 2.3260355637783947, "grad_norm": 0.22735092043876648, "learning_rate": 1.1387929913171164e-05, "loss": 0.3832, "step": 11315 }, { "epoch": 2.326241134751773, "grad_norm": 0.22514750063419342, "learning_rate": 1.1381289676797953e-05, "loss": 0.3827, "step": 11316 }, { "epoch": 2.3264467057251514, "grad_norm": 0.23412209749221802, "learning_rate": 1.1374651096661464e-05, "loss": 0.4225, "step": 11317 }, { "epoch": 2.32665227669853, "grad_norm": 0.23634769022464752, "learning_rate": 1.1368014173088757e-05, "loss": 0.412, "step": 11318 }, { "epoch": 2.3268578476719086, "grad_norm": 0.2300824671983719, "learning_rate": 1.136137890640679e-05, "loss": 0.3749, "step": 11319 }, { "epoch": 2.327063418645287, "grad_norm": 0.2358069270849228, "learning_rate": 1.135474529694245e-05, "loss": 0.4009, "step": 11320 }, { "epoch": 2.327268989618666, "grad_norm": 0.23068921267986298, "learning_rate": 1.134811334502256e-05, "loss": 0.3985, "step": 11321 }, { "epoch": 2.3274745605920444, "grad_norm": 0.22651554644107819, "learning_rate": 1.1341483050973838e-05, "loss": 0.38, "step": 11322 }, { "epoch": 2.327680131565423, "grad_norm": 0.22414909303188324, "learning_rate": 1.1334854415122924e-05, "loss": 0.3884, "step": 11323 }, { "epoch": 2.3278857025388016, "grad_norm": 0.21925905346870422, "learning_rate": 1.1328227437796389e-05, "loss": 0.3742, "step": 11324 }, { "epoch": 2.32809127351218, "grad_norm": 0.23087939620018005, "learning_rate": 1.1321602119320704e-05, "loss": 0.3872, "step": 11325 }, { "epoch": 2.3282968444855587, "grad_norm": 0.2237529307603836, "learning_rate": 1.131497846002227e-05, "loss": 0.3848, "step": 11326 }, { "epoch": 2.3285024154589373, "grad_norm": 0.22944872081279755, "learning_rate": 1.1308356460227386e-05, "loss": 0.4088, "step": 11327 }, { "epoch": 2.328707986432316, "grad_norm": 0.1283191293478012, "learning_rate": 1.1301736120262326e-05, "loss": 0.47, "step": 11328 }, { "epoch": 2.3289135574056945, "grad_norm": 0.22146999835968018, "learning_rate": 1.1295117440453219e-05, "loss": 0.3917, "step": 11329 }, { "epoch": 2.329119128379073, "grad_norm": 0.22980590164661407, "learning_rate": 1.1288500421126137e-05, "loss": 0.3876, "step": 11330 }, { "epoch": 2.3293246993524512, "grad_norm": 0.22274045646190643, "learning_rate": 1.1281885062607072e-05, "loss": 0.3849, "step": 11331 }, { "epoch": 2.32953027032583, "grad_norm": 0.22919537127017975, "learning_rate": 1.1275271365221938e-05, "loss": 0.3906, "step": 11332 }, { "epoch": 2.3297358412992084, "grad_norm": 0.1261204034090042, "learning_rate": 1.1268659329296534e-05, "loss": 0.444, "step": 11333 }, { "epoch": 2.329941412272587, "grad_norm": 0.2240409255027771, "learning_rate": 1.1262048955156643e-05, "loss": 0.3987, "step": 11334 }, { "epoch": 2.3301469832459656, "grad_norm": 0.125702366232872, "learning_rate": 1.1255440243127906e-05, "loss": 0.4473, "step": 11335 }, { "epoch": 2.330352554219344, "grad_norm": 0.22843293845653534, "learning_rate": 1.1248833193535898e-05, "loss": 0.4213, "step": 11336 }, { "epoch": 2.3305581251927228, "grad_norm": 0.23132173717021942, "learning_rate": 1.1242227806706137e-05, "loss": 0.3878, "step": 11337 }, { "epoch": 2.3307636961661014, "grad_norm": 0.23673327267169952, "learning_rate": 1.1235624082964025e-05, "loss": 0.3987, "step": 11338 }, { "epoch": 2.33096926713948, "grad_norm": 0.23916591703891754, "learning_rate": 1.1229022022634903e-05, "loss": 0.4045, "step": 11339 }, { "epoch": 2.3311748381128585, "grad_norm": 0.12463133037090302, "learning_rate": 1.122242162604402e-05, "loss": 0.4453, "step": 11340 }, { "epoch": 2.331380409086237, "grad_norm": 0.23358865082263947, "learning_rate": 1.1215822893516539e-05, "loss": 0.3772, "step": 11341 }, { "epoch": 2.3315859800596157, "grad_norm": 0.2250611037015915, "learning_rate": 1.1209225825377565e-05, "loss": 0.4015, "step": 11342 }, { "epoch": 2.3317915510329943, "grad_norm": 0.12012235075235367, "learning_rate": 1.1202630421952097e-05, "loss": 0.446, "step": 11343 }, { "epoch": 2.331997122006373, "grad_norm": 0.12716658413410187, "learning_rate": 1.1196036683565063e-05, "loss": 0.4522, "step": 11344 }, { "epoch": 2.3322026929797515, "grad_norm": 0.12125218659639359, "learning_rate": 1.11894446105413e-05, "loss": 0.4634, "step": 11345 }, { "epoch": 2.3324082639531296, "grad_norm": 0.23313722014427185, "learning_rate": 1.1182854203205569e-05, "loss": 0.4123, "step": 11346 }, { "epoch": 2.332613834926508, "grad_norm": 0.22456228733062744, "learning_rate": 1.1176265461882556e-05, "loss": 0.3851, "step": 11347 }, { "epoch": 2.332819405899887, "grad_norm": 0.22414372861385345, "learning_rate": 1.1169678386896833e-05, "loss": 0.4027, "step": 11348 }, { "epoch": 2.3330249768732654, "grad_norm": 0.2482268065214157, "learning_rate": 1.116309297857295e-05, "loss": 0.3893, "step": 11349 }, { "epoch": 2.333230547846644, "grad_norm": 0.2372516393661499, "learning_rate": 1.1156509237235325e-05, "loss": 0.3884, "step": 11350 }, { "epoch": 2.3334361188200226, "grad_norm": 0.23063679039478302, "learning_rate": 1.1149927163208297e-05, "loss": 0.3853, "step": 11351 }, { "epoch": 2.333641689793401, "grad_norm": 0.12314844876527786, "learning_rate": 1.114334675681615e-05, "loss": 0.4468, "step": 11352 }, { "epoch": 2.3338472607667797, "grad_norm": 0.22128140926361084, "learning_rate": 1.1136768018383064e-05, "loss": 0.3851, "step": 11353 }, { "epoch": 2.3340528317401583, "grad_norm": 0.22692500054836273, "learning_rate": 1.1130190948233133e-05, "loss": 0.3878, "step": 11354 }, { "epoch": 2.334258402713537, "grad_norm": 0.2241378277540207, "learning_rate": 1.1123615546690383e-05, "loss": 0.3838, "step": 11355 }, { "epoch": 2.3344639736869155, "grad_norm": 0.22740109264850616, "learning_rate": 1.1117041814078769e-05, "loss": 0.3741, "step": 11356 }, { "epoch": 2.334669544660294, "grad_norm": 0.25140267610549927, "learning_rate": 1.1110469750722118e-05, "loss": 0.3816, "step": 11357 }, { "epoch": 2.3348751156336727, "grad_norm": 0.22210964560508728, "learning_rate": 1.1103899356944239e-05, "loss": 0.3815, "step": 11358 }, { "epoch": 2.3350806866070513, "grad_norm": 0.2357717901468277, "learning_rate": 1.1097330633068806e-05, "loss": 0.3867, "step": 11359 }, { "epoch": 2.33528625758043, "grad_norm": 0.23202987015247345, "learning_rate": 1.1090763579419436e-05, "loss": 0.4003, "step": 11360 }, { "epoch": 2.335491828553808, "grad_norm": 0.2323846071958542, "learning_rate": 1.1084198196319653e-05, "loss": 0.3845, "step": 11361 }, { "epoch": 2.3356973995271866, "grad_norm": 0.22971893846988678, "learning_rate": 1.1077634484092887e-05, "loss": 0.3897, "step": 11362 }, { "epoch": 2.335902970500565, "grad_norm": 0.23653818666934967, "learning_rate": 1.1071072443062531e-05, "loss": 0.416, "step": 11363 }, { "epoch": 2.3361085414739438, "grad_norm": 0.21813298761844635, "learning_rate": 1.1064512073551854e-05, "loss": 0.3926, "step": 11364 }, { "epoch": 2.3363141124473223, "grad_norm": 0.24081604182720184, "learning_rate": 1.1057953375884053e-05, "loss": 0.3823, "step": 11365 }, { "epoch": 2.336519683420701, "grad_norm": 0.12533682584762573, "learning_rate": 1.1051396350382246e-05, "loss": 0.4627, "step": 11366 }, { "epoch": 2.3367252543940795, "grad_norm": 0.23893719911575317, "learning_rate": 1.104484099736946e-05, "loss": 0.3698, "step": 11367 }, { "epoch": 2.336930825367458, "grad_norm": 0.1185644194483757, "learning_rate": 1.1038287317168643e-05, "loss": 0.4715, "step": 11368 }, { "epoch": 2.3371363963408367, "grad_norm": 0.22912783920764923, "learning_rate": 1.1031735310102686e-05, "loss": 0.3963, "step": 11369 }, { "epoch": 2.3373419673142153, "grad_norm": 0.23992134630680084, "learning_rate": 1.1025184976494363e-05, "loss": 0.3906, "step": 11370 }, { "epoch": 2.337547538287594, "grad_norm": 0.2348276525735855, "learning_rate": 1.1018636316666378e-05, "loss": 0.4119, "step": 11371 }, { "epoch": 2.3377531092609725, "grad_norm": 0.23046445846557617, "learning_rate": 1.101208933094135e-05, "loss": 0.3736, "step": 11372 }, { "epoch": 2.337958680234351, "grad_norm": 0.22680574655532837, "learning_rate": 1.1005544019641824e-05, "loss": 0.3675, "step": 11373 }, { "epoch": 2.3381642512077296, "grad_norm": 0.22704631090164185, "learning_rate": 1.0999000383090255e-05, "loss": 0.4037, "step": 11374 }, { "epoch": 2.3383698221811082, "grad_norm": 0.23311007022857666, "learning_rate": 1.0992458421609007e-05, "loss": 0.3913, "step": 11375 }, { "epoch": 2.3385753931544864, "grad_norm": 0.23383252322673798, "learning_rate": 1.098591813552039e-05, "loss": 0.3879, "step": 11376 }, { "epoch": 2.338780964127865, "grad_norm": 0.2401203066110611, "learning_rate": 1.0979379525146603e-05, "loss": 0.4057, "step": 11377 }, { "epoch": 2.3389865351012435, "grad_norm": 0.23543764650821686, "learning_rate": 1.0972842590809783e-05, "loss": 0.3725, "step": 11378 }, { "epoch": 2.339192106074622, "grad_norm": 0.22404974699020386, "learning_rate": 1.0966307332831947e-05, "loss": 0.3833, "step": 11379 }, { "epoch": 2.3393976770480007, "grad_norm": 0.23188042640686035, "learning_rate": 1.0959773751535091e-05, "loss": 0.3922, "step": 11380 }, { "epoch": 2.3396032480213793, "grad_norm": 0.23337653279304504, "learning_rate": 1.0953241847241078e-05, "loss": 0.3864, "step": 11381 }, { "epoch": 2.339808818994758, "grad_norm": 0.2359674870967865, "learning_rate": 1.0946711620271692e-05, "loss": 0.4073, "step": 11382 }, { "epoch": 2.3400143899681365, "grad_norm": 0.22280322015285492, "learning_rate": 1.0940183070948668e-05, "loss": 0.3692, "step": 11383 }, { "epoch": 2.340219960941515, "grad_norm": 0.2298697531223297, "learning_rate": 1.0933656199593635e-05, "loss": 0.3965, "step": 11384 }, { "epoch": 2.3404255319148937, "grad_norm": 0.12579971551895142, "learning_rate": 1.0927131006528134e-05, "loss": 0.4416, "step": 11385 }, { "epoch": 2.3406311028882723, "grad_norm": 0.22117015719413757, "learning_rate": 1.0920607492073632e-05, "loss": 0.3884, "step": 11386 }, { "epoch": 2.340836673861651, "grad_norm": 0.22283059358596802, "learning_rate": 1.0914085656551514e-05, "loss": 0.3971, "step": 11387 }, { "epoch": 2.3410422448350294, "grad_norm": 0.2289050966501236, "learning_rate": 1.0907565500283078e-05, "loss": 0.4027, "step": 11388 }, { "epoch": 2.341247815808408, "grad_norm": 0.22611112892627716, "learning_rate": 1.0901047023589525e-05, "loss": 0.4097, "step": 11389 }, { "epoch": 2.3414533867817866, "grad_norm": 0.23010249435901642, "learning_rate": 1.0894530226792024e-05, "loss": 0.3971, "step": 11390 }, { "epoch": 2.3416589577551647, "grad_norm": 0.2295684963464737, "learning_rate": 1.088801511021161e-05, "loss": 0.371, "step": 11391 }, { "epoch": 2.3418645287285433, "grad_norm": 0.221123605966568, "learning_rate": 1.0881501674169247e-05, "loss": 0.3816, "step": 11392 }, { "epoch": 2.342070099701922, "grad_norm": 0.12064526975154877, "learning_rate": 1.0874989918985833e-05, "loss": 0.4318, "step": 11393 }, { "epoch": 2.3422756706753005, "grad_norm": 0.23293597996234894, "learning_rate": 1.0868479844982164e-05, "loss": 0.3857, "step": 11394 }, { "epoch": 2.342481241648679, "grad_norm": 0.22393792867660522, "learning_rate": 1.0861971452478966e-05, "loss": 0.3969, "step": 11395 }, { "epoch": 2.3426868126220577, "grad_norm": 0.12383504956960678, "learning_rate": 1.0855464741796857e-05, "loss": 0.4518, "step": 11396 }, { "epoch": 2.3428923835954363, "grad_norm": 0.2288213074207306, "learning_rate": 1.0848959713256421e-05, "loss": 0.3848, "step": 11397 }, { "epoch": 2.343097954568815, "grad_norm": 0.23577377200126648, "learning_rate": 1.0842456367178123e-05, "loss": 0.4115, "step": 11398 }, { "epoch": 2.3433035255421935, "grad_norm": 0.22047261893749237, "learning_rate": 1.0835954703882345e-05, "loss": 0.3738, "step": 11399 }, { "epoch": 2.343509096515572, "grad_norm": 0.22211310267448425, "learning_rate": 1.0829454723689383e-05, "loss": 0.4006, "step": 11400 }, { "epoch": 2.3437146674889506, "grad_norm": 0.23019267618656158, "learning_rate": 1.0822956426919487e-05, "loss": 0.3988, "step": 11401 }, { "epoch": 2.343920238462329, "grad_norm": 0.23312908411026, "learning_rate": 1.0816459813892787e-05, "loss": 0.3799, "step": 11402 }, { "epoch": 2.344125809435708, "grad_norm": 0.2296217679977417, "learning_rate": 1.0809964884929325e-05, "loss": 0.3731, "step": 11403 }, { "epoch": 2.3443313804090864, "grad_norm": 0.12692473828792572, "learning_rate": 1.08034716403491e-05, "loss": 0.4605, "step": 11404 }, { "epoch": 2.344536951382465, "grad_norm": 0.2210485190153122, "learning_rate": 1.0796980080471993e-05, "loss": 0.3822, "step": 11405 }, { "epoch": 2.344742522355843, "grad_norm": 0.22621271014213562, "learning_rate": 1.0790490205617812e-05, "loss": 0.3743, "step": 11406 }, { "epoch": 2.344948093329222, "grad_norm": 0.22742587327957153, "learning_rate": 1.0784002016106287e-05, "loss": 0.4062, "step": 11407 }, { "epoch": 2.3451536643026003, "grad_norm": 0.2238548845052719, "learning_rate": 1.0777515512257057e-05, "loss": 0.3738, "step": 11408 }, { "epoch": 2.345359235275979, "grad_norm": 0.2274450659751892, "learning_rate": 1.077103069438968e-05, "loss": 0.4024, "step": 11409 }, { "epoch": 2.3455648062493575, "grad_norm": 0.2332809865474701, "learning_rate": 1.0764547562823627e-05, "loss": 0.4046, "step": 11410 }, { "epoch": 2.345770377222736, "grad_norm": 0.1246822252869606, "learning_rate": 1.0758066117878307e-05, "loss": 0.4457, "step": 11411 }, { "epoch": 2.3459759481961147, "grad_norm": 0.2296641618013382, "learning_rate": 1.0751586359873026e-05, "loss": 0.394, "step": 11412 }, { "epoch": 2.3461815191694932, "grad_norm": 0.2302107959985733, "learning_rate": 1.0745108289127006e-05, "loss": 0.4005, "step": 11413 }, { "epoch": 2.346387090142872, "grad_norm": 0.12304381281137466, "learning_rate": 1.0738631905959397e-05, "loss": 0.4551, "step": 11414 }, { "epoch": 2.3465926611162504, "grad_norm": 0.23445133864879608, "learning_rate": 1.0732157210689257e-05, "loss": 0.3921, "step": 11415 }, { "epoch": 2.346798232089629, "grad_norm": 0.22406600415706635, "learning_rate": 1.0725684203635556e-05, "loss": 0.3952, "step": 11416 }, { "epoch": 2.3470038030630076, "grad_norm": 0.2265467792749405, "learning_rate": 1.0719212885117194e-05, "loss": 0.3897, "step": 11417 }, { "epoch": 2.347209374036386, "grad_norm": 0.22809205949306488, "learning_rate": 1.0712743255452993e-05, "loss": 0.3919, "step": 11418 }, { "epoch": 2.3474149450097648, "grad_norm": 0.12264318019151688, "learning_rate": 1.0706275314961672e-05, "loss": 0.4388, "step": 11419 }, { "epoch": 2.3476205159831434, "grad_norm": 0.26397988200187683, "learning_rate": 1.0699809063961879e-05, "loss": 0.3855, "step": 11420 }, { "epoch": 2.3478260869565215, "grad_norm": 0.2346579134464264, "learning_rate": 1.0693344502772162e-05, "loss": 0.397, "step": 11421 }, { "epoch": 2.3480316579299005, "grad_norm": 0.23680004477500916, "learning_rate": 1.0686881631711023e-05, "loss": 0.4192, "step": 11422 }, { "epoch": 2.3482372289032787, "grad_norm": 0.22599753737449646, "learning_rate": 1.0680420451096852e-05, "loss": 0.4073, "step": 11423 }, { "epoch": 2.3484427998766573, "grad_norm": 0.23660646378993988, "learning_rate": 1.0673960961247943e-05, "loss": 0.3879, "step": 11424 }, { "epoch": 2.348648370850036, "grad_norm": 0.26729151606559753, "learning_rate": 1.0667503162482548e-05, "loss": 0.3812, "step": 11425 }, { "epoch": 2.3488539418234144, "grad_norm": 0.22443972527980804, "learning_rate": 1.06610470551188e-05, "loss": 0.4038, "step": 11426 }, { "epoch": 2.349059512796793, "grad_norm": 0.22883421182632446, "learning_rate": 1.0654592639474768e-05, "loss": 0.3976, "step": 11427 }, { "epoch": 2.3492650837701716, "grad_norm": 0.23285576701164246, "learning_rate": 1.0648139915868425e-05, "loss": 0.3958, "step": 11428 }, { "epoch": 2.34947065474355, "grad_norm": 0.22203494608402252, "learning_rate": 1.0641688884617673e-05, "loss": 0.391, "step": 11429 }, { "epoch": 2.349676225716929, "grad_norm": 0.21967896819114685, "learning_rate": 1.0635239546040312e-05, "loss": 0.3793, "step": 11430 }, { "epoch": 2.3498817966903074, "grad_norm": 0.23787444829940796, "learning_rate": 1.062879190045407e-05, "loss": 0.3829, "step": 11431 }, { "epoch": 2.350087367663686, "grad_norm": 0.2242104560136795, "learning_rate": 1.0622345948176609e-05, "loss": 0.3986, "step": 11432 }, { "epoch": 2.3502929386370646, "grad_norm": 0.12349691241979599, "learning_rate": 1.0615901689525487e-05, "loss": 0.4521, "step": 11433 }, { "epoch": 2.350498509610443, "grad_norm": 0.2340456247329712, "learning_rate": 1.0609459124818177e-05, "loss": 0.419, "step": 11434 }, { "epoch": 2.3507040805838217, "grad_norm": 0.12247911095619202, "learning_rate": 1.0603018254372072e-05, "loss": 0.4609, "step": 11435 }, { "epoch": 2.3509096515572, "grad_norm": 0.2385515421628952, "learning_rate": 1.0596579078504486e-05, "loss": 0.3997, "step": 11436 }, { "epoch": 2.351115222530579, "grad_norm": 0.12545520067214966, "learning_rate": 1.0590141597532653e-05, "loss": 0.4411, "step": 11437 }, { "epoch": 2.351320793503957, "grad_norm": 0.23046888411045074, "learning_rate": 1.0583705811773695e-05, "loss": 0.3795, "step": 11438 }, { "epoch": 2.3515263644773357, "grad_norm": 0.12221966683864594, "learning_rate": 1.0577271721544703e-05, "loss": 0.4572, "step": 11439 }, { "epoch": 2.3517319354507142, "grad_norm": 0.22688139975070953, "learning_rate": 1.0570839327162644e-05, "loss": 0.3925, "step": 11440 }, { "epoch": 2.351937506424093, "grad_norm": 0.23011426627635956, "learning_rate": 1.056440862894441e-05, "loss": 0.3921, "step": 11441 }, { "epoch": 2.3521430773974714, "grad_norm": 0.2639561891555786, "learning_rate": 1.0557979627206812e-05, "loss": 0.3734, "step": 11442 }, { "epoch": 2.35234864837085, "grad_norm": 0.2354530692100525, "learning_rate": 1.055155232226656e-05, "loss": 0.3819, "step": 11443 }, { "epoch": 2.3525542193442286, "grad_norm": 0.23552900552749634, "learning_rate": 1.0545126714440329e-05, "loss": 0.3951, "step": 11444 }, { "epoch": 2.352759790317607, "grad_norm": 0.12986932694911957, "learning_rate": 1.0538702804044648e-05, "loss": 0.4338, "step": 11445 }, { "epoch": 2.3529653612909858, "grad_norm": 0.22587868571281433, "learning_rate": 1.0532280591396021e-05, "loss": 0.388, "step": 11446 }, { "epoch": 2.3531709322643644, "grad_norm": 0.22547636926174164, "learning_rate": 1.0525860076810829e-05, "loss": 0.3929, "step": 11447 }, { "epoch": 2.353376503237743, "grad_norm": 0.24222803115844727, "learning_rate": 1.0519441260605384e-05, "loss": 0.3973, "step": 11448 }, { "epoch": 2.3535820742111215, "grad_norm": 0.2281145453453064, "learning_rate": 1.0513024143095896e-05, "loss": 0.3693, "step": 11449 }, { "epoch": 2.3537876451845, "grad_norm": 0.22498665750026703, "learning_rate": 1.0506608724598525e-05, "loss": 0.3781, "step": 11450 }, { "epoch": 2.3539932161578783, "grad_norm": 0.12150565534830093, "learning_rate": 1.0500195005429303e-05, "loss": 0.4532, "step": 11451 }, { "epoch": 2.3541987871312573, "grad_norm": 0.23014621436595917, "learning_rate": 1.0493782985904235e-05, "loss": 0.3878, "step": 11452 }, { "epoch": 2.3544043581046354, "grad_norm": 0.2346828430891037, "learning_rate": 1.04873726663392e-05, "loss": 0.4009, "step": 11453 }, { "epoch": 2.354609929078014, "grad_norm": 0.21988657116889954, "learning_rate": 1.0480964047050002e-05, "loss": 0.3942, "step": 11454 }, { "epoch": 2.3548155000513926, "grad_norm": 0.12439004331827164, "learning_rate": 1.0474557128352365e-05, "loss": 0.4566, "step": 11455 }, { "epoch": 2.355021071024771, "grad_norm": 0.12461668252944946, "learning_rate": 1.0468151910561923e-05, "loss": 0.4609, "step": 11456 }, { "epoch": 2.35522664199815, "grad_norm": 0.11804953217506409, "learning_rate": 1.0461748393994234e-05, "loss": 0.4588, "step": 11457 }, { "epoch": 2.3554322129715284, "grad_norm": 0.2295227199792862, "learning_rate": 1.045534657896476e-05, "loss": 0.3971, "step": 11458 }, { "epoch": 2.355637783944907, "grad_norm": 0.22749020159244537, "learning_rate": 1.0448946465788915e-05, "loss": 0.4247, "step": 11459 }, { "epoch": 2.3558433549182856, "grad_norm": 0.22229236364364624, "learning_rate": 1.044254805478198e-05, "loss": 0.3964, "step": 11460 }, { "epoch": 2.356048925891664, "grad_norm": 0.2296680361032486, "learning_rate": 1.0436151346259184e-05, "loss": 0.402, "step": 11461 }, { "epoch": 2.3562544968650427, "grad_norm": 0.12308470159769058, "learning_rate": 1.0429756340535659e-05, "loss": 0.4583, "step": 11462 }, { "epoch": 2.3564600678384213, "grad_norm": 0.12049432843923569, "learning_rate": 1.0423363037926464e-05, "loss": 0.4624, "step": 11463 }, { "epoch": 2.3566656388118, "grad_norm": 0.12415426224470139, "learning_rate": 1.0416971438746542e-05, "loss": 0.4517, "step": 11464 }, { "epoch": 2.3568712097851785, "grad_norm": 0.2221984714269638, "learning_rate": 1.041058154331081e-05, "loss": 0.3924, "step": 11465 }, { "epoch": 2.3570767807585566, "grad_norm": 0.22418946027755737, "learning_rate": 1.0404193351934057e-05, "loss": 0.3781, "step": 11466 }, { "epoch": 2.3572823517319357, "grad_norm": 0.2208791971206665, "learning_rate": 1.0397806864930983e-05, "loss": 0.3731, "step": 11467 }, { "epoch": 2.357487922705314, "grad_norm": 0.23673607409000397, "learning_rate": 1.0391422082616247e-05, "loss": 0.3809, "step": 11468 }, { "epoch": 2.3576934936786924, "grad_norm": 0.22379258275032043, "learning_rate": 1.0385039005304386e-05, "loss": 0.401, "step": 11469 }, { "epoch": 2.357899064652071, "grad_norm": 0.2308909147977829, "learning_rate": 1.0378657633309862e-05, "loss": 0.3777, "step": 11470 }, { "epoch": 2.3581046356254496, "grad_norm": 0.12026369571685791, "learning_rate": 1.0372277966947059e-05, "loss": 0.4592, "step": 11471 }, { "epoch": 2.358310206598828, "grad_norm": 0.12578755617141724, "learning_rate": 1.036590000653026e-05, "loss": 0.4422, "step": 11472 }, { "epoch": 2.3585157775722068, "grad_norm": 0.23081423342227936, "learning_rate": 1.0359523752373694e-05, "loss": 0.3895, "step": 11473 }, { "epoch": 2.3587213485455854, "grad_norm": 0.1233346238732338, "learning_rate": 1.035314920479149e-05, "loss": 0.4362, "step": 11474 }, { "epoch": 2.358926919518964, "grad_norm": 0.23306210339069366, "learning_rate": 1.0346776364097683e-05, "loss": 0.3826, "step": 11475 }, { "epoch": 2.3591324904923425, "grad_norm": 0.23711657524108887, "learning_rate": 1.0340405230606235e-05, "loss": 0.3861, "step": 11476 }, { "epoch": 2.359338061465721, "grad_norm": 0.24400153756141663, "learning_rate": 1.0334035804631026e-05, "loss": 0.3896, "step": 11477 }, { "epoch": 2.3595436324390997, "grad_norm": 0.1271253228187561, "learning_rate": 1.0327668086485842e-05, "loss": 0.4421, "step": 11478 }, { "epoch": 2.3597492034124783, "grad_norm": 0.23349100351333618, "learning_rate": 1.0321302076484381e-05, "loss": 0.3748, "step": 11479 }, { "epoch": 2.359954774385857, "grad_norm": 0.22339333593845367, "learning_rate": 1.031493777494029e-05, "loss": 0.392, "step": 11480 }, { "epoch": 2.3601603453592355, "grad_norm": 0.23393899202346802, "learning_rate": 1.03085751821671e-05, "loss": 0.3857, "step": 11481 }, { "epoch": 2.360365916332614, "grad_norm": 0.22653932869434357, "learning_rate": 1.0302214298478262e-05, "loss": 0.3752, "step": 11482 }, { "epoch": 2.360571487305992, "grad_norm": 0.2276255041360855, "learning_rate": 1.0295855124187149e-05, "loss": 0.3894, "step": 11483 }, { "epoch": 2.360777058279371, "grad_norm": 0.22504010796546936, "learning_rate": 1.0289497659607049e-05, "loss": 0.355, "step": 11484 }, { "epoch": 2.3609826292527494, "grad_norm": 0.2319696694612503, "learning_rate": 1.0283141905051145e-05, "loss": 0.4006, "step": 11485 }, { "epoch": 2.361188200226128, "grad_norm": 0.23021776974201202, "learning_rate": 1.0276787860832589e-05, "loss": 0.3885, "step": 11486 }, { "epoch": 2.3613937711995066, "grad_norm": 0.22840525209903717, "learning_rate": 1.0270435527264398e-05, "loss": 0.3885, "step": 11487 }, { "epoch": 2.361599342172885, "grad_norm": 0.23946824669837952, "learning_rate": 1.0264084904659514e-05, "loss": 0.3887, "step": 11488 }, { "epoch": 2.3618049131462637, "grad_norm": 0.23394089937210083, "learning_rate": 1.025773599333082e-05, "loss": 0.387, "step": 11489 }, { "epoch": 2.3620104841196423, "grad_norm": 0.2347833514213562, "learning_rate": 1.0251388793591093e-05, "loss": 0.3909, "step": 11490 }, { "epoch": 2.362216055093021, "grad_norm": 0.24539333581924438, "learning_rate": 1.024504330575302e-05, "loss": 0.3911, "step": 11491 }, { "epoch": 2.3624216260663995, "grad_norm": 0.2272724211215973, "learning_rate": 1.0238699530129222e-05, "loss": 0.3899, "step": 11492 }, { "epoch": 2.362627197039778, "grad_norm": 0.1279131919145584, "learning_rate": 1.0232357467032217e-05, "loss": 0.4453, "step": 11493 }, { "epoch": 2.3628327680131567, "grad_norm": 0.21736563742160797, "learning_rate": 1.0226017116774459e-05, "loss": 0.3957, "step": 11494 }, { "epoch": 2.3630383389865353, "grad_norm": 0.22350220382213593, "learning_rate": 1.0219678479668308e-05, "loss": 0.38, "step": 11495 }, { "epoch": 2.363243909959914, "grad_norm": 0.22701016068458557, "learning_rate": 1.0213341556026038e-05, "loss": 0.3937, "step": 11496 }, { "epoch": 2.3634494809332924, "grad_norm": 0.23441599309444427, "learning_rate": 1.0207006346159835e-05, "loss": 0.3887, "step": 11497 }, { "epoch": 2.3636550519066706, "grad_norm": 0.2203342318534851, "learning_rate": 1.0200672850381808e-05, "loss": 0.3824, "step": 11498 }, { "epoch": 2.363860622880049, "grad_norm": 0.12455693632364273, "learning_rate": 1.0194341069003977e-05, "loss": 0.4432, "step": 11499 }, { "epoch": 2.3640661938534278, "grad_norm": 0.22522957623004913, "learning_rate": 1.0188011002338268e-05, "loss": 0.376, "step": 11500 }, { "epoch": 2.3642717648268063, "grad_norm": 0.23828500509262085, "learning_rate": 1.0181682650696563e-05, "loss": 0.394, "step": 11501 }, { "epoch": 2.364477335800185, "grad_norm": 0.1210595965385437, "learning_rate": 1.0175356014390606e-05, "loss": 0.4444, "step": 11502 }, { "epoch": 2.3646829067735635, "grad_norm": 0.22295325994491577, "learning_rate": 1.0169031093732092e-05, "loss": 0.3968, "step": 11503 }, { "epoch": 2.364888477746942, "grad_norm": 0.21954037249088287, "learning_rate": 1.016270788903262e-05, "loss": 0.3821, "step": 11504 }, { "epoch": 2.3650940487203207, "grad_norm": 0.23098480701446533, "learning_rate": 1.0156386400603697e-05, "loss": 0.3838, "step": 11505 }, { "epoch": 2.3652996196936993, "grad_norm": 0.2265908420085907, "learning_rate": 1.0150066628756741e-05, "loss": 0.4052, "step": 11506 }, { "epoch": 2.365505190667078, "grad_norm": 0.2330310344696045, "learning_rate": 1.0143748573803133e-05, "loss": 0.4044, "step": 11507 }, { "epoch": 2.3657107616404565, "grad_norm": 0.22517040371894836, "learning_rate": 1.0137432236054111e-05, "loss": 0.4007, "step": 11508 }, { "epoch": 2.365916332613835, "grad_norm": 0.22092685103416443, "learning_rate": 1.0131117615820847e-05, "loss": 0.395, "step": 11509 }, { "epoch": 2.3661219035872136, "grad_norm": 0.22711274027824402, "learning_rate": 1.0124804713414453e-05, "loss": 0.3662, "step": 11510 }, { "epoch": 2.3663274745605922, "grad_norm": 0.2373218983411789, "learning_rate": 1.011849352914592e-05, "loss": 0.3841, "step": 11511 }, { "epoch": 2.366533045533971, "grad_norm": 0.23521727323532104, "learning_rate": 1.011218406332618e-05, "loss": 0.3878, "step": 11512 }, { "epoch": 2.366738616507349, "grad_norm": 0.12808705866336823, "learning_rate": 1.0105876316266065e-05, "loss": 0.4659, "step": 11513 }, { "epoch": 2.3669441874807275, "grad_norm": 0.11950518935918808, "learning_rate": 1.0099570288276317e-05, "loss": 0.4365, "step": 11514 }, { "epoch": 2.367149758454106, "grad_norm": 0.24219125509262085, "learning_rate": 1.0093265979667625e-05, "loss": 0.3793, "step": 11515 }, { "epoch": 2.3673553294274847, "grad_norm": 0.22846874594688416, "learning_rate": 1.0086963390750568e-05, "loss": 0.3735, "step": 11516 }, { "epoch": 2.3675609004008633, "grad_norm": 0.23134097456932068, "learning_rate": 1.0080662521835643e-05, "loss": 0.3869, "step": 11517 }, { "epoch": 2.367766471374242, "grad_norm": 0.21544456481933594, "learning_rate": 1.0074363373233259e-05, "loss": 0.393, "step": 11518 }, { "epoch": 2.3679720423476205, "grad_norm": 0.22806456685066223, "learning_rate": 1.0068065945253753e-05, "loss": 0.3971, "step": 11519 }, { "epoch": 2.368177613320999, "grad_norm": 0.22958514094352722, "learning_rate": 1.0061770238207364e-05, "loss": 0.4065, "step": 11520 }, { "epoch": 2.3683831842943777, "grad_norm": 0.229364275932312, "learning_rate": 1.0055476252404244e-05, "loss": 0.394, "step": 11521 }, { "epoch": 2.3685887552677563, "grad_norm": 0.23312440514564514, "learning_rate": 1.0049183988154493e-05, "loss": 0.4033, "step": 11522 }, { "epoch": 2.368794326241135, "grad_norm": 0.22797515988349915, "learning_rate": 1.0042893445768084e-05, "loss": 0.3912, "step": 11523 }, { "epoch": 2.3689998972145134, "grad_norm": 0.23262247443199158, "learning_rate": 1.0036604625554923e-05, "loss": 0.3907, "step": 11524 }, { "epoch": 2.369205468187892, "grad_norm": 0.22848542034626007, "learning_rate": 1.003031752782484e-05, "loss": 0.3972, "step": 11525 }, { "epoch": 2.3694110391612706, "grad_norm": 0.21729277074337006, "learning_rate": 1.002403215288756e-05, "loss": 0.4045, "step": 11526 }, { "epoch": 2.369616610134649, "grad_norm": 0.22861436009407043, "learning_rate": 1.001774850105273e-05, "loss": 0.4129, "step": 11527 }, { "epoch": 2.3698221811080273, "grad_norm": 0.22693173587322235, "learning_rate": 1.0011466572629933e-05, "loss": 0.3786, "step": 11528 }, { "epoch": 2.370027752081406, "grad_norm": 0.23766165971755981, "learning_rate": 1.0005186367928648e-05, "loss": 0.406, "step": 11529 }, { "epoch": 2.3702333230547845, "grad_norm": 0.12284702807664871, "learning_rate": 9.998907887258245e-06, "loss": 0.4393, "step": 11530 }, { "epoch": 2.370438894028163, "grad_norm": 0.12830850481987, "learning_rate": 9.992631130928073e-06, "loss": 0.4596, "step": 11531 }, { "epoch": 2.3706444650015417, "grad_norm": 0.21804697811603546, "learning_rate": 9.986356099247343e-06, "loss": 0.3676, "step": 11532 }, { "epoch": 2.3708500359749203, "grad_norm": 0.12258250266313553, "learning_rate": 9.98008279252519e-06, "loss": 0.4617, "step": 11533 }, { "epoch": 2.371055606948299, "grad_norm": 0.22385314106941223, "learning_rate": 9.973811211070666e-06, "loss": 0.3938, "step": 11534 }, { "epoch": 2.3712611779216775, "grad_norm": 0.12500827014446259, "learning_rate": 9.967541355192763e-06, "loss": 0.4385, "step": 11535 }, { "epoch": 2.371466748895056, "grad_norm": 0.23873549699783325, "learning_rate": 9.961273225200353e-06, "loss": 0.3857, "step": 11536 }, { "epoch": 2.3716723198684346, "grad_norm": 0.23228701949119568, "learning_rate": 9.955006821402244e-06, "loss": 0.3898, "step": 11537 }, { "epoch": 2.371877890841813, "grad_norm": 0.12291909754276276, "learning_rate": 9.948742144107149e-06, "loss": 0.4612, "step": 11538 }, { "epoch": 2.372083461815192, "grad_norm": 0.23434710502624512, "learning_rate": 9.942479193623696e-06, "loss": 0.3871, "step": 11539 }, { "epoch": 2.3722890327885704, "grad_norm": 0.23191601037979126, "learning_rate": 9.936217970260437e-06, "loss": 0.4079, "step": 11540 }, { "epoch": 2.372494603761949, "grad_norm": 0.21654024720191956, "learning_rate": 9.929958474325821e-06, "loss": 0.387, "step": 11541 }, { "epoch": 2.3727001747353276, "grad_norm": 0.2176521271467209, "learning_rate": 9.923700706128245e-06, "loss": 0.4028, "step": 11542 }, { "epoch": 2.3729057457087057, "grad_norm": 0.22292236983776093, "learning_rate": 9.917444665975987e-06, "loss": 0.3789, "step": 11543 }, { "epoch": 2.3731113166820843, "grad_norm": 0.23066085577011108, "learning_rate": 9.911190354177257e-06, "loss": 0.3781, "step": 11544 }, { "epoch": 2.373316887655463, "grad_norm": 0.1312110424041748, "learning_rate": 9.904937771040172e-06, "loss": 0.4353, "step": 11545 }, { "epoch": 2.3735224586288415, "grad_norm": 0.22334595024585724, "learning_rate": 9.89868691687277e-06, "loss": 0.3894, "step": 11546 }, { "epoch": 2.37372802960222, "grad_norm": 0.2200348973274231, "learning_rate": 9.892437791983002e-06, "loss": 0.3792, "step": 11547 }, { "epoch": 2.3739336005755987, "grad_norm": 0.2263760268688202, "learning_rate": 9.886190396678715e-06, "loss": 0.3948, "step": 11548 }, { "epoch": 2.3741391715489772, "grad_norm": 0.22932201623916626, "learning_rate": 9.879944731267723e-06, "loss": 0.3927, "step": 11549 }, { "epoch": 2.374344742522356, "grad_norm": 0.22899407148361206, "learning_rate": 9.873700796057702e-06, "loss": 0.4045, "step": 11550 }, { "epoch": 2.3745503134957344, "grad_norm": 0.23349648714065552, "learning_rate": 9.867458591356262e-06, "loss": 0.3858, "step": 11551 }, { "epoch": 2.374755884469113, "grad_norm": 0.2280297577381134, "learning_rate": 9.861218117470914e-06, "loss": 0.3987, "step": 11552 }, { "epoch": 2.3749614554424916, "grad_norm": 0.11944809556007385, "learning_rate": 9.854979374709125e-06, "loss": 0.44, "step": 11553 }, { "epoch": 2.37516702641587, "grad_norm": 0.2443980574607849, "learning_rate": 9.848742363378233e-06, "loss": 0.3749, "step": 11554 }, { "epoch": 2.3753725973892488, "grad_norm": 0.224415123462677, "learning_rate": 9.8425070837855e-06, "loss": 0.4007, "step": 11555 }, { "epoch": 2.3755781683626274, "grad_norm": 0.23538914322853088, "learning_rate": 9.836273536238125e-06, "loss": 0.4024, "step": 11556 }, { "epoch": 2.375783739336006, "grad_norm": 0.2267664521932602, "learning_rate": 9.830041721043201e-06, "loss": 0.3676, "step": 11557 }, { "epoch": 2.375989310309384, "grad_norm": 0.2350446581840515, "learning_rate": 9.823811638507738e-06, "loss": 0.3737, "step": 11558 }, { "epoch": 2.3761948812827627, "grad_norm": 0.23056869208812714, "learning_rate": 9.81758328893866e-06, "loss": 0.3897, "step": 11559 }, { "epoch": 2.3764004522561413, "grad_norm": 0.22713732719421387, "learning_rate": 9.811356672642816e-06, "loss": 0.3669, "step": 11560 }, { "epoch": 2.37660602322952, "grad_norm": 0.22514687478542328, "learning_rate": 9.805131789926953e-06, "loss": 0.3922, "step": 11561 }, { "epoch": 2.3768115942028984, "grad_norm": 0.2302553504705429, "learning_rate": 9.798908641097734e-06, "loss": 0.3878, "step": 11562 }, { "epoch": 2.377017165176277, "grad_norm": 0.22958478331565857, "learning_rate": 9.792687226461768e-06, "loss": 0.3946, "step": 11563 }, { "epoch": 2.3772227361496556, "grad_norm": 0.2399352788925171, "learning_rate": 9.786467546325548e-06, "loss": 0.3835, "step": 11564 }, { "epoch": 2.377428307123034, "grad_norm": 0.22785188257694244, "learning_rate": 9.780249600995484e-06, "loss": 0.383, "step": 11565 }, { "epoch": 2.377633878096413, "grad_norm": 0.23024681210517883, "learning_rate": 9.774033390777902e-06, "loss": 0.379, "step": 11566 }, { "epoch": 2.3778394490697914, "grad_norm": 0.12375470250844955, "learning_rate": 9.767818915979052e-06, "loss": 0.4333, "step": 11567 }, { "epoch": 2.37804502004317, "grad_norm": 0.23087210953235626, "learning_rate": 9.761606176905089e-06, "loss": 0.3899, "step": 11568 }, { "epoch": 2.3782505910165486, "grad_norm": 0.12501628696918488, "learning_rate": 9.755395173862072e-06, "loss": 0.4761, "step": 11569 }, { "epoch": 2.378456161989927, "grad_norm": 0.23227210342884064, "learning_rate": 9.749185907156014e-06, "loss": 0.3867, "step": 11570 }, { "epoch": 2.3786617329633057, "grad_norm": 0.11980535089969635, "learning_rate": 9.742978377092805e-06, "loss": 0.4406, "step": 11571 }, { "epoch": 2.3788673039366843, "grad_norm": 0.23125259578227997, "learning_rate": 9.736772583978261e-06, "loss": 0.3782, "step": 11572 }, { "epoch": 2.3790728749100625, "grad_norm": 0.237775981426239, "learning_rate": 9.730568528118097e-06, "loss": 0.4088, "step": 11573 }, { "epoch": 2.3792784458834415, "grad_norm": 0.22310465574264526, "learning_rate": 9.724366209817991e-06, "loss": 0.3875, "step": 11574 }, { "epoch": 2.3794840168568197, "grad_norm": 0.23160605132579803, "learning_rate": 9.71816562938348e-06, "loss": 0.3908, "step": 11575 }, { "epoch": 2.3796895878301982, "grad_norm": 0.21605071425437927, "learning_rate": 9.711966787120025e-06, "loss": 0.3931, "step": 11576 }, { "epoch": 2.379895158803577, "grad_norm": 0.23142650723457336, "learning_rate": 9.705769683333049e-06, "loss": 0.3814, "step": 11577 }, { "epoch": 2.3801007297769554, "grad_norm": 0.2322172224521637, "learning_rate": 9.699574318327836e-06, "loss": 0.4077, "step": 11578 }, { "epoch": 2.380306300750334, "grad_norm": 0.23128941655158997, "learning_rate": 9.693380692409598e-06, "loss": 0.4085, "step": 11579 }, { "epoch": 2.3805118717237126, "grad_norm": 0.22898712754249573, "learning_rate": 9.687188805883475e-06, "loss": 0.3729, "step": 11580 }, { "epoch": 2.380717442697091, "grad_norm": 0.2282625287771225, "learning_rate": 9.680998659054504e-06, "loss": 0.3726, "step": 11581 }, { "epoch": 2.3809230136704698, "grad_norm": 0.2424619495868683, "learning_rate": 9.674810252227655e-06, "loss": 0.4017, "step": 11582 }, { "epoch": 2.3811285846438484, "grad_norm": 0.12431478500366211, "learning_rate": 9.668623585707774e-06, "loss": 0.4515, "step": 11583 }, { "epoch": 2.381334155617227, "grad_norm": 0.2225092202425003, "learning_rate": 9.662438659799689e-06, "loss": 0.3965, "step": 11584 }, { "epoch": 2.3815397265906055, "grad_norm": 0.2349206954240799, "learning_rate": 9.656255474808082e-06, "loss": 0.3851, "step": 11585 }, { "epoch": 2.381745297563984, "grad_norm": 0.22471074759960175, "learning_rate": 9.650074031037576e-06, "loss": 0.396, "step": 11586 }, { "epoch": 2.3819508685373627, "grad_norm": 0.13233044743537903, "learning_rate": 9.643894328792692e-06, "loss": 0.4617, "step": 11587 }, { "epoch": 2.382156439510741, "grad_norm": 0.11851814389228821, "learning_rate": 9.637716368377883e-06, "loss": 0.4364, "step": 11588 }, { "epoch": 2.38236201048412, "grad_norm": 0.22143539786338806, "learning_rate": 9.631540150097501e-06, "loss": 0.4004, "step": 11589 }, { "epoch": 2.382567581457498, "grad_norm": 0.2365567684173584, "learning_rate": 9.625365674255817e-06, "loss": 0.4103, "step": 11590 }, { "epoch": 2.3827731524308766, "grad_norm": 0.22856760025024414, "learning_rate": 9.619192941157033e-06, "loss": 0.3897, "step": 11591 }, { "epoch": 2.382978723404255, "grad_norm": 0.12115172296762466, "learning_rate": 9.613021951105246e-06, "loss": 0.456, "step": 11592 }, { "epoch": 2.383184294377634, "grad_norm": 0.2271842509508133, "learning_rate": 9.606852704404472e-06, "loss": 0.3896, "step": 11593 }, { "epoch": 2.3833898653510124, "grad_norm": 0.232135608792305, "learning_rate": 9.600685201358626e-06, "loss": 0.3863, "step": 11594 }, { "epoch": 2.383595436324391, "grad_norm": 0.23168498277664185, "learning_rate": 9.594519442271568e-06, "loss": 0.4031, "step": 11595 }, { "epoch": 2.3838010072977696, "grad_norm": 0.22451357543468475, "learning_rate": 9.588355427447062e-06, "loss": 0.3845, "step": 11596 }, { "epoch": 2.384006578271148, "grad_norm": 0.23108120262622833, "learning_rate": 9.582193157188753e-06, "loss": 0.3817, "step": 11597 }, { "epoch": 2.3842121492445267, "grad_norm": 0.2306068241596222, "learning_rate": 9.576032631800258e-06, "loss": 0.3839, "step": 11598 }, { "epoch": 2.3844177202179053, "grad_norm": 0.23344826698303223, "learning_rate": 9.569873851585067e-06, "loss": 0.3873, "step": 11599 }, { "epoch": 2.384623291191284, "grad_norm": 0.12073783576488495, "learning_rate": 9.563716816846585e-06, "loss": 0.4482, "step": 11600 }, { "epoch": 2.3848288621646625, "grad_norm": 0.23118554055690765, "learning_rate": 9.557561527888153e-06, "loss": 0.3992, "step": 11601 }, { "epoch": 2.385034433138041, "grad_norm": 0.24189937114715576, "learning_rate": 9.551407985013004e-06, "loss": 0.3896, "step": 11602 }, { "epoch": 2.3852400041114192, "grad_norm": 0.22828659415245056, "learning_rate": 9.545256188524287e-06, "loss": 0.3812, "step": 11603 }, { "epoch": 2.3854455750847983, "grad_norm": 0.22598852217197418, "learning_rate": 9.53910613872509e-06, "loss": 0.3918, "step": 11604 }, { "epoch": 2.3856511460581764, "grad_norm": 0.2214164286851883, "learning_rate": 9.532957835918392e-06, "loss": 0.3615, "step": 11605 }, { "epoch": 2.385856717031555, "grad_norm": 0.2324512004852295, "learning_rate": 9.526811280407091e-06, "loss": 0.3832, "step": 11606 }, { "epoch": 2.3860622880049336, "grad_norm": 0.22195158898830414, "learning_rate": 9.520666472493996e-06, "loss": 0.3767, "step": 11607 }, { "epoch": 2.386267858978312, "grad_norm": 0.23884356021881104, "learning_rate": 9.514523412481835e-06, "loss": 0.3979, "step": 11608 }, { "epoch": 2.3864734299516908, "grad_norm": 0.2285103052854538, "learning_rate": 9.508382100673247e-06, "loss": 0.3877, "step": 11609 }, { "epoch": 2.3866790009250693, "grad_norm": 0.24297171831130981, "learning_rate": 9.502242537370767e-06, "loss": 0.3847, "step": 11610 }, { "epoch": 2.386884571898448, "grad_norm": 0.23993346095085144, "learning_rate": 9.4961047228769e-06, "loss": 0.3909, "step": 11611 }, { "epoch": 2.3870901428718265, "grad_norm": 0.24006116390228271, "learning_rate": 9.489968657494006e-06, "loss": 0.3865, "step": 11612 }, { "epoch": 2.387295713845205, "grad_norm": 0.23091156780719757, "learning_rate": 9.483834341524384e-06, "loss": 0.3936, "step": 11613 }, { "epoch": 2.3875012848185837, "grad_norm": 0.12236663699150085, "learning_rate": 9.477701775270241e-06, "loss": 0.4518, "step": 11614 }, { "epoch": 2.3877068557919623, "grad_norm": 0.2414701133966446, "learning_rate": 9.471570959033699e-06, "loss": 0.3928, "step": 11615 }, { "epoch": 2.387912426765341, "grad_norm": 0.22857315838336945, "learning_rate": 9.465441893116786e-06, "loss": 0.3743, "step": 11616 }, { "epoch": 2.3881179977387195, "grad_norm": 0.2300974428653717, "learning_rate": 9.459314577821475e-06, "loss": 0.3847, "step": 11617 }, { "epoch": 2.3883235687120976, "grad_norm": 0.12099748104810715, "learning_rate": 9.453189013449605e-06, "loss": 0.4291, "step": 11618 }, { "epoch": 2.3885291396854766, "grad_norm": 0.1314156949520111, "learning_rate": 9.44706520030298e-06, "loss": 0.4537, "step": 11619 }, { "epoch": 2.388734710658855, "grad_norm": 0.22321221232414246, "learning_rate": 9.44094313868328e-06, "loss": 0.3879, "step": 11620 }, { "epoch": 2.3889402816322334, "grad_norm": 0.22418095171451569, "learning_rate": 9.434822828892105e-06, "loss": 0.37, "step": 11621 }, { "epoch": 2.389145852605612, "grad_norm": 0.23237618803977966, "learning_rate": 9.428704271230982e-06, "loss": 0.4108, "step": 11622 }, { "epoch": 2.3893514235789906, "grad_norm": 0.235540971159935, "learning_rate": 9.42258746600134e-06, "loss": 0.3878, "step": 11623 }, { "epoch": 2.389556994552369, "grad_norm": 0.2259136289358139, "learning_rate": 9.41647241350451e-06, "loss": 0.385, "step": 11624 }, { "epoch": 2.3897625655257477, "grad_norm": 0.2294631153345108, "learning_rate": 9.41035911404178e-06, "loss": 0.3939, "step": 11625 }, { "epoch": 2.3899681364991263, "grad_norm": 0.23754870891571045, "learning_rate": 9.404247567914311e-06, "loss": 0.3749, "step": 11626 }, { "epoch": 2.390173707472505, "grad_norm": 0.2304736226797104, "learning_rate": 9.398137775423193e-06, "loss": 0.4073, "step": 11627 }, { "epoch": 2.3903792784458835, "grad_norm": 0.22400328516960144, "learning_rate": 9.392029736869421e-06, "loss": 0.4066, "step": 11628 }, { "epoch": 2.390584849419262, "grad_norm": 0.2297855019569397, "learning_rate": 9.385923452553912e-06, "loss": 0.3995, "step": 11629 }, { "epoch": 2.3907904203926407, "grad_norm": 0.22708038985729218, "learning_rate": 9.379818922777499e-06, "loss": 0.3896, "step": 11630 }, { "epoch": 2.3909959913660193, "grad_norm": 0.22796861827373505, "learning_rate": 9.373716147840904e-06, "loss": 0.3939, "step": 11631 }, { "epoch": 2.391201562339398, "grad_norm": 0.2315075695514679, "learning_rate": 9.367615128044811e-06, "loss": 0.3848, "step": 11632 }, { "epoch": 2.391407133312776, "grad_norm": 0.23707285523414612, "learning_rate": 9.361515863689775e-06, "loss": 0.3923, "step": 11633 }, { "epoch": 2.391612704286155, "grad_norm": 0.2384757399559021, "learning_rate": 9.355418355076277e-06, "loss": 0.362, "step": 11634 }, { "epoch": 2.391818275259533, "grad_norm": 0.23415617644786835, "learning_rate": 9.349322602504717e-06, "loss": 0.4033, "step": 11635 }, { "epoch": 2.3920238462329118, "grad_norm": 0.23738761246204376, "learning_rate": 9.343228606275398e-06, "loss": 0.389, "step": 11636 }, { "epoch": 2.3922294172062903, "grad_norm": 0.2381700575351715, "learning_rate": 9.337136366688534e-06, "loss": 0.396, "step": 11637 }, { "epoch": 2.392434988179669, "grad_norm": 0.22899046540260315, "learning_rate": 9.331045884044288e-06, "loss": 0.3902, "step": 11638 }, { "epoch": 2.3926405591530475, "grad_norm": 0.24088416993618011, "learning_rate": 9.324957158642698e-06, "loss": 0.4191, "step": 11639 }, { "epoch": 2.392846130126426, "grad_norm": 0.12892726063728333, "learning_rate": 9.318870190783708e-06, "loss": 0.4628, "step": 11640 }, { "epoch": 2.3930517010998047, "grad_norm": 0.23511195182800293, "learning_rate": 9.312784980767221e-06, "loss": 0.4036, "step": 11641 }, { "epoch": 2.3932572720731833, "grad_norm": 0.1235305592417717, "learning_rate": 9.306701528893022e-06, "loss": 0.4505, "step": 11642 }, { "epoch": 2.393462843046562, "grad_norm": 0.33878177404403687, "learning_rate": 9.300619835460804e-06, "loss": 0.3857, "step": 11643 }, { "epoch": 2.3936684140199405, "grad_norm": 0.1223718672990799, "learning_rate": 9.294539900770187e-06, "loss": 0.4886, "step": 11644 }, { "epoch": 2.393873984993319, "grad_norm": 0.2304789125919342, "learning_rate": 9.288461725120694e-06, "loss": 0.3925, "step": 11645 }, { "epoch": 2.3940795559666976, "grad_norm": 0.2310042679309845, "learning_rate": 9.282385308811784e-06, "loss": 0.3862, "step": 11646 }, { "epoch": 2.3942851269400762, "grad_norm": 0.22798366844654083, "learning_rate": 9.276310652142813e-06, "loss": 0.3814, "step": 11647 }, { "epoch": 2.394490697913455, "grad_norm": 0.23074646294116974, "learning_rate": 9.270237755413042e-06, "loss": 0.3983, "step": 11648 }, { "epoch": 2.3946962688868334, "grad_norm": 0.11851920187473297, "learning_rate": 9.264166618921649e-06, "loss": 0.4514, "step": 11649 }, { "epoch": 2.3949018398602115, "grad_norm": 0.22583618760108948, "learning_rate": 9.258097242967744e-06, "loss": 0.3941, "step": 11650 }, { "epoch": 2.39510741083359, "grad_norm": 0.23350371420383453, "learning_rate": 9.252029627850334e-06, "loss": 0.3911, "step": 11651 }, { "epoch": 2.3953129818069687, "grad_norm": 0.22866030037403107, "learning_rate": 9.245963773868321e-06, "loss": 0.3851, "step": 11652 }, { "epoch": 2.3955185527803473, "grad_norm": 0.23153471946716309, "learning_rate": 9.239899681320573e-06, "loss": 0.3953, "step": 11653 }, { "epoch": 2.395724123753726, "grad_norm": 0.24449722468852997, "learning_rate": 9.233837350505824e-06, "loss": 0.3887, "step": 11654 }, { "epoch": 2.3959296947271045, "grad_norm": 0.23732249438762665, "learning_rate": 9.22777678172274e-06, "loss": 0.3858, "step": 11655 }, { "epoch": 2.396135265700483, "grad_norm": 0.231268510222435, "learning_rate": 9.221717975269895e-06, "loss": 0.3985, "step": 11656 }, { "epoch": 2.3963408366738617, "grad_norm": 0.23087145388126373, "learning_rate": 9.215660931445777e-06, "loss": 0.4104, "step": 11657 }, { "epoch": 2.3965464076472403, "grad_norm": 0.12480619549751282, "learning_rate": 9.209605650548777e-06, "loss": 0.4454, "step": 11658 }, { "epoch": 2.396751978620619, "grad_norm": 0.2369321882724762, "learning_rate": 9.203552132877233e-06, "loss": 0.3862, "step": 11659 }, { "epoch": 2.3969575495939974, "grad_norm": 0.22652901709079742, "learning_rate": 9.197500378729366e-06, "loss": 0.3744, "step": 11660 }, { "epoch": 2.397163120567376, "grad_norm": 0.2313791662454605, "learning_rate": 9.191450388403304e-06, "loss": 0.3994, "step": 11661 }, { "epoch": 2.3973686915407546, "grad_norm": 0.22537241876125336, "learning_rate": 9.18540216219712e-06, "loss": 0.3834, "step": 11662 }, { "epoch": 2.397574262514133, "grad_norm": 0.12685616314411163, "learning_rate": 9.17935570040878e-06, "loss": 0.4568, "step": 11663 }, { "epoch": 2.397779833487512, "grad_norm": 0.24072742462158203, "learning_rate": 9.173311003336157e-06, "loss": 0.3874, "step": 11664 }, { "epoch": 2.39798540446089, "grad_norm": 0.2216685265302658, "learning_rate": 9.167268071277045e-06, "loss": 0.4017, "step": 11665 }, { "epoch": 2.3981909754342685, "grad_norm": 0.1218583807349205, "learning_rate": 9.161226904529145e-06, "loss": 0.4435, "step": 11666 }, { "epoch": 2.398396546407647, "grad_norm": 0.23101285099983215, "learning_rate": 9.155187503390094e-06, "loss": 0.3781, "step": 11667 }, { "epoch": 2.3986021173810257, "grad_norm": 0.23491719365119934, "learning_rate": 9.14914986815742e-06, "loss": 0.394, "step": 11668 }, { "epoch": 2.3988076883544043, "grad_norm": 0.23189514875411987, "learning_rate": 9.143113999128563e-06, "loss": 0.3847, "step": 11669 }, { "epoch": 2.399013259327783, "grad_norm": 0.2177298218011856, "learning_rate": 9.137079896600887e-06, "loss": 0.3886, "step": 11670 }, { "epoch": 2.3992188303011615, "grad_norm": 0.1185031533241272, "learning_rate": 9.131047560871658e-06, "loss": 0.4323, "step": 11671 }, { "epoch": 2.39942440127454, "grad_norm": 0.22408519685268402, "learning_rate": 9.12501699223807e-06, "loss": 0.3679, "step": 11672 }, { "epoch": 2.3996299722479186, "grad_norm": 0.23200219869613647, "learning_rate": 9.118988190997197e-06, "loss": 0.3909, "step": 11673 }, { "epoch": 2.399835543221297, "grad_norm": 0.22250621020793915, "learning_rate": 9.112961157446087e-06, "loss": 0.3789, "step": 11674 }, { "epoch": 2.400041114194676, "grad_norm": 0.2219180166721344, "learning_rate": 9.106935891881641e-06, "loss": 0.3725, "step": 11675 }, { "epoch": 2.4002466851680544, "grad_norm": 0.2245936095714569, "learning_rate": 9.1009123946007e-06, "loss": 0.401, "step": 11676 }, { "epoch": 2.400452256141433, "grad_norm": 0.2297823131084442, "learning_rate": 9.094890665900018e-06, "loss": 0.3871, "step": 11677 }, { "epoch": 2.4006578271148116, "grad_norm": 0.2330087423324585, "learning_rate": 9.088870706076245e-06, "loss": 0.4198, "step": 11678 }, { "epoch": 2.40086339808819, "grad_norm": 0.23439383506774902, "learning_rate": 9.08285251542596e-06, "loss": 0.3966, "step": 11679 }, { "epoch": 2.4010689690615683, "grad_norm": 0.12889879941940308, "learning_rate": 9.076836094245659e-06, "loss": 0.4475, "step": 11680 }, { "epoch": 2.401274540034947, "grad_norm": 0.22724612057209015, "learning_rate": 9.070821442831747e-06, "loss": 0.3952, "step": 11681 }, { "epoch": 2.4014801110083255, "grad_norm": 0.22570443153381348, "learning_rate": 9.064808561480513e-06, "loss": 0.3949, "step": 11682 }, { "epoch": 2.401685681981704, "grad_norm": 0.22554244101047516, "learning_rate": 9.058797450488212e-06, "loss": 0.4023, "step": 11683 }, { "epoch": 2.4018912529550827, "grad_norm": 0.12734529376029968, "learning_rate": 9.052788110150975e-06, "loss": 0.4305, "step": 11684 }, { "epoch": 2.4020968239284612, "grad_norm": 0.23667073249816895, "learning_rate": 9.046780540764853e-06, "loss": 0.3961, "step": 11685 }, { "epoch": 2.40230239490184, "grad_norm": 0.12144028395414352, "learning_rate": 9.040774742625795e-06, "loss": 0.4524, "step": 11686 }, { "epoch": 2.4025079658752184, "grad_norm": 0.2276497334241867, "learning_rate": 9.034770716029703e-06, "loss": 0.3837, "step": 11687 }, { "epoch": 2.402713536848597, "grad_norm": 0.23129193484783173, "learning_rate": 9.028768461272352e-06, "loss": 0.384, "step": 11688 }, { "epoch": 2.4029191078219756, "grad_norm": 0.21576765179634094, "learning_rate": 9.022767978649457e-06, "loss": 0.4049, "step": 11689 }, { "epoch": 2.403124678795354, "grad_norm": 0.2269795835018158, "learning_rate": 9.016769268456623e-06, "loss": 0.3741, "step": 11690 }, { "epoch": 2.4033302497687328, "grad_norm": 0.22810319066047668, "learning_rate": 9.010772330989387e-06, "loss": 0.4111, "step": 11691 }, { "epoch": 2.4035358207421114, "grad_norm": 0.23659124970436096, "learning_rate": 9.00477716654318e-06, "loss": 0.4142, "step": 11692 }, { "epoch": 2.40374139171549, "grad_norm": 0.21605411171913147, "learning_rate": 8.998783775413351e-06, "loss": 0.3838, "step": 11693 }, { "epoch": 2.4039469626888685, "grad_norm": 0.23164892196655273, "learning_rate": 8.992792157895186e-06, "loss": 0.3911, "step": 11694 }, { "epoch": 2.4041525336622467, "grad_norm": 0.23304125666618347, "learning_rate": 8.986802314283856e-06, "loss": 0.3949, "step": 11695 }, { "epoch": 2.4043581046356253, "grad_norm": 0.2246290147304535, "learning_rate": 8.980814244874447e-06, "loss": 0.373, "step": 11696 }, { "epoch": 2.404563675609004, "grad_norm": 0.23660001158714294, "learning_rate": 8.974827949961973e-06, "loss": 0.3805, "step": 11697 }, { "epoch": 2.4047692465823824, "grad_norm": 0.2283889651298523, "learning_rate": 8.968843429841342e-06, "loss": 0.3934, "step": 11698 }, { "epoch": 2.404974817555761, "grad_norm": 0.22905899584293365, "learning_rate": 8.962860684807384e-06, "loss": 0.3994, "step": 11699 }, { "epoch": 2.4051803885291396, "grad_norm": 0.21978145837783813, "learning_rate": 8.956879715154832e-06, "loss": 0.3818, "step": 11700 }, { "epoch": 2.405385959502518, "grad_norm": 0.2412233203649521, "learning_rate": 8.950900521178367e-06, "loss": 0.3827, "step": 11701 }, { "epoch": 2.405591530475897, "grad_norm": 0.2382228821516037, "learning_rate": 8.944923103172537e-06, "loss": 0.3949, "step": 11702 }, { "epoch": 2.4057971014492754, "grad_norm": 0.24121670424938202, "learning_rate": 8.938947461431813e-06, "loss": 0.3916, "step": 11703 }, { "epoch": 2.406002672422654, "grad_norm": 0.12582828104496002, "learning_rate": 8.932973596250607e-06, "loss": 0.4566, "step": 11704 }, { "epoch": 2.4062082433960326, "grad_norm": 0.11934048682451248, "learning_rate": 8.927001507923221e-06, "loss": 0.456, "step": 11705 }, { "epoch": 2.406413814369411, "grad_norm": 0.22901131212711334, "learning_rate": 8.921031196743864e-06, "loss": 0.374, "step": 11706 }, { "epoch": 2.4066193853427897, "grad_norm": 0.23406758904457092, "learning_rate": 8.915062663006655e-06, "loss": 0.3698, "step": 11707 }, { "epoch": 2.4068249563161683, "grad_norm": 0.23848240077495575, "learning_rate": 8.909095907005659e-06, "loss": 0.3978, "step": 11708 }, { "epoch": 2.407030527289547, "grad_norm": 0.22418878972530365, "learning_rate": 8.903130929034822e-06, "loss": 0.3848, "step": 11709 }, { "epoch": 2.407236098262925, "grad_norm": 0.22299997508525848, "learning_rate": 8.897167729388002e-06, "loss": 0.3901, "step": 11710 }, { "epoch": 2.4074416692363036, "grad_norm": 0.22833383083343506, "learning_rate": 8.89120630835899e-06, "loss": 0.3744, "step": 11711 }, { "epoch": 2.4076472402096822, "grad_norm": 0.2442595660686493, "learning_rate": 8.885246666241468e-06, "loss": 0.3829, "step": 11712 }, { "epoch": 2.407852811183061, "grad_norm": 0.23331138491630554, "learning_rate": 8.879288803329043e-06, "loss": 0.4022, "step": 11713 }, { "epoch": 2.4080583821564394, "grad_norm": 0.22748929262161255, "learning_rate": 8.87333271991522e-06, "loss": 0.4032, "step": 11714 }, { "epoch": 2.408263953129818, "grad_norm": 0.23111633956432343, "learning_rate": 8.867378416293447e-06, "loss": 0.3815, "step": 11715 }, { "epoch": 2.4084695241031966, "grad_norm": 0.23724834620952606, "learning_rate": 8.861425892757058e-06, "loss": 0.384, "step": 11716 }, { "epoch": 2.408675095076575, "grad_norm": 0.22605331242084503, "learning_rate": 8.855475149599309e-06, "loss": 0.3709, "step": 11717 }, { "epoch": 2.4088806660499538, "grad_norm": 0.2561459541320801, "learning_rate": 8.849526187113354e-06, "loss": 0.3945, "step": 11718 }, { "epoch": 2.4090862370233324, "grad_norm": 0.2261964976787567, "learning_rate": 8.843579005592281e-06, "loss": 0.399, "step": 11719 }, { "epoch": 2.409291807996711, "grad_norm": 0.23060794174671173, "learning_rate": 8.837633605329074e-06, "loss": 0.4068, "step": 11720 }, { "epoch": 2.4094973789700895, "grad_norm": 0.2191411554813385, "learning_rate": 8.831689986616623e-06, "loss": 0.3823, "step": 11721 }, { "epoch": 2.409702949943468, "grad_norm": 0.2240157574415207, "learning_rate": 8.82574814974777e-06, "loss": 0.3942, "step": 11722 }, { "epoch": 2.4099085209168467, "grad_norm": 0.22615095973014832, "learning_rate": 8.819808095015225e-06, "loss": 0.3915, "step": 11723 }, { "epoch": 2.4101140918902253, "grad_norm": 0.12168576568365097, "learning_rate": 8.81386982271163e-06, "loss": 0.4526, "step": 11724 }, { "epoch": 2.4103196628636034, "grad_norm": 0.12334515154361725, "learning_rate": 8.807933333129526e-06, "loss": 0.4541, "step": 11725 }, { "epoch": 2.410525233836982, "grad_norm": 0.2267734259366989, "learning_rate": 8.801998626561397e-06, "loss": 0.3867, "step": 11726 }, { "epoch": 2.4107308048103606, "grad_norm": 0.23022069036960602, "learning_rate": 8.796065703299608e-06, "loss": 0.4002, "step": 11727 }, { "epoch": 2.410936375783739, "grad_norm": 0.2284584641456604, "learning_rate": 8.79013456363643e-06, "loss": 0.3759, "step": 11728 }, { "epoch": 2.411141946757118, "grad_norm": 0.24320749938488007, "learning_rate": 8.78420520786409e-06, "loss": 0.3935, "step": 11729 }, { "epoch": 2.4113475177304964, "grad_norm": 0.2366839051246643, "learning_rate": 8.778277636274688e-06, "loss": 0.399, "step": 11730 }, { "epoch": 2.411553088703875, "grad_norm": 0.22736315429210663, "learning_rate": 8.772351849160245e-06, "loss": 0.3755, "step": 11731 }, { "epoch": 2.4117586596772536, "grad_norm": 0.23666198551654816, "learning_rate": 8.766427846812702e-06, "loss": 0.3967, "step": 11732 }, { "epoch": 2.411964230650632, "grad_norm": 0.2277197241783142, "learning_rate": 8.760505629523901e-06, "loss": 0.3715, "step": 11733 }, { "epoch": 2.4121698016240107, "grad_norm": 0.21670396625995636, "learning_rate": 8.754585197585605e-06, "loss": 0.3729, "step": 11734 }, { "epoch": 2.4123753725973893, "grad_norm": 0.22383198142051697, "learning_rate": 8.748666551289474e-06, "loss": 0.39, "step": 11735 }, { "epoch": 2.412580943570768, "grad_norm": 0.232547789812088, "learning_rate": 8.742749690927115e-06, "loss": 0.3888, "step": 11736 }, { "epoch": 2.4127865145441465, "grad_norm": 0.23317821323871613, "learning_rate": 8.736834616790018e-06, "loss": 0.4036, "step": 11737 }, { "epoch": 2.412992085517525, "grad_norm": 0.12473565340042114, "learning_rate": 8.73092132916958e-06, "loss": 0.4348, "step": 11738 }, { "epoch": 2.4131976564909037, "grad_norm": 0.22561731934547424, "learning_rate": 8.72500982835713e-06, "loss": 0.3938, "step": 11739 }, { "epoch": 2.413403227464282, "grad_norm": 0.22302691638469696, "learning_rate": 8.719100114643891e-06, "loss": 0.3842, "step": 11740 }, { "epoch": 2.413608798437661, "grad_norm": 0.2331441342830658, "learning_rate": 8.71319218832102e-06, "loss": 0.3891, "step": 11741 }, { "epoch": 2.413814369411039, "grad_norm": 0.22362032532691956, "learning_rate": 8.70728604967955e-06, "loss": 0.3858, "step": 11742 }, { "epoch": 2.4140199403844176, "grad_norm": 0.23315522074699402, "learning_rate": 8.701381699010476e-06, "loss": 0.3939, "step": 11743 }, { "epoch": 2.414225511357796, "grad_norm": 0.11996540427207947, "learning_rate": 8.69547913660467e-06, "loss": 0.4391, "step": 11744 }, { "epoch": 2.4144310823311748, "grad_norm": 0.12286810576915741, "learning_rate": 8.689578362752919e-06, "loss": 0.4379, "step": 11745 }, { "epoch": 2.4146366533045533, "grad_norm": 0.229908749461174, "learning_rate": 8.683679377745915e-06, "loss": 0.3843, "step": 11746 }, { "epoch": 2.414842224277932, "grad_norm": 0.2293166071176529, "learning_rate": 8.677782181874295e-06, "loss": 0.3845, "step": 11747 }, { "epoch": 2.4150477952513105, "grad_norm": 0.22157427668571472, "learning_rate": 8.671886775428584e-06, "loss": 0.3857, "step": 11748 }, { "epoch": 2.415253366224689, "grad_norm": 0.22539031505584717, "learning_rate": 8.665993158699197e-06, "loss": 0.3803, "step": 11749 }, { "epoch": 2.4154589371980677, "grad_norm": 0.23554009199142456, "learning_rate": 8.660101331976515e-06, "loss": 0.3964, "step": 11750 }, { "epoch": 2.4156645081714463, "grad_norm": 0.11748301237821579, "learning_rate": 8.654211295550791e-06, "loss": 0.4473, "step": 11751 }, { "epoch": 2.415870079144825, "grad_norm": 0.12662889063358307, "learning_rate": 8.648323049712192e-06, "loss": 0.4615, "step": 11752 }, { "epoch": 2.4160756501182035, "grad_norm": 0.23614639043807983, "learning_rate": 8.642436594750813e-06, "loss": 0.3832, "step": 11753 }, { "epoch": 2.416281221091582, "grad_norm": 0.23256917297840118, "learning_rate": 8.636551930956645e-06, "loss": 0.4061, "step": 11754 }, { "epoch": 2.41648679206496, "grad_norm": 0.23401497304439545, "learning_rate": 8.630669058619595e-06, "loss": 0.4095, "step": 11755 }, { "epoch": 2.4166923630383392, "grad_norm": 0.12618698179721832, "learning_rate": 8.624787978029495e-06, "loss": 0.4405, "step": 11756 }, { "epoch": 2.4168979340117174, "grad_norm": 0.22936862707138062, "learning_rate": 8.61890868947608e-06, "loss": 0.391, "step": 11757 }, { "epoch": 2.417103504985096, "grad_norm": 0.2273116260766983, "learning_rate": 8.613031193248985e-06, "loss": 0.4034, "step": 11758 }, { "epoch": 2.4173090759584746, "grad_norm": 0.23834945261478424, "learning_rate": 8.607155489637773e-06, "loss": 0.3938, "step": 11759 }, { "epoch": 2.417514646931853, "grad_norm": 0.22730384767055511, "learning_rate": 8.601281578931908e-06, "loss": 0.4146, "step": 11760 }, { "epoch": 2.4177202179052317, "grad_norm": 0.23497353494167328, "learning_rate": 8.595409461420778e-06, "loss": 0.3847, "step": 11761 }, { "epoch": 2.4179257888786103, "grad_norm": 0.23128505051136017, "learning_rate": 8.589539137393653e-06, "loss": 0.3937, "step": 11762 }, { "epoch": 2.418131359851989, "grad_norm": 0.22396472096443176, "learning_rate": 8.583670607139764e-06, "loss": 0.3887, "step": 11763 }, { "epoch": 2.4183369308253675, "grad_norm": 0.2318245768547058, "learning_rate": 8.577803870948217e-06, "loss": 0.3752, "step": 11764 }, { "epoch": 2.418542501798746, "grad_norm": 0.12530356645584106, "learning_rate": 8.571938929108033e-06, "loss": 0.4542, "step": 11765 }, { "epoch": 2.4187480727721247, "grad_norm": 0.23986156284809113, "learning_rate": 8.566075781908158e-06, "loss": 0.3791, "step": 11766 }, { "epoch": 2.4189536437455033, "grad_norm": 0.2401699423789978, "learning_rate": 8.56021442963742e-06, "loss": 0.3889, "step": 11767 }, { "epoch": 2.419159214718882, "grad_norm": 0.12486173957586288, "learning_rate": 8.554354872584612e-06, "loss": 0.4482, "step": 11768 }, { "epoch": 2.4193647856922604, "grad_norm": 0.12108970433473587, "learning_rate": 8.5484971110384e-06, "loss": 0.4339, "step": 11769 }, { "epoch": 2.4195703566656386, "grad_norm": 0.2209288775920868, "learning_rate": 8.542641145287342e-06, "loss": 0.3695, "step": 11770 }, { "epoch": 2.4197759276390176, "grad_norm": 0.22829124331474304, "learning_rate": 8.536786975619966e-06, "loss": 0.3876, "step": 11771 }, { "epoch": 2.4199814986123958, "grad_norm": 0.24268139898777008, "learning_rate": 8.53093460232467e-06, "loss": 0.3802, "step": 11772 }, { "epoch": 2.4201870695857743, "grad_norm": 0.23681510984897614, "learning_rate": 8.525084025689766e-06, "loss": 0.3856, "step": 11773 }, { "epoch": 2.420392640559153, "grad_norm": 0.23241069912910461, "learning_rate": 8.519235246003491e-06, "loss": 0.3781, "step": 11774 }, { "epoch": 2.4205982115325315, "grad_norm": 0.21853965520858765, "learning_rate": 8.513388263553982e-06, "loss": 0.3835, "step": 11775 }, { "epoch": 2.42080378250591, "grad_norm": 0.23458018898963928, "learning_rate": 8.507543078629288e-06, "loss": 0.3982, "step": 11776 }, { "epoch": 2.4210093534792887, "grad_norm": 0.23409396409988403, "learning_rate": 8.501699691517392e-06, "loss": 0.3817, "step": 11777 }, { "epoch": 2.4212149244526673, "grad_norm": 0.23286281526088715, "learning_rate": 8.49585810250616e-06, "loss": 0.4137, "step": 11778 }, { "epoch": 2.421420495426046, "grad_norm": 0.11904696375131607, "learning_rate": 8.49001831188338e-06, "loss": 0.453, "step": 11779 }, { "epoch": 2.4216260663994245, "grad_norm": 0.24150028824806213, "learning_rate": 8.484180319936748e-06, "loss": 0.3943, "step": 11780 }, { "epoch": 2.421831637372803, "grad_norm": 0.2359628528356552, "learning_rate": 8.478344126953874e-06, "loss": 0.3806, "step": 11781 }, { "epoch": 2.4220372083461816, "grad_norm": 0.12654449045658112, "learning_rate": 8.472509733222289e-06, "loss": 0.4553, "step": 11782 }, { "epoch": 2.4222427793195602, "grad_norm": 0.23700331151485443, "learning_rate": 8.466677139029405e-06, "loss": 0.4043, "step": 11783 }, { "epoch": 2.422448350292939, "grad_norm": 0.24000297486782074, "learning_rate": 8.460846344662597e-06, "loss": 0.396, "step": 11784 }, { "epoch": 2.422653921266317, "grad_norm": 0.12040732055902481, "learning_rate": 8.455017350409105e-06, "loss": 0.4522, "step": 11785 }, { "epoch": 2.422859492239696, "grad_norm": 0.21814100444316864, "learning_rate": 8.449190156556098e-06, "loss": 0.3766, "step": 11786 }, { "epoch": 2.423065063213074, "grad_norm": 0.12481694668531418, "learning_rate": 8.443364763390649e-06, "loss": 0.4527, "step": 11787 }, { "epoch": 2.4232706341864527, "grad_norm": 0.22663375735282898, "learning_rate": 8.43754117119976e-06, "loss": 0.383, "step": 11788 }, { "epoch": 2.4234762051598313, "grad_norm": 0.12208539247512817, "learning_rate": 8.431719380270307e-06, "loss": 0.4564, "step": 11789 }, { "epoch": 2.42368177613321, "grad_norm": 0.22577068209648132, "learning_rate": 8.425899390889138e-06, "loss": 0.3758, "step": 11790 }, { "epoch": 2.4238873471065885, "grad_norm": 0.12725965678691864, "learning_rate": 8.420081203342941e-06, "loss": 0.435, "step": 11791 }, { "epoch": 2.424092918079967, "grad_norm": 0.23309412598609924, "learning_rate": 8.414264817918385e-06, "loss": 0.3846, "step": 11792 }, { "epoch": 2.4242984890533457, "grad_norm": 0.2279675304889679, "learning_rate": 8.408450234901998e-06, "loss": 0.3934, "step": 11793 }, { "epoch": 2.4245040600267243, "grad_norm": 0.22592322528362274, "learning_rate": 8.402637454580244e-06, "loss": 0.3864, "step": 11794 }, { "epoch": 2.424709631000103, "grad_norm": 0.22809530794620514, "learning_rate": 8.396826477239479e-06, "loss": 0.3911, "step": 11795 }, { "epoch": 2.4249152019734814, "grad_norm": 0.23382043838500977, "learning_rate": 8.391017303165995e-06, "loss": 0.392, "step": 11796 }, { "epoch": 2.42512077294686, "grad_norm": 0.23308755457401276, "learning_rate": 8.38520993264597e-06, "loss": 0.4044, "step": 11797 }, { "epoch": 2.4253263439202386, "grad_norm": 0.22026486694812775, "learning_rate": 8.379404365965524e-06, "loss": 0.3994, "step": 11798 }, { "epoch": 2.425531914893617, "grad_norm": 0.12114302068948746, "learning_rate": 8.373600603410658e-06, "loss": 0.4553, "step": 11799 }, { "epoch": 2.4257374858669953, "grad_norm": 0.23082832992076874, "learning_rate": 8.367798645267303e-06, "loss": 0.3775, "step": 11800 }, { "epoch": 2.4259430568403744, "grad_norm": 0.2422942817211151, "learning_rate": 8.361998491821289e-06, "loss": 0.3988, "step": 11801 }, { "epoch": 2.4261486278137525, "grad_norm": 0.23066774010658264, "learning_rate": 8.356200143358363e-06, "loss": 0.3964, "step": 11802 }, { "epoch": 2.426354198787131, "grad_norm": 0.1255854219198227, "learning_rate": 8.35040360016418e-06, "loss": 0.4471, "step": 11803 }, { "epoch": 2.4265597697605097, "grad_norm": 0.23167705535888672, "learning_rate": 8.344608862524306e-06, "loss": 0.3935, "step": 11804 }, { "epoch": 2.4267653407338883, "grad_norm": 0.22956189513206482, "learning_rate": 8.338815930724234e-06, "loss": 0.3887, "step": 11805 }, { "epoch": 2.426970911707267, "grad_norm": 0.23048321902751923, "learning_rate": 8.33302480504935e-06, "loss": 0.3993, "step": 11806 }, { "epoch": 2.4271764826806455, "grad_norm": 0.22636932134628296, "learning_rate": 8.327235485784948e-06, "loss": 0.3955, "step": 11807 }, { "epoch": 2.427382053654024, "grad_norm": 0.2231477051973343, "learning_rate": 8.321447973216248e-06, "loss": 0.3885, "step": 11808 }, { "epoch": 2.4275876246274026, "grad_norm": 0.23407144844532013, "learning_rate": 8.315662267628374e-06, "loss": 0.3875, "step": 11809 }, { "epoch": 2.427793195600781, "grad_norm": 0.22613434493541718, "learning_rate": 8.309878369306348e-06, "loss": 0.3699, "step": 11810 }, { "epoch": 2.42799876657416, "grad_norm": 0.244913712143898, "learning_rate": 8.30409627853513e-06, "loss": 0.3858, "step": 11811 }, { "epoch": 2.4282043375475384, "grad_norm": 0.24289999902248383, "learning_rate": 8.298315995599578e-06, "loss": 0.3877, "step": 11812 }, { "epoch": 2.428409908520917, "grad_norm": 0.2354183942079544, "learning_rate": 8.292537520784438e-06, "loss": 0.3713, "step": 11813 }, { "epoch": 2.4286154794942956, "grad_norm": 0.22426529228687286, "learning_rate": 8.286760854374421e-06, "loss": 0.374, "step": 11814 }, { "epoch": 2.428821050467674, "grad_norm": 0.12690819799900055, "learning_rate": 8.280985996654097e-06, "loss": 0.4512, "step": 11815 }, { "epoch": 2.4290266214410527, "grad_norm": 0.12370403110980988, "learning_rate": 8.275212947907967e-06, "loss": 0.4472, "step": 11816 }, { "epoch": 2.429232192414431, "grad_norm": 0.237684965133667, "learning_rate": 8.26944170842044e-06, "loss": 0.3942, "step": 11817 }, { "epoch": 2.4294377633878095, "grad_norm": 0.23901338875293732, "learning_rate": 8.26367227847584e-06, "loss": 0.3923, "step": 11818 }, { "epoch": 2.429643334361188, "grad_norm": 0.22691085934638977, "learning_rate": 8.257904658358407e-06, "loss": 0.3927, "step": 11819 }, { "epoch": 2.4298489053345667, "grad_norm": 0.2229507714509964, "learning_rate": 8.25213884835228e-06, "loss": 0.3897, "step": 11820 }, { "epoch": 2.4300544763079452, "grad_norm": 0.2375117689371109, "learning_rate": 8.246374848741511e-06, "loss": 0.3892, "step": 11821 }, { "epoch": 2.430260047281324, "grad_norm": 0.23138779401779175, "learning_rate": 8.24061265981007e-06, "loss": 0.3968, "step": 11822 }, { "epoch": 2.4304656182547024, "grad_norm": 0.23814985156059265, "learning_rate": 8.234852281841833e-06, "loss": 0.3955, "step": 11823 }, { "epoch": 2.430671189228081, "grad_norm": 0.2250833660364151, "learning_rate": 8.229093715120578e-06, "loss": 0.37, "step": 11824 }, { "epoch": 2.4308767602014596, "grad_norm": 0.11755650490522385, "learning_rate": 8.223336959930003e-06, "loss": 0.4526, "step": 11825 }, { "epoch": 2.431082331174838, "grad_norm": 0.11656010895967484, "learning_rate": 8.217582016553732e-06, "loss": 0.427, "step": 11826 }, { "epoch": 2.4312879021482168, "grad_norm": 0.2219466120004654, "learning_rate": 8.211828885275272e-06, "loss": 0.3825, "step": 11827 }, { "epoch": 2.4314934731215954, "grad_norm": 0.22404515743255615, "learning_rate": 8.206077566378058e-06, "loss": 0.3639, "step": 11828 }, { "epoch": 2.431699044094974, "grad_norm": 0.22220522165298462, "learning_rate": 8.200328060145428e-06, "loss": 0.3856, "step": 11829 }, { "epoch": 2.4319046150683525, "grad_norm": 0.22566857933998108, "learning_rate": 8.194580366860628e-06, "loss": 0.394, "step": 11830 }, { "epoch": 2.432110186041731, "grad_norm": 0.2224518358707428, "learning_rate": 8.18883448680682e-06, "loss": 0.3692, "step": 11831 }, { "epoch": 2.4323157570151093, "grad_norm": 0.23694801330566406, "learning_rate": 8.18309042026709e-06, "loss": 0.4063, "step": 11832 }, { "epoch": 2.432521327988488, "grad_norm": 0.1398681104183197, "learning_rate": 8.177348167524418e-06, "loss": 0.4508, "step": 11833 }, { "epoch": 2.4327268989618664, "grad_norm": 0.2483135610818863, "learning_rate": 8.171607728861677e-06, "loss": 0.3977, "step": 11834 }, { "epoch": 2.432932469935245, "grad_norm": 0.23512189090251923, "learning_rate": 8.165869104561702e-06, "loss": 0.3918, "step": 11835 }, { "epoch": 2.4331380409086236, "grad_norm": 0.11742374300956726, "learning_rate": 8.16013229490719e-06, "loss": 0.4327, "step": 11836 }, { "epoch": 2.433343611882002, "grad_norm": 0.24561573565006256, "learning_rate": 8.154397300180771e-06, "loss": 0.409, "step": 11837 }, { "epoch": 2.433549182855381, "grad_norm": 0.22359338402748108, "learning_rate": 8.148664120664973e-06, "loss": 0.3741, "step": 11838 }, { "epoch": 2.4337547538287594, "grad_norm": 0.22402852773666382, "learning_rate": 8.142932756642262e-06, "loss": 0.3976, "step": 11839 }, { "epoch": 2.433960324802138, "grad_norm": 0.22858619689941406, "learning_rate": 8.137203208394986e-06, "loss": 0.3971, "step": 11840 }, { "epoch": 2.4341658957755166, "grad_norm": 0.1260390430688858, "learning_rate": 8.13147547620541e-06, "loss": 0.449, "step": 11841 }, { "epoch": 2.434371466748895, "grad_norm": 0.2319212555885315, "learning_rate": 8.12574956035571e-06, "loss": 0.3938, "step": 11842 }, { "epoch": 2.4345770377222737, "grad_norm": 0.2238619327545166, "learning_rate": 8.120025461127984e-06, "loss": 0.3789, "step": 11843 }, { "epoch": 2.4347826086956523, "grad_norm": 0.2239915281534195, "learning_rate": 8.114303178804226e-06, "loss": 0.3881, "step": 11844 }, { "epoch": 2.434988179669031, "grad_norm": 0.2295527458190918, "learning_rate": 8.108582713666335e-06, "loss": 0.3872, "step": 11845 }, { "epoch": 2.4351937506424095, "grad_norm": 0.22697387635707855, "learning_rate": 8.102864065996159e-06, "loss": 0.3928, "step": 11846 }, { "epoch": 2.4353993216157876, "grad_norm": 0.12275702506303787, "learning_rate": 8.09714723607541e-06, "loss": 0.4449, "step": 11847 }, { "epoch": 2.4356048925891662, "grad_norm": 0.2424585521221161, "learning_rate": 8.09143222418573e-06, "loss": 0.4016, "step": 11848 }, { "epoch": 2.435810463562545, "grad_norm": 0.23178981244564056, "learning_rate": 8.085719030608682e-06, "loss": 0.3917, "step": 11849 }, { "epoch": 2.4360160345359234, "grad_norm": 0.23368723690509796, "learning_rate": 8.080007655625715e-06, "loss": 0.3883, "step": 11850 }, { "epoch": 2.436221605509302, "grad_norm": 0.23010197281837463, "learning_rate": 8.074298099518207e-06, "loss": 0.4075, "step": 11851 }, { "epoch": 2.4364271764826806, "grad_norm": 0.23444652557373047, "learning_rate": 8.068590362567436e-06, "loss": 0.3887, "step": 11852 }, { "epoch": 2.436632747456059, "grad_norm": 0.22641274333000183, "learning_rate": 8.062884445054602e-06, "loss": 0.3826, "step": 11853 }, { "epoch": 2.4368383184294378, "grad_norm": 0.12579554319381714, "learning_rate": 8.057180347260816e-06, "loss": 0.4397, "step": 11854 }, { "epoch": 2.4370438894028164, "grad_norm": 0.2325548529624939, "learning_rate": 8.05147806946707e-06, "loss": 0.3948, "step": 11855 }, { "epoch": 2.437249460376195, "grad_norm": 0.22521813213825226, "learning_rate": 8.045777611954315e-06, "loss": 0.3773, "step": 11856 }, { "epoch": 2.4374550313495735, "grad_norm": 0.22666728496551514, "learning_rate": 8.040078975003372e-06, "loss": 0.3918, "step": 11857 }, { "epoch": 2.437660602322952, "grad_norm": 0.2205967754125595, "learning_rate": 8.03438215889499e-06, "loss": 0.3929, "step": 11858 }, { "epoch": 2.4378661732963307, "grad_norm": 0.23035195469856262, "learning_rate": 8.028687163909804e-06, "loss": 0.3795, "step": 11859 }, { "epoch": 2.4380717442697093, "grad_norm": 0.22747023403644562, "learning_rate": 8.022993990328418e-06, "loss": 0.3908, "step": 11860 }, { "epoch": 2.438277315243088, "grad_norm": 0.2318742722272873, "learning_rate": 8.017302638431285e-06, "loss": 0.3972, "step": 11861 }, { "epoch": 2.438482886216466, "grad_norm": 0.22795268893241882, "learning_rate": 8.011613108498795e-06, "loss": 0.3828, "step": 11862 }, { "epoch": 2.4386884571898446, "grad_norm": 0.23046202957630157, "learning_rate": 8.00592540081124e-06, "loss": 0.391, "step": 11863 }, { "epoch": 2.438894028163223, "grad_norm": 0.23023991286754608, "learning_rate": 8.000239515648832e-06, "loss": 0.3984, "step": 11864 }, { "epoch": 2.439099599136602, "grad_norm": 0.2348402440547943, "learning_rate": 7.994555453291689e-06, "loss": 0.4067, "step": 11865 }, { "epoch": 2.4393051701099804, "grad_norm": 0.22935132682323456, "learning_rate": 7.98887321401982e-06, "loss": 0.3785, "step": 11866 }, { "epoch": 2.439510741083359, "grad_norm": 0.23405125737190247, "learning_rate": 7.983192798113195e-06, "loss": 0.3775, "step": 11867 }, { "epoch": 2.4397163120567376, "grad_norm": 0.22010092437267303, "learning_rate": 7.977514205851645e-06, "loss": 0.3812, "step": 11868 }, { "epoch": 2.439921883030116, "grad_norm": 0.22667670249938965, "learning_rate": 7.97183743751492e-06, "loss": 0.3858, "step": 11869 }, { "epoch": 2.4401274540034947, "grad_norm": 0.22953353822231293, "learning_rate": 7.966162493382703e-06, "loss": 0.3841, "step": 11870 }, { "epoch": 2.4403330249768733, "grad_norm": 0.2351302206516266, "learning_rate": 7.960489373734561e-06, "loss": 0.3691, "step": 11871 }, { "epoch": 2.440538595950252, "grad_norm": 0.22747184336185455, "learning_rate": 7.954818078849988e-06, "loss": 0.3671, "step": 11872 }, { "epoch": 2.4407441669236305, "grad_norm": 0.2345331311225891, "learning_rate": 7.949148609008362e-06, "loss": 0.3826, "step": 11873 }, { "epoch": 2.440949737897009, "grad_norm": 0.23388345539569855, "learning_rate": 7.943480964489024e-06, "loss": 0.3909, "step": 11874 }, { "epoch": 2.4411553088703877, "grad_norm": 0.2370399385690689, "learning_rate": 7.937815145571177e-06, "loss": 0.384, "step": 11875 }, { "epoch": 2.4413608798437663, "grad_norm": 0.23738206923007965, "learning_rate": 7.93215115253394e-06, "loss": 0.4001, "step": 11876 }, { "epoch": 2.4415664508171444, "grad_norm": 0.2355763465166092, "learning_rate": 7.926488985656372e-06, "loss": 0.3872, "step": 11877 }, { "epoch": 2.441772021790523, "grad_norm": 0.2362237423658371, "learning_rate": 7.920828645217405e-06, "loss": 0.3833, "step": 11878 }, { "epoch": 2.4419775927639016, "grad_norm": 0.22331978380680084, "learning_rate": 7.915170131495912e-06, "loss": 0.3734, "step": 11879 }, { "epoch": 2.44218316373728, "grad_norm": 0.23111921548843384, "learning_rate": 7.909513444770636e-06, "loss": 0.3911, "step": 11880 }, { "epoch": 2.4423887347106588, "grad_norm": 0.12303854525089264, "learning_rate": 7.90385858532028e-06, "loss": 0.4474, "step": 11881 }, { "epoch": 2.4425943056840373, "grad_norm": 0.23098498582839966, "learning_rate": 7.89820555342343e-06, "loss": 0.3952, "step": 11882 }, { "epoch": 2.442799876657416, "grad_norm": 0.2414843589067459, "learning_rate": 7.89255434935858e-06, "loss": 0.3926, "step": 11883 }, { "epoch": 2.4430054476307945, "grad_norm": 0.22785574197769165, "learning_rate": 7.886904973404134e-06, "loss": 0.3836, "step": 11884 }, { "epoch": 2.443211018604173, "grad_norm": 0.13417033851146698, "learning_rate": 7.881257425838412e-06, "loss": 0.4613, "step": 11885 }, { "epoch": 2.4434165895775517, "grad_norm": 0.11953188478946686, "learning_rate": 7.875611706939649e-06, "loss": 0.4594, "step": 11886 }, { "epoch": 2.4436221605509303, "grad_norm": 0.22670340538024902, "learning_rate": 7.869967816985965e-06, "loss": 0.3894, "step": 11887 }, { "epoch": 2.443827731524309, "grad_norm": 0.24815067648887634, "learning_rate": 7.86432575625543e-06, "loss": 0.3958, "step": 11888 }, { "epoch": 2.4440333024976875, "grad_norm": 0.22556784749031067, "learning_rate": 7.858685525025997e-06, "loss": 0.3895, "step": 11889 }, { "epoch": 2.444238873471066, "grad_norm": 0.2190685123205185, "learning_rate": 7.85304712357553e-06, "loss": 0.3572, "step": 11890 }, { "epoch": 2.4444444444444446, "grad_norm": 0.22397378087043762, "learning_rate": 7.847410552181804e-06, "loss": 0.3832, "step": 11891 }, { "epoch": 2.444650015417823, "grad_norm": 0.22663743793964386, "learning_rate": 7.841775811122514e-06, "loss": 0.3838, "step": 11892 }, { "epoch": 2.4448555863912014, "grad_norm": 0.22418825328350067, "learning_rate": 7.83614290067525e-06, "loss": 0.4074, "step": 11893 }, { "epoch": 2.44506115736458, "grad_norm": 0.2320103645324707, "learning_rate": 7.83051182111751e-06, "loss": 0.393, "step": 11894 }, { "epoch": 2.4452667283379586, "grad_norm": 0.2413313090801239, "learning_rate": 7.824882572726734e-06, "loss": 0.3944, "step": 11895 }, { "epoch": 2.445472299311337, "grad_norm": 0.24407535791397095, "learning_rate": 7.81925515578024e-06, "loss": 0.4004, "step": 11896 }, { "epoch": 2.4456778702847157, "grad_norm": 0.24596747756004333, "learning_rate": 7.81362957055526e-06, "loss": 0.4299, "step": 11897 }, { "epoch": 2.4458834412580943, "grad_norm": 0.23562337458133698, "learning_rate": 7.808005817328927e-06, "loss": 0.4011, "step": 11898 }, { "epoch": 2.446089012231473, "grad_norm": 0.1234961450099945, "learning_rate": 7.80238389637833e-06, "loss": 0.4573, "step": 11899 }, { "epoch": 2.4462945832048515, "grad_norm": 0.22207041084766388, "learning_rate": 7.796763807980414e-06, "loss": 0.3856, "step": 11900 }, { "epoch": 2.44650015417823, "grad_norm": 0.23153123259544373, "learning_rate": 7.79114555241205e-06, "loss": 0.3985, "step": 11901 }, { "epoch": 2.4467057251516087, "grad_norm": 0.2310320883989334, "learning_rate": 7.785529129950038e-06, "loss": 0.379, "step": 11902 }, { "epoch": 2.4469112961249873, "grad_norm": 0.23394078016281128, "learning_rate": 7.779914540871065e-06, "loss": 0.3878, "step": 11903 }, { "epoch": 2.447116867098366, "grad_norm": 0.24129053950309753, "learning_rate": 7.774301785451743e-06, "loss": 0.4019, "step": 11904 }, { "epoch": 2.4473224380717444, "grad_norm": 0.23060935735702515, "learning_rate": 7.768690863968575e-06, "loss": 0.383, "step": 11905 }, { "epoch": 2.447528009045123, "grad_norm": 0.2250318080186844, "learning_rate": 7.763081776697986e-06, "loss": 0.3917, "step": 11906 }, { "epoch": 2.447733580018501, "grad_norm": 0.2283366620540619, "learning_rate": 7.75747452391632e-06, "loss": 0.3753, "step": 11907 }, { "epoch": 2.4479391509918798, "grad_norm": 0.12568168342113495, "learning_rate": 7.751869105899797e-06, "loss": 0.4482, "step": 11908 }, { "epoch": 2.4481447219652583, "grad_norm": 0.2254650741815567, "learning_rate": 7.746265522924599e-06, "loss": 0.3705, "step": 11909 }, { "epoch": 2.448350292938637, "grad_norm": 0.22300590574741364, "learning_rate": 7.740663775266774e-06, "loss": 0.3809, "step": 11910 }, { "epoch": 2.4485558639120155, "grad_norm": 0.12381377071142197, "learning_rate": 7.735063863202297e-06, "loss": 0.4679, "step": 11911 }, { "epoch": 2.448761434885394, "grad_norm": 0.2367285043001175, "learning_rate": 7.729465787007045e-06, "loss": 0.4062, "step": 11912 }, { "epoch": 2.4489670058587727, "grad_norm": 0.23108406364917755, "learning_rate": 7.723869546956815e-06, "loss": 0.3886, "step": 11913 }, { "epoch": 2.4491725768321513, "grad_norm": 0.1295919418334961, "learning_rate": 7.71827514332729e-06, "loss": 0.4521, "step": 11914 }, { "epoch": 2.44937814780553, "grad_norm": 0.21976915001869202, "learning_rate": 7.71268257639411e-06, "loss": 0.3668, "step": 11915 }, { "epoch": 2.4495837187789085, "grad_norm": 0.22710269689559937, "learning_rate": 7.707091846432775e-06, "loss": 0.4069, "step": 11916 }, { "epoch": 2.449789289752287, "grad_norm": 0.23662005364894867, "learning_rate": 7.70150295371872e-06, "loss": 0.3951, "step": 11917 }, { "epoch": 2.4499948607256656, "grad_norm": 0.2241106480360031, "learning_rate": 7.695915898527278e-06, "loss": 0.3844, "step": 11918 }, { "epoch": 2.4502004316990442, "grad_norm": 0.22867663204669952, "learning_rate": 7.690330681133695e-06, "loss": 0.398, "step": 11919 }, { "epoch": 2.450406002672423, "grad_norm": 0.22373969852924347, "learning_rate": 7.684747301813141e-06, "loss": 0.3871, "step": 11920 }, { "epoch": 2.4506115736458014, "grad_norm": 0.21344764530658722, "learning_rate": 7.679165760840676e-06, "loss": 0.3814, "step": 11921 }, { "epoch": 2.4508171446191795, "grad_norm": 0.2301592379808426, "learning_rate": 7.67358605849127e-06, "loss": 0.3842, "step": 11922 }, { "epoch": 2.4510227155925586, "grad_norm": 0.23212045431137085, "learning_rate": 7.668008195039828e-06, "loss": 0.4049, "step": 11923 }, { "epoch": 2.4512282865659367, "grad_norm": 0.2373504489660263, "learning_rate": 7.662432170761128e-06, "loss": 0.371, "step": 11924 }, { "epoch": 2.4514338575393153, "grad_norm": 0.22908884286880493, "learning_rate": 7.65685798592988e-06, "loss": 0.3902, "step": 11925 }, { "epoch": 2.451639428512694, "grad_norm": 0.22210195660591125, "learning_rate": 7.6512856408207e-06, "loss": 0.4091, "step": 11926 }, { "epoch": 2.4518449994860725, "grad_norm": 0.22548379004001617, "learning_rate": 7.645715135708107e-06, "loss": 0.3848, "step": 11927 }, { "epoch": 2.452050570459451, "grad_norm": 0.23502108454704285, "learning_rate": 7.640146470866528e-06, "loss": 0.4022, "step": 11928 }, { "epoch": 2.4522561414328297, "grad_norm": 0.23006172478199005, "learning_rate": 7.634579646570319e-06, "loss": 0.3856, "step": 11929 }, { "epoch": 2.4524617124062082, "grad_norm": 0.23742365837097168, "learning_rate": 7.629014663093729e-06, "loss": 0.3902, "step": 11930 }, { "epoch": 2.452667283379587, "grad_norm": 0.1288265436887741, "learning_rate": 7.623451520710911e-06, "loss": 0.4406, "step": 11931 }, { "epoch": 2.4528728543529654, "grad_norm": 0.23163823783397675, "learning_rate": 7.617890219695945e-06, "loss": 0.4061, "step": 11932 }, { "epoch": 2.453078425326344, "grad_norm": 0.23114512860774994, "learning_rate": 7.612330760322799e-06, "loss": 0.3849, "step": 11933 }, { "epoch": 2.4532839962997226, "grad_norm": 0.2422575205564499, "learning_rate": 7.606773142865368e-06, "loss": 0.4076, "step": 11934 }, { "epoch": 2.453489567273101, "grad_norm": 0.23896224796772003, "learning_rate": 7.601217367597442e-06, "loss": 0.3913, "step": 11935 }, { "epoch": 2.45369513824648, "grad_norm": 0.22641202807426453, "learning_rate": 7.595663434792739e-06, "loss": 0.3782, "step": 11936 }, { "epoch": 2.453900709219858, "grad_norm": 0.2328871637582779, "learning_rate": 7.590111344724879e-06, "loss": 0.3799, "step": 11937 }, { "epoch": 2.454106280193237, "grad_norm": 0.22412602603435516, "learning_rate": 7.584561097667373e-06, "loss": 0.3826, "step": 11938 }, { "epoch": 2.454311851166615, "grad_norm": 0.21876847743988037, "learning_rate": 7.579012693893668e-06, "loss": 0.3916, "step": 11939 }, { "epoch": 2.4545174221399937, "grad_norm": 0.11912833899259567, "learning_rate": 7.5734661336770845e-06, "loss": 0.4674, "step": 11940 }, { "epoch": 2.4547229931133723, "grad_norm": 0.2307884842157364, "learning_rate": 7.56792141729091e-06, "loss": 0.3924, "step": 11941 }, { "epoch": 2.454928564086751, "grad_norm": 0.24826078116893768, "learning_rate": 7.562378545008289e-06, "loss": 0.3996, "step": 11942 }, { "epoch": 2.4551341350601295, "grad_norm": 0.22318950295448303, "learning_rate": 7.556837517102281e-06, "loss": 0.3761, "step": 11943 }, { "epoch": 2.455339706033508, "grad_norm": 0.22398589551448822, "learning_rate": 7.55129833384589e-06, "loss": 0.4049, "step": 11944 }, { "epoch": 2.4555452770068866, "grad_norm": 0.2238241583108902, "learning_rate": 7.545760995512e-06, "loss": 0.3946, "step": 11945 }, { "epoch": 2.455750847980265, "grad_norm": 0.23885828256607056, "learning_rate": 7.540225502373406e-06, "loss": 0.374, "step": 11946 }, { "epoch": 2.455956418953644, "grad_norm": 0.22806625068187714, "learning_rate": 7.53469185470281e-06, "loss": 0.3872, "step": 11947 }, { "epoch": 2.4561619899270224, "grad_norm": 0.2306099683046341, "learning_rate": 7.529160052772834e-06, "loss": 0.3904, "step": 11948 }, { "epoch": 2.456367560900401, "grad_norm": 0.22906504571437836, "learning_rate": 7.523630096855996e-06, "loss": 0.3872, "step": 11949 }, { "epoch": 2.4565731318737796, "grad_norm": 0.2300928682088852, "learning_rate": 7.518101987224747e-06, "loss": 0.3774, "step": 11950 }, { "epoch": 2.456778702847158, "grad_norm": 0.22766615450382233, "learning_rate": 7.512575724151425e-06, "loss": 0.375, "step": 11951 }, { "epoch": 2.4569842738205363, "grad_norm": 0.2307845950126648, "learning_rate": 7.507051307908282e-06, "loss": 0.4087, "step": 11952 }, { "epoch": 2.4571898447939153, "grad_norm": 0.23094025254249573, "learning_rate": 7.5015287387674745e-06, "loss": 0.4023, "step": 11953 }, { "epoch": 2.4573954157672935, "grad_norm": 0.25428444147109985, "learning_rate": 7.4960080170010855e-06, "loss": 0.3832, "step": 11954 }, { "epoch": 2.457600986740672, "grad_norm": 0.2268911600112915, "learning_rate": 7.490489142881082e-06, "loss": 0.3697, "step": 11955 }, { "epoch": 2.4578065577140507, "grad_norm": 0.23061802983283997, "learning_rate": 7.484972116679353e-06, "loss": 0.3872, "step": 11956 }, { "epoch": 2.4580121286874292, "grad_norm": 0.22644907236099243, "learning_rate": 7.479456938667715e-06, "loss": 0.4041, "step": 11957 }, { "epoch": 2.458217699660808, "grad_norm": 0.22928078472614288, "learning_rate": 7.473943609117859e-06, "loss": 0.3757, "step": 11958 }, { "epoch": 2.4584232706341864, "grad_norm": 0.23315031826496124, "learning_rate": 7.468432128301406e-06, "loss": 0.3962, "step": 11959 }, { "epoch": 2.458628841607565, "grad_norm": 0.23016057908535004, "learning_rate": 7.462922496489881e-06, "loss": 0.3948, "step": 11960 }, { "epoch": 2.4588344125809436, "grad_norm": 0.23658603429794312, "learning_rate": 7.457414713954714e-06, "loss": 0.358, "step": 11961 }, { "epoch": 2.459039983554322, "grad_norm": 0.23030193150043488, "learning_rate": 7.451908780967242e-06, "loss": 0.3848, "step": 11962 }, { "epoch": 2.4592455545277008, "grad_norm": 0.23462505638599396, "learning_rate": 7.446404697798738e-06, "loss": 0.3856, "step": 11963 }, { "epoch": 2.4594511255010794, "grad_norm": 0.23800687491893768, "learning_rate": 7.4409024647203344e-06, "loss": 0.3833, "step": 11964 }, { "epoch": 2.459656696474458, "grad_norm": 0.22501428425312042, "learning_rate": 7.43540208200313e-06, "loss": 0.4078, "step": 11965 }, { "epoch": 2.4598622674478365, "grad_norm": 0.23494026064872742, "learning_rate": 7.429903549918089e-06, "loss": 0.382, "step": 11966 }, { "epoch": 2.4600678384212147, "grad_norm": 0.23382548987865448, "learning_rate": 7.424406868736093e-06, "loss": 0.3714, "step": 11967 }, { "epoch": 2.4602734093945937, "grad_norm": 0.2336670309305191, "learning_rate": 7.418912038727947e-06, "loss": 0.386, "step": 11968 }, { "epoch": 2.460478980367972, "grad_norm": 0.1252935528755188, "learning_rate": 7.413419060164348e-06, "loss": 0.4512, "step": 11969 }, { "epoch": 2.4606845513413504, "grad_norm": 0.12696446478366852, "learning_rate": 7.4079279333159054e-06, "loss": 0.4467, "step": 11970 }, { "epoch": 2.460890122314729, "grad_norm": 0.23147273063659668, "learning_rate": 7.4024386584531574e-06, "loss": 0.3854, "step": 11971 }, { "epoch": 2.4610956932881076, "grad_norm": 0.23505190014839172, "learning_rate": 7.396951235846528e-06, "loss": 0.4034, "step": 11972 }, { "epoch": 2.461301264261486, "grad_norm": 0.22706139087677002, "learning_rate": 7.391465665766351e-06, "loss": 0.3958, "step": 11973 }, { "epoch": 2.461506835234865, "grad_norm": 0.2269429713487625, "learning_rate": 7.385981948482885e-06, "loss": 0.3912, "step": 11974 }, { "epoch": 2.4617124062082434, "grad_norm": 0.24472972750663757, "learning_rate": 7.380500084266274e-06, "loss": 0.4132, "step": 11975 }, { "epoch": 2.461917977181622, "grad_norm": 0.2306082546710968, "learning_rate": 7.375020073386597e-06, "loss": 0.3853, "step": 11976 }, { "epoch": 2.4621235481550006, "grad_norm": 0.23048275709152222, "learning_rate": 7.369541916113808e-06, "loss": 0.3962, "step": 11977 }, { "epoch": 2.462329119128379, "grad_norm": 0.22634848952293396, "learning_rate": 7.364065612717816e-06, "loss": 0.3774, "step": 11978 }, { "epoch": 2.4625346901017577, "grad_norm": 0.23332750797271729, "learning_rate": 7.3585911634684e-06, "loss": 0.4043, "step": 11979 }, { "epoch": 2.4627402610751363, "grad_norm": 0.22785905003547668, "learning_rate": 7.353118568635265e-06, "loss": 0.3758, "step": 11980 }, { "epoch": 2.462945832048515, "grad_norm": 0.23111788928508759, "learning_rate": 7.347647828488015e-06, "loss": 0.3824, "step": 11981 }, { "epoch": 2.463151403021893, "grad_norm": 0.12065623700618744, "learning_rate": 7.342178943296169e-06, "loss": 0.4432, "step": 11982 }, { "epoch": 2.463356973995272, "grad_norm": 0.22295540571212769, "learning_rate": 7.336711913329146e-06, "loss": 0.3759, "step": 11983 }, { "epoch": 2.4635625449686502, "grad_norm": 0.2377004474401474, "learning_rate": 7.331246738856297e-06, "loss": 0.3769, "step": 11984 }, { "epoch": 2.463768115942029, "grad_norm": 0.23187273740768433, "learning_rate": 7.325783420146861e-06, "loss": 0.3655, "step": 11985 }, { "epoch": 2.4639736869154074, "grad_norm": 0.12513786554336548, "learning_rate": 7.320321957469973e-06, "loss": 0.4312, "step": 11986 }, { "epoch": 2.464179257888786, "grad_norm": 0.23012107610702515, "learning_rate": 7.3148623510947215e-06, "loss": 0.3841, "step": 11987 }, { "epoch": 2.4643848288621646, "grad_norm": 0.23000621795654297, "learning_rate": 7.309404601290058e-06, "loss": 0.3997, "step": 11988 }, { "epoch": 2.464590399835543, "grad_norm": 0.23168058693408966, "learning_rate": 7.3039487083248665e-06, "loss": 0.3756, "step": 11989 }, { "epoch": 2.4647959708089218, "grad_norm": 0.2302679568529129, "learning_rate": 7.298494672467922e-06, "loss": 0.3849, "step": 11990 }, { "epoch": 2.4650015417823004, "grad_norm": 0.23086369037628174, "learning_rate": 7.2930424939879405e-06, "loss": 0.3872, "step": 11991 }, { "epoch": 2.465207112755679, "grad_norm": 0.2280319184064865, "learning_rate": 7.28759217315351e-06, "loss": 0.3777, "step": 11992 }, { "epoch": 2.4654126837290575, "grad_norm": 0.2329222559928894, "learning_rate": 7.282143710233148e-06, "loss": 0.3905, "step": 11993 }, { "epoch": 2.465618254702436, "grad_norm": 0.22960495948791504, "learning_rate": 7.276697105495274e-06, "loss": 0.3776, "step": 11994 }, { "epoch": 2.4658238256758147, "grad_norm": 0.22651077806949615, "learning_rate": 7.271252359208212e-06, "loss": 0.3902, "step": 11995 }, { "epoch": 2.4660293966491933, "grad_norm": 0.23243005573749542, "learning_rate": 7.2658094716402e-06, "loss": 0.3618, "step": 11996 }, { "epoch": 2.466234967622572, "grad_norm": 0.2251901775598526, "learning_rate": 7.260368443059382e-06, "loss": 0.3856, "step": 11997 }, { "epoch": 2.4664405385959505, "grad_norm": 0.22453376650810242, "learning_rate": 7.254929273733824e-06, "loss": 0.4079, "step": 11998 }, { "epoch": 2.4666461095693286, "grad_norm": 0.2299884408712387, "learning_rate": 7.249491963931481e-06, "loss": 0.3801, "step": 11999 }, { "epoch": 2.466851680542707, "grad_norm": 0.34996315836906433, "learning_rate": 7.244056513920224e-06, "loss": 0.4597, "step": 12000 }, { "epoch": 2.467057251516086, "grad_norm": 0.22876065969467163, "learning_rate": 7.238622923967829e-06, "loss": 0.3884, "step": 12001 }, { "epoch": 2.4672628224894644, "grad_norm": 0.22702383995056152, "learning_rate": 7.233191194341992e-06, "loss": 0.3792, "step": 12002 }, { "epoch": 2.467468393462843, "grad_norm": 0.23241770267486572, "learning_rate": 7.2277613253102985e-06, "loss": 0.394, "step": 12003 }, { "epoch": 2.4676739644362216, "grad_norm": 0.124427430331707, "learning_rate": 7.222333317140245e-06, "loss": 0.4528, "step": 12004 }, { "epoch": 2.4678795354096, "grad_norm": 0.2252800464630127, "learning_rate": 7.216907170099272e-06, "loss": 0.373, "step": 12005 }, { "epoch": 2.4680851063829787, "grad_norm": 0.22926414012908936, "learning_rate": 7.211482884454681e-06, "loss": 0.3816, "step": 12006 }, { "epoch": 2.4682906773563573, "grad_norm": 0.1216835305094719, "learning_rate": 7.206060460473699e-06, "loss": 0.4481, "step": 12007 }, { "epoch": 2.468496248329736, "grad_norm": 0.22425177693367004, "learning_rate": 7.200639898423476e-06, "loss": 0.3763, "step": 12008 }, { "epoch": 2.4687018193031145, "grad_norm": 0.22090640664100647, "learning_rate": 7.195221198571054e-06, "loss": 0.3812, "step": 12009 }, { "epoch": 2.468907390276493, "grad_norm": 0.23358182609081268, "learning_rate": 7.1898043611833845e-06, "loss": 0.3889, "step": 12010 }, { "epoch": 2.4691129612498717, "grad_norm": 0.2351762056350708, "learning_rate": 7.184389386527319e-06, "loss": 0.4039, "step": 12011 }, { "epoch": 2.4693185322232503, "grad_norm": 0.22724005579948425, "learning_rate": 7.178976274869649e-06, "loss": 0.4057, "step": 12012 }, { "epoch": 2.469524103196629, "grad_norm": 0.22213922441005707, "learning_rate": 7.173565026477041e-06, "loss": 0.3853, "step": 12013 }, { "epoch": 2.469729674170007, "grad_norm": 0.22167488932609558, "learning_rate": 7.1681556416160875e-06, "loss": 0.4068, "step": 12014 }, { "epoch": 2.4699352451433856, "grad_norm": 0.23194730281829834, "learning_rate": 7.1627481205532795e-06, "loss": 0.3975, "step": 12015 }, { "epoch": 2.470140816116764, "grad_norm": 0.22849147021770477, "learning_rate": 7.157342463555019e-06, "loss": 0.398, "step": 12016 }, { "epoch": 2.4703463870901428, "grad_norm": 0.22663573920726776, "learning_rate": 7.1519386708876185e-06, "loss": 0.3678, "step": 12017 }, { "epoch": 2.4705519580635213, "grad_norm": 0.5367224216461182, "learning_rate": 7.14653674281729e-06, "loss": 0.3891, "step": 12018 }, { "epoch": 2.4707575290369, "grad_norm": 0.232307568192482, "learning_rate": 7.1411366796101795e-06, "loss": 0.3862, "step": 12019 }, { "epoch": 2.4709631000102785, "grad_norm": 0.23023979365825653, "learning_rate": 7.135738481532311e-06, "loss": 0.3982, "step": 12020 }, { "epoch": 2.471168670983657, "grad_norm": 0.2264028787612915, "learning_rate": 7.13034214884963e-06, "loss": 0.3724, "step": 12021 }, { "epoch": 2.4713742419570357, "grad_norm": 0.2289603054523468, "learning_rate": 7.124947681827991e-06, "loss": 0.3857, "step": 12022 }, { "epoch": 2.4715798129304143, "grad_norm": 0.22829623520374298, "learning_rate": 7.119555080733154e-06, "loss": 0.3647, "step": 12023 }, { "epoch": 2.471785383903793, "grad_norm": 0.22573639452457428, "learning_rate": 7.114164345830782e-06, "loss": 0.405, "step": 12024 }, { "epoch": 2.4719909548771715, "grad_norm": 0.2383507937192917, "learning_rate": 7.108775477386444e-06, "loss": 0.3742, "step": 12025 }, { "epoch": 2.47219652585055, "grad_norm": 0.2287752777338028, "learning_rate": 7.103388475665647e-06, "loss": 0.3927, "step": 12026 }, { "epoch": 2.4724020968239286, "grad_norm": 0.2402697503566742, "learning_rate": 7.098003340933773e-06, "loss": 0.3899, "step": 12027 }, { "epoch": 2.4726076677973072, "grad_norm": 0.1256427764892578, "learning_rate": 7.09262007345611e-06, "loss": 0.4296, "step": 12028 }, { "epoch": 2.4728132387706854, "grad_norm": 0.2302490770816803, "learning_rate": 7.0872386734978865e-06, "loss": 0.3799, "step": 12029 }, { "epoch": 2.473018809744064, "grad_norm": 0.12290017306804657, "learning_rate": 7.08185914132421e-06, "loss": 0.4441, "step": 12030 }, { "epoch": 2.4732243807174425, "grad_norm": 0.24090375006198883, "learning_rate": 7.0764814772001035e-06, "loss": 0.3726, "step": 12031 }, { "epoch": 2.473429951690821, "grad_norm": 0.23742975294589996, "learning_rate": 7.071105681390495e-06, "loss": 0.3915, "step": 12032 }, { "epoch": 2.4736355226641997, "grad_norm": 0.12602561712265015, "learning_rate": 7.065731754160233e-06, "loss": 0.429, "step": 12033 }, { "epoch": 2.4738410936375783, "grad_norm": 0.2322402000427246, "learning_rate": 7.06035969577407e-06, "loss": 0.3945, "step": 12034 }, { "epoch": 2.474046664610957, "grad_norm": 0.22450955212116241, "learning_rate": 7.05498950649665e-06, "loss": 0.3684, "step": 12035 }, { "epoch": 2.4742522355843355, "grad_norm": 0.23430559039115906, "learning_rate": 7.049621186592546e-06, "loss": 0.378, "step": 12036 }, { "epoch": 2.474457806557714, "grad_norm": 0.2268606424331665, "learning_rate": 7.044254736326227e-06, "loss": 0.3944, "step": 12037 }, { "epoch": 2.4746633775310927, "grad_norm": 0.22248311340808868, "learning_rate": 7.038890155962071e-06, "loss": 0.3941, "step": 12038 }, { "epoch": 2.4748689485044713, "grad_norm": 0.12058508396148682, "learning_rate": 7.033527445764357e-06, "loss": 0.4526, "step": 12039 }, { "epoch": 2.47507451947785, "grad_norm": 0.12435080856084824, "learning_rate": 7.028166605997302e-06, "loss": 0.4443, "step": 12040 }, { "epoch": 2.4752800904512284, "grad_norm": 0.22780485451221466, "learning_rate": 7.022807636924997e-06, "loss": 0.3832, "step": 12041 }, { "epoch": 2.475485661424607, "grad_norm": 0.22483399510383606, "learning_rate": 7.017450538811455e-06, "loss": 0.4114, "step": 12042 }, { "epoch": 2.4756912323979856, "grad_norm": 0.22376932203769684, "learning_rate": 7.012095311920595e-06, "loss": 0.365, "step": 12043 }, { "epoch": 2.4758968033713638, "grad_norm": 0.22798992693424225, "learning_rate": 7.006741956516246e-06, "loss": 0.3874, "step": 12044 }, { "epoch": 2.4761023743447423, "grad_norm": 0.23297694325447083, "learning_rate": 7.001390472862141e-06, "loss": 0.3908, "step": 12045 }, { "epoch": 2.476307945318121, "grad_norm": 0.22531673312187195, "learning_rate": 6.99604086122191e-06, "loss": 0.3905, "step": 12046 }, { "epoch": 2.4765135162914995, "grad_norm": 0.22847385704517365, "learning_rate": 6.990693121859122e-06, "loss": 0.3764, "step": 12047 }, { "epoch": 2.476719087264878, "grad_norm": 0.12156729400157928, "learning_rate": 6.985347255037237e-06, "loss": 0.4623, "step": 12048 }, { "epoch": 2.4769246582382567, "grad_norm": 0.12409412860870361, "learning_rate": 6.980003261019599e-06, "loss": 0.4559, "step": 12049 }, { "epoch": 2.4771302292116353, "grad_norm": 0.2400665581226349, "learning_rate": 6.974661140069501e-06, "loss": 0.3763, "step": 12050 }, { "epoch": 2.477335800185014, "grad_norm": 0.22779475152492523, "learning_rate": 6.969320892450124e-06, "loss": 0.3765, "step": 12051 }, { "epoch": 2.4775413711583925, "grad_norm": 0.22973057627677917, "learning_rate": 6.9639825184245524e-06, "loss": 0.3799, "step": 12052 }, { "epoch": 2.477746942131771, "grad_norm": 0.23675696551799774, "learning_rate": 6.9586460182557705e-06, "loss": 0.399, "step": 12053 }, { "epoch": 2.4779525131051496, "grad_norm": 0.23592104017734528, "learning_rate": 6.953311392206702e-06, "loss": 0.3764, "step": 12054 }, { "epoch": 2.4781580840785282, "grad_norm": 0.12474309653043747, "learning_rate": 6.947978640540154e-06, "loss": 0.436, "step": 12055 }, { "epoch": 2.478363655051907, "grad_norm": 0.23557905852794647, "learning_rate": 6.942647763518844e-06, "loss": 0.3961, "step": 12056 }, { "epoch": 2.4785692260252854, "grad_norm": 0.23335106670856476, "learning_rate": 6.937318761405399e-06, "loss": 0.39, "step": 12057 }, { "epoch": 2.478774796998664, "grad_norm": 1.1866546869277954, "learning_rate": 6.931991634462352e-06, "loss": 0.4177, "step": 12058 }, { "epoch": 2.478980367972042, "grad_norm": 0.12616188824176788, "learning_rate": 6.926666382952149e-06, "loss": 0.4414, "step": 12059 }, { "epoch": 2.4791859389454207, "grad_norm": 0.22257229685783386, "learning_rate": 6.921343007137131e-06, "loss": 0.3853, "step": 12060 }, { "epoch": 2.4793915099187993, "grad_norm": 0.2297201305627823, "learning_rate": 6.916021507279572e-06, "loss": 0.3891, "step": 12061 }, { "epoch": 2.479597080892178, "grad_norm": 0.12035045772790909, "learning_rate": 6.910701883641627e-06, "loss": 0.4512, "step": 12062 }, { "epoch": 2.4798026518655565, "grad_norm": 0.11942754685878754, "learning_rate": 6.905384136485374e-06, "loss": 0.4546, "step": 12063 }, { "epoch": 2.480008222838935, "grad_norm": 0.12709856033325195, "learning_rate": 6.900068266072795e-06, "loss": 0.4667, "step": 12064 }, { "epoch": 2.4802137938123137, "grad_norm": 0.22888512909412384, "learning_rate": 6.894754272665767e-06, "loss": 0.3852, "step": 12065 }, { "epoch": 2.4804193647856922, "grad_norm": 0.22018122673034668, "learning_rate": 6.889442156526085e-06, "loss": 0.3962, "step": 12066 }, { "epoch": 2.480624935759071, "grad_norm": 0.23357877135276794, "learning_rate": 6.884131917915471e-06, "loss": 0.3871, "step": 12067 }, { "epoch": 2.4808305067324494, "grad_norm": 0.22664080560207367, "learning_rate": 6.87882355709552e-06, "loss": 0.3931, "step": 12068 }, { "epoch": 2.481036077705828, "grad_norm": 0.22483284771442413, "learning_rate": 6.873517074327758e-06, "loss": 0.3701, "step": 12069 }, { "epoch": 2.4812416486792066, "grad_norm": 0.12439465522766113, "learning_rate": 6.868212469873605e-06, "loss": 0.4436, "step": 12070 }, { "epoch": 2.481447219652585, "grad_norm": 0.12237696349620819, "learning_rate": 6.862909743994388e-06, "loss": 0.4515, "step": 12071 }, { "epoch": 2.481652790625964, "grad_norm": 0.22126199305057526, "learning_rate": 6.857608896951367e-06, "loss": 0.3588, "step": 12072 }, { "epoch": 2.4818583615993424, "grad_norm": 0.23091398179531097, "learning_rate": 6.8523099290056645e-06, "loss": 0.3856, "step": 12073 }, { "epoch": 2.4820639325727205, "grad_norm": 0.22415180504322052, "learning_rate": 6.847012840418361e-06, "loss": 0.397, "step": 12074 }, { "epoch": 2.482269503546099, "grad_norm": 0.12421949952840805, "learning_rate": 6.8417176314504125e-06, "loss": 0.4434, "step": 12075 }, { "epoch": 2.4824750745194777, "grad_norm": 0.12139065563678741, "learning_rate": 6.83642430236268e-06, "loss": 0.4701, "step": 12076 }, { "epoch": 2.4826806454928563, "grad_norm": 0.24218404293060303, "learning_rate": 6.831132853415946e-06, "loss": 0.4046, "step": 12077 }, { "epoch": 2.482886216466235, "grad_norm": 0.23166660964488983, "learning_rate": 6.825843284870901e-06, "loss": 0.3861, "step": 12078 }, { "epoch": 2.4830917874396135, "grad_norm": 0.2387050986289978, "learning_rate": 6.820555596988127e-06, "loss": 0.3854, "step": 12079 }, { "epoch": 2.483297358412992, "grad_norm": 0.23468570411205292, "learning_rate": 6.81526979002812e-06, "loss": 0.3764, "step": 12080 }, { "epoch": 2.4835029293863706, "grad_norm": 0.23246009647846222, "learning_rate": 6.809985864251303e-06, "loss": 0.3896, "step": 12081 }, { "epoch": 2.483708500359749, "grad_norm": 0.24410288035869598, "learning_rate": 6.804703819917987e-06, "loss": 0.3876, "step": 12082 }, { "epoch": 2.483914071333128, "grad_norm": 0.2310299128293991, "learning_rate": 6.799423657288384e-06, "loss": 0.3816, "step": 12083 }, { "epoch": 2.4841196423065064, "grad_norm": 0.22626370191574097, "learning_rate": 6.794145376622635e-06, "loss": 0.3851, "step": 12084 }, { "epoch": 2.484325213279885, "grad_norm": 0.2305128276348114, "learning_rate": 6.788868978180763e-06, "loss": 0.4095, "step": 12085 }, { "epoch": 2.4845307842532636, "grad_norm": 0.22715520858764648, "learning_rate": 6.78359446222272e-06, "loss": 0.397, "step": 12086 }, { "epoch": 2.484736355226642, "grad_norm": 0.12447824329137802, "learning_rate": 6.778321829008348e-06, "loss": 0.4611, "step": 12087 }, { "epoch": 2.4849419262000207, "grad_norm": 0.12171711772680283, "learning_rate": 6.773051078797419e-06, "loss": 0.4459, "step": 12088 }, { "epoch": 2.485147497173399, "grad_norm": 0.12131594866514206, "learning_rate": 6.767782211849591e-06, "loss": 0.4644, "step": 12089 }, { "epoch": 2.485353068146778, "grad_norm": 0.12281377613544464, "learning_rate": 6.7625152284244395e-06, "loss": 0.4399, "step": 12090 }, { "epoch": 2.485558639120156, "grad_norm": 0.2290441393852234, "learning_rate": 6.75725012878144e-06, "loss": 0.3939, "step": 12091 }, { "epoch": 2.4857642100935347, "grad_norm": 0.22904446721076965, "learning_rate": 6.751986913179967e-06, "loss": 0.3833, "step": 12092 }, { "epoch": 2.4859697810669132, "grad_norm": 0.23602800071239471, "learning_rate": 6.746725581879339e-06, "loss": 0.3835, "step": 12093 }, { "epoch": 2.486175352040292, "grad_norm": 0.2316070944070816, "learning_rate": 6.74146613513875e-06, "loss": 0.3902, "step": 12094 }, { "epoch": 2.4863809230136704, "grad_norm": 0.22582808136940002, "learning_rate": 6.736208573217292e-06, "loss": 0.4079, "step": 12095 }, { "epoch": 2.486586493987049, "grad_norm": 0.23117490112781525, "learning_rate": 6.730952896374002e-06, "loss": 0.3945, "step": 12096 }, { "epoch": 2.4867920649604276, "grad_norm": 0.22690841555595398, "learning_rate": 6.725699104867799e-06, "loss": 0.3927, "step": 12097 }, { "epoch": 2.486997635933806, "grad_norm": 0.23165901005268097, "learning_rate": 6.7204471989575e-06, "loss": 0.4029, "step": 12098 }, { "epoch": 2.4872032069071848, "grad_norm": 0.22219586372375488, "learning_rate": 6.715197178901853e-06, "loss": 0.3776, "step": 12099 }, { "epoch": 2.4874087778805634, "grad_norm": 0.2293098270893097, "learning_rate": 6.709949044959502e-06, "loss": 0.3988, "step": 12100 }, { "epoch": 2.487614348853942, "grad_norm": 0.23303751647472382, "learning_rate": 6.70470279738898e-06, "loss": 0.3915, "step": 12101 }, { "epoch": 2.4878199198273205, "grad_norm": 0.1233496144413948, "learning_rate": 6.6994584364487695e-06, "loss": 0.4614, "step": 12102 }, { "epoch": 2.488025490800699, "grad_norm": 0.23316849768161774, "learning_rate": 6.694215962397225e-06, "loss": 0.3868, "step": 12103 }, { "epoch": 2.4882310617740773, "grad_norm": 0.22257505357265472, "learning_rate": 6.688975375492618e-06, "loss": 0.374, "step": 12104 }, { "epoch": 2.4884366327474563, "grad_norm": 0.12211709469556808, "learning_rate": 6.6837366759931345e-06, "loss": 0.4395, "step": 12105 }, { "epoch": 2.4886422037208344, "grad_norm": 0.22621026635169983, "learning_rate": 6.678499864156851e-06, "loss": 0.3922, "step": 12106 }, { "epoch": 2.488847774694213, "grad_norm": 0.2442169040441513, "learning_rate": 6.673264940241767e-06, "loss": 0.3831, "step": 12107 }, { "epoch": 2.4890533456675916, "grad_norm": 0.22115904092788696, "learning_rate": 6.668031904505771e-06, "loss": 0.39, "step": 12108 }, { "epoch": 2.48925891664097, "grad_norm": 0.12432961910963058, "learning_rate": 6.662800757206687e-06, "loss": 0.4369, "step": 12109 }, { "epoch": 2.489464487614349, "grad_norm": 0.23481737077236176, "learning_rate": 6.657571498602224e-06, "loss": 0.3807, "step": 12110 }, { "epoch": 2.4896700585877274, "grad_norm": 0.24072937667369843, "learning_rate": 6.65234412895e-06, "loss": 0.3857, "step": 12111 }, { "epoch": 2.489875629561106, "grad_norm": 0.2299319952726364, "learning_rate": 6.647118648507545e-06, "loss": 0.3725, "step": 12112 }, { "epoch": 2.4900812005344846, "grad_norm": 0.2447563111782074, "learning_rate": 6.641895057532282e-06, "loss": 0.3858, "step": 12113 }, { "epoch": 2.490286771507863, "grad_norm": 0.22545365989208221, "learning_rate": 6.636673356281577e-06, "loss": 0.3783, "step": 12114 }, { "epoch": 2.4904923424812417, "grad_norm": 0.2212546318769455, "learning_rate": 6.631453545012663e-06, "loss": 0.3906, "step": 12115 }, { "epoch": 2.4906979134546203, "grad_norm": 0.2362491488456726, "learning_rate": 6.626235623982693e-06, "loss": 0.4016, "step": 12116 }, { "epoch": 2.490903484427999, "grad_norm": 0.11933384835720062, "learning_rate": 6.6210195934487395e-06, "loss": 0.4647, "step": 12117 }, { "epoch": 2.4911090554013775, "grad_norm": 0.12910796701908112, "learning_rate": 6.615805453667774e-06, "loss": 0.4296, "step": 12118 }, { "epoch": 2.4913146263747556, "grad_norm": 0.22228464484214783, "learning_rate": 6.6105932048966625e-06, "loss": 0.3975, "step": 12119 }, { "epoch": 2.4915201973481347, "grad_norm": 0.11906154453754425, "learning_rate": 6.6053828473921945e-06, "loss": 0.4488, "step": 12120 }, { "epoch": 2.491725768321513, "grad_norm": 0.11698108166456223, "learning_rate": 6.600174381411054e-06, "loss": 0.467, "step": 12121 }, { "epoch": 2.4919313392948914, "grad_norm": 0.23555971682071686, "learning_rate": 6.594967807209831e-06, "loss": 0.3887, "step": 12122 }, { "epoch": 2.49213691026827, "grad_norm": 0.23438353836536407, "learning_rate": 6.589763125045056e-06, "loss": 0.3863, "step": 12123 }, { "epoch": 2.4923424812416486, "grad_norm": 0.22644414007663727, "learning_rate": 6.584560335173119e-06, "loss": 0.3941, "step": 12124 }, { "epoch": 2.492548052215027, "grad_norm": 0.11747743934392929, "learning_rate": 6.579359437850339e-06, "loss": 0.4527, "step": 12125 }, { "epoch": 2.4927536231884058, "grad_norm": 0.23047557473182678, "learning_rate": 6.574160433332946e-06, "loss": 0.4062, "step": 12126 }, { "epoch": 2.4929591941617844, "grad_norm": 0.22950156033039093, "learning_rate": 6.568963321877061e-06, "loss": 0.3833, "step": 12127 }, { "epoch": 2.493164765135163, "grad_norm": 0.21891199052333832, "learning_rate": 6.563768103738734e-06, "loss": 0.3736, "step": 12128 }, { "epoch": 2.4933703361085415, "grad_norm": 0.22695685923099518, "learning_rate": 6.558574779173884e-06, "loss": 0.3752, "step": 12129 }, { "epoch": 2.49357590708192, "grad_norm": 0.12211208045482635, "learning_rate": 6.553383348438398e-06, "loss": 0.4442, "step": 12130 }, { "epoch": 2.4937814780552987, "grad_norm": 0.22641681134700775, "learning_rate": 6.548193811788011e-06, "loss": 0.3864, "step": 12131 }, { "epoch": 2.4939870490286773, "grad_norm": 0.11796488612890244, "learning_rate": 6.543006169478392e-06, "loss": 0.4571, "step": 12132 }, { "epoch": 2.494192620002056, "grad_norm": 0.226291224360466, "learning_rate": 6.537820421765109e-06, "loss": 0.38, "step": 12133 }, { "epoch": 2.494398190975434, "grad_norm": 0.22466683387756348, "learning_rate": 6.5326365689036465e-06, "loss": 0.4094, "step": 12134 }, { "epoch": 2.494603761948813, "grad_norm": 0.23120231926441193, "learning_rate": 6.5274546111493696e-06, "loss": 0.3899, "step": 12135 }, { "epoch": 2.494809332922191, "grad_norm": 0.23374420404434204, "learning_rate": 6.5222745487576e-06, "loss": 0.3821, "step": 12136 }, { "epoch": 2.49501490389557, "grad_norm": 0.22625453770160675, "learning_rate": 6.517096381983503e-06, "loss": 0.3882, "step": 12137 }, { "epoch": 2.4952204748689484, "grad_norm": 0.12417057901620865, "learning_rate": 6.51192011108221e-06, "loss": 0.4423, "step": 12138 }, { "epoch": 2.495426045842327, "grad_norm": 0.2231971025466919, "learning_rate": 6.506745736308721e-06, "loss": 0.3984, "step": 12139 }, { "epoch": 2.4956316168157056, "grad_norm": 0.2350044548511505, "learning_rate": 6.501573257917954e-06, "loss": 0.3884, "step": 12140 }, { "epoch": 2.495837187789084, "grad_norm": 0.23853430151939392, "learning_rate": 6.496402676164734e-06, "loss": 0.3903, "step": 12141 }, { "epoch": 2.4960427587624627, "grad_norm": 0.23373542726039886, "learning_rate": 6.4912339913037815e-06, "loss": 0.3925, "step": 12142 }, { "epoch": 2.4962483297358413, "grad_norm": 0.2317272126674652, "learning_rate": 6.486067203589738e-06, "loss": 0.4034, "step": 12143 }, { "epoch": 2.49645390070922, "grad_norm": 0.22617876529693604, "learning_rate": 6.480902313277152e-06, "loss": 0.3891, "step": 12144 }, { "epoch": 2.4966594716825985, "grad_norm": 0.22388514876365662, "learning_rate": 6.475739320620478e-06, "loss": 0.3823, "step": 12145 }, { "epoch": 2.496865042655977, "grad_norm": 0.12233025580644608, "learning_rate": 6.470578225874062e-06, "loss": 0.459, "step": 12146 }, { "epoch": 2.4970706136293557, "grad_norm": 0.2257211059331894, "learning_rate": 6.4654190292921724e-06, "loss": 0.3908, "step": 12147 }, { "epoch": 2.4972761846027343, "grad_norm": 0.2302434891462326, "learning_rate": 6.460261731128975e-06, "loss": 0.3994, "step": 12148 }, { "epoch": 2.4974817555761124, "grad_norm": 0.2282235473394394, "learning_rate": 6.455106331638541e-06, "loss": 0.3751, "step": 12149 }, { "epoch": 2.4976873265494914, "grad_norm": 0.23330600559711456, "learning_rate": 6.449952831074869e-06, "loss": 0.3851, "step": 12150 }, { "epoch": 2.4978928975228696, "grad_norm": 0.22312867641448975, "learning_rate": 6.4448012296918385e-06, "loss": 0.3799, "step": 12151 }, { "epoch": 2.498098468496248, "grad_norm": 0.22371982038021088, "learning_rate": 6.439651527743244e-06, "loss": 0.386, "step": 12152 }, { "epoch": 2.4983040394696268, "grad_norm": 0.2417476773262024, "learning_rate": 6.434503725482785e-06, "loss": 0.3929, "step": 12153 }, { "epoch": 2.4985096104430053, "grad_norm": 0.23515672981739044, "learning_rate": 6.429357823164076e-06, "loss": 0.3886, "step": 12154 }, { "epoch": 2.498715181416384, "grad_norm": 0.22999493777751923, "learning_rate": 6.424213821040627e-06, "loss": 0.3596, "step": 12155 }, { "epoch": 2.4989207523897625, "grad_norm": 0.2299181967973709, "learning_rate": 6.419071719365853e-06, "loss": 0.3789, "step": 12156 }, { "epoch": 2.499126323363141, "grad_norm": 0.23717856407165527, "learning_rate": 6.4139315183930986e-06, "loss": 0.3868, "step": 12157 }, { "epoch": 2.4993318943365197, "grad_norm": 0.22513870894908905, "learning_rate": 6.408793218375587e-06, "loss": 0.3657, "step": 12158 }, { "epoch": 2.4995374653098983, "grad_norm": 0.22355802357196808, "learning_rate": 6.403656819566447e-06, "loss": 0.3665, "step": 12159 }, { "epoch": 2.499743036283277, "grad_norm": 0.23084700107574463, "learning_rate": 6.3985223222187455e-06, "loss": 0.3808, "step": 12160 }, { "epoch": 2.4999486072566555, "grad_norm": 0.22430096566677094, "learning_rate": 6.393389726585429e-06, "loss": 0.3874, "step": 12161 }, { "epoch": 2.500154178230034, "grad_norm": 0.23496957123279572, "learning_rate": 6.388259032919352e-06, "loss": 0.4068, "step": 12162 }, { "epoch": 2.5003597492034126, "grad_norm": 0.22846169769763947, "learning_rate": 6.383130241473271e-06, "loss": 0.3625, "step": 12163 }, { "epoch": 2.500565320176791, "grad_norm": 0.23542927205562592, "learning_rate": 6.37800335249988e-06, "loss": 0.4062, "step": 12164 }, { "epoch": 2.50077089115017, "grad_norm": 0.22982755303382874, "learning_rate": 6.372878366251746e-06, "loss": 0.3788, "step": 12165 }, { "epoch": 2.500976462123548, "grad_norm": 0.2346840351819992, "learning_rate": 6.3677552829813525e-06, "loss": 0.3856, "step": 12166 }, { "epoch": 2.5011820330969265, "grad_norm": 0.23400172591209412, "learning_rate": 6.362634102941088e-06, "loss": 0.3948, "step": 12167 }, { "epoch": 2.501387604070305, "grad_norm": 0.2556484639644623, "learning_rate": 6.357514826383249e-06, "loss": 0.4074, "step": 12168 }, { "epoch": 2.5015931750436837, "grad_norm": 0.23373647034168243, "learning_rate": 6.352397453560041e-06, "loss": 0.3774, "step": 12169 }, { "epoch": 2.5017987460170623, "grad_norm": 0.23084743320941925, "learning_rate": 6.347281984723565e-06, "loss": 0.378, "step": 12170 }, { "epoch": 2.502004316990441, "grad_norm": 0.22970278561115265, "learning_rate": 6.342168420125852e-06, "loss": 0.3945, "step": 12171 }, { "epoch": 2.5022098879638195, "grad_norm": 0.22761283814907074, "learning_rate": 6.337056760018814e-06, "loss": 0.393, "step": 12172 }, { "epoch": 2.502415458937198, "grad_norm": 0.2262086719274521, "learning_rate": 6.331947004654279e-06, "loss": 0.4013, "step": 12173 }, { "epoch": 2.5026210299105767, "grad_norm": 0.22546137869358063, "learning_rate": 6.326839154283977e-06, "loss": 0.3821, "step": 12174 }, { "epoch": 2.5028266008839553, "grad_norm": 0.12685376405715942, "learning_rate": 6.321733209159555e-06, "loss": 0.4521, "step": 12175 }, { "epoch": 2.503032171857334, "grad_norm": 0.1270647794008255, "learning_rate": 6.316629169532559e-06, "loss": 0.443, "step": 12176 }, { "epoch": 2.5032377428307124, "grad_norm": 0.23198673129081726, "learning_rate": 6.3115270356544265e-06, "loss": 0.3716, "step": 12177 }, { "epoch": 2.503443313804091, "grad_norm": 0.22710855305194855, "learning_rate": 6.306426807776537e-06, "loss": 0.3858, "step": 12178 }, { "epoch": 2.503648884777469, "grad_norm": 0.222482368350029, "learning_rate": 6.301328486150148e-06, "loss": 0.3927, "step": 12179 }, { "epoch": 2.503854455750848, "grad_norm": 0.23889537155628204, "learning_rate": 6.2962320710264155e-06, "loss": 0.4017, "step": 12180 }, { "epoch": 2.5040600267242263, "grad_norm": 0.22771425545215607, "learning_rate": 6.291137562656433e-06, "loss": 0.3956, "step": 12181 }, { "epoch": 2.5042655976976054, "grad_norm": 0.23738330602645874, "learning_rate": 6.286044961291184e-06, "loss": 0.3685, "step": 12182 }, { "epoch": 2.5044711686709835, "grad_norm": 0.23009170591831207, "learning_rate": 6.2809542671815495e-06, "loss": 0.3939, "step": 12183 }, { "epoch": 2.504676739644362, "grad_norm": 0.2258174568414688, "learning_rate": 6.275865480578317e-06, "loss": 0.3827, "step": 12184 }, { "epoch": 2.5048823106177407, "grad_norm": 0.21696443855762482, "learning_rate": 6.2707786017322066e-06, "loss": 0.3773, "step": 12185 }, { "epoch": 2.5050878815911193, "grad_norm": 0.23282679915428162, "learning_rate": 6.265693630893814e-06, "loss": 0.4009, "step": 12186 }, { "epoch": 2.505293452564498, "grad_norm": 0.2228788286447525, "learning_rate": 6.260610568313647e-06, "loss": 0.3716, "step": 12187 }, { "epoch": 2.5054990235378765, "grad_norm": 0.2226657122373581, "learning_rate": 6.255529414242136e-06, "loss": 0.3872, "step": 12188 }, { "epoch": 2.505704594511255, "grad_norm": 0.22949428856372833, "learning_rate": 6.250450168929597e-06, "loss": 0.3995, "step": 12189 }, { "epoch": 2.5059101654846336, "grad_norm": 2.063056707382202, "learning_rate": 6.2453728326262674e-06, "loss": 0.4019, "step": 12190 }, { "epoch": 2.506115736458012, "grad_norm": 0.23003017902374268, "learning_rate": 6.240297405582264e-06, "loss": 0.3975, "step": 12191 }, { "epoch": 2.506321307431391, "grad_norm": 0.23214492201805115, "learning_rate": 6.235223888047661e-06, "loss": 0.3863, "step": 12192 }, { "epoch": 2.5065268784047694, "grad_norm": 0.2411757856607437, "learning_rate": 6.2301522802723835e-06, "loss": 0.3888, "step": 12193 }, { "epoch": 2.5067324493781475, "grad_norm": 0.22843949496746063, "learning_rate": 6.2250825825062975e-06, "loss": 0.4066, "step": 12194 }, { "epoch": 2.5069380203515266, "grad_norm": 0.2403997927904129, "learning_rate": 6.2200147949991624e-06, "loss": 0.3949, "step": 12195 }, { "epoch": 2.5071435913249047, "grad_norm": 0.23729455471038818, "learning_rate": 6.214948918000638e-06, "loss": 0.3915, "step": 12196 }, { "epoch": 2.5073491622982838, "grad_norm": 0.21809126436710358, "learning_rate": 6.209884951760296e-06, "loss": 0.3535, "step": 12197 }, { "epoch": 2.507554733271662, "grad_norm": 0.22672174870967865, "learning_rate": 6.20482289652761e-06, "loss": 0.3854, "step": 12198 }, { "epoch": 2.5077603042450405, "grad_norm": 0.22650691866874695, "learning_rate": 6.199762752551988e-06, "loss": 0.3908, "step": 12199 }, { "epoch": 2.507965875218419, "grad_norm": 0.22250515222549438, "learning_rate": 6.194704520082694e-06, "loss": 0.3765, "step": 12200 }, { "epoch": 2.5081714461917977, "grad_norm": 0.2284214347600937, "learning_rate": 6.189648199368929e-06, "loss": 0.3919, "step": 12201 }, { "epoch": 2.5083770171651762, "grad_norm": 0.23341004550457, "learning_rate": 6.184593790659807e-06, "loss": 0.3923, "step": 12202 }, { "epoch": 2.508582588138555, "grad_norm": 0.24929523468017578, "learning_rate": 6.179541294204327e-06, "loss": 0.3788, "step": 12203 }, { "epoch": 2.5087881591119334, "grad_norm": 0.23570400476455688, "learning_rate": 6.174490710251398e-06, "loss": 0.3904, "step": 12204 }, { "epoch": 2.508993730085312, "grad_norm": 0.22578248381614685, "learning_rate": 6.169442039049831e-06, "loss": 0.4045, "step": 12205 }, { "epoch": 2.5091993010586906, "grad_norm": 0.2417832911014557, "learning_rate": 6.1643952808483726e-06, "loss": 0.3621, "step": 12206 }, { "epoch": 2.509404872032069, "grad_norm": 0.2352113127708435, "learning_rate": 6.159350435895643e-06, "loss": 0.3799, "step": 12207 }, { "epoch": 2.5096104430054478, "grad_norm": 0.1250247210264206, "learning_rate": 6.154307504440175e-06, "loss": 0.4474, "step": 12208 }, { "epoch": 2.509816013978826, "grad_norm": 0.22554874420166016, "learning_rate": 6.149266486730414e-06, "loss": 0.3857, "step": 12209 }, { "epoch": 2.510021584952205, "grad_norm": 0.23448492586612701, "learning_rate": 6.144227383014705e-06, "loss": 0.3939, "step": 12210 }, { "epoch": 2.510227155925583, "grad_norm": 0.23547668755054474, "learning_rate": 6.139190193541301e-06, "loss": 0.4062, "step": 12211 }, { "epoch": 2.510432726898962, "grad_norm": 0.2341381311416626, "learning_rate": 6.1341549185583495e-06, "loss": 0.3777, "step": 12212 }, { "epoch": 2.5106382978723403, "grad_norm": 0.23182830214500427, "learning_rate": 6.129121558313939e-06, "loss": 0.3878, "step": 12213 }, { "epoch": 2.510843868845719, "grad_norm": 0.23118196427822113, "learning_rate": 6.124090113056029e-06, "loss": 0.3822, "step": 12214 }, { "epoch": 2.5110494398190975, "grad_norm": 0.2306642383337021, "learning_rate": 6.11906058303249e-06, "loss": 0.3742, "step": 12215 }, { "epoch": 2.511255010792476, "grad_norm": 0.2363126426935196, "learning_rate": 6.114032968491108e-06, "loss": 0.3679, "step": 12216 }, { "epoch": 2.5114605817658546, "grad_norm": 0.22103947401046753, "learning_rate": 6.109007269679567e-06, "loss": 0.378, "step": 12217 }, { "epoch": 2.511666152739233, "grad_norm": 0.23410068452358246, "learning_rate": 6.1039834868454676e-06, "loss": 0.3637, "step": 12218 }, { "epoch": 2.511871723712612, "grad_norm": 0.2275317907333374, "learning_rate": 6.098961620236286e-06, "loss": 0.3731, "step": 12219 }, { "epoch": 2.5120772946859904, "grad_norm": 0.2327854335308075, "learning_rate": 6.093941670099456e-06, "loss": 0.3812, "step": 12220 }, { "epoch": 2.512282865659369, "grad_norm": 0.11918573826551437, "learning_rate": 6.088923636682273e-06, "loss": 0.4619, "step": 12221 }, { "epoch": 2.5124884366327476, "grad_norm": 0.11901576071977615, "learning_rate": 6.083907520231941e-06, "loss": 0.4478, "step": 12222 }, { "epoch": 2.512694007606126, "grad_norm": 0.23713438212871552, "learning_rate": 6.0788933209956015e-06, "loss": 0.3682, "step": 12223 }, { "epoch": 2.5128995785795043, "grad_norm": 0.22688627243041992, "learning_rate": 6.0738810392202725e-06, "loss": 0.3878, "step": 12224 }, { "epoch": 2.5131051495528833, "grad_norm": 0.22531628608703613, "learning_rate": 6.068870675152875e-06, "loss": 0.3921, "step": 12225 }, { "epoch": 2.5133107205262615, "grad_norm": 0.23456744849681854, "learning_rate": 6.063862229040268e-06, "loss": 0.3799, "step": 12226 }, { "epoch": 2.5135162914996405, "grad_norm": 0.22432486712932587, "learning_rate": 6.058855701129178e-06, "loss": 0.386, "step": 12227 }, { "epoch": 2.5137218624730187, "grad_norm": 0.23045605421066284, "learning_rate": 6.0538510916662595e-06, "loss": 0.3704, "step": 12228 }, { "epoch": 2.5139274334463972, "grad_norm": 0.23640716075897217, "learning_rate": 6.048848400898063e-06, "loss": 0.3814, "step": 12229 }, { "epoch": 2.514133004419776, "grad_norm": 0.23872627317905426, "learning_rate": 6.043847629071049e-06, "loss": 0.3968, "step": 12230 }, { "epoch": 2.5143385753931544, "grad_norm": 0.2490765005350113, "learning_rate": 6.038848776431582e-06, "loss": 0.396, "step": 12231 }, { "epoch": 2.514544146366533, "grad_norm": 0.24178048968315125, "learning_rate": 6.033851843225918e-06, "loss": 0.393, "step": 12232 }, { "epoch": 2.5147497173399116, "grad_norm": 0.12222810834646225, "learning_rate": 6.028856829700258e-06, "loss": 0.4281, "step": 12233 }, { "epoch": 2.51495528831329, "grad_norm": 0.23273582756519318, "learning_rate": 6.023863736100677e-06, "loss": 0.3911, "step": 12234 }, { "epoch": 2.5151608592866688, "grad_norm": 0.23268084228038788, "learning_rate": 6.0188725626731475e-06, "loss": 0.3771, "step": 12235 }, { "epoch": 2.5153664302600474, "grad_norm": 0.22554205358028412, "learning_rate": 6.013883309663577e-06, "loss": 0.3792, "step": 12236 }, { "epoch": 2.515572001233426, "grad_norm": 0.12636855244636536, "learning_rate": 6.00889597731775e-06, "loss": 0.4423, "step": 12237 }, { "epoch": 2.5157775722068045, "grad_norm": 0.22077181935310364, "learning_rate": 6.0039105658813745e-06, "loss": 0.4052, "step": 12238 }, { "epoch": 2.515983143180183, "grad_norm": 0.2312643826007843, "learning_rate": 5.998927075600054e-06, "loss": 0.3937, "step": 12239 }, { "epoch": 2.5161887141535617, "grad_norm": 0.22247177362442017, "learning_rate": 5.993945506719307e-06, "loss": 0.3851, "step": 12240 }, { "epoch": 2.51639428512694, "grad_norm": 0.12727056443691254, "learning_rate": 5.988965859484558e-06, "loss": 0.4577, "step": 12241 }, { "epoch": 2.516599856100319, "grad_norm": 0.2271554172039032, "learning_rate": 5.9839881341411235e-06, "loss": 0.3691, "step": 12242 }, { "epoch": 2.516805427073697, "grad_norm": 0.22784371674060822, "learning_rate": 5.97901233093423e-06, "loss": 0.3829, "step": 12243 }, { "epoch": 2.5170109980470756, "grad_norm": 0.235183447599411, "learning_rate": 5.974038450109005e-06, "loss": 0.399, "step": 12244 }, { "epoch": 2.517216569020454, "grad_norm": 0.22664892673492432, "learning_rate": 5.969066491910514e-06, "loss": 0.3783, "step": 12245 }, { "epoch": 2.517422139993833, "grad_norm": 0.2340896725654602, "learning_rate": 5.9640964565836684e-06, "loss": 0.3887, "step": 12246 }, { "epoch": 2.5176277109672114, "grad_norm": 0.23754270374774933, "learning_rate": 5.959128344373354e-06, "loss": 0.3782, "step": 12247 }, { "epoch": 2.51783328194059, "grad_norm": 0.2322191596031189, "learning_rate": 5.9541621555243055e-06, "loss": 0.3946, "step": 12248 }, { "epoch": 2.5180388529139686, "grad_norm": 0.24042516946792603, "learning_rate": 5.9491978902811915e-06, "loss": 0.405, "step": 12249 }, { "epoch": 2.518244423887347, "grad_norm": 0.23382358253002167, "learning_rate": 5.944235548888571e-06, "loss": 0.3944, "step": 12250 }, { "epoch": 2.5184499948607257, "grad_norm": 0.22752118110656738, "learning_rate": 5.939275131590924e-06, "loss": 0.3723, "step": 12251 }, { "epoch": 2.5186555658341043, "grad_norm": 0.23085589706897736, "learning_rate": 5.934316638632615e-06, "loss": 0.3916, "step": 12252 }, { "epoch": 2.518861136807483, "grad_norm": 0.23417700827121735, "learning_rate": 5.929360070257928e-06, "loss": 0.3699, "step": 12253 }, { "epoch": 2.5190667077808615, "grad_norm": 0.23297809064388275, "learning_rate": 5.924405426711064e-06, "loss": 0.3863, "step": 12254 }, { "epoch": 2.51927227875424, "grad_norm": 0.3042377531528473, "learning_rate": 5.919452708236101e-06, "loss": 0.3995, "step": 12255 }, { "epoch": 2.5194778497276182, "grad_norm": 0.3225111663341522, "learning_rate": 5.914501915077045e-06, "loss": 0.387, "step": 12256 }, { "epoch": 2.5196834207009973, "grad_norm": 0.12028972804546356, "learning_rate": 5.909553047477796e-06, "loss": 0.447, "step": 12257 }, { "epoch": 2.5198889916743754, "grad_norm": 0.22156624495983124, "learning_rate": 5.904606105682159e-06, "loss": 0.3813, "step": 12258 }, { "epoch": 2.520094562647754, "grad_norm": 0.23767083883285522, "learning_rate": 5.899661089933842e-06, "loss": 0.3754, "step": 12259 }, { "epoch": 2.5203001336211326, "grad_norm": 0.24094241857528687, "learning_rate": 5.894718000476468e-06, "loss": 0.3879, "step": 12260 }, { "epoch": 2.520505704594511, "grad_norm": 0.23110216856002808, "learning_rate": 5.889776837553565e-06, "loss": 0.384, "step": 12261 }, { "epoch": 2.5207112755678898, "grad_norm": 0.23392094671726227, "learning_rate": 5.884837601408556e-06, "loss": 0.3925, "step": 12262 }, { "epoch": 2.5209168465412684, "grad_norm": 0.23152020573616028, "learning_rate": 5.879900292284778e-06, "loss": 0.391, "step": 12263 }, { "epoch": 2.521122417514647, "grad_norm": 0.2314983457326889, "learning_rate": 5.8749649104254634e-06, "loss": 0.3918, "step": 12264 }, { "epoch": 2.5213279884880255, "grad_norm": 0.2333018183708191, "learning_rate": 5.870031456073747e-06, "loss": 0.3686, "step": 12265 }, { "epoch": 2.521533559461404, "grad_norm": 0.22460217773914337, "learning_rate": 5.8650999294727e-06, "loss": 0.38, "step": 12266 }, { "epoch": 2.5217391304347827, "grad_norm": 0.2324889600276947, "learning_rate": 5.8601703308652585e-06, "loss": 0.3957, "step": 12267 }, { "epoch": 2.5219447014081613, "grad_norm": 0.22676560282707214, "learning_rate": 5.8552426604942814e-06, "loss": 0.3589, "step": 12268 }, { "epoch": 2.52215027238154, "grad_norm": 0.22790227830410004, "learning_rate": 5.8503169186025465e-06, "loss": 0.3892, "step": 12269 }, { "epoch": 2.5223558433549185, "grad_norm": 0.23212410509586334, "learning_rate": 5.845393105432708e-06, "loss": 0.3854, "step": 12270 }, { "epoch": 2.5225614143282966, "grad_norm": 0.23569580912590027, "learning_rate": 5.8404712212273436e-06, "loss": 0.3756, "step": 12271 }, { "epoch": 2.5227669853016756, "grad_norm": 0.22882294654846191, "learning_rate": 5.835551266228932e-06, "loss": 0.3866, "step": 12272 }, { "epoch": 2.522972556275054, "grad_norm": 0.22856424748897552, "learning_rate": 5.8306332406798574e-06, "loss": 0.3792, "step": 12273 }, { "epoch": 2.5231781272484324, "grad_norm": 0.240891695022583, "learning_rate": 5.825717144822393e-06, "loss": 0.3868, "step": 12274 }, { "epoch": 2.523383698221811, "grad_norm": 0.23485086858272552, "learning_rate": 5.820802978898757e-06, "loss": 0.3834, "step": 12275 }, { "epoch": 2.5235892691951896, "grad_norm": 0.23131176829338074, "learning_rate": 5.81589074315103e-06, "loss": 0.3774, "step": 12276 }, { "epoch": 2.523794840168568, "grad_norm": 0.24229222536087036, "learning_rate": 5.810980437821223e-06, "loss": 0.4105, "step": 12277 }, { "epoch": 2.5240004111419467, "grad_norm": 0.23501570522785187, "learning_rate": 5.806072063151243e-06, "loss": 0.3863, "step": 12278 }, { "epoch": 2.5242059821153253, "grad_norm": 0.22483564913272858, "learning_rate": 5.801165619382897e-06, "loss": 0.3781, "step": 12279 }, { "epoch": 2.524411553088704, "grad_norm": 0.2277892529964447, "learning_rate": 5.7962611067579116e-06, "loss": 0.3625, "step": 12280 }, { "epoch": 2.5246171240620825, "grad_norm": 0.2191118448972702, "learning_rate": 5.791358525517887e-06, "loss": 0.3664, "step": 12281 }, { "epoch": 2.524822695035461, "grad_norm": 0.12358484417200089, "learning_rate": 5.786457875904382e-06, "loss": 0.4396, "step": 12282 }, { "epoch": 2.5250282660088397, "grad_norm": 0.23930969834327698, "learning_rate": 5.781559158158813e-06, "loss": 0.3858, "step": 12283 }, { "epoch": 2.5252338369822183, "grad_norm": 0.231824591755867, "learning_rate": 5.776662372522516e-06, "loss": 0.3838, "step": 12284 }, { "epoch": 2.525439407955597, "grad_norm": 0.1248823031783104, "learning_rate": 5.771767519236734e-06, "loss": 0.4319, "step": 12285 }, { "epoch": 2.525644978928975, "grad_norm": 0.22508475184440613, "learning_rate": 5.766874598542609e-06, "loss": 0.3756, "step": 12286 }, { "epoch": 2.525850549902354, "grad_norm": 0.22717183828353882, "learning_rate": 5.761983610681201e-06, "loss": 0.3744, "step": 12287 }, { "epoch": 2.526056120875732, "grad_norm": 0.22601410746574402, "learning_rate": 5.757094555893466e-06, "loss": 0.3717, "step": 12288 }, { "epoch": 2.5262616918491108, "grad_norm": 0.2239820659160614, "learning_rate": 5.752207434420249e-06, "loss": 0.3665, "step": 12289 }, { "epoch": 2.5264672628224893, "grad_norm": 0.23698551952838898, "learning_rate": 5.747322246502343e-06, "loss": 0.4048, "step": 12290 }, { "epoch": 2.526672833795868, "grad_norm": 0.22845512628555298, "learning_rate": 5.742438992380399e-06, "loss": 0.3882, "step": 12291 }, { "epoch": 2.5268784047692465, "grad_norm": 0.232425257563591, "learning_rate": 5.7375576722949975e-06, "loss": 0.3715, "step": 12292 }, { "epoch": 2.527083975742625, "grad_norm": 0.21982984244823456, "learning_rate": 5.732678286486614e-06, "loss": 0.3603, "step": 12293 }, { "epoch": 2.5272895467160037, "grad_norm": 0.22463706135749817, "learning_rate": 5.727800835195642e-06, "loss": 0.3708, "step": 12294 }, { "epoch": 2.5274951176893823, "grad_norm": 0.12560081481933594, "learning_rate": 5.722925318662354e-06, "loss": 0.4498, "step": 12295 }, { "epoch": 2.527700688662761, "grad_norm": 0.2263532429933548, "learning_rate": 5.718051737126963e-06, "loss": 0.3865, "step": 12296 }, { "epoch": 2.5279062596361395, "grad_norm": 0.23695407807826996, "learning_rate": 5.713180090829561e-06, "loss": 0.3791, "step": 12297 }, { "epoch": 2.528111830609518, "grad_norm": 0.12093336135149002, "learning_rate": 5.708310380010148e-06, "loss": 0.4565, "step": 12298 }, { "epoch": 2.5283174015828966, "grad_norm": 0.237099289894104, "learning_rate": 5.703442604908635e-06, "loss": 0.4034, "step": 12299 }, { "epoch": 2.5285229725562752, "grad_norm": 0.26493915915489197, "learning_rate": 5.698576765764832e-06, "loss": 0.3807, "step": 12300 }, { "epoch": 2.5287285435296534, "grad_norm": 0.2276540845632553, "learning_rate": 5.693712862818446e-06, "loss": 0.381, "step": 12301 }, { "epoch": 2.5289341145030324, "grad_norm": 0.22775860130786896, "learning_rate": 5.688850896309126e-06, "loss": 0.3737, "step": 12302 }, { "epoch": 2.5291396854764105, "grad_norm": 0.12160097062587738, "learning_rate": 5.6839908664763745e-06, "loss": 0.4252, "step": 12303 }, { "epoch": 2.529345256449789, "grad_norm": 0.24990959465503693, "learning_rate": 5.679132773559636e-06, "loss": 0.3963, "step": 12304 }, { "epoch": 2.5295508274231677, "grad_norm": 0.2322610765695572, "learning_rate": 5.674276617798239e-06, "loss": 0.3973, "step": 12305 }, { "epoch": 2.5297563983965463, "grad_norm": 0.23721322417259216, "learning_rate": 5.669422399431426e-06, "loss": 0.4063, "step": 12306 }, { "epoch": 2.529961969369925, "grad_norm": 0.23168140649795532, "learning_rate": 5.6645701186983416e-06, "loss": 0.3688, "step": 12307 }, { "epoch": 2.5301675403433035, "grad_norm": 0.22344398498535156, "learning_rate": 5.65971977583802e-06, "loss": 0.3936, "step": 12308 }, { "epoch": 2.530373111316682, "grad_norm": 0.2303028702735901, "learning_rate": 5.6548713710894444e-06, "loss": 0.3847, "step": 12309 }, { "epoch": 2.5305786822900607, "grad_norm": 0.23595616221427917, "learning_rate": 5.650024904691443e-06, "loss": 0.3789, "step": 12310 }, { "epoch": 2.5307842532634393, "grad_norm": 0.23113130033016205, "learning_rate": 5.645180376882806e-06, "loss": 0.3743, "step": 12311 }, { "epoch": 2.530989824236818, "grad_norm": 0.23540560901165009, "learning_rate": 5.640337787902188e-06, "loss": 0.3958, "step": 12312 }, { "epoch": 2.5311953952101964, "grad_norm": 0.23868121206760406, "learning_rate": 5.635497137988157e-06, "loss": 0.3984, "step": 12313 }, { "epoch": 2.531400966183575, "grad_norm": 0.23868517577648163, "learning_rate": 5.6306584273791965e-06, "loss": 0.4009, "step": 12314 }, { "epoch": 2.5316065371569536, "grad_norm": 0.1235857680439949, "learning_rate": 5.625821656313673e-06, "loss": 0.456, "step": 12315 }, { "epoch": 2.5318121081303318, "grad_norm": 0.22929808497428894, "learning_rate": 5.620986825029889e-06, "loss": 0.3708, "step": 12316 }, { "epoch": 2.532017679103711, "grad_norm": 0.22845540940761566, "learning_rate": 5.6161539337660305e-06, "loss": 0.3914, "step": 12317 }, { "epoch": 2.532223250077089, "grad_norm": 0.22550463676452637, "learning_rate": 5.611322982760191e-06, "loss": 0.3671, "step": 12318 }, { "epoch": 2.5324288210504675, "grad_norm": 0.223682701587677, "learning_rate": 5.606493972250359e-06, "loss": 0.3678, "step": 12319 }, { "epoch": 2.532634392023846, "grad_norm": 0.22872628271579742, "learning_rate": 5.601666902474447e-06, "loss": 0.3995, "step": 12320 }, { "epoch": 2.5328399629972247, "grad_norm": 0.12020973116159439, "learning_rate": 5.596841773670258e-06, "loss": 0.4526, "step": 12321 }, { "epoch": 2.5330455339706033, "grad_norm": 0.22796304523944855, "learning_rate": 5.592018586075498e-06, "loss": 0.3907, "step": 12322 }, { "epoch": 2.533251104943982, "grad_norm": 0.22996073961257935, "learning_rate": 5.5871973399278e-06, "loss": 0.3912, "step": 12323 }, { "epoch": 2.5334566759173605, "grad_norm": 0.23374302685260773, "learning_rate": 5.582378035464671e-06, "loss": 0.3796, "step": 12324 }, { "epoch": 2.533662246890739, "grad_norm": 0.12319260090589523, "learning_rate": 5.577560672923539e-06, "loss": 0.4496, "step": 12325 }, { "epoch": 2.5338678178641176, "grad_norm": 0.23780031502246857, "learning_rate": 5.572745252541736e-06, "loss": 0.3854, "step": 12326 }, { "epoch": 2.534073388837496, "grad_norm": 0.22886228561401367, "learning_rate": 5.567931774556487e-06, "loss": 0.3914, "step": 12327 }, { "epoch": 2.534278959810875, "grad_norm": 0.2357688695192337, "learning_rate": 5.563120239204937e-06, "loss": 0.3849, "step": 12328 }, { "epoch": 2.5344845307842534, "grad_norm": 0.23062871396541595, "learning_rate": 5.558310646724115e-06, "loss": 0.369, "step": 12329 }, { "epoch": 2.534690101757632, "grad_norm": 0.24323634803295135, "learning_rate": 5.553502997350989e-06, "loss": 0.3802, "step": 12330 }, { "epoch": 2.53489567273101, "grad_norm": 0.24064137041568756, "learning_rate": 5.548697291322398e-06, "loss": 0.3606, "step": 12331 }, { "epoch": 2.535101243704389, "grad_norm": 0.22627827525138855, "learning_rate": 5.543893528875087e-06, "loss": 0.3883, "step": 12332 }, { "epoch": 2.5353068146777673, "grad_norm": 0.2306792140007019, "learning_rate": 5.539091710245729e-06, "loss": 0.3751, "step": 12333 }, { "epoch": 2.535512385651146, "grad_norm": 0.12287892401218414, "learning_rate": 5.534291835670888e-06, "loss": 0.4274, "step": 12334 }, { "epoch": 2.5357179566245245, "grad_norm": 0.22518762946128845, "learning_rate": 5.529493905387025e-06, "loss": 0.3907, "step": 12335 }, { "epoch": 2.535923527597903, "grad_norm": 0.2295764982700348, "learning_rate": 5.524697919630501e-06, "loss": 0.3925, "step": 12336 }, { "epoch": 2.5361290985712817, "grad_norm": 0.23383575677871704, "learning_rate": 5.519903878637617e-06, "loss": 0.4127, "step": 12337 }, { "epoch": 2.5363346695446602, "grad_norm": 0.24502725899219513, "learning_rate": 5.515111782644535e-06, "loss": 0.3995, "step": 12338 }, { "epoch": 2.536540240518039, "grad_norm": 0.24664360284805298, "learning_rate": 5.510321631887345e-06, "loss": 0.3686, "step": 12339 }, { "epoch": 2.5367458114914174, "grad_norm": 0.12008702009916306, "learning_rate": 5.505533426602033e-06, "loss": 0.4564, "step": 12340 }, { "epoch": 2.536951382464796, "grad_norm": 0.23110847175121307, "learning_rate": 5.500747167024496e-06, "loss": 0.3741, "step": 12341 }, { "epoch": 2.5371569534381746, "grad_norm": 0.22447291016578674, "learning_rate": 5.495962853390521e-06, "loss": 0.3598, "step": 12342 }, { "epoch": 2.537362524411553, "grad_norm": 0.11924073100090027, "learning_rate": 5.491180485935813e-06, "loss": 0.4384, "step": 12343 }, { "epoch": 2.5375680953849318, "grad_norm": 0.22318169474601746, "learning_rate": 5.48640006489598e-06, "loss": 0.3797, "step": 12344 }, { "epoch": 2.5377736663583104, "grad_norm": 0.22601492702960968, "learning_rate": 5.4816215905065375e-06, "loss": 0.3914, "step": 12345 }, { "epoch": 2.5379792373316885, "grad_norm": 0.23133814334869385, "learning_rate": 5.476845063002888e-06, "loss": 0.3854, "step": 12346 }, { "epoch": 2.5381848083050675, "grad_norm": 0.12131541967391968, "learning_rate": 5.472070482620347e-06, "loss": 0.4463, "step": 12347 }, { "epoch": 2.5383903792784457, "grad_norm": 0.23147541284561157, "learning_rate": 5.467297849594143e-06, "loss": 0.3833, "step": 12348 }, { "epoch": 2.5385959502518247, "grad_norm": 0.23673370480537415, "learning_rate": 5.462527164159402e-06, "loss": 0.381, "step": 12349 }, { "epoch": 2.538801521225203, "grad_norm": 0.22934825718402863, "learning_rate": 5.457758426551136e-06, "loss": 0.3894, "step": 12350 }, { "epoch": 2.5390070921985815, "grad_norm": 0.22352683544158936, "learning_rate": 5.4529916370043065e-06, "loss": 0.3989, "step": 12351 }, { "epoch": 2.53921266317196, "grad_norm": 0.23551110923290253, "learning_rate": 5.448226795753732e-06, "loss": 0.4017, "step": 12352 }, { "epoch": 2.5394182341453386, "grad_norm": 0.23551301658153534, "learning_rate": 5.443463903034154e-06, "loss": 0.3999, "step": 12353 }, { "epoch": 2.539623805118717, "grad_norm": 0.22477497160434723, "learning_rate": 5.43870295908023e-06, "loss": 0.3857, "step": 12354 }, { "epoch": 2.539829376092096, "grad_norm": 0.222430020570755, "learning_rate": 5.433943964126501e-06, "loss": 0.3661, "step": 12355 }, { "epoch": 2.5400349470654744, "grad_norm": 0.23153965175151825, "learning_rate": 5.429186918407423e-06, "loss": 0.3748, "step": 12356 }, { "epoch": 2.540240518038853, "grad_norm": 0.12405303865671158, "learning_rate": 5.4244318221573395e-06, "loss": 0.4329, "step": 12357 }, { "epoch": 2.5404460890122316, "grad_norm": 0.22384849190711975, "learning_rate": 5.419678675610535e-06, "loss": 0.3792, "step": 12358 }, { "epoch": 2.54065165998561, "grad_norm": 0.2254662811756134, "learning_rate": 5.414927479001167e-06, "loss": 0.3913, "step": 12359 }, { "epoch": 2.5408572309589887, "grad_norm": 0.23094946146011353, "learning_rate": 5.410178232563299e-06, "loss": 0.3677, "step": 12360 }, { "epoch": 2.541062801932367, "grad_norm": 0.23447729647159576, "learning_rate": 5.405430936530908e-06, "loss": 0.3659, "step": 12361 }, { "epoch": 2.541268372905746, "grad_norm": 0.23138076066970825, "learning_rate": 5.400685591137871e-06, "loss": 0.375, "step": 12362 }, { "epoch": 2.541473943879124, "grad_norm": 0.12305799126625061, "learning_rate": 5.395942196617968e-06, "loss": 0.4492, "step": 12363 }, { "epoch": 2.541679514852503, "grad_norm": 0.1181621253490448, "learning_rate": 5.391200753204876e-06, "loss": 0.4415, "step": 12364 }, { "epoch": 2.5418850858258812, "grad_norm": 0.12282504886388779, "learning_rate": 5.386461261132198e-06, "loss": 0.4412, "step": 12365 }, { "epoch": 2.54209065679926, "grad_norm": 0.23556901514530182, "learning_rate": 5.381723720633422e-06, "loss": 0.3947, "step": 12366 }, { "epoch": 2.5422962277726384, "grad_norm": 0.1227208599448204, "learning_rate": 5.376988131941943e-06, "loss": 0.4529, "step": 12367 }, { "epoch": 2.542501798746017, "grad_norm": 0.2223055213689804, "learning_rate": 5.3722544952910625e-06, "loss": 0.3783, "step": 12368 }, { "epoch": 2.5427073697193956, "grad_norm": 0.12605048716068268, "learning_rate": 5.367522810913984e-06, "loss": 0.4487, "step": 12369 }, { "epoch": 2.542912940692774, "grad_norm": 0.12217556685209274, "learning_rate": 5.362793079043813e-06, "loss": 0.4541, "step": 12370 }, { "epoch": 2.5431185116661528, "grad_norm": 0.2289581149816513, "learning_rate": 5.358065299913551e-06, "loss": 0.3795, "step": 12371 }, { "epoch": 2.5433240826395314, "grad_norm": 0.24024073779582977, "learning_rate": 5.3533394737561425e-06, "loss": 0.3983, "step": 12372 }, { "epoch": 2.54352965361291, "grad_norm": 0.2249261736869812, "learning_rate": 5.348615600804381e-06, "loss": 0.397, "step": 12373 }, { "epoch": 2.5437352245862885, "grad_norm": 0.231714129447937, "learning_rate": 5.3438936812909965e-06, "loss": 0.3847, "step": 12374 }, { "epoch": 2.543940795559667, "grad_norm": 0.2364065796136856, "learning_rate": 5.339173715448626e-06, "loss": 0.3956, "step": 12375 }, { "epoch": 2.5441463665330453, "grad_norm": 0.23184433579444885, "learning_rate": 5.33445570350979e-06, "loss": 0.3686, "step": 12376 }, { "epoch": 2.5443519375064243, "grad_norm": 0.23327279090881348, "learning_rate": 5.3297396457069164e-06, "loss": 0.3834, "step": 12377 }, { "epoch": 2.5445575084798024, "grad_norm": 0.2422483265399933, "learning_rate": 5.3250255422723655e-06, "loss": 0.3607, "step": 12378 }, { "epoch": 2.5447630794531815, "grad_norm": 0.22602832317352295, "learning_rate": 5.320313393438361e-06, "loss": 0.3734, "step": 12379 }, { "epoch": 2.5449686504265596, "grad_norm": 0.22192765772342682, "learning_rate": 5.315603199437057e-06, "loss": 0.3825, "step": 12380 }, { "epoch": 2.545174221399938, "grad_norm": 0.2314867079257965, "learning_rate": 5.310894960500493e-06, "loss": 0.3918, "step": 12381 }, { "epoch": 2.545379792373317, "grad_norm": 0.22359047830104828, "learning_rate": 5.306188676860634e-06, "loss": 0.3916, "step": 12382 }, { "epoch": 2.5455853633466954, "grad_norm": 0.23620890080928802, "learning_rate": 5.301484348749329e-06, "loss": 0.4001, "step": 12383 }, { "epoch": 2.545790934320074, "grad_norm": 0.23437555134296417, "learning_rate": 5.296781976398327e-06, "loss": 0.3721, "step": 12384 }, { "epoch": 2.5459965052934526, "grad_norm": 0.22928333282470703, "learning_rate": 5.292081560039319e-06, "loss": 0.3894, "step": 12385 }, { "epoch": 2.546202076266831, "grad_norm": 0.23154671490192413, "learning_rate": 5.287383099903855e-06, "loss": 0.3979, "step": 12386 }, { "epoch": 2.5464076472402097, "grad_norm": 0.23585215210914612, "learning_rate": 5.282686596223412e-06, "loss": 0.3604, "step": 12387 }, { "epoch": 2.5466132182135883, "grad_norm": 0.23773467540740967, "learning_rate": 5.277992049229358e-06, "loss": 0.3868, "step": 12388 }, { "epoch": 2.546818789186967, "grad_norm": 0.22915463149547577, "learning_rate": 5.273299459152977e-06, "loss": 0.371, "step": 12389 }, { "epoch": 2.5470243601603455, "grad_norm": 0.2335277944803238, "learning_rate": 5.268608826225454e-06, "loss": 0.3819, "step": 12390 }, { "epoch": 2.5472299311337236, "grad_norm": 0.24060097336769104, "learning_rate": 5.263920150677854e-06, "loss": 0.4, "step": 12391 }, { "epoch": 2.5474355021071027, "grad_norm": 0.2347687929868698, "learning_rate": 5.259233432741198e-06, "loss": 0.4035, "step": 12392 }, { "epoch": 2.547641073080481, "grad_norm": 0.23900634050369263, "learning_rate": 5.25454867264636e-06, "loss": 0.3856, "step": 12393 }, { "epoch": 2.54784664405386, "grad_norm": 0.22846248745918274, "learning_rate": 5.249865870624136e-06, "loss": 0.391, "step": 12394 }, { "epoch": 2.548052215027238, "grad_norm": 0.2248847782611847, "learning_rate": 5.2451850269052214e-06, "loss": 0.3927, "step": 12395 }, { "epoch": 2.5482577860006166, "grad_norm": 0.1248060017824173, "learning_rate": 5.2405061417202366e-06, "loss": 0.4394, "step": 12396 }, { "epoch": 2.548463356973995, "grad_norm": 0.22413742542266846, "learning_rate": 5.235829215299683e-06, "loss": 0.3923, "step": 12397 }, { "epoch": 2.5486689279473738, "grad_norm": 0.12117671221494675, "learning_rate": 5.2311542478739505e-06, "loss": 0.4538, "step": 12398 }, { "epoch": 2.5488744989207524, "grad_norm": 0.22767889499664307, "learning_rate": 5.226481239673385e-06, "loss": 0.3679, "step": 12399 }, { "epoch": 2.549080069894131, "grad_norm": 0.23341724276542664, "learning_rate": 5.221810190928183e-06, "loss": 0.3912, "step": 12400 }, { "epoch": 2.5492856408675095, "grad_norm": 0.2280956357717514, "learning_rate": 5.21714110186847e-06, "loss": 0.3871, "step": 12401 }, { "epoch": 2.549491211840888, "grad_norm": 0.22712482511997223, "learning_rate": 5.212473972724271e-06, "loss": 0.3725, "step": 12402 }, { "epoch": 2.5496967828142667, "grad_norm": 0.22607560455799103, "learning_rate": 5.207808803725519e-06, "loss": 0.3927, "step": 12403 }, { "epoch": 2.5499023537876453, "grad_norm": 0.26311928033828735, "learning_rate": 5.203145595102033e-06, "loss": 0.4036, "step": 12404 }, { "epoch": 2.550107924761024, "grad_norm": 0.12306389212608337, "learning_rate": 5.198484347083541e-06, "loss": 0.4641, "step": 12405 }, { "epoch": 2.550313495734402, "grad_norm": 0.12494704872369766, "learning_rate": 5.193825059899709e-06, "loss": 0.4593, "step": 12406 }, { "epoch": 2.550519066707781, "grad_norm": 0.2371709644794464, "learning_rate": 5.189167733780062e-06, "loss": 0.4007, "step": 12407 }, { "epoch": 2.550724637681159, "grad_norm": 0.22765342891216278, "learning_rate": 5.184512368954043e-06, "loss": 0.3812, "step": 12408 }, { "epoch": 2.5509302086545382, "grad_norm": 0.12098430842161179, "learning_rate": 5.1798589656510035e-06, "loss": 0.4594, "step": 12409 }, { "epoch": 2.5511357796279164, "grad_norm": 0.2373332679271698, "learning_rate": 5.1752075241001945e-06, "loss": 0.3945, "step": 12410 }, { "epoch": 2.551341350601295, "grad_norm": 0.12555475533008575, "learning_rate": 5.170558044530767e-06, "loss": 0.4456, "step": 12411 }, { "epoch": 2.5515469215746736, "grad_norm": 0.22965727746486664, "learning_rate": 5.16591052717178e-06, "loss": 0.3749, "step": 12412 }, { "epoch": 2.551752492548052, "grad_norm": 0.23548352718353271, "learning_rate": 5.161264972252198e-06, "loss": 0.3978, "step": 12413 }, { "epoch": 2.5519580635214307, "grad_norm": 0.22701576352119446, "learning_rate": 5.156621380000889e-06, "loss": 0.3722, "step": 12414 }, { "epoch": 2.5521636344948093, "grad_norm": 0.24250133335590363, "learning_rate": 5.15197975064662e-06, "loss": 0.3762, "step": 12415 }, { "epoch": 2.552369205468188, "grad_norm": 0.235441654920578, "learning_rate": 5.147340084418053e-06, "loss": 0.3904, "step": 12416 }, { "epoch": 2.5525747764415665, "grad_norm": 0.2249901443719864, "learning_rate": 5.1427023815437655e-06, "loss": 0.3812, "step": 12417 }, { "epoch": 2.552780347414945, "grad_norm": 0.11771126836538315, "learning_rate": 5.138066642252249e-06, "loss": 0.4564, "step": 12418 }, { "epoch": 2.5529859183883237, "grad_norm": 0.22527842223644257, "learning_rate": 5.133432866771862e-06, "loss": 0.3784, "step": 12419 }, { "epoch": 2.5531914893617023, "grad_norm": 0.22456350922584534, "learning_rate": 5.1288010553309096e-06, "loss": 0.367, "step": 12420 }, { "epoch": 2.553397060335081, "grad_norm": 0.24883276224136353, "learning_rate": 5.124171208157577e-06, "loss": 0.4066, "step": 12421 }, { "epoch": 2.5536026313084594, "grad_norm": 0.22530537843704224, "learning_rate": 5.119543325479944e-06, "loss": 0.3663, "step": 12422 }, { "epoch": 2.5538082022818376, "grad_norm": 0.23030851781368256, "learning_rate": 5.114917407526017e-06, "loss": 0.3692, "step": 12423 }, { "epoch": 2.5540137732552166, "grad_norm": 0.22793278098106384, "learning_rate": 5.110293454523685e-06, "loss": 0.3891, "step": 12424 }, { "epoch": 2.5542193442285948, "grad_norm": 0.22494344413280487, "learning_rate": 5.1056714667007475e-06, "loss": 0.3759, "step": 12425 }, { "epoch": 2.5544249152019733, "grad_norm": 0.2244558185338974, "learning_rate": 5.101051444284902e-06, "loss": 0.3901, "step": 12426 }, { "epoch": 2.554630486175352, "grad_norm": 0.22287413477897644, "learning_rate": 5.096433387503776e-06, "loss": 0.3852, "step": 12427 }, { "epoch": 2.5548360571487305, "grad_norm": 0.22315384447574615, "learning_rate": 5.091817296584869e-06, "loss": 0.3859, "step": 12428 }, { "epoch": 2.555041628122109, "grad_norm": 0.2284417599439621, "learning_rate": 5.087203171755592e-06, "loss": 0.3805, "step": 12429 }, { "epoch": 2.5552471990954877, "grad_norm": 0.22787770628929138, "learning_rate": 5.08259101324326e-06, "loss": 0.3772, "step": 12430 }, { "epoch": 2.5554527700688663, "grad_norm": 0.11939222365617752, "learning_rate": 5.0779808212751e-06, "loss": 0.4522, "step": 12431 }, { "epoch": 2.555658341042245, "grad_norm": 0.22622445225715637, "learning_rate": 5.0733725960782266e-06, "loss": 0.3636, "step": 12432 }, { "epoch": 2.5558639120156235, "grad_norm": 0.23444552719593048, "learning_rate": 5.068766337879662e-06, "loss": 0.3865, "step": 12433 }, { "epoch": 2.556069482989002, "grad_norm": 0.1237218827009201, "learning_rate": 5.064162046906351e-06, "loss": 0.4495, "step": 12434 }, { "epoch": 2.5562750539623806, "grad_norm": 0.2306404858827591, "learning_rate": 5.059559723385115e-06, "loss": 0.3957, "step": 12435 }, { "epoch": 2.5564806249357592, "grad_norm": 0.2336534857749939, "learning_rate": 5.054959367542689e-06, "loss": 0.3902, "step": 12436 }, { "epoch": 2.556686195909138, "grad_norm": 0.23504294455051422, "learning_rate": 5.0503609796057175e-06, "loss": 0.3697, "step": 12437 }, { "epoch": 2.556891766882516, "grad_norm": 0.23617815971374512, "learning_rate": 5.045764559800722e-06, "loss": 0.3986, "step": 12438 }, { "epoch": 2.557097337855895, "grad_norm": 0.2346443086862564, "learning_rate": 5.041170108354174e-06, "loss": 0.3879, "step": 12439 }, { "epoch": 2.557302908829273, "grad_norm": 0.2332235723733902, "learning_rate": 5.0365776254924055e-06, "loss": 0.3873, "step": 12440 }, { "epoch": 2.5575084798026517, "grad_norm": 0.2314586490392685, "learning_rate": 5.031987111441657e-06, "loss": 0.3749, "step": 12441 }, { "epoch": 2.5577140507760303, "grad_norm": 0.22944357991218567, "learning_rate": 5.027398566428106e-06, "loss": 0.3968, "step": 12442 }, { "epoch": 2.557919621749409, "grad_norm": 0.23463036119937897, "learning_rate": 5.0228119906777975e-06, "loss": 0.3848, "step": 12443 }, { "epoch": 2.5581251927227875, "grad_norm": 0.12207529693841934, "learning_rate": 5.018227384416686e-06, "loss": 0.4292, "step": 12444 }, { "epoch": 2.558330763696166, "grad_norm": 0.12470246851444244, "learning_rate": 5.013644747870641e-06, "loss": 0.4441, "step": 12445 }, { "epoch": 2.5585363346695447, "grad_norm": 0.2320421189069748, "learning_rate": 5.009064081265421e-06, "loss": 0.3746, "step": 12446 }, { "epoch": 2.5587419056429233, "grad_norm": 0.22109293937683105, "learning_rate": 5.004485384826685e-06, "loss": 0.3845, "step": 12447 }, { "epoch": 2.558947476616302, "grad_norm": 0.2225075662136078, "learning_rate": 4.999908658780025e-06, "loss": 0.3755, "step": 12448 }, { "epoch": 2.5591530475896804, "grad_norm": 0.12030386924743652, "learning_rate": 4.995333903350908e-06, "loss": 0.4508, "step": 12449 }, { "epoch": 2.559358618563059, "grad_norm": 0.2298811674118042, "learning_rate": 4.990761118764711e-06, "loss": 0.384, "step": 12450 }, { "epoch": 2.5595641895364376, "grad_norm": 0.23811288177967072, "learning_rate": 4.9861903052467065e-06, "loss": 0.3781, "step": 12451 }, { "epoch": 2.559769760509816, "grad_norm": 0.22522372007369995, "learning_rate": 4.981621463022082e-06, "loss": 0.3895, "step": 12452 }, { "epoch": 2.5599753314831943, "grad_norm": 0.2294057160615921, "learning_rate": 4.9770545923159244e-06, "loss": 0.3782, "step": 12453 }, { "epoch": 2.5601809024565734, "grad_norm": 0.225949227809906, "learning_rate": 4.972489693353206e-06, "loss": 0.3833, "step": 12454 }, { "epoch": 2.5603864734299515, "grad_norm": 0.22200778126716614, "learning_rate": 4.967926766358847e-06, "loss": 0.3662, "step": 12455 }, { "epoch": 2.56059204440333, "grad_norm": 0.25845810770988464, "learning_rate": 4.963365811557625e-06, "loss": 0.3953, "step": 12456 }, { "epoch": 2.5607976153767087, "grad_norm": 0.24813143908977509, "learning_rate": 4.958806829174239e-06, "loss": 0.3734, "step": 12457 }, { "epoch": 2.5610031863500873, "grad_norm": 0.2332116812467575, "learning_rate": 4.954249819433291e-06, "loss": 0.4004, "step": 12458 }, { "epoch": 2.561208757323466, "grad_norm": 0.3196330666542053, "learning_rate": 4.949694782559268e-06, "loss": 0.3785, "step": 12459 }, { "epoch": 2.5614143282968445, "grad_norm": 0.1242533028125763, "learning_rate": 4.945141718776601e-06, "loss": 0.4463, "step": 12460 }, { "epoch": 2.561619899270223, "grad_norm": 0.12104514986276627, "learning_rate": 4.94059062830958e-06, "loss": 0.4533, "step": 12461 }, { "epoch": 2.5618254702436016, "grad_norm": 0.23640932142734528, "learning_rate": 4.9360415113824195e-06, "loss": 0.3747, "step": 12462 }, { "epoch": 2.56203104121698, "grad_norm": 0.13072489202022552, "learning_rate": 4.931494368219237e-06, "loss": 0.44, "step": 12463 }, { "epoch": 2.562236612190359, "grad_norm": 0.22700245678424835, "learning_rate": 4.926949199044052e-06, "loss": 0.3968, "step": 12464 }, { "epoch": 2.5624421831637374, "grad_norm": 0.23678947985172272, "learning_rate": 4.922406004080776e-06, "loss": 0.3869, "step": 12465 }, { "epoch": 2.562647754137116, "grad_norm": 0.22681771218776703, "learning_rate": 4.91786478355324e-06, "loss": 0.3858, "step": 12466 }, { "epoch": 2.5628533251104946, "grad_norm": 0.12208957225084305, "learning_rate": 4.91332553768515e-06, "loss": 0.4612, "step": 12467 }, { "epoch": 2.5630588960838727, "grad_norm": 0.22747553884983063, "learning_rate": 4.908788266700153e-06, "loss": 0.3569, "step": 12468 }, { "epoch": 2.5632644670572517, "grad_norm": 0.23218391835689545, "learning_rate": 4.904252970821774e-06, "loss": 0.3702, "step": 12469 }, { "epoch": 2.56347003803063, "grad_norm": 0.23117561638355255, "learning_rate": 4.899719650273443e-06, "loss": 0.3971, "step": 12470 }, { "epoch": 2.5636756090040085, "grad_norm": 0.22626996040344238, "learning_rate": 4.895188305278499e-06, "loss": 0.3674, "step": 12471 }, { "epoch": 2.563881179977387, "grad_norm": 0.23139001429080963, "learning_rate": 4.890658936060177e-06, "loss": 0.3867, "step": 12472 }, { "epoch": 2.5640867509507657, "grad_norm": 0.12001971155405045, "learning_rate": 4.8861315428416195e-06, "loss": 0.4376, "step": 12473 }, { "epoch": 2.5642923219241442, "grad_norm": 0.22510318458080292, "learning_rate": 4.8816061258458565e-06, "loss": 0.3841, "step": 12474 }, { "epoch": 2.564497892897523, "grad_norm": 0.22726254165172577, "learning_rate": 4.877082685295861e-06, "loss": 0.364, "step": 12475 }, { "epoch": 2.5647034638709014, "grad_norm": 0.2362823784351349, "learning_rate": 4.872561221414465e-06, "loss": 0.376, "step": 12476 }, { "epoch": 2.56490903484428, "grad_norm": 0.2281261384487152, "learning_rate": 4.868041734424418e-06, "loss": 0.3786, "step": 12477 }, { "epoch": 2.5651146058176586, "grad_norm": 0.22835490107536316, "learning_rate": 4.863524224548385e-06, "loss": 0.3793, "step": 12478 }, { "epoch": 2.565320176791037, "grad_norm": 0.22907859086990356, "learning_rate": 4.859008692008911e-06, "loss": 0.3848, "step": 12479 }, { "epoch": 2.5655257477644158, "grad_norm": 0.12358912080526352, "learning_rate": 4.854495137028458e-06, "loss": 0.4492, "step": 12480 }, { "epoch": 2.5657313187377944, "grad_norm": 0.23854859173297882, "learning_rate": 4.849983559829394e-06, "loss": 0.3861, "step": 12481 }, { "epoch": 2.565936889711173, "grad_norm": 0.23517528176307678, "learning_rate": 4.845473960633981e-06, "loss": 0.3655, "step": 12482 }, { "epoch": 2.566142460684551, "grad_norm": 0.11823319643735886, "learning_rate": 4.840966339664371e-06, "loss": 0.4302, "step": 12483 }, { "epoch": 2.56634803165793, "grad_norm": 0.22691769897937775, "learning_rate": 4.836460697142662e-06, "loss": 0.3748, "step": 12484 }, { "epoch": 2.5665536026313083, "grad_norm": 0.23544248938560486, "learning_rate": 4.831957033290806e-06, "loss": 0.3853, "step": 12485 }, { "epoch": 2.566759173604687, "grad_norm": 0.22319963574409485, "learning_rate": 4.827455348330684e-06, "loss": 0.389, "step": 12486 }, { "epoch": 2.5669647445780654, "grad_norm": 0.22622336447238922, "learning_rate": 4.822955642484072e-06, "loss": 0.376, "step": 12487 }, { "epoch": 2.567170315551444, "grad_norm": 0.22860629856586456, "learning_rate": 4.818457915972635e-06, "loss": 0.3648, "step": 12488 }, { "epoch": 2.5673758865248226, "grad_norm": 0.12275035679340363, "learning_rate": 4.813962169017981e-06, "loss": 0.441, "step": 12489 }, { "epoch": 2.567581457498201, "grad_norm": 0.2211294323205948, "learning_rate": 4.809468401841578e-06, "loss": 0.3881, "step": 12490 }, { "epoch": 2.56778702847158, "grad_norm": 0.2277289628982544, "learning_rate": 4.804976614664821e-06, "loss": 0.3607, "step": 12491 }, { "epoch": 2.5679925994449584, "grad_norm": 0.23206478357315063, "learning_rate": 4.800486807708995e-06, "loss": 0.3881, "step": 12492 }, { "epoch": 2.568198170418337, "grad_norm": 0.24027037620544434, "learning_rate": 4.795998981195294e-06, "loss": 0.3896, "step": 12493 }, { "epoch": 2.5684037413917156, "grad_norm": 0.24075712263584137, "learning_rate": 4.791513135344807e-06, "loss": 0.3876, "step": 12494 }, { "epoch": 2.568609312365094, "grad_norm": 0.23393917083740234, "learning_rate": 4.787029270378522e-06, "loss": 0.3844, "step": 12495 }, { "epoch": 2.5688148833384727, "grad_norm": 0.22370143234729767, "learning_rate": 4.782547386517362e-06, "loss": 0.3913, "step": 12496 }, { "epoch": 2.5690204543118513, "grad_norm": 0.23047272861003876, "learning_rate": 4.778067483982119e-06, "loss": 0.3883, "step": 12497 }, { "epoch": 2.5692260252852295, "grad_norm": 0.12189171463251114, "learning_rate": 4.773589562993489e-06, "loss": 0.4429, "step": 12498 }, { "epoch": 2.5694315962586085, "grad_norm": 0.23024466633796692, "learning_rate": 4.769113623772089e-06, "loss": 0.3858, "step": 12499 }, { "epoch": 2.5696371672319867, "grad_norm": 0.12103652209043503, "learning_rate": 4.764639666538418e-06, "loss": 0.4603, "step": 12500 }, { "epoch": 2.5698427382053652, "grad_norm": 0.23692312836647034, "learning_rate": 4.76016769151289e-06, "loss": 0.3932, "step": 12501 }, { "epoch": 2.570048309178744, "grad_norm": 0.22613804042339325, "learning_rate": 4.755697698915813e-06, "loss": 0.3724, "step": 12502 }, { "epoch": 2.5702538801521224, "grad_norm": 0.2332460582256317, "learning_rate": 4.7512296889674205e-06, "loss": 0.3811, "step": 12503 }, { "epoch": 2.570459451125501, "grad_norm": 0.2254786342382431, "learning_rate": 4.746763661887813e-06, "loss": 0.3876, "step": 12504 }, { "epoch": 2.5706650220988796, "grad_norm": 0.2281585931777954, "learning_rate": 4.742299617897014e-06, "loss": 0.3865, "step": 12505 }, { "epoch": 2.570870593072258, "grad_norm": 0.23506386578083038, "learning_rate": 4.737837557214951e-06, "loss": 0.3798, "step": 12506 }, { "epoch": 2.5710761640456368, "grad_norm": 0.23268993198871613, "learning_rate": 4.7333774800614505e-06, "loss": 0.3984, "step": 12507 }, { "epoch": 2.5712817350190154, "grad_norm": 0.23153680562973022, "learning_rate": 4.728919386656236e-06, "loss": 0.386, "step": 12508 }, { "epoch": 2.571487305992394, "grad_norm": 0.1192520409822464, "learning_rate": 4.72446327721893e-06, "loss": 0.4267, "step": 12509 }, { "epoch": 2.5716928769657725, "grad_norm": 0.23475117981433868, "learning_rate": 4.720009151969075e-06, "loss": 0.3883, "step": 12510 }, { "epoch": 2.571898447939151, "grad_norm": 0.24187681078910828, "learning_rate": 4.715557011126102e-06, "loss": 0.3814, "step": 12511 }, { "epoch": 2.5721040189125297, "grad_norm": 0.23029695451259613, "learning_rate": 4.7111068549093485e-06, "loss": 0.3786, "step": 12512 }, { "epoch": 2.572309589885908, "grad_norm": 0.1344357579946518, "learning_rate": 4.7066586835380475e-06, "loss": 0.4468, "step": 12513 }, { "epoch": 2.572515160859287, "grad_norm": 0.2278510481119156, "learning_rate": 4.7022124972313446e-06, "loss": 0.3777, "step": 12514 }, { "epoch": 2.572720731832665, "grad_norm": 0.23537226021289825, "learning_rate": 4.697768296208279e-06, "loss": 0.3934, "step": 12515 }, { "epoch": 2.572926302806044, "grad_norm": 0.2430686354637146, "learning_rate": 4.693326080687791e-06, "loss": 0.4047, "step": 12516 }, { "epoch": 2.573131873779422, "grad_norm": 0.2354026734828949, "learning_rate": 4.688885850888745e-06, "loss": 0.3855, "step": 12517 }, { "epoch": 2.573337444752801, "grad_norm": 0.22142033278942108, "learning_rate": 4.6844476070298715e-06, "loss": 0.4079, "step": 12518 }, { "epoch": 2.5735430157261794, "grad_norm": 0.23135825991630554, "learning_rate": 4.680011349329835e-06, "loss": 0.3854, "step": 12519 }, { "epoch": 2.573748586699558, "grad_norm": 0.23446208238601685, "learning_rate": 4.675577078007187e-06, "loss": 0.3963, "step": 12520 }, { "epoch": 2.5739541576729366, "grad_norm": 0.11839111894369125, "learning_rate": 4.671144793280376e-06, "loss": 0.4355, "step": 12521 }, { "epoch": 2.574159728646315, "grad_norm": 0.23686189949512482, "learning_rate": 4.666714495367763e-06, "loss": 0.3901, "step": 12522 }, { "epoch": 2.5743652996196937, "grad_norm": 0.11666683852672577, "learning_rate": 4.662286184487604e-06, "loss": 0.4504, "step": 12523 }, { "epoch": 2.5745708705930723, "grad_norm": 0.23141448199748993, "learning_rate": 4.6578598608580744e-06, "loss": 0.3776, "step": 12524 }, { "epoch": 2.574776441566451, "grad_norm": 0.23291108012199402, "learning_rate": 4.653435524697234e-06, "loss": 0.3911, "step": 12525 }, { "epoch": 2.5749820125398295, "grad_norm": 0.2317928522825241, "learning_rate": 4.649013176223034e-06, "loss": 0.3803, "step": 12526 }, { "epoch": 2.575187583513208, "grad_norm": 0.23690040409564972, "learning_rate": 4.644592815653365e-06, "loss": 0.3758, "step": 12527 }, { "epoch": 2.5753931544865862, "grad_norm": 0.22948139905929565, "learning_rate": 4.640174443205982e-06, "loss": 0.3874, "step": 12528 }, { "epoch": 2.5755987254599653, "grad_norm": 0.2342422902584076, "learning_rate": 4.635758059098568e-06, "loss": 0.3791, "step": 12529 }, { "epoch": 2.5758042964333434, "grad_norm": 0.2423672080039978, "learning_rate": 4.6313436635486865e-06, "loss": 0.3912, "step": 12530 }, { "epoch": 2.5760098674067224, "grad_norm": 0.23133836686611176, "learning_rate": 4.626931256773821e-06, "loss": 0.3838, "step": 12531 }, { "epoch": 2.5762154383801006, "grad_norm": 0.22452816367149353, "learning_rate": 4.622520838991355e-06, "loss": 0.393, "step": 12532 }, { "epoch": 2.576421009353479, "grad_norm": 0.23489362001419067, "learning_rate": 4.618112410418561e-06, "loss": 0.3839, "step": 12533 }, { "epoch": 2.5766265803268578, "grad_norm": 0.22381377220153809, "learning_rate": 4.613705971272626e-06, "loss": 0.3874, "step": 12534 }, { "epoch": 2.5768321513002364, "grad_norm": 0.22615040838718414, "learning_rate": 4.6093015217706305e-06, "loss": 0.3871, "step": 12535 }, { "epoch": 2.577037722273615, "grad_norm": 0.12121882289648056, "learning_rate": 4.604899062129556e-06, "loss": 0.4319, "step": 12536 }, { "epoch": 2.5772432932469935, "grad_norm": 0.21779850125312805, "learning_rate": 4.600498592566309e-06, "loss": 0.3738, "step": 12537 }, { "epoch": 2.577448864220372, "grad_norm": 0.12148154526948929, "learning_rate": 4.596100113297666e-06, "loss": 0.4412, "step": 12538 }, { "epoch": 2.5776544351937507, "grad_norm": 0.24054840207099915, "learning_rate": 4.591703624540323e-06, "loss": 0.3983, "step": 12539 }, { "epoch": 2.5778600061671293, "grad_norm": 0.23665878176689148, "learning_rate": 4.587309126510879e-06, "loss": 0.391, "step": 12540 }, { "epoch": 2.578065577140508, "grad_norm": 0.23454469442367554, "learning_rate": 4.582916619425823e-06, "loss": 0.38, "step": 12541 }, { "epoch": 2.5782711481138865, "grad_norm": 0.22932063043117523, "learning_rate": 4.578526103501554e-06, "loss": 0.386, "step": 12542 }, { "epoch": 2.5784767190872646, "grad_norm": 0.2300751805305481, "learning_rate": 4.574137578954369e-06, "loss": 0.3736, "step": 12543 }, { "epoch": 2.5786822900606436, "grad_norm": 0.23985406756401062, "learning_rate": 4.569751046000483e-06, "loss": 0.4049, "step": 12544 }, { "epoch": 2.578887861034022, "grad_norm": 0.2458028644323349, "learning_rate": 4.5653665048559895e-06, "loss": 0.3769, "step": 12545 }, { "epoch": 2.579093432007401, "grad_norm": 0.23812150955200195, "learning_rate": 4.560983955736901e-06, "loss": 0.3921, "step": 12546 }, { "epoch": 2.579299002980779, "grad_norm": 0.23339690268039703, "learning_rate": 4.5566033988591146e-06, "loss": 0.3839, "step": 12547 }, { "epoch": 2.5795045739541576, "grad_norm": 0.22212813794612885, "learning_rate": 4.5522248344384525e-06, "loss": 0.3801, "step": 12548 }, { "epoch": 2.579710144927536, "grad_norm": 0.23296941816806793, "learning_rate": 4.547848262690621e-06, "loss": 0.3995, "step": 12549 }, { "epoch": 2.5799157159009147, "grad_norm": 0.23013028502464294, "learning_rate": 4.543473683831221e-06, "loss": 0.3542, "step": 12550 }, { "epoch": 2.5801212868742933, "grad_norm": 0.23488110303878784, "learning_rate": 4.539101098075791e-06, "loss": 0.3884, "step": 12551 }, { "epoch": 2.580326857847672, "grad_norm": 0.2313051074743271, "learning_rate": 4.534730505639736e-06, "loss": 0.3894, "step": 12552 }, { "epoch": 2.5805324288210505, "grad_norm": 0.2325943112373352, "learning_rate": 4.5303619067383785e-06, "loss": 0.38, "step": 12553 }, { "epoch": 2.580737999794429, "grad_norm": 0.22922460734844208, "learning_rate": 4.525995301586931e-06, "loss": 0.4037, "step": 12554 }, { "epoch": 2.5809435707678077, "grad_norm": 0.12386429309844971, "learning_rate": 4.521630690400517e-06, "loss": 0.4532, "step": 12555 }, { "epoch": 2.5811491417411863, "grad_norm": 0.223622664809227, "learning_rate": 4.517268073394169e-06, "loss": 0.3716, "step": 12556 }, { "epoch": 2.581354712714565, "grad_norm": 0.23587054014205933, "learning_rate": 4.512907450782795e-06, "loss": 0.3963, "step": 12557 }, { "epoch": 2.581560283687943, "grad_norm": 0.23143510520458221, "learning_rate": 4.508548822781248e-06, "loss": 0.3691, "step": 12558 }, { "epoch": 2.581765854661322, "grad_norm": 0.227389857172966, "learning_rate": 4.504192189604236e-06, "loss": 0.3778, "step": 12559 }, { "epoch": 2.5819714256347, "grad_norm": 0.2255561500787735, "learning_rate": 4.499837551466404e-06, "loss": 0.3913, "step": 12560 }, { "epoch": 2.582176996608079, "grad_norm": 0.2301749736070633, "learning_rate": 4.4954849085822795e-06, "loss": 0.3736, "step": 12561 }, { "epoch": 2.5823825675814573, "grad_norm": 0.23604105412960052, "learning_rate": 4.491134261166295e-06, "loss": 0.3876, "step": 12562 }, { "epoch": 2.582588138554836, "grad_norm": 0.24186021089553833, "learning_rate": 4.4867856094327845e-06, "loss": 0.3961, "step": 12563 }, { "epoch": 2.5827937095282145, "grad_norm": 0.23529918491840363, "learning_rate": 4.482438953595982e-06, "loss": 0.3746, "step": 12564 }, { "epoch": 2.582999280501593, "grad_norm": 0.2367369532585144, "learning_rate": 4.4780942938700425e-06, "loss": 0.397, "step": 12565 }, { "epoch": 2.5832048514749717, "grad_norm": 0.23594695329666138, "learning_rate": 4.473751630468997e-06, "loss": 0.395, "step": 12566 }, { "epoch": 2.5834104224483503, "grad_norm": 0.221700519323349, "learning_rate": 4.469410963606791e-06, "loss": 0.3851, "step": 12567 }, { "epoch": 2.583615993421729, "grad_norm": 0.12304549664258957, "learning_rate": 4.465072293497258e-06, "loss": 0.4312, "step": 12568 }, { "epoch": 2.5838215643951075, "grad_norm": 0.22101683914661407, "learning_rate": 4.460735620354163e-06, "loss": 0.3857, "step": 12569 }, { "epoch": 2.584027135368486, "grad_norm": 0.11826925724744797, "learning_rate": 4.456400944391144e-06, "loss": 0.4562, "step": 12570 }, { "epoch": 2.5842327063418646, "grad_norm": 0.11855246126651764, "learning_rate": 4.45206826582174e-06, "loss": 0.4436, "step": 12571 }, { "epoch": 2.5844382773152432, "grad_norm": 0.22833800315856934, "learning_rate": 4.447737584859421e-06, "loss": 0.38, "step": 12572 }, { "epoch": 2.5846438482886214, "grad_norm": 0.12766826152801514, "learning_rate": 4.443408901717526e-06, "loss": 0.4538, "step": 12573 }, { "epoch": 2.5848494192620004, "grad_norm": 0.2362251728773117, "learning_rate": 4.43908221660932e-06, "loss": 0.3791, "step": 12574 }, { "epoch": 2.5850549902353785, "grad_norm": 0.2291366159915924, "learning_rate": 4.434757529747952e-06, "loss": 0.3797, "step": 12575 }, { "epoch": 2.5852605612087576, "grad_norm": 0.11885092407464981, "learning_rate": 4.430434841346476e-06, "loss": 0.4403, "step": 12576 }, { "epoch": 2.5854661321821357, "grad_norm": 0.2298079878091812, "learning_rate": 4.426114151617852e-06, "loss": 0.3733, "step": 12577 }, { "epoch": 2.5856717031555143, "grad_norm": 0.2344081848859787, "learning_rate": 4.421795460774936e-06, "loss": 0.3807, "step": 12578 }, { "epoch": 2.585877274128893, "grad_norm": 0.23046311736106873, "learning_rate": 4.417478769030506e-06, "loss": 0.3999, "step": 12579 }, { "epoch": 2.5860828451022715, "grad_norm": 0.22831617295742035, "learning_rate": 4.4131640765972125e-06, "loss": 0.367, "step": 12580 }, { "epoch": 2.58628841607565, "grad_norm": 0.2355516254901886, "learning_rate": 4.408851383687621e-06, "loss": 0.3845, "step": 12581 }, { "epoch": 2.5864939870490287, "grad_norm": 0.23236438632011414, "learning_rate": 4.4045406905142014e-06, "loss": 0.399, "step": 12582 }, { "epoch": 2.5866995580224073, "grad_norm": 0.2408120036125183, "learning_rate": 4.400231997289323e-06, "loss": 0.3817, "step": 12583 }, { "epoch": 2.586905128995786, "grad_norm": 0.11931653320789337, "learning_rate": 4.395925304225247e-06, "loss": 0.4378, "step": 12584 }, { "epoch": 2.5871106999691644, "grad_norm": 0.2242845743894577, "learning_rate": 4.391620611534138e-06, "loss": 0.3632, "step": 12585 }, { "epoch": 2.587316270942543, "grad_norm": 0.2249579280614853, "learning_rate": 4.387317919428092e-06, "loss": 0.3774, "step": 12586 }, { "epoch": 2.5875218419159216, "grad_norm": 0.245198056101799, "learning_rate": 4.383017228119064e-06, "loss": 0.3825, "step": 12587 }, { "epoch": 2.5877274128893, "grad_norm": 0.23710772395133972, "learning_rate": 4.378718537818934e-06, "loss": 0.3911, "step": 12588 }, { "epoch": 2.587932983862679, "grad_norm": 0.22874656319618225, "learning_rate": 4.374421848739483e-06, "loss": 0.3732, "step": 12589 }, { "epoch": 2.588138554836057, "grad_norm": 0.2265346497297287, "learning_rate": 4.370127161092373e-06, "loss": 0.367, "step": 12590 }, { "epoch": 2.588344125809436, "grad_norm": 0.22639183700084686, "learning_rate": 4.365834475089203e-06, "loss": 0.3966, "step": 12591 }, { "epoch": 2.588549696782814, "grad_norm": 0.2292342483997345, "learning_rate": 4.361543790941434e-06, "loss": 0.3785, "step": 12592 }, { "epoch": 2.5887552677561927, "grad_norm": 0.23128138482570648, "learning_rate": 4.357255108860468e-06, "loss": 0.3829, "step": 12593 }, { "epoch": 2.5889608387295713, "grad_norm": 0.2288789004087448, "learning_rate": 4.35296842905758e-06, "loss": 0.3774, "step": 12594 }, { "epoch": 2.58916640970295, "grad_norm": 0.22914868593215942, "learning_rate": 4.348683751743952e-06, "loss": 0.3669, "step": 12595 }, { "epoch": 2.5893719806763285, "grad_norm": 0.12475783377885818, "learning_rate": 4.344401077130674e-06, "loss": 0.466, "step": 12596 }, { "epoch": 2.589577551649707, "grad_norm": 0.22877533733844757, "learning_rate": 4.340120405428733e-06, "loss": 0.386, "step": 12597 }, { "epoch": 2.5897831226230856, "grad_norm": 0.2277032434940338, "learning_rate": 4.335841736849015e-06, "loss": 0.3892, "step": 12598 }, { "epoch": 2.589988693596464, "grad_norm": 0.22553406655788422, "learning_rate": 4.331565071602301e-06, "loss": 0.3725, "step": 12599 }, { "epoch": 2.590194264569843, "grad_norm": 0.23949706554412842, "learning_rate": 4.327290409899299e-06, "loss": 0.3877, "step": 12600 }, { "epoch": 2.5903998355432214, "grad_norm": 0.23723599314689636, "learning_rate": 4.323017751950593e-06, "loss": 0.3816, "step": 12601 }, { "epoch": 2.5906054065166, "grad_norm": 0.2249545156955719, "learning_rate": 4.318747097966682e-06, "loss": 0.3656, "step": 12602 }, { "epoch": 2.5908109774899786, "grad_norm": 0.22489015758037567, "learning_rate": 4.314478448157962e-06, "loss": 0.3837, "step": 12603 }, { "epoch": 2.591016548463357, "grad_norm": 0.22682681679725647, "learning_rate": 4.31021180273472e-06, "loss": 0.388, "step": 12604 }, { "epoch": 2.5912221194367353, "grad_norm": 0.22748106718063354, "learning_rate": 4.305947161907161e-06, "loss": 0.3789, "step": 12605 }, { "epoch": 2.5914276904101143, "grad_norm": 0.22520016133785248, "learning_rate": 4.301684525885369e-06, "loss": 0.3921, "step": 12606 }, { "epoch": 2.5916332613834925, "grad_norm": 0.22781188786029816, "learning_rate": 4.297423894879371e-06, "loss": 0.3787, "step": 12607 }, { "epoch": 2.591838832356871, "grad_norm": 0.1223108097910881, "learning_rate": 4.293165269099049e-06, "loss": 0.4503, "step": 12608 }, { "epoch": 2.5920444033302497, "grad_norm": 0.2299673855304718, "learning_rate": 4.288908648754213e-06, "loss": 0.3899, "step": 12609 }, { "epoch": 2.5922499743036282, "grad_norm": 0.28654077649116516, "learning_rate": 4.284654034054568e-06, "loss": 0.3756, "step": 12610 }, { "epoch": 2.592455545277007, "grad_norm": 0.24485927820205688, "learning_rate": 4.280401425209705e-06, "loss": 0.3807, "step": 12611 }, { "epoch": 2.5926611162503854, "grad_norm": 0.12086854130029678, "learning_rate": 4.276150822429146e-06, "loss": 0.4414, "step": 12612 }, { "epoch": 2.592866687223764, "grad_norm": 0.12190677970647812, "learning_rate": 4.2719022259223e-06, "loss": 0.4467, "step": 12613 }, { "epoch": 2.5930722581971426, "grad_norm": 0.23326410353183746, "learning_rate": 4.267655635898454e-06, "loss": 0.3714, "step": 12614 }, { "epoch": 2.593277829170521, "grad_norm": 0.12049704790115356, "learning_rate": 4.263411052566845e-06, "loss": 0.4688, "step": 12615 }, { "epoch": 2.5934834001438998, "grad_norm": 0.22923533618450165, "learning_rate": 4.259168476136571e-06, "loss": 0.3817, "step": 12616 }, { "epoch": 2.5936889711172784, "grad_norm": 0.23745588958263397, "learning_rate": 4.25492790681664e-06, "loss": 0.3685, "step": 12617 }, { "epoch": 2.593894542090657, "grad_norm": 0.1278616040945053, "learning_rate": 4.250689344815975e-06, "loss": 0.4721, "step": 12618 }, { "epoch": 2.5941001130640355, "grad_norm": 0.2362724095582962, "learning_rate": 4.2464527903433685e-06, "loss": 0.399, "step": 12619 }, { "epoch": 2.5943056840374137, "grad_norm": 0.22642555832862854, "learning_rate": 4.242218243607564e-06, "loss": 0.3937, "step": 12620 }, { "epoch": 2.5945112550107927, "grad_norm": 0.11976632475852966, "learning_rate": 4.237985704817164e-06, "loss": 0.4401, "step": 12621 }, { "epoch": 2.594716825984171, "grad_norm": 0.22681719064712524, "learning_rate": 4.233755174180688e-06, "loss": 0.3904, "step": 12622 }, { "epoch": 2.5949223969575494, "grad_norm": 0.23691929876804352, "learning_rate": 4.2295266519065575e-06, "loss": 0.3986, "step": 12623 }, { "epoch": 2.595127967930928, "grad_norm": 0.2224111258983612, "learning_rate": 4.225300138203082e-06, "loss": 0.3766, "step": 12624 }, { "epoch": 2.5953335389043066, "grad_norm": 0.22920066118240356, "learning_rate": 4.22107563327849e-06, "loss": 0.3883, "step": 12625 }, { "epoch": 2.595539109877685, "grad_norm": 0.22633981704711914, "learning_rate": 4.216853137340895e-06, "loss": 0.381, "step": 12626 }, { "epoch": 2.595744680851064, "grad_norm": 0.2328253835439682, "learning_rate": 4.21263265059833e-06, "loss": 0.3936, "step": 12627 }, { "epoch": 2.5959502518244424, "grad_norm": 0.2358826845884323, "learning_rate": 4.208414173258719e-06, "loss": 0.3572, "step": 12628 }, { "epoch": 2.596155822797821, "grad_norm": 0.23712804913520813, "learning_rate": 4.204197705529881e-06, "loss": 0.4026, "step": 12629 }, { "epoch": 2.5963613937711996, "grad_norm": 0.2283300757408142, "learning_rate": 4.199983247619545e-06, "loss": 0.3851, "step": 12630 }, { "epoch": 2.596566964744578, "grad_norm": 0.22347889840602875, "learning_rate": 4.195770799735333e-06, "loss": 0.3673, "step": 12631 }, { "epoch": 2.5967725357179567, "grad_norm": 0.23281633853912354, "learning_rate": 4.1915603620847675e-06, "loss": 0.4097, "step": 12632 }, { "epoch": 2.5969781066913353, "grad_norm": 0.2382623255252838, "learning_rate": 4.187351934875289e-06, "loss": 0.409, "step": 12633 }, { "epoch": 2.597183677664714, "grad_norm": 0.2310085892677307, "learning_rate": 4.18314551831423e-06, "loss": 0.3912, "step": 12634 }, { "epoch": 2.597389248638092, "grad_norm": 0.2320510298013687, "learning_rate": 4.1789411126088015e-06, "loss": 0.3582, "step": 12635 }, { "epoch": 2.597594819611471, "grad_norm": 0.23400908708572388, "learning_rate": 4.174738717966154e-06, "loss": 0.3725, "step": 12636 }, { "epoch": 2.5978003905848492, "grad_norm": 0.23173139989376068, "learning_rate": 4.170538334593318e-06, "loss": 0.3992, "step": 12637 }, { "epoch": 2.598005961558228, "grad_norm": 0.23350614309310913, "learning_rate": 4.1663399626972175e-06, "loss": 0.3795, "step": 12638 }, { "epoch": 2.5982115325316064, "grad_norm": 0.23148328065872192, "learning_rate": 4.162143602484692e-06, "loss": 0.3611, "step": 12639 }, { "epoch": 2.598417103504985, "grad_norm": 0.22413556277751923, "learning_rate": 4.15794925416247e-06, "loss": 0.3857, "step": 12640 }, { "epoch": 2.5986226744783636, "grad_norm": 0.22561226785182953, "learning_rate": 4.153756917937197e-06, "loss": 0.3802, "step": 12641 }, { "epoch": 2.598828245451742, "grad_norm": 0.22936685383319855, "learning_rate": 4.149566594015408e-06, "loss": 0.374, "step": 12642 }, { "epoch": 2.5990338164251208, "grad_norm": 0.12491568177938461, "learning_rate": 4.145378282603538e-06, "loss": 0.4393, "step": 12643 }, { "epoch": 2.5992393873984994, "grad_norm": 0.22605833411216736, "learning_rate": 4.141191983907927e-06, "loss": 0.3838, "step": 12644 }, { "epoch": 2.599444958371878, "grad_norm": 0.22735659778118134, "learning_rate": 4.137007698134814e-06, "loss": 0.3879, "step": 12645 }, { "epoch": 2.5996505293452565, "grad_norm": 0.23198509216308594, "learning_rate": 4.1328254254903345e-06, "loss": 0.3801, "step": 12646 }, { "epoch": 2.599856100318635, "grad_norm": 0.11979317665100098, "learning_rate": 4.12864516618053e-06, "loss": 0.4608, "step": 12647 }, { "epoch": 2.6000616712920137, "grad_norm": 0.23859287798404694, "learning_rate": 4.124466920411354e-06, "loss": 0.3896, "step": 12648 }, { "epoch": 2.6002672422653923, "grad_norm": 0.22083625197410583, "learning_rate": 4.120290688388638e-06, "loss": 0.3972, "step": 12649 }, { "epoch": 2.6004728132387704, "grad_norm": 0.23605285584926605, "learning_rate": 4.116116470318131e-06, "loss": 0.4005, "step": 12650 }, { "epoch": 2.6006783842121495, "grad_norm": 0.2248559296131134, "learning_rate": 4.111944266405476e-06, "loss": 0.3765, "step": 12651 }, { "epoch": 2.6008839551855276, "grad_norm": 0.23217467963695526, "learning_rate": 4.107774076856211e-06, "loss": 0.3721, "step": 12652 }, { "epoch": 2.601089526158906, "grad_norm": 0.23271602392196655, "learning_rate": 4.103605901875783e-06, "loss": 0.383, "step": 12653 }, { "epoch": 2.601295097132285, "grad_norm": 0.22513030469417572, "learning_rate": 4.099439741669553e-06, "loss": 0.375, "step": 12654 }, { "epoch": 2.6015006681056634, "grad_norm": 0.2263166457414627, "learning_rate": 4.0952755964427555e-06, "loss": 0.3759, "step": 12655 }, { "epoch": 2.601706239079042, "grad_norm": 0.22585409879684448, "learning_rate": 4.091113466400533e-06, "loss": 0.3809, "step": 12656 }, { "epoch": 2.6019118100524206, "grad_norm": 0.22669021785259247, "learning_rate": 4.08695335174795e-06, "loss": 0.3782, "step": 12657 }, { "epoch": 2.602117381025799, "grad_norm": 0.2331555336713791, "learning_rate": 4.082795252689949e-06, "loss": 0.3776, "step": 12658 }, { "epoch": 2.6023229519991777, "grad_norm": 0.2259588986635208, "learning_rate": 4.07863916943138e-06, "loss": 0.4052, "step": 12659 }, { "epoch": 2.6025285229725563, "grad_norm": 0.2281801849603653, "learning_rate": 4.074485102176994e-06, "loss": 0.391, "step": 12660 }, { "epoch": 2.602734093945935, "grad_norm": 0.22164572775363922, "learning_rate": 4.070333051131434e-06, "loss": 0.3553, "step": 12661 }, { "epoch": 2.6029396649193135, "grad_norm": 0.2315491884946823, "learning_rate": 4.0661830164992644e-06, "loss": 0.3879, "step": 12662 }, { "epoch": 2.603145235892692, "grad_norm": 0.22418269515037537, "learning_rate": 4.062034998484938e-06, "loss": 0.3866, "step": 12663 }, { "epoch": 2.6033508068660707, "grad_norm": 0.22450844943523407, "learning_rate": 4.0578889972928e-06, "loss": 0.3846, "step": 12664 }, { "epoch": 2.603556377839449, "grad_norm": 0.22189322113990784, "learning_rate": 4.053745013127109e-06, "loss": 0.3828, "step": 12665 }, { "epoch": 2.603761948812828, "grad_norm": 0.22486789524555206, "learning_rate": 4.04960304619202e-06, "loss": 0.3742, "step": 12666 }, { "epoch": 2.603967519786206, "grad_norm": 0.23218779265880585, "learning_rate": 4.045463096691585e-06, "loss": 0.3841, "step": 12667 }, { "epoch": 2.6041730907595846, "grad_norm": 0.24457047879695892, "learning_rate": 4.041325164829752e-06, "loss": 0.3924, "step": 12668 }, { "epoch": 2.604378661732963, "grad_norm": 0.1159941703081131, "learning_rate": 4.037189250810401e-06, "loss": 0.4393, "step": 12669 }, { "epoch": 2.6045842327063418, "grad_norm": 0.2236376702785492, "learning_rate": 4.033055354837276e-06, "loss": 0.376, "step": 12670 }, { "epoch": 2.6047898036797204, "grad_norm": 0.23160508275032043, "learning_rate": 4.0289234771140335e-06, "loss": 0.3798, "step": 12671 }, { "epoch": 2.604995374653099, "grad_norm": 0.11661987006664276, "learning_rate": 4.02479361784423e-06, "loss": 0.4366, "step": 12672 }, { "epoch": 2.6052009456264775, "grad_norm": 0.22946786880493164, "learning_rate": 4.020665777231327e-06, "loss": 0.3895, "step": 12673 }, { "epoch": 2.605406516599856, "grad_norm": 0.2358265221118927, "learning_rate": 4.0165399554786894e-06, "loss": 0.37, "step": 12674 }, { "epoch": 2.6056120875732347, "grad_norm": 0.23617896437644958, "learning_rate": 4.0124161527895635e-06, "loss": 0.3892, "step": 12675 }, { "epoch": 2.6058176585466133, "grad_norm": 0.23179112374782562, "learning_rate": 4.008294369367121e-06, "loss": 0.3648, "step": 12676 }, { "epoch": 2.606023229519992, "grad_norm": 0.12185750156641006, "learning_rate": 4.004174605414424e-06, "loss": 0.4366, "step": 12677 }, { "epoch": 2.6062288004933705, "grad_norm": 0.12000511586666107, "learning_rate": 4.000056861134422e-06, "loss": 0.4486, "step": 12678 }, { "epoch": 2.606434371466749, "grad_norm": 0.25182247161865234, "learning_rate": 3.995941136729992e-06, "loss": 0.4121, "step": 12679 }, { "epoch": 2.606639942440127, "grad_norm": 0.2277366816997528, "learning_rate": 3.991827432403891e-06, "loss": 0.3816, "step": 12680 }, { "epoch": 2.6068455134135062, "grad_norm": 0.24362795054912567, "learning_rate": 3.987715748358783e-06, "loss": 0.3746, "step": 12681 }, { "epoch": 2.6070510843868844, "grad_norm": 0.23480716347694397, "learning_rate": 3.983606084797215e-06, "loss": 0.3913, "step": 12682 }, { "epoch": 2.607256655360263, "grad_norm": 0.232587993144989, "learning_rate": 3.9794984419216755e-06, "loss": 0.3749, "step": 12683 }, { "epoch": 2.6074622263336416, "grad_norm": 0.22933539748191833, "learning_rate": 3.9753928199345225e-06, "loss": 0.3873, "step": 12684 }, { "epoch": 2.60766779730702, "grad_norm": 0.22452722489833832, "learning_rate": 3.971289219038014e-06, "loss": 0.3707, "step": 12685 }, { "epoch": 2.6078733682803987, "grad_norm": 0.23321999609470367, "learning_rate": 3.967187639434315e-06, "loss": 0.3864, "step": 12686 }, { "epoch": 2.6080789392537773, "grad_norm": 0.1229674443602562, "learning_rate": 3.963088081325497e-06, "loss": 0.4495, "step": 12687 }, { "epoch": 2.608284510227156, "grad_norm": 0.24325041472911835, "learning_rate": 3.958990544913513e-06, "loss": 0.3694, "step": 12688 }, { "epoch": 2.6084900812005345, "grad_norm": 0.22455255687236786, "learning_rate": 3.9548950304002536e-06, "loss": 0.38, "step": 12689 }, { "epoch": 2.608695652173913, "grad_norm": 0.23633895814418793, "learning_rate": 3.950801537987466e-06, "loss": 0.3842, "step": 12690 }, { "epoch": 2.6089012231472917, "grad_norm": 0.2320830076932907, "learning_rate": 3.946710067876824e-06, "loss": 0.3621, "step": 12691 }, { "epoch": 2.6091067941206703, "grad_norm": 0.22698310017585754, "learning_rate": 3.942620620269896e-06, "loss": 0.3754, "step": 12692 }, { "epoch": 2.609312365094049, "grad_norm": 0.23345276713371277, "learning_rate": 3.938533195368147e-06, "loss": 0.3781, "step": 12693 }, { "epoch": 2.6095179360674274, "grad_norm": 0.23347729444503784, "learning_rate": 3.93444779337295e-06, "loss": 0.3761, "step": 12694 }, { "epoch": 2.6097235070408056, "grad_norm": 0.2320747673511505, "learning_rate": 3.9303644144855595e-06, "loss": 0.3811, "step": 12695 }, { "epoch": 2.6099290780141846, "grad_norm": 0.23767243325710297, "learning_rate": 3.926283058907159e-06, "loss": 0.3885, "step": 12696 }, { "epoch": 2.6101346489875628, "grad_norm": 0.2290267050266266, "learning_rate": 3.922203726838818e-06, "loss": 0.3869, "step": 12697 }, { "epoch": 2.610340219960942, "grad_norm": 0.2302408069372177, "learning_rate": 3.918126418481507e-06, "loss": 0.3751, "step": 12698 }, { "epoch": 2.61054579093432, "grad_norm": 0.22879059612751007, "learning_rate": 3.914051134036077e-06, "loss": 0.3831, "step": 12699 }, { "epoch": 2.6107513619076985, "grad_norm": 0.23263303935527802, "learning_rate": 3.9099778737033215e-06, "loss": 0.3813, "step": 12700 }, { "epoch": 2.610956932881077, "grad_norm": 0.23693881928920746, "learning_rate": 3.905906637683902e-06, "loss": 0.3898, "step": 12701 }, { "epoch": 2.6111625038544557, "grad_norm": 0.22104987502098083, "learning_rate": 3.901837426178384e-06, "loss": 0.3801, "step": 12702 }, { "epoch": 2.6113680748278343, "grad_norm": 0.23495499789714813, "learning_rate": 3.897770239387247e-06, "loss": 0.3659, "step": 12703 }, { "epoch": 2.611573645801213, "grad_norm": 0.23057711124420166, "learning_rate": 3.893705077510861e-06, "loss": 0.3858, "step": 12704 }, { "epoch": 2.6117792167745915, "grad_norm": 0.2297714799642563, "learning_rate": 3.8896419407494955e-06, "loss": 0.3919, "step": 12705 }, { "epoch": 2.61198478774797, "grad_norm": 0.12049584090709686, "learning_rate": 3.885580829303326e-06, "loss": 0.4581, "step": 12706 }, { "epoch": 2.6121903587213486, "grad_norm": 0.23567521572113037, "learning_rate": 3.8815217433724165e-06, "loss": 0.3795, "step": 12707 }, { "epoch": 2.6123959296947272, "grad_norm": 0.22988666594028473, "learning_rate": 3.877464683156743e-06, "loss": 0.3793, "step": 12708 }, { "epoch": 2.612601500668106, "grad_norm": 0.2348259687423706, "learning_rate": 3.873409648856175e-06, "loss": 0.3949, "step": 12709 }, { "epoch": 2.612807071641484, "grad_norm": 0.2306186556816101, "learning_rate": 3.869356640670493e-06, "loss": 0.3803, "step": 12710 }, { "epoch": 2.613012642614863, "grad_norm": 0.23866905272006989, "learning_rate": 3.865305658799362e-06, "loss": 0.3926, "step": 12711 }, { "epoch": 2.613218213588241, "grad_norm": 0.23288170993328094, "learning_rate": 3.861256703442363e-06, "loss": 0.3704, "step": 12712 }, { "epoch": 2.61342378456162, "grad_norm": 0.12461275607347488, "learning_rate": 3.857209774798965e-06, "loss": 0.4396, "step": 12713 }, { "epoch": 2.6136293555349983, "grad_norm": 0.2187352180480957, "learning_rate": 3.853164873068535e-06, "loss": 0.3681, "step": 12714 }, { "epoch": 2.613834926508377, "grad_norm": 0.1244712844491005, "learning_rate": 3.849121998450358e-06, "loss": 0.4467, "step": 12715 }, { "epoch": 2.6140404974817555, "grad_norm": 0.12192442268133163, "learning_rate": 3.84508115114359e-06, "loss": 0.4495, "step": 12716 }, { "epoch": 2.614246068455134, "grad_norm": 0.22983375191688538, "learning_rate": 3.841042331347321e-06, "loss": 0.3779, "step": 12717 }, { "epoch": 2.6144516394285127, "grad_norm": 0.23377950489521027, "learning_rate": 3.8370055392605225e-06, "loss": 0.3805, "step": 12718 }, { "epoch": 2.6146572104018913, "grad_norm": 0.24588587880134583, "learning_rate": 3.832970775082071e-06, "loss": 0.393, "step": 12719 }, { "epoch": 2.61486278137527, "grad_norm": 0.23526208102703094, "learning_rate": 3.82893803901072e-06, "loss": 0.3813, "step": 12720 }, { "epoch": 2.6150683523486484, "grad_norm": 0.24966707825660706, "learning_rate": 3.824907331245169e-06, "loss": 0.3916, "step": 12721 }, { "epoch": 2.615273923322027, "grad_norm": 0.2245279848575592, "learning_rate": 3.820878651983982e-06, "loss": 0.3745, "step": 12722 }, { "epoch": 2.6154794942954056, "grad_norm": 0.2309785783290863, "learning_rate": 3.816852001425625e-06, "loss": 0.3854, "step": 12723 }, { "epoch": 2.615685065268784, "grad_norm": 0.22770026326179504, "learning_rate": 3.812827379768491e-06, "loss": 0.3834, "step": 12724 }, { "epoch": 2.6158906362421623, "grad_norm": 0.12256407737731934, "learning_rate": 3.80880478721084e-06, "loss": 0.4432, "step": 12725 }, { "epoch": 2.6160962072155414, "grad_norm": 0.12518732249736786, "learning_rate": 3.8047842239508542e-06, "loss": 0.446, "step": 12726 }, { "epoch": 2.6163017781889195, "grad_norm": 0.2416573315858841, "learning_rate": 3.8007656901865996e-06, "loss": 0.3825, "step": 12727 }, { "epoch": 2.6165073491622985, "grad_norm": 0.23824763298034668, "learning_rate": 3.7967491861160583e-06, "loss": 0.3737, "step": 12728 }, { "epoch": 2.6167129201356767, "grad_norm": 0.2198529988527298, "learning_rate": 3.7927347119370966e-06, "loss": 0.3874, "step": 12729 }, { "epoch": 2.6169184911090553, "grad_norm": 0.2278008759021759, "learning_rate": 3.7887222678474868e-06, "loss": 0.356, "step": 12730 }, { "epoch": 2.617124062082434, "grad_norm": 0.2338324338197708, "learning_rate": 3.7847118540449202e-06, "loss": 0.3923, "step": 12731 }, { "epoch": 2.6173296330558125, "grad_norm": 0.21889689564704895, "learning_rate": 3.780703470726959e-06, "loss": 0.3612, "step": 12732 }, { "epoch": 2.617535204029191, "grad_norm": 0.238824263215065, "learning_rate": 3.7766971180910803e-06, "loss": 0.3983, "step": 12733 }, { "epoch": 2.6177407750025696, "grad_norm": 0.12167999893426895, "learning_rate": 3.7726927963346564e-06, "loss": 0.4387, "step": 12734 }, { "epoch": 2.617946345975948, "grad_norm": 0.23274268209934235, "learning_rate": 3.768690505654964e-06, "loss": 0.3855, "step": 12735 }, { "epoch": 2.618151916949327, "grad_norm": 0.24479152262210846, "learning_rate": 3.7646902462491765e-06, "loss": 0.3923, "step": 12736 }, { "epoch": 2.6183574879227054, "grad_norm": 0.2456178367137909, "learning_rate": 3.7606920183143546e-06, "loss": 0.4016, "step": 12737 }, { "epoch": 2.618563058896084, "grad_norm": 0.23310574889183044, "learning_rate": 3.756695822047497e-06, "loss": 0.3859, "step": 12738 }, { "epoch": 2.6187686298694626, "grad_norm": 0.12393064796924591, "learning_rate": 3.7527016576454603e-06, "loss": 0.4474, "step": 12739 }, { "epoch": 2.6189742008428407, "grad_norm": 0.23465074598789215, "learning_rate": 3.748709525305028e-06, "loss": 0.3948, "step": 12740 }, { "epoch": 2.6191797718162197, "grad_norm": 0.2182430624961853, "learning_rate": 3.7447194252228624e-06, "loss": 0.3807, "step": 12741 }, { "epoch": 2.619385342789598, "grad_norm": 0.23260043561458588, "learning_rate": 3.740731357595551e-06, "loss": 0.3909, "step": 12742 }, { "epoch": 2.619590913762977, "grad_norm": 0.11928752809762955, "learning_rate": 3.736745322619557e-06, "loss": 0.45, "step": 12743 }, { "epoch": 2.619796484736355, "grad_norm": 0.23598788678646088, "learning_rate": 3.7327613204912532e-06, "loss": 0.3824, "step": 12744 }, { "epoch": 2.6200020557097337, "grad_norm": 0.24089393019676208, "learning_rate": 3.7287793514069226e-06, "loss": 0.3849, "step": 12745 }, { "epoch": 2.6202076266831122, "grad_norm": 0.23498208820819855, "learning_rate": 3.724799415562733e-06, "loss": 0.3896, "step": 12746 }, { "epoch": 2.620413197656491, "grad_norm": 0.23025161027908325, "learning_rate": 3.720821513154758e-06, "loss": 0.3816, "step": 12747 }, { "epoch": 2.6206187686298694, "grad_norm": 0.22431518137454987, "learning_rate": 3.7168456443789656e-06, "loss": 0.3795, "step": 12748 }, { "epoch": 2.620824339603248, "grad_norm": 0.12069787830114365, "learning_rate": 3.7128718094312293e-06, "loss": 0.4559, "step": 12749 }, { "epoch": 2.6210299105766266, "grad_norm": 0.2284901887178421, "learning_rate": 3.708900008507327e-06, "loss": 0.3781, "step": 12750 }, { "epoch": 2.621235481550005, "grad_norm": 0.12058551609516144, "learning_rate": 3.704930241802918e-06, "loss": 0.446, "step": 12751 }, { "epoch": 2.6214410525233838, "grad_norm": 0.22641105949878693, "learning_rate": 3.700962509513595e-06, "loss": 0.3953, "step": 12752 }, { "epoch": 2.6216466234967624, "grad_norm": 0.11827776581048965, "learning_rate": 3.6969968118348127e-06, "loss": 0.4369, "step": 12753 }, { "epoch": 2.621852194470141, "grad_norm": 0.12709125876426697, "learning_rate": 3.6930331489619537e-06, "loss": 0.4419, "step": 12754 }, { "epoch": 2.6220577654435195, "grad_norm": 0.24272185564041138, "learning_rate": 3.689071521090277e-06, "loss": 0.396, "step": 12755 }, { "epoch": 2.622263336416898, "grad_norm": 0.23644161224365234, "learning_rate": 3.685111928414962e-06, "loss": 0.3728, "step": 12756 }, { "epoch": 2.6224689073902763, "grad_norm": 0.23552681505680084, "learning_rate": 3.6811543711310777e-06, "loss": 0.4081, "step": 12757 }, { "epoch": 2.6226744783636553, "grad_norm": 0.23269321024417877, "learning_rate": 3.6771988494335823e-06, "loss": 0.3695, "step": 12758 }, { "epoch": 2.6228800493370334, "grad_norm": 0.22955691814422607, "learning_rate": 3.673245363517371e-06, "loss": 0.3751, "step": 12759 }, { "epoch": 2.623085620310412, "grad_norm": 0.22635483741760254, "learning_rate": 3.669293913577197e-06, "loss": 0.4007, "step": 12760 }, { "epoch": 2.6232911912837906, "grad_norm": 0.2312253713607788, "learning_rate": 3.6653444998077302e-06, "loss": 0.3691, "step": 12761 }, { "epoch": 2.623496762257169, "grad_norm": 0.24891069531440735, "learning_rate": 3.661397122403545e-06, "loss": 0.3871, "step": 12762 }, { "epoch": 2.623702333230548, "grad_norm": 0.22625122964382172, "learning_rate": 3.6574517815591002e-06, "loss": 0.3823, "step": 12763 }, { "epoch": 2.6239079042039264, "grad_norm": 0.2243538647890091, "learning_rate": 3.653508477468781e-06, "loss": 0.3765, "step": 12764 }, { "epoch": 2.624113475177305, "grad_norm": 0.23248639702796936, "learning_rate": 3.649567210326832e-06, "loss": 0.3974, "step": 12765 }, { "epoch": 2.6243190461506836, "grad_norm": 0.22589054703712463, "learning_rate": 3.6456279803274474e-06, "loss": 0.3502, "step": 12766 }, { "epoch": 2.624524617124062, "grad_norm": 0.23221521079540253, "learning_rate": 3.6416907876646824e-06, "loss": 0.3724, "step": 12767 }, { "epoch": 2.6247301880974407, "grad_norm": 0.22528620064258575, "learning_rate": 3.6377556325325014e-06, "loss": 0.3963, "step": 12768 }, { "epoch": 2.6249357590708193, "grad_norm": 0.2201879322528839, "learning_rate": 3.6338225151247797e-06, "loss": 0.3879, "step": 12769 }, { "epoch": 2.625141330044198, "grad_norm": 0.2363358587026596, "learning_rate": 3.629891435635272e-06, "loss": 0.3906, "step": 12770 }, { "epoch": 2.6253469010175765, "grad_norm": 0.12273525446653366, "learning_rate": 3.625962394257644e-06, "loss": 0.4524, "step": 12771 }, { "epoch": 2.6255524719909547, "grad_norm": 0.23554597795009613, "learning_rate": 3.6220353911854748e-06, "loss": 0.3759, "step": 12772 }, { "epoch": 2.6257580429643337, "grad_norm": 0.12295962870121002, "learning_rate": 3.6181104266122206e-06, "loss": 0.4549, "step": 12773 }, { "epoch": 2.625963613937712, "grad_norm": 0.22669798135757446, "learning_rate": 3.6141875007312465e-06, "loss": 0.3875, "step": 12774 }, { "epoch": 2.6261691849110904, "grad_norm": 0.12479596585035324, "learning_rate": 3.610266613735818e-06, "loss": 0.4492, "step": 12775 }, { "epoch": 2.626374755884469, "grad_norm": 0.1302644908428192, "learning_rate": 3.6063477658191e-06, "loss": 0.4415, "step": 12776 }, { "epoch": 2.6265803268578476, "grad_norm": 0.12051938474178314, "learning_rate": 3.6024309571741533e-06, "loss": 0.4435, "step": 12777 }, { "epoch": 2.626785897831226, "grad_norm": 0.2350439727306366, "learning_rate": 3.5985161879939338e-06, "loss": 0.3832, "step": 12778 }, { "epoch": 2.6269914688046048, "grad_norm": 0.12057623267173767, "learning_rate": 3.5946034584713225e-06, "loss": 0.454, "step": 12779 }, { "epoch": 2.6271970397779834, "grad_norm": 0.22473283112049103, "learning_rate": 3.5906927687990644e-06, "loss": 0.38, "step": 12780 }, { "epoch": 2.627402610751362, "grad_norm": 0.2340707778930664, "learning_rate": 3.586784119169831e-06, "loss": 0.3914, "step": 12781 }, { "epoch": 2.6276081817247405, "grad_norm": 0.11768918484449387, "learning_rate": 3.582877509776178e-06, "loss": 0.4492, "step": 12782 }, { "epoch": 2.627813752698119, "grad_norm": 0.2253478765487671, "learning_rate": 3.5789729408105665e-06, "loss": 0.3697, "step": 12783 }, { "epoch": 2.6280193236714977, "grad_norm": 0.22701376676559448, "learning_rate": 3.575070412465353e-06, "loss": 0.3721, "step": 12784 }, { "epoch": 2.6282248946448763, "grad_norm": 0.1314348578453064, "learning_rate": 3.571169924932803e-06, "loss": 0.4403, "step": 12785 }, { "epoch": 2.628430465618255, "grad_norm": 0.22781673073768616, "learning_rate": 3.567271478405078e-06, "loss": 0.3857, "step": 12786 }, { "epoch": 2.628636036591633, "grad_norm": 0.2242654412984848, "learning_rate": 3.56337507307422e-06, "loss": 0.383, "step": 12787 }, { "epoch": 2.628841607565012, "grad_norm": 0.23189879953861237, "learning_rate": 3.5594807091322047e-06, "loss": 0.3873, "step": 12788 }, { "epoch": 2.62904717853839, "grad_norm": 0.23214222490787506, "learning_rate": 3.555588386770884e-06, "loss": 0.4035, "step": 12789 }, { "epoch": 2.629252749511769, "grad_norm": 0.23310469090938568, "learning_rate": 3.551698106182014e-06, "loss": 0.3994, "step": 12790 }, { "epoch": 2.6294583204851474, "grad_norm": 0.2216804176568985, "learning_rate": 3.5478098675572474e-06, "loss": 0.3785, "step": 12791 }, { "epoch": 2.629663891458526, "grad_norm": 0.23494820296764374, "learning_rate": 3.543923671088135e-06, "loss": 0.3643, "step": 12792 }, { "epoch": 2.6298694624319046, "grad_norm": 0.23349052667617798, "learning_rate": 3.540039516966144e-06, "loss": 0.4023, "step": 12793 }, { "epoch": 2.630075033405283, "grad_norm": 0.12207508087158203, "learning_rate": 3.536157405382627e-06, "loss": 0.4413, "step": 12794 }, { "epoch": 2.6302806043786617, "grad_norm": 0.22845908999443054, "learning_rate": 3.5322773365288298e-06, "loss": 0.3583, "step": 12795 }, { "epoch": 2.6304861753520403, "grad_norm": 0.24039143323898315, "learning_rate": 3.5283993105959103e-06, "loss": 0.3928, "step": 12796 }, { "epoch": 2.630691746325419, "grad_norm": 0.23867449164390564, "learning_rate": 3.524523327774915e-06, "loss": 0.3646, "step": 12797 }, { "epoch": 2.6308973172987975, "grad_norm": 0.12439849972724915, "learning_rate": 3.520649388256802e-06, "loss": 0.4475, "step": 12798 }, { "epoch": 2.631102888272176, "grad_norm": 0.23779630661010742, "learning_rate": 3.516777492232413e-06, "loss": 0.371, "step": 12799 }, { "epoch": 2.6313084592455547, "grad_norm": 0.2336895763874054, "learning_rate": 3.512907639892511e-06, "loss": 0.3766, "step": 12800 }, { "epoch": 2.6315140302189333, "grad_norm": 0.23059743642807007, "learning_rate": 3.5090398314277427e-06, "loss": 0.4003, "step": 12801 }, { "epoch": 2.6317196011923114, "grad_norm": 0.2365521341562271, "learning_rate": 3.5051740670286466e-06, "loss": 0.396, "step": 12802 }, { "epoch": 2.6319251721656904, "grad_norm": 0.2317541390657425, "learning_rate": 3.5013103468856846e-06, "loss": 0.3752, "step": 12803 }, { "epoch": 2.6321307431390686, "grad_norm": 0.23596826195716858, "learning_rate": 3.4974486711891948e-06, "loss": 0.3894, "step": 12804 }, { "epoch": 2.632336314112447, "grad_norm": 0.22818133234977722, "learning_rate": 3.49358904012942e-06, "loss": 0.3867, "step": 12805 }, { "epoch": 2.6325418850858258, "grad_norm": 0.23365046083927155, "learning_rate": 3.4897314538965178e-06, "loss": 0.3997, "step": 12806 }, { "epoch": 2.6327474560592043, "grad_norm": 0.21868395805358887, "learning_rate": 3.4858759126805315e-06, "loss": 0.3677, "step": 12807 }, { "epoch": 2.632953027032583, "grad_norm": 0.23094679415225983, "learning_rate": 3.4820224166713938e-06, "loss": 0.407, "step": 12808 }, { "epoch": 2.6331585980059615, "grad_norm": 0.2458486258983612, "learning_rate": 3.4781709660589636e-06, "loss": 0.3881, "step": 12809 }, { "epoch": 2.63336416897934, "grad_norm": 0.23680594563484192, "learning_rate": 3.4743215610329785e-06, "loss": 0.3726, "step": 12810 }, { "epoch": 2.6335697399527187, "grad_norm": 0.22720733284950256, "learning_rate": 3.4704742017830815e-06, "loss": 0.3768, "step": 12811 }, { "epoch": 2.6337753109260973, "grad_norm": 0.1218261644244194, "learning_rate": 3.466628888498807e-06, "loss": 0.4454, "step": 12812 }, { "epoch": 2.633980881899476, "grad_norm": 0.2283984124660492, "learning_rate": 3.4627856213695977e-06, "loss": 0.3878, "step": 12813 }, { "epoch": 2.6341864528728545, "grad_norm": 0.22897951304912567, "learning_rate": 3.4589444005848023e-06, "loss": 0.3816, "step": 12814 }, { "epoch": 2.634392023846233, "grad_norm": 0.12013474106788635, "learning_rate": 3.455105226333654e-06, "loss": 0.4596, "step": 12815 }, { "epoch": 2.6345975948196116, "grad_norm": 0.22502024471759796, "learning_rate": 3.4512680988052878e-06, "loss": 0.371, "step": 12816 }, { "epoch": 2.63480316579299, "grad_norm": 0.23252278566360474, "learning_rate": 3.447433018188751e-06, "loss": 0.3728, "step": 12817 }, { "epoch": 2.635008736766369, "grad_norm": 0.23052488267421722, "learning_rate": 3.4435999846729684e-06, "loss": 0.395, "step": 12818 }, { "epoch": 2.635214307739747, "grad_norm": 0.22712182998657227, "learning_rate": 3.4397689984467786e-06, "loss": 0.3595, "step": 12819 }, { "epoch": 2.6354198787131256, "grad_norm": 0.11764495819807053, "learning_rate": 3.4359400596989154e-06, "loss": 0.4578, "step": 12820 }, { "epoch": 2.635625449686504, "grad_norm": 0.23514217138290405, "learning_rate": 3.4321131686180186e-06, "loss": 0.3898, "step": 12821 }, { "epoch": 2.6358310206598827, "grad_norm": 0.11776993423700333, "learning_rate": 3.428288325392622e-06, "loss": 0.444, "step": 12822 }, { "epoch": 2.6360365916332613, "grad_norm": 0.21985335648059845, "learning_rate": 3.4244655302111493e-06, "loss": 0.3853, "step": 12823 }, { "epoch": 2.63624216260664, "grad_norm": 0.23297996819019318, "learning_rate": 3.420644783261941e-06, "loss": 0.3617, "step": 12824 }, { "epoch": 2.6364477335800185, "grad_norm": 0.23519185185432434, "learning_rate": 3.4168260847332207e-06, "loss": 0.3854, "step": 12825 }, { "epoch": 2.636653304553397, "grad_norm": 0.23188424110412598, "learning_rate": 3.413009434813113e-06, "loss": 0.378, "step": 12826 }, { "epoch": 2.6368588755267757, "grad_norm": 0.12326161563396454, "learning_rate": 3.409194833689663e-06, "loss": 0.4533, "step": 12827 }, { "epoch": 2.6370644465001543, "grad_norm": 0.230136439204216, "learning_rate": 3.405382281550785e-06, "loss": 0.3748, "step": 12828 }, { "epoch": 2.637270017473533, "grad_norm": 0.23117460310459137, "learning_rate": 3.4015717785843033e-06, "loss": 0.4093, "step": 12829 }, { "epoch": 2.6374755884469114, "grad_norm": 0.12335003167390823, "learning_rate": 3.3977633249779582e-06, "loss": 0.4496, "step": 12830 }, { "epoch": 2.63768115942029, "grad_norm": 0.22897659242153168, "learning_rate": 3.393956920919365e-06, "loss": 0.3755, "step": 12831 }, { "epoch": 2.637886730393668, "grad_norm": 0.24940082430839539, "learning_rate": 3.390152566596048e-06, "loss": 0.3918, "step": 12832 }, { "epoch": 2.638092301367047, "grad_norm": 0.22477301955223083, "learning_rate": 3.386350262195428e-06, "loss": 0.3838, "step": 12833 }, { "epoch": 2.6382978723404253, "grad_norm": 0.2210949808359146, "learning_rate": 3.3825500079048244e-06, "loss": 0.3877, "step": 12834 }, { "epoch": 2.638503443313804, "grad_norm": 0.23475997149944305, "learning_rate": 3.378751803911468e-06, "loss": 0.379, "step": 12835 }, { "epoch": 2.6387090142871825, "grad_norm": 0.23455579578876495, "learning_rate": 3.3749556504024738e-06, "loss": 0.3899, "step": 12836 }, { "epoch": 2.638914585260561, "grad_norm": 0.2290680706501007, "learning_rate": 3.3711615475648574e-06, "loss": 0.3715, "step": 12837 }, { "epoch": 2.6391201562339397, "grad_norm": 0.23036018013954163, "learning_rate": 3.367369495585544e-06, "loss": 0.3757, "step": 12838 }, { "epoch": 2.6393257272073183, "grad_norm": 0.2289496660232544, "learning_rate": 3.3635794946513393e-06, "loss": 0.3768, "step": 12839 }, { "epoch": 2.639531298180697, "grad_norm": 0.23706848919391632, "learning_rate": 3.3597915449489694e-06, "loss": 0.3983, "step": 12840 }, { "epoch": 2.6397368691540755, "grad_norm": 0.22767172753810883, "learning_rate": 3.356005646665034e-06, "loss": 0.4012, "step": 12841 }, { "epoch": 2.639942440127454, "grad_norm": 0.2272636741399765, "learning_rate": 3.352221799986065e-06, "loss": 0.3776, "step": 12842 }, { "epoch": 2.6401480111008326, "grad_norm": 0.2348739355802536, "learning_rate": 3.3484400050984677e-06, "loss": 0.3836, "step": 12843 }, { "epoch": 2.6403535820742112, "grad_norm": 0.23279324173927307, "learning_rate": 3.3446602621885533e-06, "loss": 0.38, "step": 12844 }, { "epoch": 2.64055915304759, "grad_norm": 0.23363368213176727, "learning_rate": 3.3408825714425273e-06, "loss": 0.3875, "step": 12845 }, { "epoch": 2.6407647240209684, "grad_norm": 0.2381156086921692, "learning_rate": 3.3371069330465066e-06, "loss": 0.3899, "step": 12846 }, { "epoch": 2.6409702949943465, "grad_norm": 0.22464582324028015, "learning_rate": 3.333333347186487e-06, "loss": 0.3757, "step": 12847 }, { "epoch": 2.6411758659677256, "grad_norm": 0.22474953532218933, "learning_rate": 3.3295618140483898e-06, "loss": 0.3826, "step": 12848 }, { "epoch": 2.6413814369411037, "grad_norm": 0.23333978652954102, "learning_rate": 3.3257923338180166e-06, "loss": 0.3958, "step": 12849 }, { "epoch": 2.6415870079144823, "grad_norm": 0.22718356549739838, "learning_rate": 3.3220249066810683e-06, "loss": 0.3781, "step": 12850 }, { "epoch": 2.641792578887861, "grad_norm": 0.2350386381149292, "learning_rate": 3.318259532823147e-06, "loss": 0.382, "step": 12851 }, { "epoch": 2.6419981498612395, "grad_norm": 0.23376323282718658, "learning_rate": 3.314496212429764e-06, "loss": 0.3888, "step": 12852 }, { "epoch": 2.642203720834618, "grad_norm": 0.12379579991102219, "learning_rate": 3.3107349456863164e-06, "loss": 0.4619, "step": 12853 }, { "epoch": 2.6424092918079967, "grad_norm": 0.2283748835325241, "learning_rate": 3.3069757327780903e-06, "loss": 0.3903, "step": 12854 }, { "epoch": 2.6426148627813753, "grad_norm": 0.12083147466182709, "learning_rate": 3.303218573890308e-06, "loss": 0.4596, "step": 12855 }, { "epoch": 2.642820433754754, "grad_norm": 0.12731818854808807, "learning_rate": 3.2994634692080566e-06, "loss": 0.4275, "step": 12856 }, { "epoch": 2.6430260047281324, "grad_norm": 0.22971482574939728, "learning_rate": 3.295710418916333e-06, "loss": 0.3684, "step": 12857 }, { "epoch": 2.643231575701511, "grad_norm": 0.23392242193222046, "learning_rate": 3.291959423200029e-06, "loss": 0.3943, "step": 12858 }, { "epoch": 2.6434371466748896, "grad_norm": 0.23680520057678223, "learning_rate": 3.288210482243942e-06, "loss": 0.3717, "step": 12859 }, { "epoch": 2.643642717648268, "grad_norm": 0.2301972508430481, "learning_rate": 3.284463596232769e-06, "loss": 0.3778, "step": 12860 }, { "epoch": 2.643848288621647, "grad_norm": 0.2333422154188156, "learning_rate": 3.280718765351083e-06, "loss": 0.3792, "step": 12861 }, { "epoch": 2.644053859595025, "grad_norm": 0.12037578970193863, "learning_rate": 3.2769759897834006e-06, "loss": 0.447, "step": 12862 }, { "epoch": 2.644259430568404, "grad_norm": 0.23077453672885895, "learning_rate": 3.273235269714095e-06, "loss": 0.3731, "step": 12863 }, { "epoch": 2.644465001541782, "grad_norm": 0.23764555156230927, "learning_rate": 3.2694966053274583e-06, "loss": 0.3879, "step": 12864 }, { "epoch": 2.644670572515161, "grad_norm": 0.23211322724819183, "learning_rate": 3.2657599968076737e-06, "loss": 0.3737, "step": 12865 }, { "epoch": 2.6448761434885393, "grad_norm": 0.24329562485218048, "learning_rate": 3.2620254443388283e-06, "loss": 0.3848, "step": 12866 }, { "epoch": 2.645081714461918, "grad_norm": 0.12165253609418869, "learning_rate": 3.25829294810491e-06, "loss": 0.4326, "step": 12867 }, { "epoch": 2.6452872854352965, "grad_norm": 0.1259094923734665, "learning_rate": 3.2545625082897874e-06, "loss": 0.4411, "step": 12868 }, { "epoch": 2.645492856408675, "grad_norm": 0.9432693123817444, "learning_rate": 3.250834125077263e-06, "loss": 0.4054, "step": 12869 }, { "epoch": 2.6456984273820536, "grad_norm": 0.23042386770248413, "learning_rate": 3.2471077986510045e-06, "loss": 0.3848, "step": 12870 }, { "epoch": 2.645903998355432, "grad_norm": 0.24361389875411987, "learning_rate": 3.243383529194591e-06, "loss": 0.3802, "step": 12871 }, { "epoch": 2.646109569328811, "grad_norm": 0.2344541847705841, "learning_rate": 3.2396613168914945e-06, "loss": 0.382, "step": 12872 }, { "epoch": 2.6463151403021894, "grad_norm": 0.2395932823419571, "learning_rate": 3.2359411619251094e-06, "loss": 0.3851, "step": 12873 }, { "epoch": 2.646520711275568, "grad_norm": 0.23469178378582, "learning_rate": 3.232223064478694e-06, "loss": 0.3817, "step": 12874 }, { "epoch": 2.6467262822489466, "grad_norm": 0.12158174812793732, "learning_rate": 3.228507024735416e-06, "loss": 0.4588, "step": 12875 }, { "epoch": 2.646931853222325, "grad_norm": 0.2328415811061859, "learning_rate": 3.2247930428783698e-06, "loss": 0.3656, "step": 12876 }, { "epoch": 2.6471374241957033, "grad_norm": 0.2348622977733612, "learning_rate": 3.2210811190905133e-06, "loss": 0.3897, "step": 12877 }, { "epoch": 2.6473429951690823, "grad_norm": 0.2322535365819931, "learning_rate": 3.2173712535547156e-06, "loss": 0.3972, "step": 12878 }, { "epoch": 2.6475485661424605, "grad_norm": 0.2271070033311844, "learning_rate": 3.2136634464537407e-06, "loss": 0.3874, "step": 12879 }, { "epoch": 2.6477541371158395, "grad_norm": 0.22299452126026154, "learning_rate": 3.209957697970262e-06, "loss": 0.3853, "step": 12880 }, { "epoch": 2.6479597080892177, "grad_norm": 0.22766993939876556, "learning_rate": 3.206254008286844e-06, "loss": 0.3715, "step": 12881 }, { "epoch": 2.6481652790625962, "grad_norm": 0.23258061707019806, "learning_rate": 3.202552377585936e-06, "loss": 0.3779, "step": 12882 }, { "epoch": 2.648370850035975, "grad_norm": 0.2237747460603714, "learning_rate": 3.198852806049921e-06, "loss": 0.3782, "step": 12883 }, { "epoch": 2.6485764210093534, "grad_norm": 0.2268817126750946, "learning_rate": 3.1951552938610486e-06, "loss": 0.3831, "step": 12884 }, { "epoch": 2.648781991982732, "grad_norm": 0.22952783107757568, "learning_rate": 3.1914598412014784e-06, "loss": 0.387, "step": 12885 }, { "epoch": 2.6489875629561106, "grad_norm": 0.11640308797359467, "learning_rate": 3.1877664482532748e-06, "loss": 0.4273, "step": 12886 }, { "epoch": 2.649193133929489, "grad_norm": 0.23823915421962738, "learning_rate": 3.184075115198382e-06, "loss": 0.36, "step": 12887 }, { "epoch": 2.6493987049028678, "grad_norm": 0.2281506359577179, "learning_rate": 3.180385842218665e-06, "loss": 0.3906, "step": 12888 }, { "epoch": 2.6496042758762464, "grad_norm": 0.22782327234745026, "learning_rate": 3.176698629495868e-06, "loss": 0.3764, "step": 12889 }, { "epoch": 2.649809846849625, "grad_norm": 0.23879340291023254, "learning_rate": 3.1730134772116507e-06, "loss": 0.3965, "step": 12890 }, { "epoch": 2.6500154178230035, "grad_norm": 0.22930049896240234, "learning_rate": 3.1693303855475626e-06, "loss": 0.3817, "step": 12891 }, { "epoch": 2.6502209887963817, "grad_norm": 0.12056277692317963, "learning_rate": 3.1656493546850492e-06, "loss": 0.4472, "step": 12892 }, { "epoch": 2.6504265597697607, "grad_norm": 0.22143647074699402, "learning_rate": 3.16197038480545e-06, "loss": 0.3693, "step": 12893 }, { "epoch": 2.650632130743139, "grad_norm": 0.2295861691236496, "learning_rate": 3.1582934760900302e-06, "loss": 0.3659, "step": 12894 }, { "epoch": 2.650837701716518, "grad_norm": 0.2308284193277359, "learning_rate": 3.1546186287199196e-06, "loss": 0.3898, "step": 12895 }, { "epoch": 2.651043272689896, "grad_norm": 0.12249313294887543, "learning_rate": 3.1509458428761593e-06, "loss": 0.4343, "step": 12896 }, { "epoch": 2.6512488436632746, "grad_norm": 0.2282872498035431, "learning_rate": 3.1472751187397034e-06, "loss": 0.3752, "step": 12897 }, { "epoch": 2.651454414636653, "grad_norm": 0.23152220249176025, "learning_rate": 3.1436064564913824e-06, "loss": 0.3756, "step": 12898 }, { "epoch": 2.651659985610032, "grad_norm": 0.2310069352388382, "learning_rate": 3.1399398563119376e-06, "loss": 0.3788, "step": 12899 }, { "epoch": 2.6518655565834104, "grad_norm": 0.24678552150726318, "learning_rate": 3.1362753183819987e-06, "loss": 0.4028, "step": 12900 }, { "epoch": 2.652071127556789, "grad_norm": 0.2333284318447113, "learning_rate": 3.1326128428821065e-06, "loss": 0.3579, "step": 12901 }, { "epoch": 2.6522766985301676, "grad_norm": 0.22593623399734497, "learning_rate": 3.128952429992692e-06, "loss": 0.3587, "step": 12902 }, { "epoch": 2.652482269503546, "grad_norm": 0.22910958528518677, "learning_rate": 3.1252940798940757e-06, "loss": 0.3806, "step": 12903 }, { "epoch": 2.6526878404769247, "grad_norm": 0.2289619743824005, "learning_rate": 3.1216377927665083e-06, "loss": 0.3707, "step": 12904 }, { "epoch": 2.6528934114503033, "grad_norm": 0.2235552966594696, "learning_rate": 3.1179835687901104e-06, "loss": 0.3581, "step": 12905 }, { "epoch": 2.653098982423682, "grad_norm": 0.23109345138072968, "learning_rate": 3.1143314081449036e-06, "loss": 0.3845, "step": 12906 }, { "epoch": 2.65330455339706, "grad_norm": 0.22421690821647644, "learning_rate": 3.1106813110108143e-06, "loss": 0.349, "step": 12907 }, { "epoch": 2.653510124370439, "grad_norm": 0.23160065710544586, "learning_rate": 3.1070332775676675e-06, "loss": 0.3736, "step": 12908 }, { "epoch": 2.6537156953438172, "grad_norm": 0.24303646385669708, "learning_rate": 3.1033873079951803e-06, "loss": 0.3866, "step": 12909 }, { "epoch": 2.6539212663171963, "grad_norm": 0.23496584594249725, "learning_rate": 3.0997434024729737e-06, "loss": 0.3996, "step": 12910 }, { "epoch": 2.6541268372905744, "grad_norm": 0.23909246921539307, "learning_rate": 3.0961015611805742e-06, "loss": 0.3741, "step": 12911 }, { "epoch": 2.654332408263953, "grad_norm": 0.24438230693340302, "learning_rate": 3.0924617842973936e-06, "loss": 0.3972, "step": 12912 }, { "epoch": 2.6545379792373316, "grad_norm": 0.23331284523010254, "learning_rate": 3.0888240720027427e-06, "loss": 0.3722, "step": 12913 }, { "epoch": 2.65474355021071, "grad_norm": 0.22317220270633698, "learning_rate": 3.085188424475834e-06, "loss": 0.3871, "step": 12914 }, { "epoch": 2.6549491211840888, "grad_norm": 0.2298220545053482, "learning_rate": 3.0815548418957884e-06, "loss": 0.3819, "step": 12915 }, { "epoch": 2.6551546921574674, "grad_norm": 0.22008730471134186, "learning_rate": 3.0779233244416084e-06, "loss": 0.374, "step": 12916 }, { "epoch": 2.655360263130846, "grad_norm": 0.22563999891281128, "learning_rate": 3.0742938722921956e-06, "loss": 0.3705, "step": 12917 }, { "epoch": 2.6555658341042245, "grad_norm": 0.231903076171875, "learning_rate": 3.070666485626367e-06, "loss": 0.389, "step": 12918 }, { "epoch": 2.655771405077603, "grad_norm": 0.2327851802110672, "learning_rate": 3.067041164622829e-06, "loss": 0.3835, "step": 12919 }, { "epoch": 2.6559769760509817, "grad_norm": 0.225325807929039, "learning_rate": 3.063417909460175e-06, "loss": 0.3936, "step": 12920 }, { "epoch": 2.6561825470243603, "grad_norm": 0.12957926094532013, "learning_rate": 3.0597967203169113e-06, "loss": 0.4463, "step": 12921 }, { "epoch": 2.656388117997739, "grad_norm": 0.22962552309036255, "learning_rate": 3.056177597371436e-06, "loss": 0.3842, "step": 12922 }, { "epoch": 2.6565936889711175, "grad_norm": 0.22653664648532867, "learning_rate": 3.0525605408020405e-06, "loss": 0.3896, "step": 12923 }, { "epoch": 2.6567992599444956, "grad_norm": 0.23864829540252686, "learning_rate": 3.0489455507869275e-06, "loss": 0.3847, "step": 12924 }, { "epoch": 2.6570048309178746, "grad_norm": 0.22004222869873047, "learning_rate": 3.0453326275041898e-06, "loss": 0.3739, "step": 12925 }, { "epoch": 2.657210401891253, "grad_norm": 0.22327813506126404, "learning_rate": 3.0417217711318203e-06, "loss": 0.3769, "step": 12926 }, { "epoch": 2.6574159728646314, "grad_norm": 0.2343619167804718, "learning_rate": 3.038112981847706e-06, "loss": 0.395, "step": 12927 }, { "epoch": 2.65762154383801, "grad_norm": 0.2278946340084076, "learning_rate": 3.034506259829635e-06, "loss": 0.3972, "step": 12928 }, { "epoch": 2.6578271148113886, "grad_norm": 0.23201905190944672, "learning_rate": 3.030901605255296e-06, "loss": 0.3898, "step": 12929 }, { "epoch": 2.658032685784767, "grad_norm": 0.2293037325143814, "learning_rate": 3.0272990183022606e-06, "loss": 0.3821, "step": 12930 }, { "epoch": 2.6582382567581457, "grad_norm": 0.12265797704458237, "learning_rate": 3.0236984991480323e-06, "loss": 0.447, "step": 12931 }, { "epoch": 2.6584438277315243, "grad_norm": 0.2273169308900833, "learning_rate": 3.0201000479699793e-06, "loss": 0.373, "step": 12932 }, { "epoch": 2.658649398704903, "grad_norm": 0.12377490103244781, "learning_rate": 3.01650366494539e-06, "loss": 0.458, "step": 12933 }, { "epoch": 2.6588549696782815, "grad_norm": 0.2221972793340683, "learning_rate": 3.012909350251427e-06, "loss": 0.3638, "step": 12934 }, { "epoch": 2.65906054065166, "grad_norm": 0.22611477971076965, "learning_rate": 3.0093171040651795e-06, "loss": 0.3919, "step": 12935 }, { "epoch": 2.6592661116250387, "grad_norm": 0.22873830795288086, "learning_rate": 3.005726926563606e-06, "loss": 0.3743, "step": 12936 }, { "epoch": 2.6594716825984173, "grad_norm": 0.24191910028457642, "learning_rate": 3.0021388179235887e-06, "loss": 0.3736, "step": 12937 }, { "epoch": 2.659677253571796, "grad_norm": 0.2301923930644989, "learning_rate": 2.9985527783218924e-06, "loss": 0.3863, "step": 12938 }, { "epoch": 2.659882824545174, "grad_norm": 0.24405382573604584, "learning_rate": 2.9949688079351906e-06, "loss": 0.3997, "step": 12939 }, { "epoch": 2.660088395518553, "grad_norm": 0.23321811854839325, "learning_rate": 2.991386906940047e-06, "loss": 0.3724, "step": 12940 }, { "epoch": 2.660293966491931, "grad_norm": 0.1216077208518982, "learning_rate": 2.98780707551292e-06, "loss": 0.438, "step": 12941 }, { "epoch": 2.6604995374653098, "grad_norm": 0.23895247280597687, "learning_rate": 2.984229313830179e-06, "loss": 0.3645, "step": 12942 }, { "epoch": 2.6607051084386883, "grad_norm": 0.2277345210313797, "learning_rate": 2.9806536220680733e-06, "loss": 0.3865, "step": 12943 }, { "epoch": 2.660910679412067, "grad_norm": 0.22947533428668976, "learning_rate": 2.977080000402761e-06, "loss": 0.3807, "step": 12944 }, { "epoch": 2.6611162503854455, "grad_norm": 0.22254477441310883, "learning_rate": 2.973508449010307e-06, "loss": 0.3799, "step": 12945 }, { "epoch": 2.661321821358824, "grad_norm": 0.23514899611473083, "learning_rate": 2.9699389680666607e-06, "loss": 0.3769, "step": 12946 }, { "epoch": 2.6615273923322027, "grad_norm": 0.2325250208377838, "learning_rate": 2.9663715577476757e-06, "loss": 0.3932, "step": 12947 }, { "epoch": 2.6617329633055813, "grad_norm": 0.23977595567703247, "learning_rate": 2.962806218229097e-06, "loss": 0.3916, "step": 12948 }, { "epoch": 2.66193853427896, "grad_norm": 0.23064671456813812, "learning_rate": 2.9592429496865793e-06, "loss": 0.3747, "step": 12949 }, { "epoch": 2.6621441052523385, "grad_norm": 0.22543418407440186, "learning_rate": 2.9556817522956613e-06, "loss": 0.3767, "step": 12950 }, { "epoch": 2.662349676225717, "grad_norm": 0.23423805832862854, "learning_rate": 2.9521226262317785e-06, "loss": 0.3838, "step": 12951 }, { "epoch": 2.6625552471990956, "grad_norm": 0.22551970183849335, "learning_rate": 2.9485655716702904e-06, "loss": 0.3817, "step": 12952 }, { "epoch": 2.6627608181724742, "grad_norm": 0.2365717738866806, "learning_rate": 2.9450105887864316e-06, "loss": 0.3874, "step": 12953 }, { "epoch": 2.6629663891458524, "grad_norm": 0.24153275787830353, "learning_rate": 2.941457677755337e-06, "loss": 0.3949, "step": 12954 }, { "epoch": 2.6631719601192314, "grad_norm": 0.23784461617469788, "learning_rate": 2.937906838752037e-06, "loss": 0.3925, "step": 12955 }, { "epoch": 2.6633775310926096, "grad_norm": 0.23372387886047363, "learning_rate": 2.934358071951471e-06, "loss": 0.4013, "step": 12956 }, { "epoch": 2.663583102065988, "grad_norm": 0.24772094190120697, "learning_rate": 2.930811377528465e-06, "loss": 0.3938, "step": 12957 }, { "epoch": 2.6637886730393667, "grad_norm": 0.24129636585712433, "learning_rate": 2.927266755657754e-06, "loss": 0.3854, "step": 12958 }, { "epoch": 2.6639942440127453, "grad_norm": 0.11964880675077438, "learning_rate": 2.9237242065139626e-06, "loss": 0.4409, "step": 12959 }, { "epoch": 2.664199814986124, "grad_norm": 0.23363502323627472, "learning_rate": 2.9201837302716118e-06, "loss": 0.3931, "step": 12960 }, { "epoch": 2.6644053859595025, "grad_norm": 0.23559674620628357, "learning_rate": 2.916645327105132e-06, "loss": 0.3897, "step": 12961 }, { "epoch": 2.664610956932881, "grad_norm": 0.2335934042930603, "learning_rate": 2.913108997188844e-06, "loss": 0.3799, "step": 12962 }, { "epoch": 2.6648165279062597, "grad_norm": 0.23663899302482605, "learning_rate": 2.9095747406969577e-06, "loss": 0.3606, "step": 12963 }, { "epoch": 2.6650220988796383, "grad_norm": 0.22651928663253784, "learning_rate": 2.9060425578035995e-06, "loss": 0.3795, "step": 12964 }, { "epoch": 2.665227669853017, "grad_norm": 0.22793136537075043, "learning_rate": 2.902512448682765e-06, "loss": 0.3749, "step": 12965 }, { "epoch": 2.6654332408263954, "grad_norm": 0.2406536191701889, "learning_rate": 2.898984413508385e-06, "loss": 0.3877, "step": 12966 }, { "epoch": 2.665638811799774, "grad_norm": 0.24164964258670807, "learning_rate": 2.8954584524542707e-06, "loss": 0.3982, "step": 12967 }, { "epoch": 2.6658443827731526, "grad_norm": 0.2386479675769806, "learning_rate": 2.891934565694118e-06, "loss": 0.3901, "step": 12968 }, { "epoch": 2.6660499537465308, "grad_norm": 0.231131449341774, "learning_rate": 2.8884127534015327e-06, "loss": 0.3654, "step": 12969 }, { "epoch": 2.66625552471991, "grad_norm": 0.12683962285518646, "learning_rate": 2.8848930157500264e-06, "loss": 0.4251, "step": 12970 }, { "epoch": 2.666461095693288, "grad_norm": 0.23223094642162323, "learning_rate": 2.8813753529129956e-06, "loss": 0.3818, "step": 12971 }, { "epoch": 2.6666666666666665, "grad_norm": 0.22929398715496063, "learning_rate": 2.8778597650637312e-06, "loss": 0.3858, "step": 12972 }, { "epoch": 2.666872237640045, "grad_norm": 0.22222407162189484, "learning_rate": 2.874346252375445e-06, "loss": 0.3972, "step": 12973 }, { "epoch": 2.6670778086134237, "grad_norm": 0.22839607298374176, "learning_rate": 2.8708348150212236e-06, "loss": 0.3687, "step": 12974 }, { "epoch": 2.6672833795868023, "grad_norm": 0.22204485535621643, "learning_rate": 2.867325453174063e-06, "loss": 0.3772, "step": 12975 }, { "epoch": 2.667488950560181, "grad_norm": 0.12451615929603577, "learning_rate": 2.8638181670068452e-06, "loss": 0.4541, "step": 12976 }, { "epoch": 2.6676945215335595, "grad_norm": 0.24160999059677124, "learning_rate": 2.8603129566923676e-06, "loss": 0.3808, "step": 12977 }, { "epoch": 2.667900092506938, "grad_norm": 0.23395465314388275, "learning_rate": 2.8568098224032963e-06, "loss": 0.4002, "step": 12978 }, { "epoch": 2.6681056634803166, "grad_norm": 0.22567373514175415, "learning_rate": 2.8533087643122387e-06, "loss": 0.3679, "step": 12979 }, { "epoch": 2.6683112344536952, "grad_norm": 0.2304529845714569, "learning_rate": 2.8498097825916664e-06, "loss": 0.3783, "step": 12980 }, { "epoch": 2.668516805427074, "grad_norm": 0.22995389997959137, "learning_rate": 2.846312877413947e-06, "loss": 0.3743, "step": 12981 }, { "epoch": 2.6687223764004524, "grad_norm": 0.2304750382900238, "learning_rate": 2.842818048951377e-06, "loss": 0.3882, "step": 12982 }, { "epoch": 2.668927947373831, "grad_norm": 0.23300042748451233, "learning_rate": 2.8393252973761146e-06, "loss": 0.3901, "step": 12983 }, { "epoch": 2.669133518347209, "grad_norm": 0.231519877910614, "learning_rate": 2.8358346228602416e-06, "loss": 0.3797, "step": 12984 }, { "epoch": 2.669339089320588, "grad_norm": 0.22919991612434387, "learning_rate": 2.8323460255757206e-06, "loss": 0.3678, "step": 12985 }, { "epoch": 2.6695446602939663, "grad_norm": 0.2351444512605667, "learning_rate": 2.828859505694409e-06, "loss": 0.3931, "step": 12986 }, { "epoch": 2.669750231267345, "grad_norm": 0.12017477303743362, "learning_rate": 2.8253750633880943e-06, "loss": 0.4364, "step": 12987 }, { "epoch": 2.6699558022407235, "grad_norm": 0.22112242877483368, "learning_rate": 2.8218926988284245e-06, "loss": 0.3572, "step": 12988 }, { "epoch": 2.670161373214102, "grad_norm": 0.23203538358211517, "learning_rate": 2.8184124121869572e-06, "loss": 0.3769, "step": 12989 }, { "epoch": 2.6703669441874807, "grad_norm": 0.24650564789772034, "learning_rate": 2.81493420363516e-06, "loss": 0.3939, "step": 12990 }, { "epoch": 2.6705725151608593, "grad_norm": 0.22376540303230286, "learning_rate": 2.8114580733443815e-06, "loss": 0.3736, "step": 12991 }, { "epoch": 2.670778086134238, "grad_norm": 0.23529557883739471, "learning_rate": 2.8079840214858738e-06, "loss": 0.4071, "step": 12992 }, { "epoch": 2.6709836571076164, "grad_norm": 0.22694729268550873, "learning_rate": 2.804512048230781e-06, "loss": 0.366, "step": 12993 }, { "epoch": 2.671189228080995, "grad_norm": 0.11928309500217438, "learning_rate": 2.8010421537501653e-06, "loss": 0.4387, "step": 12994 }, { "epoch": 2.6713947990543736, "grad_norm": 0.22787900269031525, "learning_rate": 2.7975743382149655e-06, "loss": 0.3641, "step": 12995 }, { "epoch": 2.671600370027752, "grad_norm": 0.23789376020431519, "learning_rate": 2.79410860179602e-06, "loss": 0.3965, "step": 12996 }, { "epoch": 2.671805941001131, "grad_norm": 0.23560819029808044, "learning_rate": 2.790644944664082e-06, "loss": 0.3903, "step": 12997 }, { "epoch": 2.6720115119745094, "grad_norm": 0.1282123327255249, "learning_rate": 2.787183366989775e-06, "loss": 0.4493, "step": 12998 }, { "epoch": 2.6722170829478875, "grad_norm": 0.2356158196926117, "learning_rate": 2.783723868943638e-06, "loss": 0.3806, "step": 12999 }, { "epoch": 2.6724226539212665, "grad_norm": 0.22649535536766052, "learning_rate": 2.780266450696114e-06, "loss": 0.3694, "step": 13000 }, { "epoch": 2.6726282248946447, "grad_norm": 0.22387070953845978, "learning_rate": 2.7768111124175274e-06, "loss": 0.3648, "step": 13001 }, { "epoch": 2.6728337958680233, "grad_norm": 0.12043121457099915, "learning_rate": 2.7733578542780964e-06, "loss": 0.4574, "step": 13002 }, { "epoch": 2.673039366841402, "grad_norm": 0.22910076379776, "learning_rate": 2.7699066764479703e-06, "loss": 0.3764, "step": 13003 }, { "epoch": 2.6732449378147805, "grad_norm": 0.2299896627664566, "learning_rate": 2.766457579097153e-06, "loss": 0.3939, "step": 13004 }, { "epoch": 2.673450508788159, "grad_norm": 0.24078021943569183, "learning_rate": 2.763010562395579e-06, "loss": 0.385, "step": 13005 }, { "epoch": 2.6736560797615376, "grad_norm": 0.11954071372747421, "learning_rate": 2.7595656265130464e-06, "loss": 0.4594, "step": 13006 }, { "epoch": 2.673861650734916, "grad_norm": 0.23649781942367554, "learning_rate": 2.7561227716192906e-06, "loss": 0.3816, "step": 13007 }, { "epoch": 2.674067221708295, "grad_norm": 0.21963661909103394, "learning_rate": 2.75268199788392e-06, "loss": 0.3746, "step": 13008 }, { "epoch": 2.6742727926816734, "grad_norm": 0.2391149252653122, "learning_rate": 2.749243305476445e-06, "loss": 0.3899, "step": 13009 }, { "epoch": 2.674478363655052, "grad_norm": 0.22582948207855225, "learning_rate": 2.745806694566274e-06, "loss": 0.3797, "step": 13010 }, { "epoch": 2.6746839346284306, "grad_norm": 0.23774947226047516, "learning_rate": 2.7423721653227076e-06, "loss": 0.3978, "step": 13011 }, { "epoch": 2.674889505601809, "grad_norm": 0.2316160500049591, "learning_rate": 2.7389397179149596e-06, "loss": 0.3722, "step": 13012 }, { "epoch": 2.6750950765751877, "grad_norm": 0.22677737474441528, "learning_rate": 2.73550935251211e-06, "loss": 0.3865, "step": 13013 }, { "epoch": 2.675300647548566, "grad_norm": 0.2274550050497055, "learning_rate": 2.732081069283179e-06, "loss": 0.3732, "step": 13014 }, { "epoch": 2.675506218521945, "grad_norm": 0.23151232302188873, "learning_rate": 2.728654868397056e-06, "loss": 0.3861, "step": 13015 }, { "epoch": 2.675711789495323, "grad_norm": 0.12545832991600037, "learning_rate": 2.725230750022531e-06, "loss": 0.4512, "step": 13016 }, { "epoch": 2.6759173604687017, "grad_norm": 0.24073415994644165, "learning_rate": 2.7218087143282994e-06, "loss": 0.3836, "step": 13017 }, { "epoch": 2.6761229314420802, "grad_norm": 0.23176778852939606, "learning_rate": 2.7183887614829412e-06, "loss": 0.4068, "step": 13018 }, { "epoch": 2.676328502415459, "grad_norm": 0.23318178951740265, "learning_rate": 2.7149708916549418e-06, "loss": 0.3968, "step": 13019 }, { "epoch": 2.6765340733888374, "grad_norm": 0.24132607877254486, "learning_rate": 2.711555105012681e-06, "loss": 0.3644, "step": 13020 }, { "epoch": 2.676739644362216, "grad_norm": 0.22596125304698944, "learning_rate": 2.7081414017244543e-06, "loss": 0.3598, "step": 13021 }, { "epoch": 2.6769452153355946, "grad_norm": 0.2259039431810379, "learning_rate": 2.7047297819584276e-06, "loss": 0.3662, "step": 13022 }, { "epoch": 2.677150786308973, "grad_norm": 0.23511864244937897, "learning_rate": 2.7013202458826765e-06, "loss": 0.4058, "step": 13023 }, { "epoch": 2.6773563572823518, "grad_norm": 0.24032087624073029, "learning_rate": 2.697912793665171e-06, "loss": 0.3719, "step": 13024 }, { "epoch": 2.6775619282557304, "grad_norm": 0.23492936789989471, "learning_rate": 2.6945074254737823e-06, "loss": 0.3734, "step": 13025 }, { "epoch": 2.677767499229109, "grad_norm": 0.23162946105003357, "learning_rate": 2.691104141476281e-06, "loss": 0.3805, "step": 13026 }, { "epoch": 2.6779730702024875, "grad_norm": 0.24035188555717468, "learning_rate": 2.6877029418403233e-06, "loss": 0.3693, "step": 13027 }, { "epoch": 2.678178641175866, "grad_norm": 0.23720598220825195, "learning_rate": 2.6843038267334797e-06, "loss": 0.4006, "step": 13028 }, { "epoch": 2.6783842121492443, "grad_norm": 0.23743665218353271, "learning_rate": 2.6809067963232016e-06, "loss": 0.4038, "step": 13029 }, { "epoch": 2.6785897831226233, "grad_norm": 0.240424245595932, "learning_rate": 2.677511850776845e-06, "loss": 0.3842, "step": 13030 }, { "epoch": 2.6787953540960014, "grad_norm": 0.1235266923904419, "learning_rate": 2.674118990261666e-06, "loss": 0.4391, "step": 13031 }, { "epoch": 2.6790009250693805, "grad_norm": 0.23002861440181732, "learning_rate": 2.670728214944816e-06, "loss": 0.384, "step": 13032 }, { "epoch": 2.6792064960427586, "grad_norm": 0.22837281227111816, "learning_rate": 2.6673395249933415e-06, "loss": 0.38, "step": 13033 }, { "epoch": 2.679412067016137, "grad_norm": 0.24020573496818542, "learning_rate": 2.6639529205741737e-06, "loss": 0.3887, "step": 13034 }, { "epoch": 2.679617637989516, "grad_norm": 0.24188318848609924, "learning_rate": 2.6605684018541794e-06, "loss": 0.3972, "step": 13035 }, { "epoch": 2.6798232089628944, "grad_norm": 0.12417499721050262, "learning_rate": 2.657185969000085e-06, "loss": 0.4522, "step": 13036 }, { "epoch": 2.680028779936273, "grad_norm": 0.21679937839508057, "learning_rate": 2.653805622178527e-06, "loss": 0.3873, "step": 13037 }, { "epoch": 2.6802343509096516, "grad_norm": 0.22777822613716125, "learning_rate": 2.6504273615560383e-06, "loss": 0.3618, "step": 13038 }, { "epoch": 2.68043992188303, "grad_norm": 0.12555932998657227, "learning_rate": 2.6470511872990544e-06, "loss": 0.4445, "step": 13039 }, { "epoch": 2.6806454928564087, "grad_norm": 0.23415377736091614, "learning_rate": 2.643677099573903e-06, "loss": 0.3909, "step": 13040 }, { "epoch": 2.6808510638297873, "grad_norm": 0.22409114241600037, "learning_rate": 2.640305098546801e-06, "loss": 0.362, "step": 13041 }, { "epoch": 2.681056634803166, "grad_norm": 0.23534564673900604, "learning_rate": 2.6369351843838803e-06, "loss": 0.3977, "step": 13042 }, { "epoch": 2.6812622057765445, "grad_norm": 0.23140472173690796, "learning_rate": 2.633567357251163e-06, "loss": 0.3775, "step": 13043 }, { "epoch": 2.6814677767499226, "grad_norm": 0.23929573595523834, "learning_rate": 2.630201617314557e-06, "loss": 0.3746, "step": 13044 }, { "epoch": 2.6816733477233017, "grad_norm": 0.23926587402820587, "learning_rate": 2.6268379647398795e-06, "loss": 0.3772, "step": 13045 }, { "epoch": 2.68187891869668, "grad_norm": 0.23361510038375854, "learning_rate": 2.6234763996928526e-06, "loss": 0.3855, "step": 13046 }, { "epoch": 2.682084489670059, "grad_norm": 0.23641300201416016, "learning_rate": 2.620116922339069e-06, "loss": 0.3859, "step": 13047 }, { "epoch": 2.682290060643437, "grad_norm": 0.22642360627651215, "learning_rate": 2.616759532844041e-06, "loss": 0.375, "step": 13048 }, { "epoch": 2.6824956316168156, "grad_norm": 0.22510544955730438, "learning_rate": 2.6134042313731765e-06, "loss": 0.3614, "step": 13049 }, { "epoch": 2.682701202590194, "grad_norm": 0.23352572321891785, "learning_rate": 2.6100510180917686e-06, "loss": 0.3866, "step": 13050 }, { "epoch": 2.6829067735635728, "grad_norm": 0.2314728945493698, "learning_rate": 2.60669989316502e-06, "loss": 0.3931, "step": 13051 }, { "epoch": 2.6831123445369514, "grad_norm": 0.23167473077774048, "learning_rate": 2.603350856758018e-06, "loss": 0.3845, "step": 13052 }, { "epoch": 2.68331791551033, "grad_norm": 0.23171542584896088, "learning_rate": 2.600003909035762e-06, "loss": 0.3828, "step": 13053 }, { "epoch": 2.6835234864837085, "grad_norm": 0.12145873159170151, "learning_rate": 2.596659050163139e-06, "loss": 0.4463, "step": 13054 }, { "epoch": 2.683729057457087, "grad_norm": 0.22926872968673706, "learning_rate": 2.593316280304917e-06, "loss": 0.3856, "step": 13055 }, { "epoch": 2.6839346284304657, "grad_norm": 0.2303893268108368, "learning_rate": 2.589975599625805e-06, "loss": 0.3838, "step": 13056 }, { "epoch": 2.6841401994038443, "grad_norm": 0.2381599098443985, "learning_rate": 2.5866370082903713e-06, "loss": 0.3799, "step": 13057 }, { "epoch": 2.684345770377223, "grad_norm": 0.23543013632297516, "learning_rate": 2.583300506463094e-06, "loss": 0.385, "step": 13058 }, { "epoch": 2.684551341350601, "grad_norm": 0.2375613898038864, "learning_rate": 2.5799660943083415e-06, "loss": 0.388, "step": 13059 }, { "epoch": 2.68475691232398, "grad_norm": 0.22905340790748596, "learning_rate": 2.5766337719903927e-06, "loss": 0.3664, "step": 13060 }, { "epoch": 2.684962483297358, "grad_norm": 0.23459582030773163, "learning_rate": 2.5733035396734113e-06, "loss": 0.3786, "step": 13061 }, { "epoch": 2.6851680542707372, "grad_norm": 0.23848964273929596, "learning_rate": 2.569975397521451e-06, "loss": 0.374, "step": 13062 }, { "epoch": 2.6853736252441154, "grad_norm": 0.22707267105579376, "learning_rate": 2.5666493456985e-06, "loss": 0.3724, "step": 13063 }, { "epoch": 2.685579196217494, "grad_norm": 0.22259126603603363, "learning_rate": 2.5633253843683986e-06, "loss": 0.3879, "step": 13064 }, { "epoch": 2.6857847671908726, "grad_norm": 0.23496946692466736, "learning_rate": 2.5600035136949045e-06, "loss": 0.3857, "step": 13065 }, { "epoch": 2.685990338164251, "grad_norm": 0.23178550601005554, "learning_rate": 2.5566837338416676e-06, "loss": 0.3643, "step": 13066 }, { "epoch": 2.6861959091376297, "grad_norm": 0.22792139649391174, "learning_rate": 2.553366044972252e-06, "loss": 0.3983, "step": 13067 }, { "epoch": 2.6864014801110083, "grad_norm": 0.12821319699287415, "learning_rate": 2.5500504472500965e-06, "loss": 0.4591, "step": 13068 }, { "epoch": 2.686607051084387, "grad_norm": 0.22924353182315826, "learning_rate": 2.5467369408385405e-06, "loss": 0.3922, "step": 13069 }, { "epoch": 2.6868126220577655, "grad_norm": 0.2222532331943512, "learning_rate": 2.5434255259008338e-06, "loss": 0.3853, "step": 13070 }, { "epoch": 2.687018193031144, "grad_norm": 0.2258753478527069, "learning_rate": 2.5401162026001056e-06, "loss": 0.3812, "step": 13071 }, { "epoch": 2.6872237640045227, "grad_norm": 0.12898650765419006, "learning_rate": 2.536808971099401e-06, "loss": 0.459, "step": 13072 }, { "epoch": 2.6874293349779013, "grad_norm": 0.23579534888267517, "learning_rate": 2.533503831561644e-06, "loss": 0.3781, "step": 13073 }, { "epoch": 2.6876349059512794, "grad_norm": 0.23496629297733307, "learning_rate": 2.5302007841496646e-06, "loss": 0.3986, "step": 13074 }, { "epoch": 2.6878404769246584, "grad_norm": 0.23853163421154022, "learning_rate": 2.5268998290261877e-06, "loss": 0.3919, "step": 13075 }, { "epoch": 2.6880460478980366, "grad_norm": 0.23851320147514343, "learning_rate": 2.523600966353833e-06, "loss": 0.3835, "step": 13076 }, { "epoch": 2.6882516188714156, "grad_norm": 0.12485864758491516, "learning_rate": 2.5203041962951306e-06, "loss": 0.447, "step": 13077 }, { "epoch": 2.6884571898447938, "grad_norm": 0.22811704874038696, "learning_rate": 2.517009519012496e-06, "loss": 0.3961, "step": 13078 }, { "epoch": 2.6886627608181723, "grad_norm": 0.2245602309703827, "learning_rate": 2.513716934668229e-06, "loss": 0.3827, "step": 13079 }, { "epoch": 2.688868331791551, "grad_norm": 0.23157405853271484, "learning_rate": 2.5104264434245545e-06, "loss": 0.3791, "step": 13080 }, { "epoch": 2.6890739027649295, "grad_norm": 0.2352142632007599, "learning_rate": 2.5071380454435682e-06, "loss": 0.3827, "step": 13081 }, { "epoch": 2.689279473738308, "grad_norm": 0.24821443855762482, "learning_rate": 2.503851740887276e-06, "loss": 0.3834, "step": 13082 }, { "epoch": 2.6894850447116867, "grad_norm": 0.2236967235803604, "learning_rate": 2.5005675299175875e-06, "loss": 0.3846, "step": 13083 }, { "epoch": 2.6896906156850653, "grad_norm": 0.2317054569721222, "learning_rate": 2.4972854126962986e-06, "loss": 0.3587, "step": 13084 }, { "epoch": 2.689896186658444, "grad_norm": 0.2305641770362854, "learning_rate": 2.494005389385095e-06, "loss": 0.3853, "step": 13085 }, { "epoch": 2.6901017576318225, "grad_norm": 0.22506798803806305, "learning_rate": 2.4907274601455726e-06, "loss": 0.371, "step": 13086 }, { "epoch": 2.690307328605201, "grad_norm": 0.23190316557884216, "learning_rate": 2.487451625139217e-06, "loss": 0.3807, "step": 13087 }, { "epoch": 2.6905128995785796, "grad_norm": 0.23732031881809235, "learning_rate": 2.4841778845274242e-06, "loss": 0.3917, "step": 13088 }, { "epoch": 2.690718470551958, "grad_norm": 0.23446981608867645, "learning_rate": 2.4809062384714706e-06, "loss": 0.3926, "step": 13089 }, { "epoch": 2.690924041525337, "grad_norm": 0.2362259030342102, "learning_rate": 2.4776366871325213e-06, "loss": 0.3592, "step": 13090 }, { "epoch": 2.691129612498715, "grad_norm": 0.12213429063558578, "learning_rate": 2.4743692306716734e-06, "loss": 0.4355, "step": 13091 }, { "epoch": 2.691335183472094, "grad_norm": 0.23348113894462585, "learning_rate": 2.4711038692498873e-06, "loss": 0.3789, "step": 13092 }, { "epoch": 2.691540754445472, "grad_norm": 0.23292915523052216, "learning_rate": 2.46784060302803e-06, "loss": 0.3832, "step": 13093 }, { "epoch": 2.6917463254188507, "grad_norm": 0.12205676734447479, "learning_rate": 2.4645794321668774e-06, "loss": 0.4589, "step": 13094 }, { "epoch": 2.6919518963922293, "grad_norm": 0.24196146428585052, "learning_rate": 2.4613203568270864e-06, "loss": 0.3818, "step": 13095 }, { "epoch": 2.692157467365608, "grad_norm": 0.23547834157943726, "learning_rate": 2.4580633771692036e-06, "loss": 0.3813, "step": 13096 }, { "epoch": 2.6923630383389865, "grad_norm": 0.23056018352508545, "learning_rate": 2.4548084933537104e-06, "loss": 0.3645, "step": 13097 }, { "epoch": 2.692568609312365, "grad_norm": 0.23042161762714386, "learning_rate": 2.4515557055409433e-06, "loss": 0.3756, "step": 13098 }, { "epoch": 2.6927741802857437, "grad_norm": 0.22752924263477325, "learning_rate": 2.4483050138911598e-06, "loss": 0.3805, "step": 13099 }, { "epoch": 2.6929797512591223, "grad_norm": 0.12008198350667953, "learning_rate": 2.445056418564496e-06, "loss": 0.442, "step": 13100 }, { "epoch": 2.693185322232501, "grad_norm": 0.23437613248825073, "learning_rate": 2.4418099197210043e-06, "loss": 0.3716, "step": 13101 }, { "epoch": 2.6933908932058794, "grad_norm": 0.2585579752922058, "learning_rate": 2.438565517520622e-06, "loss": 0.3716, "step": 13102 }, { "epoch": 2.693596464179258, "grad_norm": 0.23899348080158234, "learning_rate": 2.4353232121231807e-06, "loss": 0.3918, "step": 13103 }, { "epoch": 2.6938020351526366, "grad_norm": 0.23468652367591858, "learning_rate": 2.432083003688423e-06, "loss": 0.4057, "step": 13104 }, { "epoch": 2.694007606126015, "grad_norm": 0.2242051213979721, "learning_rate": 2.428844892375971e-06, "loss": 0.3746, "step": 13105 }, { "epoch": 2.6942131770993933, "grad_norm": 0.22958362102508545, "learning_rate": 2.4256088783453573e-06, "loss": 0.3729, "step": 13106 }, { "epoch": 2.6944187480727724, "grad_norm": 0.2308982014656067, "learning_rate": 2.4223749617559994e-06, "loss": 0.3924, "step": 13107 }, { "epoch": 2.6946243190461505, "grad_norm": 0.22810958325862885, "learning_rate": 2.4191431427672194e-06, "loss": 0.378, "step": 13108 }, { "epoch": 2.694829890019529, "grad_norm": 0.2260715216398239, "learning_rate": 2.4159134215382305e-06, "loss": 0.3683, "step": 13109 }, { "epoch": 2.6950354609929077, "grad_norm": 0.22648762166500092, "learning_rate": 2.4126857982281553e-06, "loss": 0.3933, "step": 13110 }, { "epoch": 2.6952410319662863, "grad_norm": 0.2309451550245285, "learning_rate": 2.4094602729959916e-06, "loss": 0.3877, "step": 13111 }, { "epoch": 2.695446602939665, "grad_norm": 0.23046442866325378, "learning_rate": 2.406236846000657e-06, "loss": 0.3708, "step": 13112 }, { "epoch": 2.6956521739130435, "grad_norm": 0.23607337474822998, "learning_rate": 2.4030155174009545e-06, "loss": 0.3971, "step": 13113 }, { "epoch": 2.695857744886422, "grad_norm": 0.12376264482736588, "learning_rate": 2.3997962873555773e-06, "loss": 0.4165, "step": 13114 }, { "epoch": 2.6960633158598006, "grad_norm": 0.22271588444709778, "learning_rate": 2.396579156023124e-06, "loss": 0.3727, "step": 13115 }, { "epoch": 2.6962688868331792, "grad_norm": 0.22929471731185913, "learning_rate": 2.393364123562087e-06, "loss": 0.3777, "step": 13116 }, { "epoch": 2.696474457806558, "grad_norm": 0.12126558274030685, "learning_rate": 2.39015119013085e-06, "loss": 0.4323, "step": 13117 }, { "epoch": 2.6966800287799364, "grad_norm": 0.22162644565105438, "learning_rate": 2.3869403558877163e-06, "loss": 0.3741, "step": 13118 }, { "epoch": 2.696885599753315, "grad_norm": 0.12397125363349915, "learning_rate": 2.3837316209908546e-06, "loss": 0.4487, "step": 13119 }, { "epoch": 2.6970911707266936, "grad_norm": 0.11935931444168091, "learning_rate": 2.380524985598348e-06, "loss": 0.4464, "step": 13120 }, { "epoch": 2.6972967417000717, "grad_norm": 0.23302531242370605, "learning_rate": 2.3773204498681758e-06, "loss": 0.3829, "step": 13121 }, { "epoch": 2.6975023126734508, "grad_norm": 0.12195220589637756, "learning_rate": 2.374118013958206e-06, "loss": 0.4471, "step": 13122 }, { "epoch": 2.697707883646829, "grad_norm": 0.23271100223064423, "learning_rate": 2.3709176780262076e-06, "loss": 0.3808, "step": 13123 }, { "epoch": 2.6979134546202075, "grad_norm": 0.22822687029838562, "learning_rate": 2.36771944222984e-06, "loss": 0.3695, "step": 13124 }, { "epoch": 2.698119025593586, "grad_norm": 0.23481127619743347, "learning_rate": 2.3645233067266815e-06, "loss": 0.3999, "step": 13125 }, { "epoch": 2.6983245965669647, "grad_norm": 0.225934699177742, "learning_rate": 2.3613292716741816e-06, "loss": 0.3737, "step": 13126 }, { "epoch": 2.6985301675403432, "grad_norm": 0.23984263837337494, "learning_rate": 2.358137337229694e-06, "loss": 0.3819, "step": 13127 }, { "epoch": 2.698735738513722, "grad_norm": 0.2241523116827011, "learning_rate": 2.3549475035504733e-06, "loss": 0.3683, "step": 13128 }, { "epoch": 2.6989413094871004, "grad_norm": 0.2185996025800705, "learning_rate": 2.3517597707936636e-06, "loss": 0.3635, "step": 13129 }, { "epoch": 2.699146880460479, "grad_norm": 0.23561379313468933, "learning_rate": 2.3485741391163092e-06, "loss": 0.3819, "step": 13130 }, { "epoch": 2.6993524514338576, "grad_norm": 0.12253455072641373, "learning_rate": 2.3453906086753646e-06, "loss": 0.4446, "step": 13131 }, { "epoch": 2.699558022407236, "grad_norm": 0.22365736961364746, "learning_rate": 2.34220917962765e-06, "loss": 0.382, "step": 13132 }, { "epoch": 2.699763593380615, "grad_norm": 0.12028893828392029, "learning_rate": 2.339029852129909e-06, "loss": 0.4583, "step": 13133 }, { "epoch": 2.6999691643539934, "grad_norm": 0.12149006873369217, "learning_rate": 2.3358526263387715e-06, "loss": 0.4422, "step": 13134 }, { "epoch": 2.700174735327372, "grad_norm": 0.23423054814338684, "learning_rate": 2.3326775024107627e-06, "loss": 0.3989, "step": 13135 }, { "epoch": 2.70038030630075, "grad_norm": 0.2282380312681198, "learning_rate": 2.3295044805023075e-06, "loss": 0.3798, "step": 13136 }, { "epoch": 2.700585877274129, "grad_norm": 0.22905749082565308, "learning_rate": 2.3263335607697258e-06, "loss": 0.3987, "step": 13137 }, { "epoch": 2.7007914482475073, "grad_norm": 0.223682701587677, "learning_rate": 2.3231647433692273e-06, "loss": 0.3574, "step": 13138 }, { "epoch": 2.700997019220886, "grad_norm": 0.12208550423383713, "learning_rate": 2.3199980284569373e-06, "loss": 0.4494, "step": 13139 }, { "epoch": 2.7012025901942645, "grad_norm": 0.2424362748861313, "learning_rate": 2.316833416188861e-06, "loss": 0.3936, "step": 13140 }, { "epoch": 2.701408161167643, "grad_norm": 0.12195998430252075, "learning_rate": 2.313670906720899e-06, "loss": 0.4549, "step": 13141 }, { "epoch": 2.7016137321410216, "grad_norm": 0.23335258662700653, "learning_rate": 2.310510500208856e-06, "loss": 0.398, "step": 13142 }, { "epoch": 2.7018193031144, "grad_norm": 0.239247128367424, "learning_rate": 2.3073521968084285e-06, "loss": 0.3936, "step": 13143 }, { "epoch": 2.702024874087779, "grad_norm": 0.23259401321411133, "learning_rate": 2.304195996675216e-06, "loss": 0.395, "step": 13144 }, { "epoch": 2.7022304450611574, "grad_norm": 0.2279106080532074, "learning_rate": 2.3010418999646995e-06, "loss": 0.3653, "step": 13145 }, { "epoch": 2.702436016034536, "grad_norm": 0.23209701478481293, "learning_rate": 2.2978899068322845e-06, "loss": 0.3991, "step": 13146 }, { "epoch": 2.7026415870079146, "grad_norm": 0.23221181333065033, "learning_rate": 2.294740017433242e-06, "loss": 0.3696, "step": 13147 }, { "epoch": 2.702847157981293, "grad_norm": 0.23377148807048798, "learning_rate": 2.2915922319227536e-06, "loss": 0.3691, "step": 13148 }, { "epoch": 2.7030527289546717, "grad_norm": 0.23326507210731506, "learning_rate": 2.288446550455899e-06, "loss": 0.371, "step": 13149 }, { "epoch": 2.7032582999280503, "grad_norm": 0.11786891520023346, "learning_rate": 2.2853029731876445e-06, "loss": 0.4332, "step": 13150 }, { "epoch": 2.7034638709014285, "grad_norm": 0.23896630108356476, "learning_rate": 2.282161500272867e-06, "loss": 0.3665, "step": 13151 }, { "epoch": 2.7036694418748075, "grad_norm": 0.12194350361824036, "learning_rate": 2.2790221318663267e-06, "loss": 0.4324, "step": 13152 }, { "epoch": 2.7038750128481857, "grad_norm": 0.23250941932201385, "learning_rate": 2.275884868122696e-06, "loss": 0.3881, "step": 13153 }, { "epoch": 2.7040805838215642, "grad_norm": 0.232101172208786, "learning_rate": 2.272749709196515e-06, "loss": 0.3908, "step": 13154 }, { "epoch": 2.704286154794943, "grad_norm": 0.22767187654972076, "learning_rate": 2.269616655242261e-06, "loss": 0.3864, "step": 13155 }, { "epoch": 2.7044917257683214, "grad_norm": 0.232827827334404, "learning_rate": 2.2664857064142654e-06, "loss": 0.3791, "step": 13156 }, { "epoch": 2.7046972967417, "grad_norm": 0.12185569107532501, "learning_rate": 2.2633568628667894e-06, "loss": 0.4662, "step": 13157 }, { "epoch": 2.7049028677150786, "grad_norm": 0.23200562596321106, "learning_rate": 2.2602301247539605e-06, "loss": 0.3772, "step": 13158 }, { "epoch": 2.705108438688457, "grad_norm": 0.24151834845542908, "learning_rate": 2.2571054922298347e-06, "loss": 0.372, "step": 13159 }, { "epoch": 2.7053140096618358, "grad_norm": 0.23124399781227112, "learning_rate": 2.253982965448344e-06, "loss": 0.4018, "step": 13160 }, { "epoch": 2.7055195806352144, "grad_norm": 0.11762725561857224, "learning_rate": 2.250862544563316e-06, "loss": 0.4491, "step": 13161 }, { "epoch": 2.705725151608593, "grad_norm": 0.11691106110811234, "learning_rate": 2.2477442297284817e-06, "loss": 0.4284, "step": 13162 }, { "epoch": 2.7059307225819715, "grad_norm": 0.22589966654777527, "learning_rate": 2.244628021097469e-06, "loss": 0.3723, "step": 13163 }, { "epoch": 2.70613629355535, "grad_norm": 0.23272038996219635, "learning_rate": 2.24151391882379e-06, "loss": 0.3985, "step": 13164 }, { "epoch": 2.7063418645287287, "grad_norm": 0.22128084301948547, "learning_rate": 2.2384019230608664e-06, "loss": 0.3743, "step": 13165 }, { "epoch": 2.706547435502107, "grad_norm": 0.11852707713842392, "learning_rate": 2.2352920339620166e-06, "loss": 0.4401, "step": 13166 }, { "epoch": 2.706753006475486, "grad_norm": 0.23066598176956177, "learning_rate": 2.232184251680447e-06, "loss": 0.3604, "step": 13167 }, { "epoch": 2.706958577448864, "grad_norm": 0.11477980017662048, "learning_rate": 2.229078576369261e-06, "loss": 0.4282, "step": 13168 }, { "epoch": 2.7071641484222426, "grad_norm": 0.2299182116985321, "learning_rate": 2.2259750081814653e-06, "loss": 0.3933, "step": 13169 }, { "epoch": 2.707369719395621, "grad_norm": 0.22713468968868256, "learning_rate": 2.222873547269953e-06, "loss": 0.4137, "step": 13170 }, { "epoch": 2.707575290369, "grad_norm": 0.23036979138851166, "learning_rate": 2.2197741937875274e-06, "loss": 0.3846, "step": 13171 }, { "epoch": 2.7077808613423784, "grad_norm": 0.2358933985233307, "learning_rate": 2.2166769478868607e-06, "loss": 0.3745, "step": 13172 }, { "epoch": 2.707986432315757, "grad_norm": 0.25210806727409363, "learning_rate": 2.2135818097205606e-06, "loss": 0.4011, "step": 13173 }, { "epoch": 2.7081920032891356, "grad_norm": 0.23163393139839172, "learning_rate": 2.210488779441101e-06, "loss": 0.3744, "step": 13174 }, { "epoch": 2.708397574262514, "grad_norm": 0.24861502647399902, "learning_rate": 2.207397857200855e-06, "loss": 0.3905, "step": 13175 }, { "epoch": 2.7086031452358927, "grad_norm": 0.23797625303268433, "learning_rate": 2.20430904315211e-06, "loss": 0.3745, "step": 13176 }, { "epoch": 2.7088087162092713, "grad_norm": 0.23504245281219482, "learning_rate": 2.201222337447034e-06, "loss": 0.3763, "step": 13177 }, { "epoch": 2.70901428718265, "grad_norm": 0.23205745220184326, "learning_rate": 2.1981377402376917e-06, "loss": 0.3683, "step": 13178 }, { "epoch": 2.7092198581560285, "grad_norm": 0.12594787776470184, "learning_rate": 2.195055251676041e-06, "loss": 0.4583, "step": 13179 }, { "epoch": 2.709425429129407, "grad_norm": 0.22511595487594604, "learning_rate": 2.191974871913955e-06, "loss": 0.3569, "step": 13180 }, { "epoch": 2.7096310001027852, "grad_norm": 0.2308862954378128, "learning_rate": 2.1888966011031823e-06, "loss": 0.3656, "step": 13181 }, { "epoch": 2.7098365710761643, "grad_norm": 0.2143402099609375, "learning_rate": 2.1858204393953726e-06, "loss": 0.3644, "step": 13182 }, { "epoch": 2.7100421420495424, "grad_norm": 0.24129731953144073, "learning_rate": 2.1827463869420834e-06, "loss": 0.3925, "step": 13183 }, { "epoch": 2.710247713022921, "grad_norm": 0.23186422884464264, "learning_rate": 2.179674443894749e-06, "loss": 0.3777, "step": 13184 }, { "epoch": 2.7104532839962996, "grad_norm": 0.22773997485637665, "learning_rate": 2.176604610404709e-06, "loss": 0.3691, "step": 13185 }, { "epoch": 2.710658854969678, "grad_norm": 0.22896374762058258, "learning_rate": 2.1735368866232013e-06, "loss": 0.3764, "step": 13186 }, { "epoch": 2.7108644259430568, "grad_norm": 0.23243440687656403, "learning_rate": 2.170471272701371e-06, "loss": 0.367, "step": 13187 }, { "epoch": 2.7110699969164354, "grad_norm": 0.2411787211894989, "learning_rate": 2.1674077687902318e-06, "loss": 0.3867, "step": 13188 }, { "epoch": 2.711275567889814, "grad_norm": 0.21772977709770203, "learning_rate": 2.164346375040713e-06, "loss": 0.372, "step": 13189 }, { "epoch": 2.7114811388631925, "grad_norm": 0.2415088415145874, "learning_rate": 2.1612870916036336e-06, "loss": 0.3886, "step": 13190 }, { "epoch": 2.711686709836571, "grad_norm": 0.22934192419052124, "learning_rate": 2.1582299186297138e-06, "loss": 0.384, "step": 13191 }, { "epoch": 2.7118922808099497, "grad_norm": 0.23373478651046753, "learning_rate": 2.1551748562695627e-06, "loss": 0.3916, "step": 13192 }, { "epoch": 2.7120978517833283, "grad_norm": 0.23007836937904358, "learning_rate": 2.152121904673685e-06, "loss": 0.3613, "step": 13193 }, { "epoch": 2.712303422756707, "grad_norm": 0.12053580582141876, "learning_rate": 2.1490710639925003e-06, "loss": 0.4356, "step": 13194 }, { "epoch": 2.7125089937300855, "grad_norm": 0.23001576960086823, "learning_rate": 2.1460223343762937e-06, "loss": 0.3559, "step": 13195 }, { "epoch": 2.7127145647034636, "grad_norm": 0.2180272787809372, "learning_rate": 2.1429757159752697e-06, "loss": 0.3824, "step": 13196 }, { "epoch": 2.7129201356768426, "grad_norm": 0.23263666033744812, "learning_rate": 2.139931208939513e-06, "loss": 0.3618, "step": 13197 }, { "epoch": 2.713125706650221, "grad_norm": 0.2428431212902069, "learning_rate": 2.136888813419024e-06, "loss": 0.3795, "step": 13198 }, { "epoch": 2.7133312776236, "grad_norm": 0.11948748677968979, "learning_rate": 2.133848529563683e-06, "loss": 0.4315, "step": 13199 }, { "epoch": 2.713536848596978, "grad_norm": 0.2290315479040146, "learning_rate": 2.1308103575232645e-06, "loss": 0.3947, "step": 13200 }, { "epoch": 2.7137424195703566, "grad_norm": 0.11666145920753479, "learning_rate": 2.12777429744745e-06, "loss": 0.4485, "step": 13201 }, { "epoch": 2.713947990543735, "grad_norm": 0.2218608856201172, "learning_rate": 2.124740349485818e-06, "loss": 0.3646, "step": 13202 }, { "epoch": 2.7141535615171137, "grad_norm": 0.23134684562683105, "learning_rate": 2.1217085137878256e-06, "loss": 0.3925, "step": 13203 }, { "epoch": 2.7143591324904923, "grad_norm": 0.22924216091632843, "learning_rate": 2.118678790502843e-06, "loss": 0.3695, "step": 13204 }, { "epoch": 2.714564703463871, "grad_norm": 0.23405931890010834, "learning_rate": 2.11565117978013e-06, "loss": 0.3772, "step": 13205 }, { "epoch": 2.7147702744372495, "grad_norm": 0.22839610278606415, "learning_rate": 2.1126256817688427e-06, "loss": 0.3642, "step": 13206 }, { "epoch": 2.714975845410628, "grad_norm": 0.22964198887348175, "learning_rate": 2.1096022966180274e-06, "loss": 0.3658, "step": 13207 }, { "epoch": 2.7151814163840067, "grad_norm": 0.2313418686389923, "learning_rate": 2.106581024476644e-06, "loss": 0.3822, "step": 13208 }, { "epoch": 2.7153869873573853, "grad_norm": 0.23704691231250763, "learning_rate": 2.10356186549353e-06, "loss": 0.3822, "step": 13209 }, { "epoch": 2.715592558330764, "grad_norm": 0.2327091097831726, "learning_rate": 2.100544819817424e-06, "loss": 0.3948, "step": 13210 }, { "epoch": 2.715798129304142, "grad_norm": 0.23315146565437317, "learning_rate": 2.0975298875969646e-06, "loss": 0.384, "step": 13211 }, { "epoch": 2.716003700277521, "grad_norm": 0.22651349008083344, "learning_rate": 2.0945170689806813e-06, "loss": 0.3692, "step": 13212 }, { "epoch": 2.716209271250899, "grad_norm": 0.22469674050807953, "learning_rate": 2.0915063641170015e-06, "loss": 0.3868, "step": 13213 }, { "epoch": 2.716414842224278, "grad_norm": 0.12531313300132751, "learning_rate": 2.0884977731542454e-06, "loss": 0.4479, "step": 13214 }, { "epoch": 2.7166204131976563, "grad_norm": 0.11977065354585648, "learning_rate": 2.0854912962406403e-06, "loss": 0.4418, "step": 13215 }, { "epoch": 2.716825984171035, "grad_norm": 0.23392470180988312, "learning_rate": 2.0824869335242976e-06, "loss": 0.3844, "step": 13216 }, { "epoch": 2.7170315551444135, "grad_norm": 0.23545394837856293, "learning_rate": 2.0794846851532287e-06, "loss": 0.3921, "step": 13217 }, { "epoch": 2.717237126117792, "grad_norm": 0.11989542841911316, "learning_rate": 2.076484551275335e-06, "loss": 0.4487, "step": 13218 }, { "epoch": 2.7174426970911707, "grad_norm": 0.2391575127840042, "learning_rate": 2.073486532038424e-06, "loss": 0.3802, "step": 13219 }, { "epoch": 2.7176482680645493, "grad_norm": 0.12006057053804398, "learning_rate": 2.0704906275901968e-06, "loss": 0.4567, "step": 13220 }, { "epoch": 2.717853839037928, "grad_norm": 0.23207461833953857, "learning_rate": 2.067496838078241e-06, "loss": 0.3808, "step": 13221 }, { "epoch": 2.7180594100113065, "grad_norm": 0.2288663387298584, "learning_rate": 2.0645051636500534e-06, "loss": 0.3663, "step": 13222 }, { "epoch": 2.718264980984685, "grad_norm": 0.23807507753372192, "learning_rate": 2.061515604453016e-06, "loss": 0.3887, "step": 13223 }, { "epoch": 2.7184705519580636, "grad_norm": 0.23132917284965515, "learning_rate": 2.058528160634411e-06, "loss": 0.4107, "step": 13224 }, { "epoch": 2.7186761229314422, "grad_norm": 0.2228475958108902, "learning_rate": 2.0555428323414157e-06, "loss": 0.3912, "step": 13225 }, { "epoch": 2.7188816939048204, "grad_norm": 0.2397620677947998, "learning_rate": 2.0525596197211022e-06, "loss": 0.3716, "step": 13226 }, { "epoch": 2.7190872648781994, "grad_norm": 0.23399171233177185, "learning_rate": 2.0495785229204432e-06, "loss": 0.3902, "step": 13227 }, { "epoch": 2.7192928358515775, "grad_norm": 0.23543019592761993, "learning_rate": 2.0465995420862917e-06, "loss": 0.3836, "step": 13228 }, { "epoch": 2.7194984068249566, "grad_norm": 0.12511909008026123, "learning_rate": 2.043622677365424e-06, "loss": 0.4333, "step": 13229 }, { "epoch": 2.7197039777983347, "grad_norm": 0.22897110879421234, "learning_rate": 2.0406479289044895e-06, "loss": 0.3777, "step": 13230 }, { "epoch": 2.7199095487717133, "grad_norm": 0.23776206374168396, "learning_rate": 2.0376752968500397e-06, "loss": 0.385, "step": 13231 }, { "epoch": 2.720115119745092, "grad_norm": 0.22551818192005157, "learning_rate": 2.0347047813485274e-06, "loss": 0.3732, "step": 13232 }, { "epoch": 2.7203206907184705, "grad_norm": 0.23058441281318665, "learning_rate": 2.0317363825462867e-06, "loss": 0.3617, "step": 13233 }, { "epoch": 2.720526261691849, "grad_norm": 0.22936634719371796, "learning_rate": 2.0287701005895543e-06, "loss": 0.3821, "step": 13234 }, { "epoch": 2.7207318326652277, "grad_norm": 0.2348644882440567, "learning_rate": 2.025805935624479e-06, "loss": 0.3783, "step": 13235 }, { "epoch": 2.7209374036386063, "grad_norm": 0.22684963047504425, "learning_rate": 2.022843887797084e-06, "loss": 0.3885, "step": 13236 }, { "epoch": 2.721142974611985, "grad_norm": 0.2363407462835312, "learning_rate": 2.0198839572532972e-06, "loss": 0.379, "step": 13237 }, { "epoch": 2.7213485455853634, "grad_norm": 0.11835481971502304, "learning_rate": 2.0169261441389376e-06, "loss": 0.4572, "step": 13238 }, { "epoch": 2.721554116558742, "grad_norm": 0.11983449012041092, "learning_rate": 2.013970448599723e-06, "loss": 0.4419, "step": 13239 }, { "epoch": 2.7217596875321206, "grad_norm": 0.23388756811618805, "learning_rate": 2.011016870781267e-06, "loss": 0.3931, "step": 13240 }, { "epoch": 2.7219652585054988, "grad_norm": 0.23092390596866608, "learning_rate": 2.0080654108290835e-06, "loss": 0.3978, "step": 13241 }, { "epoch": 2.722170829478878, "grad_norm": 0.23938718438148499, "learning_rate": 2.0051160688885714e-06, "loss": 0.3733, "step": 13242 }, { "epoch": 2.722376400452256, "grad_norm": 0.11977725476026535, "learning_rate": 2.0021688451050334e-06, "loss": 0.4444, "step": 13243 }, { "epoch": 2.722581971425635, "grad_norm": 0.22424502670764923, "learning_rate": 1.9992237396236645e-06, "loss": 0.383, "step": 13244 }, { "epoch": 2.722787542399013, "grad_norm": 0.23597703874111176, "learning_rate": 1.996280752589563e-06, "loss": 0.369, "step": 13245 }, { "epoch": 2.7229931133723917, "grad_norm": 0.23005138337612152, "learning_rate": 1.993339884147704e-06, "loss": 0.3738, "step": 13246 }, { "epoch": 2.7231986843457703, "grad_norm": 0.22499051690101624, "learning_rate": 1.9904011344429797e-06, "loss": 0.3796, "step": 13247 }, { "epoch": 2.723404255319149, "grad_norm": 0.22470605373382568, "learning_rate": 1.9874645036201557e-06, "loss": 0.3958, "step": 13248 }, { "epoch": 2.7236098262925275, "grad_norm": 0.22984722256660461, "learning_rate": 1.9845299918239257e-06, "loss": 0.4004, "step": 13249 }, { "epoch": 2.723815397265906, "grad_norm": 0.2346932291984558, "learning_rate": 1.9815975991988445e-06, "loss": 0.3883, "step": 13250 }, { "epoch": 2.7240209682392846, "grad_norm": 0.24680159986019135, "learning_rate": 1.978667325889386e-06, "loss": 0.3792, "step": 13251 }, { "epoch": 2.7242265392126632, "grad_norm": 0.2350476086139679, "learning_rate": 1.9757391720399056e-06, "loss": 0.3892, "step": 13252 }, { "epoch": 2.724432110186042, "grad_norm": 0.22723430395126343, "learning_rate": 1.972813137794662e-06, "loss": 0.3875, "step": 13253 }, { "epoch": 2.7246376811594204, "grad_norm": 0.238324373960495, "learning_rate": 1.969889223297805e-06, "loss": 0.3872, "step": 13254 }, { "epoch": 2.724843252132799, "grad_norm": 0.12185484915971756, "learning_rate": 1.96696742869338e-06, "loss": 0.4491, "step": 13255 }, { "epoch": 2.725048823106177, "grad_norm": 0.23587733507156372, "learning_rate": 1.964047754125341e-06, "loss": 0.3813, "step": 13256 }, { "epoch": 2.725254394079556, "grad_norm": 0.22594283521175385, "learning_rate": 1.961130199737514e-06, "loss": 0.3808, "step": 13257 }, { "epoch": 2.7254599650529343, "grad_norm": 0.117709681391716, "learning_rate": 1.9582147656736426e-06, "loss": 0.4448, "step": 13258 }, { "epoch": 2.7256655360263133, "grad_norm": 0.1209564283490181, "learning_rate": 1.9553014520773535e-06, "loss": 0.4412, "step": 13259 }, { "epoch": 2.7258711069996915, "grad_norm": 0.23455938696861267, "learning_rate": 1.9523902590921657e-06, "loss": 0.3511, "step": 13260 }, { "epoch": 2.72607667797307, "grad_norm": 0.2380744367837906, "learning_rate": 1.94948118686151e-06, "loss": 0.376, "step": 13261 }, { "epoch": 2.7262822489464487, "grad_norm": 0.11584602296352386, "learning_rate": 1.9465742355287014e-06, "loss": 0.4523, "step": 13262 }, { "epoch": 2.7264878199198272, "grad_norm": 0.12415748089551926, "learning_rate": 1.943669405236941e-06, "loss": 0.435, "step": 13263 }, { "epoch": 2.726693390893206, "grad_norm": 0.227211594581604, "learning_rate": 1.9407666961293487e-06, "loss": 0.383, "step": 13264 }, { "epoch": 2.7268989618665844, "grad_norm": 0.12154388427734375, "learning_rate": 1.9378661083489255e-06, "loss": 0.4509, "step": 13265 }, { "epoch": 2.727104532839963, "grad_norm": 0.23259896039962769, "learning_rate": 1.9349676420385665e-06, "loss": 0.385, "step": 13266 }, { "epoch": 2.7273101038133416, "grad_norm": 0.2539547383785248, "learning_rate": 1.9320712973410634e-06, "loss": 0.397, "step": 13267 }, { "epoch": 2.72751567478672, "grad_norm": 0.22695066034793854, "learning_rate": 1.929177074399111e-06, "loss": 0.3688, "step": 13268 }, { "epoch": 2.727721245760099, "grad_norm": 0.23069900274276733, "learning_rate": 1.9262849733552864e-06, "loss": 0.3655, "step": 13269 }, { "epoch": 2.7279268167334774, "grad_norm": 0.22440584003925323, "learning_rate": 1.9233949943520798e-06, "loss": 0.3756, "step": 13270 }, { "epoch": 2.728132387706856, "grad_norm": 0.23021718859672546, "learning_rate": 1.920507137531862e-06, "loss": 0.3766, "step": 13271 }, { "epoch": 2.7283379586802345, "grad_norm": 0.22625602781772614, "learning_rate": 1.9176214030369055e-06, "loss": 0.3834, "step": 13272 }, { "epoch": 2.7285435296536127, "grad_norm": 0.22805316746234894, "learning_rate": 1.9147377910093754e-06, "loss": 0.3796, "step": 13273 }, { "epoch": 2.7287491006269917, "grad_norm": 0.12454908341169357, "learning_rate": 1.9118563015913337e-06, "loss": 0.4406, "step": 13274 }, { "epoch": 2.72895467160037, "grad_norm": 0.23324701189994812, "learning_rate": 1.9089769349247417e-06, "loss": 0.3713, "step": 13275 }, { "epoch": 2.7291602425737485, "grad_norm": 0.23551899194717407, "learning_rate": 1.9060996911514407e-06, "loss": 0.3907, "step": 13276 }, { "epoch": 2.729365813547127, "grad_norm": 0.24228408932685852, "learning_rate": 1.9032245704131973e-06, "loss": 0.386, "step": 13277 }, { "epoch": 2.7295713845205056, "grad_norm": 0.22902436554431915, "learning_rate": 1.9003515728516386e-06, "loss": 0.3902, "step": 13278 }, { "epoch": 2.729776955493884, "grad_norm": 0.2340046763420105, "learning_rate": 1.897480698608316e-06, "loss": 0.3771, "step": 13279 }, { "epoch": 2.729982526467263, "grad_norm": 0.12157081812620163, "learning_rate": 1.8946119478246565e-06, "loss": 0.4443, "step": 13280 }, { "epoch": 2.7301880974406414, "grad_norm": 0.22382938861846924, "learning_rate": 1.8917453206419922e-06, "loss": 0.3782, "step": 13281 }, { "epoch": 2.73039366841402, "grad_norm": 0.2339484691619873, "learning_rate": 1.888880817201545e-06, "loss": 0.3879, "step": 13282 }, { "epoch": 2.7305992393873986, "grad_norm": 0.23548907041549683, "learning_rate": 1.8860184376444418e-06, "loss": 0.3741, "step": 13283 }, { "epoch": 2.730804810360777, "grad_norm": 0.22213146090507507, "learning_rate": 1.8831581821116901e-06, "loss": 0.3853, "step": 13284 }, { "epoch": 2.7310103813341557, "grad_norm": 0.22502891719341278, "learning_rate": 1.8803000507442171e-06, "loss": 0.3773, "step": 13285 }, { "epoch": 2.7312159523075343, "grad_norm": 0.22634708881378174, "learning_rate": 1.877444043682815e-06, "loss": 0.3758, "step": 13286 }, { "epoch": 2.731421523280913, "grad_norm": 0.23306407034397125, "learning_rate": 1.8745901610681915e-06, "loss": 0.373, "step": 13287 }, { "epoch": 2.731627094254291, "grad_norm": 0.2324984073638916, "learning_rate": 1.8717384030409442e-06, "loss": 0.3757, "step": 13288 }, { "epoch": 2.73183266522767, "grad_norm": 0.2317853718996048, "learning_rate": 1.8688887697415653e-06, "loss": 0.3755, "step": 13289 }, { "epoch": 2.7320382362010482, "grad_norm": 0.2325008064508438, "learning_rate": 1.8660412613104379e-06, "loss": 0.3827, "step": 13290 }, { "epoch": 2.732243807174427, "grad_norm": 0.23261573910713196, "learning_rate": 1.8631958778878495e-06, "loss": 0.3833, "step": 13291 }, { "epoch": 2.7324493781478054, "grad_norm": 0.22458091378211975, "learning_rate": 1.860352619613983e-06, "loss": 0.3718, "step": 13292 }, { "epoch": 2.732654949121184, "grad_norm": 0.23159871995449066, "learning_rate": 1.8575114866289118e-06, "loss": 0.3698, "step": 13293 }, { "epoch": 2.7328605200945626, "grad_norm": 0.22185635566711426, "learning_rate": 1.8546724790725984e-06, "loss": 0.3753, "step": 13294 }, { "epoch": 2.733066091067941, "grad_norm": 0.23244544863700867, "learning_rate": 1.851835597084911e-06, "loss": 0.3959, "step": 13295 }, { "epoch": 2.7332716620413198, "grad_norm": 0.2387784868478775, "learning_rate": 1.8490008408056131e-06, "loss": 0.3712, "step": 13296 }, { "epoch": 2.7334772330146984, "grad_norm": 0.2200855165719986, "learning_rate": 1.8461682103743478e-06, "loss": 0.3655, "step": 13297 }, { "epoch": 2.733682803988077, "grad_norm": 0.22560839354991913, "learning_rate": 1.8433377059306835e-06, "loss": 0.3768, "step": 13298 }, { "epoch": 2.7338883749614555, "grad_norm": 0.23543007671833038, "learning_rate": 1.8405093276140534e-06, "loss": 0.4065, "step": 13299 }, { "epoch": 2.734093945934834, "grad_norm": 0.2383396327495575, "learning_rate": 1.8376830755638013e-06, "loss": 0.3916, "step": 13300 }, { "epoch": 2.7342995169082127, "grad_norm": 0.22618170082569122, "learning_rate": 1.834858949919166e-06, "loss": 0.3665, "step": 13301 }, { "epoch": 2.7345050878815913, "grad_norm": 0.12377354502677917, "learning_rate": 1.8320369508192759e-06, "loss": 0.4598, "step": 13302 }, { "epoch": 2.7347106588549694, "grad_norm": 0.24000823497772217, "learning_rate": 1.8292170784031548e-06, "loss": 0.3789, "step": 13303 }, { "epoch": 2.7349162298283485, "grad_norm": 0.23978973925113678, "learning_rate": 1.8263993328097318e-06, "loss": 0.385, "step": 13304 }, { "epoch": 2.7351218008017266, "grad_norm": 0.21829509735107422, "learning_rate": 1.8235837141778206e-06, "loss": 0.3776, "step": 13305 }, { "epoch": 2.735327371775105, "grad_norm": 0.2309700846672058, "learning_rate": 1.8207702226461305e-06, "loss": 0.3829, "step": 13306 }, { "epoch": 2.735532942748484, "grad_norm": 0.225687175989151, "learning_rate": 1.8179588583532753e-06, "loss": 0.3622, "step": 13307 }, { "epoch": 2.7357385137218624, "grad_norm": 0.22879765927791595, "learning_rate": 1.8151496214377546e-06, "loss": 0.3916, "step": 13308 }, { "epoch": 2.735944084695241, "grad_norm": 0.23625633120536804, "learning_rate": 1.8123425120379672e-06, "loss": 0.405, "step": 13309 }, { "epoch": 2.7361496556686196, "grad_norm": 0.2239421159029007, "learning_rate": 1.809537530292203e-06, "loss": 0.3717, "step": 13310 }, { "epoch": 2.736355226641998, "grad_norm": 0.23815853893756866, "learning_rate": 1.806734676338656e-06, "loss": 0.3639, "step": 13311 }, { "epoch": 2.7365607976153767, "grad_norm": 0.23043270409107208, "learning_rate": 1.8039339503154062e-06, "loss": 0.3773, "step": 13312 }, { "epoch": 2.7367663685887553, "grad_norm": 0.23327013850212097, "learning_rate": 1.801135352360433e-06, "loss": 0.3803, "step": 13313 }, { "epoch": 2.736971939562134, "grad_norm": 0.2213982492685318, "learning_rate": 1.798338882611611e-06, "loss": 0.3834, "step": 13314 }, { "epoch": 2.7371775105355125, "grad_norm": 0.23775465786457062, "learning_rate": 1.7955445412067102e-06, "loss": 0.3809, "step": 13315 }, { "epoch": 2.737383081508891, "grad_norm": 0.23932182788848877, "learning_rate": 1.7927523282833902e-06, "loss": 0.3749, "step": 13316 }, { "epoch": 2.7375886524822697, "grad_norm": 0.24264240264892578, "learning_rate": 1.7899622439792063e-06, "loss": 0.378, "step": 13317 }, { "epoch": 2.737794223455648, "grad_norm": 0.22181403636932373, "learning_rate": 1.7871742884316284e-06, "loss": 0.3769, "step": 13318 }, { "epoch": 2.737999794429027, "grad_norm": 0.231339693069458, "learning_rate": 1.7843884617779917e-06, "loss": 0.3888, "step": 13319 }, { "epoch": 2.738205365402405, "grad_norm": 0.23549319803714752, "learning_rate": 1.7816047641555512e-06, "loss": 0.3774, "step": 13320 }, { "epoch": 2.7384109363757836, "grad_norm": 0.12438720464706421, "learning_rate": 1.7788231957014424e-06, "loss": 0.4356, "step": 13321 }, { "epoch": 2.738616507349162, "grad_norm": 0.22666363418102264, "learning_rate": 1.7760437565526955e-06, "loss": 0.3691, "step": 13322 }, { "epoch": 2.7388220783225408, "grad_norm": 0.23813243210315704, "learning_rate": 1.7732664468462463e-06, "loss": 0.3833, "step": 13323 }, { "epoch": 2.7390276492959194, "grad_norm": 0.23540125787258148, "learning_rate": 1.77049126671891e-06, "loss": 0.3851, "step": 13324 }, { "epoch": 2.739233220269298, "grad_norm": 0.22330401837825775, "learning_rate": 1.7677182163074224e-06, "loss": 0.3608, "step": 13325 }, { "epoch": 2.7394387912426765, "grad_norm": 0.2364005595445633, "learning_rate": 1.7649472957483942e-06, "loss": 0.3804, "step": 13326 }, { "epoch": 2.739644362216055, "grad_norm": 0.22581815719604492, "learning_rate": 1.7621785051783213e-06, "loss": 0.3868, "step": 13327 }, { "epoch": 2.7398499331894337, "grad_norm": 0.22957521677017212, "learning_rate": 1.7594118447336294e-06, "loss": 0.3731, "step": 13328 }, { "epoch": 2.7400555041628123, "grad_norm": 0.2309531569480896, "learning_rate": 1.7566473145506097e-06, "loss": 0.3712, "step": 13329 }, { "epoch": 2.740261075136191, "grad_norm": 0.22596019506454468, "learning_rate": 1.753884914765458e-06, "loss": 0.352, "step": 13330 }, { "epoch": 2.7404666461095695, "grad_norm": 0.1303359568119049, "learning_rate": 1.7511246455142555e-06, "loss": 0.4202, "step": 13331 }, { "epoch": 2.740672217082948, "grad_norm": 0.22711026668548584, "learning_rate": 1.7483665069330086e-06, "loss": 0.3873, "step": 13332 }, { "epoch": 2.740877788056326, "grad_norm": 0.12200283259153366, "learning_rate": 1.7456104991575834e-06, "loss": 0.4612, "step": 13333 }, { "epoch": 2.7410833590297052, "grad_norm": 0.2212943285703659, "learning_rate": 1.7428566223237564e-06, "loss": 0.3736, "step": 13334 }, { "epoch": 2.7412889300030834, "grad_norm": 0.2410779595375061, "learning_rate": 1.740104876567204e-06, "loss": 0.3901, "step": 13335 }, { "epoch": 2.741494500976462, "grad_norm": 0.229153111577034, "learning_rate": 1.737355262023483e-06, "loss": 0.3911, "step": 13336 }, { "epoch": 2.7417000719498406, "grad_norm": 0.2225484400987625, "learning_rate": 1.7346077788280646e-06, "loss": 0.3882, "step": 13337 }, { "epoch": 2.741905642923219, "grad_norm": 0.23056496679782867, "learning_rate": 1.731862427116291e-06, "loss": 0.3961, "step": 13338 }, { "epoch": 2.7421112138965977, "grad_norm": 0.12282350659370422, "learning_rate": 1.7291192070234285e-06, "loss": 0.4445, "step": 13339 }, { "epoch": 2.7423167848699763, "grad_norm": 0.23378808796405792, "learning_rate": 1.7263781186846096e-06, "loss": 0.3796, "step": 13340 }, { "epoch": 2.742522355843355, "grad_norm": 0.22584164142608643, "learning_rate": 1.7236391622348857e-06, "loss": 0.3835, "step": 13341 }, { "epoch": 2.7427279268167335, "grad_norm": 0.23093900084495544, "learning_rate": 1.7209023378091844e-06, "loss": 0.3808, "step": 13342 }, { "epoch": 2.742933497790112, "grad_norm": 0.22885221242904663, "learning_rate": 1.7181676455423425e-06, "loss": 0.3878, "step": 13343 }, { "epoch": 2.7431390687634907, "grad_norm": 0.11858902126550674, "learning_rate": 1.715435085569077e-06, "loss": 0.4476, "step": 13344 }, { "epoch": 2.7433446397368693, "grad_norm": 0.21561181545257568, "learning_rate": 1.712704658024011e-06, "loss": 0.3538, "step": 13345 }, { "epoch": 2.743550210710248, "grad_norm": 0.12362432479858398, "learning_rate": 1.709976363041666e-06, "loss": 0.4305, "step": 13346 }, { "epoch": 2.7437557816836264, "grad_norm": 0.2305142730474472, "learning_rate": 1.7072502007564501e-06, "loss": 0.3542, "step": 13347 }, { "epoch": 2.7439613526570046, "grad_norm": 0.22127611935138702, "learning_rate": 1.7045261713026607e-06, "loss": 0.3839, "step": 13348 }, { "epoch": 2.7441669236303836, "grad_norm": 0.22477680444717407, "learning_rate": 1.7018042748145103e-06, "loss": 0.3914, "step": 13349 }, { "epoch": 2.7443724946037618, "grad_norm": 0.23768995702266693, "learning_rate": 1.6990845114260868e-06, "loss": 0.3856, "step": 13350 }, { "epoch": 2.7445780655771403, "grad_norm": 0.23600324988365173, "learning_rate": 1.696366881271383e-06, "loss": 0.3844, "step": 13351 }, { "epoch": 2.744783636550519, "grad_norm": 0.19089475274085999, "learning_rate": 1.6936513844842767e-06, "loss": 0.4466, "step": 13352 }, { "epoch": 2.7449892075238975, "grad_norm": 0.12042814493179321, "learning_rate": 1.690938021198556e-06, "loss": 0.446, "step": 13353 }, { "epoch": 2.745194778497276, "grad_norm": 0.23038606345653534, "learning_rate": 1.688226791547899e-06, "loss": 0.3753, "step": 13354 }, { "epoch": 2.7454003494706547, "grad_norm": 0.12141604721546173, "learning_rate": 1.6855176956658635e-06, "loss": 0.4526, "step": 13355 }, { "epoch": 2.7456059204440333, "grad_norm": 0.23495237529277802, "learning_rate": 1.6828107336859233e-06, "loss": 0.3864, "step": 13356 }, { "epoch": 2.745811491417412, "grad_norm": 0.11687915772199631, "learning_rate": 1.6801059057414314e-06, "loss": 0.4553, "step": 13357 }, { "epoch": 2.7460170623907905, "grad_norm": 0.23180118203163147, "learning_rate": 1.6774032119656463e-06, "loss": 0.3715, "step": 13358 }, { "epoch": 2.746222633364169, "grad_norm": 0.2334972769021988, "learning_rate": 1.6747026524917114e-06, "loss": 0.3905, "step": 13359 }, { "epoch": 2.7464282043375476, "grad_norm": 0.11535855382680893, "learning_rate": 1.6720042274526754e-06, "loss": 0.4416, "step": 13360 }, { "epoch": 2.7466337753109262, "grad_norm": 0.2297336459159851, "learning_rate": 1.6693079369814819e-06, "loss": 0.3875, "step": 13361 }, { "epoch": 2.746839346284305, "grad_norm": 0.23500292003154755, "learning_rate": 1.6666137812109595e-06, "loss": 0.3828, "step": 13362 }, { "epoch": 2.747044917257683, "grad_norm": 0.22683893144130707, "learning_rate": 1.6639217602738322e-06, "loss": 0.3724, "step": 13363 }, { "epoch": 2.747250488231062, "grad_norm": 0.22383469343185425, "learning_rate": 1.6612318743027288e-06, "loss": 0.3791, "step": 13364 }, { "epoch": 2.74745605920444, "grad_norm": 0.23103176057338715, "learning_rate": 1.6585441234301686e-06, "loss": 0.372, "step": 13365 }, { "epoch": 2.7476616301778187, "grad_norm": 0.12242773920297623, "learning_rate": 1.6558585077885553e-06, "loss": 0.431, "step": 13366 }, { "epoch": 2.7478672011511973, "grad_norm": 0.24223840236663818, "learning_rate": 1.6531750275102082e-06, "loss": 0.4015, "step": 13367 }, { "epoch": 2.748072772124576, "grad_norm": 0.24244338274002075, "learning_rate": 1.6504936827273216e-06, "loss": 0.3931, "step": 13368 }, { "epoch": 2.7482783430979545, "grad_norm": 0.23767444491386414, "learning_rate": 1.6478144735719997e-06, "loss": 0.39, "step": 13369 }, { "epoch": 2.748483914071333, "grad_norm": 0.2309853583574295, "learning_rate": 1.6451374001762272e-06, "loss": 0.3812, "step": 13370 }, { "epoch": 2.7486894850447117, "grad_norm": 0.2257552295923233, "learning_rate": 1.6424624626718982e-06, "loss": 0.3695, "step": 13371 }, { "epoch": 2.7488950560180903, "grad_norm": 0.22814756631851196, "learning_rate": 1.6397896611907925e-06, "loss": 0.3859, "step": 13372 }, { "epoch": 2.749100626991469, "grad_norm": 0.23302559554576874, "learning_rate": 1.63711899586458e-06, "loss": 0.389, "step": 13373 }, { "epoch": 2.7493061979648474, "grad_norm": 0.1236652210354805, "learning_rate": 1.6344504668248401e-06, "loss": 0.4524, "step": 13374 }, { "epoch": 2.749511768938226, "grad_norm": 0.12643174827098846, "learning_rate": 1.6317840742030328e-06, "loss": 0.4511, "step": 13375 }, { "epoch": 2.7497173399116046, "grad_norm": 0.122630275785923, "learning_rate": 1.6291198181305279e-06, "loss": 0.4569, "step": 13376 }, { "epoch": 2.749922910884983, "grad_norm": 0.12135348469018936, "learning_rate": 1.6264576987385705e-06, "loss": 0.4401, "step": 13377 }, { "epoch": 2.7501284818583613, "grad_norm": 0.2322167158126831, "learning_rate": 1.6237977161583157e-06, "loss": 0.3739, "step": 13378 }, { "epoch": 2.7503340528317404, "grad_norm": 0.22395208477973938, "learning_rate": 1.6211398705208086e-06, "loss": 0.3799, "step": 13379 }, { "epoch": 2.7505396238051185, "grad_norm": 0.23402728140354156, "learning_rate": 1.6184841619569847e-06, "loss": 0.4011, "step": 13380 }, { "epoch": 2.7507451947784975, "grad_norm": 0.23176077008247375, "learning_rate": 1.6158305905976839e-06, "loss": 0.3742, "step": 13381 }, { "epoch": 2.7509507657518757, "grad_norm": 0.23246802389621735, "learning_rate": 1.6131791565736322e-06, "loss": 0.3758, "step": 13382 }, { "epoch": 2.7511563367252543, "grad_norm": 0.23959018290042877, "learning_rate": 1.6105298600154545e-06, "loss": 0.3795, "step": 13383 }, { "epoch": 2.751361907698633, "grad_norm": 0.22550779581069946, "learning_rate": 1.6078827010536717e-06, "loss": 0.3797, "step": 13384 }, { "epoch": 2.7515674786720115, "grad_norm": 0.23018544912338257, "learning_rate": 1.6052376798186896e-06, "loss": 0.4094, "step": 13385 }, { "epoch": 2.75177304964539, "grad_norm": 0.12095669656991959, "learning_rate": 1.602594796440824e-06, "loss": 0.4569, "step": 13386 }, { "epoch": 2.7519786206187686, "grad_norm": 0.22460295259952545, "learning_rate": 1.5999540510502653e-06, "loss": 0.3634, "step": 13387 }, { "epoch": 2.7521841915921472, "grad_norm": 0.23332244157791138, "learning_rate": 1.597315443777125e-06, "loss": 0.3988, "step": 13388 }, { "epoch": 2.752389762565526, "grad_norm": 0.22451160848140717, "learning_rate": 1.5946789747513935e-06, "loss": 0.3883, "step": 13389 }, { "epoch": 2.7525953335389044, "grad_norm": 0.23024022579193115, "learning_rate": 1.5920446441029474e-06, "loss": 0.3961, "step": 13390 }, { "epoch": 2.752800904512283, "grad_norm": 0.2318277209997177, "learning_rate": 1.5894124519615678e-06, "loss": 0.3562, "step": 13391 }, { "epoch": 2.7530064754856616, "grad_norm": 0.22565960884094238, "learning_rate": 1.5867823984569458e-06, "loss": 0.3716, "step": 13392 }, { "epoch": 2.7532120464590397, "grad_norm": 0.23131342232227325, "learning_rate": 1.5841544837186428e-06, "loss": 0.3782, "step": 13393 }, { "epoch": 2.7534176174324188, "grad_norm": 0.24259567260742188, "learning_rate": 1.5815287078761155e-06, "loss": 0.3828, "step": 13394 }, { "epoch": 2.753623188405797, "grad_norm": 0.23125715553760529, "learning_rate": 1.578905071058735e-06, "loss": 0.3934, "step": 13395 }, { "epoch": 2.753828759379176, "grad_norm": 0.22932687401771545, "learning_rate": 1.5762835733957531e-06, "loss": 0.3589, "step": 13396 }, { "epoch": 2.754034330352554, "grad_norm": 0.22249168157577515, "learning_rate": 1.5736642150163168e-06, "loss": 0.3756, "step": 13397 }, { "epoch": 2.7542399013259327, "grad_norm": 0.23607565462589264, "learning_rate": 1.5710469960494723e-06, "loss": 0.362, "step": 13398 }, { "epoch": 2.7544454722993112, "grad_norm": 0.2208351343870163, "learning_rate": 1.5684319166241568e-06, "loss": 0.3629, "step": 13399 }, { "epoch": 2.75465104327269, "grad_norm": 0.22543267905712128, "learning_rate": 1.5658189768691923e-06, "loss": 0.3795, "step": 13400 }, { "epoch": 2.7548566142460684, "grad_norm": 0.233917698264122, "learning_rate": 1.5632081769133255e-06, "loss": 0.4046, "step": 13401 }, { "epoch": 2.755062185219447, "grad_norm": 0.23534435033798218, "learning_rate": 1.560599516885169e-06, "loss": 0.3964, "step": 13402 }, { "epoch": 2.7552677561928256, "grad_norm": 0.2306138277053833, "learning_rate": 1.5579929969132395e-06, "loss": 0.3845, "step": 13403 }, { "epoch": 2.755473327166204, "grad_norm": 0.23294024169445038, "learning_rate": 1.5553886171259446e-06, "loss": 0.3764, "step": 13404 }, { "epoch": 2.7556788981395828, "grad_norm": 0.23482374846935272, "learning_rate": 1.5527863776515918e-06, "loss": 0.3821, "step": 13405 }, { "epoch": 2.7558844691129614, "grad_norm": 0.12278321385383606, "learning_rate": 1.550186278618388e-06, "loss": 0.4522, "step": 13406 }, { "epoch": 2.75609004008634, "grad_norm": 0.23249104619026184, "learning_rate": 1.5475883201544111e-06, "loss": 0.3943, "step": 13407 }, { "epoch": 2.756295611059718, "grad_norm": 0.23344482481479645, "learning_rate": 1.544992502387669e-06, "loss": 0.3739, "step": 13408 }, { "epoch": 2.756501182033097, "grad_norm": 0.22119440138339996, "learning_rate": 1.5423988254460386e-06, "loss": 0.3884, "step": 13409 }, { "epoch": 2.7567067530064753, "grad_norm": 0.2359190583229065, "learning_rate": 1.5398072894572984e-06, "loss": 0.3688, "step": 13410 }, { "epoch": 2.7569123239798543, "grad_norm": 0.23092953860759735, "learning_rate": 1.537217894549121e-06, "loss": 0.3673, "step": 13411 }, { "epoch": 2.7571178949532325, "grad_norm": 0.23413263261318207, "learning_rate": 1.5346306408490697e-06, "loss": 0.3826, "step": 13412 }, { "epoch": 2.757323465926611, "grad_norm": 0.23555617034435272, "learning_rate": 1.532045528484612e-06, "loss": 0.3885, "step": 13413 }, { "epoch": 2.7575290368999896, "grad_norm": 0.2366188019514084, "learning_rate": 1.5294625575831012e-06, "loss": 0.4028, "step": 13414 }, { "epoch": 2.757734607873368, "grad_norm": 0.23909413814544678, "learning_rate": 1.5268817282717857e-06, "loss": 0.387, "step": 13415 }, { "epoch": 2.757940178846747, "grad_norm": 0.11871679872274399, "learning_rate": 1.5243030406778237e-06, "loss": 0.4433, "step": 13416 }, { "epoch": 2.7581457498201254, "grad_norm": 0.24047636985778809, "learning_rate": 1.5217264949282384e-06, "loss": 0.368, "step": 13417 }, { "epoch": 2.758351320793504, "grad_norm": 0.1217452734708786, "learning_rate": 1.5191520911499786e-06, "loss": 0.4494, "step": 13418 }, { "epoch": 2.7585568917668826, "grad_norm": 0.22655776143074036, "learning_rate": 1.5165798294698625e-06, "loss": 0.3789, "step": 13419 }, { "epoch": 2.758762462740261, "grad_norm": 0.12003947049379349, "learning_rate": 1.5140097100146188e-06, "loss": 0.4444, "step": 13420 }, { "epoch": 2.7589680337136397, "grad_norm": 0.24004173278808594, "learning_rate": 1.5114417329108565e-06, "loss": 0.3891, "step": 13421 }, { "epoch": 2.7591736046870183, "grad_norm": 0.24962955713272095, "learning_rate": 1.5088758982851042e-06, "loss": 0.3885, "step": 13422 }, { "epoch": 2.7593791756603965, "grad_norm": 0.22153249382972717, "learning_rate": 1.5063122062637558e-06, "loss": 0.3772, "step": 13423 }, { "epoch": 2.7595847466337755, "grad_norm": 0.2372589111328125, "learning_rate": 1.5037506569731202e-06, "loss": 0.3729, "step": 13424 }, { "epoch": 2.7597903176071537, "grad_norm": 0.2346244603395462, "learning_rate": 1.5011912505393867e-06, "loss": 0.4007, "step": 13425 }, { "epoch": 2.7599958885805327, "grad_norm": 0.22780828177928925, "learning_rate": 1.498633987088644e-06, "loss": 0.3916, "step": 13426 }, { "epoch": 2.760201459553911, "grad_norm": 0.23186977207660675, "learning_rate": 1.4960788667468816e-06, "loss": 0.384, "step": 13427 }, { "epoch": 2.7604070305272894, "grad_norm": 0.22554920613765717, "learning_rate": 1.493525889639974e-06, "loss": 0.3773, "step": 13428 }, { "epoch": 2.760612601500668, "grad_norm": 0.22903324663639069, "learning_rate": 1.4909750558937003e-06, "loss": 0.3697, "step": 13429 }, { "epoch": 2.7608181724740466, "grad_norm": 0.23504561185836792, "learning_rate": 1.488426365633725e-06, "loss": 0.3907, "step": 13430 }, { "epoch": 2.761023743447425, "grad_norm": 0.2242177128791809, "learning_rate": 1.4858798189856076e-06, "loss": 0.3697, "step": 13431 }, { "epoch": 2.7612293144208038, "grad_norm": 0.21883516013622284, "learning_rate": 1.4833354160748131e-06, "loss": 0.4012, "step": 13432 }, { "epoch": 2.7614348853941824, "grad_norm": 0.23364631831645966, "learning_rate": 1.480793157026676e-06, "loss": 0.403, "step": 13433 }, { "epoch": 2.761640456367561, "grad_norm": 0.21907542645931244, "learning_rate": 1.478253041966461e-06, "loss": 0.3822, "step": 13434 }, { "epoch": 2.7618460273409395, "grad_norm": 0.22405709326267242, "learning_rate": 1.475715071019293e-06, "loss": 0.3756, "step": 13435 }, { "epoch": 2.762051598314318, "grad_norm": 0.2236599624156952, "learning_rate": 1.473179244310212e-06, "loss": 0.3914, "step": 13436 }, { "epoch": 2.7622571692876967, "grad_norm": 0.22199760377407074, "learning_rate": 1.4706455619641485e-06, "loss": 0.3774, "step": 13437 }, { "epoch": 2.7624627402610753, "grad_norm": 0.12080203741788864, "learning_rate": 1.4681140241059221e-06, "loss": 0.4438, "step": 13438 }, { "epoch": 2.762668311234454, "grad_norm": 0.22729521989822388, "learning_rate": 1.4655846308602483e-06, "loss": 0.378, "step": 13439 }, { "epoch": 2.762873882207832, "grad_norm": 0.23560449481010437, "learning_rate": 1.4630573823517425e-06, "loss": 0.3705, "step": 13440 }, { "epoch": 2.763079453181211, "grad_norm": 0.22415785491466522, "learning_rate": 1.4605322787049097e-06, "loss": 0.3701, "step": 13441 }, { "epoch": 2.763285024154589, "grad_norm": 0.12505774199962616, "learning_rate": 1.4580093200441408e-06, "loss": 0.4395, "step": 13442 }, { "epoch": 2.763490595127968, "grad_norm": 0.23564325273036957, "learning_rate": 1.4554885064937462e-06, "loss": 0.4002, "step": 13443 }, { "epoch": 2.7636961661013464, "grad_norm": 0.2280401885509491, "learning_rate": 1.4529698381779067e-06, "loss": 0.3785, "step": 13444 }, { "epoch": 2.763901737074725, "grad_norm": 0.2783918082714081, "learning_rate": 1.4504533152207028e-06, "loss": 0.3812, "step": 13445 }, { "epoch": 2.7641073080481036, "grad_norm": 0.2317892611026764, "learning_rate": 1.4479389377461105e-06, "loss": 0.3993, "step": 13446 }, { "epoch": 2.764312879021482, "grad_norm": 0.23319129645824432, "learning_rate": 1.4454267058780108e-06, "loss": 0.3947, "step": 13447 }, { "epoch": 2.7645184499948607, "grad_norm": 0.23588362336158752, "learning_rate": 1.4429166197401594e-06, "loss": 0.389, "step": 13448 }, { "epoch": 2.7647240209682393, "grad_norm": 0.23121041059494019, "learning_rate": 1.4404086794562177e-06, "loss": 0.3932, "step": 13449 }, { "epoch": 2.764929591941618, "grad_norm": 0.22811183333396912, "learning_rate": 1.4379028851497516e-06, "loss": 0.371, "step": 13450 }, { "epoch": 2.7651351629149965, "grad_norm": 0.24577990174293518, "learning_rate": 1.4353992369441976e-06, "loss": 0.3868, "step": 13451 }, { "epoch": 2.765340733888375, "grad_norm": 0.22762644290924072, "learning_rate": 1.4328977349629019e-06, "loss": 0.3735, "step": 13452 }, { "epoch": 2.7655463048617537, "grad_norm": 0.23274122178554535, "learning_rate": 1.430398379329106e-06, "loss": 0.3751, "step": 13453 }, { "epoch": 2.7657518758351323, "grad_norm": 0.23309700191020966, "learning_rate": 1.4279011701659362e-06, "loss": 0.3838, "step": 13454 }, { "epoch": 2.7659574468085104, "grad_norm": 0.23482760787010193, "learning_rate": 1.4254061075964143e-06, "loss": 0.3797, "step": 13455 }, { "epoch": 2.7661630177818894, "grad_norm": 0.11869847029447556, "learning_rate": 1.4229131917434769e-06, "loss": 0.4533, "step": 13456 }, { "epoch": 2.7663685887552676, "grad_norm": 0.12506672739982605, "learning_rate": 1.4204224227299156e-06, "loss": 0.4454, "step": 13457 }, { "epoch": 2.766574159728646, "grad_norm": 0.12263701856136322, "learning_rate": 1.4179338006784626e-06, "loss": 0.4483, "step": 13458 }, { "epoch": 2.7667797307020248, "grad_norm": 0.24314455687999725, "learning_rate": 1.4154473257117047e-06, "loss": 0.3715, "step": 13459 }, { "epoch": 2.7669853016754034, "grad_norm": 0.22938649356365204, "learning_rate": 1.4129629979521436e-06, "loss": 0.3668, "step": 13460 }, { "epoch": 2.767190872648782, "grad_norm": 0.23843181133270264, "learning_rate": 1.4104808175221717e-06, "loss": 0.3938, "step": 13461 }, { "epoch": 2.7673964436221605, "grad_norm": 0.22452838718891144, "learning_rate": 1.4080007845440713e-06, "loss": 0.3485, "step": 13462 }, { "epoch": 2.767602014595539, "grad_norm": 0.2309243083000183, "learning_rate": 1.4055228991400193e-06, "loss": 0.3835, "step": 13463 }, { "epoch": 2.7678075855689177, "grad_norm": 0.24043014645576477, "learning_rate": 1.4030471614320984e-06, "loss": 0.3677, "step": 13464 }, { "epoch": 2.7680131565422963, "grad_norm": 0.24299444258213043, "learning_rate": 1.4005735715422757e-06, "loss": 0.392, "step": 13465 }, { "epoch": 2.768218727515675, "grad_norm": 0.2283448427915573, "learning_rate": 1.3981021295924091e-06, "loss": 0.3609, "step": 13466 }, { "epoch": 2.7684242984890535, "grad_norm": 0.2528086304664612, "learning_rate": 1.395632835704251e-06, "loss": 0.3771, "step": 13467 }, { "epoch": 2.768629869462432, "grad_norm": 0.23460538685321808, "learning_rate": 1.393165689999464e-06, "loss": 0.3766, "step": 13468 }, { "epoch": 2.7688354404358106, "grad_norm": 0.23330725729465485, "learning_rate": 1.390700692599576e-06, "loss": 0.3756, "step": 13469 }, { "epoch": 2.769041011409189, "grad_norm": 0.11881226301193237, "learning_rate": 1.3882378436260396e-06, "loss": 0.4427, "step": 13470 }, { "epoch": 2.769246582382568, "grad_norm": 0.2295810878276825, "learning_rate": 1.3857771432001881e-06, "loss": 0.374, "step": 13471 }, { "epoch": 2.769452153355946, "grad_norm": 0.12282148748636246, "learning_rate": 1.3833185914432396e-06, "loss": 0.4614, "step": 13472 }, { "epoch": 2.7696577243293246, "grad_norm": 0.2301100343465805, "learning_rate": 1.3808621884763218e-06, "loss": 0.3805, "step": 13473 }, { "epoch": 2.769863295302703, "grad_norm": 0.23633736371994019, "learning_rate": 1.378407934420448e-06, "loss": 0.3947, "step": 13474 }, { "epoch": 2.7700688662760817, "grad_norm": 0.22140897810459137, "learning_rate": 1.375955829396532e-06, "loss": 0.3804, "step": 13475 }, { "epoch": 2.7702744372494603, "grad_norm": 0.12200979143381119, "learning_rate": 1.3735058735253663e-06, "loss": 0.4414, "step": 13476 }, { "epoch": 2.770480008222839, "grad_norm": 0.23393046855926514, "learning_rate": 1.3710580669276601e-06, "loss": 0.3847, "step": 13477 }, { "epoch": 2.7706855791962175, "grad_norm": 0.1250157356262207, "learning_rate": 1.3686124097240066e-06, "loss": 0.4526, "step": 13478 }, { "epoch": 2.770891150169596, "grad_norm": 0.2439979910850525, "learning_rate": 1.3661689020348795e-06, "loss": 0.4, "step": 13479 }, { "epoch": 2.7710967211429747, "grad_norm": 0.23264381289482117, "learning_rate": 1.3637275439806723e-06, "loss": 0.3863, "step": 13480 }, { "epoch": 2.7713022921163533, "grad_norm": 0.23298904299736023, "learning_rate": 1.3612883356816493e-06, "loss": 0.3768, "step": 13481 }, { "epoch": 2.771507863089732, "grad_norm": 0.23129281401634216, "learning_rate": 1.3588512772579887e-06, "loss": 0.3775, "step": 13482 }, { "epoch": 2.7717134340631104, "grad_norm": 0.22436164319515228, "learning_rate": 1.3564163688297398e-06, "loss": 0.386, "step": 13483 }, { "epoch": 2.771919005036489, "grad_norm": 0.12501150369644165, "learning_rate": 1.353983610516872e-06, "loss": 0.457, "step": 13484 }, { "epoch": 2.772124576009867, "grad_norm": 0.22815532982349396, "learning_rate": 1.3515530024392286e-06, "loss": 0.3907, "step": 13485 }, { "epoch": 2.772330146983246, "grad_norm": 0.23359054327011108, "learning_rate": 1.3491245447165596e-06, "loss": 0.38, "step": 13486 }, { "epoch": 2.7725357179566243, "grad_norm": 0.23818518221378326, "learning_rate": 1.3466982374684988e-06, "loss": 0.3788, "step": 13487 }, { "epoch": 2.772741288930003, "grad_norm": 0.2215246707201004, "learning_rate": 1.344274080814586e-06, "loss": 0.3717, "step": 13488 }, { "epoch": 2.7729468599033815, "grad_norm": 0.2501251697540283, "learning_rate": 1.3418520748742352e-06, "loss": 0.3799, "step": 13489 }, { "epoch": 2.77315243087676, "grad_norm": 0.22703197598457336, "learning_rate": 1.3394322197667763e-06, "loss": 0.3996, "step": 13490 }, { "epoch": 2.7733580018501387, "grad_norm": 0.1154903993010521, "learning_rate": 1.3370145156114239e-06, "loss": 0.4539, "step": 13491 }, { "epoch": 2.7735635728235173, "grad_norm": 0.23296396434307098, "learning_rate": 1.3345989625272875e-06, "loss": 0.397, "step": 13492 }, { "epoch": 2.773769143796896, "grad_norm": 0.23191124200820923, "learning_rate": 1.3321855606333673e-06, "loss": 0.3711, "step": 13493 }, { "epoch": 2.7739747147702745, "grad_norm": 0.22586466372013092, "learning_rate": 1.3297743100485627e-06, "loss": 0.3726, "step": 13494 }, { "epoch": 2.774180285743653, "grad_norm": 0.23403707146644592, "learning_rate": 1.327365210891664e-06, "loss": 0.3872, "step": 13495 }, { "epoch": 2.7743858567170316, "grad_norm": 0.2312314212322235, "learning_rate": 1.3249582632813563e-06, "loss": 0.3814, "step": 13496 }, { "epoch": 2.7745914276904102, "grad_norm": 0.12255984544754028, "learning_rate": 1.3225534673362144e-06, "loss": 0.4595, "step": 13497 }, { "epoch": 2.774796998663789, "grad_norm": 0.24602609872817993, "learning_rate": 1.320150823174719e-06, "loss": 0.3767, "step": 13498 }, { "epoch": 2.7750025696371674, "grad_norm": 0.22925207018852234, "learning_rate": 1.3177503309152351e-06, "loss": 0.3627, "step": 13499 }, { "epoch": 2.7752081406105455, "grad_norm": 0.2292235791683197, "learning_rate": 1.3153519906760132e-06, "loss": 0.3868, "step": 13500 }, { "epoch": 2.7754137115839246, "grad_norm": 0.2379998415708542, "learning_rate": 1.3129558025752236e-06, "loss": 0.3589, "step": 13501 }, { "epoch": 2.7756192825573027, "grad_norm": 0.12553149461746216, "learning_rate": 1.3105617667309124e-06, "loss": 0.4474, "step": 13502 }, { "epoch": 2.7758248535306813, "grad_norm": 0.12078402936458588, "learning_rate": 1.3081698832610146e-06, "loss": 0.4486, "step": 13503 }, { "epoch": 2.77603042450406, "grad_norm": 0.22506415843963623, "learning_rate": 1.3057801522833662e-06, "loss": 0.3689, "step": 13504 }, { "epoch": 2.7762359954774385, "grad_norm": 0.12277916818857193, "learning_rate": 1.3033925739157133e-06, "loss": 0.447, "step": 13505 }, { "epoch": 2.776441566450817, "grad_norm": 0.2416677474975586, "learning_rate": 1.3010071482756665e-06, "loss": 0.3757, "step": 13506 }, { "epoch": 2.7766471374241957, "grad_norm": 0.23068147897720337, "learning_rate": 1.2986238754807518e-06, "loss": 0.371, "step": 13507 }, { "epoch": 2.7768527083975743, "grad_norm": 0.23163877427577972, "learning_rate": 1.2962427556483753e-06, "loss": 0.358, "step": 13508 }, { "epoch": 2.777058279370953, "grad_norm": 0.22567118704319, "learning_rate": 1.2938637888958482e-06, "loss": 0.3855, "step": 13509 }, { "epoch": 2.7772638503443314, "grad_norm": 0.2283574789762497, "learning_rate": 1.2914869753403718e-06, "loss": 0.3802, "step": 13510 }, { "epoch": 2.77746942131771, "grad_norm": 0.1205214112997055, "learning_rate": 1.2891123150990376e-06, "loss": 0.4479, "step": 13511 }, { "epoch": 2.7776749922910886, "grad_norm": 0.12035630643367767, "learning_rate": 1.2867398082888366e-06, "loss": 0.4525, "step": 13512 }, { "epoch": 2.777880563264467, "grad_norm": 0.22281195223331451, "learning_rate": 1.2843694550266506e-06, "loss": 0.3897, "step": 13513 }, { "epoch": 2.778086134237846, "grad_norm": 0.22318775951862335, "learning_rate": 1.282001255429251e-06, "loss": 0.3767, "step": 13514 }, { "epoch": 2.778291705211224, "grad_norm": 0.11824406683444977, "learning_rate": 1.2796352096133195e-06, "loss": 0.4477, "step": 13515 }, { "epoch": 2.778497276184603, "grad_norm": 0.2342909723520279, "learning_rate": 1.2772713176954082e-06, "loss": 0.3699, "step": 13516 }, { "epoch": 2.778702847157981, "grad_norm": 0.2417282909154892, "learning_rate": 1.2749095797919785e-06, "loss": 0.3834, "step": 13517 }, { "epoch": 2.7789084181313597, "grad_norm": 0.2351347953081131, "learning_rate": 1.2725499960193826e-06, "loss": 0.3835, "step": 13518 }, { "epoch": 2.7791139891047383, "grad_norm": 0.11795809119939804, "learning_rate": 1.2701925664938675e-06, "loss": 0.4358, "step": 13519 }, { "epoch": 2.779319560078117, "grad_norm": 0.2242356687784195, "learning_rate": 1.267837291331575e-06, "loss": 0.3773, "step": 13520 }, { "epoch": 2.7795251310514955, "grad_norm": 0.23602786660194397, "learning_rate": 1.2654841706485326e-06, "loss": 0.3895, "step": 13521 }, { "epoch": 2.779730702024874, "grad_norm": 0.23076754808425903, "learning_rate": 1.2631332045606725e-06, "loss": 0.3835, "step": 13522 }, { "epoch": 2.7799362729982526, "grad_norm": 0.11837997287511826, "learning_rate": 1.260784393183812e-06, "loss": 0.4325, "step": 13523 }, { "epoch": 2.780141843971631, "grad_norm": 0.23414716124534607, "learning_rate": 1.2584377366336687e-06, "loss": 0.3697, "step": 13524 }, { "epoch": 2.78034741494501, "grad_norm": 0.2339978665113449, "learning_rate": 1.2560932350258498e-06, "loss": 0.3645, "step": 13525 }, { "epoch": 2.7805529859183884, "grad_norm": 0.22943206131458282, "learning_rate": 1.2537508884758581e-06, "loss": 0.3743, "step": 13526 }, { "epoch": 2.780758556891767, "grad_norm": 0.24171233177185059, "learning_rate": 1.2514106970990962e-06, "loss": 0.3866, "step": 13527 }, { "epoch": 2.7809641278651456, "grad_norm": 0.2301642745733261, "learning_rate": 1.2490726610108423e-06, "loss": 0.3776, "step": 13528 }, { "epoch": 2.781169698838524, "grad_norm": 0.23848628997802734, "learning_rate": 1.2467367803262937e-06, "loss": 0.378, "step": 13529 }, { "epoch": 2.7813752698119023, "grad_norm": 0.23197340965270996, "learning_rate": 1.2444030551605185e-06, "loss": 0.3848, "step": 13530 }, { "epoch": 2.7815808407852813, "grad_norm": 0.2298150360584259, "learning_rate": 1.24207148562849e-06, "loss": 0.3695, "step": 13531 }, { "epoch": 2.7817864117586595, "grad_norm": 0.23591184616088867, "learning_rate": 1.2397420718450708e-06, "loss": 0.3615, "step": 13532 }, { "epoch": 2.781991982732038, "grad_norm": 0.23564256727695465, "learning_rate": 1.2374148139250348e-06, "loss": 0.3773, "step": 13533 }, { "epoch": 2.7821975537054167, "grad_norm": 0.2334429770708084, "learning_rate": 1.2350897119830195e-06, "loss": 0.3737, "step": 13534 }, { "epoch": 2.7824031246787952, "grad_norm": 0.12291624397039413, "learning_rate": 1.232766766133579e-06, "loss": 0.4507, "step": 13535 }, { "epoch": 2.782608695652174, "grad_norm": 0.23279529809951782, "learning_rate": 1.2304459764911514e-06, "loss": 0.385, "step": 13536 }, { "epoch": 2.7828142666255524, "grad_norm": 0.22842784225940704, "learning_rate": 1.2281273431700752e-06, "loss": 0.3793, "step": 13537 }, { "epoch": 2.783019837598931, "grad_norm": 0.2227255403995514, "learning_rate": 1.225810866284574e-06, "loss": 0.3711, "step": 13538 }, { "epoch": 2.7832254085723096, "grad_norm": 0.21851521730422974, "learning_rate": 1.2234965459487668e-06, "loss": 0.3655, "step": 13539 }, { "epoch": 2.783430979545688, "grad_norm": 0.5399391651153564, "learning_rate": 1.2211843822766771e-06, "loss": 0.3967, "step": 13540 }, { "epoch": 2.7836365505190668, "grad_norm": 0.21860133111476898, "learning_rate": 1.218874375382214e-06, "loss": 0.3758, "step": 13541 }, { "epoch": 2.7838421214924454, "grad_norm": 0.2336316704750061, "learning_rate": 1.2165665253791764e-06, "loss": 0.393, "step": 13542 }, { "epoch": 2.784047692465824, "grad_norm": 0.22940628230571747, "learning_rate": 1.2142608323812582e-06, "loss": 0.3921, "step": 13543 }, { "epoch": 2.7842532634392025, "grad_norm": 0.12518246471881866, "learning_rate": 1.2119572965020588e-06, "loss": 0.4595, "step": 13544 }, { "epoch": 2.7844588344125807, "grad_norm": 0.12655936181545258, "learning_rate": 1.209655917855057e-06, "loss": 0.4395, "step": 13545 }, { "epoch": 2.7846644053859597, "grad_norm": 0.22794488072395325, "learning_rate": 1.2073566965536327e-06, "loss": 0.4002, "step": 13546 }, { "epoch": 2.784869976359338, "grad_norm": 0.23608453571796417, "learning_rate": 1.2050596327110598e-06, "loss": 0.3706, "step": 13547 }, { "epoch": 2.785075547332717, "grad_norm": 0.22875483334064484, "learning_rate": 1.202764726440503e-06, "loss": 0.3529, "step": 13548 }, { "epoch": 2.785281118306095, "grad_norm": 0.23389698565006256, "learning_rate": 1.2004719778550167e-06, "loss": 0.3826, "step": 13549 }, { "epoch": 2.7854866892794736, "grad_norm": 0.1383344829082489, "learning_rate": 1.1981813870675608e-06, "loss": 0.4421, "step": 13550 }, { "epoch": 2.785692260252852, "grad_norm": 0.2320520430803299, "learning_rate": 1.1958929541909798e-06, "loss": 0.3664, "step": 13551 }, { "epoch": 2.785897831226231, "grad_norm": 0.236195906996727, "learning_rate": 1.1936066793380035e-06, "loss": 0.3693, "step": 13552 }, { "epoch": 2.7861034021996094, "grad_norm": 0.23397257924079895, "learning_rate": 1.191322562621287e-06, "loss": 0.377, "step": 13553 }, { "epoch": 2.786308973172988, "grad_norm": 0.23099073767662048, "learning_rate": 1.1890406041533404e-06, "loss": 0.3729, "step": 13554 }, { "epoch": 2.7865145441463666, "grad_norm": 0.23826487362384796, "learning_rate": 1.1867608040465933e-06, "loss": 0.3772, "step": 13555 }, { "epoch": 2.786720115119745, "grad_norm": 0.23329326510429382, "learning_rate": 1.1844831624133611e-06, "loss": 0.3604, "step": 13556 }, { "epoch": 2.7869256860931237, "grad_norm": 0.22708694636821747, "learning_rate": 1.1822076793658493e-06, "loss": 0.3632, "step": 13557 }, { "epoch": 2.7871312570665023, "grad_norm": 0.23591133952140808, "learning_rate": 1.179934355016158e-06, "loss": 0.3962, "step": 13558 }, { "epoch": 2.787336828039881, "grad_norm": 0.23737779259681702, "learning_rate": 1.1776631894762874e-06, "loss": 0.37, "step": 13559 }, { "epoch": 2.787542399013259, "grad_norm": 0.22978876531124115, "learning_rate": 1.1753941828581283e-06, "loss": 0.3697, "step": 13560 }, { "epoch": 2.787747969986638, "grad_norm": 0.22215284407138824, "learning_rate": 1.1731273352734612e-06, "loss": 0.38, "step": 13561 }, { "epoch": 2.7879535409600162, "grad_norm": 0.22937338054180145, "learning_rate": 1.1708626468339619e-06, "loss": 0.3759, "step": 13562 }, { "epoch": 2.7881591119333953, "grad_norm": 0.2433684915304184, "learning_rate": 1.1686001176512108e-06, "loss": 0.383, "step": 13563 }, { "epoch": 2.7883646829067734, "grad_norm": 0.2298046201467514, "learning_rate": 1.1663397478366539e-06, "loss": 0.3739, "step": 13564 }, { "epoch": 2.788570253880152, "grad_norm": 0.12105909734964371, "learning_rate": 1.1640815375016623e-06, "loss": 0.4447, "step": 13565 }, { "epoch": 2.7887758248535306, "grad_norm": 0.22252750396728516, "learning_rate": 1.1618254867574918e-06, "loss": 0.3771, "step": 13566 }, { "epoch": 2.788981395826909, "grad_norm": 0.2337454855442047, "learning_rate": 1.1595715957152686e-06, "loss": 0.3896, "step": 13567 }, { "epoch": 2.7891869668002878, "grad_norm": 0.23451459407806396, "learning_rate": 1.157319864486054e-06, "loss": 0.3887, "step": 13568 }, { "epoch": 2.7893925377736664, "grad_norm": 0.2217395305633545, "learning_rate": 1.155070293180764e-06, "loss": 0.38, "step": 13569 }, { "epoch": 2.789598108747045, "grad_norm": 0.23397988080978394, "learning_rate": 1.1528228819102348e-06, "loss": 0.3831, "step": 13570 }, { "epoch": 2.7898036797204235, "grad_norm": 0.23299863934516907, "learning_rate": 1.1505776307851784e-06, "loss": 0.3913, "step": 13571 }, { "epoch": 2.790009250693802, "grad_norm": 0.12697121500968933, "learning_rate": 1.148334539916211e-06, "loss": 0.4494, "step": 13572 }, { "epoch": 2.7902148216671807, "grad_norm": 0.23409722745418549, "learning_rate": 1.1460936094138342e-06, "loss": 0.3775, "step": 13573 }, { "epoch": 2.7904203926405593, "grad_norm": 0.23983454704284668, "learning_rate": 1.1438548393884545e-06, "loss": 0.3591, "step": 13574 }, { "epoch": 2.7906259636139374, "grad_norm": 0.12223486602306366, "learning_rate": 1.1416182299503692e-06, "loss": 0.4504, "step": 13575 }, { "epoch": 2.7908315345873165, "grad_norm": 0.2318880558013916, "learning_rate": 1.1393837812097546e-06, "loss": 0.3754, "step": 13576 }, { "epoch": 2.7910371055606946, "grad_norm": 0.22092534601688385, "learning_rate": 1.137151493276703e-06, "loss": 0.3631, "step": 13577 }, { "epoch": 2.7912426765340737, "grad_norm": 0.23008911311626434, "learning_rate": 1.1349213662611764e-06, "loss": 0.3736, "step": 13578 }, { "epoch": 2.791448247507452, "grad_norm": 0.2271631807088852, "learning_rate": 1.1326934002730516e-06, "loss": 0.4001, "step": 13579 }, { "epoch": 2.7916538184808304, "grad_norm": 0.22966791689395905, "learning_rate": 1.1304675954220861e-06, "loss": 0.3774, "step": 13580 }, { "epoch": 2.791859389454209, "grad_norm": 0.2344343513250351, "learning_rate": 1.1282439518179373e-06, "loss": 0.3852, "step": 13581 }, { "epoch": 2.7920649604275876, "grad_norm": 0.21964535117149353, "learning_rate": 1.1260224695701571e-06, "loss": 0.3675, "step": 13582 }, { "epoch": 2.792270531400966, "grad_norm": 0.23566703498363495, "learning_rate": 1.1238031487881785e-06, "loss": 0.3684, "step": 13583 }, { "epoch": 2.7924761023743447, "grad_norm": 0.23792453110218048, "learning_rate": 1.1215859895813436e-06, "loss": 0.4032, "step": 13584 }, { "epoch": 2.7926816733477233, "grad_norm": 0.23992085456848145, "learning_rate": 1.1193709920588803e-06, "loss": 0.3779, "step": 13585 }, { "epoch": 2.792887244321102, "grad_norm": 0.23918254673480988, "learning_rate": 1.117158156329911e-06, "loss": 0.38, "step": 13586 }, { "epoch": 2.7930928152944805, "grad_norm": 0.23621824383735657, "learning_rate": 1.114947482503449e-06, "loss": 0.3967, "step": 13587 }, { "epoch": 2.793298386267859, "grad_norm": 0.23575182259082794, "learning_rate": 1.1127389706884017e-06, "loss": 0.3905, "step": 13588 }, { "epoch": 2.7935039572412377, "grad_norm": 0.11634790897369385, "learning_rate": 1.1105326209935874e-06, "loss": 0.4412, "step": 13589 }, { "epoch": 2.793709528214616, "grad_norm": 0.11823614686727524, "learning_rate": 1.108328433527689e-06, "loss": 0.4613, "step": 13590 }, { "epoch": 2.793915099187995, "grad_norm": 0.2277180552482605, "learning_rate": 1.1061264083992995e-06, "loss": 0.4023, "step": 13591 }, { "epoch": 2.794120670161373, "grad_norm": 0.23190085589885712, "learning_rate": 1.1039265457168973e-06, "loss": 0.3905, "step": 13592 }, { "epoch": 2.794326241134752, "grad_norm": 0.22355376183986664, "learning_rate": 1.1017288455888708e-06, "loss": 0.3748, "step": 13593 }, { "epoch": 2.79453181210813, "grad_norm": 0.22133591771125793, "learning_rate": 1.0995333081234783e-06, "loss": 0.3757, "step": 13594 }, { "epoch": 2.7947373830815088, "grad_norm": 0.2462836503982544, "learning_rate": 1.097339933428893e-06, "loss": 0.3903, "step": 13595 }, { "epoch": 2.7949429540548874, "grad_norm": 0.2253459244966507, "learning_rate": 1.095148721613169e-06, "loss": 0.3692, "step": 13596 }, { "epoch": 2.795148525028266, "grad_norm": 0.2545377016067505, "learning_rate": 1.0929596727842545e-06, "loss": 0.3871, "step": 13597 }, { "epoch": 2.7953540960016445, "grad_norm": 0.2286592274904251, "learning_rate": 1.0907727870499985e-06, "loss": 0.3749, "step": 13598 }, { "epoch": 2.795559666975023, "grad_norm": 0.23702724277973175, "learning_rate": 1.0885880645181395e-06, "loss": 0.3861, "step": 13599 }, { "epoch": 2.7957652379484017, "grad_norm": 0.12967750430107117, "learning_rate": 1.086405505296302e-06, "loss": 0.4553, "step": 13600 }, { "epoch": 2.7959708089217803, "grad_norm": 0.23417022824287415, "learning_rate": 1.0842251094920042e-06, "loss": 0.3808, "step": 13601 }, { "epoch": 2.796176379895159, "grad_norm": 0.23133817315101624, "learning_rate": 1.0820468772126858e-06, "loss": 0.3838, "step": 13602 }, { "epoch": 2.7963819508685375, "grad_norm": 0.12354867160320282, "learning_rate": 1.0798708085656406e-06, "loss": 0.4403, "step": 13603 }, { "epoch": 2.796587521841916, "grad_norm": 0.2297942191362381, "learning_rate": 1.0776969036580831e-06, "loss": 0.3838, "step": 13604 }, { "epoch": 2.7967930928152946, "grad_norm": 0.12144782394170761, "learning_rate": 1.0755251625971025e-06, "loss": 0.4596, "step": 13605 }, { "epoch": 2.7969986637886732, "grad_norm": 0.23768429458141327, "learning_rate": 1.0733555854896931e-06, "loss": 0.389, "step": 13606 }, { "epoch": 2.7972042347620514, "grad_norm": 0.11554042994976044, "learning_rate": 1.0711881724427398e-06, "loss": 0.4285, "step": 13607 }, { "epoch": 2.7974098057354304, "grad_norm": 0.23497696220874786, "learning_rate": 1.0690229235630318e-06, "loss": 0.3711, "step": 13608 }, { "epoch": 2.7976153767088086, "grad_norm": 0.23065055906772614, "learning_rate": 1.0668598389572187e-06, "loss": 0.3752, "step": 13609 }, { "epoch": 2.797820947682187, "grad_norm": 0.2266397476196289, "learning_rate": 1.0646989187318856e-06, "loss": 0.3693, "step": 13610 }, { "epoch": 2.7980265186555657, "grad_norm": 0.2287440001964569, "learning_rate": 1.0625401629934873e-06, "loss": 0.3822, "step": 13611 }, { "epoch": 2.7982320896289443, "grad_norm": 0.23608548939228058, "learning_rate": 1.0603835718483686e-06, "loss": 0.3633, "step": 13612 }, { "epoch": 2.798437660602323, "grad_norm": 0.23724471032619476, "learning_rate": 1.0582291454027792e-06, "loss": 0.3976, "step": 13613 }, { "epoch": 2.7986432315757015, "grad_norm": 0.3131234645843506, "learning_rate": 1.0560768837628549e-06, "loss": 0.3685, "step": 13614 }, { "epoch": 2.79884880254908, "grad_norm": 0.24307109415531158, "learning_rate": 1.0539267870346253e-06, "loss": 0.3986, "step": 13615 }, { "epoch": 2.7990543735224587, "grad_norm": 0.23056308925151825, "learning_rate": 1.051778855324026e-06, "loss": 0.3667, "step": 13616 }, { "epoch": 2.7992599444958373, "grad_norm": 0.2293158620595932, "learning_rate": 1.0496330887368672e-06, "loss": 0.3761, "step": 13617 }, { "epoch": 2.799465515469216, "grad_norm": 0.23687753081321716, "learning_rate": 1.0474894873788643e-06, "loss": 0.4005, "step": 13618 }, { "epoch": 2.7996710864425944, "grad_norm": 0.2287084013223648, "learning_rate": 1.045348051355618e-06, "loss": 0.3946, "step": 13619 }, { "epoch": 2.799876657415973, "grad_norm": 0.23279039561748505, "learning_rate": 1.0432087807726288e-06, "loss": 0.3591, "step": 13620 }, { "epoch": 2.8000822283893516, "grad_norm": 0.23075073957443237, "learning_rate": 1.0410716757352923e-06, "loss": 0.3777, "step": 13621 }, { "epoch": 2.8002877993627298, "grad_norm": 0.23093274235725403, "learning_rate": 1.0389367363488895e-06, "loss": 0.4152, "step": 13622 }, { "epoch": 2.800493370336109, "grad_norm": 0.2433861345052719, "learning_rate": 1.036803962718601e-06, "loss": 0.3827, "step": 13623 }, { "epoch": 2.800698941309487, "grad_norm": 0.2396126538515091, "learning_rate": 1.034673354949498e-06, "loss": 0.3938, "step": 13624 }, { "epoch": 2.8009045122828655, "grad_norm": 0.231951504945755, "learning_rate": 1.0325449131465414e-06, "loss": 0.3815, "step": 13625 }, { "epoch": 2.801110083256244, "grad_norm": 0.23407815396785736, "learning_rate": 1.0304186374145975e-06, "loss": 0.3898, "step": 13626 }, { "epoch": 2.8013156542296227, "grad_norm": 0.23772378265857697, "learning_rate": 1.0282945278584172e-06, "loss": 0.3771, "step": 13627 }, { "epoch": 2.8015212252030013, "grad_norm": 0.3126058578491211, "learning_rate": 1.026172584582632e-06, "loss": 0.3637, "step": 13628 }, { "epoch": 2.80172679617638, "grad_norm": 0.1172962412238121, "learning_rate": 1.0240528076917982e-06, "loss": 0.4601, "step": 13629 }, { "epoch": 2.8019323671497585, "grad_norm": 0.2429337203502655, "learning_rate": 1.0219351972903375e-06, "loss": 0.368, "step": 13630 }, { "epoch": 2.802137938123137, "grad_norm": 0.24216631054878235, "learning_rate": 1.019819753482576e-06, "loss": 0.3908, "step": 13631 }, { "epoch": 2.8023435090965156, "grad_norm": 0.23483149707317352, "learning_rate": 1.0177064763727356e-06, "loss": 0.3796, "step": 13632 }, { "epoch": 2.8025490800698942, "grad_norm": 0.22960315644741058, "learning_rate": 1.0155953660649232e-06, "loss": 0.3897, "step": 13633 }, { "epoch": 2.802754651043273, "grad_norm": 0.22772780060768127, "learning_rate": 1.0134864226631402e-06, "loss": 0.3716, "step": 13634 }, { "epoch": 2.8029602220166514, "grad_norm": 0.12643857300281525, "learning_rate": 1.0113796462712888e-06, "loss": 0.4547, "step": 13635 }, { "epoch": 2.80316579299003, "grad_norm": 0.2468147575855255, "learning_rate": 1.009275036993166e-06, "loss": 0.3853, "step": 13636 }, { "epoch": 2.803371363963408, "grad_norm": 0.25671377778053284, "learning_rate": 1.0071725949324484e-06, "loss": 0.3783, "step": 13637 }, { "epoch": 2.803576934936787, "grad_norm": 0.2457493543624878, "learning_rate": 1.0050723201927136e-06, "loss": 0.391, "step": 13638 }, { "epoch": 2.8037825059101653, "grad_norm": 0.23700536787509918, "learning_rate": 1.002974212877439e-06, "loss": 0.3792, "step": 13639 }, { "epoch": 2.803988076883544, "grad_norm": 0.23173773288726807, "learning_rate": 1.0008782730899764e-06, "loss": 0.4102, "step": 13640 }, { "epoch": 2.8041936478569225, "grad_norm": 0.22917988896369934, "learning_rate": 9.987845009335933e-07, "loss": 0.3808, "step": 13641 }, { "epoch": 2.804399218830301, "grad_norm": 0.22746974229812622, "learning_rate": 9.966928965114325e-07, "loss": 0.3807, "step": 13642 }, { "epoch": 2.8046047898036797, "grad_norm": 0.2384635955095291, "learning_rate": 9.946034599265464e-07, "loss": 0.3678, "step": 13643 }, { "epoch": 2.8048103607770583, "grad_norm": 0.12134691327810287, "learning_rate": 9.925161912818625e-07, "loss": 0.4635, "step": 13644 }, { "epoch": 2.805015931750437, "grad_norm": 0.12150728702545166, "learning_rate": 9.90431090680224e-07, "loss": 0.4485, "step": 13645 }, { "epoch": 2.8052215027238154, "grad_norm": 0.2309166043996811, "learning_rate": 9.88348158224338e-07, "loss": 0.3792, "step": 13646 }, { "epoch": 2.805427073697194, "grad_norm": 0.12357798218727112, "learning_rate": 9.862673940168332e-07, "loss": 0.4412, "step": 13647 }, { "epoch": 2.8056326446705726, "grad_norm": 0.1247616782784462, "learning_rate": 9.841887981602121e-07, "loss": 0.4396, "step": 13648 }, { "epoch": 2.805838215643951, "grad_norm": 0.231892392039299, "learning_rate": 9.82112370756873e-07, "loss": 0.3681, "step": 13649 }, { "epoch": 2.80604378661733, "grad_norm": 0.23392446339130402, "learning_rate": 9.80038111909124e-07, "loss": 0.3689, "step": 13650 }, { "epoch": 2.8062493575907084, "grad_norm": 0.22620777785778046, "learning_rate": 9.779660217191484e-07, "loss": 0.3742, "step": 13651 }, { "epoch": 2.8064549285640865, "grad_norm": 0.23345611989498138, "learning_rate": 9.758961002890242e-07, "loss": 0.3886, "step": 13652 }, { "epoch": 2.8066604995374655, "grad_norm": 0.22174043953418732, "learning_rate": 9.738283477207405e-07, "loss": 0.3853, "step": 13653 }, { "epoch": 2.8068660705108437, "grad_norm": 0.125930517911911, "learning_rate": 9.717627641161502e-07, "loss": 0.4399, "step": 13654 }, { "epoch": 2.8070716414842223, "grad_norm": 0.23766390979290009, "learning_rate": 9.696993495770224e-07, "loss": 0.3779, "step": 13655 }, { "epoch": 2.807277212457601, "grad_norm": 0.22734849154949188, "learning_rate": 9.676381042050053e-07, "loss": 0.3785, "step": 13656 }, { "epoch": 2.8074827834309795, "grad_norm": 0.12355753779411316, "learning_rate": 9.65579028101658e-07, "loss": 0.4431, "step": 13657 }, { "epoch": 2.807688354404358, "grad_norm": 0.23215292394161224, "learning_rate": 9.635221213684143e-07, "loss": 0.3898, "step": 13658 }, { "epoch": 2.8078939253777366, "grad_norm": 0.2282809466123581, "learning_rate": 9.61467384106613e-07, "loss": 0.3711, "step": 13659 }, { "epoch": 2.808099496351115, "grad_norm": 0.23502275347709656, "learning_rate": 9.594148164174731e-07, "loss": 0.3777, "step": 13660 }, { "epoch": 2.808305067324494, "grad_norm": 0.13005268573760986, "learning_rate": 9.57364418402124e-07, "loss": 0.4485, "step": 13661 }, { "epoch": 2.8085106382978724, "grad_norm": 0.12916310131549835, "learning_rate": 9.553161901615748e-07, "loss": 0.433, "step": 13662 }, { "epoch": 2.808716209271251, "grad_norm": 0.22590284049510956, "learning_rate": 9.532701317967247e-07, "loss": 0.374, "step": 13663 }, { "epoch": 2.8089217802446296, "grad_norm": 0.22926348447799683, "learning_rate": 9.512262434083879e-07, "loss": 0.3615, "step": 13664 }, { "epoch": 2.809127351218008, "grad_norm": 0.22875775396823883, "learning_rate": 9.491845250972542e-07, "loss": 0.3893, "step": 13665 }, { "epoch": 2.8093329221913867, "grad_norm": 0.22488847374916077, "learning_rate": 9.47144976963903e-07, "loss": 0.3798, "step": 13666 }, { "epoch": 2.809538493164765, "grad_norm": 0.2324180006980896, "learning_rate": 9.451075991088138e-07, "loss": 0.3821, "step": 13667 }, { "epoch": 2.809744064138144, "grad_norm": 0.22795747220516205, "learning_rate": 9.430723916323663e-07, "loss": 0.3638, "step": 13668 }, { "epoch": 2.809949635111522, "grad_norm": 0.23047983646392822, "learning_rate": 9.410393546348156e-07, "loss": 0.4035, "step": 13669 }, { "epoch": 2.8101552060849007, "grad_norm": 0.23792816698551178, "learning_rate": 9.390084882163214e-07, "loss": 0.3815, "step": 13670 }, { "epoch": 2.8103607770582792, "grad_norm": 0.22404231131076813, "learning_rate": 9.369797924769436e-07, "loss": 0.3589, "step": 13671 }, { "epoch": 2.810566348031658, "grad_norm": 0.23044191300868988, "learning_rate": 9.349532675166223e-07, "loss": 0.3835, "step": 13672 }, { "epoch": 2.8107719190050364, "grad_norm": 0.232622891664505, "learning_rate": 9.329289134351927e-07, "loss": 0.3969, "step": 13673 }, { "epoch": 2.810977489978415, "grad_norm": 0.2428961992263794, "learning_rate": 9.309067303323848e-07, "loss": 0.3955, "step": 13674 }, { "epoch": 2.8111830609517936, "grad_norm": 0.12209093570709229, "learning_rate": 9.288867183078243e-07, "loss": 0.4581, "step": 13675 }, { "epoch": 2.811388631925172, "grad_norm": 0.2335900366306305, "learning_rate": 9.268688774610313e-07, "loss": 0.3835, "step": 13676 }, { "epoch": 2.8115942028985508, "grad_norm": 0.22861804068088531, "learning_rate": 9.248532078914063e-07, "loss": 0.3936, "step": 13677 }, { "epoch": 2.8117997738719294, "grad_norm": 0.2362525463104248, "learning_rate": 9.2283970969826e-07, "loss": 0.3921, "step": 13678 }, { "epoch": 2.812005344845308, "grad_norm": 0.2308216392993927, "learning_rate": 9.208283829807829e-07, "loss": 0.4013, "step": 13679 }, { "epoch": 2.8122109158186865, "grad_norm": 0.22565658390522003, "learning_rate": 9.188192278380709e-07, "loss": 0.3744, "step": 13680 }, { "epoch": 2.812416486792065, "grad_norm": 0.22707243263721466, "learning_rate": 9.168122443690997e-07, "loss": 0.3629, "step": 13681 }, { "epoch": 2.8126220577654433, "grad_norm": 0.22881367802619934, "learning_rate": 9.148074326727402e-07, "loss": 0.3871, "step": 13682 }, { "epoch": 2.8128276287388223, "grad_norm": 0.21950559318065643, "learning_rate": 9.128047928477685e-07, "loss": 0.3675, "step": 13683 }, { "epoch": 2.8130331997122004, "grad_norm": 0.23215575516223907, "learning_rate": 9.108043249928355e-07, "loss": 0.3695, "step": 13684 }, { "epoch": 2.813238770685579, "grad_norm": 0.250355988740921, "learning_rate": 9.088060292065076e-07, "loss": 0.3879, "step": 13685 }, { "epoch": 2.8134443416589576, "grad_norm": 0.2308340221643448, "learning_rate": 9.068099055872259e-07, "loss": 0.3749, "step": 13686 }, { "epoch": 2.813649912632336, "grad_norm": 0.11878734081983566, "learning_rate": 9.048159542333268e-07, "loss": 0.4479, "step": 13687 }, { "epoch": 2.813855483605715, "grad_norm": 0.12243502587080002, "learning_rate": 9.028241752430417e-07, "loss": 0.463, "step": 13688 }, { "epoch": 2.8140610545790934, "grad_norm": 0.22359062731266022, "learning_rate": 9.00834568714507e-07, "loss": 0.3762, "step": 13689 }, { "epoch": 2.814266625552472, "grad_norm": 0.22925424575805664, "learning_rate": 8.988471347457295e-07, "loss": 0.3776, "step": 13690 }, { "epoch": 2.8144721965258506, "grad_norm": 0.2405097633600235, "learning_rate": 8.968618734346207e-07, "loss": 0.3733, "step": 13691 }, { "epoch": 2.814677767499229, "grad_norm": 0.21798452734947205, "learning_rate": 8.948787848789974e-07, "loss": 0.365, "step": 13692 }, { "epoch": 2.8148833384726077, "grad_norm": 0.24408473074436188, "learning_rate": 8.928978691765466e-07, "loss": 0.3723, "step": 13693 }, { "epoch": 2.8150889094459863, "grad_norm": 0.23546668887138367, "learning_rate": 8.909191264248601e-07, "loss": 0.399, "step": 13694 }, { "epoch": 2.815294480419365, "grad_norm": 0.2411290407180786, "learning_rate": 8.889425567214249e-07, "loss": 0.3898, "step": 13695 }, { "epoch": 2.8155000513927435, "grad_norm": 0.12170881778001785, "learning_rate": 8.869681601636181e-07, "loss": 0.453, "step": 13696 }, { "epoch": 2.8157056223661217, "grad_norm": 0.22512827813625336, "learning_rate": 8.849959368487021e-07, "loss": 0.3593, "step": 13697 }, { "epoch": 2.8159111933395007, "grad_norm": 0.23673585057258606, "learning_rate": 8.830258868738439e-07, "loss": 0.3814, "step": 13698 }, { "epoch": 2.816116764312879, "grad_norm": 0.23666398227214813, "learning_rate": 8.81058010336101e-07, "loss": 0.3891, "step": 13699 }, { "epoch": 2.8163223352862574, "grad_norm": 0.23257263004779816, "learning_rate": 8.790923073324159e-07, "loss": 0.3874, "step": 13700 }, { "epoch": 2.816527906259636, "grad_norm": 0.11912301182746887, "learning_rate": 8.771287779596361e-07, "loss": 0.4726, "step": 13701 }, { "epoch": 2.8167334772330146, "grad_norm": 0.24169382452964783, "learning_rate": 8.75167422314489e-07, "loss": 0.3887, "step": 13702 }, { "epoch": 2.816939048206393, "grad_norm": 0.24105940759181976, "learning_rate": 8.732082404936026e-07, "loss": 0.3656, "step": 13703 }, { "epoch": 2.8171446191797718, "grad_norm": 0.23163765668869019, "learning_rate": 8.712512325934946e-07, "loss": 0.3995, "step": 13704 }, { "epoch": 2.8173501901531504, "grad_norm": 0.24219734966754913, "learning_rate": 8.692963987105878e-07, "loss": 0.3994, "step": 13705 }, { "epoch": 2.817555761126529, "grad_norm": 0.23079170286655426, "learning_rate": 8.673437389411804e-07, "loss": 0.386, "step": 13706 }, { "epoch": 2.8177613320999075, "grad_norm": 0.23005284368991852, "learning_rate": 8.653932533814702e-07, "loss": 0.3753, "step": 13707 }, { "epoch": 2.817966903073286, "grad_norm": 0.23586174845695496, "learning_rate": 8.634449421275504e-07, "loss": 0.3902, "step": 13708 }, { "epoch": 2.8181724740466647, "grad_norm": 0.22992920875549316, "learning_rate": 8.614988052754042e-07, "loss": 0.3829, "step": 13709 }, { "epoch": 2.8183780450200433, "grad_norm": 0.2352675497531891, "learning_rate": 8.5955484292091e-07, "loss": 0.3804, "step": 13710 }, { "epoch": 2.818583615993422, "grad_norm": 0.22630825638771057, "learning_rate": 8.576130551598311e-07, "loss": 0.3642, "step": 13711 }, { "epoch": 2.8187891869668, "grad_norm": 0.23707729578018188, "learning_rate": 8.556734420878409e-07, "loss": 0.3683, "step": 13712 }, { "epoch": 2.818994757940179, "grad_norm": 0.23465366661548615, "learning_rate": 8.537360038004883e-07, "loss": 0.3868, "step": 13713 }, { "epoch": 2.819200328913557, "grad_norm": 0.23585152626037598, "learning_rate": 8.518007403932266e-07, "loss": 0.4204, "step": 13714 }, { "epoch": 2.8194058998869362, "grad_norm": 0.23271988332271576, "learning_rate": 8.498676519613947e-07, "loss": 0.3661, "step": 13715 }, { "epoch": 2.8196114708603144, "grad_norm": 0.23224134743213654, "learning_rate": 8.479367386002163e-07, "loss": 0.3807, "step": 13716 }, { "epoch": 2.819817041833693, "grad_norm": 0.22672690451145172, "learning_rate": 8.460080004048404e-07, "loss": 0.3921, "step": 13717 }, { "epoch": 2.8200226128070716, "grad_norm": 0.2301137000322342, "learning_rate": 8.44081437470266e-07, "loss": 0.3761, "step": 13718 }, { "epoch": 2.82022818378045, "grad_norm": 0.24038895964622498, "learning_rate": 8.421570498914222e-07, "loss": 0.3823, "step": 13719 }, { "epoch": 2.8204337547538287, "grad_norm": 0.11897142231464386, "learning_rate": 8.402348377631031e-07, "loss": 0.4372, "step": 13720 }, { "epoch": 2.8206393257272073, "grad_norm": 0.23280301690101624, "learning_rate": 8.383148011800179e-07, "loss": 0.3707, "step": 13721 }, { "epoch": 2.820844896700586, "grad_norm": 0.2358703911304474, "learning_rate": 8.363969402367461e-07, "loss": 0.3826, "step": 13722 }, { "epoch": 2.8210504676739645, "grad_norm": 0.2333759367465973, "learning_rate": 8.34481255027777e-07, "loss": 0.3911, "step": 13723 }, { "epoch": 2.821256038647343, "grad_norm": 0.23327887058258057, "learning_rate": 8.325677456474901e-07, "loss": 0.3781, "step": 13724 }, { "epoch": 2.8214616096207217, "grad_norm": 0.23647433519363403, "learning_rate": 8.30656412190145e-07, "loss": 0.3817, "step": 13725 }, { "epoch": 2.8216671805941003, "grad_norm": 0.12305039912462234, "learning_rate": 8.287472547499165e-07, "loss": 0.4555, "step": 13726 }, { "epoch": 2.8218727515674784, "grad_norm": 0.22186824679374695, "learning_rate": 8.268402734208592e-07, "loss": 0.3963, "step": 13727 }, { "epoch": 2.8220783225408574, "grad_norm": 0.22588272392749786, "learning_rate": 8.249354682969129e-07, "loss": 0.3854, "step": 13728 }, { "epoch": 2.8222838935142356, "grad_norm": 0.23009559512138367, "learning_rate": 8.230328394719228e-07, "loss": 0.3894, "step": 13729 }, { "epoch": 2.8224894644876146, "grad_norm": 0.23012928664684296, "learning_rate": 8.211323870396187e-07, "loss": 0.3711, "step": 13730 }, { "epoch": 2.8226950354609928, "grad_norm": 0.12790702283382416, "learning_rate": 8.192341110936358e-07, "loss": 0.466, "step": 13731 }, { "epoch": 2.8229006064343714, "grad_norm": 0.2347603589296341, "learning_rate": 8.173380117274792e-07, "loss": 0.3855, "step": 13732 }, { "epoch": 2.82310617740775, "grad_norm": 0.11841531097888947, "learning_rate": 8.154440890345794e-07, "loss": 0.4421, "step": 13733 }, { "epoch": 2.8233117483811285, "grad_norm": 0.22990132868289948, "learning_rate": 8.135523431082265e-07, "loss": 0.373, "step": 13734 }, { "epoch": 2.823517319354507, "grad_norm": 0.2206183522939682, "learning_rate": 8.11662774041626e-07, "loss": 0.3587, "step": 13735 }, { "epoch": 2.8237228903278857, "grad_norm": 0.2378583699464798, "learning_rate": 8.097753819278636e-07, "loss": 0.3793, "step": 13736 }, { "epoch": 2.8239284613012643, "grad_norm": 0.22767938673496246, "learning_rate": 8.078901668599149e-07, "loss": 0.3706, "step": 13737 }, { "epoch": 2.824134032274643, "grad_norm": 0.23271609842777252, "learning_rate": 8.060071289306753e-07, "loss": 0.3807, "step": 13738 }, { "epoch": 2.8243396032480215, "grad_norm": 0.21641339361667633, "learning_rate": 8.04126268232901e-07, "loss": 0.3673, "step": 13739 }, { "epoch": 2.8245451742214, "grad_norm": 0.2371521145105362, "learning_rate": 8.022475848592475e-07, "loss": 0.3795, "step": 13740 }, { "epoch": 2.8247507451947786, "grad_norm": 0.22861357033252716, "learning_rate": 8.003710789022811e-07, "loss": 0.3907, "step": 13741 }, { "epoch": 2.824956316168157, "grad_norm": 0.23238502442836761, "learning_rate": 7.984967504544427e-07, "loss": 0.376, "step": 13742 }, { "epoch": 2.825161887141536, "grad_norm": 0.2233378142118454, "learning_rate": 7.966245996080734e-07, "loss": 0.3744, "step": 13743 }, { "epoch": 2.825367458114914, "grad_norm": 0.22623707354068756, "learning_rate": 7.947546264553996e-07, "loss": 0.3867, "step": 13744 }, { "epoch": 2.825573029088293, "grad_norm": 0.24018484354019165, "learning_rate": 7.928868310885573e-07, "loss": 0.3648, "step": 13745 }, { "epoch": 2.825778600061671, "grad_norm": 0.13057471811771393, "learning_rate": 7.910212135995481e-07, "loss": 0.4654, "step": 13746 }, { "epoch": 2.8259841710350497, "grad_norm": 0.22883687913417816, "learning_rate": 7.891577740802985e-07, "loss": 0.3663, "step": 13747 }, { "epoch": 2.8261897420084283, "grad_norm": 0.23778748512268066, "learning_rate": 7.872965126226e-07, "loss": 0.3603, "step": 13748 }, { "epoch": 2.826395312981807, "grad_norm": 0.12038971483707428, "learning_rate": 7.854374293181593e-07, "loss": 0.4537, "step": 13749 }, { "epoch": 2.8266008839551855, "grad_norm": 0.11914535611867905, "learning_rate": 7.835805242585531e-07, "loss": 0.4408, "step": 13750 }, { "epoch": 2.826806454928564, "grad_norm": 0.22773846983909607, "learning_rate": 7.817257975352682e-07, "loss": 0.3739, "step": 13751 }, { "epoch": 2.8270120259019427, "grad_norm": 0.2309103161096573, "learning_rate": 7.798732492396815e-07, "loss": 0.3781, "step": 13752 }, { "epoch": 2.8272175968753213, "grad_norm": 0.12411284446716309, "learning_rate": 7.780228794630451e-07, "loss": 0.4418, "step": 13753 }, { "epoch": 2.8274231678487, "grad_norm": 0.22320185601711273, "learning_rate": 7.761746882965359e-07, "loss": 0.3706, "step": 13754 }, { "epoch": 2.8276287388220784, "grad_norm": 0.23378294706344604, "learning_rate": 7.743286758312013e-07, "loss": 0.3784, "step": 13755 }, { "epoch": 2.827834309795457, "grad_norm": 0.23577441275119781, "learning_rate": 7.724848421579784e-07, "loss": 0.371, "step": 13756 }, { "epoch": 2.828039880768835, "grad_norm": 0.22351431846618652, "learning_rate": 7.706431873677094e-07, "loss": 0.3703, "step": 13757 }, { "epoch": 2.828245451742214, "grad_norm": 0.24170389771461487, "learning_rate": 7.688037115511171e-07, "loss": 0.391, "step": 13758 }, { "epoch": 2.8284510227155923, "grad_norm": 0.23205341398715973, "learning_rate": 7.669664147988387e-07, "loss": 0.3744, "step": 13759 }, { "epoch": 2.8286565936889714, "grad_norm": 0.22255219519138336, "learning_rate": 7.651312972013769e-07, "loss": 0.3775, "step": 13760 }, { "epoch": 2.8288621646623495, "grad_norm": 0.22708290815353394, "learning_rate": 7.632983588491393e-07, "loss": 0.3945, "step": 13761 }, { "epoch": 2.829067735635728, "grad_norm": 0.23190079629421234, "learning_rate": 7.614675998324339e-07, "loss": 0.3955, "step": 13762 }, { "epoch": 2.8292733066091067, "grad_norm": 0.11703302711248398, "learning_rate": 7.596390202414483e-07, "loss": 0.4556, "step": 13763 }, { "epoch": 2.8294788775824853, "grad_norm": 0.232466459274292, "learning_rate": 7.578126201662706e-07, "loss": 0.3894, "step": 13764 }, { "epoch": 2.829684448555864, "grad_norm": 0.23175998032093048, "learning_rate": 7.559883996968787e-07, "loss": 0.36, "step": 13765 }, { "epoch": 2.8298900195292425, "grad_norm": 0.2221493124961853, "learning_rate": 7.541663589231407e-07, "loss": 0.3767, "step": 13766 }, { "epoch": 2.830095590502621, "grad_norm": 0.23145779967308044, "learning_rate": 7.5234649793482e-07, "loss": 0.3761, "step": 13767 }, { "epoch": 2.8303011614759996, "grad_norm": 0.2308301031589508, "learning_rate": 7.505288168215746e-07, "loss": 0.3777, "step": 13768 }, { "epoch": 2.8305067324493782, "grad_norm": 0.22926832735538483, "learning_rate": 7.487133156729531e-07, "loss": 0.3794, "step": 13769 }, { "epoch": 2.830712303422757, "grad_norm": 0.22793909907341003, "learning_rate": 7.468999945783989e-07, "loss": 0.3854, "step": 13770 }, { "epoch": 2.8309178743961354, "grad_norm": 0.23420362174510956, "learning_rate": 7.450888536272455e-07, "loss": 0.3804, "step": 13771 }, { "epoch": 2.8311234453695135, "grad_norm": 0.2258753925561905, "learning_rate": 7.432798929087115e-07, "loss": 0.386, "step": 13772 }, { "epoch": 2.8313290163428926, "grad_norm": 0.12601035833358765, "learning_rate": 7.414731125119256e-07, "loss": 0.4424, "step": 13773 }, { "epoch": 2.8315345873162707, "grad_norm": 0.22683130204677582, "learning_rate": 7.396685125258917e-07, "loss": 0.3806, "step": 13774 }, { "epoch": 2.8317401582896498, "grad_norm": 0.23239809274673462, "learning_rate": 7.378660930395237e-07, "loss": 0.373, "step": 13775 }, { "epoch": 2.831945729263028, "grad_norm": 0.23171231150627136, "learning_rate": 7.360658541416054e-07, "loss": 0.3781, "step": 13776 }, { "epoch": 2.8321513002364065, "grad_norm": 0.23430903255939484, "learning_rate": 7.34267795920841e-07, "loss": 0.3819, "step": 13777 }, { "epoch": 2.832356871209785, "grad_norm": 0.22949565947055817, "learning_rate": 7.324719184657997e-07, "loss": 0.378, "step": 13778 }, { "epoch": 2.8325624421831637, "grad_norm": 0.11871360242366791, "learning_rate": 7.306782218649605e-07, "loss": 0.4448, "step": 13779 }, { "epoch": 2.8327680131565423, "grad_norm": 0.2298881858587265, "learning_rate": 7.288867062066928e-07, "loss": 0.3606, "step": 13780 }, { "epoch": 2.832973584129921, "grad_norm": 0.11663959920406342, "learning_rate": 7.270973715792562e-07, "loss": 0.4501, "step": 13781 }, { "epoch": 2.8331791551032994, "grad_norm": 0.12173844128847122, "learning_rate": 7.253102180707949e-07, "loss": 0.4564, "step": 13782 }, { "epoch": 2.833384726076678, "grad_norm": 0.2263535112142563, "learning_rate": 7.235252457693686e-07, "loss": 0.3858, "step": 13783 }, { "epoch": 2.8335902970500566, "grad_norm": 0.11779969185590744, "learning_rate": 7.21742454762902e-07, "loss": 0.4431, "step": 13784 }, { "epoch": 2.833795868023435, "grad_norm": 0.2434069812297821, "learning_rate": 7.199618451392298e-07, "loss": 0.4067, "step": 13785 }, { "epoch": 2.834001438996814, "grad_norm": 0.22886650264263153, "learning_rate": 7.181834169860719e-07, "loss": 0.3828, "step": 13786 }, { "epoch": 2.8342070099701924, "grad_norm": 0.2306927889585495, "learning_rate": 7.16407170391038e-07, "loss": 0.3762, "step": 13787 }, { "epoch": 2.834412580943571, "grad_norm": 0.2322409451007843, "learning_rate": 7.146331054416483e-07, "loss": 0.3907, "step": 13788 }, { "epoch": 2.834618151916949, "grad_norm": 0.22728115320205688, "learning_rate": 7.128612222252979e-07, "loss": 0.3824, "step": 13789 }, { "epoch": 2.834823722890328, "grad_norm": 0.225159153342247, "learning_rate": 7.110915208292768e-07, "loss": 0.4054, "step": 13790 }, { "epoch": 2.8350292938637063, "grad_norm": 0.12113186717033386, "learning_rate": 7.093240013407704e-07, "loss": 0.439, "step": 13791 }, { "epoch": 2.835234864837085, "grad_norm": 0.2332168072462082, "learning_rate": 7.07558663846854e-07, "loss": 0.3793, "step": 13792 }, { "epoch": 2.8354404358104635, "grad_norm": 0.22835347056388855, "learning_rate": 7.05795508434503e-07, "loss": 0.3758, "step": 13793 }, { "epoch": 2.835646006783842, "grad_norm": 0.12069544196128845, "learning_rate": 7.040345351905731e-07, "loss": 0.4602, "step": 13794 }, { "epoch": 2.8358515777572206, "grad_norm": 0.22868898510932922, "learning_rate": 7.022757442018246e-07, "loss": 0.3804, "step": 13795 }, { "epoch": 2.836057148730599, "grad_norm": 0.232134148478508, "learning_rate": 7.005191355549034e-07, "loss": 0.3889, "step": 13796 }, { "epoch": 2.836262719703978, "grad_norm": 0.23718050122261047, "learning_rate": 6.987647093363503e-07, "loss": 0.3728, "step": 13797 }, { "epoch": 2.8364682906773564, "grad_norm": 0.24368955194950104, "learning_rate": 6.970124656325911e-07, "loss": 0.3852, "step": 13798 }, { "epoch": 2.836673861650735, "grad_norm": 0.2304588258266449, "learning_rate": 6.952624045299617e-07, "loss": 0.3809, "step": 13799 }, { "epoch": 2.8368794326241136, "grad_norm": 0.23114575445652008, "learning_rate": 6.935145261146731e-07, "loss": 0.3808, "step": 13800 }, { "epoch": 2.837085003597492, "grad_norm": 0.22746378183364868, "learning_rate": 6.917688304728315e-07, "loss": 0.3887, "step": 13801 }, { "epoch": 2.8372905745708707, "grad_norm": 0.22767049074172974, "learning_rate": 6.900253176904481e-07, "loss": 0.3729, "step": 13802 }, { "epoch": 2.8374961455442493, "grad_norm": 0.22864069044589996, "learning_rate": 6.882839878534092e-07, "loss": 0.3854, "step": 13803 }, { "epoch": 2.8377017165176275, "grad_norm": 0.22305408120155334, "learning_rate": 6.865448410475112e-07, "loss": 0.4005, "step": 13804 }, { "epoch": 2.8379072874910065, "grad_norm": 0.22816435992717743, "learning_rate": 6.848078773584255e-07, "loss": 0.3775, "step": 13805 }, { "epoch": 2.8381128584643847, "grad_norm": 0.23188713192939758, "learning_rate": 6.830730968717236e-07, "loss": 0.3879, "step": 13806 }, { "epoch": 2.8383184294377632, "grad_norm": 0.11994650214910507, "learning_rate": 6.813404996728823e-07, "loss": 0.4432, "step": 13807 }, { "epoch": 2.838524000411142, "grad_norm": 0.23941002786159515, "learning_rate": 6.796100858472382e-07, "loss": 0.3655, "step": 13808 }, { "epoch": 2.8387295713845204, "grad_norm": 0.12042734026908875, "learning_rate": 6.778818554800581e-07, "loss": 0.451, "step": 13809 }, { "epoch": 2.838935142357899, "grad_norm": 0.23225072026252747, "learning_rate": 6.76155808656479e-07, "loss": 0.3759, "step": 13810 }, { "epoch": 2.8391407133312776, "grad_norm": 0.23144301772117615, "learning_rate": 6.744319454615328e-07, "loss": 0.3922, "step": 13811 }, { "epoch": 2.839346284304656, "grad_norm": 0.24022118747234344, "learning_rate": 6.727102659801515e-07, "loss": 0.3847, "step": 13812 }, { "epoch": 2.8395518552780348, "grad_norm": 0.22620242834091187, "learning_rate": 6.709907702971474e-07, "loss": 0.3849, "step": 13813 }, { "epoch": 2.8397574262514134, "grad_norm": 0.2255433201789856, "learning_rate": 6.692734584972326e-07, "loss": 0.3737, "step": 13814 }, { "epoch": 2.839962997224792, "grad_norm": 0.2278052270412445, "learning_rate": 6.675583306650096e-07, "loss": 0.3742, "step": 13815 }, { "epoch": 2.8401685681981705, "grad_norm": 0.22527383267879486, "learning_rate": 6.658453868849857e-07, "loss": 0.3887, "step": 13816 }, { "epoch": 2.840374139171549, "grad_norm": 0.2278517484664917, "learning_rate": 6.641346272415383e-07, "loss": 0.3734, "step": 13817 }, { "epoch": 2.8405797101449277, "grad_norm": 0.23448723554611206, "learning_rate": 6.624260518189551e-07, "loss": 0.3784, "step": 13818 }, { "epoch": 2.840785281118306, "grad_norm": 0.24033266305923462, "learning_rate": 6.607196607014088e-07, "loss": 0.3812, "step": 13819 }, { "epoch": 2.840990852091685, "grad_norm": 0.22752645611763, "learning_rate": 6.590154539729621e-07, "loss": 0.3747, "step": 13820 }, { "epoch": 2.841196423065063, "grad_norm": 0.2382228821516037, "learning_rate": 6.573134317175728e-07, "loss": 0.3989, "step": 13821 }, { "epoch": 2.8414019940384416, "grad_norm": 0.23340356349945068, "learning_rate": 6.556135940190888e-07, "loss": 0.3767, "step": 13822 }, { "epoch": 2.84160756501182, "grad_norm": 0.12209226191043854, "learning_rate": 6.539159409612633e-07, "loss": 0.4466, "step": 13823 }, { "epoch": 2.841813135985199, "grad_norm": 0.22561949491500854, "learning_rate": 6.522204726277293e-07, "loss": 0.3758, "step": 13824 }, { "epoch": 2.8420187069585774, "grad_norm": 0.225555419921875, "learning_rate": 6.505271891020048e-07, "loss": 0.3724, "step": 13825 }, { "epoch": 2.842224277931956, "grad_norm": 0.2285340279340744, "learning_rate": 6.488360904675234e-07, "loss": 0.3866, "step": 13826 }, { "epoch": 2.8424298489053346, "grad_norm": 0.2325884997844696, "learning_rate": 6.471471768075882e-07, "loss": 0.3787, "step": 13827 }, { "epoch": 2.842635419878713, "grad_norm": 0.1197914183139801, "learning_rate": 6.454604482054077e-07, "loss": 0.4564, "step": 13828 }, { "epoch": 2.8428409908520917, "grad_norm": 0.24161775410175323, "learning_rate": 6.437759047440706e-07, "loss": 0.3779, "step": 13829 }, { "epoch": 2.8430465618254703, "grad_norm": 0.23106519877910614, "learning_rate": 6.420935465065853e-07, "loss": 0.3715, "step": 13830 }, { "epoch": 2.843252132798849, "grad_norm": 0.22928760945796967, "learning_rate": 6.404133735758156e-07, "loss": 0.3916, "step": 13831 }, { "epoch": 2.8434577037722275, "grad_norm": 0.22873489558696747, "learning_rate": 6.387353860345452e-07, "loss": 0.381, "step": 13832 }, { "epoch": 2.843663274745606, "grad_norm": 0.23243139684200287, "learning_rate": 6.370595839654431e-07, "loss": 0.3902, "step": 13833 }, { "epoch": 2.8438688457189842, "grad_norm": 0.2291172593832016, "learning_rate": 6.353859674510582e-07, "loss": 0.3911, "step": 13834 }, { "epoch": 2.8440744166923633, "grad_norm": 0.22925592958927155, "learning_rate": 6.337145365738495e-07, "loss": 0.3684, "step": 13835 }, { "epoch": 2.8442799876657414, "grad_norm": 0.22563427686691284, "learning_rate": 6.320452914161512e-07, "loss": 0.3863, "step": 13836 }, { "epoch": 2.84448555863912, "grad_norm": 0.23132719099521637, "learning_rate": 6.303782320602126e-07, "loss": 0.397, "step": 13837 }, { "epoch": 2.8446911296124986, "grad_norm": 0.12186164408922195, "learning_rate": 6.287133585881528e-07, "loss": 0.4323, "step": 13838 }, { "epoch": 2.844896700585877, "grad_norm": 0.1260182410478592, "learning_rate": 6.270506710819963e-07, "loss": 0.4418, "step": 13839 }, { "epoch": 2.8451022715592558, "grad_norm": 0.11887041479349136, "learning_rate": 6.253901696236575e-07, "loss": 0.4506, "step": 13840 }, { "epoch": 2.8453078425326344, "grad_norm": 0.23686912655830383, "learning_rate": 6.237318542949361e-07, "loss": 0.3608, "step": 13841 }, { "epoch": 2.845513413506013, "grad_norm": 0.2436566948890686, "learning_rate": 6.220757251775316e-07, "loss": 0.3661, "step": 13842 }, { "epoch": 2.8457189844793915, "grad_norm": 0.2323562502861023, "learning_rate": 6.20421782353034e-07, "loss": 0.3828, "step": 13843 }, { "epoch": 2.84592455545277, "grad_norm": 0.12596507370471954, "learning_rate": 6.187700259029227e-07, "loss": 0.4397, "step": 13844 }, { "epoch": 2.8461301264261487, "grad_norm": 0.243175208568573, "learning_rate": 6.17120455908578e-07, "loss": 0.3926, "step": 13845 }, { "epoch": 2.8463356973995273, "grad_norm": 0.24358853697776794, "learning_rate": 6.154730724512648e-07, "loss": 0.3934, "step": 13846 }, { "epoch": 2.846541268372906, "grad_norm": 0.23144344985485077, "learning_rate": 6.13827875612138e-07, "loss": 0.3733, "step": 13847 }, { "epoch": 2.8467468393462845, "grad_norm": 0.33637747168540955, "learning_rate": 6.121848654722528e-07, "loss": 0.3871, "step": 13848 }, { "epoch": 2.8469524103196626, "grad_norm": 0.24188685417175293, "learning_rate": 6.105440421125497e-07, "loss": 0.3871, "step": 13849 }, { "epoch": 2.8471579812930417, "grad_norm": 0.12031394243240356, "learning_rate": 6.089054056138687e-07, "loss": 0.441, "step": 13850 }, { "epoch": 2.84736355226642, "grad_norm": 0.23142001032829285, "learning_rate": 6.072689560569306e-07, "loss": 0.3923, "step": 13851 }, { "epoch": 2.8475691232397984, "grad_norm": 0.23788262903690338, "learning_rate": 6.056346935223656e-07, "loss": 0.3881, "step": 13852 }, { "epoch": 2.847774694213177, "grad_norm": 0.23109963536262512, "learning_rate": 6.040026180906744e-07, "loss": 0.3941, "step": 13853 }, { "epoch": 2.8479802651865556, "grad_norm": 0.23182469606399536, "learning_rate": 6.023727298422726e-07, "loss": 0.3771, "step": 13854 }, { "epoch": 2.848185836159934, "grad_norm": 0.23489411175251007, "learning_rate": 6.007450288574512e-07, "loss": 0.3841, "step": 13855 }, { "epoch": 2.8483914071333127, "grad_norm": 0.23740611970424652, "learning_rate": 5.991195152164009e-07, "loss": 0.3707, "step": 13856 }, { "epoch": 2.8485969781066913, "grad_norm": 0.23565572500228882, "learning_rate": 5.974961889992026e-07, "loss": 0.4023, "step": 13857 }, { "epoch": 2.84880254908007, "grad_norm": 0.23655489087104797, "learning_rate": 5.958750502858274e-07, "loss": 0.3848, "step": 13858 }, { "epoch": 2.8490081200534485, "grad_norm": 0.2304118573665619, "learning_rate": 5.942560991561464e-07, "loss": 0.3871, "step": 13859 }, { "epoch": 2.849213691026827, "grad_norm": 0.22532600164413452, "learning_rate": 5.926393356899207e-07, "loss": 0.3746, "step": 13860 }, { "epoch": 2.8494192620002057, "grad_norm": 0.22565500438213348, "learning_rate": 5.910247599667867e-07, "loss": 0.4012, "step": 13861 }, { "epoch": 2.8496248329735843, "grad_norm": 0.22938272356987, "learning_rate": 5.894123720663009e-07, "loss": 0.3793, "step": 13862 }, { "epoch": 2.849830403946963, "grad_norm": 0.2282402366399765, "learning_rate": 5.878021720678894e-07, "loss": 0.3631, "step": 13863 }, { "epoch": 2.850035974920341, "grad_norm": 0.23935887217521667, "learning_rate": 5.861941600508841e-07, "loss": 0.3811, "step": 13864 }, { "epoch": 2.85024154589372, "grad_norm": 0.12173505127429962, "learning_rate": 5.845883360945065e-07, "loss": 0.4352, "step": 13865 }, { "epoch": 2.850447116867098, "grad_norm": 0.12043416500091553, "learning_rate": 5.829847002778633e-07, "loss": 0.4488, "step": 13866 }, { "epoch": 2.8506526878404768, "grad_norm": 0.23177044093608856, "learning_rate": 5.813832526799562e-07, "loss": 0.3819, "step": 13867 }, { "epoch": 2.8508582588138554, "grad_norm": 0.12020587176084518, "learning_rate": 5.797839933796823e-07, "loss": 0.4398, "step": 13868 }, { "epoch": 2.851063829787234, "grad_norm": 0.2312840223312378, "learning_rate": 5.781869224558384e-07, "loss": 0.3687, "step": 13869 }, { "epoch": 2.8512694007606125, "grad_norm": 0.12858018279075623, "learning_rate": 5.765920399870917e-07, "loss": 0.4559, "step": 13870 }, { "epoch": 2.851474971733991, "grad_norm": 0.24785396456718445, "learning_rate": 5.749993460520242e-07, "loss": 0.3848, "step": 13871 }, { "epoch": 2.8516805427073697, "grad_norm": 0.23876793682575226, "learning_rate": 5.734088407290933e-07, "loss": 0.4002, "step": 13872 }, { "epoch": 2.8518861136807483, "grad_norm": 0.12341229617595673, "learning_rate": 5.718205240966662e-07, "loss": 0.4539, "step": 13873 }, { "epoch": 2.852091684654127, "grad_norm": 0.23897776007652283, "learning_rate": 5.702343962329803e-07, "loss": 0.3986, "step": 13874 }, { "epoch": 2.8522972556275055, "grad_norm": 0.11988009512424469, "learning_rate": 5.686504572161833e-07, "loss": 0.4562, "step": 13875 }, { "epoch": 2.852502826600884, "grad_norm": 0.23703759908676147, "learning_rate": 5.670687071243075e-07, "loss": 0.382, "step": 13876 }, { "epoch": 2.8527083975742626, "grad_norm": 0.23015399277210236, "learning_rate": 5.654891460352707e-07, "loss": 0.3671, "step": 13877 }, { "epoch": 2.8529139685476412, "grad_norm": 0.23037444055080414, "learning_rate": 5.639117740269056e-07, "loss": 0.3773, "step": 13878 }, { "epoch": 2.8531195395210194, "grad_norm": 0.2336786836385727, "learning_rate": 5.623365911769102e-07, "loss": 0.385, "step": 13879 }, { "epoch": 2.8533251104943984, "grad_norm": 0.24950271844863892, "learning_rate": 5.607635975628922e-07, "loss": 0.3763, "step": 13880 }, { "epoch": 2.8535306814677766, "grad_norm": 0.2312586009502411, "learning_rate": 5.591927932623397e-07, "loss": 0.3725, "step": 13881 }, { "epoch": 2.8537362524411556, "grad_norm": 0.23014506697654724, "learning_rate": 5.57624178352646e-07, "loss": 0.3614, "step": 13882 }, { "epoch": 2.8539418234145337, "grad_norm": 0.22436246275901794, "learning_rate": 5.560577529110839e-07, "loss": 0.3772, "step": 13883 }, { "epoch": 2.8541473943879123, "grad_norm": 0.12695522606372833, "learning_rate": 5.544935170148218e-07, "loss": 0.4635, "step": 13884 }, { "epoch": 2.854352965361291, "grad_norm": 0.24410668015480042, "learning_rate": 5.529314707409333e-07, "loss": 0.378, "step": 13885 }, { "epoch": 2.8545585363346695, "grad_norm": 0.12377558648586273, "learning_rate": 5.513716141663616e-07, "loss": 0.435, "step": 13886 }, { "epoch": 2.854764107308048, "grad_norm": 0.24002113938331604, "learning_rate": 5.498139473679603e-07, "loss": 0.3777, "step": 13887 }, { "epoch": 2.8549696782814267, "grad_norm": 0.23580054938793182, "learning_rate": 5.48258470422463e-07, "loss": 0.3832, "step": 13888 }, { "epoch": 2.8551752492548053, "grad_norm": 0.23273934423923492, "learning_rate": 5.467051834065084e-07, "loss": 0.3725, "step": 13889 }, { "epoch": 2.855380820228184, "grad_norm": 0.23366734385490417, "learning_rate": 5.451540863966103e-07, "loss": 0.3706, "step": 13890 }, { "epoch": 2.8555863912015624, "grad_norm": 0.11989044398069382, "learning_rate": 5.436051794691926e-07, "loss": 0.4374, "step": 13891 }, { "epoch": 2.855791962174941, "grad_norm": 0.22055114805698395, "learning_rate": 5.420584627005593e-07, "loss": 0.3711, "step": 13892 }, { "epoch": 2.8559975331483196, "grad_norm": 0.12336910516023636, "learning_rate": 5.405139361669093e-07, "loss": 0.444, "step": 13893 }, { "epoch": 2.8562031041216978, "grad_norm": 0.1187121644616127, "learning_rate": 5.389715999443318e-07, "loss": 0.4488, "step": 13894 }, { "epoch": 2.856408675095077, "grad_norm": 0.21668803691864014, "learning_rate": 5.37431454108816e-07, "loss": 0.3714, "step": 13895 }, { "epoch": 2.856614246068455, "grad_norm": 0.11917508393526077, "learning_rate": 5.358934987362363e-07, "loss": 0.4409, "step": 13896 }, { "epoch": 2.856819817041834, "grad_norm": 0.22866788506507874, "learning_rate": 5.34357733902357e-07, "loss": 0.3774, "step": 13897 }, { "epoch": 2.857025388015212, "grad_norm": 0.12167064100503922, "learning_rate": 5.328241596828376e-07, "loss": 0.452, "step": 13898 }, { "epoch": 2.8572309589885907, "grad_norm": 0.12296809256076813, "learning_rate": 5.312927761532377e-07, "loss": 0.4389, "step": 13899 }, { "epoch": 2.8574365299619693, "grad_norm": 0.24001666903495789, "learning_rate": 5.297635833889969e-07, "loss": 0.3771, "step": 13900 }, { "epoch": 2.857642100935348, "grad_norm": 0.22801834344863892, "learning_rate": 5.2823658146545e-07, "loss": 0.3763, "step": 13901 }, { "epoch": 2.8578476719087265, "grad_norm": 0.22676675021648407, "learning_rate": 5.267117704578267e-07, "loss": 0.3693, "step": 13902 }, { "epoch": 2.858053242882105, "grad_norm": 0.2277052402496338, "learning_rate": 5.251891504412421e-07, "loss": 0.3509, "step": 13903 }, { "epoch": 2.8582588138554836, "grad_norm": 0.22454136610031128, "learning_rate": 5.23668721490716e-07, "loss": 0.3813, "step": 13904 }, { "epoch": 2.8584643848288622, "grad_norm": 0.2237093299627304, "learning_rate": 5.221504836811486e-07, "loss": 0.3734, "step": 13905 }, { "epoch": 2.858669955802241, "grad_norm": 0.24160228669643402, "learning_rate": 5.2063443708734e-07, "loss": 0.3786, "step": 13906 }, { "epoch": 2.8588755267756194, "grad_norm": 0.2331501841545105, "learning_rate": 5.191205817839806e-07, "loss": 0.3789, "step": 13907 }, { "epoch": 2.859081097748998, "grad_norm": 0.24461065232753754, "learning_rate": 5.176089178456406e-07, "loss": 0.3826, "step": 13908 }, { "epoch": 2.859286668722376, "grad_norm": 0.22187209129333496, "learning_rate": 5.160994453468055e-07, "loss": 0.364, "step": 13909 }, { "epoch": 2.859492239695755, "grad_norm": 0.232316792011261, "learning_rate": 5.145921643618257e-07, "loss": 0.3813, "step": 13910 }, { "epoch": 2.8596978106691333, "grad_norm": 0.22536687552928925, "learning_rate": 5.130870749649669e-07, "loss": 0.3738, "step": 13911 }, { "epoch": 2.8599033816425123, "grad_norm": 0.2332964688539505, "learning_rate": 5.115841772303798e-07, "loss": 0.376, "step": 13912 }, { "epoch": 2.8601089526158905, "grad_norm": 0.23040318489074707, "learning_rate": 5.100834712321001e-07, "loss": 0.3887, "step": 13913 }, { "epoch": 2.860314523589269, "grad_norm": 0.2240133285522461, "learning_rate": 5.085849570440638e-07, "loss": 0.3693, "step": 13914 }, { "epoch": 2.8605200945626477, "grad_norm": 0.2326270490884781, "learning_rate": 5.070886347400966e-07, "loss": 0.3749, "step": 13915 }, { "epoch": 2.8607256655360263, "grad_norm": 0.12496310472488403, "learning_rate": 5.055945043939098e-07, "loss": 0.4531, "step": 13916 }, { "epoch": 2.860931236509405, "grad_norm": 0.12099100649356842, "learning_rate": 5.041025660791193e-07, "loss": 0.4613, "step": 13917 }, { "epoch": 2.8611368074827834, "grad_norm": 0.23122435808181763, "learning_rate": 5.026128198692165e-07, "loss": 0.3912, "step": 13918 }, { "epoch": 2.861342378456162, "grad_norm": 0.24232856929302216, "learning_rate": 5.011252658376025e-07, "loss": 0.3617, "step": 13919 }, { "epoch": 2.8615479494295406, "grad_norm": 0.2327503263950348, "learning_rate": 4.996399040575589e-07, "loss": 0.3817, "step": 13920 }, { "epoch": 2.861753520402919, "grad_norm": 0.2326626479625702, "learning_rate": 4.981567346022619e-07, "loss": 0.3987, "step": 13921 }, { "epoch": 2.861959091376298, "grad_norm": 0.22813312709331512, "learning_rate": 4.966757575447833e-07, "loss": 0.3884, "step": 13922 }, { "epoch": 2.8621646623496764, "grad_norm": 0.22625859081745148, "learning_rate": 4.951969729580846e-07, "loss": 0.3947, "step": 13923 }, { "epoch": 2.8623702333230545, "grad_norm": 0.23106195032596588, "learning_rate": 4.937203809150126e-07, "loss": 0.376, "step": 13924 }, { "epoch": 2.8625758042964335, "grad_norm": 0.1207781508564949, "learning_rate": 4.92245981488319e-07, "loss": 0.4405, "step": 13925 }, { "epoch": 2.8627813752698117, "grad_norm": 0.232728511095047, "learning_rate": 4.907737747506308e-07, "loss": 0.3792, "step": 13926 }, { "epoch": 2.8629869462431907, "grad_norm": 0.2338234782218933, "learning_rate": 4.893037607744849e-07, "loss": 0.3716, "step": 13927 }, { "epoch": 2.863192517216569, "grad_norm": 0.24571533501148224, "learning_rate": 4.878359396323035e-07, "loss": 0.3928, "step": 13928 }, { "epoch": 2.8633980881899475, "grad_norm": 0.23208092153072357, "learning_rate": 4.863703113963986e-07, "loss": 0.3748, "step": 13929 }, { "epoch": 2.863603659163326, "grad_norm": 0.23107780516147614, "learning_rate": 4.849068761389675e-07, "loss": 0.3716, "step": 13930 }, { "epoch": 2.8638092301367046, "grad_norm": 0.12082730978727341, "learning_rate": 4.834456339321075e-07, "loss": 0.4541, "step": 13931 }, { "epoch": 2.864014801110083, "grad_norm": 0.12191561609506607, "learning_rate": 4.819865848478212e-07, "loss": 0.4471, "step": 13932 }, { "epoch": 2.864220372083462, "grad_norm": 0.23875342309474945, "learning_rate": 4.805297289579708e-07, "loss": 0.4194, "step": 13933 }, { "epoch": 2.8644259430568404, "grad_norm": 0.22163498401641846, "learning_rate": 4.790750663343391e-07, "loss": 0.3613, "step": 13934 }, { "epoch": 2.864631514030219, "grad_norm": 0.24136824905872345, "learning_rate": 4.776225970485937e-07, "loss": 0.3839, "step": 13935 }, { "epoch": 2.8648370850035976, "grad_norm": 0.22400477528572083, "learning_rate": 4.761723211722824e-07, "loss": 0.3655, "step": 13936 }, { "epoch": 2.865042655976976, "grad_norm": 0.23349706828594208, "learning_rate": 4.7472423877685804e-07, "loss": 0.3814, "step": 13937 }, { "epoch": 2.8652482269503547, "grad_norm": 0.24638283252716064, "learning_rate": 4.732783499336585e-07, "loss": 0.3953, "step": 13938 }, { "epoch": 2.865453797923733, "grad_norm": 0.23078061640262604, "learning_rate": 4.718346547139119e-07, "loss": 0.3858, "step": 13939 }, { "epoch": 2.865659368897112, "grad_norm": 0.23065340518951416, "learning_rate": 4.7039315318875623e-07, "loss": 0.3522, "step": 13940 }, { "epoch": 2.86586493987049, "grad_norm": 0.22871986031532288, "learning_rate": 4.6895384542919477e-07, "loss": 0.3913, "step": 13941 }, { "epoch": 2.866070510843869, "grad_norm": 0.23301458358764648, "learning_rate": 4.6751673150614575e-07, "loss": 0.3834, "step": 13942 }, { "epoch": 2.8662760818172472, "grad_norm": 0.22655089199543, "learning_rate": 4.6608181149039757e-07, "loss": 0.3899, "step": 13943 }, { "epoch": 2.866481652790626, "grad_norm": 0.12195513397455215, "learning_rate": 4.646490854526486e-07, "loss": 0.4349, "step": 13944 }, { "epoch": 2.8666872237640044, "grad_norm": 0.23551727831363678, "learning_rate": 4.6321855346348254e-07, "loss": 0.3738, "step": 13945 }, { "epoch": 2.866892794737383, "grad_norm": 0.23190248012542725, "learning_rate": 4.617902155933679e-07, "loss": 0.3944, "step": 13946 }, { "epoch": 2.8670983657107616, "grad_norm": 0.22424408793449402, "learning_rate": 4.6036407191268337e-07, "loss": 0.3904, "step": 13947 }, { "epoch": 2.86730393668414, "grad_norm": 0.11816349625587463, "learning_rate": 4.5894012249168285e-07, "loss": 0.4426, "step": 13948 }, { "epoch": 2.8675095076575188, "grad_norm": 0.22937704622745514, "learning_rate": 4.5751836740052015e-07, "loss": 0.3796, "step": 13949 }, { "epoch": 2.8677150786308974, "grad_norm": 0.11853787302970886, "learning_rate": 4.560988067092342e-07, "loss": 0.4408, "step": 13950 }, { "epoch": 2.867920649604276, "grad_norm": 0.23124562203884125, "learning_rate": 4.5468144048776416e-07, "loss": 0.3838, "step": 13951 }, { "epoch": 2.8681262205776545, "grad_norm": 0.23542582988739014, "learning_rate": 4.5326626880593416e-07, "loss": 0.3749, "step": 13952 }, { "epoch": 2.868331791551033, "grad_norm": 0.22498956322669983, "learning_rate": 4.5185329173346334e-07, "loss": 0.3877, "step": 13953 }, { "epoch": 2.8685373625244117, "grad_norm": 0.12203694880008698, "learning_rate": 4.5044250933996615e-07, "loss": 0.4589, "step": 13954 }, { "epoch": 2.8687429334977903, "grad_norm": 0.22876019775867462, "learning_rate": 4.490339216949369e-07, "loss": 0.3773, "step": 13955 }, { "epoch": 2.8689485044711684, "grad_norm": 0.22930005192756653, "learning_rate": 4.4762752886778004e-07, "loss": 0.3838, "step": 13956 }, { "epoch": 2.8691540754445475, "grad_norm": 0.2380819171667099, "learning_rate": 4.4622333092777524e-07, "loss": 0.3939, "step": 13957 }, { "epoch": 2.8693596464179256, "grad_norm": 0.24039901793003082, "learning_rate": 4.4482132794410714e-07, "loss": 0.3881, "step": 13958 }, { "epoch": 2.869565217391304, "grad_norm": 0.2359398603439331, "learning_rate": 4.434215199858355e-07, "loss": 0.386, "step": 13959 }, { "epoch": 2.869770788364683, "grad_norm": 0.12011504173278809, "learning_rate": 4.420239071219301e-07, "loss": 0.4551, "step": 13960 }, { "epoch": 2.8699763593380614, "grad_norm": 0.2287997305393219, "learning_rate": 4.406284894212459e-07, "loss": 0.3777, "step": 13961 }, { "epoch": 2.87018193031144, "grad_norm": 0.21278510987758636, "learning_rate": 4.392352669525279e-07, "loss": 0.3631, "step": 13962 }, { "epoch": 2.8703875012848186, "grad_norm": 0.23229098320007324, "learning_rate": 4.3784423978441125e-07, "loss": 0.384, "step": 13963 }, { "epoch": 2.870593072258197, "grad_norm": 0.2308778017759323, "learning_rate": 4.3645540798542605e-07, "loss": 0.394, "step": 13964 }, { "epoch": 2.8707986432315757, "grad_norm": 0.23160767555236816, "learning_rate": 4.3506877162399263e-07, "loss": 0.3779, "step": 13965 }, { "epoch": 2.8710042142049543, "grad_norm": 0.23534901440143585, "learning_rate": 4.336843307684213e-07, "loss": 0.365, "step": 13966 }, { "epoch": 2.871209785178333, "grad_norm": 0.11934797465801239, "learning_rate": 4.323020854869225e-07, "loss": 0.4542, "step": 13967 }, { "epoch": 2.8714153561517115, "grad_norm": 0.11757402122020721, "learning_rate": 4.3092203584759185e-07, "loss": 0.4468, "step": 13968 }, { "epoch": 2.87162092712509, "grad_norm": 0.22173817455768585, "learning_rate": 4.2954418191841484e-07, "loss": 0.3748, "step": 13969 }, { "epoch": 2.8718264980984687, "grad_norm": 0.23279330134391785, "learning_rate": 4.281685237672772e-07, "loss": 0.3775, "step": 13970 }, { "epoch": 2.872032069071847, "grad_norm": 0.23133385181427002, "learning_rate": 4.267950614619498e-07, "loss": 0.3657, "step": 13971 }, { "epoch": 2.872237640045226, "grad_norm": 0.2283874899148941, "learning_rate": 4.2542379507009347e-07, "loss": 0.3612, "step": 13972 }, { "epoch": 2.872443211018604, "grad_norm": 0.12400206178426743, "learning_rate": 4.240547246592641e-07, "loss": 0.4621, "step": 13973 }, { "epoch": 2.8726487819919826, "grad_norm": 0.22691883146762848, "learning_rate": 4.2268785029690783e-07, "loss": 0.362, "step": 13974 }, { "epoch": 2.872854352965361, "grad_norm": 0.23167765140533447, "learning_rate": 4.2132317205037573e-07, "loss": 0.3854, "step": 13975 }, { "epoch": 2.8730599239387398, "grad_norm": 0.26033303141593933, "learning_rate": 4.199606899868841e-07, "loss": 0.3508, "step": 13976 }, { "epoch": 2.8732654949121184, "grad_norm": 0.22448518872261047, "learning_rate": 4.186004041735642e-07, "loss": 0.3895, "step": 13977 }, { "epoch": 2.873471065885497, "grad_norm": 0.11807616800069809, "learning_rate": 4.1724231467743236e-07, "loss": 0.4393, "step": 13978 }, { "epoch": 2.8736766368588755, "grad_norm": 0.23837019503116608, "learning_rate": 4.1588642156539014e-07, "loss": 0.4048, "step": 13979 }, { "epoch": 2.873882207832254, "grad_norm": 0.24100029468536377, "learning_rate": 4.145327249042391e-07, "loss": 0.3877, "step": 13980 }, { "epoch": 2.8740877788056327, "grad_norm": 0.23236291110515594, "learning_rate": 4.131812247606659e-07, "loss": 0.3805, "step": 13981 }, { "epoch": 2.8742933497790113, "grad_norm": 0.234677255153656, "learning_rate": 4.1183192120125723e-07, "loss": 0.3882, "step": 13982 }, { "epoch": 2.87449892075239, "grad_norm": 0.22873461246490479, "learning_rate": 4.10484814292485e-07, "loss": 0.3691, "step": 13983 }, { "epoch": 2.8747044917257685, "grad_norm": 0.22885732352733612, "learning_rate": 4.09139904100716e-07, "loss": 0.3814, "step": 13984 }, { "epoch": 2.874910062699147, "grad_norm": 0.23706702888011932, "learning_rate": 4.0779719069220735e-07, "loss": 0.3747, "step": 13985 }, { "epoch": 2.875115633672525, "grad_norm": 0.22555503249168396, "learning_rate": 4.0645667413310605e-07, "loss": 0.3678, "step": 13986 }, { "epoch": 2.8753212046459042, "grad_norm": 0.11815163493156433, "learning_rate": 4.0511835448945934e-07, "loss": 0.4461, "step": 13987 }, { "epoch": 2.8755267756192824, "grad_norm": 0.23131482303142548, "learning_rate": 4.0378223182718943e-07, "loss": 0.3946, "step": 13988 }, { "epoch": 2.875732346592661, "grad_norm": 0.22287005186080933, "learning_rate": 4.024483062121287e-07, "loss": 0.3732, "step": 13989 }, { "epoch": 2.8759379175660396, "grad_norm": 0.22222553193569183, "learning_rate": 4.011165777099896e-07, "loss": 0.3618, "step": 13990 }, { "epoch": 2.876143488539418, "grad_norm": 0.22416678071022034, "learning_rate": 3.9978704638638455e-07, "loss": 0.3859, "step": 13991 }, { "epoch": 2.8763490595127967, "grad_norm": 0.23659634590148926, "learning_rate": 3.984597123068112e-07, "loss": 0.3624, "step": 13992 }, { "epoch": 2.8765546304861753, "grad_norm": 0.12456272542476654, "learning_rate": 3.971345755366623e-07, "loss": 0.4535, "step": 13993 }, { "epoch": 2.876760201459554, "grad_norm": 0.23349931836128235, "learning_rate": 3.9581163614121564e-07, "loss": 0.3767, "step": 13994 }, { "epoch": 2.8769657724329325, "grad_norm": 0.2434905469417572, "learning_rate": 3.94490894185649e-07, "loss": 0.3731, "step": 13995 }, { "epoch": 2.877171343406311, "grad_norm": 0.12112405896186829, "learning_rate": 3.9317234973503536e-07, "loss": 0.4481, "step": 13996 }, { "epoch": 2.8773769143796897, "grad_norm": 0.22560545802116394, "learning_rate": 3.9185600285432777e-07, "loss": 0.3906, "step": 13997 }, { "epoch": 2.8775824853530683, "grad_norm": 0.12590011954307556, "learning_rate": 3.905418536083744e-07, "loss": 0.4603, "step": 13998 }, { "epoch": 2.877788056326447, "grad_norm": 0.11752758920192719, "learning_rate": 3.8922990206191833e-07, "loss": 0.4465, "step": 13999 }, { "epoch": 2.8779936272998254, "grad_norm": 0.22191815078258514, "learning_rate": 3.87920148279598e-07, "loss": 0.3697, "step": 14000 }, { "epoch": 2.8781991982732036, "grad_norm": 0.23301634192466736, "learning_rate": 3.866125923259367e-07, "loss": 0.3553, "step": 14001 }, { "epoch": 2.8784047692465826, "grad_norm": 0.22838152945041656, "learning_rate": 3.8530723426534797e-07, "loss": 0.3772, "step": 14002 }, { "epoch": 2.8786103402199608, "grad_norm": 0.2294638454914093, "learning_rate": 3.840040741621404e-07, "loss": 0.3832, "step": 14003 }, { "epoch": 2.8788159111933393, "grad_norm": 0.24881219863891602, "learning_rate": 3.8270311208052246e-07, "loss": 0.3631, "step": 14004 }, { "epoch": 2.879021482166718, "grad_norm": 0.2229405790567398, "learning_rate": 3.81404348084583e-07, "loss": 0.3767, "step": 14005 }, { "epoch": 2.8792270531400965, "grad_norm": 0.11796759814023972, "learning_rate": 3.801077822383009e-07, "loss": 0.4422, "step": 14006 }, { "epoch": 2.879432624113475, "grad_norm": 0.23424452543258667, "learning_rate": 3.7881341460555496e-07, "loss": 0.3664, "step": 14007 }, { "epoch": 2.8796381950868537, "grad_norm": 0.23670734465122223, "learning_rate": 3.775212452501192e-07, "loss": 0.3929, "step": 14008 }, { "epoch": 2.8798437660602323, "grad_norm": 0.12096056342124939, "learning_rate": 3.762312742356378e-07, "loss": 0.4595, "step": 14009 }, { "epoch": 2.880049337033611, "grad_norm": 0.2295764833688736, "learning_rate": 3.749435016256747e-07, "loss": 0.3821, "step": 14010 }, { "epoch": 2.8802549080069895, "grad_norm": 0.2285950481891632, "learning_rate": 3.7365792748366934e-07, "loss": 0.3757, "step": 14011 }, { "epoch": 2.880460478980368, "grad_norm": 0.12199006229639053, "learning_rate": 3.72374551872956e-07, "loss": 0.4473, "step": 14012 }, { "epoch": 2.8806660499537466, "grad_norm": 0.22347088158130646, "learning_rate": 3.710933748567541e-07, "loss": 0.3702, "step": 14013 }, { "epoch": 2.8808716209271252, "grad_norm": 0.23266130685806274, "learning_rate": 3.698143964981932e-07, "loss": 0.3802, "step": 14014 }, { "epoch": 2.881077191900504, "grad_norm": 0.23003004491329193, "learning_rate": 3.6853761686026776e-07, "loss": 0.3668, "step": 14015 }, { "epoch": 2.881282762873882, "grad_norm": 0.22506079077720642, "learning_rate": 3.672630360058926e-07, "loss": 0.3672, "step": 14016 }, { "epoch": 2.881488333847261, "grad_norm": 0.23392482101917267, "learning_rate": 3.659906539978575e-07, "loss": 0.3907, "step": 14017 }, { "epoch": 2.881693904820639, "grad_norm": 0.22708185017108917, "learning_rate": 3.647204708988422e-07, "loss": 0.3736, "step": 14018 }, { "epoch": 2.8818994757940177, "grad_norm": 0.11717811226844788, "learning_rate": 3.6345248677142176e-07, "loss": 0.4522, "step": 14019 }, { "epoch": 2.8821050467673963, "grad_norm": 0.22868549823760986, "learning_rate": 3.621867016780661e-07, "loss": 0.3855, "step": 14020 }, { "epoch": 2.882310617740775, "grad_norm": 0.12395808845758438, "learning_rate": 3.6092311568113546e-07, "loss": 0.4369, "step": 14021 }, { "epoch": 2.8825161887141535, "grad_norm": 0.22594808042049408, "learning_rate": 3.5966172884287995e-07, "loss": 0.3708, "step": 14022 }, { "epoch": 2.882721759687532, "grad_norm": 0.11887579411268234, "learning_rate": 3.5840254122544495e-07, "loss": 0.4554, "step": 14023 }, { "epoch": 2.8829273306609107, "grad_norm": 0.12510953843593597, "learning_rate": 3.571455528908657e-07, "loss": 0.4457, "step": 14024 }, { "epoch": 2.8831329016342893, "grad_norm": 0.22904570400714874, "learning_rate": 3.558907639010628e-07, "loss": 0.3703, "step": 14025 }, { "epoch": 2.883338472607668, "grad_norm": 0.24266590178012848, "learning_rate": 3.5463817431785176e-07, "loss": 0.3713, "step": 14026 }, { "epoch": 2.8835440435810464, "grad_norm": 0.22441810369491577, "learning_rate": 3.5338778420294817e-07, "loss": 0.4028, "step": 14027 }, { "epoch": 2.883749614554425, "grad_norm": 0.23846034705638885, "learning_rate": 3.521395936179528e-07, "loss": 0.3993, "step": 14028 }, { "epoch": 2.8839551855278036, "grad_norm": 0.2247145175933838, "learning_rate": 3.5089360262435146e-07, "loss": 0.3895, "step": 14029 }, { "epoch": 2.884160756501182, "grad_norm": 0.2352132946252823, "learning_rate": 3.4964981128354e-07, "loss": 0.3754, "step": 14030 }, { "epoch": 2.8843663274745603, "grad_norm": 0.22683286666870117, "learning_rate": 3.484082196567795e-07, "loss": 0.3893, "step": 14031 }, { "epoch": 2.8845718984479394, "grad_norm": 0.2301369607448578, "learning_rate": 3.4716882780525097e-07, "loss": 0.3909, "step": 14032 }, { "epoch": 2.8847774694213175, "grad_norm": 0.23967629671096802, "learning_rate": 3.4593163579000553e-07, "loss": 0.3981, "step": 14033 }, { "epoch": 2.884983040394696, "grad_norm": 0.2322077453136444, "learning_rate": 3.446966436719945e-07, "loss": 0.3826, "step": 14034 }, { "epoch": 2.8851886113680747, "grad_norm": 0.12146010994911194, "learning_rate": 3.4346385151206416e-07, "loss": 0.4504, "step": 14035 }, { "epoch": 2.8853941823414533, "grad_norm": 0.24295859038829803, "learning_rate": 3.4223325937094096e-07, "loss": 0.369, "step": 14036 }, { "epoch": 2.885599753314832, "grad_norm": 0.24125894904136658, "learning_rate": 3.410048673092614e-07, "loss": 0.3895, "step": 14037 }, { "epoch": 2.8858053242882105, "grad_norm": 0.11830901354551315, "learning_rate": 3.397786753875321e-07, "loss": 0.4409, "step": 14038 }, { "epoch": 2.886010895261589, "grad_norm": 0.2366967350244522, "learning_rate": 3.385546836661696e-07, "loss": 0.3942, "step": 14039 }, { "epoch": 2.8862164662349676, "grad_norm": 0.22245019674301147, "learning_rate": 3.373328922054658e-07, "loss": 0.3795, "step": 14040 }, { "epoch": 2.8864220372083462, "grad_norm": 0.12539364397525787, "learning_rate": 3.3611330106561754e-07, "loss": 0.4422, "step": 14041 }, { "epoch": 2.886627608181725, "grad_norm": 0.22733426094055176, "learning_rate": 3.3489591030671174e-07, "loss": 0.3805, "step": 14042 }, { "epoch": 2.8868331791551034, "grad_norm": 0.24280138313770294, "learning_rate": 3.336807199887204e-07, "loss": 0.3993, "step": 14043 }, { "epoch": 2.887038750128482, "grad_norm": 0.12910622358322144, "learning_rate": 3.3246773017151066e-07, "loss": 0.4552, "step": 14044 }, { "epoch": 2.8872443211018606, "grad_norm": 0.11929771304130554, "learning_rate": 3.3125694091483474e-07, "loss": 0.4486, "step": 14045 }, { "epoch": 2.8874498920752387, "grad_norm": 0.23444950580596924, "learning_rate": 3.3004835227835485e-07, "loss": 0.3619, "step": 14046 }, { "epoch": 2.8876554630486178, "grad_norm": 0.2314281016588211, "learning_rate": 3.2884196432160343e-07, "loss": 0.3573, "step": 14047 }, { "epoch": 2.887861034021996, "grad_norm": 0.22594213485717773, "learning_rate": 3.276377771040179e-07, "loss": 0.3828, "step": 14048 }, { "epoch": 2.8880666049953745, "grad_norm": 0.2312646061182022, "learning_rate": 3.264357906849208e-07, "loss": 0.3858, "step": 14049 }, { "epoch": 2.888272175968753, "grad_norm": 0.23432159423828125, "learning_rate": 3.252360051235248e-07, "loss": 0.3754, "step": 14050 }, { "epoch": 2.8884777469421317, "grad_norm": 0.23932310938835144, "learning_rate": 3.240384204789426e-07, "loss": 0.3918, "step": 14051 }, { "epoch": 2.8886833179155103, "grad_norm": 0.2506803572177887, "learning_rate": 3.2284303681017203e-07, "loss": 0.368, "step": 14052 }, { "epoch": 2.888888888888889, "grad_norm": 0.22862713038921356, "learning_rate": 3.2164985417610596e-07, "loss": 0.3896, "step": 14053 }, { "epoch": 2.8890944598622674, "grad_norm": 0.23301179707050323, "learning_rate": 3.204588726355273e-07, "loss": 0.3869, "step": 14054 }, { "epoch": 2.889300030835646, "grad_norm": 0.2313561588525772, "learning_rate": 3.1927009224710925e-07, "loss": 0.3629, "step": 14055 }, { "epoch": 2.8895056018090246, "grad_norm": 0.22642727196216583, "learning_rate": 3.1808351306941486e-07, "loss": 0.3816, "step": 14056 }, { "epoch": 2.889711172782403, "grad_norm": 0.2348901927471161, "learning_rate": 3.1689913516089743e-07, "loss": 0.3855, "step": 14057 }, { "epoch": 2.889916743755782, "grad_norm": 0.24844767153263092, "learning_rate": 3.1571695857991523e-07, "loss": 0.3891, "step": 14058 }, { "epoch": 2.8901223147291604, "grad_norm": 0.226862832903862, "learning_rate": 3.145369833847067e-07, "loss": 0.3812, "step": 14059 }, { "epoch": 2.890327885702539, "grad_norm": 0.22782935202121735, "learning_rate": 3.1335920963340037e-07, "loss": 0.3698, "step": 14060 }, { "epoch": 2.890533456675917, "grad_norm": 0.22575967013835907, "learning_rate": 3.121836373840198e-07, "loss": 0.3807, "step": 14061 }, { "epoch": 2.890739027649296, "grad_norm": 0.24145731329917908, "learning_rate": 3.110102666944836e-07, "loss": 0.3619, "step": 14062 }, { "epoch": 2.8909445986226743, "grad_norm": 0.24116384983062744, "learning_rate": 3.0983909762259567e-07, "loss": 0.3831, "step": 14063 }, { "epoch": 2.8911501695960533, "grad_norm": 0.21999165415763855, "learning_rate": 3.0867013022604977e-07, "loss": 0.3963, "step": 14064 }, { "epoch": 2.8913557405694315, "grad_norm": 0.23448392748832703, "learning_rate": 3.075033645624448e-07, "loss": 0.3707, "step": 14065 }, { "epoch": 2.89156131154281, "grad_norm": 0.11776132136583328, "learning_rate": 3.063388006892548e-07, "loss": 0.4614, "step": 14066 }, { "epoch": 2.8917668825161886, "grad_norm": 0.12120406329631805, "learning_rate": 3.0517643866385395e-07, "loss": 0.4609, "step": 14067 }, { "epoch": 2.891972453489567, "grad_norm": 0.22066402435302734, "learning_rate": 3.0401627854351133e-07, "loss": 0.3709, "step": 14068 }, { "epoch": 2.892178024462946, "grad_norm": 0.22971779108047485, "learning_rate": 3.0285832038537134e-07, "loss": 0.3811, "step": 14069 }, { "epoch": 2.8923835954363244, "grad_norm": 0.12074688076972961, "learning_rate": 3.0170256424649325e-07, "loss": 0.4428, "step": 14070 }, { "epoch": 2.892589166409703, "grad_norm": 0.23068879544734955, "learning_rate": 3.0054901018380656e-07, "loss": 0.3824, "step": 14071 }, { "epoch": 2.8927947373830816, "grad_norm": 0.23140643537044525, "learning_rate": 2.993976582541458e-07, "loss": 0.3776, "step": 14072 }, { "epoch": 2.89300030835646, "grad_norm": 0.2334955334663391, "learning_rate": 2.982485085142356e-07, "loss": 0.3668, "step": 14073 }, { "epoch": 2.8932058793298387, "grad_norm": 0.22583140432834625, "learning_rate": 2.9710156102068563e-07, "loss": 0.3872, "step": 14074 }, { "epoch": 2.8934114503032173, "grad_norm": 0.23303750157356262, "learning_rate": 2.959568158300008e-07, "loss": 0.383, "step": 14075 }, { "epoch": 2.8936170212765955, "grad_norm": 0.2299990952014923, "learning_rate": 2.948142729985759e-07, "loss": 0.36, "step": 14076 }, { "epoch": 2.8938225922499745, "grad_norm": 0.12316111475229263, "learning_rate": 2.9367393258270094e-07, "loss": 0.4644, "step": 14077 }, { "epoch": 2.8940281632233527, "grad_norm": 0.24173006415367126, "learning_rate": 2.9253579463855097e-07, "loss": 0.3787, "step": 14078 }, { "epoch": 2.8942337341967317, "grad_norm": 0.12229252606630325, "learning_rate": 2.9139985922220114e-07, "loss": 0.4535, "step": 14079 }, { "epoch": 2.89443930517011, "grad_norm": 0.22947286069393158, "learning_rate": 2.9026612638961673e-07, "loss": 0.3694, "step": 14080 }, { "epoch": 2.8946448761434884, "grad_norm": 0.2314113825559616, "learning_rate": 2.8913459619664795e-07, "loss": 0.3772, "step": 14081 }, { "epoch": 2.894850447116867, "grad_norm": 0.23245009779930115, "learning_rate": 2.880052686990353e-07, "loss": 0.3879, "step": 14082 }, { "epoch": 2.8950560180902456, "grad_norm": 0.23955170810222626, "learning_rate": 2.868781439524193e-07, "loss": 0.3769, "step": 14083 }, { "epoch": 2.895261589063624, "grad_norm": 0.22946025431156158, "learning_rate": 2.857532220123305e-07, "loss": 0.3739, "step": 14084 }, { "epoch": 2.8954671600370028, "grad_norm": 0.22186554968357086, "learning_rate": 2.8463050293418946e-07, "loss": 0.3714, "step": 14085 }, { "epoch": 2.8956727310103814, "grad_norm": 0.24299030005931854, "learning_rate": 2.835099867733021e-07, "loss": 0.384, "step": 14086 }, { "epoch": 2.89587830198376, "grad_norm": 0.24568887054920197, "learning_rate": 2.823916735848742e-07, "loss": 0.3973, "step": 14087 }, { "epoch": 2.8960838729571385, "grad_norm": 0.23442420363426208, "learning_rate": 2.812755634239966e-07, "loss": 0.3832, "step": 14088 }, { "epoch": 2.896289443930517, "grad_norm": 0.22998051345348358, "learning_rate": 2.801616563456605e-07, "loss": 0.394, "step": 14089 }, { "epoch": 2.8964950149038957, "grad_norm": 0.2347511351108551, "learning_rate": 2.7904995240473684e-07, "loss": 0.3739, "step": 14090 }, { "epoch": 2.896700585877274, "grad_norm": 0.11745678633451462, "learning_rate": 2.779404516559969e-07, "loss": 0.4466, "step": 14091 }, { "epoch": 2.896906156850653, "grad_norm": 0.23240487277507782, "learning_rate": 2.7683315415410195e-07, "loss": 0.3759, "step": 14092 }, { "epoch": 2.897111727824031, "grad_norm": 0.23751090466976166, "learning_rate": 2.757280599535983e-07, "loss": 0.3839, "step": 14093 }, { "epoch": 2.89731729879741, "grad_norm": 0.12017631530761719, "learning_rate": 2.7462516910893745e-07, "loss": 0.4444, "step": 14094 }, { "epoch": 2.897522869770788, "grad_norm": 0.22520147264003754, "learning_rate": 2.735244816744459e-07, "loss": 0.3866, "step": 14095 }, { "epoch": 2.897728440744167, "grad_norm": 0.23022042214870453, "learning_rate": 2.7242599770435527e-07, "loss": 0.3813, "step": 14096 }, { "epoch": 2.8979340117175454, "grad_norm": 0.23134584724903107, "learning_rate": 2.7132971725277736e-07, "loss": 0.3583, "step": 14097 }, { "epoch": 2.898139582690924, "grad_norm": 0.23426006734371185, "learning_rate": 2.7023564037372383e-07, "loss": 0.3785, "step": 14098 }, { "epoch": 2.8983451536643026, "grad_norm": 0.11582779884338379, "learning_rate": 2.6914376712109166e-07, "loss": 0.4577, "step": 14099 }, { "epoch": 2.898550724637681, "grad_norm": 0.12305998057126999, "learning_rate": 2.6805409754867783e-07, "loss": 0.4363, "step": 14100 }, { "epoch": 2.8987562956110597, "grad_norm": 0.21655605733394623, "learning_rate": 2.6696663171015933e-07, "loss": 0.3598, "step": 14101 }, { "epoch": 2.8989618665844383, "grad_norm": 0.2362840622663498, "learning_rate": 2.658813696591134e-07, "loss": 0.3895, "step": 14102 }, { "epoch": 2.899167437557817, "grad_norm": 0.11493504792451859, "learning_rate": 2.6479831144900714e-07, "loss": 0.4464, "step": 14103 }, { "epoch": 2.8993730085311955, "grad_norm": 0.2330009937286377, "learning_rate": 2.63717457133193e-07, "loss": 0.3753, "step": 14104 }, { "epoch": 2.899578579504574, "grad_norm": 0.23272541165351868, "learning_rate": 2.6263880676492823e-07, "loss": 0.3787, "step": 14105 }, { "epoch": 2.8997841504779522, "grad_norm": 0.12182939052581787, "learning_rate": 2.615623603973405e-07, "loss": 0.4519, "step": 14106 }, { "epoch": 2.8999897214513313, "grad_norm": 0.23827330768108368, "learning_rate": 2.6048811808347227e-07, "loss": 0.3878, "step": 14107 }, { "epoch": 2.9001952924247094, "grad_norm": 0.22565053403377533, "learning_rate": 2.5941607987624626e-07, "loss": 0.3737, "step": 14108 }, { "epoch": 2.9004008633980884, "grad_norm": 0.22274649143218994, "learning_rate": 2.583462458284652e-07, "loss": 0.3767, "step": 14109 }, { "epoch": 2.9006064343714666, "grad_norm": 0.2283952683210373, "learning_rate": 2.57278615992852e-07, "loss": 0.3775, "step": 14110 }, { "epoch": 2.900812005344845, "grad_norm": 0.2287856638431549, "learning_rate": 2.5621319042198945e-07, "loss": 0.3912, "step": 14111 }, { "epoch": 2.9010175763182238, "grad_norm": 0.23402291536331177, "learning_rate": 2.5514996916836564e-07, "loss": 0.3875, "step": 14112 }, { "epoch": 2.9012231472916024, "grad_norm": 0.23402421176433563, "learning_rate": 2.5408895228437366e-07, "loss": 0.3955, "step": 14113 }, { "epoch": 2.901428718264981, "grad_norm": 0.13076432049274445, "learning_rate": 2.530301398222767e-07, "loss": 0.4522, "step": 14114 }, { "epoch": 2.9016342892383595, "grad_norm": 0.23658651113510132, "learning_rate": 2.519735318342331e-07, "loss": 0.3845, "step": 14115 }, { "epoch": 2.901839860211738, "grad_norm": 0.12689454853534698, "learning_rate": 2.509191283723061e-07, "loss": 0.4543, "step": 14116 }, { "epoch": 2.9020454311851167, "grad_norm": 0.12162572145462036, "learning_rate": 2.4986692948843925e-07, "loss": 0.4385, "step": 14117 }, { "epoch": 2.9022510021584953, "grad_norm": 0.23482230305671692, "learning_rate": 2.48816935234461e-07, "loss": 0.381, "step": 14118 }, { "epoch": 2.902456573131874, "grad_norm": 0.22458134591579437, "learning_rate": 2.477691456621051e-07, "loss": 0.3733, "step": 14119 }, { "epoch": 2.9026621441052525, "grad_norm": 0.22772455215454102, "learning_rate": 2.467235608230001e-07, "loss": 0.373, "step": 14120 }, { "epoch": 2.902867715078631, "grad_norm": 0.22812238335609436, "learning_rate": 2.4568018076864484e-07, "loss": 0.3561, "step": 14121 }, { "epoch": 2.9030732860520096, "grad_norm": 0.24260129034519196, "learning_rate": 2.446390055504433e-07, "loss": 0.3947, "step": 14122 }, { "epoch": 2.903278857025388, "grad_norm": 0.224505215883255, "learning_rate": 2.436000352196943e-07, "loss": 0.374, "step": 14123 }, { "epoch": 2.903484427998767, "grad_norm": 0.22481678426265717, "learning_rate": 2.42563269827582e-07, "loss": 0.3731, "step": 14124 }, { "epoch": 2.903689998972145, "grad_norm": 0.2302400767803192, "learning_rate": 2.415287094251756e-07, "loss": 0.3861, "step": 14125 }, { "epoch": 2.9038955699455236, "grad_norm": 0.12294553965330124, "learning_rate": 2.404963540634542e-07, "loss": 0.4498, "step": 14126 }, { "epoch": 2.904101140918902, "grad_norm": 0.23122653365135193, "learning_rate": 2.3946620379327214e-07, "loss": 0.3611, "step": 14127 }, { "epoch": 2.9043067118922807, "grad_norm": 0.2358085960149765, "learning_rate": 2.3843825866537883e-07, "loss": 0.396, "step": 14128 }, { "epoch": 2.9045122828656593, "grad_norm": 0.12024319916963577, "learning_rate": 2.374125187304188e-07, "loss": 0.4486, "step": 14129 }, { "epoch": 2.904717853839038, "grad_norm": 0.22727903723716736, "learning_rate": 2.3638898403892162e-07, "loss": 0.355, "step": 14130 }, { "epoch": 2.9049234248124165, "grad_norm": 0.23375868797302246, "learning_rate": 2.3536765464131695e-07, "loss": 0.3874, "step": 14131 }, { "epoch": 2.905128995785795, "grad_norm": 0.23008592426776886, "learning_rate": 2.343485305879195e-07, "loss": 0.4048, "step": 14132 }, { "epoch": 2.9053345667591737, "grad_norm": 0.12159692496061325, "learning_rate": 2.3333161192893416e-07, "loss": 0.4508, "step": 14133 }, { "epoch": 2.9055401377325523, "grad_norm": 0.22369949519634247, "learning_rate": 2.3231689871446083e-07, "loss": 0.3681, "step": 14134 }, { "epoch": 2.905745708705931, "grad_norm": 0.23495550453662872, "learning_rate": 2.3130439099448953e-07, "loss": 0.3923, "step": 14135 }, { "epoch": 2.9059512796793094, "grad_norm": 0.23136933147907257, "learning_rate": 2.3029408881890535e-07, "loss": 0.3752, "step": 14136 }, { "epoch": 2.906156850652688, "grad_norm": 0.12260935455560684, "learning_rate": 2.292859922374785e-07, "loss": 0.4599, "step": 14137 }, { "epoch": 2.906362421626066, "grad_norm": 0.22997353971004486, "learning_rate": 2.2828010129986922e-07, "loss": 0.4108, "step": 14138 }, { "epoch": 2.906567992599445, "grad_norm": 0.23019467294216156, "learning_rate": 2.2727641605564287e-07, "loss": 0.391, "step": 14139 }, { "epoch": 2.9067735635728233, "grad_norm": 0.219661682844162, "learning_rate": 2.2627493655423492e-07, "loss": 0.3711, "step": 14140 }, { "epoch": 2.906979134546202, "grad_norm": 0.23242846131324768, "learning_rate": 2.252756628449909e-07, "loss": 0.3808, "step": 14141 }, { "epoch": 2.9071847055195805, "grad_norm": 0.21850700676441193, "learning_rate": 2.2427859497713644e-07, "loss": 0.3812, "step": 14142 }, { "epoch": 2.907390276492959, "grad_norm": 0.22501927614212036, "learning_rate": 2.2328373299979723e-07, "loss": 0.3597, "step": 14143 }, { "epoch": 2.9075958474663377, "grad_norm": 0.22534947097301483, "learning_rate": 2.2229107696198403e-07, "loss": 0.3821, "step": 14144 }, { "epoch": 2.9078014184397163, "grad_norm": 0.22857366502285004, "learning_rate": 2.213006269125978e-07, "loss": 0.3938, "step": 14145 }, { "epoch": 2.908006989413095, "grad_norm": 0.23802757263183594, "learning_rate": 2.2031238290042943e-07, "loss": 0.3871, "step": 14146 }, { "epoch": 2.9082125603864735, "grad_norm": 0.2242707461118698, "learning_rate": 2.1932634497417505e-07, "loss": 0.3684, "step": 14147 }, { "epoch": 2.908418131359852, "grad_norm": 0.12156816571950912, "learning_rate": 2.1834251318240573e-07, "loss": 0.4328, "step": 14148 }, { "epoch": 2.9086237023332306, "grad_norm": 0.22905348241329193, "learning_rate": 2.1736088757359274e-07, "loss": 0.3772, "step": 14149 }, { "epoch": 2.9088292733066092, "grad_norm": 0.2287713587284088, "learning_rate": 2.163814681960924e-07, "loss": 0.3649, "step": 14150 }, { "epoch": 2.909034844279988, "grad_norm": 0.2233857661485672, "learning_rate": 2.1540425509816608e-07, "loss": 0.3777, "step": 14151 }, { "epoch": 2.9092404152533664, "grad_norm": 0.23792994022369385, "learning_rate": 2.1442924832794532e-07, "loss": 0.3854, "step": 14152 }, { "epoch": 2.9094459862267446, "grad_norm": 0.22729872167110443, "learning_rate": 2.1345644793346663e-07, "loss": 0.3748, "step": 14153 }, { "epoch": 2.9096515572001236, "grad_norm": 0.2334190458059311, "learning_rate": 2.1248585396265674e-07, "loss": 0.3878, "step": 14154 }, { "epoch": 2.9098571281735017, "grad_norm": 0.11780460178852081, "learning_rate": 2.1151746646333237e-07, "loss": 0.4551, "step": 14155 }, { "epoch": 2.9100626991468803, "grad_norm": 0.12107131630182266, "learning_rate": 2.1055128548320534e-07, "loss": 0.4406, "step": 14156 }, { "epoch": 2.910268270120259, "grad_norm": 0.12362342327833176, "learning_rate": 2.0958731106986762e-07, "loss": 0.4345, "step": 14157 }, { "epoch": 2.9104738410936375, "grad_norm": 0.12129798531532288, "learning_rate": 2.086255432708162e-07, "loss": 0.4588, "step": 14158 }, { "epoch": 2.910679412067016, "grad_norm": 0.2197551131248474, "learning_rate": 2.0766598213342814e-07, "loss": 0.3835, "step": 14159 }, { "epoch": 2.9108849830403947, "grad_norm": 0.23252207040786743, "learning_rate": 2.0670862770498068e-07, "loss": 0.3737, "step": 14160 }, { "epoch": 2.9110905540137733, "grad_norm": 0.23787905275821686, "learning_rate": 2.0575348003263107e-07, "loss": 0.3891, "step": 14161 }, { "epoch": 2.911296124987152, "grad_norm": 0.3696140944957733, "learning_rate": 2.0480053916344666e-07, "loss": 0.3845, "step": 14162 }, { "epoch": 2.9115016959605304, "grad_norm": 0.22981351613998413, "learning_rate": 2.0384980514435993e-07, "loss": 0.3624, "step": 14163 }, { "epoch": 2.911707266933909, "grad_norm": 0.24888327717781067, "learning_rate": 2.0290127802222337e-07, "loss": 0.3841, "step": 14164 }, { "epoch": 2.9119128379072876, "grad_norm": 0.22164028882980347, "learning_rate": 2.0195495784375463e-07, "loss": 0.3711, "step": 14165 }, { "epoch": 2.912118408880666, "grad_norm": 0.23485776782035828, "learning_rate": 2.0101084465558141e-07, "loss": 0.3845, "step": 14166 }, { "epoch": 2.912323979854045, "grad_norm": 0.23392504453659058, "learning_rate": 2.000689385042115e-07, "loss": 0.374, "step": 14167 }, { "epoch": 2.912529550827423, "grad_norm": 0.23598188161849976, "learning_rate": 1.9912923943605278e-07, "loss": 0.384, "step": 14168 }, { "epoch": 2.912735121800802, "grad_norm": 0.23599591851234436, "learning_rate": 1.9819174749739822e-07, "loss": 0.3803, "step": 14169 }, { "epoch": 2.91294069277418, "grad_norm": 0.22775763273239136, "learning_rate": 1.972564627344359e-07, "loss": 0.3712, "step": 14170 }, { "epoch": 2.9131462637475587, "grad_norm": 0.22463448345661163, "learning_rate": 1.9632338519323391e-07, "loss": 0.3685, "step": 14171 }, { "epoch": 2.9133518347209373, "grad_norm": 0.12167978286743164, "learning_rate": 1.9539251491977052e-07, "loss": 0.4519, "step": 14172 }, { "epoch": 2.913557405694316, "grad_norm": 0.22514131665229797, "learning_rate": 1.9446385195990403e-07, "loss": 0.368, "step": 14173 }, { "epoch": 2.9137629766676945, "grad_norm": 0.22279466688632965, "learning_rate": 1.9353739635937784e-07, "loss": 0.3641, "step": 14174 }, { "epoch": 2.913968547641073, "grad_norm": 0.11997415125370026, "learning_rate": 1.9261314816384046e-07, "loss": 0.4445, "step": 14175 }, { "epoch": 2.9141741186144516, "grad_norm": 0.12249033898115158, "learning_rate": 1.9169110741882546e-07, "loss": 0.4391, "step": 14176 }, { "epoch": 2.9143796895878302, "grad_norm": 0.2306622713804245, "learning_rate": 1.907712741697565e-07, "loss": 0.3835, "step": 14177 }, { "epoch": 2.914585260561209, "grad_norm": 0.11753799021244049, "learning_rate": 1.898536484619473e-07, "loss": 0.4275, "step": 14178 }, { "epoch": 2.9147908315345874, "grad_norm": 0.2338220775127411, "learning_rate": 1.8893823034061176e-07, "loss": 0.4019, "step": 14179 }, { "epoch": 2.914996402507966, "grad_norm": 0.1225576251745224, "learning_rate": 1.8802501985083875e-07, "loss": 0.4512, "step": 14180 }, { "epoch": 2.9152019734813446, "grad_norm": 0.11842867732048035, "learning_rate": 1.8711401703762232e-07, "loss": 0.4508, "step": 14181 }, { "epoch": 2.915407544454723, "grad_norm": 0.22266638278961182, "learning_rate": 1.8620522194584156e-07, "loss": 0.377, "step": 14182 }, { "epoch": 2.9156131154281013, "grad_norm": 0.2360514998435974, "learning_rate": 1.8529863462027563e-07, "loss": 0.3851, "step": 14183 }, { "epoch": 2.9158186864014803, "grad_norm": 0.24209058284759521, "learning_rate": 1.8439425510557885e-07, "loss": 0.3813, "step": 14184 }, { "epoch": 2.9160242573748585, "grad_norm": 0.22401560842990875, "learning_rate": 1.8349208344631052e-07, "loss": 0.381, "step": 14185 }, { "epoch": 2.916229828348237, "grad_norm": 0.12131255865097046, "learning_rate": 1.8259211968691514e-07, "loss": 0.462, "step": 14186 }, { "epoch": 2.9164353993216157, "grad_norm": 0.22824054956436157, "learning_rate": 1.8169436387173222e-07, "loss": 0.3783, "step": 14187 }, { "epoch": 2.9166409702949943, "grad_norm": 0.2334217131137848, "learning_rate": 1.807988160449864e-07, "loss": 0.3739, "step": 14188 }, { "epoch": 2.916846541268373, "grad_norm": 0.22181300818920135, "learning_rate": 1.7990547625079735e-07, "loss": 0.3881, "step": 14189 }, { "epoch": 2.9170521122417514, "grad_norm": 0.22861194610595703, "learning_rate": 1.790143445331749e-07, "loss": 0.3828, "step": 14190 }, { "epoch": 2.91725768321513, "grad_norm": 0.2301216721534729, "learning_rate": 1.781254209360289e-07, "loss": 0.3711, "step": 14191 }, { "epoch": 2.9174632541885086, "grad_norm": 0.22555860877037048, "learning_rate": 1.7723870550313938e-07, "loss": 0.3734, "step": 14192 }, { "epoch": 2.917668825161887, "grad_norm": 0.23354589939117432, "learning_rate": 1.7635419827820132e-07, "loss": 0.3687, "step": 14193 }, { "epoch": 2.917874396135266, "grad_norm": 0.23408174514770508, "learning_rate": 1.754718993047899e-07, "loss": 0.3708, "step": 14194 }, { "epoch": 2.9180799671086444, "grad_norm": 0.23139835894107819, "learning_rate": 1.7459180862636037e-07, "loss": 0.3577, "step": 14195 }, { "epoch": 2.918285538082023, "grad_norm": 0.11874835938215256, "learning_rate": 1.7371392628628802e-07, "loss": 0.4303, "step": 14196 }, { "epoch": 2.9184911090554015, "grad_norm": 0.23826338350772858, "learning_rate": 1.7283825232780825e-07, "loss": 0.3858, "step": 14197 }, { "epoch": 2.9186966800287797, "grad_norm": 0.13164587318897247, "learning_rate": 1.7196478679406658e-07, "loss": 0.447, "step": 14198 }, { "epoch": 2.9189022510021587, "grad_norm": 0.22203896939754486, "learning_rate": 1.7109352972809856e-07, "loss": 0.3562, "step": 14199 }, { "epoch": 2.919107821975537, "grad_norm": 0.24076960980892181, "learning_rate": 1.7022448117281487e-07, "loss": 0.3789, "step": 14200 }, { "epoch": 2.9193133929489155, "grad_norm": 0.24343958497047424, "learning_rate": 1.6935764117104125e-07, "loss": 0.3937, "step": 14201 }, { "epoch": 2.919518963922294, "grad_norm": 0.23502768576145172, "learning_rate": 1.6849300976547856e-07, "loss": 0.3881, "step": 14202 }, { "epoch": 2.9197245348956726, "grad_norm": 0.24000953137874603, "learning_rate": 1.6763058699872269e-07, "loss": 0.376, "step": 14203 }, { "epoch": 2.919930105869051, "grad_norm": 0.12082278728485107, "learning_rate": 1.667703729132647e-07, "loss": 0.4705, "step": 14204 }, { "epoch": 2.92013567684243, "grad_norm": 0.22615081071853638, "learning_rate": 1.6591236755148064e-07, "loss": 0.3811, "step": 14205 }, { "epoch": 2.9203412478158084, "grad_norm": 0.23181886970996857, "learning_rate": 1.6505657095563675e-07, "loss": 0.3656, "step": 14206 }, { "epoch": 2.920546818789187, "grad_norm": 0.22532041370868683, "learning_rate": 1.642029831678993e-07, "loss": 0.375, "step": 14207 }, { "epoch": 2.9207523897625656, "grad_norm": 0.11956392228603363, "learning_rate": 1.633516042303196e-07, "loss": 0.4629, "step": 14208 }, { "epoch": 2.920957960735944, "grad_norm": 0.22714190185070038, "learning_rate": 1.6250243418483412e-07, "loss": 0.3596, "step": 14209 }, { "epoch": 2.9211635317093227, "grad_norm": 0.2288563847541809, "learning_rate": 1.6165547307328944e-07, "loss": 0.3806, "step": 14210 }, { "epoch": 2.9213691026827013, "grad_norm": 0.21944314241409302, "learning_rate": 1.6081072093740711e-07, "loss": 0.3687, "step": 14211 }, { "epoch": 2.92157467365608, "grad_norm": 0.11778556555509567, "learning_rate": 1.599681778187989e-07, "loss": 0.4448, "step": 14212 }, { "epoch": 2.921780244629458, "grad_norm": 0.24057716131210327, "learning_rate": 1.591278437589816e-07, "loss": 0.39, "step": 14213 }, { "epoch": 2.921985815602837, "grad_norm": 0.11888077110052109, "learning_rate": 1.5828971879934706e-07, "loss": 0.4486, "step": 14214 }, { "epoch": 2.9221913865762152, "grad_norm": 0.23414359986782074, "learning_rate": 1.574538029811873e-07, "loss": 0.3829, "step": 14215 }, { "epoch": 2.922396957549594, "grad_norm": 0.2228407859802246, "learning_rate": 1.5662009634568432e-07, "loss": 0.3908, "step": 14216 }, { "epoch": 2.9226025285229724, "grad_norm": 0.23232321441173553, "learning_rate": 1.557885989339103e-07, "loss": 0.3872, "step": 14217 }, { "epoch": 2.922808099496351, "grad_norm": 0.23603259027004242, "learning_rate": 1.5495931078683746e-07, "loss": 0.3729, "step": 14218 }, { "epoch": 2.9230136704697296, "grad_norm": 0.23010489344596863, "learning_rate": 1.5413223194530813e-07, "loss": 0.3702, "step": 14219 }, { "epoch": 2.923219241443108, "grad_norm": 0.22785669565200806, "learning_rate": 1.5330736245007972e-07, "loss": 0.3773, "step": 14220 }, { "epoch": 2.9234248124164868, "grad_norm": 0.2386084794998169, "learning_rate": 1.524847023417797e-07, "loss": 0.3803, "step": 14221 }, { "epoch": 2.9236303833898654, "grad_norm": 0.23408401012420654, "learning_rate": 1.5166425166094567e-07, "loss": 0.3836, "step": 14222 }, { "epoch": 2.923835954363244, "grad_norm": 0.23765285313129425, "learning_rate": 1.508460104479903e-07, "loss": 0.376, "step": 14223 }, { "epoch": 2.9240415253366225, "grad_norm": 0.23104673624038696, "learning_rate": 1.5002997874323134e-07, "loss": 0.3758, "step": 14224 }, { "epoch": 2.924247096310001, "grad_norm": 0.2328345626592636, "learning_rate": 1.492161565868616e-07, "loss": 0.3718, "step": 14225 }, { "epoch": 2.9244526672833797, "grad_norm": 0.22445005178451538, "learning_rate": 1.4840454401898407e-07, "loss": 0.363, "step": 14226 }, { "epoch": 2.9246582382567583, "grad_norm": 0.22506146132946014, "learning_rate": 1.4759514107957673e-07, "loss": 0.3583, "step": 14227 }, { "epoch": 2.9248638092301364, "grad_norm": 0.22295387089252472, "learning_rate": 1.4678794780852267e-07, "loss": 0.3901, "step": 14228 }, { "epoch": 2.9250693802035155, "grad_norm": 0.22863556444644928, "learning_rate": 1.4598296424557512e-07, "loss": 0.3554, "step": 14229 }, { "epoch": 2.9252749511768936, "grad_norm": 0.23237614333629608, "learning_rate": 1.4518019043040233e-07, "loss": 0.3843, "step": 14230 }, { "epoch": 2.9254805221502727, "grad_norm": 0.22150248289108276, "learning_rate": 1.4437962640255264e-07, "loss": 0.3708, "step": 14231 }, { "epoch": 2.925686093123651, "grad_norm": 0.2305610030889511, "learning_rate": 1.4358127220146456e-07, "loss": 0.3922, "step": 14232 }, { "epoch": 2.9258916640970294, "grad_norm": 0.2294863909482956, "learning_rate": 1.4278512786646658e-07, "loss": 0.3815, "step": 14233 }, { "epoch": 2.926097235070408, "grad_norm": 0.22797244787216187, "learning_rate": 1.4199119343678236e-07, "loss": 0.3751, "step": 14234 }, { "epoch": 2.9263028060437866, "grad_norm": 0.23715586960315704, "learning_rate": 1.4119946895153058e-07, "loss": 0.3842, "step": 14235 }, { "epoch": 2.926508377017165, "grad_norm": 0.22145721316337585, "learning_rate": 1.4040995444970505e-07, "loss": 0.3793, "step": 14236 }, { "epoch": 2.9267139479905437, "grad_norm": 0.2366815060377121, "learning_rate": 1.396226499702097e-07, "loss": 0.3848, "step": 14237 }, { "epoch": 2.9269195189639223, "grad_norm": 0.24056269228458405, "learning_rate": 1.3883755555183343e-07, "loss": 0.3725, "step": 14238 }, { "epoch": 2.927125089937301, "grad_norm": 0.22281573712825775, "learning_rate": 1.3805467123325035e-07, "loss": 0.3695, "step": 14239 }, { "epoch": 2.9273306609106795, "grad_norm": 0.2354237586259842, "learning_rate": 1.3727399705302458e-07, "loss": 0.3834, "step": 14240 }, { "epoch": 2.927536231884058, "grad_norm": 0.121092788875103, "learning_rate": 1.3649553304962536e-07, "loss": 0.4405, "step": 14241 }, { "epoch": 2.9277418028574367, "grad_norm": 0.23735617101192474, "learning_rate": 1.3571927926139705e-07, "loss": 0.3735, "step": 14242 }, { "epoch": 2.927947373830815, "grad_norm": 0.24596528708934784, "learning_rate": 1.3494523572658402e-07, "loss": 0.3967, "step": 14243 }, { "epoch": 2.928152944804194, "grad_norm": 0.21962697803974152, "learning_rate": 1.3417340248332578e-07, "loss": 0.3747, "step": 14244 }, { "epoch": 2.928358515777572, "grad_norm": 0.2285209596157074, "learning_rate": 1.334037795696369e-07, "loss": 0.3752, "step": 14245 }, { "epoch": 2.928564086750951, "grad_norm": 0.22602157294750214, "learning_rate": 1.3263636702344207e-07, "loss": 0.3754, "step": 14246 }, { "epoch": 2.928769657724329, "grad_norm": 0.22371745109558105, "learning_rate": 1.3187116488254103e-07, "loss": 0.3879, "step": 14247 }, { "epoch": 2.9289752286977078, "grad_norm": 0.23446328938007355, "learning_rate": 1.3110817318463365e-07, "loss": 0.3569, "step": 14248 }, { "epoch": 2.9291807996710864, "grad_norm": 0.22867922484874725, "learning_rate": 1.3034739196730984e-07, "loss": 0.397, "step": 14249 }, { "epoch": 2.929386370644465, "grad_norm": 0.23485369980335236, "learning_rate": 1.295888212680496e-07, "loss": 0.3934, "step": 14250 }, { "epoch": 2.9295919416178435, "grad_norm": 0.23388779163360596, "learning_rate": 1.2883246112422808e-07, "loss": 0.3878, "step": 14251 }, { "epoch": 2.929797512591222, "grad_norm": 0.23058055341243744, "learning_rate": 1.2807831157310046e-07, "loss": 0.3728, "step": 14252 }, { "epoch": 2.9300030835646007, "grad_norm": 0.12012367695569992, "learning_rate": 1.2732637265182702e-07, "loss": 0.428, "step": 14253 }, { "epoch": 2.9302086545379793, "grad_norm": 0.11733004450798035, "learning_rate": 1.265766443974431e-07, "loss": 0.4467, "step": 14254 }, { "epoch": 2.930414225511358, "grad_norm": 0.23373596370220184, "learning_rate": 1.2582912684689418e-07, "loss": 0.3774, "step": 14255 }, { "epoch": 2.9306197964847365, "grad_norm": 0.22442536056041718, "learning_rate": 1.250838200370008e-07, "loss": 0.3723, "step": 14256 }, { "epoch": 2.930825367458115, "grad_norm": 0.119273342192173, "learning_rate": 1.243407240044836e-07, "loss": 0.4466, "step": 14257 }, { "epoch": 2.931030938431493, "grad_norm": 0.12564511597156525, "learning_rate": 1.2359983878595329e-07, "loss": 0.4453, "step": 14258 }, { "epoch": 2.9312365094048722, "grad_norm": 0.2270507961511612, "learning_rate": 1.2286116441790064e-07, "loss": 0.3577, "step": 14259 }, { "epoch": 2.9314420803782504, "grad_norm": 0.24136748909950256, "learning_rate": 1.2212470093673155e-07, "loss": 0.3874, "step": 14260 }, { "epoch": 2.9316476513516294, "grad_norm": 0.22944435477256775, "learning_rate": 1.2139044837871204e-07, "loss": 0.3783, "step": 14261 }, { "epoch": 2.9318532223250076, "grad_norm": 0.2328665405511856, "learning_rate": 1.2065840678002815e-07, "loss": 0.3704, "step": 14262 }, { "epoch": 2.932058793298386, "grad_norm": 0.23235177993774414, "learning_rate": 1.1992857617674103e-07, "loss": 0.3927, "step": 14263 }, { "epoch": 2.9322643642717647, "grad_norm": 0.22136935591697693, "learning_rate": 1.1920095660479691e-07, "loss": 0.3554, "step": 14264 }, { "epoch": 2.9324699352451433, "grad_norm": 0.23103518784046173, "learning_rate": 1.1847554810005212e-07, "loss": 0.3751, "step": 14265 }, { "epoch": 2.932675506218522, "grad_norm": 0.1267227828502655, "learning_rate": 1.177523506982431e-07, "loss": 0.4482, "step": 14266 }, { "epoch": 2.9328810771919005, "grad_norm": 0.23589691519737244, "learning_rate": 1.1703136443499629e-07, "loss": 0.3912, "step": 14267 }, { "epoch": 2.933086648165279, "grad_norm": 0.22941534221172333, "learning_rate": 1.1631258934583333e-07, "loss": 0.3815, "step": 14268 }, { "epoch": 2.9332922191386577, "grad_norm": 0.2415175586938858, "learning_rate": 1.1559602546616089e-07, "loss": 0.3837, "step": 14269 }, { "epoch": 2.9334977901120363, "grad_norm": 0.22201284766197205, "learning_rate": 1.148816728312857e-07, "loss": 0.3859, "step": 14270 }, { "epoch": 2.933703361085415, "grad_norm": 0.23160016536712646, "learning_rate": 1.1416953147639464e-07, "loss": 0.378, "step": 14271 }, { "epoch": 2.9339089320587934, "grad_norm": 0.23736536502838135, "learning_rate": 1.1345960143657463e-07, "loss": 0.363, "step": 14272 }, { "epoch": 2.9341145030321716, "grad_norm": 0.12271010130643845, "learning_rate": 1.127518827468027e-07, "loss": 0.4492, "step": 14273 }, { "epoch": 2.9343200740055506, "grad_norm": 0.2341691106557846, "learning_rate": 1.1204637544194097e-07, "loss": 0.3787, "step": 14274 }, { "epoch": 2.9345256449789288, "grad_norm": 0.23392406105995178, "learning_rate": 1.1134307955675161e-07, "loss": 0.3873, "step": 14275 }, { "epoch": 2.934731215952308, "grad_norm": 0.2216750532388687, "learning_rate": 1.1064199512587692e-07, "loss": 0.3777, "step": 14276 }, { "epoch": 2.934936786925686, "grad_norm": 0.23249836266040802, "learning_rate": 1.0994312218385927e-07, "loss": 0.365, "step": 14277 }, { "epoch": 2.9351423578990645, "grad_norm": 0.23422518372535706, "learning_rate": 1.0924646076513112e-07, "loss": 0.3889, "step": 14278 }, { "epoch": 2.935347928872443, "grad_norm": 0.23376347124576569, "learning_rate": 1.0855201090401002e-07, "loss": 0.3868, "step": 14279 }, { "epoch": 2.9355534998458217, "grad_norm": 0.21998612582683563, "learning_rate": 1.078597726347086e-07, "loss": 0.3909, "step": 14280 }, { "epoch": 2.9357590708192003, "grad_norm": 0.23854362964630127, "learning_rate": 1.0716974599132956e-07, "loss": 0.3771, "step": 14281 }, { "epoch": 2.935964641792579, "grad_norm": 0.11935044080018997, "learning_rate": 1.0648193100787074e-07, "loss": 0.4408, "step": 14282 }, { "epoch": 2.9361702127659575, "grad_norm": 0.23328512907028198, "learning_rate": 1.0579632771821502e-07, "loss": 0.3917, "step": 14283 }, { "epoch": 2.936375783739336, "grad_norm": 0.2255300134420395, "learning_rate": 1.0511293615613539e-07, "loss": 0.3756, "step": 14284 }, { "epoch": 2.9365813547127146, "grad_norm": 0.2301304042339325, "learning_rate": 1.0443175635530489e-07, "loss": 0.3927, "step": 14285 }, { "epoch": 2.9367869256860932, "grad_norm": 0.11829908192157745, "learning_rate": 1.037527883492817e-07, "loss": 0.4427, "step": 14286 }, { "epoch": 2.936992496659472, "grad_norm": 0.23846930265426636, "learning_rate": 1.0307603217151906e-07, "loss": 0.3758, "step": 14287 }, { "epoch": 2.9371980676328504, "grad_norm": 0.22976188361644745, "learning_rate": 1.0240148785534532e-07, "loss": 0.3702, "step": 14288 }, { "epoch": 2.937403638606229, "grad_norm": 0.235699862241745, "learning_rate": 1.0172915543400386e-07, "loss": 0.3791, "step": 14289 }, { "epoch": 2.937609209579607, "grad_norm": 0.23126575350761414, "learning_rate": 1.0105903494060821e-07, "loss": 0.3925, "step": 14290 }, { "epoch": 2.937814780552986, "grad_norm": 0.12287239730358124, "learning_rate": 1.0039112640818193e-07, "loss": 0.4534, "step": 14291 }, { "epoch": 2.9380203515263643, "grad_norm": 0.22776830196380615, "learning_rate": 9.972542986961875e-08, "loss": 0.3802, "step": 14292 }, { "epoch": 2.938225922499743, "grad_norm": 0.23235289752483368, "learning_rate": 9.906194535772739e-08, "loss": 0.3922, "step": 14293 }, { "epoch": 2.9384314934731215, "grad_norm": 0.12463247776031494, "learning_rate": 9.840067290518173e-08, "loss": 0.4412, "step": 14294 }, { "epoch": 2.9386370644465, "grad_norm": 0.12007234990596771, "learning_rate": 9.77416125445707e-08, "loss": 0.4616, "step": 14295 }, { "epoch": 2.9388426354198787, "grad_norm": 0.121745266020298, "learning_rate": 9.708476430835333e-08, "loss": 0.4576, "step": 14296 }, { "epoch": 2.9390482063932573, "grad_norm": 0.23362316191196442, "learning_rate": 9.643012822889375e-08, "loss": 0.387, "step": 14297 }, { "epoch": 2.939253777366636, "grad_norm": 0.21919940412044525, "learning_rate": 9.577770433844613e-08, "loss": 0.3776, "step": 14298 }, { "epoch": 2.9394593483400144, "grad_norm": 0.23474140465259552, "learning_rate": 9.512749266914978e-08, "loss": 0.3797, "step": 14299 }, { "epoch": 2.939664919313393, "grad_norm": 0.22480328381061554, "learning_rate": 9.447949325303407e-08, "loss": 0.3901, "step": 14300 }, { "epoch": 2.9398704902867716, "grad_norm": 0.11820299923419952, "learning_rate": 9.383370612202347e-08, "loss": 0.4407, "step": 14301 }, { "epoch": 2.94007606126015, "grad_norm": 0.23817752301692963, "learning_rate": 9.319013130794252e-08, "loss": 0.4009, "step": 14302 }, { "epoch": 2.940281632233529, "grad_norm": 0.23195527493953705, "learning_rate": 9.254876884248587e-08, "loss": 0.373, "step": 14303 }, { "epoch": 2.9404872032069074, "grad_norm": 0.22849521040916443, "learning_rate": 9.190961875725324e-08, "loss": 0.387, "step": 14304 }, { "epoch": 2.9406927741802855, "grad_norm": 0.2443472295999527, "learning_rate": 9.127268108373444e-08, "loss": 0.395, "step": 14305 }, { "epoch": 2.9408983451536646, "grad_norm": 0.12250496447086334, "learning_rate": 9.063795585330937e-08, "loss": 0.4541, "step": 14306 }, { "epoch": 2.9411039161270427, "grad_norm": 0.23145142197608948, "learning_rate": 9.000544309724302e-08, "loss": 0.37, "step": 14307 }, { "epoch": 2.9413094871004213, "grad_norm": 0.2310493439435959, "learning_rate": 8.937514284670545e-08, "loss": 0.3729, "step": 14308 }, { "epoch": 2.9415150580738, "grad_norm": 0.2356126606464386, "learning_rate": 8.874705513273685e-08, "loss": 0.3647, "step": 14309 }, { "epoch": 2.9417206290471785, "grad_norm": 0.1224084421992302, "learning_rate": 8.812117998629244e-08, "loss": 0.4391, "step": 14310 }, { "epoch": 2.941926200020557, "grad_norm": 0.23388880491256714, "learning_rate": 8.749751743819257e-08, "loss": 0.4037, "step": 14311 }, { "epoch": 2.9421317709939356, "grad_norm": 0.1348462849855423, "learning_rate": 8.687606751917766e-08, "loss": 0.4476, "step": 14312 }, { "epoch": 2.9423373419673142, "grad_norm": 0.11941714584827423, "learning_rate": 8.625683025984821e-08, "loss": 0.4535, "step": 14313 }, { "epoch": 2.942542912940693, "grad_norm": 0.2301827371120453, "learning_rate": 8.563980569071983e-08, "loss": 0.373, "step": 14314 }, { "epoch": 2.9427484839140714, "grad_norm": 0.23292043805122375, "learning_rate": 8.50249938421932e-08, "loss": 0.3848, "step": 14315 }, { "epoch": 2.94295405488745, "grad_norm": 0.23747049272060394, "learning_rate": 8.44123947445491e-08, "loss": 0.3767, "step": 14316 }, { "epoch": 2.9431596258608286, "grad_norm": 0.23186716437339783, "learning_rate": 8.380200842797336e-08, "loss": 0.3821, "step": 14317 }, { "epoch": 2.943365196834207, "grad_norm": 0.23163893818855286, "learning_rate": 8.319383492253696e-08, "loss": 0.3671, "step": 14318 }, { "epoch": 2.9435707678075858, "grad_norm": 0.12333094328641891, "learning_rate": 8.258787425819592e-08, "loss": 0.4495, "step": 14319 }, { "epoch": 2.943776338780964, "grad_norm": 0.12259241193532944, "learning_rate": 8.198412646480636e-08, "loss": 0.46, "step": 14320 }, { "epoch": 2.943981909754343, "grad_norm": 0.23415526747703552, "learning_rate": 8.138259157211447e-08, "loss": 0.3829, "step": 14321 }, { "epoch": 2.944187480727721, "grad_norm": 0.23561497032642365, "learning_rate": 8.078326960975158e-08, "loss": 0.3766, "step": 14322 }, { "epoch": 2.9443930517010997, "grad_norm": 0.22486624121665955, "learning_rate": 8.018616060724904e-08, "loss": 0.3748, "step": 14323 }, { "epoch": 2.9445986226744782, "grad_norm": 0.22078227996826172, "learning_rate": 7.959126459401834e-08, "loss": 0.3688, "step": 14324 }, { "epoch": 2.944804193647857, "grad_norm": 0.23797355592250824, "learning_rate": 7.899858159936601e-08, "loss": 0.387, "step": 14325 }, { "epoch": 2.9450097646212354, "grad_norm": 0.2293400913476944, "learning_rate": 7.840811165249373e-08, "loss": 0.373, "step": 14326 }, { "epoch": 2.945215335594614, "grad_norm": 0.12413428723812103, "learning_rate": 7.781985478249321e-08, "loss": 0.4478, "step": 14327 }, { "epoch": 2.9454209065679926, "grad_norm": 0.22216647863388062, "learning_rate": 7.723381101834126e-08, "loss": 0.3686, "step": 14328 }, { "epoch": 2.945626477541371, "grad_norm": 0.23056413233280182, "learning_rate": 7.66499803889098e-08, "loss": 0.3739, "step": 14329 }, { "epoch": 2.94583204851475, "grad_norm": 0.23194332420825958, "learning_rate": 7.606836292296582e-08, "loss": 0.3727, "step": 14330 }, { "epoch": 2.9460376194881284, "grad_norm": 0.24576567113399506, "learning_rate": 7.548895864915639e-08, "loss": 0.3977, "step": 14331 }, { "epoch": 2.946243190461507, "grad_norm": 0.22875289618968964, "learning_rate": 7.491176759602869e-08, "loss": 0.3842, "step": 14332 }, { "epoch": 2.9464487614348855, "grad_norm": 0.2283722311258316, "learning_rate": 7.433678979201997e-08, "loss": 0.3824, "step": 14333 }, { "epoch": 2.946654332408264, "grad_norm": 0.23309841752052307, "learning_rate": 7.376402526545755e-08, "loss": 0.3859, "step": 14334 }, { "epoch": 2.9468599033816423, "grad_norm": 0.23801040649414062, "learning_rate": 7.31934740445589e-08, "loss": 0.3857, "step": 14335 }, { "epoch": 2.9470654743550213, "grad_norm": 0.23424702882766724, "learning_rate": 7.26251361574265e-08, "loss": 0.3835, "step": 14336 }, { "epoch": 2.9472710453283995, "grad_norm": 0.23089328408241272, "learning_rate": 7.205901163206297e-08, "loss": 0.3669, "step": 14337 }, { "epoch": 2.947476616301778, "grad_norm": 0.22902965545654297, "learning_rate": 7.149510049636099e-08, "loss": 0.3738, "step": 14338 }, { "epoch": 2.9476821872751566, "grad_norm": 0.22217592597007751, "learning_rate": 7.093340277809834e-08, "loss": 0.3853, "step": 14339 }, { "epoch": 2.947887758248535, "grad_norm": 0.24186544120311737, "learning_rate": 7.03739185049529e-08, "loss": 0.3741, "step": 14340 }, { "epoch": 2.948093329221914, "grad_norm": 0.2368420511484146, "learning_rate": 6.98166477044826e-08, "loss": 0.3809, "step": 14341 }, { "epoch": 2.9482989001952924, "grad_norm": 0.11286085844039917, "learning_rate": 6.926159040414049e-08, "loss": 0.4515, "step": 14342 }, { "epoch": 2.948504471168671, "grad_norm": 0.23017874360084534, "learning_rate": 6.870874663127469e-08, "loss": 0.3689, "step": 14343 }, { "epoch": 2.9487100421420496, "grad_norm": 0.22851766645908356, "learning_rate": 6.815811641312342e-08, "loss": 0.3729, "step": 14344 }, { "epoch": 2.948915613115428, "grad_norm": 0.22411444783210754, "learning_rate": 6.760969977680498e-08, "loss": 0.3666, "step": 14345 }, { "epoch": 2.9491211840888067, "grad_norm": 0.22939811646938324, "learning_rate": 6.706349674934776e-08, "loss": 0.3816, "step": 14346 }, { "epoch": 2.9493267550621853, "grad_norm": 0.2309289425611496, "learning_rate": 6.651950735765522e-08, "loss": 0.3866, "step": 14347 }, { "epoch": 2.949532326035564, "grad_norm": 0.25561413168907166, "learning_rate": 6.597773162853094e-08, "loss": 0.4024, "step": 14348 }, { "epoch": 2.9497378970089425, "grad_norm": 0.23126906156539917, "learning_rate": 6.543816958865857e-08, "loss": 0.3858, "step": 14349 }, { "epoch": 2.9499434679823207, "grad_norm": 0.23696114122867584, "learning_rate": 6.490082126462682e-08, "loss": 0.3707, "step": 14350 }, { "epoch": 2.9501490389556997, "grad_norm": 0.12179608643054962, "learning_rate": 6.436568668290455e-08, "loss": 0.4631, "step": 14351 }, { "epoch": 2.950354609929078, "grad_norm": 0.22677427530288696, "learning_rate": 6.383276586985565e-08, "loss": 0.3663, "step": 14352 }, { "epoch": 2.9505601809024564, "grad_norm": 0.23234906792640686, "learning_rate": 6.330205885173413e-08, "loss": 0.3811, "step": 14353 }, { "epoch": 2.950765751875835, "grad_norm": 0.23495686054229736, "learning_rate": 6.277356565468906e-08, "loss": 0.3994, "step": 14354 }, { "epoch": 2.9509713228492136, "grad_norm": 0.23368287086486816, "learning_rate": 6.224728630474964e-08, "loss": 0.3713, "step": 14355 }, { "epoch": 2.951176893822592, "grad_norm": 0.11765862256288528, "learning_rate": 6.17232208278551e-08, "loss": 0.445, "step": 14356 }, { "epoch": 2.9513824647959708, "grad_norm": 0.23425832390785217, "learning_rate": 6.12013692498098e-08, "loss": 0.3983, "step": 14357 }, { "epoch": 2.9515880357693494, "grad_norm": 0.11689037829637527, "learning_rate": 6.068173159633317e-08, "loss": 0.4463, "step": 14358 }, { "epoch": 2.951793606742728, "grad_norm": 0.2250240296125412, "learning_rate": 6.016430789302474e-08, "loss": 0.3852, "step": 14359 }, { "epoch": 2.9519991777161065, "grad_norm": 0.23186476528644562, "learning_rate": 5.964909816536912e-08, "loss": 0.3659, "step": 14360 }, { "epoch": 2.952204748689485, "grad_norm": 0.2178521603345871, "learning_rate": 5.913610243875101e-08, "loss": 0.3706, "step": 14361 }, { "epoch": 2.9524103196628637, "grad_norm": 0.23056325316429138, "learning_rate": 5.8625320738445176e-08, "loss": 0.3751, "step": 14362 }, { "epoch": 2.9526158906362423, "grad_norm": 0.2350500226020813, "learning_rate": 5.811675308961151e-08, "loss": 0.3784, "step": 14363 }, { "epoch": 2.952821461609621, "grad_norm": 0.22323279082775116, "learning_rate": 5.7610399517309956e-08, "loss": 0.3732, "step": 14364 }, { "epoch": 2.953027032582999, "grad_norm": 0.23257021605968475, "learning_rate": 5.7106260046485564e-08, "loss": 0.3833, "step": 14365 }, { "epoch": 2.953232603556378, "grad_norm": 0.22943510115146637, "learning_rate": 5.6604334701968466e-08, "loss": 0.3664, "step": 14366 }, { "epoch": 2.953438174529756, "grad_norm": 0.25284644961357117, "learning_rate": 5.6104623508493883e-08, "loss": 0.3844, "step": 14367 }, { "epoch": 2.953643745503135, "grad_norm": 0.23901039361953735, "learning_rate": 5.560712649067712e-08, "loss": 0.3866, "step": 14368 }, { "epoch": 2.9538493164765134, "grad_norm": 0.23246188461780548, "learning_rate": 5.5111843673028574e-08, "loss": 0.3791, "step": 14369 }, { "epoch": 2.954054887449892, "grad_norm": 0.22920754551887512, "learning_rate": 5.4618775079948725e-08, "loss": 0.3846, "step": 14370 }, { "epoch": 2.9542604584232706, "grad_norm": 0.23537150025367737, "learning_rate": 5.412792073572315e-08, "loss": 0.3787, "step": 14371 }, { "epoch": 2.954466029396649, "grad_norm": 0.23101921379566193, "learning_rate": 5.363928066454249e-08, "loss": 0.3592, "step": 14372 }, { "epoch": 2.9546716003700277, "grad_norm": 0.11517384648323059, "learning_rate": 5.31528548904775e-08, "loss": 0.4418, "step": 14373 }, { "epoch": 2.9548771713434063, "grad_norm": 0.24331872165203094, "learning_rate": 5.266864343748401e-08, "loss": 0.3696, "step": 14374 }, { "epoch": 2.955082742316785, "grad_norm": 0.12557660043239594, "learning_rate": 5.218664632942794e-08, "loss": 0.4405, "step": 14375 }, { "epoch": 2.9552883132901635, "grad_norm": 0.2361089289188385, "learning_rate": 5.170686359005028e-08, "loss": 0.399, "step": 14376 }, { "epoch": 2.955493884263542, "grad_norm": 0.23642629384994507, "learning_rate": 5.122929524298215e-08, "loss": 0.3766, "step": 14377 }, { "epoch": 2.9556994552369207, "grad_norm": 0.22410228848457336, "learning_rate": 5.07539413117647e-08, "loss": 0.369, "step": 14378 }, { "epoch": 2.9559050262102993, "grad_norm": 0.11725395172834396, "learning_rate": 5.028080181980421e-08, "loss": 0.4579, "step": 14379 }, { "epoch": 2.9561105971836774, "grad_norm": 0.11997832357883453, "learning_rate": 4.9809876790412045e-08, "loss": 0.4409, "step": 14380 }, { "epoch": 2.9563161681570564, "grad_norm": 0.22873012721538544, "learning_rate": 4.9341166246794635e-08, "loss": 0.3836, "step": 14381 }, { "epoch": 2.9565217391304346, "grad_norm": 0.24191080033779144, "learning_rate": 4.8874670212033516e-08, "loss": 0.408, "step": 14382 }, { "epoch": 2.956727310103813, "grad_norm": 0.23186847567558289, "learning_rate": 4.841038870912029e-08, "loss": 0.4031, "step": 14383 }, { "epoch": 2.9569328810771918, "grad_norm": 0.22565621137619019, "learning_rate": 4.7948321760926675e-08, "loss": 0.3672, "step": 14384 }, { "epoch": 2.9571384520505704, "grad_norm": 0.23073460161685944, "learning_rate": 4.748846939020946e-08, "loss": 0.3797, "step": 14385 }, { "epoch": 2.957344023023949, "grad_norm": 0.23532749712467194, "learning_rate": 4.703083161963051e-08, "loss": 0.3721, "step": 14386 }, { "epoch": 2.9575495939973275, "grad_norm": 0.27987563610076904, "learning_rate": 4.657540847173181e-08, "loss": 0.368, "step": 14387 }, { "epoch": 2.957755164970706, "grad_norm": 0.2355928122997284, "learning_rate": 4.61221999689504e-08, "loss": 0.3909, "step": 14388 }, { "epoch": 2.9579607359440847, "grad_norm": 0.22605665028095245, "learning_rate": 4.567120613361342e-08, "loss": 0.3669, "step": 14389 }, { "epoch": 2.9581663069174633, "grad_norm": 0.22839273512363434, "learning_rate": 4.52224269879431e-08, "loss": 0.3821, "step": 14390 }, { "epoch": 2.958371877890842, "grad_norm": 0.23111465573310852, "learning_rate": 4.477586255404176e-08, "loss": 0.3917, "step": 14391 }, { "epoch": 2.9585774488642205, "grad_norm": 0.2226291000843048, "learning_rate": 4.433151285391679e-08, "loss": 0.374, "step": 14392 }, { "epoch": 2.958783019837599, "grad_norm": 0.235224187374115, "learning_rate": 4.388937790945569e-08, "loss": 0.3761, "step": 14393 }, { "epoch": 2.9589885908109776, "grad_norm": 0.22255218029022217, "learning_rate": 4.3449457742441025e-08, "loss": 0.3554, "step": 14394 }, { "epoch": 2.959194161784356, "grad_norm": 0.23567332327365875, "learning_rate": 4.3011752374545464e-08, "loss": 0.3757, "step": 14395 }, { "epoch": 2.959399732757735, "grad_norm": 0.2175855040550232, "learning_rate": 4.257626182732677e-08, "loss": 0.3868, "step": 14396 }, { "epoch": 2.959605303731113, "grad_norm": 0.22269536554813385, "learning_rate": 4.214298612225276e-08, "loss": 0.3789, "step": 14397 }, { "epoch": 2.959810874704492, "grad_norm": 0.2323738932609558, "learning_rate": 4.1711925280656376e-08, "loss": 0.3916, "step": 14398 }, { "epoch": 2.96001644567787, "grad_norm": 0.23786410689353943, "learning_rate": 4.1283079323780616e-08, "loss": 0.3622, "step": 14399 }, { "epoch": 2.9602220166512487, "grad_norm": 0.22747676074504852, "learning_rate": 4.085644827275359e-08, "loss": 0.3803, "step": 14400 }, { "epoch": 2.9604275876246273, "grad_norm": 0.23511525988578796, "learning_rate": 4.043203214858848e-08, "loss": 0.3931, "step": 14401 }, { "epoch": 2.960633158598006, "grad_norm": 0.225963294506073, "learning_rate": 4.000983097219358e-08, "loss": 0.3679, "step": 14402 }, { "epoch": 2.9608387295713845, "grad_norm": 0.23282590508460999, "learning_rate": 3.958984476437722e-08, "loss": 0.3793, "step": 14403 }, { "epoch": 2.961044300544763, "grad_norm": 0.2279476523399353, "learning_rate": 3.917207354581787e-08, "loss": 0.3647, "step": 14404 }, { "epoch": 2.9612498715181417, "grad_norm": 0.239571213722229, "learning_rate": 3.875651733710906e-08, "loss": 0.3865, "step": 14405 }, { "epoch": 2.9614554424915203, "grad_norm": 0.2309008538722992, "learning_rate": 3.834317615871941e-08, "loss": 0.3594, "step": 14406 }, { "epoch": 2.961661013464899, "grad_norm": 0.22634616494178772, "learning_rate": 3.793205003100764e-08, "loss": 0.3762, "step": 14407 }, { "epoch": 2.9618665844382774, "grad_norm": 0.24357974529266357, "learning_rate": 3.752313897423754e-08, "loss": 0.3808, "step": 14408 }, { "epoch": 2.962072155411656, "grad_norm": 0.2598305642604828, "learning_rate": 3.7116443008543e-08, "loss": 0.3751, "step": 14409 }, { "epoch": 2.962277726385034, "grad_norm": 0.2315262258052826, "learning_rate": 3.6711962153963e-08, "loss": 0.3667, "step": 14410 }, { "epoch": 2.962483297358413, "grad_norm": 0.22729608416557312, "learning_rate": 3.6309696430431586e-08, "loss": 0.3758, "step": 14411 }, { "epoch": 2.9626888683317913, "grad_norm": 0.23362228274345398, "learning_rate": 3.590964585776291e-08, "loss": 0.402, "step": 14412 }, { "epoch": 2.9628944393051704, "grad_norm": 0.24321232736110687, "learning_rate": 3.551181045566121e-08, "loss": 0.3867, "step": 14413 }, { "epoch": 2.9631000102785485, "grad_norm": 0.2254071682691574, "learning_rate": 3.511619024373081e-08, "loss": 0.3674, "step": 14414 }, { "epoch": 2.963305581251927, "grad_norm": 0.23968133330345154, "learning_rate": 3.472278524145611e-08, "loss": 0.3507, "step": 14415 }, { "epoch": 2.9635111522253057, "grad_norm": 0.22927747666835785, "learning_rate": 3.433159546822662e-08, "loss": 0.3699, "step": 14416 }, { "epoch": 2.9637167231986843, "grad_norm": 0.4465451240539551, "learning_rate": 3.394262094331191e-08, "loss": 0.3874, "step": 14417 }, { "epoch": 2.963922294172063, "grad_norm": 0.23466768860816956, "learning_rate": 3.355586168587166e-08, "loss": 0.376, "step": 14418 }, { "epoch": 2.9641278651454415, "grad_norm": 0.11790206283330917, "learning_rate": 3.3171317714960624e-08, "loss": 0.4474, "step": 14419 }, { "epoch": 2.96433343611882, "grad_norm": 0.23741118609905243, "learning_rate": 3.278898904952366e-08, "loss": 0.3897, "step": 14420 }, { "epoch": 2.9645390070921986, "grad_norm": 0.2253805696964264, "learning_rate": 3.240887570840068e-08, "loss": 0.3498, "step": 14421 }, { "epoch": 2.9647445780655772, "grad_norm": 0.22657155990600586, "learning_rate": 3.203097771031172e-08, "loss": 0.3677, "step": 14422 }, { "epoch": 2.964950149038956, "grad_norm": 0.22919400036334991, "learning_rate": 3.165529507387188e-08, "loss": 0.3756, "step": 14423 }, { "epoch": 2.9651557200123344, "grad_norm": 0.12317074835300446, "learning_rate": 3.128182781760136e-08, "loss": 0.4487, "step": 14424 }, { "epoch": 2.9653612909857126, "grad_norm": 0.12205608189105988, "learning_rate": 3.0910575959890444e-08, "loss": 0.4527, "step": 14425 }, { "epoch": 2.9655668619590916, "grad_norm": 0.22888796031475067, "learning_rate": 3.0541539519029495e-08, "loss": 0.3703, "step": 14426 }, { "epoch": 2.9657724329324697, "grad_norm": 0.24040739238262177, "learning_rate": 3.017471851319897e-08, "loss": 0.3859, "step": 14427 }, { "epoch": 2.9659780039058488, "grad_norm": 0.120720773935318, "learning_rate": 2.9810112960474425e-08, "loss": 0.4494, "step": 14428 }, { "epoch": 2.966183574879227, "grad_norm": 0.23405267298221588, "learning_rate": 2.944772287881148e-08, "loss": 0.3848, "step": 14429 }, { "epoch": 2.9663891458526055, "grad_norm": 0.23550044000148773, "learning_rate": 2.9087548286070853e-08, "loss": 0.38, "step": 14430 }, { "epoch": 2.966594716825984, "grad_norm": 0.22151748836040497, "learning_rate": 2.8729589199993357e-08, "loss": 0.3612, "step": 14431 }, { "epoch": 2.9668002877993627, "grad_norm": 0.2364836186170578, "learning_rate": 2.837384563821488e-08, "loss": 0.3892, "step": 14432 }, { "epoch": 2.9670058587727413, "grad_norm": 0.23630112409591675, "learning_rate": 2.802031761825641e-08, "loss": 0.3829, "step": 14433 }, { "epoch": 2.96721142974612, "grad_norm": 0.23327064514160156, "learning_rate": 2.766900515753901e-08, "loss": 0.3951, "step": 14434 }, { "epoch": 2.9674170007194984, "grad_norm": 0.12371329218149185, "learning_rate": 2.7319908273373828e-08, "loss": 0.4328, "step": 14435 }, { "epoch": 2.967622571692877, "grad_norm": 0.11786897480487823, "learning_rate": 2.697302698295212e-08, "loss": 0.4559, "step": 14436 }, { "epoch": 2.9678281426662556, "grad_norm": 0.23964469134807587, "learning_rate": 2.6628361303365212e-08, "loss": 0.3775, "step": 14437 }, { "epoch": 2.968033713639634, "grad_norm": 0.11859652400016785, "learning_rate": 2.628591125159452e-08, "loss": 0.4539, "step": 14438 }, { "epoch": 2.968239284613013, "grad_norm": 0.24552220106124878, "learning_rate": 2.594567684450655e-08, "loss": 0.3925, "step": 14439 }, { "epoch": 2.968444855586391, "grad_norm": 0.2301911562681198, "learning_rate": 2.560765809887289e-08, "loss": 0.3591, "step": 14440 }, { "epoch": 2.96865042655977, "grad_norm": 0.23525011539459229, "learning_rate": 2.527185503134022e-08, "loss": 0.3905, "step": 14441 }, { "epoch": 2.968855997533148, "grad_norm": 0.123292475938797, "learning_rate": 2.493826765845031e-08, "loss": 0.4365, "step": 14442 }, { "epoch": 2.969061568506527, "grad_norm": 0.23423157632350922, "learning_rate": 2.4606895996635016e-08, "loss": 0.3867, "step": 14443 }, { "epoch": 2.9692671394799053, "grad_norm": 0.2355274260044098, "learning_rate": 2.4277740062226274e-08, "loss": 0.392, "step": 14444 }, { "epoch": 2.969472710453284, "grad_norm": 0.12355451285839081, "learning_rate": 2.395079987144111e-08, "loss": 0.4473, "step": 14445 }, { "epoch": 2.9696782814266625, "grad_norm": 0.23346541821956635, "learning_rate": 2.362607544037665e-08, "loss": 0.3761, "step": 14446 }, { "epoch": 2.969883852400041, "grad_norm": 0.23978963494300842, "learning_rate": 2.3303566785040087e-08, "loss": 0.366, "step": 14447 }, { "epoch": 2.9700894233734196, "grad_norm": 0.22105932235717773, "learning_rate": 2.298327392131372e-08, "loss": 0.3697, "step": 14448 }, { "epoch": 2.9702949943467982, "grad_norm": 0.23754067718982697, "learning_rate": 2.2665196864984918e-08, "loss": 0.3773, "step": 14449 }, { "epoch": 2.970500565320177, "grad_norm": 0.23977546393871307, "learning_rate": 2.2349335631711155e-08, "loss": 0.4008, "step": 14450 }, { "epoch": 2.9707061362935554, "grad_norm": 0.22875571250915527, "learning_rate": 2.2035690237064977e-08, "loss": 0.3623, "step": 14451 }, { "epoch": 2.970911707266934, "grad_norm": 0.23122116923332214, "learning_rate": 2.1724260696494027e-08, "loss": 0.369, "step": 14452 }, { "epoch": 2.9711172782403126, "grad_norm": 0.12258761376142502, "learning_rate": 2.141504702533603e-08, "loss": 0.4432, "step": 14453 }, { "epoch": 2.971322849213691, "grad_norm": 0.2527145445346832, "learning_rate": 2.1108049238833806e-08, "loss": 0.3691, "step": 14454 }, { "epoch": 2.9715284201870698, "grad_norm": 0.22957132756710052, "learning_rate": 2.080326735210525e-08, "loss": 0.3863, "step": 14455 }, { "epoch": 2.9717339911604483, "grad_norm": 0.11908449977636337, "learning_rate": 2.050070138016835e-08, "loss": 0.4381, "step": 14456 }, { "epoch": 2.9719395621338265, "grad_norm": 0.23832279443740845, "learning_rate": 2.020035133793119e-08, "loss": 0.3762, "step": 14457 }, { "epoch": 2.9721451331072055, "grad_norm": 0.22900496423244476, "learning_rate": 1.990221724018193e-08, "loss": 0.3593, "step": 14458 }, { "epoch": 2.9723507040805837, "grad_norm": 0.23259581625461578, "learning_rate": 1.960629910161882e-08, "loss": 0.3722, "step": 14459 }, { "epoch": 2.9725562750539622, "grad_norm": 0.2373165637254715, "learning_rate": 1.93125969368102e-08, "loss": 0.3792, "step": 14460 }, { "epoch": 2.972761846027341, "grad_norm": 0.23144948482513428, "learning_rate": 1.9021110760234494e-08, "loss": 0.3921, "step": 14461 }, { "epoch": 2.9729674170007194, "grad_norm": 0.22227592766284943, "learning_rate": 1.8731840586250217e-08, "loss": 0.3614, "step": 14462 }, { "epoch": 2.973172987974098, "grad_norm": 0.231735497713089, "learning_rate": 1.844478642910097e-08, "loss": 0.39, "step": 14463 }, { "epoch": 2.9733785589474766, "grad_norm": 0.23499642312526703, "learning_rate": 1.8159948302940432e-08, "loss": 0.3636, "step": 14464 }, { "epoch": 2.973584129920855, "grad_norm": 0.12244053184986115, "learning_rate": 1.7877326221787395e-08, "loss": 0.449, "step": 14465 }, { "epoch": 2.973789700894234, "grad_norm": 0.2361563742160797, "learning_rate": 1.7596920199575706e-08, "loss": 0.3727, "step": 14466 }, { "epoch": 2.9739952718676124, "grad_norm": 0.22725822031497955, "learning_rate": 1.731873025011932e-08, "loss": 0.3663, "step": 14467 }, { "epoch": 2.974200842840991, "grad_norm": 0.23438185453414917, "learning_rate": 1.7042756387117275e-08, "loss": 0.3793, "step": 14468 }, { "epoch": 2.9744064138143695, "grad_norm": 0.24105405807495117, "learning_rate": 1.6768998624168698e-08, "loss": 0.393, "step": 14469 }, { "epoch": 2.974611984787748, "grad_norm": 0.23841865360736847, "learning_rate": 1.6497456974762794e-08, "loss": 0.389, "step": 14470 }, { "epoch": 2.9748175557611267, "grad_norm": 0.23006348311901093, "learning_rate": 1.6228131452273864e-08, "loss": 0.3752, "step": 14471 }, { "epoch": 2.975023126734505, "grad_norm": 0.22141233086585999, "learning_rate": 1.5961022069971298e-08, "loss": 0.3466, "step": 14472 }, { "epoch": 2.975228697707884, "grad_norm": 0.2314436435699463, "learning_rate": 1.5696128841014568e-08, "loss": 0.3913, "step": 14473 }, { "epoch": 2.975434268681262, "grad_norm": 0.2326936423778534, "learning_rate": 1.5433451778448238e-08, "loss": 0.3836, "step": 14474 }, { "epoch": 2.9756398396546406, "grad_norm": 0.22499439120292664, "learning_rate": 1.5172990895226948e-08, "loss": 0.3827, "step": 14475 }, { "epoch": 2.975845410628019, "grad_norm": 0.23209989070892334, "learning_rate": 1.4914746204165443e-08, "loss": 0.3626, "step": 14476 }, { "epoch": 2.976050981601398, "grad_norm": 0.2376868724822998, "learning_rate": 1.4658717718003535e-08, "loss": 0.3645, "step": 14477 }, { "epoch": 2.9762565525747764, "grad_norm": 0.2345684915781021, "learning_rate": 1.4404905449336149e-08, "loss": 0.3789, "step": 14478 }, { "epoch": 2.976462123548155, "grad_norm": 0.11508353054523468, "learning_rate": 1.415330941068327e-08, "loss": 0.4672, "step": 14479 }, { "epoch": 2.9766676945215336, "grad_norm": 0.11564578115940094, "learning_rate": 1.3903929614434986e-08, "loss": 0.444, "step": 14480 }, { "epoch": 2.976873265494912, "grad_norm": 0.22484390437602997, "learning_rate": 1.3656766072871475e-08, "loss": 0.3825, "step": 14481 }, { "epoch": 2.9770788364682907, "grad_norm": 0.22326096892356873, "learning_rate": 1.3411818798172993e-08, "loss": 0.3728, "step": 14482 }, { "epoch": 2.9772844074416693, "grad_norm": 0.22746115922927856, "learning_rate": 1.3169087802409885e-08, "loss": 0.3882, "step": 14483 }, { "epoch": 2.977489978415048, "grad_norm": 0.23284806311130524, "learning_rate": 1.2928573097537588e-08, "loss": 0.3927, "step": 14484 }, { "epoch": 2.9776955493884265, "grad_norm": 0.23383115231990814, "learning_rate": 1.2690274695406623e-08, "loss": 0.3897, "step": 14485 }, { "epoch": 2.977901120361805, "grad_norm": 0.23091500997543335, "learning_rate": 1.2454192607752602e-08, "loss": 0.3783, "step": 14486 }, { "epoch": 2.9781066913351832, "grad_norm": 0.23954810202121735, "learning_rate": 1.2220326846211217e-08, "loss": 0.3963, "step": 14487 }, { "epoch": 2.9783122623085623, "grad_norm": 0.23919789493083954, "learning_rate": 1.1988677422303251e-08, "loss": 0.3777, "step": 14488 }, { "epoch": 2.9785178332819404, "grad_norm": 0.2225130796432495, "learning_rate": 1.1759244347434584e-08, "loss": 0.3658, "step": 14489 }, { "epoch": 2.978723404255319, "grad_norm": 0.2334238588809967, "learning_rate": 1.153202763292116e-08, "loss": 0.3906, "step": 14490 }, { "epoch": 2.9789289752286976, "grad_norm": 0.2455325573682785, "learning_rate": 1.1307027289944038e-08, "loss": 0.3829, "step": 14491 }, { "epoch": 2.979134546202076, "grad_norm": 0.2270069569349289, "learning_rate": 1.1084243329594347e-08, "loss": 0.3802, "step": 14492 }, { "epoch": 2.9793401171754548, "grad_norm": 0.22187288105487823, "learning_rate": 1.0863675762843306e-08, "loss": 0.3832, "step": 14493 }, { "epoch": 2.9795456881488334, "grad_norm": 0.2417239248752594, "learning_rate": 1.0645324600562223e-08, "loss": 0.3624, "step": 14494 }, { "epoch": 2.979751259122212, "grad_norm": 0.11916260421276093, "learning_rate": 1.0429189853507493e-08, "loss": 0.4491, "step": 14495 }, { "epoch": 2.9799568300955905, "grad_norm": 0.23340509831905365, "learning_rate": 1.02152715323256e-08, "loss": 0.3903, "step": 14496 }, { "epoch": 2.980162401068969, "grad_norm": 0.2338993400335312, "learning_rate": 1.0003569647558109e-08, "loss": 0.3733, "step": 14497 }, { "epoch": 2.9803679720423477, "grad_norm": 0.23980024456977844, "learning_rate": 9.794084209626687e-09, "loss": 0.3745, "step": 14498 }, { "epoch": 2.9805735430157263, "grad_norm": 0.22634217143058777, "learning_rate": 9.58681522885807e-09, "loss": 0.3821, "step": 14499 }, { "epoch": 2.980779113989105, "grad_norm": 0.2323930710554123, "learning_rate": 9.381762715464093e-09, "loss": 0.3854, "step": 14500 }, { "epoch": 2.9809846849624835, "grad_norm": 0.23172056674957275, "learning_rate": 9.178926679546673e-09, "loss": 0.3735, "step": 14501 }, { "epoch": 2.9811902559358616, "grad_norm": 0.23236456513404846, "learning_rate": 8.978307131097818e-09, "loss": 0.3731, "step": 14502 }, { "epoch": 2.9813958269092407, "grad_norm": 0.23568768799304962, "learning_rate": 8.779904079994628e-09, "loss": 0.3654, "step": 14503 }, { "epoch": 2.981601397882619, "grad_norm": 0.22454003989696503, "learning_rate": 8.583717536019276e-09, "loss": 0.4069, "step": 14504 }, { "epoch": 2.9818069688559974, "grad_norm": 0.2187877893447876, "learning_rate": 8.38974750883903e-09, "loss": 0.3814, "step": 14505 }, { "epoch": 2.982012539829376, "grad_norm": 0.2329930067062378, "learning_rate": 8.197994008001253e-09, "loss": 0.3876, "step": 14506 }, { "epoch": 2.9822181108027546, "grad_norm": 0.22449320554733276, "learning_rate": 8.008457042958384e-09, "loss": 0.3766, "step": 14507 }, { "epoch": 2.982423681776133, "grad_norm": 0.12521809339523315, "learning_rate": 7.821136623047953e-09, "loss": 0.4432, "step": 14508 }, { "epoch": 2.9826292527495117, "grad_norm": 0.12057320028543472, "learning_rate": 7.636032757492583e-09, "loss": 0.4466, "step": 14509 }, { "epoch": 2.9828348237228903, "grad_norm": 0.22894617915153503, "learning_rate": 7.453145455419975e-09, "loss": 0.378, "step": 14510 }, { "epoch": 2.983040394696269, "grad_norm": 0.22911518812179565, "learning_rate": 7.272474725837919e-09, "loss": 0.3645, "step": 14511 }, { "epoch": 2.9832459656696475, "grad_norm": 0.23008479177951813, "learning_rate": 7.0940205776443004e-09, "loss": 0.3793, "step": 14512 }, { "epoch": 2.983451536643026, "grad_norm": 0.23247113823890686, "learning_rate": 6.917783019627089e-09, "loss": 0.3744, "step": 14513 }, { "epoch": 2.9836571076164047, "grad_norm": 0.12385281175374985, "learning_rate": 6.7437620604793304e-09, "loss": 0.4215, "step": 14514 }, { "epoch": 2.9838626785897833, "grad_norm": 0.23346978425979614, "learning_rate": 6.571957708764176e-09, "loss": 0.3934, "step": 14515 }, { "epoch": 2.984068249563162, "grad_norm": 0.24097535014152527, "learning_rate": 6.402369972954847e-09, "loss": 0.3913, "step": 14516 }, { "epoch": 2.98427382053654, "grad_norm": 0.2249852418899536, "learning_rate": 6.234998861399666e-09, "loss": 0.3629, "step": 14517 }, { "epoch": 2.984479391509919, "grad_norm": 0.22656574845314026, "learning_rate": 6.069844382342038e-09, "loss": 0.3682, "step": 14518 }, { "epoch": 2.984684962483297, "grad_norm": 0.2244207262992859, "learning_rate": 5.90690654392545e-09, "loss": 0.3743, "step": 14519 }, { "epoch": 2.9848905334566758, "grad_norm": 0.2316739857196808, "learning_rate": 5.746185354173484e-09, "loss": 0.3774, "step": 14520 }, { "epoch": 2.9850961044300544, "grad_norm": 0.1228955090045929, "learning_rate": 5.587680821004803e-09, "loss": 0.4535, "step": 14521 }, { "epoch": 2.985301675403433, "grad_norm": 0.2285931259393692, "learning_rate": 5.431392952228165e-09, "loss": 0.3802, "step": 14522 }, { "epoch": 2.9855072463768115, "grad_norm": 0.21995897591114044, "learning_rate": 5.2773217555424086e-09, "loss": 0.3836, "step": 14523 }, { "epoch": 2.98571281735019, "grad_norm": 0.23392610251903534, "learning_rate": 5.125467238536463e-09, "loss": 0.3909, "step": 14524 }, { "epoch": 2.9859183883235687, "grad_norm": 0.23672394454479218, "learning_rate": 4.975829408694344e-09, "loss": 0.3759, "step": 14525 }, { "epoch": 2.9861239592969473, "grad_norm": 0.12258664518594742, "learning_rate": 4.828408273385154e-09, "loss": 0.4437, "step": 14526 }, { "epoch": 2.986329530270326, "grad_norm": 0.23568691313266754, "learning_rate": 4.683203839878081e-09, "loss": 0.3702, "step": 14527 }, { "epoch": 2.9865351012437045, "grad_norm": 0.2551220953464508, "learning_rate": 4.540216115317409e-09, "loss": 0.3785, "step": 14528 }, { "epoch": 2.986740672217083, "grad_norm": 0.22651293873786926, "learning_rate": 4.399445106752498e-09, "loss": 0.3663, "step": 14529 }, { "epoch": 2.9869462431904616, "grad_norm": 0.2356937676668167, "learning_rate": 4.260890821117802e-09, "loss": 0.3979, "step": 14530 }, { "epoch": 2.9871518141638402, "grad_norm": 0.2306637316942215, "learning_rate": 4.124553265242859e-09, "loss": 0.3723, "step": 14531 }, { "epoch": 2.9873573851372184, "grad_norm": 0.1231551244854927, "learning_rate": 3.9904324458373e-09, "loss": 0.4615, "step": 14532 }, { "epoch": 2.9875629561105974, "grad_norm": 0.23342475295066833, "learning_rate": 3.8585283695158345e-09, "loss": 0.376, "step": 14533 }, { "epoch": 2.9877685270839756, "grad_norm": 0.23796530067920685, "learning_rate": 3.728841042768272e-09, "loss": 0.3981, "step": 14534 }, { "epoch": 2.987974098057354, "grad_norm": 0.2273947149515152, "learning_rate": 3.601370471994492e-09, "loss": 0.3752, "step": 14535 }, { "epoch": 2.9881796690307327, "grad_norm": 0.22759398818016052, "learning_rate": 3.4761166634644795e-09, "loss": 0.3688, "step": 14536 }, { "epoch": 2.9883852400041113, "grad_norm": 0.22332710027694702, "learning_rate": 3.353079623353295e-09, "loss": 0.3744, "step": 14537 }, { "epoch": 2.98859081097749, "grad_norm": 0.22932596504688263, "learning_rate": 3.232259357726086e-09, "loss": 0.3762, "step": 14538 }, { "epoch": 2.9887963819508685, "grad_norm": 0.22627981007099152, "learning_rate": 3.1136558725280986e-09, "loss": 0.37, "step": 14539 }, { "epoch": 2.989001952924247, "grad_norm": 0.23626869916915894, "learning_rate": 2.9972691736046556e-09, "loss": 0.3708, "step": 14540 }, { "epoch": 2.9892075238976257, "grad_norm": 0.2292552888393402, "learning_rate": 2.8830992666911696e-09, "loss": 0.3854, "step": 14541 }, { "epoch": 2.9894130948710043, "grad_norm": 0.13139550387859344, "learning_rate": 2.7711461574081443e-09, "loss": 0.4664, "step": 14542 }, { "epoch": 2.989618665844383, "grad_norm": 0.2349810004234314, "learning_rate": 2.6614098512811603e-09, "loss": 0.394, "step": 14543 }, { "epoch": 2.9898242368177614, "grad_norm": 0.23067308962345123, "learning_rate": 2.553890353700905e-09, "loss": 0.3803, "step": 14544 }, { "epoch": 2.99002980779114, "grad_norm": 0.238302543759346, "learning_rate": 2.448587669978131e-09, "loss": 0.3568, "step": 14545 }, { "epoch": 2.9902353787645186, "grad_norm": 0.11440926790237427, "learning_rate": 2.345501805298689e-09, "loss": 0.4259, "step": 14546 }, { "epoch": 2.9904409497378968, "grad_norm": 0.23391030728816986, "learning_rate": 2.244632764733523e-09, "loss": 0.3822, "step": 14547 }, { "epoch": 2.990646520711276, "grad_norm": 0.2159079611301422, "learning_rate": 2.145980553253657e-09, "loss": 0.3729, "step": 14548 }, { "epoch": 2.990852091684654, "grad_norm": 0.2323864847421646, "learning_rate": 2.0495451757251983e-09, "loss": 0.3891, "step": 14549 }, { "epoch": 2.9910576626580325, "grad_norm": 0.23049965500831604, "learning_rate": 1.955326636899346e-09, "loss": 0.3736, "step": 14550 }, { "epoch": 2.991263233631411, "grad_norm": 0.227107435464859, "learning_rate": 1.8633249414073963e-09, "loss": 0.3669, "step": 14551 }, { "epoch": 2.9914688046047897, "grad_norm": 0.22962632775306702, "learning_rate": 1.7735400937957114e-09, "loss": 0.3931, "step": 14552 }, { "epoch": 2.9916743755781683, "grad_norm": 0.22868715226650238, "learning_rate": 1.6859720984757631e-09, "loss": 0.3688, "step": 14553 }, { "epoch": 2.991879946551547, "grad_norm": 0.22757934033870697, "learning_rate": 1.6006209597640986e-09, "loss": 0.3572, "step": 14554 }, { "epoch": 2.9920855175249255, "grad_norm": 0.22525522112846375, "learning_rate": 1.5174866818723487e-09, "loss": 0.3889, "step": 14555 }, { "epoch": 2.992291088498304, "grad_norm": 0.22742381691932678, "learning_rate": 1.4365692688922405e-09, "loss": 0.3883, "step": 14556 }, { "epoch": 2.9924966594716826, "grad_norm": 0.2328689843416214, "learning_rate": 1.3578687248055888e-09, "loss": 0.3777, "step": 14557 }, { "epoch": 2.9927022304450612, "grad_norm": 0.2241593301296234, "learning_rate": 1.2813850534992843e-09, "loss": 0.373, "step": 14558 }, { "epoch": 2.99290780141844, "grad_norm": 0.14074623584747314, "learning_rate": 1.207118258730322e-09, "loss": 0.4593, "step": 14559 }, { "epoch": 2.9931133723918184, "grad_norm": 0.22643250226974487, "learning_rate": 1.1350683441657684e-09, "loss": 0.3729, "step": 14560 }, { "epoch": 2.993318943365197, "grad_norm": 0.23651181161403656, "learning_rate": 1.06523531334779e-09, "loss": 0.3741, "step": 14561 }, { "epoch": 2.993524514338575, "grad_norm": 0.23334497213363647, "learning_rate": 9.976191697286253e-10, "loss": 0.3901, "step": 14562 }, { "epoch": 2.993730085311954, "grad_norm": 0.2361806035041809, "learning_rate": 9.322199166256207e-10, "loss": 0.3956, "step": 14563 }, { "epoch": 2.9939356562853323, "grad_norm": 0.23388999700546265, "learning_rate": 8.690375572711906e-10, "loss": 0.3776, "step": 14564 }, { "epoch": 2.9941412272587113, "grad_norm": 0.12321368604898453, "learning_rate": 8.080720947678533e-10, "loss": 0.4526, "step": 14565 }, { "epoch": 2.9943467982320895, "grad_norm": 0.23372013866901398, "learning_rate": 7.493235321331948e-10, "loss": 0.3964, "step": 14566 }, { "epoch": 2.994552369205468, "grad_norm": 0.22259008884429932, "learning_rate": 6.927918722499093e-10, "loss": 0.3632, "step": 14567 }, { "epoch": 2.9947579401788467, "grad_norm": 0.22694145143032074, "learning_rate": 6.384771179057669e-10, "loss": 0.3826, "step": 14568 }, { "epoch": 2.9949635111522253, "grad_norm": 0.22071406245231628, "learning_rate": 5.863792717736293e-10, "loss": 0.3715, "step": 14569 }, { "epoch": 2.995169082125604, "grad_norm": 0.2295757234096527, "learning_rate": 5.364983364314347e-10, "loss": 0.348, "step": 14570 }, { "epoch": 2.9953746530989824, "grad_norm": 0.11967863142490387, "learning_rate": 4.888343143222285e-10, "loss": 0.4525, "step": 14571 }, { "epoch": 2.995580224072361, "grad_norm": 0.23142680525779724, "learning_rate": 4.4338720780412456e-10, "loss": 0.3912, "step": 14572 }, { "epoch": 2.9957857950457396, "grad_norm": 0.12113110721111298, "learning_rate": 4.0015701911533256e-10, "loss": 0.4394, "step": 14573 }, { "epoch": 2.995991366019118, "grad_norm": 0.1214088723063469, "learning_rate": 3.591437503791539e-10, "loss": 0.4441, "step": 14574 }, { "epoch": 2.996196936992497, "grad_norm": 0.24138882756233215, "learning_rate": 3.203474036239662e-10, "loss": 0.381, "step": 14575 }, { "epoch": 2.9964025079658754, "grad_norm": 0.22098585963249207, "learning_rate": 2.8376798075324673e-10, "loss": 0.379, "step": 14576 }, { "epoch": 2.9966080789392535, "grad_norm": 0.22628125548362732, "learning_rate": 2.4940548357554884e-10, "loss": 0.385, "step": 14577 }, { "epoch": 2.9968136499126325, "grad_norm": 0.23406754434108734, "learning_rate": 2.1725991378451772e-10, "loss": 0.3906, "step": 14578 }, { "epoch": 2.9970192208860107, "grad_norm": 0.2328203022480011, "learning_rate": 1.8733127295389452e-10, "loss": 0.393, "step": 14579 }, { "epoch": 2.9972247918593897, "grad_norm": 0.11977335065603256, "learning_rate": 1.5961956256749233e-10, "loss": 0.4311, "step": 14580 }, { "epoch": 2.997430362832768, "grad_norm": 0.22989055514335632, "learning_rate": 1.3412478398922012e-10, "loss": 0.3797, "step": 14581 }, { "epoch": 2.9976359338061465, "grad_norm": 0.23267367482185364, "learning_rate": 1.1084693847307482e-10, "loss": 0.3734, "step": 14582 }, { "epoch": 2.997841504779525, "grad_norm": 0.22674784064292908, "learning_rate": 8.978602716813722e-11, "loss": 0.385, "step": 14583 }, { "epoch": 2.9980470757529036, "grad_norm": 0.24478791654109955, "learning_rate": 7.09420511085801e-11, "loss": 0.3813, "step": 14584 }, { "epoch": 2.9982526467262822, "grad_norm": 0.23882992565631866, "learning_rate": 5.431501122366012e-11, "loss": 0.3949, "step": 14585 }, { "epoch": 2.998458217699661, "grad_norm": 0.2423866242170334, "learning_rate": 3.990490833771787e-11, "loss": 0.3893, "step": 14586 }, { "epoch": 2.9986637886730394, "grad_norm": 0.24024717509746552, "learning_rate": 2.771174315019387e-11, "loss": 0.3785, "step": 14587 }, { "epoch": 2.998869359646418, "grad_norm": 0.23211045563220978, "learning_rate": 1.773551627060055e-11, "loss": 0.3623, "step": 14588 }, { "epoch": 2.9990749306197966, "grad_norm": 0.22888019680976868, "learning_rate": 9.976228188546265e-12, "loss": 0.3982, "step": 14589 }, { "epoch": 2.999280501593175, "grad_norm": 0.23154133558273315, "learning_rate": 4.433879288723298e-12, "loss": 0.3689, "step": 14590 }, { "epoch": 2.9994860725665538, "grad_norm": 0.22858844697475433, "learning_rate": 1.1084698359198341e-12, "loss": 0.3763, "step": 14591 }, { "epoch": 2.999691643539932, "grad_norm": 0.22756846249103546, "learning_rate": 0.0, "loss": 0.3625, "step": 14592 }, { "epoch": 2.999691643539932, "step": 14592, "total_flos": 6.439866310377226e+20, "train_loss": 0.5454843599418701, "train_runtime": 158902.6427, "train_samples_per_second": 188.085, "train_steps_per_second": 0.092 } ], "logging_steps": 1.0, "max_steps": 14592, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 50000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6.439866310377226e+20, "train_batch_size": 16, "trial_name": null, "trial_params": null }